rmse.c 3.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. /*************************************************************************/
  2. /** File: rmse.c **/
  3. /** Description: calculate root mean squared error of particular **/
  4. /** clustering. **/
  5. /** Author: Sang-Ha Lee **/
  6. /** University of Virginia. **/
  7. /** **/
  8. /** Note: euclid_dist_2() and find_nearest_point() adopted from **/
  9. /** Minebench code. **/
  10. /** **/
  11. /*************************************************************************/
  12. #include <stdio.h>
  13. #include <stdlib.h>
  14. #include <float.h>
  15. #include <math.h>
  16. #include <omp.h>
  17. #include "kmeans.h"
  18. extern double wtime(void);
  19. /*----< euclid_dist_2() >----------------------------------------------------*/
  20. /* multi-dimensional spatial Euclid distance square */
  21. __inline
  22. float euclid_dist_2(float *pt1,
  23. float *pt2,
  24. int numdims)
  25. {
  26. int i;
  27. float ans=0.0;
  28. for (i=0; i<numdims; i++)
  29. ans += (pt1[i]-pt2[i]) * (pt1[i]-pt2[i]);
  30. return(ans);
  31. }
  32. /*----< find_nearest_point() >-----------------------------------------------*/
  33. __inline
  34. int find_nearest_point(float *pt, /* [nfeatures] */
  35. int nfeatures,
  36. float **pts, /* [npts][nfeatures] */
  37. int npts)
  38. {
  39. int index, i;
  40. float max_dist=FLT_MAX;
  41. /* find the cluster center id with min distance to pt */
  42. for (i=0; i<npts; i++) {
  43. float dist;
  44. dist = euclid_dist_2(pt, pts[i], nfeatures); /* no need square root */
  45. if (dist < max_dist) {
  46. max_dist = dist;
  47. index = i;
  48. }
  49. }
  50. return(index);
  51. }
  52. /*----< rms_err(): calculates RMSE of clustering >-------------------------------------*/
  53. float rms_err (float **feature, /* [npoints][nfeatures] */
  54. int nfeatures,
  55. int npoints,
  56. float **cluster_centres, /* [nclusters][nfeatures] */
  57. int nclusters)
  58. {
  59. int i;
  60. int nearest_cluster_index; /* cluster center id with min distance to pt */
  61. float sum_euclid = 0.0; /* sum of Euclidean distance squares */
  62. float ret; /* return value */
  63. /* calculate and sum the sqaure of euclidean distance*/
  64. #pragma omp parallel for \
  65. shared(feature,cluster_centres) \
  66. firstprivate(npoints,nfeatures,nclusters) \
  67. private(i, nearest_cluster_index) \
  68. schedule (static)
  69. for (i=0; i<npoints; i++) {
  70. nearest_cluster_index = find_nearest_point(feature[i],
  71. nfeatures,
  72. cluster_centres,
  73. nclusters);
  74. sum_euclid += euclid_dist_2(feature[i],
  75. cluster_centres[nearest_cluster_index],
  76. nfeatures);
  77. }
  78. /* divide by n, then take sqrt */
  79. ret = sqrt(sum_euclid / npoints);
  80. return(ret);
  81. }