cluster.c 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. /*****************************************************************************/
  2. /*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */
  3. /*By downloading, copying, installing or using the software you agree */
  4. /*to this license. If you do not agree to this license, do not download, */
  5. /*install, copy or use the software. */
  6. /* */
  7. /* */
  8. /*Copyright (c) 2005 Northwestern University */
  9. /*All rights reserved. */
  10. /*Redistribution of the software in source and binary forms, */
  11. /*with or without modification, is permitted provided that the */
  12. /*following conditions are met: */
  13. /* */
  14. /*1 Redistributions of source code must retain the above copyright */
  15. /* notice, this list of conditions and the following disclaimer. */
  16. /* */
  17. /*2 Redistributions in binary form must reproduce the above copyright */
  18. /* notice, this list of conditions and the following disclaimer in the */
  19. /* documentation and/or other materials provided with the distribution.*/
  20. /* */
  21. /*3 Neither the name of Northwestern University nor the names of its */
  22. /* contributors may be used to endorse or promote products derived */
  23. /* from this software without specific prior written permission. */
  24. /* */
  25. /*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */
  26. /*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
  27. /*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */
  28. /*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */
  29. /*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */
  30. /*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  31. /*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */
  32. /*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
  33. /*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */
  34. /*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */
  35. /*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  36. /*POSSIBILITY OF SUCH DAMAGE. */
  37. /******************************************************************************/
  38. /*************************************************************************/
  39. /** File: cluster.c **/
  40. /** Description: Takes as input a file, containing 1 data point per **/
  41. /** per line, and performs a fuzzy c-means clustering **/
  42. /** on the data. Fuzzy clustering is performed using **/
  43. /** min to max clusters and the clustering that gets **/
  44. /** the best score according to a compactness and **/
  45. /** separation criterion are returned. **/
  46. /** Author: Brendan McCane **/
  47. /** James Cook University of North Queensland. **/
  48. /** Australia. email: mccane@cs.jcu.edu.au **/
  49. /** **/
  50. /** Edited by: Jay Pisharath, Wei-keng Liao **/
  51. /** Northwestern University. **/
  52. /** **/
  53. /** ================================================================ **/
  54. /** **/
  55. /** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/
  56. /** University of Virginia **/
  57. /** **/
  58. /** Description: No longer supports fuzzy c-means clustering; **/
  59. /** only regular k-means clustering. **/
  60. /** No longer performs "validity" function to analyze **/
  61. /** compactness and separation crietria; instead **/
  62. /** calculate root mean squared error. **/
  63. /** **/
  64. /*************************************************************************/
  65. #include <stdio.h>
  66. #include <stdlib.h>
  67. #include <string.h>
  68. #include <limits.h>
  69. #include <math.h>
  70. #include <float.h>
  71. #include <omp.h>
  72. #include "kmeans.h"
  73. float min_rmse_ref = FLT_MAX;
  74. extern double wtime(void);
  75. /* reference min_rmse value */
  76. /*---< cluster() >-----------------------------------------------------------*/
  77. int cluster(int npoints, /* number of data points */
  78. int nfeatures, /* number of attributes for each point */
  79. float **features, /* array: [npoints][nfeatures] */
  80. int min_nclusters, /* range of min to max number of clusters */
  81. int max_nclusters,
  82. float threshold, /* loop terminating factor */
  83. int *best_nclusters, /* out: number between min and max with lowest RMSE */
  84. float ***cluster_centres, /* out: [best_nclusters][nfeatures] */
  85. float *min_rmse, /* out: minimum RMSE */
  86. int isRMSE, /* calculate RMSE */
  87. int nloops,
  88. int platform_id,
  89. int device_id,
  90. int use_gpu /* number of iteration for each number of clusters */
  91. )
  92. {
  93. int nclusters; /* number of clusters k */
  94. int index =0; /* number of iteration to reach the best RMSE */
  95. int rmse; /* RMSE for each clustering */
  96. int *membership; /* which cluster a data point belongs to */
  97. float **tmp_cluster_centres; /* hold coordinates of cluster centers */
  98. int i;
  99. /* allocate memory for membership */
  100. membership = (int*) malloc(npoints * sizeof(int));
  101. /* sweep k from min to max_nclusters to find the best number of clusters */
  102. for(nclusters = min_nclusters; nclusters <= max_nclusters; nclusters++)
  103. {
  104. if (nclusters > npoints) break; /* cannot have more clusters than points */
  105. /* allocate device memory, invert data array (@ kmeans_cuda.cu) */
  106. allocate(npoints, nfeatures, nclusters, features, platform_id, device_id, use_gpu);
  107. /* iterate nloops times for each number of clusters */
  108. for(i = 0; i < nloops; i++)
  109. {
  110. /* initialize initial cluster centers, CUDA calls (@ kmeans_cuda.cu) */
  111. tmp_cluster_centres = kmeans_clustering(features,
  112. nfeatures,
  113. npoints,
  114. nclusters,
  115. threshold,
  116. membership);
  117. if (*cluster_centres) {
  118. free((*cluster_centres)[0]);
  119. free(*cluster_centres);
  120. }
  121. *cluster_centres = tmp_cluster_centres;
  122. /* find the number of clusters with the best RMSE */
  123. if(isRMSE)
  124. {
  125. rmse = rms_err(features,
  126. nfeatures,
  127. npoints,
  128. tmp_cluster_centres,
  129. nclusters);
  130. if(rmse < min_rmse_ref){
  131. min_rmse_ref = rmse; //update reference min RMSE
  132. *min_rmse = min_rmse_ref; //update return min RMSE
  133. *best_nclusters = nclusters; //update optimum number of clusters
  134. index = i; //update number of iteration to reach best RMSE
  135. }
  136. }
  137. }
  138. deallocateMemory(); /* free device memory (@ kmeans_cuda.cu) */
  139. }
  140. free(membership);
  141. return index;
  142. }