123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159 |
- /*****************************************************************************/
- /*IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. */
- /*By downloading, copying, installing or using the software you agree */
- /*to this license. If you do not agree to this license, do not download, */
- /*install, copy or use the software. */
- /* */
- /* */
- /*Copyright (c) 2005 Northwestern University */
- /*All rights reserved. */
- /*Redistribution of the software in source and binary forms, */
- /*with or without modification, is permitted provided that the */
- /*following conditions are met: */
- /* */
- /*1 Redistributions of source code must retain the above copyright */
- /* notice, this list of conditions and the following disclaimer. */
- /* */
- /*2 Redistributions in binary form must reproduce the above copyright */
- /* notice, this list of conditions and the following disclaimer in the */
- /* documentation and/or other materials provided with the distribution.*/
- /* */
- /*3 Neither the name of Northwestern University nor the names of its */
- /* contributors may be used to endorse or promote products derived */
- /* from this software without specific prior written permission. */
- /* */
- /*THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS */
- /*IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED */
- /*TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY, NON-INFRINGEMENT AND */
- /*FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL */
- /*NORTHWESTERN UNIVERSITY OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, */
- /*INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
- /*(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR */
- /*SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) */
- /*HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, */
- /*STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN */
- /*ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
- /*POSSIBILITY OF SUCH DAMAGE. */
- /******************************************************************************/
- /*************************************************************************/
- /** File: cluster.c **/
- /** Description: Takes as input a file, containing 1 data point per **/
- /** per line, and performs a fuzzy c-means clustering **/
- /** on the data. Fuzzy clustering is performed using **/
- /** min to max clusters and the clustering that gets **/
- /** the best score according to a compactness and **/
- /** separation criterion are returned. **/
- /** Author: Brendan McCane **/
- /** James Cook University of North Queensland. **/
- /** Australia. email: mccane@cs.jcu.edu.au **/
- /** **/
- /** Edited by: Jay Pisharath, Wei-keng Liao **/
- /** Northwestern University. **/
- /** **/
- /** ================================================================ **/
- /** **/
- /** Edited by: Shuai Che, David Tarjan, Sang-Ha Lee **/
- /** University of Virginia **/
- /** **/
- /** Description: No longer supports fuzzy c-means clustering; **/
- /** only regular k-means clustering. **/
- /** No longer performs "validity" function to analyze **/
- /** compactness and separation crietria; instead **/
- /** calculate root mean squared error. **/
- /** **/
- /*************************************************************************/
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <limits.h>
- #include <math.h>
- #include <float.h>
- #include <omp.h>
- #include "kmeans.h"
- float min_rmse_ref = FLT_MAX;
- extern double wtime(void);
- /* reference min_rmse value */
- /*---< cluster() >-----------------------------------------------------------*/
- int cluster(int npoints, /* number of data points */
- int nfeatures, /* number of attributes for each point */
- float **features, /* array: [npoints][nfeatures] */
- int min_nclusters, /* range of min to max number of clusters */
- int max_nclusters,
- float threshold, /* loop terminating factor */
- int *best_nclusters, /* out: number between min and max with lowest RMSE */
- float ***cluster_centres, /* out: [best_nclusters][nfeatures] */
- float *min_rmse, /* out: minimum RMSE */
- int isRMSE, /* calculate RMSE */
- int nloops,
- int platform_id,
- int device_id,
- int use_gpu /* number of iteration for each number of clusters */
- )
- {
- int nclusters; /* number of clusters k */
- int index =0; /* number of iteration to reach the best RMSE */
- int rmse; /* RMSE for each clustering */
- int *membership; /* which cluster a data point belongs to */
- float **tmp_cluster_centres; /* hold coordinates of cluster centers */
- int i;
- /* allocate memory for membership */
- membership = (int*) malloc(npoints * sizeof(int));
- /* sweep k from min to max_nclusters to find the best number of clusters */
- for(nclusters = min_nclusters; nclusters <= max_nclusters; nclusters++)
- {
- if (nclusters > npoints) break; /* cannot have more clusters than points */
- /* allocate device memory, invert data array (@ kmeans_cuda.cu) */
- allocate(npoints, nfeatures, nclusters, features, platform_id, device_id, use_gpu);
- /* iterate nloops times for each number of clusters */
- for(i = 0; i < nloops; i++)
- {
- /* initialize initial cluster centers, CUDA calls (@ kmeans_cuda.cu) */
- tmp_cluster_centres = kmeans_clustering(features,
- nfeatures,
- npoints,
- nclusters,
- threshold,
- membership);
- if (*cluster_centres) {
- free((*cluster_centres)[0]);
- free(*cluster_centres);
- }
- *cluster_centres = tmp_cluster_centres;
-
-
- /* find the number of clusters with the best RMSE */
- if(isRMSE)
- {
- rmse = rms_err(features,
- nfeatures,
- npoints,
- tmp_cluster_centres,
- nclusters);
-
- if(rmse < min_rmse_ref){
- min_rmse_ref = rmse; //update reference min RMSE
- *min_rmse = min_rmse_ref; //update return min RMSE
- *best_nclusters = nclusters; //update optimum number of clusters
- index = i; //update number of iteration to reach best RMSE
- }
- }
- }
-
- deallocateMemory(); /* free device memory (@ kmeans_cuda.cu) */
- }
- free(membership);
- return index;
- }
|