Procházet zdrojové kódy

Implemented device selection for kmeans

We now use command-line parameters to select the OpenCL platform, device and
device type instead of having them hardcoded in the code. This way we can
easily change the type of the benchmark without recompiling it.

I reused the available routines for parsing the arguments, and added the
parameter passing needed to correctly initialize the OpenCL runtime.
Andrea Gussoni před 8 roky
rodič
revize
a9ef5b8ea8

+ 16 - 13
opencl/kmeans/cluster.c

@@ -79,24 +79,27 @@ extern double wtime(void);
 	/* reference min_rmse value */
 
 /*---< cluster() >-----------------------------------------------------------*/
-int cluster(int      npoints,				/* number of data points */
-            int      nfeatures,				/* number of attributes for each point */
-            float  **features,			/* array: [npoints][nfeatures] */                  
-            int      min_nclusters,			/* range of min to max number of clusters */
-			int		 max_nclusters,
-            float    threshold,				/* loop terminating factor */
-            int     *best_nclusters,		/* out: number between min and max with lowest RMSE */
+int cluster(int   npoints,				/* number of data points */
+            int   nfeatures,				/* number of attributes for each point */
+            float **features,			/* array: [npoints][nfeatures] */                  
+            int   min_nclusters,			/* range of min to max number of clusters */
+			      int		max_nclusters,
+            float threshold,				/* loop terminating factor */
+            int   *best_nclusters,		/* out: number between min and max with lowest RMSE */
             float ***cluster_centres,		/* out: [best_nclusters][nfeatures] */
-			float	*min_rmse,				/* out: minimum RMSE */
-			int		 isRMSE,				/* calculate RMSE */
-			int		 nloops					/* number of iteration for each number of clusters */
+			      float	*min_rmse,				/* out: minimum RMSE */
+			      int		isRMSE,				/* calculate RMSE */
+			      int	  nloops,
+            int   platform_id,
+            int   device_id,
+            int   use_gpu					/* number of iteration for each number of clusters */
 			)
 {    
 	int		nclusters;						/* number of clusters k */	
 	int		index =0;						/* number of iteration to reach the best RMSE */
 	int		rmse;							/* RMSE for each clustering */
-    int    *membership;						/* which cluster a data point belongs to */
-    float **tmp_cluster_centres;			/* hold coordinates of cluster centers */
+  int    *membership;						/* which cluster a data point belongs to */
+  float **tmp_cluster_centres;			/* hold coordinates of cluster centers */
 	int		i;
 
 	/* allocate memory for membership */
@@ -108,7 +111,7 @@ int cluster(int      npoints,				/* number of data points */
 		if (nclusters > npoints) break;	/* cannot have more clusters than points */
 
 		/* allocate device memory, invert data array (@ kmeans_cuda.cu) */
-		allocate(npoints, nfeatures, nclusters, features);
+		allocate(npoints, nfeatures, nclusters, features, platform_id, device_id, use_gpu);
 
 		/* iterate nloops times for each number of clusters */
 		for(i = 0; i < nloops; i++)

+ 12 - 13
opencl/kmeans/kmeans.cpp

@@ -58,7 +58,7 @@ static cl_device_type   device_type;
 static cl_device_id   * device_list;
 static cl_int           num_devices;
 
-static int initialize(int use_gpu)
+static int initialize(int platform_id, int device_id, int use_gpu)
 {
 	cl_int result;
 	size_t size;
@@ -68,11 +68,11 @@ static int initialize(int use_gpu)
 	cl_platform_id *platforms_ids;
 	// create OpenCL context
 	clGetPlatformIDs(0, NULL, &platformCount);
-    	platforms_ids = (cl_platform_id*) malloc(sizeof(cl_platform_id) * platformCount);
-    	clGetPlatformIDs(platformCount, platforms_ids, NULL);
+  platforms_ids = (cl_platform_id*) malloc(sizeof(cl_platform_id) * platformCount);
+  clGetPlatformIDs(platformCount, platforms_ids, NULL);
 
 	if (clGetPlatformIDs(platformCount, platforms_ids, NULL) != CL_SUCCESS) { printf("ERROR: clGetPlatformIDs(1,*,0) failed\n"); return -1; }
-	cl_context_properties ctxprop[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platforms_ids[1], 0};
+	cl_context_properties ctxprop[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platforms_ids[platform_id], 0};
 	device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
 	context = clCreateContextFromType( ctxprop, device_type, NULL, NULL, NULL );
 	if( !context ) { printf("ERROR: clCreateContextFromType(%s) failed\n", use_gpu ? "GPU" : "CPU"); return -1; }
@@ -88,7 +88,7 @@ static int initialize(int use_gpu)
 	if( result != CL_SUCCESS ) { printf("ERROR: clGetContextInfo() failed\n"); return -1; }
 
 	// create command queue for the first device
-	cmd_queue = clCreateCommandQueue( context, device_list[0], 0, NULL );
+	cmd_queue = clCreateCommandQueue( context, device_list[device_id], 0, NULL );
 	if( !cmd_queue ) { printf("ERROR: clCreateCommandQueue() failed\n"); return -1; }
 
 	return 0;
@@ -126,7 +126,7 @@ float *feature_d;
 float *clusters_d;
 float *center_d;
 
-int allocate(int n_points, int n_features, int n_clusters, float **feature)
+int allocate(int n_points, int n_features, int n_clusters, float **feature, int platform_id, int device_id, int use_gpu)
 {
 
 	int sourcesize = 1024*1024;
@@ -141,8 +141,7 @@ int allocate(int n_points, int n_features, int n_clusters, float **feature)
 	fclose(fp);
 		
 	// OpenCL initialization
-	int use_gpu = 0;
-	if(initialize(use_gpu)) return -1;
+	if(initialize(platform_id, device_id, use_gpu)) return -1;
 
 	// compile kernel
 	cl_int err = 0;
@@ -219,13 +218,13 @@ int main( int argc, char** argv)
 }
 
 int	kmeansOCL(float **feature,    /* in: [npoints][nfeatures] */
-           int     n_features,
-           int     n_points,
-           int     n_clusters,
-           int    *membership,
+       int     n_features,
+       int     n_points,
+       int     n_clusters,
+       int    *membership,
 		   float **clusters,
 		   int     *new_centers_len,
-           float  **new_centers)	
+       float  **new_centers)	
 {
   
 	int delta = 0;

+ 2 - 2
opencl/kmeans/kmeans.h

@@ -48,9 +48,9 @@
 float   euclid_dist_2        (float*, float*, int);
 int     find_nearest_point   (float* , int, float**, int);
 float	rms_err(float**, int, int, float**, int);
-int     cluster(int, int, float**, int, int, float, int*, float***, float*, int, int);
+int     cluster(int, int, float**, int, int, float, int*, float***, float*, int, int, int, int, int);
 int setup(int argc, char** argv);
-int allocate(int npoints, int nfeatures, int nclusters, float **feature);
+int allocate(int npoints, int nfeatures, int nclusters, float **feature, int platform_id, int device_id, int use_gpu);
 void deallocateMemory();
 int	kmeansOCL(float **feature, int nfeatures, int npoints, int nclusters, int *membership, float **clusters, int *new_centers_len, float  **new_centers);
 float** kmeans_clustering(float **feature, int nfeatures, int npoints, int nclusters, float threshold, int *membership); 

+ 10 - 10
opencl/kmeans/kmeans_clustering.c

@@ -80,12 +80,12 @@ float** kmeans_clustering(float **feature,    /* in: [npoints][nfeatures] */
                           float   threshold,
                           int    *membership) /* out: [npoints] */
 {    
-    int      i, j, n = 0;				/* counters */
+  int      i, j, n = 0;				/* counters */
 	int		 loop=0, temp;
-    int     *new_centers_len;	/* [nclusters]: no. of points in each cluster */
-    float    delta;				/* if the point moved */
-    float  **clusters;			/* out: [nclusters][nfeatures] */
-    float  **new_centers;		/* [nclusters][nfeatures] */
+  int     *new_centers_len;	/* [nclusters]: no. of points in each cluster */
+  float    delta;				/* if the point moved */
+  float  **clusters;			/* out: [nclusters][nfeatures] */
+  float  **new_centers;		/* [nclusters][nfeatures] */
 
 	int     *initial;			/* used to hold the index of points not yet selected
 								   prevents the "birthday problem" of dual selection (?)
@@ -99,11 +99,11 @@ float** kmeans_clustering(float **feature,    /* in: [npoints][nfeatures] */
 	if (nclusters > npoints)
 		nclusters = npoints;
 
-    /* allocate space for and initialize returning variable clusters[] */
-    clusters    = (float**) malloc(nclusters *             sizeof(float*));
-    clusters[0] = (float*)  malloc(nclusters * nfeatures * sizeof(float));
-    for (i=1; i<nclusters; i++)
-        clusters[i] = clusters[i-1] + nfeatures;
+  /* allocate space for and initialize returning variable clusters[] */
+  clusters    = (float**) malloc(nclusters *             sizeof(float*));
+  clusters[0] = (float*)  malloc(nclusters * nfeatures * sizeof(float));
+  for (i=1; i<nclusters; i++)
+      clusters[i] = clusters[i-1] + nfeatures;
 
 	/* initialize the random clusters */
 	initial = (int *) malloc (npoints * sizeof(int));

+ 32 - 15
opencl/kmeans/read_input.c

@@ -87,15 +87,18 @@ extern double wtime(void);
 /*---< usage() >------------------------------------------------------------*/
 void usage(char *argv0) {
     char *help =
-        "\nUsage: %s [switches] -i filename\n\n"
+    "\nUsage: %s [switches] -i filename\n\n"
 		"    -i filename      :file containing data to be clustered\n"		
 		"    -m max_nclusters :maximum number of clusters allowed    [default=5]\n"
-        "    -n min_nclusters :minimum number of clusters allowed    [default=5]\n"
+    "    -n min_nclusters :minimum number of clusters allowed    [default=5]\n"
 		"    -t threshold     :threshold value                       [default=0.001]\n"
 		"    -l nloops        :iteration for each number of clusters [default=1]\n"
 		"    -b               :input file is in binary format\n"
-        "    -r               :calculate RMSE                        [default=off]\n"
-		"    -o               :output cluster center coordinates     [default=off]\n";
+    "    -r               :calculate RMSE                        [default=off]\n"
+		"    -o               :output cluster center coordinates     [default=off]\n"
+    "    -p platform_id   :OCL platform to use                   [default=0]\n"
+    "    -d device_id     :OCL device to use                     [default=0]\n"
+    "    -g use_gpu       :1 for GPU 0 for CPU                   [default=0]\n";
     fprintf(stderr, help, argv0);
     exit(-1);
 }
@@ -103,7 +106,7 @@ void usage(char *argv0) {
 /*---< main() >-------------------------------------------------------------*/
 int setup(int argc, char **argv) {
 		int		opt;
- extern char   *optarg;
+    extern char   *optarg;
 		char   *filename = 0;
 		float  *buf;
 		char	line[1024];
@@ -127,9 +130,14 @@ int setup(int argc, char **argv) {
 		
 		int		isOutput = 0;
 		//float	cluster_timing, io_timing;		
+    
+    // Variables to store information on platform and device to use_gpu
+    int platform_id = 0;
+    int device_id = 0;
+    int use_gpu = 0;
 
 		/* obtain command line arguments and change appropriate options */
-		while ( (opt=getopt(argc,argv,"i:t:m:n:l:bro"))!= EOF) {
+		while ( (opt=getopt(argc,argv,"i:t:m:n:l:brop:d:g:"))!= EOF) {
         switch (opt) {
             case 'i': filename=optarg;
                       break;
@@ -141,12 +149,18 @@ int setup(int argc, char **argv) {
                       break;
             case 'n': min_nclusters = atoi(optarg);
                       break;
-			case 'r': isRMSE = 1;
+			      case 'r': isRMSE = 1;
                       break;
-			case 'o': isOutput = 1;
-					  break;
-		    case 'l': nloops = atoi(optarg);
-					  break;
+			      case 'o': isOutput = 1;
+					            break;
+		        case 'l': nloops = atoi(optarg);
+					            break;
+            case 'p': platform_id = atoi(optarg);
+					            break;
+            case 'd': device_id = atoi(optarg);
+					            break;
+            case 'g': use_gpu = atoi(optarg);
+                      break;                                                    
             case '?': usage(argv[0]);
                       break;
             default: usage(argv[0]);
@@ -242,11 +256,14 @@ int setup(int argc, char **argv) {
 					min_nclusters,			/* range of min to max number of clusters */
 					max_nclusters,
 					threshold,				/* loop termination factor */
-				   &best_nclusters,			/* return: number between min and max */
-				   &cluster_centres,		/* return: [best_nclusters][nfeatures] */  
-				   &rmse,					/* Root Mean Squared Error */
+				  &best_nclusters,			/* return: number between min and max */
+				  &cluster_centres,		/* return: [best_nclusters][nfeatures] */  
+				  &rmse,					/* Root Mean Squared Error */
 					isRMSE,					/* calculate RMSE */
-					nloops);				/* number of iteration for each number of clusters */		
+					nloops,
+          platform_id,
+          device_id,
+          use_gpu);				/* number of iteration for each number of clusters */		
     
 	//cluster_timing = omp_get_wtime() - cluster_timing;
 

+ 1 - 1
opencl/kmeans/run-cpu

@@ -1 +1 @@
-./kmeans -o -r -l 10 -i ../../data/kmeans/kdd_cup 
+./kmeans -o -r -l 10 -i ../../data/kmeans/kdd_cup -p 1 -d 0 -g 0 

+ 1 - 0
opencl/kmeans/run-gpu

@@ -0,0 +1 @@
+./kmeans -o -r -l 10 -i ../../data/kmeans/kdd_cup -p 0 -d 0 -g 1