Эх сурвалжийг харах

Implemented device selection for kmeans

We now use command-line parameters to select the OpenCL platform, device and
device type instead of having them hardcoded in the code. This way we can
easily change the type of the benchmark without recompiling it.

I reused the available argument-parsing routines and added the parameter
passing needed to correctly initialize the OpenCL runtime.
Andrea Gussoni 8 жил өмнө
parent
commit
a9ef5b8ea8

+ 16 - 13
opencl/kmeans/cluster.c

@@ -79,24 +79,27 @@ extern double wtime(void);
 	/* reference min_rmse value */
 	/* reference min_rmse value */
 
 
 /*---< cluster() >-----------------------------------------------------------*/
 /*---< cluster() >-----------------------------------------------------------*/
-int cluster(int      npoints,				/* number of data points */
-            int      nfeatures,				/* number of attributes for each point */
-            float  **features,			/* array: [npoints][nfeatures] */                  
-            int      min_nclusters,			/* range of min to max number of clusters */
-			int		 max_nclusters,
-            float    threshold,				/* loop terminating factor */
-            int     *best_nclusters,		/* out: number between min and max with lowest RMSE */
+int cluster(int   npoints,				/* number of data points */
+            int   nfeatures,				/* number of attributes for each point */
+            float **features,			/* array: [npoints][nfeatures] */                  
+            int   min_nclusters,			/* range of min to max number of clusters */
+			      int		max_nclusters,
+            float threshold,				/* loop terminating factor */
+            int   *best_nclusters,		/* out: number between min and max with lowest RMSE */
             float ***cluster_centres,		/* out: [best_nclusters][nfeatures] */
             float ***cluster_centres,		/* out: [best_nclusters][nfeatures] */
-			float	*min_rmse,				/* out: minimum RMSE */
-			int		 isRMSE,				/* calculate RMSE */
-			int		 nloops					/* number of iteration for each number of clusters */
+			      float	*min_rmse,				/* out: minimum RMSE */
+			      int		isRMSE,				/* calculate RMSE */
+			      int	  nloops,
+            int   platform_id,
+            int   device_id,
+            int   use_gpu					/* number of iteration for each number of clusters */
 			)
 			)
 {    
 {    
 	int		nclusters;						/* number of clusters k */	
 	int		nclusters;						/* number of clusters k */	
 	int		index =0;						/* number of iteration to reach the best RMSE */
 	int		index =0;						/* number of iteration to reach the best RMSE */
 	int		rmse;							/* RMSE for each clustering */
 	int		rmse;							/* RMSE for each clustering */
-    int    *membership;						/* which cluster a data point belongs to */
-    float **tmp_cluster_centres;			/* hold coordinates of cluster centers */
+  int    *membership;						/* which cluster a data point belongs to */
+  float **tmp_cluster_centres;			/* hold coordinates of cluster centers */
 	int		i;
 	int		i;
 
 
 	/* allocate memory for membership */
 	/* allocate memory for membership */
@@ -108,7 +111,7 @@ int cluster(int      npoints,				/* number of data points */
 		if (nclusters > npoints) break;	/* cannot have more clusters than points */
 		if (nclusters > npoints) break;	/* cannot have more clusters than points */
 
 
 		/* allocate device memory, invert data array (@ kmeans_cuda.cu) */
 		/* allocate device memory, invert data array (@ kmeans_cuda.cu) */
-		allocate(npoints, nfeatures, nclusters, features);
+		allocate(npoints, nfeatures, nclusters, features, platform_id, device_id, use_gpu);
 
 
 		/* iterate nloops times for each number of clusters */
 		/* iterate nloops times for each number of clusters */
 		for(i = 0; i < nloops; i++)
 		for(i = 0; i < nloops; i++)

+ 12 - 13
opencl/kmeans/kmeans.cpp

@@ -58,7 +58,7 @@ static cl_device_type   device_type;
 static cl_device_id   * device_list;
 static cl_device_id   * device_list;
 static cl_int           num_devices;
 static cl_int           num_devices;
 
 
-static int initialize(int use_gpu)
+static int initialize(int platform_id, int device_id, int use_gpu)
 {
 {
 	cl_int result;
 	cl_int result;
 	size_t size;
 	size_t size;
@@ -68,11 +68,11 @@ static int initialize(int use_gpu)
 	cl_platform_id *platforms_ids;
 	cl_platform_id *platforms_ids;
 	// create OpenCL context
 	// create OpenCL context
 	clGetPlatformIDs(0, NULL, &platformCount);
 	clGetPlatformIDs(0, NULL, &platformCount);
-    	platforms_ids = (cl_platform_id*) malloc(sizeof(cl_platform_id) * platformCount);
-    	clGetPlatformIDs(platformCount, platforms_ids, NULL);
+  platforms_ids = (cl_platform_id*) malloc(sizeof(cl_platform_id) * platformCount);
+  clGetPlatformIDs(platformCount, platforms_ids, NULL);
 
 
 	if (clGetPlatformIDs(platformCount, platforms_ids, NULL) != CL_SUCCESS) { printf("ERROR: clGetPlatformIDs(1,*,0) failed\n"); return -1; }
 	if (clGetPlatformIDs(platformCount, platforms_ids, NULL) != CL_SUCCESS) { printf("ERROR: clGetPlatformIDs(1,*,0) failed\n"); return -1; }
-	cl_context_properties ctxprop[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platforms_ids[1], 0};
+	cl_context_properties ctxprop[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platforms_ids[platform_id], 0};
 	device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
 	device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
 	context = clCreateContextFromType( ctxprop, device_type, NULL, NULL, NULL );
 	context = clCreateContextFromType( ctxprop, device_type, NULL, NULL, NULL );
 	if( !context ) { printf("ERROR: clCreateContextFromType(%s) failed\n", use_gpu ? "GPU" : "CPU"); return -1; }
 	if( !context ) { printf("ERROR: clCreateContextFromType(%s) failed\n", use_gpu ? "GPU" : "CPU"); return -1; }
@@ -88,7 +88,7 @@ static int initialize(int use_gpu)
 	if( result != CL_SUCCESS ) { printf("ERROR: clGetContextInfo() failed\n"); return -1; }
 	if( result != CL_SUCCESS ) { printf("ERROR: clGetContextInfo() failed\n"); return -1; }
 
 
 	// create command queue for the first device
 	// create command queue for the first device
-	cmd_queue = clCreateCommandQueue( context, device_list[0], 0, NULL );
+	cmd_queue = clCreateCommandQueue( context, device_list[device_id], 0, NULL );
 	if( !cmd_queue ) { printf("ERROR: clCreateCommandQueue() failed\n"); return -1; }
 	if( !cmd_queue ) { printf("ERROR: clCreateCommandQueue() failed\n"); return -1; }
 
 
 	return 0;
 	return 0;
@@ -126,7 +126,7 @@ float *feature_d;
 float *clusters_d;
 float *clusters_d;
 float *center_d;
 float *center_d;
 
 
-int allocate(int n_points, int n_features, int n_clusters, float **feature)
+int allocate(int n_points, int n_features, int n_clusters, float **feature, int platform_id, int device_id, int use_gpu)
 {
 {
 
 
 	int sourcesize = 1024*1024;
 	int sourcesize = 1024*1024;
@@ -141,8 +141,7 @@ int allocate(int n_points, int n_features, int n_clusters, float **feature)
 	fclose(fp);
 	fclose(fp);
 		
 		
 	// OpenCL initialization
 	// OpenCL initialization
-	int use_gpu = 0;
-	if(initialize(use_gpu)) return -1;
+	if(initialize(platform_id, device_id, use_gpu)) return -1;
 
 
 	// compile kernel
 	// compile kernel
 	cl_int err = 0;
 	cl_int err = 0;
@@ -219,13 +218,13 @@ int main( int argc, char** argv)
 }
 }
 
 
 int	kmeansOCL(float **feature,    /* in: [npoints][nfeatures] */
 int	kmeansOCL(float **feature,    /* in: [npoints][nfeatures] */
-           int     n_features,
-           int     n_points,
-           int     n_clusters,
-           int    *membership,
+       int     n_features,
+       int     n_points,
+       int     n_clusters,
+       int    *membership,
 		   float **clusters,
 		   float **clusters,
 		   int     *new_centers_len,
 		   int     *new_centers_len,
-           float  **new_centers)	
+       float  **new_centers)	
 {
 {
   
   
 	int delta = 0;
 	int delta = 0;

+ 2 - 2
opencl/kmeans/kmeans.h

@@ -48,9 +48,9 @@
 float   euclid_dist_2        (float*, float*, int);
 float   euclid_dist_2        (float*, float*, int);
 int     find_nearest_point   (float* , int, float**, int);
 int     find_nearest_point   (float* , int, float**, int);
 float	rms_err(float**, int, int, float**, int);
 float	rms_err(float**, int, int, float**, int);
-int     cluster(int, int, float**, int, int, float, int*, float***, float*, int, int);
+int     cluster(int, int, float**, int, int, float, int*, float***, float*, int, int, int, int, int);
 int setup(int argc, char** argv);
 int setup(int argc, char** argv);
-int allocate(int npoints, int nfeatures, int nclusters, float **feature);
+int allocate(int npoints, int nfeatures, int nclusters, float **feature, int platform_id, int device_id, int use_gpu);
 void deallocateMemory();
 void deallocateMemory();
 int	kmeansOCL(float **feature, int nfeatures, int npoints, int nclusters, int *membership, float **clusters, int *new_centers_len, float  **new_centers);
 int	kmeansOCL(float **feature, int nfeatures, int npoints, int nclusters, int *membership, float **clusters, int *new_centers_len, float  **new_centers);
 float** kmeans_clustering(float **feature, int nfeatures, int npoints, int nclusters, float threshold, int *membership); 
 float** kmeans_clustering(float **feature, int nfeatures, int npoints, int nclusters, float threshold, int *membership); 

+ 10 - 10
opencl/kmeans/kmeans_clustering.c

@@ -80,12 +80,12 @@ float** kmeans_clustering(float **feature,    /* in: [npoints][nfeatures] */
                           float   threshold,
                           float   threshold,
                           int    *membership) /* out: [npoints] */
                           int    *membership) /* out: [npoints] */
 {    
 {    
-    int      i, j, n = 0;				/* counters */
+  int      i, j, n = 0;				/* counters */
 	int		 loop=0, temp;
 	int		 loop=0, temp;
-    int     *new_centers_len;	/* [nclusters]: no. of points in each cluster */
-    float    delta;				/* if the point moved */
-    float  **clusters;			/* out: [nclusters][nfeatures] */
-    float  **new_centers;		/* [nclusters][nfeatures] */
+  int     *new_centers_len;	/* [nclusters]: no. of points in each cluster */
+  float    delta;				/* if the point moved */
+  float  **clusters;			/* out: [nclusters][nfeatures] */
+  float  **new_centers;		/* [nclusters][nfeatures] */
 
 
 	int     *initial;			/* used to hold the index of points not yet selected
 	int     *initial;			/* used to hold the index of points not yet selected
 								   prevents the "birthday problem" of dual selection (?)
 								   prevents the "birthday problem" of dual selection (?)
@@ -99,11 +99,11 @@ float** kmeans_clustering(float **feature,    /* in: [npoints][nfeatures] */
 	if (nclusters > npoints)
 	if (nclusters > npoints)
 		nclusters = npoints;
 		nclusters = npoints;
 
 
-    /* allocate space for and initialize returning variable clusters[] */
-    clusters    = (float**) malloc(nclusters *             sizeof(float*));
-    clusters[0] = (float*)  malloc(nclusters * nfeatures * sizeof(float));
-    for (i=1; i<nclusters; i++)
-        clusters[i] = clusters[i-1] + nfeatures;
+  /* allocate space for and initialize returning variable clusters[] */
+  clusters    = (float**) malloc(nclusters *             sizeof(float*));
+  clusters[0] = (float*)  malloc(nclusters * nfeatures * sizeof(float));
+  for (i=1; i<nclusters; i++)
+      clusters[i] = clusters[i-1] + nfeatures;
 
 
 	/* initialize the random clusters */
 	/* initialize the random clusters */
 	initial = (int *) malloc (npoints * sizeof(int));
 	initial = (int *) malloc (npoints * sizeof(int));

+ 32 - 15
opencl/kmeans/read_input.c

@@ -87,15 +87,18 @@ extern double wtime(void);
 /*---< usage() >------------------------------------------------------------*/
 /*---< usage() >------------------------------------------------------------*/
 void usage(char *argv0) {
 void usage(char *argv0) {
     char *help =
     char *help =
-        "\nUsage: %s [switches] -i filename\n\n"
+    "\nUsage: %s [switches] -i filename\n\n"
 		"    -i filename      :file containing data to be clustered\n"		
 		"    -i filename      :file containing data to be clustered\n"		
 		"    -m max_nclusters :maximum number of clusters allowed    [default=5]\n"
 		"    -m max_nclusters :maximum number of clusters allowed    [default=5]\n"
-        "    -n min_nclusters :minimum number of clusters allowed    [default=5]\n"
+    "    -n min_nclusters :minimum number of clusters allowed    [default=5]\n"
 		"    -t threshold     :threshold value                       [default=0.001]\n"
 		"    -t threshold     :threshold value                       [default=0.001]\n"
 		"    -l nloops        :iteration for each number of clusters [default=1]\n"
 		"    -l nloops        :iteration for each number of clusters [default=1]\n"
 		"    -b               :input file is in binary format\n"
 		"    -b               :input file is in binary format\n"
-        "    -r               :calculate RMSE                        [default=off]\n"
-		"    -o               :output cluster center coordinates     [default=off]\n";
+    "    -r               :calculate RMSE                        [default=off]\n"
+		"    -o               :output cluster center coordinates     [default=off]\n"
+    "    -p platform_id   :OCL platform to use                   [default=0]\n"
+    "    -d device_id     :OCL device to use                     [default=0]\n"
+    "    -g use_gpu       :1 for GPU 0 for CPU                   [default=0]\n";
     fprintf(stderr, help, argv0);
     fprintf(stderr, help, argv0);
     exit(-1);
     exit(-1);
 }
 }
@@ -103,7 +106,7 @@ void usage(char *argv0) {
 /*---< main() >-------------------------------------------------------------*/
 /*---< main() >-------------------------------------------------------------*/
 int setup(int argc, char **argv) {
 int setup(int argc, char **argv) {
 		int		opt;
 		int		opt;
- extern char   *optarg;
+    extern char   *optarg;
 		char   *filename = 0;
 		char   *filename = 0;
 		float  *buf;
 		float  *buf;
 		char	line[1024];
 		char	line[1024];
@@ -127,9 +130,14 @@ int setup(int argc, char **argv) {
 		
 		
 		int		isOutput = 0;
 		int		isOutput = 0;
 		//float	cluster_timing, io_timing;		
 		//float	cluster_timing, io_timing;		
+    
+    // Variables to store information on platform and device to use_gpu
+    int platform_id = 0;
+    int device_id = 0;
+    int use_gpu = 0;
 
 
 		/* obtain command line arguments and change appropriate options */
 		/* obtain command line arguments and change appropriate options */
-		while ( (opt=getopt(argc,argv,"i:t:m:n:l:bro"))!= EOF) {
+		while ( (opt=getopt(argc,argv,"i:t:m:n:l:brop:d:g:"))!= EOF) {
         switch (opt) {
         switch (opt) {
             case 'i': filename=optarg;
             case 'i': filename=optarg;
                       break;
                       break;
@@ -141,12 +149,18 @@ int setup(int argc, char **argv) {
                       break;
                       break;
             case 'n': min_nclusters = atoi(optarg);
             case 'n': min_nclusters = atoi(optarg);
                       break;
                       break;
-			case 'r': isRMSE = 1;
+			      case 'r': isRMSE = 1;
                       break;
                       break;
-			case 'o': isOutput = 1;
-					  break;
-		    case 'l': nloops = atoi(optarg);
-					  break;
+			      case 'o': isOutput = 1;
+					            break;
+		        case 'l': nloops = atoi(optarg);
+					            break;
+            case 'p': platform_id = atoi(optarg);
+					            break;
+            case 'd': device_id = atoi(optarg);
+					            break;
+            case 'g': use_gpu = atoi(optarg);
+                      break;                                                    
             case '?': usage(argv[0]);
             case '?': usage(argv[0]);
                       break;
                       break;
             default: usage(argv[0]);
             default: usage(argv[0]);
@@ -242,11 +256,14 @@ int setup(int argc, char **argv) {
 					min_nclusters,			/* range of min to max number of clusters */
 					min_nclusters,			/* range of min to max number of clusters */
 					max_nclusters,
 					max_nclusters,
 					threshold,				/* loop termination factor */
 					threshold,				/* loop termination factor */
-				   &best_nclusters,			/* return: number between min and max */
-				   &cluster_centres,		/* return: [best_nclusters][nfeatures] */  
-				   &rmse,					/* Root Mean Squared Error */
+				  &best_nclusters,			/* return: number between min and max */
+				  &cluster_centres,		/* return: [best_nclusters][nfeatures] */  
+				  &rmse,					/* Root Mean Squared Error */
 					isRMSE,					/* calculate RMSE */
 					isRMSE,					/* calculate RMSE */
-					nloops);				/* number of iteration for each number of clusters */		
+					nloops,
+          platform_id,
+          device_id,
+          use_gpu);				/* number of iteration for each number of clusters */		
     
     
 	//cluster_timing = omp_get_wtime() - cluster_timing;
 	//cluster_timing = omp_get_wtime() - cluster_timing;
 
 

+ 1 - 1
opencl/kmeans/run-cpu

@@ -1 +1 @@
-./kmeans -o -r -l 10 -i ../../data/kmeans/kdd_cup 
+./kmeans -o -r -l 10 -i ../../data/kmeans/kdd_cup -p 1 -d 0 -g 0 

+ 1 - 0
opencl/kmeans/run-gpu

@@ -0,0 +1 @@
+./kmeans -o -r -l 10 -i ../../data/kmeans/kdd_cup -p 0 -d 0 -g 1