ソースを参照

Implemented device selection for hybridsort

We now use command-line parameters to select the OpenCL platform, device and
device type instead of having them hardcoded in the code. This way we can
easily change the type of the benchmark without recompiling it.

Since the original benchmark had a really basic parsing structure, I opted for
implementing from scratch a helper function to parse the arguments and
retrieve the needed parameters.
Andrea Gussoni 8 年 前
コミット
cf82024bea

+ 24 - 18
opencl/hybridsort/bucketsort.c

@@ -82,17 +82,20 @@ double sum = 0;
 ////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////
 // Initialize the bucketsort algorithm
 // Initialize the bucketsort algorithm
 ////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////
-void init_bucketsort(int listsize)
+void init_bucketsort(int listsize, int platform_id, int device_id, int use_gpu)
 {
 {
     cl_uint num = 0;
     cl_uint num = 0;
     clGetPlatformIDs(0, NULL, &num);
     clGetPlatformIDs(0, NULL, &num);
     cl_platform_id platformID[num];
     cl_platform_id platformID[num];
     clGetPlatformIDs(num, platformID, NULL);
     clGetPlatformIDs(num, platformID, NULL);
     
     
-    clGetDeviceIDs(platformID[1],CL_DEVICE_TYPE_CPU,0,NULL,&num);
+    // Selector for CPU/GPU
+    cl_device_type device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
+    
+    clGetDeviceIDs(platformID[platform_id],device_type,0,NULL,&num);
     
     
     cl_device_id devices[num];
     cl_device_id devices[num];
-    err = clGetDeviceIDs(platformID[1],CL_DEVICE_TYPE_CPU,num,devices,NULL);
+    err = clGetDeviceIDs(platformID[platform_id],device_type,num,devices,NULL);
 //    int gpu = 1;
 //    int gpu = 1;
 //    err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 2, &device_id, NULL);
 //    err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 2, &device_id, NULL);
     
     
@@ -104,12 +107,12 @@ void init_bucketsort(int listsize)
     char name[128];
     char name[128];
     
     
     
     
-    clGetDeviceInfo(devices[0],CL_DEVICE_NAME,128,name,NULL);
+    clGetDeviceInfo(devices[device_id],CL_DEVICE_NAME,128,name,NULL);
 
 
     
     
-    bucketContext = clCreateContext(0, 1, &devices[0], NULL, NULL, &err);
+    bucketContext = clCreateContext(0, 1, &devices[device_id], NULL, NULL, &err);
 
 
-    bucketCommands = clCreateCommandQueue(bucketContext, devices[0], CL_QUEUE_PROFILING_ENABLE, &err);
+    bucketCommands = clCreateCommandQueue(bucketContext, devices[device_id], CL_QUEUE_PROFILING_ENABLE, &err);
     
     
 	h_offsets = (unsigned int *) malloc(DIVISIONS * sizeof(unsigned int));
 	h_offsets = (unsigned int *) malloc(DIVISIONS * sizeof(unsigned int));
     for(int i = 0; i < DIVISIONS; i++){
     for(int i = 0; i < DIVISIONS; i++){
@@ -162,7 +165,7 @@ void init_bucketsort(int listsize)
         char buffer[2048];
         char buffer[2048];
         
         
         printf("Error: Failed to build bucket program executable!\n");
         printf("Error: Failed to build bucket program executable!\n");
-        clGetProgramBuildInfo(bucketProgram, devices[0], CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
+        clGetProgramBuildInfo(bucketProgram, devices[device_id], CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
         printf("%s\n", buffer);
         printf("%s\n", buffer);
         exit(1);
         exit(1);
     }
     }
@@ -193,22 +196,25 @@ void finish_bucketsort()
 	free(historesult);
 	free(historesult);
 }
 }
 
 
-void histogramInit(int listsize) {
+void histogramInit(int listsize, int platform_id, int device_id, int use_gpu) {
     cl_uint num = 0;
     cl_uint num = 0;
     clGetPlatformIDs(0, NULL, &num);
     clGetPlatformIDs(0, NULL, &num);
     cl_platform_id platformID[num];
     cl_platform_id platformID[num];
     clGetPlatformIDs(num, platformID, NULL);
     clGetPlatformIDs(num, platformID, NULL);
     
     
-    clGetDeviceIDs(platformID[1],CL_DEVICE_TYPE_CPU,0,NULL,&num);
+    // Selector for CPU/GPU
+    cl_device_type device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
+    
+    clGetDeviceIDs(platformID[platform_id],device_type,0,NULL,&num);
     
     
     num = 2;
     num = 2;
     char name[128];
     char name[128];
     
     
-    clGetPlatformInfo(platformID[1], CL_PLATFORM_PROFILE,128,name,NULL);
+    clGetPlatformInfo(platformID[platform_id], CL_PLATFORM_PROFILE,128,name,NULL);
     
     
     
     
     cl_device_id devices[num];
     cl_device_id devices[num];
-    err = clGetDeviceIDs(platformID[1],CL_DEVICE_TYPE_CPU,num,devices,NULL);
+    // Enumerate devices on the caller-selected platform, matching the count query above.
+    err = clGetDeviceIDs(platformID[platform_id],device_type,num,devices,NULL);
     //    int gpu = 1;
     //    int gpu = 1;
     //    err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 2, &device_id, NULL);
     //    err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 2, &device_id, NULL);
     
     
@@ -218,7 +224,7 @@ void histogramInit(int listsize) {
         exit(1);
         exit(1);
     }
     }
     
     
-    clGetDeviceInfo(devices[0],CL_DEVICE_NAME,128,name,NULL);
+    clGetDeviceInfo(devices[device_id],CL_DEVICE_NAME,128,name,NULL);
     
     
     printf("%s \n", name);
     printf("%s \n", name);
     
     
@@ -229,9 +235,9 @@ void histogramInit(int listsize) {
         0
         0
     };
     };
     
     
-    histoContext = clCreateContext(0, 1, &devices[0], NULL, NULL, &err);
+    histoContext = clCreateContext(0, 1, &devices[device_id], NULL, NULL, &err);
     
     
-    histoCommands = clCreateCommandQueue(histoContext, devices[0], CL_QUEUE_PROFILING_ENABLE, &err);
+    histoCommands = clCreateCommandQueue(histoContext, devices[device_id], CL_QUEUE_PROFILING_ENABLE, &err);
     histoInput = clCreateBuffer(histoContext,  CL_MEM_READ_ONLY,  listsize*(sizeof(float)), NULL, NULL);
     histoInput = clCreateBuffer(histoContext,  CL_MEM_READ_ONLY,  listsize*(sizeof(float)), NULL, NULL);
     histoOutput = clCreateBuffer(histoContext, CL_MEM_READ_WRITE, 1024 * sizeof(unsigned int), NULL, NULL);
     histoOutput = clCreateBuffer(histoContext, CL_MEM_READ_WRITE, 1024 * sizeof(unsigned int), NULL, NULL);
     FILE *fp;
     FILE *fp;
@@ -264,7 +270,7 @@ void histogramInit(int listsize) {
         char buffer[2048];
         char buffer[2048];
         
         
         printf("Error: Failed to build program executable!\n");
         printf("Error: Failed to build program executable!\n");
-        clGetProgramBuildInfo(histoProgram, devices[0], CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
+        clGetProgramBuildInfo(histoProgram, devices[device_id], CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
         printf("%s\n", buffer);
         printf("%s\n", buffer);
         exit(1);
         exit(1);
     }
     }
@@ -349,13 +355,13 @@ void finish_histogram() {
 ////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////
 void bucketSort(float *d_input, float *d_output, int listsize,
 void bucketSort(float *d_input, float *d_output, int listsize,
 				int *sizes, int *nullElements, float minimum, float maximum,
 				int *sizes, int *nullElements, float minimum, float maximum,
-				unsigned int *origOffsets)
+				unsigned int *origOffsets, int platform_id, int device_id, int use_gpu)
 {
 {
 //	////////////////////////////////////////////////////////////////////////////
 //	////////////////////////////////////////////////////////////////////////////
 //	// First pass - Create 1024 bin histogram
 //	// First pass - Create 1024 bin histogram
 //	////////////////////////////////////////////////////////////////////////////
 //	////////////////////////////////////////////////////////////////////////////
-    histogramInit(listsize);
-	histogram1024GPU(h_offsets, d_input, minimum, maximum, listsize);
+    histogramInit(listsize, platform_id, device_id, use_gpu);
+	  histogram1024GPU(h_offsets, d_input, minimum, maximum, listsize);
     finish_histogram();
     finish_histogram();
     for(int i=0; i<histosize; i++) historesult[i] = (float)h_offsets[i];
     for(int i=0; i<histosize; i++) historesult[i] = (float)h_offsets[i];
 
 

+ 3 - 3
opencl/hybridsort/bucketsort.h

@@ -4,18 +4,18 @@
 #define LOG_DIVISIONS	10
 #define LOG_DIVISIONS	10
 #define DIVISIONS		(1 << LOG_DIVISIONS)
 #define DIVISIONS		(1 << LOG_DIVISIONS)
 
 
-void init_bucketsort(int listsize);
+void init_bucketsort(int listsize, int platform_id, int device_id, int use_gpu);
 void finish_bucketsort();
 void finish_bucketsort();
 void bucketSort(float *d_input, float *d_output, int listsize,
 void bucketSort(float *d_input, float *d_output, int listsize,
 				int *sizes, int *nullElements, float minimum, float maximum,
 				int *sizes, int *nullElements, float minimum, float maximum,
-				unsigned int *origOffsets);
+				unsigned int *origOffsets, int platform_id, int device_id, int use_gpu);
 void histogram1024GPU(
 void histogram1024GPU(
                       unsigned int *h_Result,
                       unsigned int *h_Result,
                       float *d_Data,
                       float *d_Data,
                       float minimum,
                       float minimum,
                       float maximum,
                       float maximum,
                       int dataN);
                       int dataN);
-void histogramInit(int listsize);
+void histogramInit(int listsize, int platform_id, int device_id, int use_gpu);
 void finish_histogram();
 void finish_histogram();
 double getBucketTime();
 double getBucketTime();
 
 

+ 58 - 15
opencl/hybridsort/hybridsort.c

@@ -43,6 +43,50 @@ cl_float4*runMergeSort(int listsize, int divisions,
                                int *sizes, int *nullElements,
                                int *sizes, int *nullElements,
                        unsigned int *origOffsets);
                        unsigned int *origOffsets);
 
 
+/*
+ * Parse the -p / -d / -g options from the command line.
+ *
+ *   argc, argv    command line as received by main(); argv[0] is skipped
+ *   platform_id   out: value following "-p"; left untouched when absent
+ *   device_id     out: value following "-d"; left untouched when absent
+ *   use_gpu       out: value following "-g"; left untouched when absent
+ *
+ * Returns 0 on success and 1 on error. Errors are: fewer than 5 arguments,
+ * a recognised option with no value after it, or exactly one of
+ * platform/device ending up non-negative while the other is negative.
+ * Unrecognised options and non-option arguments are ignored.
+ *
+ * NOTE(review): when both *platform_id and *device_id are still negative the
+ * function reports success; confirm that callers handle that case.
+ */
+int parseCommandline(int argc, char *argv[], int *platform_id, int *device_id, int *use_gpu){
+   int i;
+   char flag;
+
+   if (argc < 5) return 1; // error
+
+   for(i=1;i<argc;i++) {
+     if (argv[i][0]!='-') continue; // not a flag
+
+     flag = argv[i][1];
+     switch (flag) {
+       case 'p': // platform
+         if (i + 1 >= argc) return 1; // option value is missing
+         i++;
+         *platform_id = atoi(argv[i]);
+         break;
+       case 'd': // device
+         if (i + 1 >= argc) return 1; // option value is missing
+         i++;
+         *device_id = atoi(argv[i]);
+         break;
+       case 'g': // device type: 1 for gpu, 0 for cpu
+         if (i + 1 >= argc) return 1; // option value is missing
+         i++;
+         *use_gpu = atoi(argv[i]);
+         break;
+       default: // unknown flag: ignored
+         break;
+     }
+   }
+   if ((*device_id >= 0 && *platform_id<0) || (*platform_id>=0 && *device_id<0)) // both p and d must be specified if either are specified
+     return 1;
+   return 0;
+}
+
+/* Write the hybridsort command-line help text to stdout. */
+void printUsage(){
+  /* Each entry is emitted verbatim, in order. */
+  static const char *const usage_lines[] = {
+    "Hybridsort Usage\n",
+    "\n",
+    "hybridsort r -p [int] -d [int] -g [int]\n",
+    "\n",
+    "example:\n",
+    "$ ./hybridsort r -p 0 -d 0 -g 1\n",
+    "\n",
+    "-p [int]     Choose the platform (must choose both platform and device)\n",
+    "-d [int]     Choose the device (must choose both platform and device)\n",
+    "-g [int]     1 for gpu and 0 for cpu\n",
+    "\n",
+  };
+  size_t line;
+
+  for (line = 0; line < sizeof usage_lines / sizeof usage_lines[0]; line++) {
+    fputs(usage_lines[line], stdout);
+  }
+}
+
 int main(int argc, char** argv)
 int main(int argc, char** argv)
 {
 {
     int err;                            // error code returned from api calls
     int err;                            // error code returned from api calls
@@ -52,22 +96,21 @@ int main(int argc, char** argv)
     size_t global;                      // global domain size for our calculation
     size_t global;                      // global domain size for our calculation
     size_t local;                       // local domain size for our calculation
     size_t local;                       // local domain size for our calculation
     unsigned int *results;
     unsigned int *results;
-
-    cl_device_id device_id;             // compute device id 
-    cl_context context;                 // compute context
-    cl_command_queue commands;          // compute command queue
-    cl_program program;                 // compute program
-    cl_kernel kernel;                   // compute kernel
     
     
-    cl_mem input;                       // device memory used for the input array
-    cl_mem output;                      // device memory used for the output array
+    int platform_id=-1,device_id=-1,use_gpu=-1;
+  
+    // parse command line
+    if (parseCommandline(argc, argv, &platform_id, &device_id, &use_gpu)) {
+    printUsage();
+    return 0;
+    }
     
     
     // Fill our data set with random float values
     // Fill our data set with random float values
     //
     //
     
     
     int numElements = 0 ;
     int numElements = 0 ;
         
         
-    if(strcmp(argv[1],"r") ==0) {
+    if(strcmp(argv[1],"r") == 0) {
         numElements = SIZE;
         numElements = SIZE;
 	}
 	}
     else {
     else {
@@ -87,11 +130,11 @@ int main(int argc, char** argv)
         
         
         numElements = count;
         numElements = count;
     }
     }
-    printf("Sorting list of %d floats.\n", numElements);
-    int mem_size = (numElements + (DIVISIONS*4))*sizeof(float);
+  printf("Sorting list of %d floats.\n", numElements);
+  int mem_size = (numElements + (DIVISIONS*4))*sizeof(float);
 	// Allocate enough for the input list
 	// Allocate enough for the input list
 	float *cpu_idata = (float *)malloc(mem_size);
 	float *cpu_idata = (float *)malloc(mem_size);
-    float *cpu_odata = (float *)malloc(mem_size);
+  float *cpu_odata = (float *)malloc(mem_size);
 	// Allocate enough for the output list on the cpu side
 	// Allocate enough for the output list on the cpu side
 	float *d_output = (float *)malloc(mem_size);
 	float *d_output = (float *)malloc(mem_size);
 	// Allocate enough memory for the output list on the gpu side
 	// Allocate enough memory for the output list on the gpu side
@@ -129,12 +172,12 @@ int main(int argc, char** argv)
     fclose(tp);
     fclose(tp);
     memcpy(cpu_odata, cpu_idata, mem_size);
     memcpy(cpu_odata, cpu_idata, mem_size);
     clock_t gpu_start = clock();
     clock_t gpu_start = clock();
-    init_bucketsort(numElements);
+    init_bucketsort(numElements, platform_id, device_id, use_gpu);
     int *sizes = (int*) malloc(DIVISIONS * sizeof(int));
     int *sizes = (int*) malloc(DIVISIONS * sizeof(int));
     int *nullElements = (int*) malloc(DIVISIONS * sizeof(int));
     int *nullElements = (int*) malloc(DIVISIONS * sizeof(int));
     unsigned int *origOffsets = (unsigned int *) malloc((DIVISIONS + 1) * sizeof(int));
     unsigned int *origOffsets = (unsigned int *) malloc((DIVISIONS + 1) * sizeof(int));
     clock_t bucketsort_start = clock();
     clock_t bucketsort_start = clock();
-    bucketSort(cpu_idata,d_output,numElements,sizes,nullElements,datamin,datamax, origOffsets);
+    bucketSort(cpu_idata,d_output,numElements,sizes,nullElements,datamin,datamax,origOffsets,platform_id,device_id,use_gpu);
     clock_t bucketsort_diff = clock() - bucketsort_start;
     clock_t bucketsort_diff = clock() - bucketsort_start;
     finish_bucketsort();
     finish_bucketsort();
     double bucketTime = getBucketTime();
     double bucketTime = getBucketTime();
@@ -147,7 +190,7 @@ int main(int argc, char** argv)
         newlistsize += sizes[i] * 4;
         newlistsize += sizes[i] * 4;
     }
     }
     
     
-    init_mergesort(newlistsize);
+    init_mergesort(newlistsize, platform_id, device_id, use_gpu);
     clock_t mergesort_start = clock();
     clock_t mergesort_start = clock();
     cl_float4 *mergeresult = runMergeSort(newlistsize,DIVISIONS,d_origList,d_resultList,sizes,nullElements,origOffsets);
     cl_float4 *mergeresult = runMergeSort(newlistsize,DIVISIONS,d_origList,d_resultList,sizes,nullElements,origOffsets);
     clock_t mergesort_diff = clock() - mergesort_start;
     clock_t mergesort_diff = clock() - mergesort_start;

+ 10 - 6
opencl/hybridsort/mergesort.c

@@ -48,14 +48,18 @@ double mergesum = 0;
 ////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////
 // The mergesort algorithm
 // The mergesort algorithm
 ////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////
-void init_mergesort(int listsize){
+void init_mergesort(int listsize, int platform_id, int device_id, int use_gpu){
     cl_uint num = 0;
     cl_uint num = 0;
     clGetPlatformIDs(0,NULL,&num);
     clGetPlatformIDs(0,NULL,&num);
     cl_platform_id platformID[num];
     cl_platform_id platformID[num];
     clGetPlatformIDs(num,platformID,NULL);
     clGetPlatformIDs(num,platformID,NULL);
-    clGetDeviceIDs(platformID[1],CL_DEVICE_TYPE_CPU,0,NULL,&num);
+    
+    // Selector for CPU/GPU
+    cl_device_type device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
+    
+    clGetDeviceIDs(platformID[platform_id],device_type,0,NULL,&num);
     cl_device_id devices[num];
     cl_device_id devices[num];
-    err = clGetDeviceIDs(platformID[1],CL_DEVICE_TYPE_CPU,num,devices,NULL);
+    err = clGetDeviceIDs(platformID[platform_id],device_type,num,devices,NULL);
     
     
     if (err != CL_SUCCESS)
     if (err != CL_SUCCESS)
     {
     {
@@ -65,11 +69,11 @@ void init_mergesort(int listsize){
     char name[128];
     char name[128];
     
     
     
     
-    clGetDeviceInfo(devices[0],CL_DEVICE_NAME,128,name,NULL);
+    clGetDeviceInfo(devices[device_id],CL_DEVICE_NAME,128,name,NULL);
     
     
-    mergeContext = clCreateContext(0, 1, &devices[0], NULL, NULL, &err);
+    mergeContext = clCreateContext(0, 1, &devices[device_id], NULL, NULL, &err);
     
     
-    mergeCommands = clCreateCommandQueue(mergeContext, devices[0], CL_QUEUE_PROFILING_ENABLE, &err);
+    mergeCommands = clCreateCommandQueue(mergeContext, devices[device_id], CL_QUEUE_PROFILING_ENABLE, &err);
     
     
     d_resultList_first_altered = (cl_float4 *)malloc(listsize*sizeof(float));
     d_resultList_first_altered = (cl_float4 *)malloc(listsize*sizeof(float));
     d_resultList_first_buff = clCreateBuffer(mergeContext,CL_MEM_READ_WRITE, listsize * sizeof(float),NULL,NULL);
     d_resultList_first_buff = clCreateBuffer(mergeContext,CL_MEM_READ_WRITE, listsize * sizeof(float),NULL,NULL);

+ 1 - 1
opencl/hybridsort/mergesort.h

@@ -7,7 +7,7 @@ cl_float4 *runMergeSort(int listsize, int divisions,
 					 cl_float4 *d_origList, cl_float4 *d_resultList,
 					 cl_float4 *d_origList, cl_float4 *d_resultList,
 					 int *sizes, int *nullElements,
 					 int *sizes, int *nullElements,
 					 unsigned int *origOffsets);
 					 unsigned int *origOffsets);
-void init_mergesort(int listsize);
+void init_mergesort(int listsize, int platform_id, int device_id, int use_gpu);
 void finish_mergesort();
 void finish_mergesort();
 double getMergeTime();
 double getMergeTime();
 #endif
 #endif

+ 1 - 1
opencl/hybridsort/run-cpu

@@ -1 +1 @@
-./hybridsort r
+./hybridsort r -p 1 -d 0 -g 0

+ 1 - 0
opencl/hybridsort/run-gpu

@@ -0,0 +1 @@
+./hybridsort r -p 0 -d 0 -g 1