瀏覽代碼

Implemented device selection for hybridsort

We now use command-line parameters to select the OpenCL platform, device and
device type instead of having them hardcoded in the code. This way we can
easily change the target of the benchmark without recompiling it.

Since the original benchmark had a really basic parsing structure, I opted for
implementing from scratch a helper function to parse the arguments and
retrieve the needed parameters.
Andrea Gussoni 8 年之前
父節點
當前提交
cf82024bea

+ 24 - 18
opencl/hybridsort/bucketsort.c

@@ -82,17 +82,20 @@ double sum = 0;
 ////////////////////////////////////////////////////////////////////////////////
 // Initialize the bucketsort algorithm
 ////////////////////////////////////////////////////////////////////////////////
-void init_bucketsort(int listsize)
+void init_bucketsort(int listsize, int platform_id, int device_id, int use_gpu)
 {
     cl_uint num = 0;
     clGetPlatformIDs(0, NULL, &num);
     cl_platform_id platformID[num];
     clGetPlatformIDs(num, platformID, NULL);
     
-    clGetDeviceIDs(platformID[1],CL_DEVICE_TYPE_CPU,0,NULL,&num);
+    // Selector for CPU/GPU
+    cl_device_type device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
+    
+    clGetDeviceIDs(platformID[platform_id],device_type,0,NULL,&num);
     
     cl_device_id devices[num];
-    err = clGetDeviceIDs(platformID[1],CL_DEVICE_TYPE_CPU,num,devices,NULL);
+    err = clGetDeviceIDs(platformID[platform_id],device_type,num,devices,NULL);
 //    int gpu = 1;
 //    err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 2, &device_id, NULL);
     
@@ -104,12 +107,12 @@ void init_bucketsort(int listsize)
     char name[128];
     
     
-    clGetDeviceInfo(devices[0],CL_DEVICE_NAME,128,name,NULL);
+    clGetDeviceInfo(devices[device_id],CL_DEVICE_NAME,128,name,NULL);
 
     
-    bucketContext = clCreateContext(0, 1, &devices[0], NULL, NULL, &err);
+    bucketContext = clCreateContext(0, 1, &devices[device_id], NULL, NULL, &err);
 
-    bucketCommands = clCreateCommandQueue(bucketContext, devices[0], CL_QUEUE_PROFILING_ENABLE, &err);
+    bucketCommands = clCreateCommandQueue(bucketContext, devices[device_id], CL_QUEUE_PROFILING_ENABLE, &err);
     
 	h_offsets = (unsigned int *) malloc(DIVISIONS * sizeof(unsigned int));
     for(int i = 0; i < DIVISIONS; i++){
@@ -162,7 +165,7 @@ void init_bucketsort(int listsize)
         char buffer[2048];
         
         printf("Error: Failed to build bucket program executable!\n");
-        clGetProgramBuildInfo(bucketProgram, devices[0], CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
+        clGetProgramBuildInfo(bucketProgram, devices[device_id], CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
         printf("%s\n", buffer);
         exit(1);
     }
@@ -193,22 +196,25 @@ void finish_bucketsort()
 	free(historesult);
 }
 
-void histogramInit(int listsize) {
+void histogramInit(int listsize, int platform_id, int device_id, int use_gpu) {
     cl_uint num = 0;
     clGetPlatformIDs(0, NULL, &num);
     cl_platform_id platformID[num];
     clGetPlatformIDs(num, platformID, NULL);
     
-    clGetDeviceIDs(platformID[1],CL_DEVICE_TYPE_CPU,0,NULL,&num);
+    // Selector for CPU/GPU
+    cl_device_type device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
+    
+    clGetDeviceIDs(platformID[platform_id],device_type,0,NULL,&num);
     
     num = 2;
     char name[128];
     
-    clGetPlatformInfo(platformID[1], CL_PLATFORM_PROFILE,128,name,NULL);
+    clGetPlatformInfo(platformID[platform_id], CL_PLATFORM_PROFILE,128,name,NULL);
     
     
     cl_device_id devices[num];
-    err = clGetDeviceIDs(platformID[1],CL_DEVICE_TYPE_CPU,num,devices,NULL);
+    err = clGetDeviceIDs(platformID[1],device_type,num,devices,NULL);
     //    int gpu = 1;
     //    err = clGetDeviceIDs(NULL, gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU, 2, &device_id, NULL);
     
@@ -218,7 +224,7 @@ void histogramInit(int listsize) {
         exit(1);
     }
     
-    clGetDeviceInfo(devices[0],CL_DEVICE_NAME,128,name,NULL);
+    clGetDeviceInfo(devices[device_id],CL_DEVICE_NAME,128,name,NULL);
     
     printf("%s \n", name);
     
@@ -229,9 +235,9 @@ void histogramInit(int listsize) {
         0
     };
     
-    histoContext = clCreateContext(0, 1, &devices[0], NULL, NULL, &err);
+    histoContext = clCreateContext(0, 1, &devices[device_id], NULL, NULL, &err);
     
-    histoCommands = clCreateCommandQueue(histoContext, devices[0], CL_QUEUE_PROFILING_ENABLE, &err);
+    histoCommands = clCreateCommandQueue(histoContext, devices[device_id], CL_QUEUE_PROFILING_ENABLE, &err);
     histoInput = clCreateBuffer(histoContext,  CL_MEM_READ_ONLY,  listsize*(sizeof(float)), NULL, NULL);
     histoOutput = clCreateBuffer(histoContext, CL_MEM_READ_WRITE, 1024 * sizeof(unsigned int), NULL, NULL);
     FILE *fp;
@@ -264,7 +270,7 @@ void histogramInit(int listsize) {
         char buffer[2048];
         
         printf("Error: Failed to build program executable!\n");
-        clGetProgramBuildInfo(histoProgram, devices[0], CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
+        clGetProgramBuildInfo(histoProgram, devices[device_id], CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
         printf("%s\n", buffer);
         exit(1);
     }
@@ -349,13 +355,13 @@ void finish_histogram() {
 ////////////////////////////////////////////////////////////////////////////////
 void bucketSort(float *d_input, float *d_output, int listsize,
 				int *sizes, int *nullElements, float minimum, float maximum,
-				unsigned int *origOffsets)
+				unsigned int *origOffsets, int platform_id, int device_id, int use_gpu)
 {
 //	////////////////////////////////////////////////////////////////////////////
 //	// First pass - Create 1024 bin histogram
 //	////////////////////////////////////////////////////////////////////////////
-    histogramInit(listsize);
-	histogram1024GPU(h_offsets, d_input, minimum, maximum, listsize);
+    histogramInit(listsize, platform_id, device_id, use_gpu);
+	  histogram1024GPU(h_offsets, d_input, minimum, maximum, listsize);
     finish_histogram();
     for(int i=0; i<histosize; i++) historesult[i] = (float)h_offsets[i];
 

+ 3 - 3
opencl/hybridsort/bucketsort.h

@@ -4,18 +4,18 @@
 #define LOG_DIVISIONS	10
 #define DIVISIONS		(1 << LOG_DIVISIONS)
 
-void init_bucketsort(int listsize);
+void init_bucketsort(int listsize, int platform_id, int device_id, int use_gpu);
 void finish_bucketsort();
 void bucketSort(float *d_input, float *d_output, int listsize,
 				int *sizes, int *nullElements, float minimum, float maximum,
-				unsigned int *origOffsets);
+				unsigned int *origOffsets, int platform_id, int device_id, int use_gpu);
 void histogram1024GPU(
                       unsigned int *h_Result,
                       float *d_Data,
                       float minimum,
                       float maximum,
                       int dataN);
-void histogramInit(int listsize);
+void histogramInit(int listsize, int platform_id, int device_id, int use_gpu);
 void finish_histogram();
 double getBucketTime();
 

+ 58 - 15
opencl/hybridsort/hybridsort.c

@@ -43,6 +43,50 @@ cl_float4*runMergeSort(int listsize, int divisions,
                                int *sizes, int *nullElements,
                        unsigned int *origOffsets);
 
+int parseCommandline(int argc, char *argv[], int *platform_id, int *device_id, int *use_gpu){ // Scans argv for -p/-d/-g and stores each flag's integer value through the matching out-pointer; returns 0 on success, 1 on error.
+   int i;
+   printf("%d", argc); // NOTE(review): prints the raw argument count with no newline; looks like leftover debug output — confirm
+   if (argc < 5) return 1; // error: fewer than five argv entries were supplied
+   char flag;
+   
+   for(i=1;i<argc;i++) {
+     if (argv[i][0]=='-') {// flag: only the single character right after '-' is examined
+       flag = argv[i][1];
+         switch (flag) { // no default case: unrecognised flags are silently ignored
+           case 'p': // platform
+             i++; // step to the flag's value; NOTE(review): i is not re-checked against argc, so a trailing flag hands argv[argc] to atoi
+             *platform_id = atoi(argv[i]);
+             break;
+           case 'd': // device
+             i++;
+             *device_id = atoi(argv[i]);
+             break;
+           case 'g': // GPU selector (value is stored in *use_gpu)
+             i++;
+             *use_gpu = atoi(argv[i]);
+             break;  
+       }
+     }
+   }
+   if ((*device_id >= 0 && *platform_id<0) || (*platform_id>=0 && *device_id<0)) // both p and d must be specified if either are specified; this function never initialises the outputs, so the check relies on the caller pre-setting them to a negative value
+     return 1;
+   return 0;
+}
+
+void printUsage(){ // Prints the hybridsort command-line help text (synopsis, one example, and the -p/-d/-g options) to stdout.
+  printf("Hybridsort Usage\n");
+  printf("\n");
+  printf("hybridsort r -p [int] -d [int] -g [int]\n");
+  printf("\n");
+  printf("example:\n");
+  printf("$ ./hybridsort r -p 0 -d 0 -g 1\n");
+  printf("\n");
+  printf("-p [int]     Choose the platform (must choose both platform and device)\n");
+  printf("-d [int]     Choose the device (must choose both platform and device)\n");
+  printf("-g [int]     1 for gpu and 0 for cpu\n");
+  printf("\n");
+}                       
+
 int main(int argc, char** argv)
 {
     int err;                            // error code returned from api calls
@@ -52,22 +96,21 @@ int main(int argc, char** argv)
     size_t global;                      // global domain size for our calculation
     size_t local;                       // local domain size for our calculation
     unsigned int *results;
-
-    cl_device_id device_id;             // compute device id 
-    cl_context context;                 // compute context
-    cl_command_queue commands;          // compute command queue
-    cl_program program;                 // compute program
-    cl_kernel kernel;                   // compute kernel
     
-    cl_mem input;                       // device memory used for the input array
-    cl_mem output;                      // device memory used for the output array
+    int platform_id=-1,device_id=-1,use_gpu=-1;
+  
+    // parse command line
+    if (parseCommandline(argc, argv, &platform_id, &device_id, &use_gpu)) {
+    printUsage();
+    return 0;
+    }
     
     // Fill our data set with random float values
     //
     
     int numElements = 0 ;
         
-    if(strcmp(argv[1],"r") ==0) {
+    if(strcmp(argv[1],"r") == 0) {
         numElements = SIZE;
 	}
     else {
@@ -87,11 +130,11 @@ int main(int argc, char** argv)
         
         numElements = count;
     }
-    printf("Sorting list of %d floats.\n", numElements);
-    int mem_size = (numElements + (DIVISIONS*4))*sizeof(float);
+  printf("Sorting list of %d floats.\n", numElements);
+  int mem_size = (numElements + (DIVISIONS*4))*sizeof(float);
 	// Allocate enough for the input list
 	float *cpu_idata = (float *)malloc(mem_size);
-    float *cpu_odata = (float *)malloc(mem_size);
+  float *cpu_odata = (float *)malloc(mem_size);
 	// Allocate enough for the output list on the cpu side
 	float *d_output = (float *)malloc(mem_size);
 	// Allocate enough memory for the output list on the gpu side
@@ -129,12 +172,12 @@ int main(int argc, char** argv)
     fclose(tp);
     memcpy(cpu_odata, cpu_idata, mem_size);
     clock_t gpu_start = clock();
-    init_bucketsort(numElements);
+    init_bucketsort(numElements, platform_id, device_id, use_gpu);
     int *sizes = (int*) malloc(DIVISIONS * sizeof(int));
     int *nullElements = (int*) malloc(DIVISIONS * sizeof(int));
     unsigned int *origOffsets = (unsigned int *) malloc((DIVISIONS + 1) * sizeof(int));
     clock_t bucketsort_start = clock();
-    bucketSort(cpu_idata,d_output,numElements,sizes,nullElements,datamin,datamax, origOffsets);
+    bucketSort(cpu_idata,d_output,numElements,sizes,nullElements,datamin,datamax,origOffsets,platform_id,device_id,use_gpu);
     clock_t bucketsort_diff = clock() - bucketsort_start;
     finish_bucketsort();
     double bucketTime = getBucketTime();
@@ -147,7 +190,7 @@ int main(int argc, char** argv)
         newlistsize += sizes[i] * 4;
     }
     
-    init_mergesort(newlistsize);
+    init_mergesort(newlistsize, platform_id, device_id, use_gpu);
     clock_t mergesort_start = clock();
     cl_float4 *mergeresult = runMergeSort(newlistsize,DIVISIONS,d_origList,d_resultList,sizes,nullElements,origOffsets);
     clock_t mergesort_diff = clock() - mergesort_start;

+ 10 - 6
opencl/hybridsort/mergesort.c

@@ -48,14 +48,18 @@ double mergesum = 0;
 ////////////////////////////////////////////////////////////////////////////////
 // The mergesort algorithm
 ////////////////////////////////////////////////////////////////////////////////
-void init_mergesort(int listsize){
+void init_mergesort(int listsize, int platform_id, int device_id, int use_gpu){
     cl_uint num = 0;
     clGetPlatformIDs(0,NULL,&num);
     cl_platform_id platformID[num];
     clGetPlatformIDs(num,platformID,NULL);
-    clGetDeviceIDs(platformID[1],CL_DEVICE_TYPE_CPU,0,NULL,&num);
+    
+    // Selector for CPU/GPU
+    cl_device_type device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
+    
+    clGetDeviceIDs(platformID[platform_id],device_type,0,NULL,&num);
     cl_device_id devices[num];
-    err = clGetDeviceIDs(platformID[1],CL_DEVICE_TYPE_CPU,num,devices,NULL);
+    err = clGetDeviceIDs(platformID[platform_id],device_type,num,devices,NULL);
     
     if (err != CL_SUCCESS)
     {
@@ -65,11 +69,11 @@ void init_mergesort(int listsize){
     char name[128];
     
     
-    clGetDeviceInfo(devices[0],CL_DEVICE_NAME,128,name,NULL);
+    clGetDeviceInfo(devices[device_id],CL_DEVICE_NAME,128,name,NULL);
     
-    mergeContext = clCreateContext(0, 1, &devices[0], NULL, NULL, &err);
+    mergeContext = clCreateContext(0, 1, &devices[device_id], NULL, NULL, &err);
     
-    mergeCommands = clCreateCommandQueue(mergeContext, devices[0], CL_QUEUE_PROFILING_ENABLE, &err);
+    mergeCommands = clCreateCommandQueue(mergeContext, devices[device_id], CL_QUEUE_PROFILING_ENABLE, &err);
     
     d_resultList_first_altered = (cl_float4 *)malloc(listsize*sizeof(float));
     d_resultList_first_buff = clCreateBuffer(mergeContext,CL_MEM_READ_WRITE, listsize * sizeof(float),NULL,NULL);

+ 1 - 1
opencl/hybridsort/mergesort.h

@@ -7,7 +7,7 @@ cl_float4 *runMergeSort(int listsize, int divisions,
 					 cl_float4 *d_origList, cl_float4 *d_resultList,
 					 int *sizes, int *nullElements,
 					 unsigned int *origOffsets);
-void init_mergesort(int listsize);
+void init_mergesort(int listsize, int platform_id, int device_id, int use_gpu);
 void finish_mergesort();
 double getMergeTime();
 #endif

+ 1 - 1
opencl/hybridsort/run-cpu

@@ -1 +1 @@
-./hybridsort r
+./hybridsort r -p 1 -d 0 -g 0

+ 1 - 0
opencl/hybridsort/run-gpu

@@ -0,0 +1 @@
+./hybridsort r -p 0 -d 0 -g 1