Browse Source

Implemented device selection for bfs

We now use parameters as the way to select OpenCL platform, device and
type instead of having the hardcoded in the code. In this way we can
easily change the type of the benchmark without recompiling it.

Reused part of the available routine to parse command line arguments,
even if it is quite artisanal.
Andrea Gussoni 8 years ago
parent
commit
e6a3bb8bcb
4 changed files with 119 additions and 83 deletions
  1. 73 70
      opencl/bfs/CLHelper.h
  2. 44 12
      opencl/bfs/bfs.cpp
  3. 1 1
      opencl/bfs/run-cpu
  4. 1 0
      opencl/bfs/run-gpu

+ 73 - 70
opencl/bfs/CLHelper.h

@@ -65,7 +65,7 @@ string FileToString(const string fileName)
             f.read(str, fileSize);
             f.close();
             str[size] = '\0';
-        
+
             s = str;
             delete [] str;
             return s;
@@ -121,19 +121,19 @@ void _clCmdParams(int argc, char* argv[]){
 			;
 		}
 	}
-	
+
 }
 
 //---------------------------------------
 //Initlize CL objects
 //--description: there are 5 steps to initialize all the OpenCL objects needed
-//--revised on 04/01/2011: get the number of devices  and 
+//--revised on 04/01/2011: get the number of devices  and
 //  devices have no relationship with context
-void _clInit()
+void _clInit(int platform_num, int device_num, int use_gpu)
 {
-    int DEVICE_ID_INUSED = device_id_inused;
+    //int DEVICE_ID_INUSED = device_id_inused;
     cl_int resultCL;
-    
+
     oclHandles.context = NULL;
     oclHandles.devices = NULL;
     oclHandles.queue = NULL;
@@ -162,7 +162,7 @@ void _clInit()
         throw (string("InitCL()::Error: Getting platform ids (clGetPlatformIDs)"));
 
     /* Select the target platform. Default: first platform */
-    targetPlatform = allPlatforms[1];
+    targetPlatform = allPlatforms[platform_num];
     for (int i = 0; i < numPlatforms; i++)
     {
         char pbuff[128];
@@ -180,22 +180,25 @@ void _clInit()
     free(allPlatforms);
 
     //-----------------------------------------------
-    //--cambine-2: create an OpenCL context
+    //--cambine-2: create an OpenCL context
+    // Select device type on the basis of the passed parameter
+    static cl_device_type device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
+
     cl_context_properties cprops[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)targetPlatform, 0 };
-    oclHandles.context = clCreateContextFromType(cprops, 
-                                                CL_DEVICE_TYPE_CPU, 
-                                                NULL, 
-                                                NULL, 
+    oclHandles.context = clCreateContextFromType(cprops,
+                                                device_type,
+                                                NULL,
+                                                NULL,
                                                 &resultCL);
 
     if ((resultCL != CL_SUCCESS) || (oclHandles.context == NULL))
         throw (string("InitCL()::Error: Creating Context (clCreateContextFromType)"));
     //-----------------------------------------------
-    //--cambine-3: detect OpenCL devices	
+    //--cambine-3: detect OpenCL devices
     /* First, get the size of device list */
-   oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_CPU, 0, NULL, &deviceListSize);
+   oclHandles.cl_status = clGetDeviceIDs(targetPlatform, device_type, 0, NULL, &deviceListSize);
    if(oclHandles.cl_status!=CL_SUCCESS){
-   	throw(string("exception in _clInit -> clGetDeviceIDs"));   	
+   	throw(string("exception in _clInit -> clGetDeviceIDs"));
    }
    if (deviceListSize == 0)
         throw(string("InitCL()::Error: No devices found."));
@@ -209,16 +212,16 @@ void _clInit()
         throw(string("InitCL()::Error: Could not allocate memory."));
 
     /* Next, get the device list data */
-   oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_CPU, deviceListSize, \
+   oclHandles.cl_status = clGetDeviceIDs(targetPlatform, device_type, deviceListSize, \
 								oclHandles.devices, NULL);
    if(oclHandles.cl_status!=CL_SUCCESS){
-   	throw(string("exception in _clInit -> clGetDeviceIDs-2"));   	
+   	throw(string("exception in _clInit -> clGetDeviceIDs-2"));
    }
    //-----------------------------------------------
-   //--cambine-4: Create an OpenCL command queue    
-    oclHandles.queue = clCreateCommandQueue(oclHandles.context, 
-                                            oclHandles.devices[DEVICE_ID_INUSED], 
-                                            0, 
+   //--cambine-4: Create an OpenCL command queue
+    oclHandles.queue = clCreateCommandQueue(oclHandles.context,
+                                            oclHandles.devices[device_num],
+                                            0,
                                             &resultCL);
 
     if ((resultCL != CL_SUCCESS) || (oclHandles.queue == NULL))
@@ -229,14 +232,14 @@ void _clInit()
     const char * source    = source_str.c_str();
     size_t sourceSize[]    = { source_str.length() };
 
-    oclHandles.program = clCreateProgramWithSource(oclHandles.context, 
-                                                    1, 
+    oclHandles.program = clCreateProgramWithSource(oclHandles.context,
+                                                    1,
                                                     &source,
                                                     sourceSize,
                                                     &resultCL);
 
     if ((resultCL != CL_SUCCESS) || (oclHandles.program == NULL))
-        throw(string("InitCL()::Error: Loading Binary into cl_program. (clCreateProgramWithBinary)"));    
+        throw(string("InitCL()::Error: Loading Binary into cl_program. (clCreateProgramWithBinary)"));
     //insert debug information
     //std::string options= "-cl-nv-verbose"; //Doesn't work on AMD machines
     //options += " -cl-nv-opt-level=3";
@@ -247,43 +250,43 @@ void _clInit()
         cerr << "InitCL()::Error: In clBuildProgram" << endl;
 
 		size_t length;
-        resultCL = clGetProgramBuildInfo(oclHandles.program, 
-                                        oclHandles.devices[DEVICE_ID_INUSED], 
-                                        CL_PROGRAM_BUILD_LOG, 
-                                        0, 
-                                        NULL, 
+        resultCL = clGetProgramBuildInfo(oclHandles.program,
+                                        oclHandles.devices[device_num],
+                                        CL_PROGRAM_BUILD_LOG,
+                                        0,
+                                        NULL,
                                         &length);
-        if(resultCL != CL_SUCCESS) 
+        if(resultCL != CL_SUCCESS)
             throw(string("InitCL()::Error: Getting Program build info(clGetProgramBuildInfo)"));
 
 		char* buffer = (char*)malloc(length);
-        resultCL = clGetProgramBuildInfo(oclHandles.program, 
-                                        oclHandles.devices[DEVICE_ID_INUSED], 
-                                        CL_PROGRAM_BUILD_LOG, 
-                                        length, 
-                                        buffer, 
+        resultCL = clGetProgramBuildInfo(oclHandles.program,
+                                        oclHandles.devices[device_num],
+                                        CL_PROGRAM_BUILD_LOG,
+                                        length,
+                                        buffer,
                                         NULL);
-        if(resultCL != CL_SUCCESS) 
+        if(resultCL != CL_SUCCESS)
             throw(string("InitCL()::Error: Getting Program build info(clGetProgramBuildInfo)"));
 
 		cerr << buffer << endl;
         free(buffer);
 
         throw(string("InitCL()::Error: Building Program (clBuildProgram)"));
-    } 
+    }
 
     //get program information in intermediate representation
-    #ifdef PTX_MSG    
+    #ifdef PTX_MSG
     size_t binary_sizes[deviceListSize];
     char * binaries[deviceListSize];
-    //figure out number of devices and the sizes of the binary for each device. 
+    //figure out number of devices and the sizes of the binary for each device.
     oclHandles.cl_status = clGetProgramInfo(oclHandles.program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t)*deviceListSize, &binary_sizes, NULL );
     if(oclHandles.cl_status!=CL_SUCCESS){
         throw(string("--cambine:exception in _InitCL -> clGetProgramInfo-2"));
     }
 
     std::cout<<"--cambine:"<<binary_sizes<<std::endl;
-    //copy over all of the generated binaries. 
+    //copy over all of the generated binaries.
     for(int i=0;i<deviceListSize;i++)
 	binaries[i] = (char *)malloc( sizeof(char)*(binary_sizes[i]+1));
     oclHandles.cl_status = clGetProgramInfo(oclHandles.program, CL_PROGRAM_BINARIES, sizeof(char *)*deviceListSize, binaries, NULL );
@@ -326,7 +329,7 @@ void _clInit()
     oclHandles.cl_status = clGetProgramBuildInfo(oclHandles.program, oclHandles.devices[DEVICE_ID_INUSED], CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
     if(oclHandles.cl_status!=CL_SUCCESS){
 	throw(string("exceptions in _InitCL -> getting resource information"));
-    }    
+    }
 
     build_log = (char *)malloc(ret_val_size+1);
     oclHandles.cl_status = clGetProgramBuildInfo(oclHandles.program, oclHandles.devices[DEVICE_ID_INUSED], CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
@@ -411,7 +414,7 @@ cl_mem _clCreateAndCpyMem(int size, void * h_mem_source) throw(string){
 }
 //-------------------------------------------------------
 //--cambine:	create read only  buffer for devices
-//--date:	17/01/2011	
+//--date:	17/01/2011
 cl_mem _clMallocRW(int size, void * h_mem_ptr) throw(string){
  	cl_mem d_mem;
 	d_mem = clCreateBuffer(oclHandles.context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, size, h_mem_ptr, &oclHandles.cl_status);
@@ -423,7 +426,7 @@ cl_mem _clMallocRW(int size, void * h_mem_ptr) throw(string){
 }
 //-------------------------------------------------------
 //--cambine:	create read and write buffer for devices
-//--date:	17/01/2011	
+//--date:	17/01/2011
 cl_mem _clMalloc(int size, void * h_mem_ptr) throw(string){
  	cl_mem d_mem;
 	d_mem = clCreateBuffer(oclHandles.context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, size, h_mem_ptr, &oclHandles.cl_status);
@@ -445,7 +448,7 @@ void _clMemcpyH2D(cl_mem d_mem, int size, const void *h_mem_ptr) throw(string){
 	#endif
 }
 //--------------------------------------------------------
-//--cambine:create buffer and then copy data from host to device with pinned 
+//--cambine:create buffer and then copy data from host to device with pinned
 // memory
 cl_mem _clCreateAndCpyPinnedMem(int size, float* h_mem_source) throw(string){
 	cl_mem d_mem, d_mem_pinned;
@@ -484,7 +487,7 @@ cl_mem _clCreateAndCpyPinnedMem(int size, float* h_mem_source) throw(string){
 	if(oclHandles.cl_status != CL_SUCCESS)
 		throw(string("excpetion in _clCreateAndCpyMem() -> clEnqueueWriteBuffer"));
 	#endif
-	
+
 	return d_mem;
 }
 
@@ -513,22 +516,22 @@ void _clMemcpyD2H(cl_mem d_mem, int size, void * h_mem) throw(string){
 				break;
 			case CL_INVALID_CONTEXT:
 				oclHandles.error_str += "CL_INVALID_CONTEXT";
-				break;	
+				break;
 			case CL_INVALID_MEM_OBJECT:
 				oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
 				break;
 			case CL_INVALID_VALUE:
 				oclHandles.error_str += "CL_INVALID_VALUE";
-				break;	
+				break;
 			case CL_INVALID_EVENT_WAIT_LIST:
 				oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
 				break;
 			case CL_MEM_OBJECT_ALLOCATION_FAILURE:
 				oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
-				break;	
+				break;
 			case CL_OUT_OF_HOST_MEMORY:
 				oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
-				break;		
+				break;
 			default:
 				oclHandles.error_str += "Unknown reason";
 				break;
@@ -551,19 +554,19 @@ void _clSetArgs(int kernel_id, int arg_idx, void * d_mem, int size = 0) throw(st
 				break;
 			case CL_INVALID_ARG_INDEX:
 				oclHandles.error_str += "CL_INVALID_ARG_INDEX";
-				break;	
+				break;
 			case CL_INVALID_ARG_VALUE:
 				oclHandles.error_str += "CL_INVALID_ARG_VALUE";
 				break;
 			case CL_INVALID_MEM_OBJECT:
 				oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
-				break;	
+				break;
 			case CL_INVALID_SAMPLER:
 				oclHandles.error_str += "CL_INVALID_SAMPLER";
 				break;
 			case CL_INVALID_ARG_SIZE:
 				oclHandles.error_str += "CL_INVALID_ARG_SIZE";
-				break;	
+				break;
 			case CL_OUT_OF_RESOURCES:
 				oclHandles.error_str += "CL_OUT_OF_RESOURCES";
 				break;
@@ -588,19 +591,19 @@ void _clSetArgs(int kernel_id, int arg_idx, void * d_mem, int size = 0) throw(st
 				break;
 			case CL_INVALID_ARG_INDEX:
 				oclHandles.error_str += "CL_INVALID_ARG_INDEX";
-				break;	
+				break;
 			case CL_INVALID_ARG_VALUE:
 				oclHandles.error_str += "CL_INVALID_ARG_VALUE";
 				break;
 			case CL_INVALID_MEM_OBJECT:
 				oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
-				break;	
+				break;
 			case CL_INVALID_SAMPLER:
 				oclHandles.error_str += "CL_INVALID_SAMPLER";
 				break;
 			case CL_INVALID_ARG_SIZE:
 				oclHandles.error_str += "CL_INVALID_ARG_SIZE";
-				break;	
+				break;
 			case CL_OUT_OF_RESOURCES:
 				oclHandles.error_str += "CL_OUT_OF_RESOURCES";
 				break;
@@ -617,7 +620,7 @@ void _clSetArgs(int kernel_id, int arg_idx, void * d_mem, int size = 0) throw(st
 	}
 }
 void _clFinish() throw(string){
-	oclHandles.cl_status = clFinish(oclHandles.queue);	
+	oclHandles.cl_status = clFinish(oclHandles.queue);
 	#ifdef ERRMSG
 	oclHandles.error_str = "excpetion in _clFinish";
 	switch(oclHandles.cl_status){
@@ -626,7 +629,7 @@ void _clFinish() throw(string){
 			break;
 		case CL_OUT_OF_RESOURCES:
 			oclHandles.error_str += "CL_OUT_OF_RESOURCES";
-			break;		
+			break;
 		case CL_OUT_OF_HOST_MEMORY:
 			oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
 			break;
@@ -650,7 +653,7 @@ void _clInvokeKernel(int kernel_id, int work_items, int work_group_size) throw(s
   	size_t local_work_size[] = {work_group_size, 1};
 	size_t global_work_size[] = {work_items, 1};
 	oclHandles.cl_status = clEnqueueNDRangeKernel(oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0, \
-											global_work_size, local_work_size, 0 , 0, &(e[0]) );	
+											global_work_size, local_work_size, 0 , 0, &(e[0]) );
 	#ifdef ERRMSG
 	oclHandles.error_str = "excpetion in _clInvokeKernel() -> ";
 	switch(oclHandles.cl_status)
@@ -697,12 +700,12 @@ void _clInvokeKernel(int kernel_id, int work_items, int work_group_size) throw(s
 		case CL_OUT_OF_HOST_MEMORY:
 			oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
 			break;
-		default: 
+		default:
 			oclHandles.error_str += "Unkown reseason";
-			break;		
+			break;
 	}
 	if(oclHandles.cl_status != CL_SUCCESS)
-		throw(oclHandles.error_str);	
+		throw(oclHandles.error_str);
 	#endif
 	//_clFinish();
 	// oclHandles.cl_status = clWaitForEvents(1, &e[0]);
@@ -719,7 +722,7 @@ void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, int
 	/*if(work_items%work_group_size != 0)	//process situations that work_items cannot be divided by work_group_size
 	  work_items = work_items + (work_group_size-(work_items%work_group_size));*/
 	oclHandles.cl_status = clEnqueueNDRangeKernel(oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0, \
-											global_work_size, local_work_size, 0 , 0, &(e[0]) );	
+											global_work_size, local_work_size, 0 , 0, &(e[0]) );
 	#ifdef ERRMSG
 	oclHandles.error_str = "excpetion in _clInvokeKernel() -> ";
 	switch(oclHandles.cl_status)
@@ -766,12 +769,12 @@ void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, int
 		case CL_OUT_OF_HOST_MEMORY:
 			oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
 			break;
-		default: 
+		default:
 			oclHandles.error_str += "Unkown reseason";
-			break;		
+			break;
 	}
 	if(oclHandles.cl_status != CL_SUCCESS)
-		throw(oclHandles.error_str);	
+		throw(oclHandles.error_str);
 	#endif
 	//_clFinish();
 	/*oclHandles.cl_status = clWaitForEvents(1, &e[0]);
@@ -789,7 +792,7 @@ void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, int
 //release OpenCL objects
 void _clFree(cl_mem ob) throw(string){
 	if(ob!=NULL)
-		oclHandles.cl_status = clReleaseMemObject(ob);	
+		oclHandles.cl_status = clReleaseMemObject(ob);
 	#ifdef ERRMSG
 	oclHandles.error_str = "excpetion in _clFree() ->";
 	switch(oclHandles.cl_status)
@@ -802,11 +805,11 @@ void _clFree(cl_mem ob) throw(string){
 			break;
 		case CL_OUT_OF_HOST_MEMORY:
 			oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
-			break;			
-		default: 
+			break;
+		default:
 			oclHandles.error_str += "Unkown reseason";
-			break;		
-	}        
+			break;
+	}
     if (oclHandles.cl_status!= CL_SUCCESS)
        throw(oclHandles.error_str);
 	#endif

+ 44 - 12
opencl/bfs/bfs.cpp

@@ -2,9 +2,12 @@
 
 #define __CL_ENABLE_EXCEPTIONS
 #include <cstdlib>
+#include <cstring>
 #include <iostream>
+#include <stdlib.h>
+#include <stdio.h>
 #include <string>
-#include <cstring>
+#include <unistd.h>
 
 #ifdef  PROFILING
 #include "timer.h"
@@ -13,7 +16,11 @@
 #include "CLHelper.h"
 #include "util.h"
 
-#define MAX_THREADS_PER_BLOCK 256
+#define MAX_THREADS_PER_BLOCK 256
+
+int platform_id = 0;
+int device_id = 0;
+int use_gpu = 0;
 
 //Structure to hold a node information
 struct Node
@@ -78,7 +85,7 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
 			d_graph_visited, d_cost, d_over;
 	try{
 		//--1 transfer data from host to device
-		_clInit();			
+		_clInit(platform_id, device_id, use_gpu);
 		d_graph_nodes = _clMalloc(no_of_nodes*sizeof(Node), h_graph_nodes);
 		d_graph_edges = _clMalloc(edge_list_size*sizeof(int), h_graph_edges);
 		d_graph_mask = _clMallocRW(no_of_nodes*sizeof(char), h_graph_mask);
@@ -171,10 +178,17 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
 	}
 	return ;
 }
-void Usage(int argc, char**argv){
 
-fprintf(stderr,"Usage: %s <input_file>\n", argv[0]);
+void Usage(char *argv0){
 
+  char *help =
+  "\nUsage: %s [switches] \n\n"
+  "    -f               :file to process      \n"
+  "    -p platform_id   :OCL platform to use [default=0]\n"
+  "    -d device_id     :OCL device to use   [default=0]\n"
+  "    -g use_gpu       :1 for GPU 0 for CPU [default=0]\n";
+  fprintf(stderr, help, argv0);
+  exit(-1);
 }
 //----------------------------------------------------------
 //--cambine:	main function
@@ -188,14 +202,32 @@ int main(int argc, char * argv[])
 	FILE *fp;
 	Node* h_graph_nodes;
 	char *h_graph_mask, *h_updating_graph_mask, *h_graph_visited;
+	char *input_f;
+
 	try{
-		char *input_f;
-		if(argc!=2){
-		  Usage(argc, argv);
-		  exit(0);
-		}
-	
-		input_f = argv[1];
+    // Arguments parsing for platform, device and type
+    // Variables to store information on platform and device to use_gpu
+
+    /* obtain command line arguments and change appropriate options */
+    int opt;
+    extern char *optarg;
+    while ((opt=getopt(argc,argv,"f:p:d:g:"))!= EOF) {
+        switch (opt) {
+            case 'p': platform_id = atoi(optarg);
+                      break;
+            case 'd': device_id = atoi(optarg);
+                      break;
+            case 'g': use_gpu = atoi(optarg);
+                      break;
+            case 'f': input_f = optarg;
+                      break;
+            case '?': Usage(argv[0]);
+                      break;
+            default:  Usage(argv[0]);
+                      break;
+        }
+    }
+
 		printf("Reading File\n");
 		//Read in Graph from a file
 		fp = fopen(input_f,"r");

+ 1 - 1
opencl/bfs/run-cpu

@@ -1 +1 @@
-./bfs ../../data/bfs/graph1MW_6.txt
+./bfs -f ../../data/bfs/graph1MW_6.txt -p 1 -d 0 -g 0

+ 1 - 0
opencl/bfs/run-gpu

@@ -0,0 +1 @@
+./bfs -f ../../data/bfs/graph1MW_6.txt -p 0 -d 0 -g 1