소스 검색

Implemented device selection for bfs

We now use parameters as the way to select OpenCL platform, device and
type instead of having the hardcoded in the code. In this way we can
easily change the type of the benchmark without recompiling it.

Reused part of the available routine to parse command line arguments,
even if it is quite artisanal.
Andrea Gussoni 8 년 전
부모
커밋
1984b7b6e8
4개의 변경된 파일119개의 추가작업 그리고 107개의 파일을 삭제
  1. 73 70
      opencl/bfs/CLHelper.h
  2. 44 36
      opencl/bfs/bfs.cpp
  3. 1 1
      opencl/bfs/run-cpu
  4. 1 0
      opencl/bfs/run-gpu

+ 73 - 70
opencl/bfs/CLHelper.h

@@ -65,7 +65,7 @@ string FileToString(const string fileName)
             f.read(str, fileSize);
             f.close();
             str[size] = '\0';
-        
+
             s = str;
             delete [] str;
             return s;
@@ -121,19 +121,19 @@ void _clCmdParams(int argc, char* argv[]){
 			;
 		}
 	}
-	
+
 }
 
 //---------------------------------------
 //Initlize CL objects
 //--description: there are 5 steps to initialize all the OpenCL objects needed
-//--revised on 04/01/2011: get the number of devices  and 
+//--revised on 04/01/2011: get the number of devices  and
 //  devices have no relationship with context
-void _clInit()
+void _clInit(int platform_num, int device_num, int use_gpu)
 {
-    int DEVICE_ID_INUSED = device_id_inused;
+    //int DEVICE_ID_INUSED = device_id_inused;
     cl_int resultCL;
-    
+
     oclHandles.context = NULL;
     oclHandles.devices = NULL;
     oclHandles.queue = NULL;
@@ -162,7 +162,7 @@ void _clInit()
         throw (string("InitCL()::Error: Getting platform ids (clGetPlatformIDs)"));
 
     /* Select the target platform. Default: first platform */
-    targetPlatform = allPlatforms[1];
+    targetPlatform = allPlatforms[platform_num];
     for (int i = 0; i < numPlatforms; i++)
     {
         char pbuff[128];
@@ -180,22 +180,25 @@ void _clInit()
     free(allPlatforms);
 
     //-----------------------------------------------
-    //--cambine-2: create an OpenCL context
+    //--cambine-2: create an OpenCL context
+    // Select device type on the basis of the passed parameter
+    static cl_device_type device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
+
     cl_context_properties cprops[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)targetPlatform, 0 };
-    oclHandles.context = clCreateContextFromType(cprops, 
-                                                CL_DEVICE_TYPE_CPU, 
-                                                NULL, 
-                                                NULL, 
+    oclHandles.context = clCreateContextFromType(cprops,
+                                                device_type,
+                                                NULL,
+                                                NULL,
                                                 &resultCL);
 
     if ((resultCL != CL_SUCCESS) || (oclHandles.context == NULL))
         throw (string("InitCL()::Error: Creating Context (clCreateContextFromType)"));
     //-----------------------------------------------
-    //--cambine-3: detect OpenCL devices	
+    //--cambine-3: detect OpenCL devices
     /* First, get the size of device list */
-   oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_CPU, 0, NULL, &deviceListSize);
+   oclHandles.cl_status = clGetDeviceIDs(targetPlatform, device_type, 0, NULL, &deviceListSize);
    if(oclHandles.cl_status!=CL_SUCCESS){
-   	throw(string("exception in _clInit -> clGetDeviceIDs"));   	
+   	throw(string("exception in _clInit -> clGetDeviceIDs"));
    }
    if (deviceListSize == 0)
         throw(string("InitCL()::Error: No devices found."));
@@ -209,16 +212,16 @@ void _clInit()
         throw(string("InitCL()::Error: Could not allocate memory."));
 
     /* Next, get the device list data */
-   oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_CPU, deviceListSize, \
+   oclHandles.cl_status = clGetDeviceIDs(targetPlatform, device_type, deviceListSize, \
 								oclHandles.devices, NULL);
    if(oclHandles.cl_status!=CL_SUCCESS){
-   	throw(string("exception in _clInit -> clGetDeviceIDs-2"));   	
+   	throw(string("exception in _clInit -> clGetDeviceIDs-2"));
    }
    //-----------------------------------------------
-   //--cambine-4: Create an OpenCL command queue    
-    oclHandles.queue = clCreateCommandQueue(oclHandles.context, 
-                                            oclHandles.devices[DEVICE_ID_INUSED], 
-                                            0, 
+   //--cambine-4: Create an OpenCL command queue
+    oclHandles.queue = clCreateCommandQueue(oclHandles.context,
+                                            oclHandles.devices[device_num],
+                                            0,
                                             &resultCL);
 
     if ((resultCL != CL_SUCCESS) || (oclHandles.queue == NULL))
@@ -229,14 +232,14 @@ void _clInit()
     const char * source    = source_str.c_str();
     size_t sourceSize[]    = { source_str.length() };
 
-    oclHandles.program = clCreateProgramWithSource(oclHandles.context, 
-                                                    1, 
+    oclHandles.program = clCreateProgramWithSource(oclHandles.context,
+                                                    1,
                                                     &source,
                                                     sourceSize,
                                                     &resultCL);
 
     if ((resultCL != CL_SUCCESS) || (oclHandles.program == NULL))
-        throw(string("InitCL()::Error: Loading Binary into cl_program. (clCreateProgramWithBinary)"));    
+        throw(string("InitCL()::Error: Loading Binary into cl_program. (clCreateProgramWithBinary)"));
     //insert debug information
     //std::string options= "-cl-nv-verbose"; //Doesn't work on AMD machines
     //options += " -cl-nv-opt-level=3";
@@ -247,43 +250,43 @@ void _clInit()
         cerr << "InitCL()::Error: In clBuildProgram" << endl;
 
 		size_t length;
-        resultCL = clGetProgramBuildInfo(oclHandles.program, 
-                                        oclHandles.devices[DEVICE_ID_INUSED], 
-                                        CL_PROGRAM_BUILD_LOG, 
-                                        0, 
-                                        NULL, 
+        resultCL = clGetProgramBuildInfo(oclHandles.program,
+                                        oclHandles.devices[device_num],
+                                        CL_PROGRAM_BUILD_LOG,
+                                        0,
+                                        NULL,
                                         &length);
-        if(resultCL != CL_SUCCESS) 
+        if(resultCL != CL_SUCCESS)
             throw(string("InitCL()::Error: Getting Program build info(clGetProgramBuildInfo)"));
 
 		char* buffer = (char*)malloc(length);
-        resultCL = clGetProgramBuildInfo(oclHandles.program, 
-                                        oclHandles.devices[DEVICE_ID_INUSED], 
-                                        CL_PROGRAM_BUILD_LOG, 
-                                        length, 
-                                        buffer, 
+        resultCL = clGetProgramBuildInfo(oclHandles.program,
+                                        oclHandles.devices[device_num],
+                                        CL_PROGRAM_BUILD_LOG,
+                                        length,
+                                        buffer,
                                         NULL);
-        if(resultCL != CL_SUCCESS) 
+        if(resultCL != CL_SUCCESS)
             throw(string("InitCL()::Error: Getting Program build info(clGetProgramBuildInfo)"));
 
 		cerr << buffer << endl;
         free(buffer);
 
         throw(string("InitCL()::Error: Building Program (clBuildProgram)"));
-    } 
+    }
 
     //get program information in intermediate representation
-    #ifdef PTX_MSG    
+    #ifdef PTX_MSG
     size_t binary_sizes[deviceListSize];
     char * binaries[deviceListSize];
-    //figure out number of devices and the sizes of the binary for each device. 
+    //figure out number of devices and the sizes of the binary for each device.
     oclHandles.cl_status = clGetProgramInfo(oclHandles.program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t)*deviceListSize, &binary_sizes, NULL );
     if(oclHandles.cl_status!=CL_SUCCESS){
         throw(string("--cambine:exception in _InitCL -> clGetProgramInfo-2"));
     }
 
     std::cout<<"--cambine:"<<binary_sizes<<std::endl;
-    //copy over all of the generated binaries. 
+    //copy over all of the generated binaries.
     for(int i=0;i<deviceListSize;i++)
 	binaries[i] = (char *)malloc( sizeof(char)*(binary_sizes[i]+1));
     oclHandles.cl_status = clGetProgramInfo(oclHandles.program, CL_PROGRAM_BINARIES, sizeof(char *)*deviceListSize, binaries, NULL );
@@ -326,7 +329,7 @@ void _clInit()
     oclHandles.cl_status = clGetProgramBuildInfo(oclHandles.program, oclHandles.devices[DEVICE_ID_INUSED], CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
     if(oclHandles.cl_status!=CL_SUCCESS){
 	throw(string("exceptions in _InitCL -> getting resource information"));
-    }    
+    }
 
     build_log = (char *)malloc(ret_val_size+1);
     oclHandles.cl_status = clGetProgramBuildInfo(oclHandles.program, oclHandles.devices[DEVICE_ID_INUSED], CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
@@ -411,7 +414,7 @@ cl_mem _clCreateAndCpyMem(int size, void * h_mem_source) throw(string){
 }
 //-------------------------------------------------------
 //--cambine:	create read only  buffer for devices
-//--date:	17/01/2011	
+//--date:	17/01/2011
 cl_mem _clMallocRW(int size, void * h_mem_ptr) throw(string){
  	cl_mem d_mem;
 	d_mem = clCreateBuffer(oclHandles.context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, size, h_mem_ptr, &oclHandles.cl_status);
@@ -423,7 +426,7 @@ cl_mem _clMallocRW(int size, void * h_mem_ptr) throw(string){
 }
 //-------------------------------------------------------
 //--cambine:	create read and write buffer for devices
-//--date:	17/01/2011	
+//--date:	17/01/2011
 cl_mem _clMalloc(int size, void * h_mem_ptr) throw(string){
  	cl_mem d_mem;
 	d_mem = clCreateBuffer(oclHandles.context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, size, h_mem_ptr, &oclHandles.cl_status);
@@ -445,7 +448,7 @@ void _clMemcpyH2D(cl_mem d_mem, int size, const void *h_mem_ptr) throw(string){
 	#endif
 }
 //--------------------------------------------------------
-//--cambine:create buffer and then copy data from host to device with pinned 
+//--cambine:create buffer and then copy data from host to device with pinned
 // memory
 cl_mem _clCreateAndCpyPinnedMem(int size, float* h_mem_source) throw(string){
 	cl_mem d_mem, d_mem_pinned;
@@ -484,7 +487,7 @@ cl_mem _clCreateAndCpyPinnedMem(int size, float* h_mem_source) throw(string){
 	if(oclHandles.cl_status != CL_SUCCESS)
 		throw(string("excpetion in _clCreateAndCpyMem() -> clEnqueueWriteBuffer"));
 	#endif
-	
+
 	return d_mem;
 }
 
@@ -513,22 +516,22 @@ void _clMemcpyD2H(cl_mem d_mem, int size, void * h_mem) throw(string){
 				break;
 			case CL_INVALID_CONTEXT:
 				oclHandles.error_str += "CL_INVALID_CONTEXT";
-				break;	
+				break;
 			case CL_INVALID_MEM_OBJECT:
 				oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
 				break;
 			case CL_INVALID_VALUE:
 				oclHandles.error_str += "CL_INVALID_VALUE";
-				break;	
+				break;
 			case CL_INVALID_EVENT_WAIT_LIST:
 				oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
 				break;
 			case CL_MEM_OBJECT_ALLOCATION_FAILURE:
 				oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
-				break;	
+				break;
 			case CL_OUT_OF_HOST_MEMORY:
 				oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
-				break;		
+				break;
 			default:
 				oclHandles.error_str += "Unknown reason";
 				break;
@@ -551,19 +554,19 @@ void _clSetArgs(int kernel_id, int arg_idx, void * d_mem, int size = 0) throw(st
 				break;
 			case CL_INVALID_ARG_INDEX:
 				oclHandles.error_str += "CL_INVALID_ARG_INDEX";
-				break;	
+				break;
 			case CL_INVALID_ARG_VALUE:
 				oclHandles.error_str += "CL_INVALID_ARG_VALUE";
 				break;
 			case CL_INVALID_MEM_OBJECT:
 				oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
-				break;	
+				break;
 			case CL_INVALID_SAMPLER:
 				oclHandles.error_str += "CL_INVALID_SAMPLER";
 				break;
 			case CL_INVALID_ARG_SIZE:
 				oclHandles.error_str += "CL_INVALID_ARG_SIZE";
-				break;	
+				break;
 			case CL_OUT_OF_RESOURCES:
 				oclHandles.error_str += "CL_OUT_OF_RESOURCES";
 				break;
@@ -588,19 +591,19 @@ void _clSetArgs(int kernel_id, int arg_idx, void * d_mem, int size = 0) throw(st
 				break;
 			case CL_INVALID_ARG_INDEX:
 				oclHandles.error_str += "CL_INVALID_ARG_INDEX";
-				break;	
+				break;
 			case CL_INVALID_ARG_VALUE:
 				oclHandles.error_str += "CL_INVALID_ARG_VALUE";
 				break;
 			case CL_INVALID_MEM_OBJECT:
 				oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
-				break;	
+				break;
 			case CL_INVALID_SAMPLER:
 				oclHandles.error_str += "CL_INVALID_SAMPLER";
 				break;
 			case CL_INVALID_ARG_SIZE:
 				oclHandles.error_str += "CL_INVALID_ARG_SIZE";
-				break;	
+				break;
 			case CL_OUT_OF_RESOURCES:
 				oclHandles.error_str += "CL_OUT_OF_RESOURCES";
 				break;
@@ -617,7 +620,7 @@ void _clSetArgs(int kernel_id, int arg_idx, void * d_mem, int size = 0) throw(st
 	}
 }
 void _clFinish() throw(string){
-	oclHandles.cl_status = clFinish(oclHandles.queue);	
+	oclHandles.cl_status = clFinish(oclHandles.queue);
 	#ifdef ERRMSG
 	oclHandles.error_str = "excpetion in _clFinish";
 	switch(oclHandles.cl_status){
@@ -626,7 +629,7 @@ void _clFinish() throw(string){
 			break;
 		case CL_OUT_OF_RESOURCES:
 			oclHandles.error_str += "CL_OUT_OF_RESOURCES";
-			break;		
+			break;
 		case CL_OUT_OF_HOST_MEMORY:
 			oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
 			break;
@@ -650,7 +653,7 @@ void _clInvokeKernel(int kernel_id, int work_items, int work_group_size) throw(s
   	size_t local_work_size[] = {work_group_size, 1};
 	size_t global_work_size[] = {work_items, 1};
 	oclHandles.cl_status = clEnqueueNDRangeKernel(oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0, \
-											global_work_size, local_work_size, 0 , 0, &(e[0]) );	
+											global_work_size, local_work_size, 0 , 0, &(e[0]) );
 	#ifdef ERRMSG
 	oclHandles.error_str = "excpetion in _clInvokeKernel() -> ";
 	switch(oclHandles.cl_status)
@@ -697,12 +700,12 @@ void _clInvokeKernel(int kernel_id, int work_items, int work_group_size) throw(s
 		case CL_OUT_OF_HOST_MEMORY:
 			oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
 			break;
-		default: 
+		default:
 			oclHandles.error_str += "Unkown reseason";
-			break;		
+			break;
 	}
 	if(oclHandles.cl_status != CL_SUCCESS)
-		throw(oclHandles.error_str);	
+		throw(oclHandles.error_str);
 	#endif
 	//_clFinish();
 	// oclHandles.cl_status = clWaitForEvents(1, &e[0]);
@@ -719,7 +722,7 @@ void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, int
 	/*if(work_items%work_group_size != 0)	//process situations that work_items cannot be divided by work_group_size
 	  work_items = work_items + (work_group_size-(work_items%work_group_size));*/
 	oclHandles.cl_status = clEnqueueNDRangeKernel(oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0, \
-											global_work_size, local_work_size, 0 , 0, &(e[0]) );	
+											global_work_size, local_work_size, 0 , 0, &(e[0]) );
 	#ifdef ERRMSG
 	oclHandles.error_str = "excpetion in _clInvokeKernel() -> ";
 	switch(oclHandles.cl_status)
@@ -766,12 +769,12 @@ void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, int
 		case CL_OUT_OF_HOST_MEMORY:
 			oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
 			break;
-		default: 
+		default:
 			oclHandles.error_str += "Unkown reseason";
-			break;		
+			break;
 	}
 	if(oclHandles.cl_status != CL_SUCCESS)
-		throw(oclHandles.error_str);	
+		throw(oclHandles.error_str);
 	#endif
 	//_clFinish();
 	/*oclHandles.cl_status = clWaitForEvents(1, &e[0]);
@@ -789,7 +792,7 @@ void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, int
 //release OpenCL objects
 void _clFree(cl_mem ob) throw(string){
 	if(ob!=NULL)
-		oclHandles.cl_status = clReleaseMemObject(ob);	
+		oclHandles.cl_status = clReleaseMemObject(ob);
 	#ifdef ERRMSG
 	oclHandles.error_str = "excpetion in _clFree() ->";
 	switch(oclHandles.cl_status)
@@ -802,11 +805,11 @@ void _clFree(cl_mem ob) throw(string){
 			break;
 		case CL_OUT_OF_HOST_MEMORY:
 			oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
-			break;			
-		default: 
+			break;
+		default:
 			oclHandles.error_str += "Unkown reseason";
-			break;		
-	}        
+			break;
+	}
     if (oclHandles.cl_status!= CL_SUCCESS)
        throw(oclHandles.error_str);
 	#endif

+ 44 - 36
opencl/bfs/bfs.cpp

@@ -13,7 +13,11 @@
 #include "CLHelper.h"
 #include "util.h"
 
-#define MAX_THREADS_PER_BLOCK 256
+#define MAX_THREADS_PER_BLOCK 256
+
+int platform_num = 0;
+int device_num = 0;
+int use_gpu = 0;
 
 //Structure to hold a node information
 struct Node
@@ -39,7 +43,7 @@ void run_bfs_cpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
 		stop=false;
 		for(int tid = 0; tid < no_of_nodes; tid++ )
 		{
-			if (h_graph_mask[tid] == true){ 
+			if (h_graph_mask[tid] == true){
 				h_graph_mask[tid]=false;
 				for(int i=h_graph_nodes[tid].starting; i<(h_graph_nodes[tid].no_of_edges + h_graph_nodes[tid].starting); i++){
 					int id = h_graph_edges[i];	//--cambine: node id is connected with node tid
@@ -48,7 +52,7 @@ void run_bfs_cpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
 						h_updating_graph_mask[id]=true;
 					}
 				}
-			}		
+			}
 		}
 
   		for(int tid=0; tid< no_of_nodes ; tid++ )
@@ -69,7 +73,7 @@ void run_bfs_cpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
 //----------------------------------------------------------
 void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
 		int *h_graph_edges, char *h_graph_mask, char *h_updating_graph_mask, \
-		char *h_graph_visited, int *h_cost) 
+		char *h_graph_visited, int *h_cost)
 					throw(std::string){
 
 	//int number_elements = height*width;
@@ -78,7 +82,7 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
 			d_graph_visited, d_cost, d_over;
 	try{
 		//--1 transfer data from host to device
-		_clInit();			
+		_clInit(platform_num, device_num, use_gpu);
 		d_graph_nodes = _clMalloc(no_of_nodes*sizeof(Node), h_graph_nodes);
 		d_graph_edges = _clMalloc(edge_list_size*sizeof(int), h_graph_edges);
 		d_graph_mask = _clMallocRW(no_of_nodes*sizeof(char), h_graph_mask);
@@ -88,18 +92,18 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
 
 		d_cost = _clMallocRW(no_of_nodes*sizeof(int), h_cost);
 		d_over = _clMallocRW(sizeof(char), &h_over);
-		
+
 		_clMemcpyH2D(d_graph_nodes, no_of_nodes*sizeof(Node), h_graph_nodes);
-		_clMemcpyH2D(d_graph_edges, edge_list_size*sizeof(int), h_graph_edges);	
-		_clMemcpyH2D(d_graph_mask, no_of_nodes*sizeof(char), h_graph_mask);	
-		_clMemcpyH2D(d_updating_graph_mask, no_of_nodes*sizeof(char), h_updating_graph_mask);	
-		_clMemcpyH2D(d_graph_visited, no_of_nodes*sizeof(char), h_graph_visited);	
-		_clMemcpyH2D(d_cost, no_of_nodes*sizeof(int), h_cost);	
-			
+		_clMemcpyH2D(d_graph_edges, edge_list_size*sizeof(int), h_graph_edges);
+		_clMemcpyH2D(d_graph_mask, no_of_nodes*sizeof(char), h_graph_mask);
+		_clMemcpyH2D(d_updating_graph_mask, no_of_nodes*sizeof(char), h_updating_graph_mask);
+		_clMemcpyH2D(d_graph_visited, no_of_nodes*sizeof(char), h_graph_visited);
+		_clMemcpyH2D(d_cost, no_of_nodes*sizeof(int), h_cost);
+
 		//--2 invoke kernel
 #ifdef	PROFILING
 		timer kernel_timer;
-		double kernel_time = 0.0;		
+		double kernel_time = 0.0;
 		kernel_timer.reset();
 		kernel_timer.start();
 #endif
@@ -116,25 +120,25 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
 			_clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
 			_clSetArgs(kernel_id, kernel_idx++, d_cost);
 			_clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
-			
+
 			//int work_items = no_of_nodes;
 			_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);
-			
+
 			//--kernel 1
 			kernel_id = 1;
-			kernel_idx = 0;			
+			kernel_idx = 0;
 			_clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
 			_clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
 			_clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
 			_clSetArgs(kernel_id, kernel_idx++, d_over);
 			_clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
-			
+
 			//work_items = no_of_nodes;
-			_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);			
-			
+			_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);
+
 			_clMemcpyD2H(d_over,sizeof(char), &h_over);
 			}while(h_over);
-			
+
 		_clFinish();
 #ifdef	PROFILING
 		kernel_timer.stop();
@@ -144,7 +148,7 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
 		_clMemcpyD2H(d_cost,no_of_nodes*sizeof(int), h_cost);
 		//--statistics
 #ifdef	PROFILING
-		std::cout<<"kernel time(s):"<<kernel_time<<std::endl;		
+		std::cout<<"kernel time(s):"<<kernel_time<<std::endl;
 #endif
 		//--4 release cl resources.
 		_clFree(d_graph_nodes);
@@ -156,7 +160,7 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
 		_clFree(d_over);
 		_clRelease();
 	}
-	catch(std::string msg){		
+	catch(std::string msg){
 		_clFree(d_graph_nodes);
 		_clFree(d_graph_edges);
 		_clFree(d_graph_mask);
@@ -173,7 +177,7 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
 }
 void Usage(int argc, char**argv){
 
-fprintf(stderr,"Usage: %s <input_file>\n", argv[0]);
+fprintf(stderr,"Usage: %s <input_file> <platform_number> <device_number> <use_gpu>\n", argv[0]);
 
 }
 //----------------------------------------------------------
@@ -190,11 +194,11 @@ int main(int argc, char * argv[])
 	char *h_graph_mask, *h_updating_graph_mask, *h_graph_visited;
 	try{
 		char *input_f;
-		if(argc!=2){
+		if(argc!=5){
 		  Usage(argc, argv);
 		  exit(0);
 		}
-	
+
 		input_f = argv[1];
 		printf("Reading File\n");
 		//Read in Graph from a file
@@ -202,7 +206,11 @@ int main(int argc, char * argv[])
 		if(!fp){
 		  printf("Error Reading graph file\n");
 		  return 0;
-		}
+		}
+
+    platform_num = atoi(argv[2]);
+    device_num = atoi(argv[3]);
+    use_gpu = atoi(argv[4]);
 
 		int source = 0;
 
@@ -214,8 +222,8 @@ int main(int argc, char * argv[])
 		//Make execution Parameters according to the number of nodes
 		//Distribute threads across multiple Blocks if necessary
 		if(no_of_nodes>MAX_THREADS_PER_BLOCK){
-			num_of_blocks = (int)ceil(no_of_nodes/(double)MAX_THREADS_PER_BLOCK); 
-			num_of_threads_per_block = MAX_THREADS_PER_BLOCK; 
+			num_of_blocks = (int)ceil(no_of_nodes/(double)MAX_THREADS_PER_BLOCK);
+			num_of_threads_per_block = MAX_THREADS_PER_BLOCK;
 		}
 		work_group_size = num_of_threads_per_block;
 		// allocate host memory
@@ -223,8 +231,8 @@ int main(int argc, char * argv[])
 		h_graph_mask = (char*) malloc(sizeof(char)*no_of_nodes);
 		h_updating_graph_mask = (char*) malloc(sizeof(char)*no_of_nodes);
 		h_graph_visited = (char*) malloc(sizeof(char)*no_of_nodes);
-	
-		int start, edgeno;   
+
+		int start, edgeno;
 		// initalize the memory
 		for(int i = 0; i < no_of_nodes; i++){
 			fscanf(fp,"%d %d",&start,&edgeno);
@@ -250,7 +258,7 @@ int main(int argc, char * argv[])
 		}
 
 		if(fp)
-			fclose(fp);    
+			fclose(fp);
 		// allocate mem for the result on host side
 		int	*h_cost = (int*) malloc(sizeof(int)*no_of_nodes);
 		int *h_cost_ref = (int*)malloc(sizeof(int)*no_of_nodes);
@@ -259,10 +267,10 @@ int main(int argc, char * argv[])
 			h_cost_ref[i] = -1;
 		}
 		h_cost[source]=0;
-		h_cost_ref[source]=0;		
+		h_cost_ref[source]=0;
 		//---------------------------------------------------------
 		//--gpu entry
-		run_bfs_gpu(no_of_nodes,h_graph_nodes,edge_list_size,h_graph_edges, h_graph_mask, h_updating_graph_mask, h_graph_visited, h_cost);	
+		run_bfs_gpu(no_of_nodes,h_graph_nodes,edge_list_size,h_graph_edges, h_graph_mask, h_updating_graph_mask, h_graph_visited, h_cost);
 		//---------------------------------------------------------
 		//--cpu entry
 		// initalize the memory again
@@ -279,7 +287,7 @@ int main(int argc, char * argv[])
 		//---------------------------------------------------------
 		//--result varification
 		compare_results<int>(h_cost_ref, h_cost, no_of_nodes);
-		//release host memory		
+		//release host memory
 		free(h_graph_nodes);
 		free(h_graph_mask);
 		free(h_updating_graph_mask);
@@ -292,8 +300,8 @@ int main(int argc, char * argv[])
 		free(h_graph_nodes);
 		free(h_graph_mask);
 		free(h_updating_graph_mask);
-		free(h_graph_visited);		
+		free(h_graph_visited);
 	}
-		
+
     return 0;
 }

+ 1 - 1
opencl/bfs/run-cpu

@@ -1 +1 @@
-./bfs ../../data/bfs/graph1MW_6.txt
+./bfs ../../data/bfs/graph1MW_6.txt 1 0 0

+ 1 - 0
opencl/bfs/run-gpu

@@ -0,0 +1 @@
+./bfs ../../data/bfs/graph1MW_6.txt 0 0 1