8 anni fa · 1984b7b6e8
--- a/opencl/bfs/CLHelper.h
+++ b/opencl/bfs/CLHelper.h
@@ -65,7 +65,7 @@ string FileToString(const string fileName)
 
				             f.read(str, fileSize);

			
 
				             f.close();

			
 
				             str[size] = '\0';

			
 
				-        

			
 
				+

			
 
				             s = str;

			
 
				             delete [] str;

			
 
				             return s;

			
@@ -121,19 +121,19 @@ void _clCmdParams(int argc, char* argv[]){
 
				 			;

			
 
				 		}

			
 
				 	}

			
 
				-	

			
 
				+

			
 
				 }

			
 
				 

			
 
				 //---------------------------------------

			
 
				 //Initlize CL objects

			
 
				 //--description: there are 5 steps to initialize all the OpenCL objects needed

			
 
				-//--revised on 04/01/2011: get the number of devices  and 

			
 
				+//--revised on 04/01/2011: get the number of devices  and

			
 
				 //  devices have no relationship with context

			
 
				-void _clInit()

			
 
				+void _clInit(int platform_num, int device_num, int use_gpu)

			
 
				 {

			
 
				-    int DEVICE_ID_INUSED = device_id_inused;

			
 
				+    int DEVICE_ID_INUSED = device_num; // kept: later clGetProgramBuildInfo calls in _clInit still index devices[] with this name

			
 
				     cl_int resultCL;

			
 
				-    

			
 
				+

			
 
				     oclHandles.context = NULL;

			
 
				     oclHandles.devices = NULL;

			
 
				     oclHandles.queue = NULL;

			
@@ -162,7 +162,7 @@ void _clInit()
 
				         throw (string("InitCL()::Error: Getting platform ids (clGetPlatformIDs)"));

			
 
				 

			
 
				     /* Select the target platform. Default: first platform */

			
 
				-    targetPlatform = allPlatforms[1];

			
 
				+    targetPlatform = allPlatforms[platform_num];

			
 
				     for (int i = 0; i < numPlatforms; i++)

			
 
				     {

			
 
				         char pbuff[128];

			
@@ -180,22 +180,25 @@ void _clInit()
 
				     free(allPlatforms);

			
 
				 

			
 
				     //-----------------------------------------------

			
 
				-    //--cambine-2: create an OpenCL context

			
 
				+    //--cambine-2: create an OpenCL context
			
 
				+    // Select device type on the basis of the passed parameter

			
 
				+    const cl_device_type device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU; // not static: must be re-evaluated from use_gpu on every call

			
 
				+

			
 
				     cl_context_properties cprops[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)targetPlatform, 0 };

			
 
				-    oclHandles.context = clCreateContextFromType(cprops, 

			
 
				-                                                CL_DEVICE_TYPE_CPU, 

			
 
				-                                                NULL, 

			
 
				-                                                NULL, 

			
 
				+    oclHandles.context = clCreateContextFromType(cprops,

			
 
				+                                                device_type,

			
 
				+                                                NULL,

			
 
				+                                                NULL,

			
 
				                                                 &resultCL);

			
 
				 

			
 
				     if ((resultCL != CL_SUCCESS) || (oclHandles.context == NULL))

			
 
				         throw (string("InitCL()::Error: Creating Context (clCreateContextFromType)"));

			
 
				     //-----------------------------------------------

			
 
				-    //--cambine-3: detect OpenCL devices	

			
 
				+    //--cambine-3: detect OpenCL devices

			
 
				     /* First, get the size of device list */

			
 
				-   oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_CPU, 0, NULL, &deviceListSize);

			
 
				+   oclHandles.cl_status = clGetDeviceIDs(targetPlatform, device_type, 0, NULL, &deviceListSize);

			
 
				    if(oclHandles.cl_status!=CL_SUCCESS){

			
 
				-   	throw(string("exception in _clInit -> clGetDeviceIDs"));   	

			
 
				+   	throw(string("exception in _clInit -> clGetDeviceIDs"));

			
 
				    }

			
 
				    if (deviceListSize == 0)

			
 
				         throw(string("InitCL()::Error: No devices found."));

			
@@ -209,16 +212,16 @@ void _clInit()
 
				         throw(string("InitCL()::Error: Could not allocate memory."));

			
 
				 

			
 
				     /* Next, get the device list data */

			
 
				-   oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_CPU, deviceListSize, \

			
 
				+   oclHandles.cl_status = clGetDeviceIDs(targetPlatform, device_type, deviceListSize, \

			
 
				 								oclHandles.devices, NULL);

			
 
				    if(oclHandles.cl_status!=CL_SUCCESS){

			
 
				-   	throw(string("exception in _clInit -> clGetDeviceIDs-2"));   	

			
 
				+   	throw(string("exception in _clInit -> clGetDeviceIDs-2"));

			
 
				    }

			
 
				    //-----------------------------------------------

			
 
				-   //--cambine-4: Create an OpenCL command queue    

			
 
				-    oclHandles.queue = clCreateCommandQueue(oclHandles.context, 

			
 
				-                                            oclHandles.devices[DEVICE_ID_INUSED], 

			
 
				-                                            0, 

			
 
				+   //--cambine-4: Create an OpenCL command queue

			
 
				+    oclHandles.queue = clCreateCommandQueue(oclHandles.context,

			
 
				+                                            oclHandles.devices[device_num],

			
 
				+                                            0,

			
 
				                                             &resultCL);

			
 
				 

			
 
				     if ((resultCL != CL_SUCCESS) || (oclHandles.queue == NULL))

			
@@ -229,14 +232,14 @@ void _clInit()
 
				     const char * source    = source_str.c_str();

			
 
				     size_t sourceSize[]    = { source_str.length() };

			
 
				 

			
 
				-    oclHandles.program = clCreateProgramWithSource(oclHandles.context, 

			
 
				-                                                    1, 

			
 
				+    oclHandles.program = clCreateProgramWithSource(oclHandles.context,

			
 
				+                                                    1,

			
 
				                                                     &source,

			
 
				                                                     sourceSize,

			
 
				                                                     &resultCL);

			
 
				 

			
 
				     if ((resultCL != CL_SUCCESS) || (oclHandles.program == NULL))

			
 
				-        throw(string("InitCL()::Error: Loading Binary into cl_program. (clCreateProgramWithBinary)"));    

			
 
				+        throw(string("InitCL()::Error: Loading Binary into cl_program. (clCreateProgramWithBinary)"));

			
 
				     //insert debug information

			
 
				     //std::string options= "-cl-nv-verbose"; //Doesn't work on AMD machines

			
 
				     //options += " -cl-nv-opt-level=3";

			
@@ -247,43 +250,43 @@ void _clInit()
 
				         cerr << "InitCL()::Error: In clBuildProgram" << endl;

			
 
				 

			
 
				 		size_t length;

			
 
				-        resultCL = clGetProgramBuildInfo(oclHandles.program, 

			
 
				-                                        oclHandles.devices[DEVICE_ID_INUSED], 

			
 
				-                                        CL_PROGRAM_BUILD_LOG, 

			
 
				-                                        0, 

			
 
				-                                        NULL, 

			
 
				+        resultCL = clGetProgramBuildInfo(oclHandles.program,

			
 
				+                                        oclHandles.devices[device_num],

			
 
				+                                        CL_PROGRAM_BUILD_LOG,

			
 
				+                                        0,

			
 
				+                                        NULL,

			
 
				                                         &length);

			
 
				-        if(resultCL != CL_SUCCESS) 

			
 
				+        if(resultCL != CL_SUCCESS)

			
 
				             throw(string("InitCL()::Error: Getting Program build info(clGetProgramBuildInfo)"));

			
 
				 

			
 
				 		char* buffer = (char*)malloc(length);

			
 
				-        resultCL = clGetProgramBuildInfo(oclHandles.program, 

			
 
				-                                        oclHandles.devices[DEVICE_ID_INUSED], 

			
 
				-                                        CL_PROGRAM_BUILD_LOG, 

			
 
				-                                        length, 

			
 
				-                                        buffer, 

			
 
				+        resultCL = clGetProgramBuildInfo(oclHandles.program,

			
 
				+                                        oclHandles.devices[device_num],

			
 
				+                                        CL_PROGRAM_BUILD_LOG,

			
 
				+                                        length,

			
 
				+                                        buffer,

			
 
				                                         NULL);

			
 
				-        if(resultCL != CL_SUCCESS) 

			
 
				+        if(resultCL != CL_SUCCESS)

			
 
				             throw(string("InitCL()::Error: Getting Program build info(clGetProgramBuildInfo)"));

			
 
				 

			
 
				 		cerr << buffer << endl;

			
 
				         free(buffer);

			
 
				 

			
 
				         throw(string("InitCL()::Error: Building Program (clBuildProgram)"));

			
 
				-    } 

			
 
				+    }

			
 
				 

			
 
				     //get program information in intermediate representation

			
 
				-    #ifdef PTX_MSG    

			
 
				+    #ifdef PTX_MSG

			
 
				     size_t binary_sizes[deviceListSize];

			
 
				     char * binaries[deviceListSize];

			
 
				-    //figure out number of devices and the sizes of the binary for each device. 

			
 
				+    //figure out number of devices and the sizes of the binary for each device.

			
 
				     oclHandles.cl_status = clGetProgramInfo(oclHandles.program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t)*deviceListSize, &binary_sizes, NULL );

			
 
				     if(oclHandles.cl_status!=CL_SUCCESS){

			
 
				         throw(string("--cambine:exception in _InitCL -> clGetProgramInfo-2"));

			
 
				     }

			
 
				 

			
 
				     std::cout<<"--cambine:"<<binary_sizes<<std::endl;

			
 
				-    //copy over all of the generated binaries. 

			
 
				+    //copy over all of the generated binaries.

			
 
				     for(int i=0;i<deviceListSize;i++)

			
 
				 	binaries[i] = (char *)malloc( sizeof(char)*(binary_sizes[i]+1));

			
 
				     oclHandles.cl_status = clGetProgramInfo(oclHandles.program, CL_PROGRAM_BINARIES, sizeof(char *)*deviceListSize, binaries, NULL );

			
@@ -326,7 +329,7 @@ void _clInit()
 
				     oclHandles.cl_status = clGetProgramBuildInfo(oclHandles.program, oclHandles.devices[DEVICE_ID_INUSED], CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);

			
 
				     if(oclHandles.cl_status!=CL_SUCCESS){

			
 
				 	throw(string("exceptions in _InitCL -> getting resource information"));

			
 
				-    }    

			
 
				+    }

			
 
				 

			
 
				     build_log = (char *)malloc(ret_val_size+1);

			
 
				     oclHandles.cl_status = clGetProgramBuildInfo(oclHandles.program, oclHandles.devices[DEVICE_ID_INUSED], CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);

			
@@ -411,7 +414,7 @@ cl_mem _clCreateAndCpyMem(int size, void * h_mem_source) throw(string){
 
				 }

			
 
				 //-------------------------------------------------------

			
 
				 //--cambine:	create read only  buffer for devices

			
 
				-//--date:	17/01/2011	

			
 
				+//--date:	17/01/2011

			
 
				 cl_mem _clMallocRW(int size, void * h_mem_ptr) throw(string){

			
 
				  	cl_mem d_mem;

			
 
				 	d_mem = clCreateBuffer(oclHandles.context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, size, h_mem_ptr, &oclHandles.cl_status);

			
@@ -423,7 +426,7 @@ cl_mem _clMallocRW(int size, void * h_mem_ptr) throw(string){
 
				 }

			
 
				 //-------------------------------------------------------

			
 
				 //--cambine:	create read and write buffer for devices

			
 
				-//--date:	17/01/2011	

			
 
				+//--date:	17/01/2011

			
 
				 cl_mem _clMalloc(int size, void * h_mem_ptr) throw(string){

			
 
				  	cl_mem d_mem;

			
 
				 	d_mem = clCreateBuffer(oclHandles.context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, size, h_mem_ptr, &oclHandles.cl_status);

			
@@ -445,7 +448,7 @@ void _clMemcpyH2D(cl_mem d_mem, int size, const void *h_mem_ptr) throw(string){
 
				 	#endif

			
 
				 }

			
 
				 //--------------------------------------------------------

			
 
				-//--cambine:create buffer and then copy data from host to device with pinned 

			
 
				+//--cambine:create buffer and then copy data from host to device with pinned

			
 
				 // memory

			
 
				 cl_mem _clCreateAndCpyPinnedMem(int size, float* h_mem_source) throw(string){

			
 
				 	cl_mem d_mem, d_mem_pinned;

			
@@ -484,7 +487,7 @@ cl_mem _clCreateAndCpyPinnedMem(int size, float* h_mem_source) throw(string){
 
				 	if(oclHandles.cl_status != CL_SUCCESS)

			
 
				 		throw(string("excpetion in _clCreateAndCpyMem() -> clEnqueueWriteBuffer"));

			
 
				 	#endif

			
 
				-	

			
 
				+

			
 
				 	return d_mem;

			
 
				 }

			
 
				 

			
@@ -513,22 +516,22 @@ void _clMemcpyD2H(cl_mem d_mem, int size, void * h_mem) throw(string){
 
				 				break;

			
 
				 			case CL_INVALID_CONTEXT:

			
 
				 				oclHandles.error_str += "CL_INVALID_CONTEXT";

			
 
				-				break;	

			
 
				+				break;

			
 
				 			case CL_INVALID_MEM_OBJECT:

			
 
				 				oclHandles.error_str += "CL_INVALID_MEM_OBJECT";

			
 
				 				break;

			
 
				 			case CL_INVALID_VALUE:

			
 
				 				oclHandles.error_str += "CL_INVALID_VALUE";

			
 
				-				break;	

			
 
				+				break;

			
 
				 			case CL_INVALID_EVENT_WAIT_LIST:

			
 
				 				oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";

			
 
				 				break;

			
 
				 			case CL_MEM_OBJECT_ALLOCATION_FAILURE:

			
 
				 				oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";

			
 
				-				break;	

			
 
				+				break;

			
 
				 			case CL_OUT_OF_HOST_MEMORY:

			
 
				 				oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";

			
 
				-				break;		

			
 
				+				break;

			
 
				 			default:

			
 
				 				oclHandles.error_str += "Unknown reason";

			
 
				 				break;

			
@@ -551,19 +554,19 @@ void _clSetArgs(int kernel_id, int arg_idx, void * d_mem, int size = 0) throw(st
 
				 				break;

			
 
				 			case CL_INVALID_ARG_INDEX:

			
 
				 				oclHandles.error_str += "CL_INVALID_ARG_INDEX";

			
 
				-				break;	

			
 
				+				break;

			
 
				 			case CL_INVALID_ARG_VALUE:

			
 
				 				oclHandles.error_str += "CL_INVALID_ARG_VALUE";

			
 
				 				break;

			
 
				 			case CL_INVALID_MEM_OBJECT:

			
 
				 				oclHandles.error_str += "CL_INVALID_MEM_OBJECT";

			
 
				-				break;	

			
 
				+				break;

			
 
				 			case CL_INVALID_SAMPLER:

			
 
				 				oclHandles.error_str += "CL_INVALID_SAMPLER";

			
 
				 				break;

			
 
				 			case CL_INVALID_ARG_SIZE:

			
 
				 				oclHandles.error_str += "CL_INVALID_ARG_SIZE";

			
 
				-				break;	

			
 
				+				break;

			
 
				 			case CL_OUT_OF_RESOURCES:

			
 
				 				oclHandles.error_str += "CL_OUT_OF_RESOURCES";

			
 
				 				break;

			
@@ -588,19 +591,19 @@ void _clSetArgs(int kernel_id, int arg_idx, void * d_mem, int size = 0) throw(st
 
				 				break;

			
 
				 			case CL_INVALID_ARG_INDEX:

			
 
				 				oclHandles.error_str += "CL_INVALID_ARG_INDEX";

			
 
				-				break;	

			
 
				+				break;

			
 
				 			case CL_INVALID_ARG_VALUE:

			
 
				 				oclHandles.error_str += "CL_INVALID_ARG_VALUE";

			
 
				 				break;

			
 
				 			case CL_INVALID_MEM_OBJECT:

			
 
				 				oclHandles.error_str += "CL_INVALID_MEM_OBJECT";

			
 
				-				break;	

			
 
				+				break;

			
 
				 			case CL_INVALID_SAMPLER:

			
 
				 				oclHandles.error_str += "CL_INVALID_SAMPLER";

			
 
				 				break;

			
 
				 			case CL_INVALID_ARG_SIZE:

			
 
				 				oclHandles.error_str += "CL_INVALID_ARG_SIZE";

			
 
				-				break;	

			
 
				+				break;

			
 
				 			case CL_OUT_OF_RESOURCES:

			
 
				 				oclHandles.error_str += "CL_OUT_OF_RESOURCES";

			
 
				 				break;

			
@@ -617,7 +620,7 @@ void _clSetArgs(int kernel_id, int arg_idx, void * d_mem, int size = 0) throw(st
 
				 	}

			
 
				 }

			
 
				 void _clFinish() throw(string){

			
 
				-	oclHandles.cl_status = clFinish(oclHandles.queue);	

			
 
				+	oclHandles.cl_status = clFinish(oclHandles.queue);

			
 
				 	#ifdef ERRMSG

			
 
				 	oclHandles.error_str = "excpetion in _clFinish";

			
 
				 	switch(oclHandles.cl_status){

			
@@ -626,7 +629,7 @@ void _clFinish() throw(string){
 
				 			break;

			
 
				 		case CL_OUT_OF_RESOURCES:

			
 
				 			oclHandles.error_str += "CL_OUT_OF_RESOURCES";

			
 
				-			break;		

			
 
				+			break;

			
 
				 		case CL_OUT_OF_HOST_MEMORY:

			
 
				 			oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";

			
 
				 			break;

			
@@ -650,7 +653,7 @@ void _clInvokeKernel(int kernel_id, int work_items, int work_group_size) throw(s
 
				   	size_t local_work_size[] = {work_group_size, 1};

			
 
				 	size_t global_work_size[] = {work_items, 1};

			
 
				 	oclHandles.cl_status = clEnqueueNDRangeKernel(oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0, \

			
 
				-											global_work_size, local_work_size, 0 , 0, &(e[0]) );	

			
 
				+											global_work_size, local_work_size, 0 , 0, &(e[0]) );

			
 
				 	#ifdef ERRMSG

			
 
				 	oclHandles.error_str = "excpetion in _clInvokeKernel() -> ";

			
 
				 	switch(oclHandles.cl_status)

			
@@ -697,12 +700,12 @@ void _clInvokeKernel(int kernel_id, int work_items, int work_group_size) throw(s
 
				 		case CL_OUT_OF_HOST_MEMORY:

			
 
				 			oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";

			
 
				 			break;

			
 
				-		default: 

			
 
				+		default:

			
 
				 			oclHandles.error_str += "Unkown reseason";

			
 
				-			break;		

			
 
				+			break;

			
 
				 	}

			
 
				 	if(oclHandles.cl_status != CL_SUCCESS)

			
 
				-		throw(oclHandles.error_str);	

			
 
				+		throw(oclHandles.error_str);

			
 
				 	#endif

			
 
				 	//_clFinish();

			
 
				 	// oclHandles.cl_status = clWaitForEvents(1, &e[0]);

			
@@ -719,7 +722,7 @@ void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, int
 
				 	/*if(work_items%work_group_size != 0)	//process situations that work_items cannot be divided by work_group_size

			
 
				 	  work_items = work_items + (work_group_size-(work_items%work_group_size));*/

			
 
				 	oclHandles.cl_status = clEnqueueNDRangeKernel(oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0, \

			
 
				-											global_work_size, local_work_size, 0 , 0, &(e[0]) );	

			
 
				+											global_work_size, local_work_size, 0 , 0, &(e[0]) );

			
 
				 	#ifdef ERRMSG

			
 
				 	oclHandles.error_str = "excpetion in _clInvokeKernel() -> ";

			
 
				 	switch(oclHandles.cl_status)

			
@@ -766,12 +769,12 @@ void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, int
 
				 		case CL_OUT_OF_HOST_MEMORY:

			
 
				 			oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";

			
 
				 			break;

			
 
				-		default: 

			
 
				+		default:

			
 
				 			oclHandles.error_str += "Unkown reseason";

			
 
				-			break;		

			
 
				+			break;

			
 
				 	}

			
 
				 	if(oclHandles.cl_status != CL_SUCCESS)

			
 
				-		throw(oclHandles.error_str);	

			
 
				+		throw(oclHandles.error_str);

			
 
				 	#endif

			
 
				 	//_clFinish();

			
 
				 	/*oclHandles.cl_status = clWaitForEvents(1, &e[0]);

			
@@ -789,7 +792,7 @@ void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, int
 
				 //release OpenCL objects

			
 
				 void _clFree(cl_mem ob) throw(string){

			
 
				 	if(ob!=NULL)

			
 
				-		oclHandles.cl_status = clReleaseMemObject(ob);	

			
 
				+		oclHandles.cl_status = clReleaseMemObject(ob);

			
 
				 	#ifdef ERRMSG

			
 
				 	oclHandles.error_str = "excpetion in _clFree() ->";

			
 
				 	switch(oclHandles.cl_status)

			
@@ -802,11 +805,11 @@ void _clFree(cl_mem ob) throw(string){
 
				 			break;

			
 
				 		case CL_OUT_OF_HOST_MEMORY:

			
 
				 			oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";

			
 
				-			break;			

			
 
				-		default: 

			
 
				+			break;

			
 
				+		default:

			
 
				 			oclHandles.error_str += "Unkown reseason";

			
 
				-			break;		

			
 
				-	}        

			
 
				+			break;

			
 
				+	}

			
 
				     if (oclHandles.cl_status!= CL_SUCCESS)

			
 
				        throw(oclHandles.error_str);

			
 
				 	#endif

			
--- a/opencl/bfs/bfs.cpp
+++ b/opencl/bfs/bfs.cpp
@@ -13,7 +13,11 @@
 
				 #include "CLHelper.h"

			
 
				 #include "util.h"

			
 
				 

			
 
				-#define MAX_THREADS_PER_BLOCK 256

			
 
				+#define MAX_THREADS_PER_BLOCK 256
			
 
				+
			
 
				+int platform_num = 0;

			
 
				+int device_num = 0;

			
 
				+int use_gpu = 0;

			
 
				 

			
 
				 //Structure to hold a node information

			
 
				 struct Node

			
@@ -39,7 +43,7 @@ void run_bfs_cpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
 
				 		stop=false;

			
 
				 		for(int tid = 0; tid < no_of_nodes; tid++ )

			
 
				 		{

			
 
				-			if (h_graph_mask[tid] == true){ 

			
 
				+			if (h_graph_mask[tid] == true){

			
 
				 				h_graph_mask[tid]=false;

			
 
				 				for(int i=h_graph_nodes[tid].starting; i<(h_graph_nodes[tid].no_of_edges + h_graph_nodes[tid].starting); i++){

			
 
				 					int id = h_graph_edges[i];	//--cambine: node id is connected with node tid

			
@@ -48,7 +52,7 @@ void run_bfs_cpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
 
				 						h_updating_graph_mask[id]=true;

			
 
				 					}

			
 
				 				}

			
 
				-			}		

			
 
				+			}

			
 
				 		}

			
 
				 

			
 
				   		for(int tid=0; tid< no_of_nodes ; tid++ )

			
@@ -69,7 +73,7 @@ void run_bfs_cpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
 
				 //----------------------------------------------------------

			
 
				 void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \

			
 
				 		int *h_graph_edges, char *h_graph_mask, char *h_updating_graph_mask, \

			
 
				-		char *h_graph_visited, int *h_cost) 

			
 
				+		char *h_graph_visited, int *h_cost)

			
 
				 					throw(std::string){

			
 
				 

			
 
				 	//int number_elements = height*width;

			
@@ -78,7 +82,7 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
 
				 			d_graph_visited, d_cost, d_over;

			
 
				 	try{

			
 
				 		//--1 transfer data from host to device

			
 
				-		_clInit();			

			
 
				+		_clInit(platform_num, device_num, use_gpu);

			
 
				 		d_graph_nodes = _clMalloc(no_of_nodes*sizeof(Node), h_graph_nodes);

			
 
				 		d_graph_edges = _clMalloc(edge_list_size*sizeof(int), h_graph_edges);

			
 
				 		d_graph_mask = _clMallocRW(no_of_nodes*sizeof(char), h_graph_mask);

			
@@ -88,18 +92,18 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
 
				 

			
 
				 		d_cost = _clMallocRW(no_of_nodes*sizeof(int), h_cost);

			
 
				 		d_over = _clMallocRW(sizeof(char), &h_over);

			
 
				-		

			
 
				+

			
 
				 		_clMemcpyH2D(d_graph_nodes, no_of_nodes*sizeof(Node), h_graph_nodes);

			
 
				-		_clMemcpyH2D(d_graph_edges, edge_list_size*sizeof(int), h_graph_edges);	

			
 
				-		_clMemcpyH2D(d_graph_mask, no_of_nodes*sizeof(char), h_graph_mask);	

			
 
				-		_clMemcpyH2D(d_updating_graph_mask, no_of_nodes*sizeof(char), h_updating_graph_mask);	

			
 
				-		_clMemcpyH2D(d_graph_visited, no_of_nodes*sizeof(char), h_graph_visited);	

			
 
				-		_clMemcpyH2D(d_cost, no_of_nodes*sizeof(int), h_cost);	

			
 
				-			

			
 
				+		_clMemcpyH2D(d_graph_edges, edge_list_size*sizeof(int), h_graph_edges);

			
 
				+		_clMemcpyH2D(d_graph_mask, no_of_nodes*sizeof(char), h_graph_mask);

			
 
				+		_clMemcpyH2D(d_updating_graph_mask, no_of_nodes*sizeof(char), h_updating_graph_mask);

			
 
				+		_clMemcpyH2D(d_graph_visited, no_of_nodes*sizeof(char), h_graph_visited);

			
 
				+		_clMemcpyH2D(d_cost, no_of_nodes*sizeof(int), h_cost);

			
 
				+

			
 
				 		//--2 invoke kernel

			
 
				 #ifdef	PROFILING

			
 
				 		timer kernel_timer;

			
 
				-		double kernel_time = 0.0;		

			
 
				+		double kernel_time = 0.0;

			
 
				 		kernel_timer.reset();

			
 
				 		kernel_timer.start();

			
 
				 #endif

			
@@ -116,25 +120,25 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
 
				 			_clSetArgs(kernel_id, kernel_idx++, d_graph_visited);

			
 
				 			_clSetArgs(kernel_id, kernel_idx++, d_cost);

			
 
				 			_clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));

			
 
				-			

			
 
				+

			
 
				 			//int work_items = no_of_nodes;

			
 
				 			_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);

			
 
				-			

			
 
				+

			
 
				 			//--kernel 1

			
 
				 			kernel_id = 1;

			
 
				-			kernel_idx = 0;			

			
 
				+			kernel_idx = 0;

			
 
				 			_clSetArgs(kernel_id, kernel_idx++, d_graph_mask);

			
 
				 			_clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);

			
 
				 			_clSetArgs(kernel_id, kernel_idx++, d_graph_visited);

			
 
				 			_clSetArgs(kernel_id, kernel_idx++, d_over);

			
 
				 			_clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));

			
 
				-			

			
 
				+

			
 
				 			//work_items = no_of_nodes;

			
 
				-			_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);			

			
 
				-			

			
 
				+			_clInvokeKernel(kernel_id, no_of_nodes, work_group_size);

			
 
				+

			
 
				 			_clMemcpyD2H(d_over,sizeof(char), &h_over);

			
 
				 			}while(h_over);

			
 
				-			

			
 
				+

			
 
				 		_clFinish();

			
 
				 #ifdef	PROFILING

			
 
				 		kernel_timer.stop();

			
@@ -144,7 +148,7 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
 
				 		_clMemcpyD2H(d_cost,no_of_nodes*sizeof(int), h_cost);

			
 
				 		//--statistics

			
 
				 #ifdef	PROFILING

			
 
				-		std::cout<<"kernel time(s):"<<kernel_time<<std::endl;		

			
 
				+		std::cout<<"kernel time(s):"<<kernel_time<<std::endl;

			
 
				 #endif

			
 
				 		//--4 release cl resources.

			
 
				 		_clFree(d_graph_nodes);

			
@@ -156,7 +160,7 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
 
				 		_clFree(d_over);

			
 
				 		_clRelease();

			
 
				 	}

			
 
				-	catch(std::string msg){		

			
 
				+	catch(std::string msg){

			
 
				 		_clFree(d_graph_nodes);

			
 
				 		_clFree(d_graph_edges);

			
 
				 		_clFree(d_graph_mask);

			
@@ -173,7 +177,7 @@ void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
 
				 }

			
 
				 void Usage(int argc, char**argv){

			
 
				 

			
 
				-fprintf(stderr,"Usage: %s <input_file>\n", argv[0]);

			
 
				+fprintf(stderr,"Usage: %s <input_file> <platform_number> <device_number> <use_gpu>\n", argv[0]);

			
 
				 

			
 
				 }

			
 
				 //----------------------------------------------------------

			
@@ -190,11 +194,11 @@ int main(int argc, char * argv[])
 
				 	char *h_graph_mask, *h_updating_graph_mask, *h_graph_visited;

			
 
				 	try{

			
 
				 		char *input_f;

			
 
				-		if(argc!=2){

			
 
				+		if(argc!=5){

			
 
				 		  Usage(argc, argv);

			
 
				 		  exit(0);

			
 
				 		}

			
 
				-	

			
 
				+

			
 
				 		input_f = argv[1];

			
 
				 		printf("Reading File\n");

			
 
				 		//Read in Graph from a file

			
@@ -202,7 +206,11 @@ int main(int argc, char * argv[])
 
				 		if(!fp){

			
 
				 		  printf("Error Reading graph file\n");

			
 
				 		  return 0;

			
 
				-		}

			
 
				+		}
			
 
				+
			
 
				+    platform_num = atoi(argv[2]);

			
 
				+    device_num = atoi(argv[3]);

			
 
				+    use_gpu = atoi(argv[4]);

			
 
				 

			
 
				 		int source = 0;

			
 
				 

			
@@ -214,8 +222,8 @@ int main(int argc, char * argv[])
 
				 		//Make execution Parameters according to the number of nodes

			
 
				 		//Distribute threads across multiple Blocks if necessary

			
 
				 		if(no_of_nodes>MAX_THREADS_PER_BLOCK){

			
 
				-			num_of_blocks = (int)ceil(no_of_nodes/(double)MAX_THREADS_PER_BLOCK); 

			
 
				-			num_of_threads_per_block = MAX_THREADS_PER_BLOCK; 

			
 
				+			num_of_blocks = (int)ceil(no_of_nodes/(double)MAX_THREADS_PER_BLOCK);

			
 
				+			num_of_threads_per_block = MAX_THREADS_PER_BLOCK;

			
 
				 		}

			
 
				 		work_group_size = num_of_threads_per_block;

			
 
				 		// allocate host memory

			
@@ -223,8 +231,8 @@ int main(int argc, char * argv[])
 
				 		h_graph_mask = (char*) malloc(sizeof(char)*no_of_nodes);

			
 
				 		h_updating_graph_mask = (char*) malloc(sizeof(char)*no_of_nodes);

			
 
				 		h_graph_visited = (char*) malloc(sizeof(char)*no_of_nodes);

			
 
				-	

			
 
				-		int start, edgeno;   

			
 
				+

			
 
				+		int start, edgeno;

			
 
				 		// initalize the memory

			
 
				 		for(int i = 0; i < no_of_nodes; i++){

			
 
				 			fscanf(fp,"%d %d",&start,&edgeno);

			
@@ -250,7 +258,7 @@ int main(int argc, char * argv[])
 
				 		}

			
 
				 

			
 
				 		if(fp)

			
 
				-			fclose(fp);    

			
 
				+			fclose(fp);

			
 
				 		// allocate mem for the result on host side

			
 
				 		int	*h_cost = (int*) malloc(sizeof(int)*no_of_nodes);

			
 
				 		int *h_cost_ref = (int*)malloc(sizeof(int)*no_of_nodes);

			
@@ -259,10 +267,10 @@ int main(int argc, char * argv[])
 
				 			h_cost_ref[i] = -1;

			
 
				 		}

			
 
				 		h_cost[source]=0;

			
 
				-		h_cost_ref[source]=0;		

			
 
				+		h_cost_ref[source]=0;

			
 
				 		//---------------------------------------------------------

			
 
				 		//--gpu entry

			
 
				-		run_bfs_gpu(no_of_nodes,h_graph_nodes,edge_list_size,h_graph_edges, h_graph_mask, h_updating_graph_mask, h_graph_visited, h_cost);	

			
 
				+		run_bfs_gpu(no_of_nodes,h_graph_nodes,edge_list_size,h_graph_edges, h_graph_mask, h_updating_graph_mask, h_graph_visited, h_cost);

			
 
				 		//---------------------------------------------------------

			
 
				 		//--cpu entry

			
 
				 		// initalize the memory again

			
@@ -279,7 +287,7 @@ int main(int argc, char * argv[])
 
				 		//---------------------------------------------------------

			
 
				 		//--result varification

			
 
				 		compare_results<int>(h_cost_ref, h_cost, no_of_nodes);

			
 
				-		//release host memory		

			
 
				+		//release host memory

			
 
				 		free(h_graph_nodes);

			
 
				 		free(h_graph_mask);

			
 
				 		free(h_updating_graph_mask);

			
@@ -292,8 +300,8 @@ int main(int argc, char * argv[])
 
				 		free(h_graph_nodes);

			
 
				 		free(h_graph_mask);

			
 
				 		free(h_updating_graph_mask);

			
 
				-		free(h_graph_visited);		

			
 
				+		free(h_graph_visited);

			
 
				 	}

			
 
				-		

			
 
				+

			
 
				     return 0;

			
 
				 }

			
--- a/opencl/bfs/run-cpu
+++ b/opencl/bfs/run-cpu
@@ -1 +1 @@
 
				-./bfs ../../data/bfs/graph1MW_6.txt
			
 
				+./bfs ../../data/bfs/graph1MW_6.txt 1 0 0
			
--- a/opencl/bfs/run-gpu
+++ b/opencl/bfs/run-gpu
@@ -0,0 +1 @@
 
				+./bfs ../../data/bfs/graph1MW_6.txt 0 0 1