Jelajahi Sumber

Implemented device selection for cfd

We now use parameters to select the OpenCL platform, device and device
type instead of having them hardcoded in the code. This way we can
easily change the type of the benchmark without recompiling it.

The parsing is done using the pre-existing command-line argument
parsing structure, even though it is not completely consistent with the
rest of the benchmarks.
Andrea Gussoni 8 tahun lalu
induk
melakukan
13f6f39603
4 mengubah file dengan 249 tambahan dan 233 penghapusan
  1. 199 188
      opencl/cfd/CLHelper.h
  2. 42 42
      opencl/cfd/euler3d.cpp
  3. 3 3
      opencl/cfd/run-cpu
  4. 5 0
      opencl/cfd/run-gpu

+ 199 - 188
opencl/cfd/CLHelper.h

@@ -40,55 +40,55 @@ double KC;  //: the kernel compilation time
 	@date:		24/03/2011
 ------------------------------------------------------------*/
 struct _clDeviceProp{
-/*CL_DEVICE_ADDRESS_BITS                       
-CL_DEVICE_AVAILABLE                          
-CL_DEVICE_COMPILER_AVAILABLE                 
-CL_DEVICE_ENDIAN_LITTLE                      
-CL_DEVICE_ERROR_CORRECTION_SUPPORT           
-CL_DEVICE_EXECUTION_CAPABILITIES             
+/*CL_DEVICE_ADDRESS_BITS
+CL_DEVICE_AVAILABLE
+CL_DEVICE_COMPILER_AVAILABLE
+CL_DEVICE_ENDIAN_LITTLE
+CL_DEVICE_ERROR_CORRECTION_SUPPORT
+CL_DEVICE_EXECUTION_CAPABILITIES
 CL_DEVICE_EXTENSIONS
-CL_DEVICE_GLOBAL_MEM_CACHE_SIZE              
-CL_DEVICE_GLOBAL_MEM_CACHE_TYPE              
-CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE          
-CL_DEVICE_GLOBAL_MEM_SIZE                    
-CL_DEVICE_IMAGE_SUPPORT                      
-CL_DEVICE_IMAGE2D_MAX_HEIGHT                 
-CL_DEVICE_IMAGE2D_MAX_WIDTH                  
-CL_DEVICE_IMAGE3D_MAX_DEPTH                  
-CL_DEVICE_IMAGE3D_MAX_HEIGHT                 
-CL_DEVICE_IMAGE3D_MAX_WIDTH                  
-CL_DEVICE_LOCAL_MEM_SIZE                     
-CL_DEVICE_LOCAL_MEM_TYPE                     
-CL_DEVICE_MAX_CLOCK_FREQUENCY                
-CL_DEVICE_MAX_COMPUTE_UNITS                  
-CL_DEVICE_MAX_CONSTANT_ARGS                  
-CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE           
-CL_DEVICE_MAX_MEM_ALLOC_SIZE                 
-CL_DEVICE_MAX_PARAMETER_SIZE                 
-CL_DEVICE_MAX_READ_IMAGE_ARGS                
-CL_DEVICE_MAX_SAMPLERS                       
-CL_DEVICE_MAX_WORK_GROUP_SIZE                
-CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS           
-CL_DEVICE_MAX_WORK_ITEM_SIZES                
-CL_DEVICE_MAX_WRITE_IMAGE_ARGS               
-CL_DEVICE_MEM_BASE_ADDR_ALIGN                
-CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE           
-CL_DEVICE_NAME                               
+CL_DEVICE_GLOBAL_MEM_CACHE_SIZE
+CL_DEVICE_GLOBAL_MEM_CACHE_TYPE
+CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE
+CL_DEVICE_GLOBAL_MEM_SIZE
+CL_DEVICE_IMAGE_SUPPORT
+CL_DEVICE_IMAGE2D_MAX_HEIGHT
+CL_DEVICE_IMAGE2D_MAX_WIDTH
+CL_DEVICE_IMAGE3D_MAX_DEPTH
+CL_DEVICE_IMAGE3D_MAX_HEIGHT
+CL_DEVICE_IMAGE3D_MAX_WIDTH
+CL_DEVICE_LOCAL_MEM_SIZE
+CL_DEVICE_LOCAL_MEM_TYPE
+CL_DEVICE_MAX_CLOCK_FREQUENCY
+CL_DEVICE_MAX_COMPUTE_UNITS
+CL_DEVICE_MAX_CONSTANT_ARGS
+CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE
+CL_DEVICE_MAX_MEM_ALLOC_SIZE
+CL_DEVICE_MAX_PARAMETER_SIZE
+CL_DEVICE_MAX_READ_IMAGE_ARGS
+CL_DEVICE_MAX_SAMPLERS
+CL_DEVICE_MAX_WORK_GROUP_SIZE
+CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS
+CL_DEVICE_MAX_WORK_ITEM_SIZES
+CL_DEVICE_MAX_WRITE_IMAGE_ARGS
+CL_DEVICE_MEM_BASE_ADDR_ALIGN
+CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE
+CL_DEVICE_NAME
 CL_DEVICE_PLATFORM
-CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR        
-CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE      
-CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT       
-CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT         
-CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG        
-CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT       
-CL_DEVICE_PROFILE                           
-CL_DEVICE_PROFILING_TIMER_RESOLUTION         
-CL_DEVICE_QUEUE_PROPERTIES                   
-CL_DEVICE_SINGLE_FP_CONFIG                   
-CL_DEVICE_TYPE                              
-CL_DEVICE_VENDOR_ID                          
-CL_DEVICE_VENDOR                            
-CL_DEVICE_VERSION                      
+CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR
+CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE
+CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT
+CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT
+CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG
+CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT
+CL_DEVICE_PROFILE
+CL_DEVICE_PROFILING_TIMER_RESOLUTION
+CL_DEVICE_QUEUE_PROPERTIES
+CL_DEVICE_SINGLE_FP_CONFIG
+CL_DEVICE_TYPE
+CL_DEVICE_VENDOR_ID
+CL_DEVICE_VENDOR
+CL_DEVICE_VERSION
 CL_DRIVER_VERSION*/
 char device_name[100];
 };
@@ -117,14 +117,14 @@ int number_devices = 0;
 
 /*------------------------------------------------------------
 	@function:	select device to use
-	@params:	
+	@params:
 		size: 	the index of device to be used
 	@return:	NULL
 	@date:		24/03/2011
 ------------------------------------------------------------*/
 void _clSetDevice(int idx) throw(string){
 
-    cl_int resultCL;    
+    cl_int resultCL;
     oclHandles.context = NULL;
     oclHandles.devices = NULL;
     oclHandles.queue = NULL;
@@ -152,29 +152,29 @@ void _clSetDevice(int idx) throw(string){
 
    oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ALL, 0, NULL, &deviceListSize);
    if(oclHandles.cl_status!=CL_SUCCESS){
-   	throw(string("exception in _clInit -> clGetDeviceIDs"));   	
+   	throw(string("exception in _clInit -> clGetDeviceIDs"));
    }
    if (deviceListSize == 0)
         throw(string("InitCL()::Error: No devices found."));
-   
+
    if(idx>(deviceListSize-1))
 	  throw(string(":invalid device ID:"));
    device_id_inused = idx;
-	
+
 }
 
 /*------------------------------------------------------------
 	@function:	get device properties indexed by 'idx'
-	@params:	
+	@params:
 		idx:	device index
 		prop:	output properties
 	@return:	prop
 	@date:		24/03/2011
 ------------------------------------------------------------*/
 void _clGetDeviceProperties(int idx, _clDeviceProp *prop) throw(string){
-   
+
    oclHandles.cl_status= clGetDeviceInfo(oclHandles.devices[idx], CL_DEVICE_NAME, 100, prop->device_name, NULL);
-   
+
 #ifdef ERRMSG
 	if(oclHandles.cl_status != CL_SUCCESS){
 	  oclHandles.error_str = "exception in _clGetDeviceProperties-> ";
@@ -184,13 +184,13 @@ void _clGetDeviceProperties(int idx, _clDeviceProp *prop) throw(string){
 	  		break;
 	  	case CL_INVALID_VALUE:
 	  		oclHandles.error_str += "CL_INVALID_VALUE";
-	  		break;	   
+	  		break;
 	  	default:
 	  		oclHandles.error_str += "unknown reasons";
-	  		break;	  		
+	  		break;
 	  }
 	throw(oclHandles.error_str);
-	}	
+	}
 #endif
 }
 
@@ -217,7 +217,7 @@ string FileToString(const string fileName){
             f.read(str, fileSize);
             f.close();
             str[size] = '\0';
-        
+
             s = str;
             delete [] str;
             return s;
@@ -246,6 +246,8 @@ string FileToString(const string fileName){
 ------------------------------------------------------------*/
 char device_type[3];
 int device_id = 0;
+int platform_id = 0;
+
 void _clCmdParams(int argc, char* argv[]){
 	for (int i = 0; i < argc; ++i){
 		switch (argv[i][1]){
@@ -258,15 +260,24 @@ void _clCmdParams(int argc, char* argv[]){
 					throw;
 				}
 				break;
-			  case 'd':	 //--d stands for device id
-				if (++i < argc){
-					sscanf(argv[i], "%d", &device_id);
-				}
-				else{
-					std::cerr << "Could not read argument after option " << argv[i-1] << std::endl;
-					throw;
-				}
-				break;
+		  case 'd':	 //--d stands for device id
+  			if (++i < argc){
+  				sscanf(argv[i], "%d", &device_id);
+  			}
+  			else{
+  				std::cerr << "Could not read argument after option " << argv[i-1] << std::endl;
+  				throw;
+  			}
+  			break;
+      case 'p':	 //--p stands for platform id
+  			if (++i < argc){
+  				sscanf(argv[i], "%d", &platform_id);
+  			}
+  			else{
+  				std::cerr << "Could not read argument after option " << argv[i-1] << std::endl;
+  				throw;
+  			}
+  			break;
 			default:
 				;
 		}
@@ -275,9 +286,9 @@ void _clCmdParams(int argc, char* argv[]){
 
 /*------------------------------------------------------------
 	@function:	Initlize CL objects
-	@params:	
+	@params:
 		device_id: device id
-		device_type: the types of devices, e.g. CPU, GPU, ACCERLERATOR,...	
+		device_type: the types of devices, e.g. CPU, GPU, ACCERLERATOR,...
 		(1) -t cpu/gpu/acc -d 0/1/2/...
 		(2) -t cpu/gpu/acc [-d 0]
 		(3) [-t default] -d 0/1/2/...
@@ -285,7 +296,7 @@ void _clCmdParams(int argc, char* argv[]){
 	@return:
 	@description:
 		there are 5 steps to initialize all the OpenCL objects needed,
-	@revised: 
+	@revised:
 		get the number of devices and devices have no relationship with context
 	@date:		24/03/2011
 ------------------------------------------------------------*/
@@ -303,13 +314,13 @@ void _clInit(string device_type, int device_id)throw(string){
 	KE = 0;
 	KC = 0;
 #endif
-	int DEVICE_ID_INUSED = 0;	
+	int DEVICE_ID_INUSED = 0;
 	_clDeviceProp prop;
 #ifdef PROFILE_
 	double t1 = gettime();
 #endif
 
-    cl_int resultCL;    
+    cl_int resultCL;
     oclHandles.context = NULL;
     oclHandles.devices = NULL;
     oclHandles.queue = NULL;
@@ -339,8 +350,8 @@ void _clInit(string device_type, int device_id)throw(string){
     if (resultCL != CL_SUCCESS)
         throw (string("InitCL()::Error: Getting platform ids (clGetPlatformIDs)"));
 
-    // Select the target platform. Default: first platform 
-    targetPlatform = allPlatforms[1];
+    // Select the target platform. Default: first platform
+    targetPlatform = allPlatforms[platform_id];
     for (int i = 0; i < numPlatforms; i++)
     {
         char pbuff[128];
@@ -360,74 +371,74 @@ void _clInit(string device_type, int device_id)throw(string){
     }
     free(allPlatforms);
     //-----------------------------------------------
-    //--cambine-2: detect OpenCL devices	
-    // First, get the size of device list 
+    //--cambine-2: detect OpenCL devices
+    // First, get the size of device list
     if(device_type.compare("")!=0){
 		if(device_type.compare("cpu")==0){
 		   oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_CPU, 0, NULL, &deviceListSize);
 		   if(oclHandles.cl_status!=CL_SUCCESS){
-			   throw(string("exception in _clInit -> clGetDeviceIDs -> CPU"));   				
+			   throw(string("exception in _clInit -> clGetDeviceIDs -> CPU"));
 	       }
-		}	
-		if(device_type.compare("gpu")==0){		   
+		}
+		if(device_type.compare("gpu")==0){
 		   oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_GPU, 0, NULL, &deviceListSize);
 		   if(oclHandles.cl_status!=CL_SUCCESS){
-			   throw(string("exception in _clInit -> clGetDeviceIDs -> GPU"));   	
+			   throw(string("exception in _clInit -> clGetDeviceIDs -> GPU"));
 	       }
 		}
 		if(device_type.compare("acc")==0){
 		   oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ACCELERATOR, 0, NULL, &deviceListSize);
 		   if(oclHandles.cl_status!=CL_SUCCESS){
-			   throw(string("exception in _clInit -> clGetDeviceIDs -> ACCELERATOR"));   	
+			   throw(string("exception in _clInit -> clGetDeviceIDs -> ACCELERATOR"));
 	       }
-		}	 	
+		}
     }
     else{
 		   oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ALL, 0, NULL, &deviceListSize);
 		   if(oclHandles.cl_status!=CL_SUCCESS){
-			   throw(string("exception in _clInit -> clGetDeviceIDs -> ALL"));   	
+			   throw(string("exception in _clInit -> clGetDeviceIDs -> ALL"));
 	       }
     }
-    
+
    if (deviceListSize == 0)
         throw(string("InitCL()::Error: No devices found."));
-        
+
 #ifdef	DEV_INFO
 	std::cout<<"--cambine: number of device="<<deviceListSize<<std::endl;
 #endif
 	number_devices = deviceListSize;
-    // Now, allocate the device list 
+    // Now, allocate the device list
 	//    oclHandles.devices = (cl_device_id *)malloc(deviceListSize);
     oclHandles.devices = (cl_device_id *)malloc(sizeof(cl_device_id) * deviceListSize);
 
     if (oclHandles.devices == 0)
         throw(string("InitCL()::Error: Could not allocate memory."));
 
-    // Next, get the device list data 
+    // Next, get the device list data
 	if(device_type.compare("")!=0){
 	   if(device_type.compare("cpu")==0){
 		   oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_CPU, deviceListSize, oclHandles.devices, NULL);
 		   if(oclHandles.cl_status!=CL_SUCCESS){
-		   		throw(string("exception in _clInit -> clGetDeviceIDs -> CPU ->2"));   		   	
+		   		throw(string("exception in _clInit -> clGetDeviceIDs -> CPU ->2"));
 	   	   }
 	   }
    	   if(device_type.compare("gpu")==0){
 		   oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_GPU, deviceListSize, oclHandles.devices, NULL);
 		   if(oclHandles.cl_status!=CL_SUCCESS){
-		   	throw(string("exception in _clInit -> clGetDeviceIDs -> GPU -> 2"));   		   	
+		   	throw(string("exception in _clInit -> clGetDeviceIDs -> GPU -> 2"));
 	   	   }
    	 	}
    	   if(device_type.compare("acc")==0){
 		   oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ACCELERATOR, deviceListSize, oclHandles.devices, NULL);
 		   if(oclHandles.cl_status!=CL_SUCCESS){
-		   	throw(string("exception in _clInit -> clGetDeviceIDs -> ACCELERATOR -> 2"));   		   	
+		   	throw(string("exception in _clInit -> clGetDeviceIDs -> ACCELERATOR -> 2"));
 	   	   }
-   	   }  	 	
+   	   }
    	}
    	else{
 	   	 oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ALL, deviceListSize, oclHandles.devices, NULL);
 	   	 if(oclHandles.cl_status!=CL_SUCCESS){
-	   	 	throw(string("exception in _clInit -> clGetDeviceIDs -> ALL -> 2"));   		   	
+	   	 	throw(string("exception in _clInit -> clGetDeviceIDs -> ALL -> 2"));
    	   	}
    	}
    if(device_id!=0){
@@ -435,23 +446,23 @@ void _clInit(string device_type, int device_id)throw(string){
    		throw(string("Invalidate device id"));
    	DEVICE_ID_INUSED = device_id;
    }
-     
+
    	_clGetDeviceProperties(DEVICE_ID_INUSED, &prop);
 	std::cout<<"--cambine: device name="<<prop.device_name<<std::endl;
-	   	
+
 #ifdef	DEV_INFO
 	std::cout<<"--cambine: return device list successfully!"<<std::endl;
-#endif  
- 
+#endif
+
     //-----------------------------------------------
     //--cambine-3: create an OpenCL context
 #ifdef	DEV_INFO
 	std::cout<<"--cambine: before creating context"<<std::endl;
 #endif
     cl_context_properties cprops[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)targetPlatform, 0 };
-    oclHandles.context = clCreateContext(0, 
-                                        deviceListSize, 
-                                        oclHandles.devices, 
+    oclHandles.context = clCreateContext(0,
+                                        deviceListSize,
+                                        oclHandles.devices,
                                         NULL,
 										NULL,
                                         &resultCL);
@@ -463,10 +474,10 @@ void _clInit(string device_type, int device_id)throw(string){
 #endif
 
    //-----------------------------------------------
-   //--cambine-4: Create an OpenCL command queue    
-    oclHandles.queue = clCreateCommandQueue(oclHandles.context, 
-                                            oclHandles.devices[DEVICE_ID_INUSED], 
-                                            0, 
+   //--cambine-4: Create an OpenCL command queue
+    oclHandles.queue = clCreateCommandQueue(oclHandles.context,
+                                            oclHandles.devices[DEVICE_ID_INUSED],
+                                            0,
                                             &resultCL);
 
     if ((resultCL != CL_SUCCESS) || (oclHandles.queue == NULL))
@@ -481,40 +492,40 @@ void _clInit(string device_type, int device_id)throw(string){
     const char * source    = source_str.c_str();
     size_t sourceSize[]    = { source_str.length() };
 
-    oclHandles.program = clCreateProgramWithSource(oclHandles.context, 
-                                                    1, 
+    oclHandles.program = clCreateProgramWithSource(oclHandles.context,
+                                                    1,
                                                     &source,
                                                     sourceSize,
                                                     &resultCL);
 
     if ((resultCL != CL_SUCCESS) || (oclHandles.program == NULL))
-        throw(string("InitCL()::Error: Loading Binary into cl_program. (clCreateProgramWithBinary)"));    
+        throw(string("InitCL()::Error: Loading Binary into cl_program. (clCreateProgramWithBinary)"));
     //insert debug information
     std::string options= "";
     //options += " -cl-nv-opt-level=3";
     resultCL = clBuildProgram(oclHandles.program, deviceListSize, oclHandles.devices, options.c_str(), NULL,  NULL);
-	
+
     if ((resultCL != CL_SUCCESS) || (oclHandles.program == NULL)){
         cerr << "InitCL()::Error: In clBuildProgram" << endl;
 
 		size_t length;
-        resultCL = clGetProgramBuildInfo(oclHandles.program, 
-                                        oclHandles.devices[DEVICE_ID_INUSED], 
-                                        CL_PROGRAM_BUILD_LOG, 
-                                        0, 
-                                        NULL, 
+        resultCL = clGetProgramBuildInfo(oclHandles.program,
+                                        oclHandles.devices[DEVICE_ID_INUSED],
+                                        CL_PROGRAM_BUILD_LOG,
+                                        0,
+                                        NULL,
                                         &length);
-        if(resultCL != CL_SUCCESS) 
+        if(resultCL != CL_SUCCESS)
             throw(string("InitCL()::Error: Getting Program build info(clGetProgramBuildInfo)"));
 
 		char* buffer = (char*)malloc(length);
-        resultCL = clGetProgramBuildInfo(oclHandles.program, 
-                                        oclHandles.devices[DEVICE_ID_INUSED], 
-                                        CL_PROGRAM_BUILD_LOG, 
-                                        length, 
-                                        buffer, 
+        resultCL = clGetProgramBuildInfo(oclHandles.program,
+                                        oclHandles.devices[DEVICE_ID_INUSED],
+                                        CL_PROGRAM_BUILD_LOG,
+                                        length,
+                                        buffer,
                                         NULL);
-        if(resultCL != CL_SUCCESS) 
+        if(resultCL != CL_SUCCESS)
             throw(string("InitCL()::Error: Getting Program build info(clGetProgramBuildInfo)"));
 
 		cerr << buffer << endl;
@@ -524,23 +535,23 @@ void _clInit(string device_type, int device_id)throw(string){
         free(buffer);
 
         throw(string("InitCL()::Error: Building Program (clBuildProgram)"));
-    } 
+    }
 #ifdef PROFILE_
 	double t3 = gettime();
 	KC += t3 - t2;
 #endif
     //get program information in intermediate representation
-#ifdef PTX_MSG    
+#ifdef PTX_MSG
     size_t binary_sizes[deviceListSize];
     char * binaries[deviceListSize];
-    //figure out number of devices and the sizes of the binary for each device. 
+    //figure out number of devices and the sizes of the binary for each device.
     oclHandles.cl_status = clGetProgramInfo(oclHandles.program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t)*deviceListSize, &binary_sizes, NULL );
     if(oclHandles.cl_status!=CL_SUCCESS){
         throw(string("--cambine:exception in _InitCL -> clGetProgramInfo-2"));
     }
 
     std::cout<<"--cambine:"<<binary_sizes<<std::endl;
-    //copy over all of the generated binaries. 
+    //copy over all of the generated binaries.
     for(int i=0;i<deviceListSize;i++)
 	binaries[i] = (char *)malloc( sizeof(char)*(binary_sizes[i]+1));
     oclHandles.cl_status = clGetProgramInfo(oclHandles.program, CL_PROGRAM_BINARIES, sizeof(char *)*deviceListSize, binaries, NULL );
@@ -563,7 +574,7 @@ void _clInit(string device_type, int device_id)throw(string){
 
     for (int nKernel = 0; nKernel < total_kernels; nKernel++)
     {
-        // get a kernel object handle for a kernel with the given name 
+        // get a kernel object handle for a kernel with the given name
         cl_kernel kernel = clCreateKernel(oclHandles.program,
                                             (kernel_names[nKernel]).c_str(),
                                             &resultCL);
@@ -583,7 +594,7 @@ void _clInit(string device_type, int device_id)throw(string){
     oclHandles.cl_status = clGetProgramBuildInfo(oclHandles.program, oclHandles.devices[DEVICE_ID_INUSED], CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
     if(oclHandles.cl_status!=CL_SUCCESS){
 		throw(string("exceptions in _InitCL -> getting resource information"));
-    }    
+    }
 
     build_log = (char *)malloc(ret_val_size+1);
     oclHandles.cl_status = clGetProgramBuildInfo(oclHandles.program, oclHandles.devices[DEVICE_ID_INUSED], CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
@@ -664,7 +675,7 @@ void _clRelease()
 
 /*------------------------------------------------------------
 	@function:	create read and write buffer for devices
-	@params:	
+	@params:
 		size: 	the size of device memory to be allocated
 	@return:	mem_d
 	@date:		24/03/2011
@@ -699,10 +710,10 @@ cl_mem _clMalloc(int size) throw(string){
 	  		break;
 	  	default:
 	  		oclHandles.error_str += "unknown reasons";
-	  		break;	  		
+	  		break;
 	  }
 	throw(oclHandles.error_str);
-	}	
+	}
 #endif
 #ifdef PROFILE_
 	double t2 = gettime();
@@ -745,14 +756,14 @@ void* _clMallocHost(int size)throw(string){
 	  		break;
 	  	default:
 	  		oclHandles.error_str += "unknown reasons";
-	  		break;	  		
+	  		break;
 	  }
 	throw(oclHandles.error_str);
-	}	
+	}
 #endif
 
 	mem_h = clEnqueueMapBuffer(oclHandles.queue, oclHandles.pinned_mem_out, CL_TRUE, CL_MAP_READ, 0, size, 0, NULL, NULL, &oclHandles.cl_status);
-	
+
 #ifdef ERRMSG
 	if(oclHandles.cl_status != CL_SUCCESS||mem_h==NULL){
 	  oclHandles.error_str = "excpetion in _clMallocHost -> clEnqueueMapBuffer";
@@ -783,10 +794,10 @@ void* _clMallocHost(int size)throw(string){
 	  		break;
 	  	default:
 	  		oclHandles.error_str += "unknown reasons";
-	  		break;	  		
+	  		break;
 	  }
 	throw(oclHandles.error_str);
-	}	
+	}
 #endif
 	return mem_h;
 }
@@ -826,7 +837,7 @@ void _clFreeHost(int io, void * mem_h){
 			  		break;
 			  	default:
 			  		oclHandles.error_str += "unknown reasons";
-			  		break;	  		
+			  		break;
 			  }
 				throw(oclHandles.error_str);
 			 }
@@ -860,12 +871,12 @@ void _clFreeHost(int io, void * mem_h){
 				  		break;
 				  	default:
 				  		oclHandles.error_str += "unknown reasons";
-				  		break;	  		
+				  		break;
 				  }
 					throw(oclHandles.error_str);
 			   }
 #endif
-	 	 }		
+	 	 }
 	}
 	else
 		throw(string("encounter invalid choice when freeing pinned memmory"));
@@ -874,7 +885,7 @@ void _clFreeHost(int io, void * mem_h){
 	@function:	transfer data from host to device
 	@params:
 		dest:	the destination device memory
-		src:	the source host memory	
+		src:	the source host memory
 		size: 	the size of data to be transferred in bytes
 	@return:	NULL
 	@date:		17/01/2011
@@ -893,28 +904,28 @@ void _clMemcpyH2D(cl_mem dst, const void *src, int size) throw(string){
 				break;
 			case CL_INVALID_CONTEXT:
 				oclHandles.error_str += "CL_INVALID_CONTEXT";
-				break;	
+				break;
 			case CL_INVALID_MEM_OBJECT:
 				oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
 				break;
 			case CL_INVALID_VALUE:
 				oclHandles.error_str += "CL_INVALID_VALUE";
-				break;	
+				break;
 			case CL_INVALID_EVENT_WAIT_LIST:
 				oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
 				break;
 			case CL_MEM_OBJECT_ALLOCATION_FAILURE:
 				oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
-				break;	
+				break;
 			case CL_OUT_OF_HOST_MEMORY:
 				oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
-				break;		
+				break;
 			default:
 				oclHandles.error_str += "Unknown reason";
 				break;
-		}		
+		}
 		throw(oclHandles.error_str);
-	}		
+	}
 #endif
 #ifdef PROFILE_
 	double t2 = gettime();
@@ -926,7 +937,7 @@ void _clMemcpyH2D(cl_mem dst, const void *src, int size) throw(string){
 	@function:	transfer data from device to host
 	@params:
 		dest:	the destination device memory
-		src:	the source host memory	
+		src:	the source host memory
 		size: 	the size of data to be transferred in bytes
 	@return:	NULL
 	@date:		17/01/2011
@@ -945,28 +956,28 @@ void _clMemcpyD2H(void * dst, cl_mem src, int size) throw(string){
 				break;
 			case CL_INVALID_CONTEXT:
 				oclHandles.error_str += "CL_INVALID_CONTEXT";
-				break;	
+				break;
 			case CL_INVALID_MEM_OBJECT:
 				oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
 				break;
 			case CL_INVALID_VALUE:
 				oclHandles.error_str += "CL_INVALID_VALUE";
-				break;	
+				break;
 			case CL_INVALID_EVENT_WAIT_LIST:
 				oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
 				break;
 			case CL_MEM_OBJECT_ALLOCATION_FAILURE:
 				oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
-				break;	
+				break;
 			case CL_OUT_OF_HOST_MEMORY:
 				oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
-				break;		
+				break;
 			default:
 				oclHandles.error_str += "Unknown reason";
 				break;
-		}		
+		}
 		throw(oclHandles.error_str);
-	}		
+	}
 #endif
 #ifdef PROFILE_
 	double t2 = gettime();
@@ -977,7 +988,7 @@ void _clMemcpyD2H(void * dst, cl_mem src, int size) throw(string){
 	@function:	transfer data from device to device
 	@params:
 		dest:	the destination device memory
-		src:	the source device memory	
+		src:	the source device memory
 		size: 	the size of data to be transferred in bytes
 	@return:	NULL
 	@date:		27/03/2011
@@ -996,37 +1007,37 @@ void _clMemcpyD2D(cl_mem dst, cl_mem src, int size) throw(string){
 				break;
 			case CL_INVALID_CONTEXT:
 				oclHandles.error_str += "CL_INVALID_CONTEXT";
-				break;	
+				break;
 			case CL_INVALID_MEM_OBJECT:
 				oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
 				break;
 			case CL_INVALID_VALUE:
 				oclHandles.error_str += "CL_INVALID_VALUE";
-				break;	
+				break;
 			case CL_INVALID_EVENT_WAIT_LIST:
 				oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
 				break;
 			case CL_MISALIGNED_SUB_BUFFER_OFFSET:
 				oclHandles.error_str += "CL_MISALIGNED_SUB_BUFFER_OFFSET";
-				break;	
+				break;
 			case CL_MEM_COPY_OVERLAP:
 				oclHandles.error_str += "CL_MEM_COPY_OVERLAP";
-				break;	
+				break;
 			case CL_MEM_OBJECT_ALLOCATION_FAILURE:
 				oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
-				break;	
+				break;
 			case CL_OUT_OF_RESOURCES:
 				oclHandles.error_str += "CL_OUT_OF_RESOURCES";
-				break;	
+				break;
 			case CL_OUT_OF_HOST_MEMORY:
 				oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
 				break;
 			default:
 				oclHandles.error_str += "Unknown reason";
 				break;
-		}		
+		}
 		throw(oclHandles.error_str);
-	}		
+	}
 #endif
 #ifdef PROFILE_
 	double t2 = gettime();
@@ -1055,19 +1066,19 @@ void _clSetArgs(int kernel_id, int arg_idx, void * d_mem, int size = 0) throw(st
 				break;
 			case CL_INVALID_ARG_INDEX:
 				oclHandles.error_str += "CL_INVALID_ARG_INDEX";
-				break;	
+				break;
 			case CL_INVALID_ARG_VALUE:
 				oclHandles.error_str += "CL_INVALID_ARG_VALUE";
 				break;
 			case CL_INVALID_MEM_OBJECT:
 				oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
-				break;	
+				break;
 			case CL_INVALID_SAMPLER:
 				oclHandles.error_str += "CL_INVALID_SAMPLER";
 				break;
 			case CL_INVALID_ARG_SIZE:
 				oclHandles.error_str += "CL_INVALID_ARG_SIZE";
-				break;	
+				break;
 			case CL_OUT_OF_RESOURCES:
 				oclHandles.error_str += "CL_OUT_OF_RESOURCES";
 				break;
@@ -1092,19 +1103,19 @@ void _clSetArgs(int kernel_id, int arg_idx, void * d_mem, int size = 0) throw(st
 				break;
 			case CL_INVALID_ARG_INDEX:
 				oclHandles.error_str += "CL_INVALID_ARG_INDEX";
-				break;	
+				break;
 			case CL_INVALID_ARG_VALUE:
 				oclHandles.error_str += "CL_INVALID_ARG_VALUE";
 				break;
 			case CL_INVALID_MEM_OBJECT:
 				oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
-				break;	
+				break;
 			case CL_INVALID_SAMPLER:
 				oclHandles.error_str += "CL_INVALID_SAMPLER";
 				break;
 			case CL_INVALID_ARG_SIZE:
 				oclHandles.error_str += "CL_INVALID_ARG_SIZE";
-				break;	
+				break;
 			case CL_OUT_OF_RESOURCES:
 				oclHandles.error_str += "CL_OUT_OF_RESOURCES";
 				break;
@@ -1121,7 +1132,7 @@ void _clSetArgs(int kernel_id, int arg_idx, void * d_mem, int size = 0) throw(st
 	}
 }
 void _clFinish() throw(string){
-	oclHandles.cl_status = clFinish(oclHandles.queue);	
+	oclHandles.cl_status = clFinish(oclHandles.queue);
 #ifdef ERRMSG
 	if(oclHandles.cl_status!=CL_SUCCESS){
 		oclHandles.error_str = "excpetion in _clFinish";
@@ -1131,7 +1142,7 @@ void _clFinish() throw(string){
 				break;
 			case CL_OUT_OF_RESOURCES:
 				oclHandles.error_str += "CL_OUT_OF_RESOURCES";
-				break;		
+				break;
 			case CL_OUT_OF_HOST_MEMORY:
 				oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
 				break;
@@ -1163,7 +1174,7 @@ void _clInvokeKernel(int kernel_id, int work_items, int work_group_size) throw(s
   	size_t local_work_size[] = {work_group_size, 1};
 	size_t global_work_size[] = {work_items, 1};
 	oclHandles.cl_status = clEnqueueNDRangeKernel(oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0, \
-											global_work_size, local_work_size, 0 , 0, &(e[0]) );	
+											global_work_size, local_work_size, 0 , 0, &(e[0]) );
 	#ifdef ERRMSG
 	if(oclHandles.cl_status != CL_SUCCESS){
 		oclHandles.error_str = "excpetion in _clInvokeKernel() -> ";
@@ -1210,12 +1221,12 @@ void _clInvokeKernel(int kernel_id, int work_items, int work_group_size) throw(s
 			case CL_OUT_OF_HOST_MEMORY:
 				oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
 				break;
-			default: 
+			default:
 				oclHandles.error_str += "Unkown reseason";
-				break;		
+				break;
 		}
-	
-		throw(oclHandles.error_str);	
+
+		throw(oclHandles.error_str);
 	}
 	#endif
 	//_clFinish();
@@ -1257,7 +1268,7 @@ void _clInvokeKernel(int kernel_id, int work_items, int work_group_size) throw(s
 
 /*------------------------------------------------------------
 	@function:	set device memory in an easy manner
-	@params:	
+	@params:
 		mem_d: the device memory to be set;
 		val:   set the selected memory to 'val';
 		number_elements:	the number of elements in the selected memory
@@ -1270,7 +1281,7 @@ void _clMemset(cl_mem mem_d, short val, int number_bytes)throw(string){
 	int arg_idx = 0;
 	_clSetArgs(kernel_id, arg_idx++, mem_d);
 	_clSetArgs(kernel_id, arg_idx++, &val, sizeof(short));
-	_clSetArgs(kernel_id, arg_idx++, &number_bytes, sizeof(int));	
+	_clSetArgs(kernel_id, arg_idx++, &number_bytes, sizeof(int));
 	_clInvokeKernel(kernel_id, number_bytes, work_group_size);
 }
 /*------------------------------------------------------------
@@ -1280,7 +1291,7 @@ void _clMemset(cl_mem mem_d, short val, int number_bytes)throw(string){
 		range_x:	the number of working items in x direction
 		range_y:	the number of working items in y direction
 		group_x:	the number of working items in each work group in x direction
-		group_y:	the number of working items	in each work group in y direction	 
+		group_y:	the number of working items	in each work group in y direction
 	@return:	NULL
 	@date:		03/04/2011
 ------------------------------------------------------------*/
@@ -1295,7 +1306,7 @@ void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, int
 	/*if(work_items%work_group_size != 0)	//process situations that work_items cannot be divided by work_group_size
 	  work_items = work_items + (work_group_size-(work_items%work_group_size));*/
 	oclHandles.cl_status = clEnqueueNDRangeKernel(oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0, \
-											global_work_size, local_work_size, 0 , 0, &(e[0]) );	
+											global_work_size, local_work_size, 0 , 0, &(e[0]) );
 	#ifdef ERRMSG
 	if(oclHandles.cl_status != CL_SUCCESS){
 		oclHandles.error_str = "excpetion in _clInvokeKernel() -> ";
@@ -1342,14 +1353,14 @@ void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, int
 			case CL_OUT_OF_HOST_MEMORY:
 				oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
 				break;
-			default: 
+			default:
 				oclHandles.error_str += "Unkown reseason";
-				break;		
-		}	
-		throw(oclHandles.error_str);	
+				break;
+		}
+		throw(oclHandles.error_str);
 	}
 	#endif
-		
+
 	//	oclHandles.cl_status = clWaitForEvents(1, &e[0]);
 
 #ifdef ERRMSG
@@ -1374,10 +1385,10 @@ void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, int
 
 void _clFree(cl_mem ob) throw(string){
 #ifdef PROFILE_
-	double t1 = gettime();	
+	double t1 = gettime();
 #endif
 	if(ob!=NULL)
-		oclHandles.cl_status = clReleaseMemObject(ob);	
+		oclHandles.cl_status = clReleaseMemObject(ob);
 #ifdef ERRMSG
 	if (oclHandles.cl_status!= CL_SUCCESS){
 		oclHandles.error_str = "excpetion in _clFree() ->";
@@ -1390,10 +1401,10 @@ void _clFree(cl_mem ob) throw(string){
 				break;
 			case CL_OUT_OF_HOST_MEMORY:
 				oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
-				break;			
-			default: 
+				break;
+			default:
 				oclHandles.error_str += "Unkown reseason";
-				break;		
+				break;
 		}
 	throw(oclHandles.error_str);
 	}
@@ -1415,7 +1426,7 @@ void _clStatistics(){
 	FILE *fp_pd = fopen("PD_OCL.txt", "a");
 	fprintf(fp_pd, "%lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf\n", CC, CR, MA, MF, H2D, D2H, D2D, KE, KC);
 	fclose(fp_pd);
-#endif	
+#endif
 	return ;
 }
 #endif //_CL_HELPER_

+ 42 - 42
opencl/cfd/euler3d.cpp

@@ -1,7 +1,7 @@
 /********************************************************************
 	euler3d.cpp
 	: parallelized code of CFD
-	
+
 	- original code from the AIAA-2009-4001 by Andrew Corrigan, acorriga@gmu.edu
 	- parallelization with OpenCL API has been applied by
 	Jianbin Fang - j.fang@tudelft.nl
@@ -15,12 +15,12 @@
 #include <iostream>
 #include <fstream>
 #include <math.h>
-#include "CLHelper.h" 
- 
+#include "CLHelper.h"
+
 /*
- * Options 
- * 
- */ 
+ * Options
+ *
+ */
 #define GAMMA 1.4f
 #define iterations 2000
 #ifndef block_length
@@ -106,7 +106,7 @@ void dump(cl_mem variables, int nel, int nelr){
 			file << std::endl;
 		}
 	}
-	
+
 	{
 		std::ofstream file("density_energy");
 		file << nel << " " << nelr << std::endl;
@@ -120,7 +120,7 @@ void initialize_variables(int nelr, cl_mem variables, cl_mem ff_variable) throw(
 	int work_items = nelr;
 	int work_group_size = BLOCK_SIZE_1;
 	int kernel_id = 1;
-	int arg_idx = 0;	
+	int arg_idx = 0;
 	_clSetArgs(kernel_id, arg_idx++, variables);
 	_clSetArgs(kernel_id, arg_idx++, ff_variable);
 	_clSetArgs(kernel_id, arg_idx++, &nelr, sizeof(int));
@@ -175,7 +175,7 @@ void time_step(int j, int nelr, cl_mem old_variables, cl_mem variables, cl_mem s
 	_clSetArgs(kernel_id, arg_idx++, variables);
 	_clSetArgs(kernel_id, arg_idx++, step_factors);
 	_clSetArgs(kernel_id, arg_idx++, fluxes);
-	
+
 	_clInvokeKernel(kernel_id, work_items, work_group_size);
 }
 inline void compute_flux_contribution(float& density, float3& momentum, float& density_energy, float& pressure, float3& velocity, float3& fc_momentum_x, float3& fc_momentum_y, float3& fc_momentum_z, float3& fc_density_energy)
@@ -183,8 +183,8 @@ inline void compute_flux_contribution(float& density, float3& momentum, float& d
 	fc_momentum_x.x = velocity.x*momentum.x + pressure;
 	fc_momentum_x.y = velocity.x*momentum.y;
 	fc_momentum_x.z = velocity.x*momentum.z;
-	
-	
+
+
 	fc_momentum_y.x = fc_momentum_x.y;
 	fc_momentum_y.y = velocity.y*momentum.y + pressure;
 	fc_momentum_y.z = velocity.y*momentum.z;
@@ -206,7 +206,7 @@ int main(int argc, char** argv){
   printf("WG size of kernel:initialize = %d, WG size of kernel:compute_step_factor = %d, WG size of kernel:compute_flux = %d, WG size of kernel:time_step = %d\n", BLOCK_SIZE_1, BLOCK_SIZE_2, BLOCK_SIZE_3, BLOCK_SIZE_4);
 
 	if (argc < 2){
-		std::cout << "specify data file name and [device type] [device id]" << std::endl;
+		std::cout << "specify data file name and [platform id] [device id] [device type] " << std::endl;
 		return 0;
 	}
 	const char* data_file_name = argv[1];
@@ -216,28 +216,28 @@ int main(int argc, char** argv){
 	cl_mem variables, old_variables, fluxes, step_factors;
 	float h_ff_variable[NVAR];
 
-	try{		
-		_clInit(device_type, device_id);		
+	try{
+		_clInit(device_type, device_id);
 		// set far field conditions and load them into constant memory on the gpu
 		{
 			//float h_ff_variable[NVAR];
 			const float angle_of_attack = float(3.1415926535897931 / 180.0f) * float(deg_angle_of_attack);
-			
+
 			h_ff_variable[VAR_DENSITY] = float(1.4);
-			
+
 			float ff_pressure = float(1.0f);
 			float ff_speed_of_sound = sqrt(GAMMA*ff_pressure / h_ff_variable[VAR_DENSITY]);
 			float ff_speed = float(ff_mach)*ff_speed_of_sound;
-			
+
 			float3 ff_velocity;
 			ff_velocity.x = ff_speed*float(cos((float)angle_of_attack));
 			ff_velocity.y = ff_speed*float(sin((float)angle_of_attack));
 			ff_velocity.z = 0.0f;
-			
+
 			h_ff_variable[VAR_MOMENTUM+0] = h_ff_variable[VAR_DENSITY] * ff_velocity.x;
 			h_ff_variable[VAR_MOMENTUM+1] = h_ff_variable[VAR_DENSITY] * ff_velocity.y;
 			h_ff_variable[VAR_MOMENTUM+2] = h_ff_variable[VAR_DENSITY] * ff_velocity.z;
-					
+
 			h_ff_variable[VAR_DENSITY_ENERGY] = h_ff_variable[VAR_DENSITY]*(float(0.5f)*(ff_speed*ff_speed)) + (ff_pressure / float(GAMMA-1.0f));
 
 			float3 h_ff_momentum;
@@ -260,10 +260,10 @@ int main(int argc, char** argv){
 			_clMemcpyH2D(ff_variable,          h_ff_variable,          NVAR*sizeof(float));
 			_clMemcpyH2D(ff_flux_contribution_momentum_x, &h_ff_flux_contribution_momentum_x, sizeof(float3));
 			_clMemcpyH2D(ff_flux_contribution_momentum_y, &h_ff_flux_contribution_momentum_y, sizeof(float3));
-			_clMemcpyH2D(ff_flux_contribution_momentum_z, &h_ff_flux_contribution_momentum_z, sizeof(float3));		
+			_clMemcpyH2D(ff_flux_contribution_momentum_z, &h_ff_flux_contribution_momentum_z, sizeof(float3));
 			_clMemcpyH2D(ff_flux_contribution_density_energy, &h_ff_flux_contribution_density_energy, sizeof(float3));
 			_clFinish();
-		
+
 		}
 		int nel;
 		int nelr;
@@ -283,7 +283,7 @@ int main(int argc, char** argv){
 			int* h_elements_surrounding_elements = new int[nelr*NNB];
 			float* h_normals = new float[nelr*NDIM*NNB];
 
-					
+
 			// read in data
 			for(int i = 0; i < nel; i++)
 			{
@@ -292,8 +292,8 @@ int main(int argc, char** argv){
 				{
 					file >> h_elements_surrounding_elements[i + j*nelr];
 					if(h_elements_surrounding_elements[i+j*nelr] < 0) h_elements_surrounding_elements[i+j*nelr] = -1;
-					h_elements_surrounding_elements[i + j*nelr]--; //it's coming in with Fortran numbering				
-					
+					h_elements_surrounding_elements[i + j*nelr]--; //it's coming in with Fortran numbering
+
 					for(int k = 0; k < NDIM; k++)
 					{
 						file >> h_normals[i + (j + k*NNB)*nelr];
@@ -301,7 +301,7 @@ int main(int argc, char** argv){
 					}
 				}
 			}
-			
+
 			// fill in remaining data
 			int last = nel-1;
 			for(int i = nel; i < nelr; i++)
@@ -310,11 +310,11 @@ int main(int argc, char** argv){
 				for(int j = 0; j < NNB; j++)
 				{
 					// duplicate the last element
-					h_elements_surrounding_elements[i + j*nelr] = h_elements_surrounding_elements[last + j*nelr];	
+					h_elements_surrounding_elements[i + j*nelr] = h_elements_surrounding_elements[last + j*nelr];
 					for(int k = 0; k < NDIM; k++) h_normals[last + (j + k*NNB)*nelr] = h_normals[last + (j + k*NNB)*nelr];
 				}
 			}
-			
+
 			areas = alloc<float>(nelr);
 			upload<float>(areas, h_areas, nelr);
 
@@ -323,22 +323,22 @@ int main(int argc, char** argv){
 
 			normals = alloc<float>(nelr*NDIM*NNB);
 			upload<float>(normals, h_normals, nelr*NDIM*NNB);
-					
+
 			delete[] h_areas;
 			delete[] h_elements_surrounding_elements;
 			delete[] h_normals;
 		}
-		
+
 		// Create arrays and set initial conditions
-		variables = alloc<float>(nelr*NVAR);				
+		variables = alloc<float>(nelr*NVAR);
 		int tp = 0;
-		initialize_variables(nelr, variables, ff_variable);			
-		old_variables = alloc<float>(nelr*NVAR);   	
+		initialize_variables(nelr, variables, ff_variable);
+		old_variables = alloc<float>(nelr*NVAR);
 		fluxes = alloc<float>(nelr*NVAR);
-		step_factors = alloc<float>(nelr); 
+		step_factors = alloc<float>(nelr);
 		// make sure all memory is really allocated before we start timing
-		initialize_variables(nelr, old_variables, ff_variable);	
-		initialize_variables(nelr, fluxes, ff_variable);		
+		initialize_variables(nelr, old_variables, ff_variable);
+		initialize_variables(nelr, fluxes, ff_variable);
 		_clMemset(step_factors, 0, sizeof(float)*nelr);
 		// make sure the OpenCL device isn't still doing something before we start timing
 		_clFinish();
@@ -353,7 +353,7 @@ int main(int argc, char** argv){
 			for(int j = 0; j < RK; j++){
 				compute_flux(nelr, elements_surrounding_elements, normals, variables, ff_variable, fluxes, ff_flux_contribution_density_energy, \
 				ff_flux_contribution_momentum_x, ff_flux_contribution_momentum_y, ff_flux_contribution_momentum_z);
-			
+
 				time_step(j, nelr, old_variables, variables, step_factors, fluxes);
 			}
 		}
@@ -363,7 +363,7 @@ int main(int argc, char** argv){
 		std::cout << "Saved solution..." << std::endl;
 		_clStatistics();
 		std::cout << "Cleaning up..." << std::endl;
-		
+
 		//--release resources
 		_clFree(ff_variable);
 		_clFree(ff_flux_contribution_momentum_x);
@@ -381,7 +381,7 @@ int main(int argc, char** argv){
 		std::cout << "Done..." << std::endl;
 	}
 	catch(string msg){
-		std::cout<<"--cambine:( an exception catched in main body ->"<<msg<<std::endl;		
+		std::cout<<"--cambine:( an exception catched in main body ->"<<msg<<std::endl;
 		_clFree(ff_variable);
 		_clFree(ff_flux_contribution_momentum_x);
 		_clFree(ff_flux_contribution_momentum_y);
@@ -394,10 +394,10 @@ int main(int argc, char** argv){
 		_clFree(old_variables);
 		_clFree(fluxes);
 		_clFree(step_factors);
-		_clRelease();		
+		_clRelease();
 	}
 	catch(...){
-		std::cout<<"--cambine:( unknow exceptions in main body..."<<std::endl;		
+		std::cout<<"--cambine:( unknow exceptions in main body..."<<std::endl;
 		_clFree(ff_variable);
 		_clFree(ff_flux_contribution_momentum_x);
 		_clFree(ff_flux_contribution_momentum_y);
@@ -410,8 +410,8 @@ int main(int argc, char** argv){
 		_clFree(old_variables);
 		_clFree(fluxes);
 		_clFree(step_factors);
-		_clRelease();		
+		_clRelease();
 	}
-		
+
 	return 0;
 }

+ 3 - 3
opencl/cfd/run-cpu

@@ -1,5 +1,5 @@
 #There are three datasets:
 
-./euler3d ../../data/cfd/fvcorr.domn.097K -t cpu -d 0
-./euler3d ../../data/cfd/fvcorr.domn.193K -t cpu -d 0
-./euler3d ../../data/cfd/missile.domn.0.2M -t cpu -d 0
+./euler3d ../../data/cfd/fvcorr.domn.097K -p 1 -t cpu -d 0
+./euler3d ../../data/cfd/fvcorr.domn.193K -p 1 -t cpu -d 0
+./euler3d ../../data/cfd/missile.domn.0.2M -p 1 -t cpu -d 0

+ 5 - 0
opencl/cfd/run-gpu

@@ -0,0 +1,5 @@
+#There are three datasets:
+
+./euler3d ../../data/cfd/fvcorr.domn.097K -p 0 -t gpu -d 0
+./euler3d ../../data/cfd/fvcorr.domn.193K -p 0 -t gpu -d 0
+./euler3d ../../data/cfd/missile.domn.0.2M -p 0 -t gpu -d 0