Bladeren bron

Implemented device selection for hotspot

We now use command-line parameters to select the OpenCL platform, device and
device type instead of having them hardcoded in the code. This way we can
easily change the type of the benchmark without recompiling it.

The parsing is done using the pre-existing command-line argument
parsing structure, which is really artisanal.
Andrea Gussoni 8 jaren geleden
bovenliggende
commit
8354e71ca0
3 gewijzigde bestanden met toevoegingen van 64 en 50 verwijderingen
  1. 62 49
      opencl/hotspot/hotspot.c
  2. 1 1
      opencl/hotspot/run-cpu
  3. 1 0
      opencl/hotspot/run-gpu

+ 62 - 49
opencl/hotspot/hotspot.c

@@ -131,22 +131,57 @@ int compute_tran_temp(cl_mem MatrixPower, cl_mem MatrixTemp[2], int col, int row
 }
 
 void usage(int argc, char **argv) {
-	fprintf(stderr, "Usage: %s <grid_rows/grid_cols> <pyramid_height> <sim_time> <temp_file> <power_file> <output_file>\n", argv[0]);
+	fprintf(stderr, "Usage: %s <grid_rows/grid_cols> <pyramid_height> <sim_time> <temp_file> <power_file> <output_file> <platform_id> <device_id> <use_gpu>\n", argv[0]);
 	fprintf(stderr, "\t<grid_rows/grid_cols>  - number of rows/cols in the grid (positive integer)\n");
 	fprintf(stderr, "\t<pyramid_height> - pyramid heigh(positive integer)\n");
 	fprintf(stderr, "\t<sim_time>   - number of iterations\n");
 	fprintf(stderr, "\t<temp_file>  - name of the file containing the initial temperature values of each cell\n");
 	fprintf(stderr, "\t<power_file> - name of the file containing the dissipated power values of each cell\n");
 	fprintf(stderr, "\t<output_file> - name of the output file\n");
+  fprintf(stderr, "\t<platform_id> - the target platform to use\n");
+  fprintf(stderr, "\t<device_id> - the device to use\n");
+  fprintf(stderr, "\t<use_gpu> - 1 for GPU, 0 for CPU\n");
 	exit(1);
 }
 
 int main(int argc, char** argv) {
 
   printf("WG size of kernel = %d X %d\n", BLOCK_SIZE, BLOCK_SIZE);
+  
+  // Command line arguments "parsing"
+  int size;
+  int grid_rows,grid_cols = 0;
+  float *FilesavingTemp,*FilesavingPower; //,*MatrixOut; 
+  char *tfile, *pfile, *ofile;
+  
+  int total_iterations = 60;
+  int pyramid_height = 1; // number of iterations
+  int platform_id = 0;
+  int device_id = 0;
+  int use_gpu = 0;
+	
+	if (argc < 10)
+		usage(argc, argv);
+	if((grid_rows = atoi(argv[1]))<=0||
+	   (grid_cols = atoi(argv[1]))<=0||
+       (pyramid_height = atoi(argv[2]))<=0||
+       (total_iterations = atoi(argv[3]))<=0)
+		usage(argc, argv);
+		
+	tfile=argv[4];
+  pfile=argv[5];
+  ofile=argv[6];
+  platform_id = atoi(argv[7]);
+  device_id = atoi(argv[8]);
+  use_gpu = atoi(argv[9]);
+
+  size=grid_rows*grid_cols;
 
 	cl_int error;
 	cl_uint num_platforms;
+  
+  // Selector for CPU/GPU
+  cl_device_type device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
 	
 	// Get the number of platforms
 	error = clGetPlatformIDs(0, NULL, &num_platforms);
@@ -157,8 +192,8 @@ int main(int argc, char** argv) {
 	error = clGetPlatformIDs(num_platforms, platforms, NULL);
     if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
 	
-	// Print the chosen platform (if there are multiple platforms, choose the first one)
-	cl_platform_id platform = platforms[1];
+	// Print the chosen platform, selected on the basis of the corresponding command line argument
+	cl_platform_id platform = platforms[platform_id];
 	char pbuf[100];
 	error = clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf, NULL);
 	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
@@ -166,7 +201,7 @@ int main(int argc, char** argv) {
 	
 	// Create a GPU context
 	cl_context_properties context_properties[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties) platform, 0};
-    context = clCreateContextFromType(context_properties, CL_DEVICE_TYPE_CPU, NULL, NULL, &error);
+    context = clCreateContextFromType(context_properties, device_type, NULL, NULL, &error);
     if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
 	
 	// Get and print the chosen device (if there are multiple devices, choose the first one)
@@ -176,65 +211,43 @@ int main(int argc, char** argv) {
 	cl_device_id *devices = (cl_device_id *) malloc(devices_size);
 	error = clGetContextInfo(context, CL_CONTEXT_DEVICES, devices_size, devices, NULL);
 	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
-	device = devices[0];
+  
+  // Select the device passed as command line argument
+	device = devices[device_id];
 	error = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(pbuf), pbuf, NULL);
 	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
 	printf("Device: %s\n", pbuf);
 	
 	// Create a command queue
 	command_queue = clCreateCommandQueue(context, device, 0, &error);
-    if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
-	
-	
+  if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
+
+  // --------------- pyramid parameters --------------- 
+  int borderCols = (pyramid_height)*EXPAND_RATE/2;
+  int borderRows = (pyramid_height)*EXPAND_RATE/2;
+  int smallBlockCol = BLOCK_SIZE-(pyramid_height)*EXPAND_RATE;
+  int smallBlockRow = BLOCK_SIZE-(pyramid_height)*EXPAND_RATE;
+  int blockCols = grid_cols/smallBlockCol+((grid_cols%smallBlockCol==0)?0:1);
+  int blockRows = grid_rows/smallBlockRow+((grid_rows%smallBlockRow==0)?0:1);
+
+  FilesavingTemp = (float *) malloc(size*sizeof(float));
+  FilesavingPower = (float *) malloc(size*sizeof(float));
+  // MatrixOut = (float *) calloc (size, sizeof(float));
+
+  if( !FilesavingPower || !FilesavingTemp) // || !MatrixOut)
+    fatal("unable to allocate memory");
 
-    int size;
-    int grid_rows,grid_cols = 0;
-    float *FilesavingTemp,*FilesavingPower; //,*MatrixOut; 
-    char *tfile, *pfile, *ofile;
-    
-    int total_iterations = 60;
-    int pyramid_height = 1; // number of iterations
-	
-	if (argc < 7)
-		usage(argc, argv);
-	if((grid_rows = atoi(argv[1]))<=0||
-	   (grid_cols = atoi(argv[1]))<=0||
-       (pyramid_height = atoi(argv[2]))<=0||
-       (total_iterations = atoi(argv[3]))<=0)
-		usage(argc, argv);
-		
-	tfile=argv[4];
-    pfile=argv[5];
-    ofile=argv[6];
-	
-    size=grid_rows*grid_cols;
-
-    // --------------- pyramid parameters --------------- 
-    int borderCols = (pyramid_height)*EXPAND_RATE/2;
-    int borderRows = (pyramid_height)*EXPAND_RATE/2;
-    int smallBlockCol = BLOCK_SIZE-(pyramid_height)*EXPAND_RATE;
-    int smallBlockRow = BLOCK_SIZE-(pyramid_height)*EXPAND_RATE;
-    int blockCols = grid_cols/smallBlockCol+((grid_cols%smallBlockCol==0)?0:1);
-    int blockRows = grid_rows/smallBlockRow+((grid_rows%smallBlockRow==0)?0:1);
-
-    FilesavingTemp = (float *) malloc(size*sizeof(float));
-    FilesavingPower = (float *) malloc(size*sizeof(float));
-    // MatrixOut = (float *) calloc (size, sizeof(float));
-
-    if( !FilesavingPower || !FilesavingTemp) // || !MatrixOut)
-        fatal("unable to allocate memory");
-	
 	// Read input data from disk
-    readinput(FilesavingTemp, grid_rows, grid_cols, tfile);
-    readinput(FilesavingPower, grid_rows, grid_cols, pfile);
+  readinput(FilesavingTemp, grid_rows, grid_cols, tfile);
+  readinput(FilesavingPower, grid_rows, grid_cols, pfile);
 	
 	// Load kernel source from file
 	const char *source = load_kernel_source("hotspot_kernel.cl");
 	size_t sourceSize = strlen(source);
 	
 	// Compile the kernel
-    cl_program program = clCreateProgramWithSource(context, 1, &source, &sourceSize, &error);
-    if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
+  cl_program program = clCreateProgramWithSource(context, 1, &source, &sourceSize, &error);
+  if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
 	
 	char clOptions[110];
 	//  sprintf(clOptions,"-I../../src"); 

+ 1 - 1
opencl/hotspot/run-cpu

@@ -1 +1 @@
-./hotspot 512 3 1000  ../../data/hotspot/temp_512 ../../data/hotspot/power_512 output.out
+./hotspot 512 3 1000  ../../data/hotspot/temp_512 ../../data/hotspot/power_512 output.out 1 0 0

+ 1 - 0
opencl/hotspot/run-gpu

@@ -0,0 +1 @@
+./hotspot 512 3 1000  ../../data/hotspot/temp_512 ../../data/hotspot/power_512 output.out 0 0 1