|
@@ -131,22 +131,57 @@ int compute_tran_temp(cl_mem MatrixPower, cl_mem MatrixTemp[2], int col, int row
|
|
|
}
|
|
|
|
|
|
void usage(int argc, char **argv) {
|
|
|
- fprintf(stderr, "Usage: %s <grid_rows/grid_cols> <pyramid_height> <sim_time> <temp_file> <power_file> <output_file>\n", argv[0]);
|
|
|
+ fprintf(stderr, "Usage: %s <grid_rows/grid_cols> <pyramid_height> <sim_time> <temp_file> <power_file> <output_file> <platform_id> <device_id> <use_gpu>\n", argv[0]);
|
|
|
fprintf(stderr, "\t<grid_rows/grid_cols> - number of rows/cols in the grid (positive integer)\n");
|
|
|
fprintf(stderr, "\t<pyramid_height> - pyramid heigh(positive integer)\n");
|
|
|
fprintf(stderr, "\t<sim_time> - number of iterations\n");
|
|
|
fprintf(stderr, "\t<temp_file> - name of the file containing the initial temperature values of each cell\n");
|
|
|
fprintf(stderr, "\t<power_file> - name of the file containing the dissipated power values of each cell\n");
|
|
|
fprintf(stderr, "\t<output_file> - name of the output file\n");
|
|
|
+ fprintf(stderr, "\t<platform_id> - the target platform to use\n");
|
|
|
+ fprintf(stderr, "\t<device_id> - the device to use\n");
|
|
|
+ fprintf(stderr, "\t<use_gpu> - 1 for GPU, 0 for CPU\n");
|
|
|
exit(1);
|
|
|
}
|
|
|
|
|
|
int main(int argc, char** argv) {
|
|
|
|
|
|
printf("WG size of kernel = %d X %d\n", BLOCK_SIZE, BLOCK_SIZE);
|
|
|
+
|
|
|
+ // Command line arguments "parsing"
|
|
|
+ int size;
|
|
|
+ int grid_rows,grid_cols = 0;
|
|
|
+ float *FilesavingTemp,*FilesavingPower; //,*MatrixOut;
|
|
|
+ char *tfile, *pfile, *ofile;
|
|
|
+
|
|
|
+ int total_iterations = 60;
|
|
|
+ int pyramid_height = 1; // number of iterations
|
|
|
+ int platform_id = 0;
|
|
|
+ int device_id = 0;
|
|
|
+ int use_gpu = 0;
|
|
|
+
|
|
|
+ if (argc < 10)
|
|
|
+ usage(argc, argv);
|
|
|
+ if((grid_rows = atoi(argv[1]))<=0||
|
|
|
+ (grid_cols = atoi(argv[1]))<=0||
|
|
|
+ (pyramid_height = atoi(argv[2]))<=0||
|
|
|
+ (total_iterations = atoi(argv[3]))<=0)
|
|
|
+ usage(argc, argv);
|
|
|
+
|
|
|
+ tfile=argv[4];
|
|
|
+ pfile=argv[5];
|
|
|
+ ofile=argv[6];
|
|
|
+ platform_id = atoi(argv[7]);
|
|
|
+ device_id = atoi(argv[8]);
|
|
|
+ use_gpu = atoi(argv[9]);
|
|
|
+
|
|
|
+ size=grid_rows*grid_cols;
|
|
|
|
|
|
cl_int error;
|
|
|
cl_uint num_platforms;
|
|
|
+
|
|
|
+ // Selector for CPU/GPU
|
|
|
+ cl_device_type device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
|
|
|
|
|
|
// Get the number of platforms
|
|
|
error = clGetPlatformIDs(0, NULL, &num_platforms);
|
|
@@ -157,8 +192,8 @@ int main(int argc, char** argv) {
|
|
|
error = clGetPlatformIDs(num_platforms, platforms, NULL);
|
|
|
if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
|
|
|
|
|
|
- // Print the chosen platform (if there are multiple platforms, choose the first one)
|
|
|
- cl_platform_id platform = platforms[1];
|
|
|
+ // Print the chosen platform, selected on the basis of the corresponding command line argument
|
|
|
+ cl_platform_id platform = platforms[platform_id];
|
|
|
char pbuf[100];
|
|
|
error = clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf, NULL);
|
|
|
if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
|
|
@@ -166,7 +201,7 @@ int main(int argc, char** argv) {
|
|
|
|
|
|
// Create a GPU context
|
|
|
cl_context_properties context_properties[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties) platform, 0};
|
|
|
- context = clCreateContextFromType(context_properties, CL_DEVICE_TYPE_CPU, NULL, NULL, &error);
|
|
|
+ context = clCreateContextFromType(context_properties, device_type, NULL, NULL, &error);
|
|
|
if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
|
|
|
|
|
|
// Get and print the chosen device (if there are multiple devices, choose the first one)
|
|
@@ -176,65 +211,43 @@ int main(int argc, char** argv) {
|
|
|
cl_device_id *devices = (cl_device_id *) malloc(devices_size);
|
|
|
error = clGetContextInfo(context, CL_CONTEXT_DEVICES, devices_size, devices, NULL);
|
|
|
if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
|
|
|
- device = devices[0];
|
|
|
+
|
|
|
+ // Select the device passed as command line argument
|
|
|
+ device = devices[device_id];
|
|
|
error = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(pbuf), pbuf, NULL);
|
|
|
if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
|
|
|
printf("Device: %s\n", pbuf);
|
|
|
|
|
|
// Create a command queue
|
|
|
command_queue = clCreateCommandQueue(context, device, 0, &error);
|
|
|
- if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
|
|
|
-
|
|
|
-
|
|
|
+ if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
|
|
|
+
|
|
|
+ // --------------- pyramid parameters ---------------
|
|
|
+ int borderCols = (pyramid_height)*EXPAND_RATE/2;
|
|
|
+ int borderRows = (pyramid_height)*EXPAND_RATE/2;
|
|
|
+ int smallBlockCol = BLOCK_SIZE-(pyramid_height)*EXPAND_RATE;
|
|
|
+ int smallBlockRow = BLOCK_SIZE-(pyramid_height)*EXPAND_RATE;
|
|
|
+ int blockCols = grid_cols/smallBlockCol+((grid_cols%smallBlockCol==0)?0:1);
|
|
|
+ int blockRows = grid_rows/smallBlockRow+((grid_rows%smallBlockRow==0)?0:1);
|
|
|
+
|
|
|
+ FilesavingTemp = (float *) malloc(size*sizeof(float));
|
|
|
+ FilesavingPower = (float *) malloc(size*sizeof(float));
|
|
|
+ // MatrixOut = (float *) calloc (size, sizeof(float));
|
|
|
+
|
|
|
+ if( !FilesavingPower || !FilesavingTemp) // || !MatrixOut)
|
|
|
+ fatal("unable to allocate memory");
|
|
|
|
|
|
- int size;
|
|
|
- int grid_rows,grid_cols = 0;
|
|
|
- float *FilesavingTemp,*FilesavingPower; //,*MatrixOut;
|
|
|
- char *tfile, *pfile, *ofile;
|
|
|
-
|
|
|
- int total_iterations = 60;
|
|
|
- int pyramid_height = 1; // number of iterations
|
|
|
-
|
|
|
- if (argc < 7)
|
|
|
- usage(argc, argv);
|
|
|
- if((grid_rows = atoi(argv[1]))<=0||
|
|
|
- (grid_cols = atoi(argv[1]))<=0||
|
|
|
- (pyramid_height = atoi(argv[2]))<=0||
|
|
|
- (total_iterations = atoi(argv[3]))<=0)
|
|
|
- usage(argc, argv);
|
|
|
-
|
|
|
- tfile=argv[4];
|
|
|
- pfile=argv[5];
|
|
|
- ofile=argv[6];
|
|
|
-
|
|
|
- size=grid_rows*grid_cols;
|
|
|
-
|
|
|
- // --------------- pyramid parameters ---------------
|
|
|
- int borderCols = (pyramid_height)*EXPAND_RATE/2;
|
|
|
- int borderRows = (pyramid_height)*EXPAND_RATE/2;
|
|
|
- int smallBlockCol = BLOCK_SIZE-(pyramid_height)*EXPAND_RATE;
|
|
|
- int smallBlockRow = BLOCK_SIZE-(pyramid_height)*EXPAND_RATE;
|
|
|
- int blockCols = grid_cols/smallBlockCol+((grid_cols%smallBlockCol==0)?0:1);
|
|
|
- int blockRows = grid_rows/smallBlockRow+((grid_rows%smallBlockRow==0)?0:1);
|
|
|
-
|
|
|
- FilesavingTemp = (float *) malloc(size*sizeof(float));
|
|
|
- FilesavingPower = (float *) malloc(size*sizeof(float));
|
|
|
- // MatrixOut = (float *) calloc (size, sizeof(float));
|
|
|
-
|
|
|
- if( !FilesavingPower || !FilesavingTemp) // || !MatrixOut)
|
|
|
- fatal("unable to allocate memory");
|
|
|
-
|
|
|
// Read input data from disk
|
|
|
- readinput(FilesavingTemp, grid_rows, grid_cols, tfile);
|
|
|
- readinput(FilesavingPower, grid_rows, grid_cols, pfile);
|
|
|
+ readinput(FilesavingTemp, grid_rows, grid_cols, tfile);
|
|
|
+ readinput(FilesavingPower, grid_rows, grid_cols, pfile);
|
|
|
|
|
|
// Load kernel source from file
|
|
|
const char *source = load_kernel_source("hotspot_kernel.cl");
|
|
|
size_t sourceSize = strlen(source);
|
|
|
|
|
|
// Compile the kernel
|
|
|
- cl_program program = clCreateProgramWithSource(context, 1, &source, &sourceSize, &error);
|
|
|
- if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
|
|
|
+ cl_program program = clCreateProgramWithSource(context, 1, &source, &sourceSize, &error);
|
|
|
+ if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
|
|
|
|
|
|
char clOptions[110];
|
|
|
// sprintf(clOptions,"-I../../src");
|