123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657 |
- // #ifdef __cplusplus
- // extern "C" {
- // #endif
- //========================================================================================================================================================================================================200
- // DEFINE/INCLUDE
- //========================================================================================================================================================================================================200
- //======================================================================================================================================================150
- // LIBRARIES
- //======================================================================================================================================================150
- #include <CL/cl.h> // (in directory provided to compiler) needed by OpenCL types and functions
- #include <string.h> // (in directory known to compiler) needed by memset
- //======================================================================================================================================================150
- // COMMON
- //======================================================================================================================================================150
- #include "../common.h" // (in directory provided here)
- //======================================================================================================================================================150
- // UTILITIES
- //======================================================================================================================================================150
- #include "../util/opencl/opencl.h" // (in directory provided here)
- #include "../util/timer/timer.h" // (in directory provided here)
- //======================================================================================================================================================150
- // HEADER
- //======================================================================================================================================================150
- #include "./kernel_gpu_opencl_wrapper.h" // (in directory provided here)
- //========================================================================================================================================================================================================200
- // KERNEL_GPU_CUDA_WRAPPER FUNCTION
- //========================================================================================================================================================================================================200
- void
- kernel_gpu_opencl_wrapper( record *records,
- long records_mem,
- knode *knodes,
- long knodes_elem,
- long knodes_mem,
- int order,
- long maxheight,
- int count,
- long *currKnode,
- long *offset,
- int *keys,
- record *ans)
- {
- //======================================================================================================================================================150
- // CPU VARIABLES
- //======================================================================================================================================================150
- // timer
- long long time0;
- long long time1;
- long long time2;
- long long time3;
- long long time4;
- long long time5;
- long long time6;
- time0 = get_time();
- //======================================================================================================================================================150
- // GPU SETUP
- //======================================================================================================================================================150
- //====================================================================================================100
- // INITIAL DRIVER OVERHEAD
- //====================================================================================================100
- // cudaThreadSynchronize();
- //====================================================================================================100
- // COMMON VARIABLES
- //====================================================================================================100
- // common variables
- cl_int error;
- //====================================================================================================100
- // GET PLATFORMS (Intel, AMD, NVIDIA, based on provided library), SELECT ONE
- //====================================================================================================100
- // Get the number of available platforms
- cl_uint num_platforms;
- error = clGetPlatformIDs( 0,
- NULL,
- &num_platforms);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // Get the list of available platforms
- cl_platform_id *platforms = (cl_platform_id *)malloc(sizeof(cl_platform_id) * num_platforms);
- error = clGetPlatformIDs( num_platforms,
- platforms,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // Select the 1st platform
- cl_platform_id platform = platforms[0];
- // Get the name of the selected platform and print it (if there are multiple platforms, choose the first one)
- char pbuf[100];
- error = clGetPlatformInfo( platform,
- CL_PLATFORM_VENDOR,
- sizeof(pbuf),
- pbuf,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- printf("Platform: %s\n", pbuf);
- //====================================================================================================100
- // CREATE CONTEXT FOR THE PLATFORM
- //====================================================================================================100
- // Create context properties for selected platform
- cl_context_properties context_properties[3] = { CL_CONTEXT_PLATFORM,
- (cl_context_properties) platform,
- 0};
- // Create context for selected platform being GPU
- cl_context context;
- context = clCreateContextFromType( context_properties,
- CL_DEVICE_TYPE_GPU,
- NULL,
- NULL,
- &error);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // GET DEVICES AVAILABLE FOR THE CONTEXT, SELECT ONE
- //====================================================================================================100
- // Get the number of devices (previousely selected for the context)
- size_t devices_size;
- error = clGetContextInfo( context,
- CL_CONTEXT_DEVICES,
- 0,
- NULL,
- &devices_size);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // Get the list of devices (previousely selected for the context)
- cl_device_id *devices = (cl_device_id *) malloc(devices_size);
- error = clGetContextInfo( context,
- CL_CONTEXT_DEVICES,
- devices_size,
- devices,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // Select the first device (previousely selected for the context) (if there are multiple devices, choose the first one)
- cl_device_id device;
- device = devices[0];
- // Get the name of the selected device (previousely selected for the context) and print it
- error = clGetDeviceInfo(device,
- CL_DEVICE_NAME,
- sizeof(pbuf),
- pbuf,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- printf("Device: %s\n", pbuf);
- //====================================================================================================100
- // CREATE COMMAND QUEUE FOR THE DEVICE
- //====================================================================================================100
- // Create a command queue
- cl_command_queue command_queue;
- command_queue = clCreateCommandQueue( context,
- device,
- 0,
- &error);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // CREATE PROGRAM, COMPILE IT
- //====================================================================================================100
- // Load kernel source code from file
- const char *source = load_kernel_source("./kernel/kernel_gpu_opencl.cl");
- size_t sourceSize = strlen(source);
- // Create the program
- cl_program program = clCreateProgramWithSource( context,
- 1,
- &source,
- &sourceSize,
- &error);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- char clOptions[110];
- // sprintf(clOptions,"-I../../src");
- sprintf(clOptions,"-I./../");
- #ifdef DEFAULT_ORDER
- sprintf(clOptions + strlen(clOptions), " -DDEFAULT_ORDER=%d", DEFAULT_ORDER);
- #endif
- // Compile the program
- error = clBuildProgram( program,
- 1,
- &device,
- clOptions,
- NULL,
- NULL);
- // Print warnings and errors from compilation
- static char log[65536];
- memset(log, 0, sizeof(log));
- clGetProgramBuildInfo( program,
- device,
- CL_PROGRAM_BUILD_LOG,
- sizeof(log)-1,
- log,
- NULL);
- printf("-----OpenCL Compiler Output-----\n");
- if (strstr(log,"warning:") || strstr(log, "error:"))
- printf("<<<<\n%s\n>>>>\n", log);
- printf("--------------------------------\n");
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // Create kernel
- cl_kernel kernel;
- kernel = clCreateKernel(program,
- "findK",
- &error);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- time1 = get_time();
- //====================================================================================================100
- // END
- //====================================================================================================100
- //======================================================================================================================================================150
- // GPU MEMORY (MALLOC)
- //======================================================================================================================================================150
- //====================================================================================================100
- // DEVICE IN
- //====================================================================================================100
- //==================================================50
- // recordsD
- //==================================================50
- cl_mem recordsD;
- recordsD = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- records_mem,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // knodesD
- //==================================================50
- cl_mem knodesD;
- knodesD = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- knodes_mem,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // currKnodeD
- //==================================================50
- cl_mem currKnodeD;
- currKnodeD = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- count*sizeof(long),
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // offsetD
- //==================================================50
- cl_mem offsetD;
- offsetD = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- count*sizeof(long),
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // keysD
- //==================================================50
- cl_mem keysD;
- keysD = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- count*sizeof(long),
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // END
- //==================================================50
- //====================================================================================================100
- // DEVICE IN/OUT
- //====================================================================================================100
- //==================================================50
- // ansD
- //==================================================50
- cl_mem ansD;
- ansD = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- count*sizeof(record),
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- time2 = get_time();
- //==================================================50
- // END
- //==================================================50
- //====================================================================================================100
- // END
- //====================================================================================================100
- //======================================================================================================================================================150
- // GPU MEMORY COPY
- //======================================================================================================================================================150
- //====================================================================================================100
- // GPU MEMORY (MALLOC) COPY IN
- //====================================================================================================100
- //==================================================50
- // recordsD
- //==================================================50
- error = clEnqueueWriteBuffer( command_queue, // command queue
- recordsD, // destination
- 1, // block the source from access until this copy operation complates (1=yes, 0=no)
- 0, // offset in destination to write to
- records_mem, // size to be copied
- records, // source
- 0, // # of events in the list of events to wait for
- NULL, // list of events to wait for
- NULL); // ID of this operation to be used by waiting operations
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // knodesD
- //==================================================50
- error = clEnqueueWriteBuffer( command_queue, // command queue
- knodesD, // destination
- 1, // block the source from access until this copy operation complates (1=yes, 0=no)
- 0, // offset in destination to write to
- knodes_mem, // size to be copied
- knodes, // source
- 0, // # of events in the list of events to wait for
- NULL, // list of events to wait for
- NULL); // ID of this operation to be used by waiting operations
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // currKnodeD
- //==================================================50
- error = clEnqueueWriteBuffer( command_queue, // command queue
- currKnodeD, // destination
- 1, // block the source from access until this copy operation complates (1=yes, 0=no)
- 0, // offset in destination to write to
- count*sizeof(long), // size to be copied
- currKnode, // source
- 0, // # of events in the list of events to wait for
- NULL, // list of events to wait for
- NULL); // ID of this operation to be used by waiting operations
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // offsetD
- //==================================================50
- error = clEnqueueWriteBuffer( command_queue, // command queue
- offsetD, // destination
- 1, // block the source from access until this copy operation complates (1=yes, 0=no)
- 0, // offset in destination to write to
- count*sizeof(long), // size to be copied
- offset, // source
- 0, // # of events in the list of events to wait for
- NULL, // list of events to wait for
- NULL); // ID of this operation to be used by waiting operations
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // keysD
- //==================================================50
- error = clEnqueueWriteBuffer( command_queue, // command queue
- keysD, // destination
- 1, // block the source from access until this copy operation complates (1=yes, 0=no)
- 0, // offset in destination to write to
- count*sizeof(int), // size to be copied
- keys, // source
- 0, // # of events in the list of events to wait for
- NULL, // list of events to wait for
- NULL); // ID of this operation to be used by waiting operations
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // END
- //==================================================50
- //====================================================================================================100
- // DEVICE IN/OUT
- //====================================================================================================100
- //==================================================50
- // ansD
- //==================================================50
- error = clEnqueueWriteBuffer( command_queue, // command queue
- ansD, // destination
- 1, // block the source from access until this copy operation complates (1=yes, 0=no)
- 0, // offset in destination to write to
- count*sizeof(record), // size to be copied
- ans, // source
- 0, // # of events in the list of events to wait for
- NULL, // list of events to wait for
- NULL); // ID of this operation to be used by waiting operations
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- time3 = get_time();
- //==================================================50
- // END
- //==================================================50
- //====================================================================================================100
- // END
- //====================================================================================================100
- //======================================================================================================================================================150
- // findK kernel
- //======================================================================================================================================================150
- //====================================================================================================100
- // Execution Parameters
- //====================================================================================================100
- size_t local_work_size[1];
- local_work_size[0] = order < 1024 ? order : 1024;
- size_t global_work_size[1];
- global_work_size[0] = count * local_work_size[0];
- printf("# of blocks = %d, # of threads/block = %d (ensure that device can handle)\n", (int)(global_work_size[0]/local_work_size[0]), (int)local_work_size[0]);
- //====================================================================================================100
- // Kernel Arguments
- //====================================================================================================100
- clSetKernelArg( kernel,
- 0,
- sizeof(long),
- (void *) &maxheight);
- clSetKernelArg( kernel,
- 1,
- sizeof(cl_mem),
- (void *) &knodesD);
- clSetKernelArg( kernel,
- 2,
- sizeof(long),
- (void *) &knodes_elem);
- clSetKernelArg( kernel,
- 3,
- sizeof(cl_mem),
- (void *) &recordsD);
- clSetKernelArg( kernel,
- 4,
- sizeof(cl_mem),
- (void *) &currKnodeD);
- clSetKernelArg( kernel,
- 5,
- sizeof(cl_mem),
- (void *) &offsetD);
- clSetKernelArg( kernel,
- 6,
- sizeof(cl_mem),
- (void *) &keysD);
- clSetKernelArg( kernel,
- 7,
- sizeof(cl_mem),
- (void *) &ansD);
- //====================================================================================================100
- // Kernel
- //====================================================================================================100
- error = clEnqueueNDRangeKernel( command_queue,
- kernel,
- 1,
- NULL,
- global_work_size,
- local_work_size,
- 0,
- NULL,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // Wait for all operations to finish NOT SURE WHERE THIS SHOULD GO
- error = clFinish(command_queue);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- time4 = get_time();
- //====================================================================================================100
- // END
- //====================================================================================================100
- //======================================================================================================================================================150
- // GPU MEMORY COPY (CONTD.)
- //======================================================================================================================================================150
- //====================================================================================================100
- // DEVICE IN/OUT
- //====================================================================================================100
- //==================================================50
- // ansD
- //==================================================50
- error = clEnqueueReadBuffer(command_queue, // The command queue.
- ansD, // The image on the device.
- CL_TRUE, // Blocking? (ie. Wait at this line until read has finished?)
- 0, // Offset. None in this case.
- count*sizeof(record), // Size to copy.
- ans, // The pointer to the image on the host.
- 0, // Number of events in wait list. Not used.
- NULL, // Event wait list. Not used.
- NULL); // Event object for determining status. Not used.
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- time5 = get_time();
- //==================================================50
- // END
- //==================================================50
- //====================================================================================================100
- // END
- //====================================================================================================100
- //======================================================================================================================================================150
- // GPU MEMORY DEALLOCATION
- //======================================================================================================================================================150
- // Release kernels...
- clReleaseKernel(kernel);
- // Now the program...
- clReleaseProgram(program);
- // Clean up the device memory...
- clReleaseMemObject(recordsD);
- clReleaseMemObject(knodesD);
- clReleaseMemObject(currKnodeD);
- clReleaseMemObject(offsetD);
- clReleaseMemObject(keysD);
- clReleaseMemObject(ansD);
- // Flush the queue
- error = clFlush(command_queue);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // ...and finally, the queue and context.
- clReleaseCommandQueue(command_queue);
- // ???
- clReleaseContext(context);
- time6 = get_time();
- //======================================================================================================================================================150
- // DISPLAY TIMING
- //======================================================================================================================================================150
- printf("Time spent in different stages of GPU_CUDA KERNEL:\n");
- printf("%15.12f s, %15.12f % : GPU: SET DEVICE / DRIVER INIT\n", (float) (time1-time0) / 1000000, (float) (time1-time0) / (float) (time6-time0) * 100);
- printf("%15.12f s, %15.12f % : GPU MEM: ALO\n", (float) (time2-time1) / 1000000, (float) (time2-time1) / (float) (time6-time0) * 100);
- printf("%15.12f s, %15.12f % : GPU MEM: COPY IN\n", (float) (time3-time2) / 1000000, (float) (time3-time2) / (float) (time6-time0) * 100);
- printf("%15.12f s, %15.12f % : GPU: KERNEL\n", (float) (time4-time3) / 1000000, (float) (time4-time3) / (float) (time6-time0) * 100);
- printf("%15.12f s, %15.12f % : GPU MEM: COPY OUT\n", (float) (time5-time4) / 1000000, (float) (time5-time4) / (float) (time6-time0) * 100);
- printf("%15.12f s, %15.12f % : GPU MEM: FRE\n", (float) (time6-time5) / 1000000, (float) (time6-time5) / (float) (time6-time0) * 100);
- printf("Total time:\n");
- printf("%.12f s\n", (float) (time6-time0) / 1000000);
- //======================================================================================================================================================150
- // END
- //======================================================================================================================================================150
- }
- //========================================================================================================================================================================================================200
- // END
- //========================================================================================================================================================================================================200
- // #ifdef __cplusplus
- // }
- // #endif
|