123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772 |
- // #ifdef __cplusplus
- // extern "C" {
- // #endif
- //========================================================================================================================================================================================================200
- // INCLUDE
- //========================================================================================================================================================================================================200
- //======================================================================================================================================================150
- // LIBRARIES
- //======================================================================================================================================================150
- #include <CL/cl.h> // (in directory provided to compiler) needed by OpenCL types and functions
- #include <string.h> // (in directory known to compiler) needed by memset
- #include <stdio.h> // (in directory known to compiler) needed by printf, stderr
- //======================================================================================================================================================150
- // COMMON
- //======================================================================================================================================================150
- #include "../common.h" // (in directory provided here)
- //======================================================================================================================================================150
- // UTILITIES
- //======================================================================================================================================================150
- #include "../util/timer/timer.h" // (in directory provided here)
- //======================================================================================================================================================150
- // HEADER
- //======================================================================================================================================================150
- #include "./kernel_gpu_opencl_wrapper_2.h" // (in directory provided here)
- //========================================================================================================================================================================================================200
- // FUNCTION
- //========================================================================================================================================================================================================200
- void
- kernel_gpu_opencl_wrapper_2(knode *knodes,
- long knodes_elem,
- long knodes_mem,
- int order,
- long maxheight,
- int count,
- long *currKnode,
- long *offset,
- long *lastKnode,
- long *offset_2,
- int *start,
- int *end,
- int *recstart,
- int *reclength)
- {
- //======================================================================================================================================================150
- // CPU VARIABLES
- //======================================================================================================================================================150
- // timer
- long long time0;
- long long time1;
- long long time2;
- long long time3;
- long long time4;
- long long time5;
- long long time6;
- time0 = get_time();
- //======================================================================================================================================================150
- // GPU SETUP
- //======================================================================================================================================================150
- //====================================================================================================100
- // INITIAL DRIVER OVERHEAD
- //====================================================================================================100
- // cudaThreadSynchronize();
- //====================================================================================================100
- // COMMON VARIABLES
- //====================================================================================================100
- // common variables
- cl_int error;
- //====================================================================================================100
- // GET PLATFORMS (Intel, AMD, NVIDIA, based on provided library), SELECT ONE
- //====================================================================================================100
- // Get the number of available platforms
- cl_uint num_platforms;
- error = clGetPlatformIDs( 0,
- NULL,
- &num_platforms);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // Get the list of available platforms
- cl_platform_id *platforms = (cl_platform_id *)malloc(sizeof(cl_platform_id) * num_platforms);
- error = clGetPlatformIDs( num_platforms,
- platforms,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // Select the 1st platform
- cl_platform_id platform = platforms[0];
- // Get the name of the selected platform and print it (if there are multiple platforms, choose the first one)
- char pbuf[100];
- error = clGetPlatformInfo( platform,
- CL_PLATFORM_VENDOR,
- sizeof(pbuf),
- pbuf,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- printf("Platform: %s\n", pbuf);
- //====================================================================================================100
- // CREATE CONTEXT FOR THE PLATFORM
- //====================================================================================================100
- // Create context properties for selected platform
- cl_context_properties context_properties[3] = { CL_CONTEXT_PLATFORM,
- (cl_context_properties) platform,
- 0};
- // Create context for selected platform being GPU
- cl_context context;
- context = clCreateContextFromType( context_properties,
- CL_DEVICE_TYPE_GPU,
- NULL,
- NULL,
- &error);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // GET DEVICES AVAILABLE FOR THE CONTEXT, SELECT ONE
- //====================================================================================================100
- // Get the number of devices (previousely selected for the context)
- size_t devices_size;
- error = clGetContextInfo( context,
- CL_CONTEXT_DEVICES,
- 0,
- NULL,
- &devices_size);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // Get the list of devices (previousely selected for the context)
- cl_device_id *devices = (cl_device_id *) malloc(devices_size);
- error = clGetContextInfo( context,
- CL_CONTEXT_DEVICES,
- devices_size,
- devices,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // Select the first device (previousely selected for the context) (if there are multiple devices, choose the first one)
- cl_device_id device;
- device = devices[0];
- // Get the name of the selected device (previousely selected for the context) and print it
- error = clGetDeviceInfo(device,
- CL_DEVICE_NAME,
- sizeof(pbuf),
- pbuf,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- printf("Device: %s\n", pbuf);
- //====================================================================================================100
- // CREATE COMMAND QUEUE FOR THE DEVICE
- //====================================================================================================100
- // Create a command queue
- cl_command_queue command_queue;
- command_queue = clCreateCommandQueue( context,
- device,
- 0,
- &error);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // CREATE PROGRAM, COMPILE IT
- //====================================================================================================100
- // Load kernel source code from file
- const char *source = load_kernel_source("./kernel/kernel_gpu_opencl_2.cl");
- size_t sourceSize = strlen(source);
- // Create the program
- cl_program program = clCreateProgramWithSource( context,
- 1,
- &source,
- &sourceSize,
- &error);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- char clOptions[110];
- // sprintf(clOptions,"-I../../src");
- sprintf(clOptions,"-I./../");
- #ifdef DEFAULT_ORDER_2
- sprintf(clOptions + strlen(clOptions), " -DDEFAULT_ORDER_2=%d", DEFAULT_ORDER_2);
- #endif
- // Compile the program
- error = clBuildProgram( program,
- 1,
- &device,
- clOptions,
- NULL,
- NULL);
- // Print warnings and errors from compilation
- static char log[65536];
- memset(log, 0, sizeof(log));
- clGetProgramBuildInfo( program,
- device,
- CL_PROGRAM_BUILD_LOG,
- sizeof(log)-1,
- log,
- NULL);
- printf("-----OpenCL Compiler Output-----\n");
- if (strstr(log,"warning:") || strstr(log, "error:"))
- printf("<<<<\n%s\n>>>>\n", log);
- printf("--------------------------------\n");
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // Create kernel
- cl_kernel kernel;
- kernel = clCreateKernel(program,
- "findRangeK",
- &error);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- time1 = get_time();
- //====================================================================================================100
- // END
- //====================================================================================================100
- //======================================================================================================================================================150
- // GPU MEMORY MALLOC
- //======================================================================================================================================================150
- //====================================================================================================100
- // DEVICE IN
- //====================================================================================================100
- //==================================================50
- // knodesD
- //==================================================50
- cl_mem knodesD;
- knodesD = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- knodes_mem,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // currKnodeD
- //==================================================50
- cl_mem currKnodeD;
- currKnodeD = clCreateBuffer(context,
- CL_MEM_READ_WRITE,
- count*sizeof(long),
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // offsetD
- //==================================================50
- cl_mem offsetD;
- offsetD = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- count*sizeof(long),
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // lastKnodeD
- //==================================================50
- cl_mem lastKnodeD;
- lastKnodeD = clCreateBuffer(context,
- CL_MEM_READ_WRITE,
- count*sizeof(long),
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // offset_2D
- //==================================================50
- cl_mem offset_2D;
- offset_2D = clCreateBuffer(context,
- CL_MEM_READ_WRITE,
- count*sizeof(long),
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // startD
- //==================================================50
- cl_mem startD;
- startD = clCreateBuffer(context,
- CL_MEM_READ_WRITE,
- count*sizeof(int),
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // endD
- //==================================================50
- cl_mem endD;
- endD = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- count*sizeof(int),
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // END
- //==================================================50
- //====================================================================================================100
- // DEVICE IN/OUT
- //====================================================================================================100
- //==================================================50
- // ansDStart
- //==================================================50
- cl_mem ansDStart;
- ansDStart = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- count*sizeof(int),
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // ansDLength
- //==================================================50
- cl_mem ansDLength;
- ansDLength = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- count*sizeof(int),
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- time2 = get_time();
- //==================================================50
- // END
- //==================================================50
- //====================================================================================================100
- // END
- //====================================================================================================100
- //======================================================================================================================================================150
- // GPU MEMORY COPY
- //======================================================================================================================================================150
- //====================================================================================================100
- // DEVICE IN
- //====================================================================================================100
- //==================================================50
- // knodesD
- //==================================================50
- error = clEnqueueWriteBuffer( command_queue, // command queue
- knodesD, // destination
- 1, // block the source from access until this copy operation complates (1=yes, 0=no)
- 0, // offset in destination to write to
- knodes_mem, // size to be copied
- knodes, // source
- 0, // # of events in the list of events to wait for
- NULL, // list of events to wait for
- NULL); // ID of this operation to be used by waiting operations
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // currKnodeD
- //==================================================50
- error = clEnqueueWriteBuffer( command_queue, // command queue
- currKnodeD, // destination
- 1, // block the source from access until this copy operation complates (1=yes, 0=no)
- 0, // offset in destination to write to
- count*sizeof(long), // size to be copied
- currKnode, // source
- 0, // # of events in the list of events to wait for
- NULL, // list of events to wait for
- NULL); // ID of this operation to be used by waiting operations
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // offsetD
- //==================================================50
- error = clEnqueueWriteBuffer( command_queue, // command queue
- offsetD, // destination
- 1, // block the source from access until this copy operation complates (1=yes, 0=no)
- 0, // offset in destination to write to
- count*sizeof(long), // size to be copied
- offset, // source
- 0, // # of events in the list of events to wait for
- NULL, // list of events to wait for
- NULL); // ID of this operation to be used by waiting operations
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // lastKnodeD
- //==================================================50
- error = clEnqueueWriteBuffer( command_queue, // command queue
- lastKnodeD, // destination
- 1, // block the source from access until this copy operation complates (1=yes, 0=no)
- 0, // offset in destination to write to
- count*sizeof(long), // size to be copied
- lastKnode, // source
- 0, // # of events in the list of events to wait for
- NULL, // list of events to wait for
- NULL); // ID of this operation to be used by waiting operations
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // offset_2D
- //==================================================50
- error = clEnqueueWriteBuffer( command_queue, // command queue
- offset_2D, // destination
- 1, // block the source from access until this copy operation complates (1=yes, 0=no)
- 0, // offset in destination to write to
- count*sizeof(long), // size to be copied
- offset_2, // source
- 0, // # of events in the list of events to wait for
- NULL, // list of events to wait for
- NULL); // ID of this operation to be used by waiting operations
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // startD
- //==================================================50
- error = clEnqueueWriteBuffer( command_queue, // command queue
- startD, // destination
- 1, // block the source from access until this copy operation complates (1=yes, 0=no)
- 0, // offset in destination to write to
- count*sizeof(int), // size to be copied
- start, // source
- 0, // # of events in the list of events to wait for
- NULL, // list of events to wait for
- NULL); // ID of this operation to be used by waiting operations
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // endD
- //==================================================50
- error = clEnqueueWriteBuffer( command_queue, // command queue
- endD, // destination
- 1, // block the source from access until this copy operation complates (1=yes, 0=no)
- 0, // offset in destination to write to
- count*sizeof(int), // size to be copied
- end, // source
- 0, // # of events in the list of events to wait for
- NULL, // list of events to wait for
- NULL); // ID of this operation to be used by waiting operations
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // END
- //==================================================50
- //====================================================================================================100
- // DEVICE IN/OUT
- //====================================================================================================100
- //==================================================50
- // ansDStart
- //==================================================50
- error = clEnqueueWriteBuffer( command_queue, // command queue
- endD, // destination
- 1, // block the source from access until this copy operation complates (1=yes, 0=no)
- 0, // offset in destination to write to
- count*sizeof(int), // size to be copied
- end, // source
- 0, // # of events in the list of events to wait for
- NULL, // list of events to wait for
- NULL); // ID of this operation to be used by waiting operations
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // ansDLength
- //==================================================50
- error = clEnqueueWriteBuffer( command_queue, // command queue
- ansDLength, // destination
- 1, // block the source from access until this copy operation complates (1=yes, 0=no)
- 0, // offset in destination to write to
- count*sizeof(int), // size to be copied
- reclength, // source
- 0, // # of events in the list of events to wait for
- NULL, // list of events to wait for
- NULL); // ID of this operation to be used by waiting operations
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- time3 = get_time();
- //==================================================50
- // END
- //==================================================50
- //======================================================================================================================================================150
- // KERNEL
- //======================================================================================================================================================150
- //====================================================================================================100
- // Execution Parameters
- //====================================================================================================100
- size_t local_work_size[1];
- local_work_size[0] = order < 1024 ? order : 1024;
- size_t global_work_size[1];
- global_work_size[0] = count * local_work_size[0];
- printf("# of blocks = %d, # of threads/block = %d (ensure that device can handle)\n", (int)(global_work_size[0]/local_work_size[0]), (int)local_work_size[0]);
- //====================================================================================================100
- // Kernel Arguments
- //====================================================================================================100
- clSetKernelArg( kernel,
- 0,
- sizeof(long),
- (void *) &maxheight);
- clSetKernelArg( kernel,
- 1,
- sizeof(cl_mem),
- (void *) &knodesD);
- clSetKernelArg( kernel,
- 2,
- sizeof(long),
- (void *) &knodes_elem);
- clSetKernelArg( kernel,
- 3,
- sizeof(cl_mem),
- (void *) &currKnodeD);
- clSetKernelArg( kernel,
- 4,
- sizeof(cl_mem),
- (void *) &offsetD);
- clSetKernelArg( kernel,
- 5,
- sizeof(cl_mem),
- (void *) &lastKnodeD);
- clSetKernelArg( kernel,
- 6,
- sizeof(cl_mem),
- (void *) &offset_2D);
- clSetKernelArg( kernel,
- 7,
- sizeof(cl_mem),
- (void *) &startD);
- clSetKernelArg( kernel,
- 8,
- sizeof(cl_mem),
- (void *) &endD);
- clSetKernelArg( kernel,
- 9,
- sizeof(cl_mem),
- (void *) &ansDStart);
- clSetKernelArg( kernel,
- 10,
- sizeof(cl_mem),
- (void *) &ansDLength);
- //====================================================================================================100
- // Kernel
- //====================================================================================================100
- error = clEnqueueNDRangeKernel( command_queue,
- kernel,
- 1,
- NULL,
- global_work_size,
- local_work_size,
- 0,
- NULL,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // Wait for all operations to finish NOT SURE WHERE THIS SHOULD GO
- error = clFinish(command_queue);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- time4 = get_time();
- //====================================================================================================100
- // END
- //====================================================================================================100
- //======================================================================================================================================================150
- // GPU MEMORY COPY (CONTD.)
- //======================================================================================================================================================150
- //====================================================================================================100
- // DEVICE IN/OUT
- //====================================================================================================100
- //==================================================50
- // ansDStart
- //==================================================50
- error = clEnqueueReadBuffer(command_queue, // The command queue.
- ansDStart, // The image on the device.
- CL_TRUE, // Blocking? (ie. Wait at this line until read has finished?)
- 0, // Offset. None in this case.
- count*sizeof(int), // Size to copy.
- recstart, // The pointer to the image on the host.
- 0, // Number of events in wait list. Not used.
- NULL, // Event wait list. Not used.
- NULL); // Event object for determining status. Not used.
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // ansDLength
- //==================================================50
- error = clEnqueueReadBuffer(command_queue, // The command queue.
- ansDLength, // The image on the device.
- CL_TRUE, // Blocking? (ie. Wait at this line until read has finished?)
- 0, // Offset. None in this case.
- count*sizeof(int), // Size to copy.
- reclength, // The pointer to the image on the host.
- 0, // Number of events in wait list. Not used.
- NULL, // Event wait list. Not used.
- NULL); // Event object for determining status. Not used.
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- time5 = get_time();
- //==================================================50
- // END
- //==================================================50
- //====================================================================================================100
- // END
- //====================================================================================================100
- //======================================================================================================================================================150
- // GPU MEMORY DEALLOCATION
- //======================================================================================================================================================150
- // Release kernels...
- clReleaseKernel(kernel);
- // Now the program...
- clReleaseProgram(program);
- // Clean up the device memory...
- clReleaseMemObject(knodesD);
- clReleaseMemObject(currKnodeD);
- clReleaseMemObject(offsetD);
- clReleaseMemObject(lastKnodeD);
- clReleaseMemObject(offset_2D);
- clReleaseMemObject(startD);
- clReleaseMemObject(endD);
- clReleaseMemObject(ansDStart);
- clReleaseMemObject(ansDLength);
- // Flush the queue
- error = clFlush(command_queue);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // ...and finally, the queue and context.
- clReleaseCommandQueue(command_queue);
- // ???
- clReleaseContext(context);
- time6 = get_time();
- //======================================================================================================================================================150
- // DISPLAY TIMING
- //======================================================================================================================================================150
- printf("Time spent in different stages of GPU_CUDA KERNEL:\n");
- printf("%15.12f s, %15.12f % : GPU: SET DEVICE / DRIVER INIT\n", (float) (time1-time0) / 1000000, (float) (time1-time0) / (float) (time6-time0) * 100);
- printf("%15.12f s, %15.12f % : GPU MEM: ALO\n", (float) (time2-time1) / 1000000, (float) (time2-time1) / (float) (time6-time0) * 100);
- printf("%15.12f s, %15.12f % : GPU MEM: COPY IN\n", (float) (time3-time2) / 1000000, (float) (time3-time2) / (float) (time6-time0) * 100);
- printf("%15.12f s, %15.12f % : GPU: KERNEL\n", (float) (time4-time3) / 1000000, (float) (time4-time3) / (float) (time6-time0) * 100);
- printf("%15.12f s, %15.12f % : GPU MEM: COPY OUT\n", (float) (time5-time4) / 1000000, (float) (time5-time4) / (float) (time6-time0) * 100);
- printf("%15.12f s, %15.12f % : GPU MEM: FRE\n", (float) (time6-time5) / 1000000, (float) (time6-time5) / (float) (time6-time0) * 100);
- printf("Total time:\n");
- printf("%.12f s\n", (float) (time6-time0) / 1000000);
- //======================================================================================================================================================150
- // END
- //======================================================================================================================================================150
- }
- //========================================================================================================================================================================================================200
- // END
- //========================================================================================================================================================================================================200
- // #ifdef __cplusplus
- // }
- // #endif
|