123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292 |
- #include "find_ellipse.h"
- #include "find_ellipse_opencl.h"
- #include "OpenCL_helper_library.h"
- #include <CL/cl.h>
- #include <stdio.h>
- // Defined if we want to use images/textures
- /* #define USE_IMAGE */
- // The number of sample points in each ellipse (stencil)
- #define NPOINTS 150
- // The maximum radius of a sample ellipse
- #define MAX_RAD 20
- // The total number of sample ellipses
- #define NCIRCLES 7
- // The size of the structuring element used in dilation
- #define STREL_SIZE (12 * 2 + 1)
- // Matrix used to store the maximal GICOV score at each pixels
- // Produced by the GICOV kernel and consumed by the dilation kernel
- cl_mem device_gicov;
- // Device arrays holding the stencil parameters used by the GICOV kernel
- cl_mem c_sin_angle, c_cos_angle, c_tX, c_tY;
- // Dilate kernel
- cl_kernel dilate_kernel;
- // Sets up and invokes the GICOV kernel and returns its output
- float *GICOV_OpenCL(int grad_m, int grad_n, float *host_grad_x, float *host_grad_y) {
-
- cl_int error;
- int MaxR = MAX_RAD + 2;
- // Allocate device buffers and transfer data
- unsigned int grad_mem_size = sizeof(float) * grad_m * grad_n;
- cl_mem device_grad_x, device_grad_y;
- #ifdef USE_IMAGE
- // Define the image parameters
- cl_image_format image_format;
- image_format.image_channel_order = CL_R;
- image_format.image_channel_data_type = CL_FLOAT;
- // Create images (textures)
- device_grad_x = clCreateImage2D(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &image_format, grad_m, grad_n, 0, host_grad_x, &error);
- check_error(error, __FILE__, __LINE__);
- device_grad_y = clCreateImage2D(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &image_format, grad_m, grad_n, 0, host_grad_y, &error);
- check_error(error, __FILE__, __LINE__);
- #else
- // Create buffers
- device_grad_x = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, grad_mem_size, host_grad_x, &error);
- check_error(error, __FILE__, __LINE__);
- device_grad_y = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, grad_mem_size, host_grad_y, &error);
- check_error(error, __FILE__, __LINE__);
- #endif
- // Allocate & initialize device memory for result
- // (some elements are not assigned values in the kernel)
- // Since there is no OpenCL version of cudaMemset, we first allocate and initialize
- // the host-side copy of the buffer and then transfer that to the device
- float *host_gicov = (float *) malloc(grad_mem_size);
- memset(host_gicov, 0, grad_mem_size);
- device_gicov = clCreateBuffer(context, CL_MEM_READ_WRITE | /*CL_MEM_COPY_HOST_PTR*/ CL_MEM_USE_HOST_PTR, grad_mem_size, host_gicov, &error);
- check_error(error, __FILE__, __LINE__);
-
- // Load the kernel source from the file
- const char *source = load_kernel_source("find_ellipse_kernel.cl");
- size_t sourceSize = strlen(source);
-
- // Compile the kernel
- cl_program program = clCreateProgramWithSource(context, 1, &source, &sourceSize, &error);
- check_error(error, __FILE__, __LINE__);
- #ifdef USE_IMAGE
- error = clBuildProgram(program, 1, &device, "-D USE_IMAGE", NULL, NULL);
- #else
- error = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
- #endif
- // Show compiler warnings/errors
- static char log[65536]; memset(log, 0, sizeof(log));
- clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(log)-1, log, NULL);
- if (strstr(log,"warning:") || strstr(log, "error:")) printf("<<<<\n%s\n>>>>\n", log);
- check_error(error, __FILE__, __LINE__);
- // Create both kernels (GICOV and dilate)
- cl_kernel GICOV_kernel = clCreateKernel(program, "GICOV_kernel", &error);
- check_error(error, __FILE__, __LINE__);
- dilate_kernel = clCreateKernel(program, "dilate_kernel", &error);
- check_error(error, __FILE__, __LINE__);
-
- // Setup execution parameters
- cl_int local_work_size = grad_m - (2 * MaxR);
- cl_int num_work_groups = grad_n - (2 * MaxR);
-
- // Set the kernel arguments
- clSetKernelArg(GICOV_kernel, 0, sizeof(cl_int), (void *) &grad_m);
- clSetKernelArg(GICOV_kernel, 1, sizeof(cl_mem), (void *) &device_grad_x);
- clSetKernelArg(GICOV_kernel, 2, sizeof(cl_mem), (void *) &device_grad_y);
- clSetKernelArg(GICOV_kernel, 3, sizeof(cl_mem), (void *) &c_sin_angle);
- clSetKernelArg(GICOV_kernel, 4, sizeof(cl_mem), (void *) &c_cos_angle);
- clSetKernelArg(GICOV_kernel, 5, sizeof(cl_mem), (void *) &c_tX);
- clSetKernelArg(GICOV_kernel, 6, sizeof(cl_mem), (void *) &c_tY);
- clSetKernelArg(GICOV_kernel, 7, sizeof(cl_mem), (void *) &device_gicov);
- clSetKernelArg(GICOV_kernel, 8, sizeof(cl_int), (void *) &local_work_size);
- clSetKernelArg(GICOV_kernel, 9, sizeof(cl_int), (void *) &num_work_groups);
- size_t work_group_size = 256;
- size_t global_work_size = num_work_groups * local_work_size;
- if(global_work_size % work_group_size > 0)
- global_work_size=(global_work_size / work_group_size+1)*work_group_size;
- printf("Find: local_work_size = %d, global_work_size = %d \n"
- ,work_group_size, global_work_size);
- // Execute the GICOV kernel
- error = clEnqueueNDRangeKernel(command_queue, GICOV_kernel, 1, NULL, &global_work_size, &work_group_size, 0, NULL, NULL);
- check_error(error, __FILE__, __LINE__);
-
- // Check for kernel errors
- error = clFinish(command_queue);
- check_error(error, __FILE__, __LINE__);
- // Copy the result to the host
- host_gicov = (cl_float *) clEnqueueMapBuffer(command_queue, device_gicov, CL_TRUE, CL_MAP_READ, 0, grad_mem_size, 0, NULL, NULL, &error);
- check_error(error, __FILE__, __LINE__);
- // Cleanup memory
- clReleaseMemObject(device_grad_x);
- clReleaseMemObject(device_grad_y);
- return host_gicov;
- }
- // Constant device array holding the structuring element used by the dilation kernel
- cl_mem c_strel;
- // Sets up and invokes the dilation kernel and returns its output
- float *dilate_OpenCL(int max_gicov_m, int max_gicov_n, int strel_m, int strel_n) {
- cl_int error;
-
- // Allocate device memory for result
- unsigned int max_gicov_mem_size = sizeof(float) * max_gicov_m * max_gicov_n;
- cl_mem device_img_dilated = clCreateBuffer(context, CL_MEM_WRITE_ONLY, max_gicov_mem_size, NULL, &error);
- check_error(error, __FILE__, __LINE__);
-
- #ifdef USE_IMAGE
- // Copy the input matrix of GICOV values to an image
- // Define the image parameters
- cl_image_format image_format;
- image_format.image_channel_order = CL_R;
- image_format.image_channel_data_type = CL_FLOAT;
- // Create the image
- cl_mem device_gicov_image = clCreateImage2D(context, CL_MEM_READ_ONLY, &image_format, max_gicov_m, max_gicov_n, 0, NULL, &error);
- check_error(error, __FILE__, __LINE__);
- // Copy the GICOV data to the image
- size_t offset[3] = {0, 0, 0};
- size_t region[3] = {max_gicov_m, max_gicov_n, 1};
- error = clEnqueueCopyBufferToImage(command_queue, device_gicov, device_gicov_image, 0, offset, region, 0, NULL, NULL);
- check_error(error, __FILE__, __LINE__);
- #endif
-
- // Setup execution parameters
- size_t global_work_size = max_gicov_m * max_gicov_n;
- size_t local_work_size = 176;
- // Make sure the global work size is a multiple of the local work size
- if (global_work_size % local_work_size != 0) {
- global_work_size = ((global_work_size / local_work_size) + 1) * local_work_size;
- }
-
- // Set the kernel arguments
- clSetKernelArg(dilate_kernel, 0, sizeof(cl_int), (void *) &max_gicov_m);
- clSetKernelArg(dilate_kernel, 1, sizeof(cl_int), (void *) &max_gicov_n);
- clSetKernelArg(dilate_kernel, 2, sizeof(cl_int), (void *) &strel_m);
- clSetKernelArg(dilate_kernel, 3, sizeof(cl_int), (void *) &strel_n);
- clSetKernelArg(dilate_kernel, 4, sizeof(cl_mem), (void *) &c_strel);
- #ifdef USE_IMAGE
- clSetKernelArg(dilate_kernel, 5, sizeof(cl_mem), (void *) &device_gicov_image);
- #else
- clSetKernelArg(dilate_kernel, 5, sizeof(cl_mem), (void *) &device_gicov);
- #endif
- clSetKernelArg(dilate_kernel, 6, sizeof(cl_mem), (void *) &device_img_dilated);
- // Execute the dilation kernel
- error = clEnqueueNDRangeKernel(command_queue, dilate_kernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, NULL);
- check_error(error, __FILE__, __LINE__);
-
- // Check for kernel errors
- error = clFinish(command_queue);
- check_error(error, __FILE__, __LINE__);
- // Copy the result to the host
- // float *host_img_dilated = (cl_float *) clEnqueueMapBuffer(command_queue, device_img_dilated, CL_TRUE, CL_MAP_READ, 0, max_gicov_mem_size, 0, NULL, NULL, &error);
- float *host_img_dilated = (float*) malloc(max_gicov_mem_size);
- error = clEnqueueReadBuffer(command_queue, device_img_dilated, CL_TRUE, 0, max_gicov_mem_size, host_img_dilated, 0, NULL, NULL);
- check_error(error, __FILE__, __LINE__);
- // Cleanup memory
- clReleaseMemObject(device_gicov);
- #ifdef USE_IMAGE
- clReleaseMemObject(device_gicov_image);
- #endif
- clReleaseMemObject(device_img_dilated);
- return host_img_dilated;
- }
- // Chooses the most appropriate GPU on which to execute
- void select_device(int platform_id, int device_id, int use_gpu) {
- cl_int error;
- // Determine the number of platforms
- cl_uint num_platforms;
- error = clGetPlatformIDs(0, NULL, &num_platforms);
- check_error(error, __FILE__, __LINE__);
-
- // Make sure at least one platform is available
- if (num_platforms == 0) {
- printf("Error: No OpenCL platforms available\n");
- exit(EXIT_FAILURE);
- }
-
- // Get the list of platforms
- cl_platform_id *platform_ids = (cl_platform_id *) malloc(sizeof(cl_platform_id) * num_platforms);
- error = clGetPlatformIDs(num_platforms, platform_ids, NULL);
- check_error(error, __FILE__, __LINE__);
-
- // Selector for the device type in accordance to what passed as parameter
- cl_device_type device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
- // Create an OpenCL context
- cl_context_properties ctxprop[] = { CL_CONTEXT_PLATFORM, (cl_context_properties) platform_ids[platform_id], 0};
- context = clCreateContextFromType(ctxprop, device_type, NULL, NULL, &error);
- // If this platform has no GPU, try the next one
- check_error(error, __FILE__, __LINE__);
-
- // Get the list of devices (GPUs or CPUs)
- size_t size;
- error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size);
- check_error(error, __FILE__, __LINE__);
- cl_device_id *device_list = (cl_device_id *) malloc(size);
- error = clGetContextInfo(context, CL_CONTEXT_DEVICES, size, device_list, NULL);
- check_error(error, __FILE__, __LINE__);
- // Create a command queue for the device passed as parameter
- device = device_list[device_id];
- command_queue = clCreateCommandQueue(context, device, 0, &error);
- check_error(error, __FILE__, __LINE__);
-
- // Print the device name
- char cBuffer[1024];
- clGetDeviceInfo(device_list[0], CL_DEVICE_NAME, sizeof(cBuffer), &cBuffer, NULL);
- printf("Running on: %s\n", cBuffer);
-
- return;
-
-
- // If we reach here, no platform has a GPU
- printf("Error: None of the platforms has a GPU\n");
- exit(EXIT_FAILURE);
- }
- // Transfers pre-computed constants used by the two kernels to the GPU
- void transfer_constants(float *host_sin_angle, float *host_cos_angle, int *host_tX, int *host_tY, int strel_m, int strel_n, float *host_strel) {
- cl_int error;
- // Compute the sizes of the matrices
- unsigned int angle_mem_size = sizeof(float) * NPOINTS;
- unsigned int t_mem_size = sizeof(int) * NCIRCLES * NPOINTS;
- unsigned int strel_mem_size = sizeof(float) * strel_m * strel_n;
- // Allocate device memory and copy the matrices
- c_sin_angle = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, angle_mem_size, host_sin_angle, &error);
- check_error(error, __FILE__, __LINE__);
- c_cos_angle = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, angle_mem_size, host_cos_angle, &error);
- check_error(error, __FILE__, __LINE__);
- c_tX = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, t_mem_size, host_tX, &error);
- check_error(error, __FILE__, __LINE__);
- c_tY = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, t_mem_size, host_tY, &error);
- check_error(error, __FILE__, __LINE__);
- c_strel = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, strel_mem_size, host_strel, &error);
- check_error(error, __FILE__, __LINE__);
- }
|