find_ellipse_opencl.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292
  1. #include "find_ellipse.h"
  2. #include "find_ellipse_opencl.h"
  3. #include "OpenCL_helper_library.h"
  4. #include <CL/cl.h>
  5. #include <stdio.h>
  6. // Defined if we want to use images/textures
  7. /* #define USE_IMAGE */
  8. // The number of sample points in each ellipse (stencil)
  9. #define NPOINTS 150
  10. // The maximum radius of a sample ellipse
  11. #define MAX_RAD 20
  12. // The total number of sample ellipses
  13. #define NCIRCLES 7
  14. // The size of the structuring element used in dilation
  15. #define STREL_SIZE (12 * 2 + 1)
  16. // Matrix used to store the maximal GICOV score at each pixels
  17. // Produced by the GICOV kernel and consumed by the dilation kernel
  18. cl_mem device_gicov;
  19. // Device arrays holding the stencil parameters used by the GICOV kernel
  20. cl_mem c_sin_angle, c_cos_angle, c_tX, c_tY;
  21. // Dilate kernel
  22. cl_kernel dilate_kernel;
  23. // Sets up and invokes the GICOV kernel and returns its output
  24. float *GICOV_OpenCL(int grad_m, int grad_n, float *host_grad_x, float *host_grad_y) {
  25. cl_int error;
  26. int MaxR = MAX_RAD + 2;
  27. // Allocate device buffers and transfer data
  28. unsigned int grad_mem_size = sizeof(float) * grad_m * grad_n;
  29. cl_mem device_grad_x, device_grad_y;
  30. #ifdef USE_IMAGE
  31. // Define the image parameters
  32. cl_image_format image_format;
  33. image_format.image_channel_order = CL_R;
  34. image_format.image_channel_data_type = CL_FLOAT;
  35. // Create images (textures)
  36. device_grad_x = clCreateImage2D(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &image_format, grad_m, grad_n, 0, host_grad_x, &error);
  37. check_error(error, __FILE__, __LINE__);
  38. device_grad_y = clCreateImage2D(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, &image_format, grad_m, grad_n, 0, host_grad_y, &error);
  39. check_error(error, __FILE__, __LINE__);
  40. #else
  41. // Create buffers
  42. device_grad_x = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, grad_mem_size, host_grad_x, &error);
  43. check_error(error, __FILE__, __LINE__);
  44. device_grad_y = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, grad_mem_size, host_grad_y, &error);
  45. check_error(error, __FILE__, __LINE__);
  46. #endif
  47. // Allocate & initialize device memory for result
  48. // (some elements are not assigned values in the kernel)
  49. // Since there is no OpenCL version of cudaMemset, we first allocate and initialize
  50. // the host-side copy of the buffer and then transfer that to the device
  51. float *host_gicov = (float *) malloc(grad_mem_size);
  52. memset(host_gicov, 0, grad_mem_size);
  53. device_gicov = clCreateBuffer(context, CL_MEM_READ_WRITE | /*CL_MEM_COPY_HOST_PTR*/ CL_MEM_USE_HOST_PTR, grad_mem_size, host_gicov, &error);
  54. check_error(error, __FILE__, __LINE__);
  55. // Load the kernel source from the file
  56. const char *source = load_kernel_source("find_ellipse_kernel.cl");
  57. size_t sourceSize = strlen(source);
  58. // Compile the kernel
  59. cl_program program = clCreateProgramWithSource(context, 1, &source, &sourceSize, &error);
  60. check_error(error, __FILE__, __LINE__);
  61. #ifdef USE_IMAGE
  62. error = clBuildProgram(program, 1, &device, "-D USE_IMAGE", NULL, NULL);
  63. #else
  64. error = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
  65. #endif
  66. // Show compiler warnings/errors
  67. static char log[65536]; memset(log, 0, sizeof(log));
  68. clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(log)-1, log, NULL);
  69. if (strstr(log,"warning:") || strstr(log, "error:")) printf("<<<<\n%s\n>>>>\n", log);
  70. check_error(error, __FILE__, __LINE__);
  71. // Create both kernels (GICOV and dilate)
  72. cl_kernel GICOV_kernel = clCreateKernel(program, "GICOV_kernel", &error);
  73. check_error(error, __FILE__, __LINE__);
  74. dilate_kernel = clCreateKernel(program, "dilate_kernel", &error);
  75. check_error(error, __FILE__, __LINE__);
  76. // Setup execution parameters
  77. cl_int local_work_size = grad_m - (2 * MaxR);
  78. cl_int num_work_groups = grad_n - (2 * MaxR);
  79. // Set the kernel arguments
  80. clSetKernelArg(GICOV_kernel, 0, sizeof(cl_int), (void *) &grad_m);
  81. clSetKernelArg(GICOV_kernel, 1, sizeof(cl_mem), (void *) &device_grad_x);
  82. clSetKernelArg(GICOV_kernel, 2, sizeof(cl_mem), (void *) &device_grad_y);
  83. clSetKernelArg(GICOV_kernel, 3, sizeof(cl_mem), (void *) &c_sin_angle);
  84. clSetKernelArg(GICOV_kernel, 4, sizeof(cl_mem), (void *) &c_cos_angle);
  85. clSetKernelArg(GICOV_kernel, 5, sizeof(cl_mem), (void *) &c_tX);
  86. clSetKernelArg(GICOV_kernel, 6, sizeof(cl_mem), (void *) &c_tY);
  87. clSetKernelArg(GICOV_kernel, 7, sizeof(cl_mem), (void *) &device_gicov);
  88. clSetKernelArg(GICOV_kernel, 8, sizeof(cl_int), (void *) &local_work_size);
  89. clSetKernelArg(GICOV_kernel, 9, sizeof(cl_int), (void *) &num_work_groups);
  90. size_t work_group_size = 256;
  91. size_t global_work_size = num_work_groups * local_work_size;
  92. if(global_work_size % work_group_size > 0)
  93. global_work_size=(global_work_size / work_group_size+1)*work_group_size;
  94. printf("Find: local_work_size = %d, global_work_size = %d \n"
  95. ,work_group_size, global_work_size);
  96. // Execute the GICOV kernel
  97. error = clEnqueueNDRangeKernel(command_queue, GICOV_kernel, 1, NULL, &global_work_size, &work_group_size, 0, NULL, NULL);
  98. check_error(error, __FILE__, __LINE__);
  99. // Check for kernel errors
  100. error = clFinish(command_queue);
  101. check_error(error, __FILE__, __LINE__);
  102. // Copy the result to the host
  103. host_gicov = (cl_float *) clEnqueueMapBuffer(command_queue, device_gicov, CL_TRUE, CL_MAP_READ, 0, grad_mem_size, 0, NULL, NULL, &error);
  104. check_error(error, __FILE__, __LINE__);
  105. // Cleanup memory
  106. clReleaseMemObject(device_grad_x);
  107. clReleaseMemObject(device_grad_y);
  108. return host_gicov;
  109. }
  110. // Constant device array holding the structuring element used by the dilation kernel
  111. cl_mem c_strel;
  112. // Sets up and invokes the dilation kernel and returns its output
  113. float *dilate_OpenCL(int max_gicov_m, int max_gicov_n, int strel_m, int strel_n) {
  114. cl_int error;
  115. // Allocate device memory for result
  116. unsigned int max_gicov_mem_size = sizeof(float) * max_gicov_m * max_gicov_n;
  117. cl_mem device_img_dilated = clCreateBuffer(context, CL_MEM_WRITE_ONLY, max_gicov_mem_size, NULL, &error);
  118. check_error(error, __FILE__, __LINE__);
  119. #ifdef USE_IMAGE
  120. // Copy the input matrix of GICOV values to an image
  121. // Define the image parameters
  122. cl_image_format image_format;
  123. image_format.image_channel_order = CL_R;
  124. image_format.image_channel_data_type = CL_FLOAT;
  125. // Create the image
  126. cl_mem device_gicov_image = clCreateImage2D(context, CL_MEM_READ_ONLY, &image_format, max_gicov_m, max_gicov_n, 0, NULL, &error);
  127. check_error(error, __FILE__, __LINE__);
  128. // Copy the GICOV data to the image
  129. size_t offset[3] = {0, 0, 0};
  130. size_t region[3] = {max_gicov_m, max_gicov_n, 1};
  131. error = clEnqueueCopyBufferToImage(command_queue, device_gicov, device_gicov_image, 0, offset, region, 0, NULL, NULL);
  132. check_error(error, __FILE__, __LINE__);
  133. #endif
  134. // Setup execution parameters
  135. size_t global_work_size = max_gicov_m * max_gicov_n;
  136. size_t local_work_size = 176;
  137. // Make sure the global work size is a multiple of the local work size
  138. if (global_work_size % local_work_size != 0) {
  139. global_work_size = ((global_work_size / local_work_size) + 1) * local_work_size;
  140. }
  141. // Set the kernel arguments
  142. clSetKernelArg(dilate_kernel, 0, sizeof(cl_int), (void *) &max_gicov_m);
  143. clSetKernelArg(dilate_kernel, 1, sizeof(cl_int), (void *) &max_gicov_n);
  144. clSetKernelArg(dilate_kernel, 2, sizeof(cl_int), (void *) &strel_m);
  145. clSetKernelArg(dilate_kernel, 3, sizeof(cl_int), (void *) &strel_n);
  146. clSetKernelArg(dilate_kernel, 4, sizeof(cl_mem), (void *) &c_strel);
  147. #ifdef USE_IMAGE
  148. clSetKernelArg(dilate_kernel, 5, sizeof(cl_mem), (void *) &device_gicov_image);
  149. #else
  150. clSetKernelArg(dilate_kernel, 5, sizeof(cl_mem), (void *) &device_gicov);
  151. #endif
  152. clSetKernelArg(dilate_kernel, 6, sizeof(cl_mem), (void *) &device_img_dilated);
  153. // Execute the dilation kernel
  154. error = clEnqueueNDRangeKernel(command_queue, dilate_kernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, NULL);
  155. check_error(error, __FILE__, __LINE__);
  156. // Check for kernel errors
  157. error = clFinish(command_queue);
  158. check_error(error, __FILE__, __LINE__);
  159. // Copy the result to the host
  160. // float *host_img_dilated = (cl_float *) clEnqueueMapBuffer(command_queue, device_img_dilated, CL_TRUE, CL_MAP_READ, 0, max_gicov_mem_size, 0, NULL, NULL, &error);
  161. float *host_img_dilated = (float*) malloc(max_gicov_mem_size);
  162. error = clEnqueueReadBuffer(command_queue, device_img_dilated, CL_TRUE, 0, max_gicov_mem_size, host_img_dilated, 0, NULL, NULL);
  163. check_error(error, __FILE__, __LINE__);
  164. // Cleanup memory
  165. clReleaseMemObject(device_gicov);
  166. #ifdef USE_IMAGE
  167. clReleaseMemObject(device_gicov_image);
  168. #endif
  169. clReleaseMemObject(device_img_dilated);
  170. return host_img_dilated;
  171. }
  172. // Chooses the most appropriate GPU on which to execute
  173. void select_device(int platform_id, int device_id, int use_gpu) {
  174. cl_int error;
  175. // Determine the number of platforms
  176. cl_uint num_platforms;
  177. error = clGetPlatformIDs(0, NULL, &num_platforms);
  178. check_error(error, __FILE__, __LINE__);
  179. // Make sure at least one platform is available
  180. if (num_platforms == 0) {
  181. printf("Error: No OpenCL platforms available\n");
  182. exit(EXIT_FAILURE);
  183. }
  184. // Get the list of platforms
  185. cl_platform_id *platform_ids = (cl_platform_id *) malloc(sizeof(cl_platform_id) * num_platforms);
  186. error = clGetPlatformIDs(num_platforms, platform_ids, NULL);
  187. check_error(error, __FILE__, __LINE__);
  188. // Selector for the device type in accordance to what passed as parameter
  189. cl_device_type device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
  190. // Create an OpenCL context
  191. cl_context_properties ctxprop[] = { CL_CONTEXT_PLATFORM, (cl_context_properties) platform_ids[platform_id], 0};
  192. context = clCreateContextFromType(ctxprop, device_type, NULL, NULL, &error);
  193. // If this platform has no GPU, try the next one
  194. check_error(error, __FILE__, __LINE__);
  195. // Get the list of devices (GPUs or CPUs)
  196. size_t size;
  197. error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size);
  198. check_error(error, __FILE__, __LINE__);
  199. cl_device_id *device_list = (cl_device_id *) malloc(size);
  200. error = clGetContextInfo(context, CL_CONTEXT_DEVICES, size, device_list, NULL);
  201. check_error(error, __FILE__, __LINE__);
  202. // Create a command queue for the device passed as parameter
  203. device = device_list[device_id];
  204. command_queue = clCreateCommandQueue(context, device, 0, &error);
  205. check_error(error, __FILE__, __LINE__);
  206. // Print the device name
  207. char cBuffer[1024];
  208. clGetDeviceInfo(device_list[0], CL_DEVICE_NAME, sizeof(cBuffer), &cBuffer, NULL);
  209. printf("Running on: %s\n", cBuffer);
  210. return;
  211. // If we reach here, no platform has a GPU
  212. printf("Error: None of the platforms has a GPU\n");
  213. exit(EXIT_FAILURE);
  214. }
  215. // Transfers pre-computed constants used by the two kernels to the GPU
  216. void transfer_constants(float *host_sin_angle, float *host_cos_angle, int *host_tX, int *host_tY, int strel_m, int strel_n, float *host_strel) {
  217. cl_int error;
  218. // Compute the sizes of the matrices
  219. unsigned int angle_mem_size = sizeof(float) * NPOINTS;
  220. unsigned int t_mem_size = sizeof(int) * NCIRCLES * NPOINTS;
  221. unsigned int strel_mem_size = sizeof(float) * strel_m * strel_n;
  222. // Allocate device memory and copy the matrices
  223. c_sin_angle = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, angle_mem_size, host_sin_angle, &error);
  224. check_error(error, __FILE__, __LINE__);
  225. c_cos_angle = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, angle_mem_size, host_cos_angle, &error);
  226. check_error(error, __FILE__, __LINE__);
  227. c_tX = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, t_mem_size, host_tX, &error);
  228. check_error(error, __FILE__, __LINE__);
  229. c_tY = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, t_mem_size, host_tY, &error);
  230. check_error(error, __FILE__, __LINE__);
  231. c_strel = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, strel_mem_size, host_strel, &error);
  232. check_error(error, __FILE__, __LINE__);
  233. }