track_ellipse_opencl.c 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209
  1. #include "find_ellipse.h"
  2. #include "track_ellipse_opencl.h"
  3. #include "OpenCL_helper_library.h"
  4. // Host and device arrays to hold device pointers to input matrices
  5. int *host_I_offsets;
  6. cl_mem device_I_offsets;
  7. // Host and device arrays to hold sizes of input matrices
  8. int *host_m_array, *host_n_array;
  9. cl_mem device_m_array, device_n_array;
  10. // Host and device arrays to hold matrices for all cells
  11. // (so we can copy to and from the device in a single transfer)
  12. float *host_I_all;
  13. cl_mem device_I_all, device_IMGVF_all;
  14. size_t total_mem_size;
  15. // The number of work items per work group
  16. const size_t local_work_size = 128;
  17. cl_bool compiled = FALSE;
  18. cl_kernel IMGVF_kernel;
  19. // Host function that launches an OpenCL kernel to compute the MGVF matrices for the specified cells
  20. void IMGVF_OpenCL(MAT **I, MAT **IMGVF, double vx, double vy, double e, int max_iterations, double cutoff, int num_cells) {
  21. cl_int error;
  22. // Initialize the data on the GPU
  23. IMGVF_OpenCL_init(I, num_cells);
  24. if (! compiled) {
  25. // Load the kernel source from the file
  26. const char *source = load_kernel_source("track_ellipse_kernel.cl");
  27. size_t sourceSize = strlen(source);
  28. // Compile the kernel
  29. cl_program program = clCreateProgramWithSource(context, 1, &source, &sourceSize, &error);
  30. check_error(error, __FILE__, __LINE__);
  31. error = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
  32. // Show compiler warnings/errors
  33. static char log[65536]; memset(log, 0, sizeof(log));
  34. clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(log)-1, log, NULL);
  35. if (strstr(log,"warning:") || strstr(log, "error:")) printf("<<<<\n%s\n>>>>\n", log);
  36. check_error(error, __FILE__, __LINE__);
  37. // Create the IMGVF kernels
  38. IMGVF_kernel = clCreateKernel(program, "IMGVF_kernel", &error);
  39. check_error(error, __FILE__, __LINE__);
  40. // Record that compiling has already completed
  41. compiled = TRUE;
  42. }
  43. // Setup execution parameters
  44. size_t num_work_groups = num_cells;
  45. size_t global_work_size = num_work_groups * local_work_size;
  46. // Convert double-precision parameters to single-precision
  47. float vx_float = (float) vx;
  48. float vy_float = (float) vy;
  49. float e_float = (float) e;
  50. float cutoff_float = (float) cutoff;
  51. // Set the kernel arguments
  52. clSetKernelArg(IMGVF_kernel, 0, sizeof(cl_mem), (void *) &device_IMGVF_all);
  53. clSetKernelArg(IMGVF_kernel, 1, sizeof(cl_mem), (void *) &device_I_all);
  54. clSetKernelArg(IMGVF_kernel, 2, sizeof(cl_mem), (void *) &device_I_offsets);
  55. clSetKernelArg(IMGVF_kernel, 3, sizeof(cl_mem), (void *) &device_m_array);
  56. clSetKernelArg(IMGVF_kernel, 4, sizeof(cl_mem), (void *) &device_n_array);
  57. clSetKernelArg(IMGVF_kernel, 5, sizeof(cl_float), (void *) &vx_float);
  58. clSetKernelArg(IMGVF_kernel, 6, sizeof(cl_float), (void *) &vy_float);
  59. clSetKernelArg(IMGVF_kernel, 7, sizeof(cl_float), (void *) &e_float);
  60. clSetKernelArg(IMGVF_kernel, 8, sizeof(cl_int), (void *) &max_iterations);
  61. clSetKernelArg(IMGVF_kernel, 9, sizeof(cl_float), (void *) &cutoff_float);
  62. // Compute the MGVF on the GPU
  63. error = clEnqueueNDRangeKernel(command_queue, IMGVF_kernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, NULL);
  64. check_error(error, __FILE__, __LINE__);
  65. // Check for kernel errors
  66. error = clFinish(command_queue);
  67. check_error(error, __FILE__, __LINE__);
  68. // Copy back the final results from the GPU
  69. IMGVF_OpenCL_cleanup(IMGVF, num_cells);
  70. }
  71. // Initializes data on the GPU for the MGVF kernel
  72. void IMGVF_OpenCL_init(MAT **IE, int num_cells) {
  73. cl_int error;
  74. // Allocate array of offsets to each cell's image
  75. size_t mem_size = sizeof(int) * num_cells;
  76. host_I_offsets = (int *) malloc(mem_size);
  77. device_I_offsets = clCreateBuffer(context, CL_MEM_READ_ONLY, mem_size, NULL, &error);
  78. check_error(error, __FILE__, __LINE__);
  79. // Allocate arrays to hold the dimensions of each cell's image
  80. host_m_array = (int *) malloc(mem_size);
  81. host_n_array = (int *) malloc(mem_size);
  82. device_m_array = clCreateBuffer(context, CL_MEM_READ_ONLY, mem_size, NULL, &error);
  83. check_error(error, __FILE__, __LINE__);
  84. device_n_array = clCreateBuffer(context, CL_MEM_READ_ONLY, mem_size, NULL, &error);
  85. check_error(error, __FILE__, __LINE__);
  86. // Figure out the size of all of the matrices combined
  87. int i, j, cell_num;
  88. size_t total_size = 0;
  89. for (cell_num = 0; cell_num < num_cells; cell_num++) {
  90. MAT *I = IE[cell_num];
  91. size_t size = I->m * I->n;
  92. total_size += size;
  93. }
  94. total_mem_size = total_size * sizeof(float);
  95. // Allocate host memory just once for all cells
  96. host_I_all = (float *) malloc(total_mem_size);
  97. // Allocate device memory just once for all cells
  98. device_I_all = clCreateBuffer(context, CL_MEM_READ_ONLY, total_mem_size, NULL, &error);
  99. check_error(error, __FILE__, __LINE__);
  100. device_IMGVF_all = clCreateBuffer(context, CL_MEM_READ_WRITE, total_mem_size, NULL, &error);
  101. check_error(error, __FILE__, __LINE__);
  102. // Copy each initial matrix into the allocated host memory
  103. int offset = 0;
  104. for (cell_num = 0; cell_num < num_cells; cell_num++) {
  105. MAT *I = IE[cell_num];
  106. // Determine the size of the matrix
  107. int m = I->m, n = I->n;
  108. int size = m * n;
  109. // Store memory dimensions
  110. host_m_array[cell_num] = m;
  111. host_n_array[cell_num] = n;
  112. // Store offsets to this cell's image
  113. host_I_offsets[cell_num] = offset;
  114. // Copy matrix I (which is also the initial IMGVF matrix) into the overall array
  115. for (i = 0; i < m; i++)
  116. for (j = 0; j < n; j++)
  117. host_I_all[offset + (i * n) + j] = (float) m_get_val(I, i, j);
  118. offset += size;
  119. }
  120. // Copy I matrices (which are also the initial IMGVF matrices) to device
  121. error = clEnqueueWriteBuffer(command_queue, device_I_all, CL_TRUE, 0, total_mem_size, (void *) host_I_all, 0, NULL, NULL);
  122. check_error(error, __FILE__, __LINE__);
  123. error = clEnqueueWriteBuffer(command_queue, device_IMGVF_all, CL_TRUE, 0, total_mem_size, (void *) host_I_all, 0, NULL, NULL);
  124. check_error(error, __FILE__, __LINE__);
  125. // Copy offsets array to device
  126. error = clEnqueueWriteBuffer(command_queue, device_I_offsets, CL_TRUE, 0, mem_size, (void *) host_I_offsets, 0, NULL, NULL);
  127. check_error(error, __FILE__, __LINE__);
  128. // Copy memory dimension arrays to device
  129. error = clEnqueueWriteBuffer(command_queue, device_m_array, CL_TRUE, 0, mem_size, (void *) host_m_array, 0, NULL, NULL);
  130. check_error(error, __FILE__, __LINE__);
  131. error = clEnqueueWriteBuffer(command_queue, device_n_array, CL_TRUE, 0, mem_size, (void *) host_n_array, 0, NULL, NULL);
  132. check_error(error, __FILE__, __LINE__);
  133. }
  134. // Copies the results of the MGVF kernel back to the host
  135. void IMGVF_OpenCL_cleanup(MAT **IMGVF_out_array, int num_cells) {
  136. cl_int error;
  137. // Copy the result matrices from the device to the host
  138. float *host_IMGVF_all = (cl_float *) clEnqueueMapBuffer(command_queue, device_IMGVF_all, CL_TRUE, CL_MAP_READ, 0, total_mem_size, 0, NULL, NULL, &error);
  139. check_error(error, __FILE__, __LINE__);
  140. // Copy each result matrix into its appropriate host matrix
  141. int cell_num, offset = 0;
  142. for (cell_num = 0; cell_num < num_cells; cell_num++) {
  143. MAT *IMGVF_out = IMGVF_out_array[cell_num];
  144. // Determine the size of the matrix
  145. int m = IMGVF_out->m, n = IMGVF_out->n, i, j;
  146. // Pack the result into the matrix
  147. for (i = 0; i < m; i++)
  148. for (j = 0; j < n; j++)
  149. m_set_val(IMGVF_out, i, j, (double) host_IMGVF_all[offset + (i * n) + j]);
  150. offset += (m * n);
  151. }
  152. // Unmap results buffer
  153. error = clEnqueueUnmapMemObject(command_queue, device_IMGVF_all, (void *) host_IMGVF_all, 0, NULL, NULL);
  154. check_error(error, __FILE__, __LINE__);
  155. // Free device memory
  156. clReleaseMemObject(device_m_array);
  157. clReleaseMemObject(device_n_array);
  158. clReleaseMemObject(device_IMGVF_all);
  159. clReleaseMemObject(device_I_all);
  160. clReleaseMemObject(device_I_offsets);
  161. // Free host memory
  162. free(host_m_array);
  163. free(host_n_array);
  164. free(host_I_all);
  165. free(host_I_offsets);
  166. }