123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209 |
- #include "find_ellipse.h"
- #include "track_ellipse_opencl.h"
- #include "OpenCL_helper_library.h"
- // Host and device arrays to hold device pointers to input matrices
- int *host_I_offsets;
- cl_mem device_I_offsets;
- // Host and device arrays to hold sizes of input matrices
- int *host_m_array, *host_n_array;
- cl_mem device_m_array, device_n_array;
- // Host and device arrays to hold matrices for all cells
- // (so we can copy to and from the device in a single transfer)
- float *host_I_all;
- cl_mem device_I_all, device_IMGVF_all;
- size_t total_mem_size;
- // The number of work items per work group
- const size_t local_work_size = 128;
- cl_bool compiled = FALSE;
- cl_kernel IMGVF_kernel;
- // Host function that launches an OpenCL kernel to compute the MGVF matrices for the specified cells
- void IMGVF_OpenCL(MAT **I, MAT **IMGVF, double vx, double vy, double e, int max_iterations, double cutoff, int num_cells) {
- cl_int error;
-
- // Initialize the data on the GPU
- IMGVF_OpenCL_init(I, num_cells);
-
- if (! compiled) {
- // Load the kernel source from the file
- const char *source = load_kernel_source("track_ellipse_kernel.cl");
- size_t sourceSize = strlen(source);
- // Compile the kernel
- cl_program program = clCreateProgramWithSource(context, 1, &source, &sourceSize, &error);
- check_error(error, __FILE__, __LINE__);
- error = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
- // Show compiler warnings/errors
- static char log[65536]; memset(log, 0, sizeof(log));
- clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(log)-1, log, NULL);
- if (strstr(log,"warning:") || strstr(log, "error:")) printf("<<<<\n%s\n>>>>\n", log);
- check_error(error, __FILE__, __LINE__);
- // Create the IMGVF kernels
- IMGVF_kernel = clCreateKernel(program, "IMGVF_kernel", &error);
- check_error(error, __FILE__, __LINE__);
- // Record that compiling has already completed
- compiled = TRUE;
- }
-
- // Setup execution parameters
- size_t num_work_groups = num_cells;
- size_t global_work_size = num_work_groups * local_work_size;
-
- // Convert double-precision parameters to single-precision
- float vx_float = (float) vx;
- float vy_float = (float) vy;
- float e_float = (float) e;
- float cutoff_float = (float) cutoff;
-
- // Set the kernel arguments
- clSetKernelArg(IMGVF_kernel, 0, sizeof(cl_mem), (void *) &device_IMGVF_all);
- clSetKernelArg(IMGVF_kernel, 1, sizeof(cl_mem), (void *) &device_I_all);
- clSetKernelArg(IMGVF_kernel, 2, sizeof(cl_mem), (void *) &device_I_offsets);
- clSetKernelArg(IMGVF_kernel, 3, sizeof(cl_mem), (void *) &device_m_array);
- clSetKernelArg(IMGVF_kernel, 4, sizeof(cl_mem), (void *) &device_n_array);
- clSetKernelArg(IMGVF_kernel, 5, sizeof(cl_float), (void *) &vx_float);
- clSetKernelArg(IMGVF_kernel, 6, sizeof(cl_float), (void *) &vy_float);
- clSetKernelArg(IMGVF_kernel, 7, sizeof(cl_float), (void *) &e_float);
- clSetKernelArg(IMGVF_kernel, 8, sizeof(cl_int), (void *) &max_iterations);
- clSetKernelArg(IMGVF_kernel, 9, sizeof(cl_float), (void *) &cutoff_float);
-
- // Compute the MGVF on the GPU
- error = clEnqueueNDRangeKernel(command_queue, IMGVF_kernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, NULL);
- check_error(error, __FILE__, __LINE__);
-
- // Check for kernel errors
- error = clFinish(command_queue);
- check_error(error, __FILE__, __LINE__);
-
- // Copy back the final results from the GPU
- IMGVF_OpenCL_cleanup(IMGVF, num_cells);
- }
- // Initializes data on the GPU for the MGVF kernel
- void IMGVF_OpenCL_init(MAT **IE, int num_cells) {
- cl_int error;
-
- // Allocate array of offsets to each cell's image
- size_t mem_size = sizeof(int) * num_cells;
- host_I_offsets = (int *) malloc(mem_size);
- device_I_offsets = clCreateBuffer(context, CL_MEM_READ_ONLY, mem_size, NULL, &error);
- check_error(error, __FILE__, __LINE__);
-
- // Allocate arrays to hold the dimensions of each cell's image
- host_m_array = (int *) malloc(mem_size);
- host_n_array = (int *) malloc(mem_size);
- device_m_array = clCreateBuffer(context, CL_MEM_READ_ONLY, mem_size, NULL, &error);
- check_error(error, __FILE__, __LINE__);
- device_n_array = clCreateBuffer(context, CL_MEM_READ_ONLY, mem_size, NULL, &error);
- check_error(error, __FILE__, __LINE__);
-
- // Figure out the size of all of the matrices combined
- int i, j, cell_num;
- size_t total_size = 0;
- for (cell_num = 0; cell_num < num_cells; cell_num++) {
- MAT *I = IE[cell_num];
- size_t size = I->m * I->n;
- total_size += size;
- }
- total_mem_size = total_size * sizeof(float);
-
- // Allocate host memory just once for all cells
- host_I_all = (float *) malloc(total_mem_size);
-
- // Allocate device memory just once for all cells
- device_I_all = clCreateBuffer(context, CL_MEM_READ_ONLY, total_mem_size, NULL, &error);
- check_error(error, __FILE__, __LINE__);
- device_IMGVF_all = clCreateBuffer(context, CL_MEM_READ_WRITE, total_mem_size, NULL, &error);
- check_error(error, __FILE__, __LINE__);
-
- // Copy each initial matrix into the allocated host memory
- int offset = 0;
- for (cell_num = 0; cell_num < num_cells; cell_num++) {
- MAT *I = IE[cell_num];
-
- // Determine the size of the matrix
- int m = I->m, n = I->n;
- int size = m * n;
-
- // Store memory dimensions
- host_m_array[cell_num] = m;
- host_n_array[cell_num] = n;
-
- // Store offsets to this cell's image
- host_I_offsets[cell_num] = offset;
-
- // Copy matrix I (which is also the initial IMGVF matrix) into the overall array
- for (i = 0; i < m; i++)
- for (j = 0; j < n; j++)
- host_I_all[offset + (i * n) + j] = (float) m_get_val(I, i, j);
-
- offset += size;
- }
-
- // Copy I matrices (which are also the initial IMGVF matrices) to device
- error = clEnqueueWriteBuffer(command_queue, device_I_all, CL_TRUE, 0, total_mem_size, (void *) host_I_all, 0, NULL, NULL);
- check_error(error, __FILE__, __LINE__);
- error = clEnqueueWriteBuffer(command_queue, device_IMGVF_all, CL_TRUE, 0, total_mem_size, (void *) host_I_all, 0, NULL, NULL);
- check_error(error, __FILE__, __LINE__);
-
- // Copy offsets array to device
- error = clEnqueueWriteBuffer(command_queue, device_I_offsets, CL_TRUE, 0, mem_size, (void *) host_I_offsets, 0, NULL, NULL);
- check_error(error, __FILE__, __LINE__);
-
- // Copy memory dimension arrays to device
- error = clEnqueueWriteBuffer(command_queue, device_m_array, CL_TRUE, 0, mem_size, (void *) host_m_array, 0, NULL, NULL);
- check_error(error, __FILE__, __LINE__);
- error = clEnqueueWriteBuffer(command_queue, device_n_array, CL_TRUE, 0, mem_size, (void *) host_n_array, 0, NULL, NULL);
- check_error(error, __FILE__, __LINE__);
- }
- // Copies the results of the MGVF kernel back to the host
- void IMGVF_OpenCL_cleanup(MAT **IMGVF_out_array, int num_cells) {
- cl_int error;
-
- // Copy the result matrices from the device to the host
- float *host_IMGVF_all = (cl_float *) clEnqueueMapBuffer(command_queue, device_IMGVF_all, CL_TRUE, CL_MAP_READ, 0, total_mem_size, 0, NULL, NULL, &error);
- check_error(error, __FILE__, __LINE__);
-
- // Copy each result matrix into its appropriate host matrix
- int cell_num, offset = 0;
- for (cell_num = 0; cell_num < num_cells; cell_num++) {
- MAT *IMGVF_out = IMGVF_out_array[cell_num];
-
- // Determine the size of the matrix
- int m = IMGVF_out->m, n = IMGVF_out->n, i, j;
- // Pack the result into the matrix
- for (i = 0; i < m; i++)
- for (j = 0; j < n; j++)
- m_set_val(IMGVF_out, i, j, (double) host_IMGVF_all[offset + (i * n) + j]);
-
- offset += (m * n);
- }
-
- // Unmap results buffer
- error = clEnqueueUnmapMemObject(command_queue, device_IMGVF_all, (void *) host_IMGVF_all, 0, NULL, NULL);
- check_error(error, __FILE__, __LINE__);
-
- // Free device memory
- clReleaseMemObject(device_m_array);
- clReleaseMemObject(device_n_array);
- clReleaseMemObject(device_IMGVF_all);
- clReleaseMemObject(device_I_all);
- clReleaseMemObject(device_I_offsets);
-
- // Free host memory
- free(host_m_array);
- free(host_n_array);
- free(host_I_all);
- free(host_I_offsets);
- }
|