12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193 |
- //========================================================================================================================================================================================================200
- // DEFINE/INCLUDE
- //========================================================================================================================================================================================================200
- //======================================================================================================================================================150
- // MAIN FUNCTION HEADER
- //======================================================================================================================================================150
- #include "./../main.h" // (in the main program folder)
- //======================================================================================================================================================150
- // DEFINE
- //======================================================================================================================================================150
- //======================================================================================================================================================150
- // LIBRARIES
- //======================================================================================================================================================150
- #include <stdio.h> // (in path known to compiler) needed by printf
- #include <string.h> // (in path known to compiler) needed by strlen
- #include <CL/cl.h> // (in path specified to compiler) needed by OpenCL types and functions
- //======================================================================================================================================================150
- // UTILITIES
- //======================================================================================================================================================150
- #include "./../util/opencl/opencl.h" // (in directory) needed by device functions
- //======================================================================================================================================================150
- // KERNEL_GPU_CUDA_WRAPPER FUNCTION HEADER
- //======================================================================================================================================================150
- #include "./kernel_gpu_opencl_wrapper.h" // (in directory)
- //======================================================================================================================================================150
- // END
- //======================================================================================================================================================150
- //========================================================================================================================================================================================================200
- // KERNEL_GPU_CUDA_WRAPPER FUNCTION
- //========================================================================================================================================================================================================200
- void
- kernel_gpu_opencl_wrapper(fp* image, // input image
- int Nr, // IMAGE nbr of rows
- int Nc, // IMAGE nbr of cols
- long Ne, // IMAGE nbr of elem
- int niter, // nbr of iterations
- fp lambda, // update step size
- long NeROI, // ROI nbr of elements
- int* iN,
- int* iS,
- int* jE,
- int* jW,
- int iter, // primary loop
- int mem_size_i,
- int mem_size_j,
- int platform_idx,
- int device_idx,
- int use_gpu)
- {
- //======================================================================================================================================================150
- // GPU SETUP
- //======================================================================================================================================================150
- //====================================================================================================100
- // COMMON VARIABLES
- //====================================================================================================100
- // common variables
- cl_int error;
- //====================================================================================================100
- // GET PLATFORMS (Intel, AMD, NVIDIA, based on provided library), SELECT ONE
- //====================================================================================================100
- // Get the number of available platforms
- cl_uint num_platforms;
- error = clGetPlatformIDs( 0,
- NULL,
- &num_platforms);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // Get the list of available platforms
- cl_platform_id *platforms = (cl_platform_id *)malloc(sizeof(cl_platform_id) * num_platforms);
- error = clGetPlatformIDs( num_platforms,
- platforms,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // Select the platform using the parameter
- cl_platform_id platform = platforms[platform_idx];
- // Get the name of the selected platform and print it (if there are multiple platforms, choose the first one)
- char pbuf[100];
- error = clGetPlatformInfo( platform,
- CL_PLATFORM_VENDOR,
- sizeof(pbuf),
- pbuf,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- printf("Platform: %s\n", pbuf);
- //====================================================================================================100
- // CREATE CONTEXT FOR THE PLATFORM
- //====================================================================================================100
- // Create context properties for selected platform
- cl_context_properties context_properties[3] = { CL_CONTEXT_PLATFORM,
- (cl_context_properties) platform,
- 0};
- // Selector for the device type in accordance to what passed as parameter
- cl_device_type device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
-
- // Create context for selected platform being GPU/CPU
- cl_context context;
- context = clCreateContextFromType( context_properties,
- device_type,
- NULL,
- NULL,
- &error);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // GET DEVICES AVAILABLE FOR THE CONTEXT, SELECT ONE
- //====================================================================================================100
- // Get the number of devices (previousely selected for the context)
- size_t devices_size;
- error = clGetContextInfo( context,
- CL_CONTEXT_DEVICES,
- 0,
- NULL,
- &devices_size);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // Get the list of devices (previousely selected for the context)
- cl_device_id *devices = (cl_device_id *) malloc(devices_size);
- error = clGetContextInfo( context,
- CL_CONTEXT_DEVICES,
- devices_size,
- devices,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // Select the device in accordance to the paramter
- cl_device_id device;
- device = devices[device_idx];
- // Get the name of the selected device (previousely selected for the context) and print it
- error = clGetDeviceInfo(device,
- CL_DEVICE_NAME,
- sizeof(pbuf),
- pbuf,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- printf("Device: %s\n", pbuf);
- //====================================================================================================100
- // CREATE COMMAND QUEUE FOR THE DEVICE
- //====================================================================================================100
- // Create a command queue
- cl_command_queue command_queue;
- command_queue = clCreateCommandQueue( context,
- device,
- 0,
- &error);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // CREATE PROGRAM, COMPILE IT
- //====================================================================================================100
- // Load kernel source code from file
- const char *source = load_kernel_source("./kernel/kernel_gpu_opencl.cl");
- size_t sourceSize = strlen(source);
- // Create the program
- cl_program program = clCreateProgramWithSource( context,
- 1,
- &source,
- &sourceSize,
- &error);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- char clOptions[150];
- // sprintf(clOptions,"-I../../src");
- sprintf(clOptions,"-I.");
- #ifdef RD_WG_SIZE
- sprintf(clOptions + strlen(clOptions), " -DRD_WG_SIZE=%d", RD_WG_SIZE);
- #endif
- #ifdef RD_WG_SIZE_0
- sprintf(clOptions + strlen(clOptions), " -DRD_WG_SIZE_0=%d", RD_WG_SIZE_0);
- #endif
- #ifdef RD_WG_SIZE_0_0
- sprintf(clOptions + strlen(clOptions), " -DRD_WG_SIZE_0_0=%d", RD_WG_SIZE_0_0);
- #endif
- // Compile the program
- error = clBuildProgram( program,
- 1,
- &device,
- clOptions,
- NULL,
- NULL);
- // Print warnings and errors from compilation
- static char log[65536];
- memset(log, 0, sizeof(log));
- clGetProgramBuildInfo( program,
- device,
- CL_PROGRAM_BUILD_LOG,
- sizeof(log)-1,
- log,
- NULL);
- printf("-----OpenCL Compiler Output-----\n");
- if (strstr(log,"warning:") || strstr(log, "error:"))
- printf("<<<<\n%s\n>>>>\n", log);
- printf("--------------------------------\n");
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // CREATE Kernels
- //====================================================================================================100
- // Extract kernel
- cl_kernel extract_kernel;
- extract_kernel = clCreateKernel(program,
- "extract_kernel",
- &error);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // Prepare kernel
- cl_kernel prepare_kernel;
- prepare_kernel = clCreateKernel(program,
- "prepare_kernel",
- &error);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // Reduce kernel
- cl_kernel reduce_kernel;
- reduce_kernel = clCreateKernel( program,
- "reduce_kernel",
- &error);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // SRAD kernel
- cl_kernel srad_kernel;
- srad_kernel = clCreateKernel( program,
- "srad_kernel",
- &error);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // SRAD2 kernel
- cl_kernel srad2_kernel;
- srad2_kernel = clCreateKernel( program,
- "srad2_kernel",
- &error);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // Compress kernel
- cl_kernel compress_kernel;
- compress_kernel = clCreateKernel( program,
- "compress_kernel",
- &error);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // TRIGGERING INITIAL DRIVER OVERHEAD
- //====================================================================================================100
- // cudaThreadSynchronize(); // the above does it
- //======================================================================================================================================================150
- // GPU VARIABLES
- //======================================================================================================================================================150
- // CUDA kernel execution parameters
- int blocks_x;
- //======================================================================================================================================================150
- // ALLOCATE MEMORY IN GPU
- //======================================================================================================================================================150
- //====================================================================================================100
- // common memory size
- //====================================================================================================100
- int mem_size; // matrix memory size
- mem_size = sizeof(fp) * Ne; // get the size of float representation of input IMAGE
- //====================================================================================================100
- // allocate memory for entire IMAGE on DEVICE
- //====================================================================================================100
- cl_mem d_I;
- d_I = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- mem_size,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // allocate memory for coordinates on DEVICE
- //====================================================================================================100
- cl_mem d_iN;
- d_iN = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- mem_size_i,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- cl_mem d_iS;
- d_iS = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- mem_size_i,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- cl_mem d_jE;
- d_jE = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- mem_size_j,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- cl_mem d_jW;
- d_jW = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- mem_size_j,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // allocate memory for derivatives
- //====================================================================================================100
- cl_mem d_dN;
- d_dN = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- mem_size,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- cl_mem d_dS;
- d_dS = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- mem_size,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- cl_mem d_dW;
- d_dW = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- mem_size,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- cl_mem d_dE;
- d_dE = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- mem_size,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // allocate memory for coefficient on DEVICE
- //====================================================================================================100
- cl_mem d_c;
- d_c = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- mem_size,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // allocate memory for partial sums on DEVICE
- //====================================================================================================100
- cl_mem d_sums;
- d_sums = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- mem_size,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- cl_mem d_sums2;
- d_sums2 = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- mem_size,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // End
- //====================================================================================================100
- //======================================================================================================================================================150
- // COPY INPUT TO CPU
- //======================================================================================================================================================150
- //====================================================================================================100
- // Image
- //====================================================================================================100
- error = clEnqueueWriteBuffer( command_queue,
- d_I,
- 1,
- 0,
- mem_size,
- image,
- 0,
- 0,
- 0);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // coordinates
- //====================================================================================================100
- error = clEnqueueWriteBuffer( command_queue,
- d_iN,
- 1,
- 0,
- mem_size_i,
- iN,
- 0,
- 0,
- 0);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clEnqueueWriteBuffer( command_queue,
- d_iS,
- 1,
- 0,
- mem_size_i,
- iS,
- 0,
- 0,
- 0);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clEnqueueWriteBuffer( command_queue,
- d_jE,
- 1,
- 0,
- mem_size_j,
- jE,
- 0,
- 0,
- 0);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clEnqueueWriteBuffer( command_queue,
- d_jW,
- 1,
- 0,
- mem_size_j,
- jW,
- 0,
- 0,
- 0);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // End
- //====================================================================================================100
- //======================================================================================================================================================150
- // KERNEL EXECUTION PARAMETERS
- //======================================================================================================================================================150
- // threads
- size_t local_work_size[1];
- local_work_size[0] = NUMBER_THREADS;
- // workgroups
- int blocks_work_size;
- size_t global_work_size[1];
- blocks_x = Ne/(int)local_work_size[0];
- if (Ne % (int)local_work_size[0] != 0){ // compensate for division remainder above by adding one grid
- blocks_x = blocks_x + 1;
- }
- blocks_work_size = blocks_x;
- global_work_size[0] = blocks_work_size * local_work_size[0]; // define the number of blocks in the grid
- printf("max # of workgroups = %d, # of threads/workgroup = %d (ensure that device can handle)\n", (int)(global_work_size[0]/local_work_size[0]), (int)local_work_size[0]);
- //======================================================================================================================================================150
- // Extract Kernel - SCALE IMAGE DOWN FROM 0-255 TO 0-1 AND EXTRACT
- //======================================================================================================================================================150
- //====================================================================================================100
- // set arguments
- //====================================================================================================100
- error = clSetKernelArg( extract_kernel,
- 0,
- sizeof(long)*2,
- (void *) &Ne);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( extract_kernel,
- 1,
- sizeof(cl_mem),
- (void *) &d_I);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // launch kernel
- //====================================================================================================100
- error = clEnqueueNDRangeKernel( command_queue,
- extract_kernel,
- 1,
- NULL,
- global_work_size,
- local_work_size,
- 0,
- NULL,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // Synchronization - wait for all operations in the command queue so far to finish
- //====================================================================================================100
- // error = clFinish(command_queue);
- // if (error != CL_SUCCESS)
- // fatal_CL(error, __LINE__);
- //====================================================================================================100
- // End
- //====================================================================================================100
- //======================================================================================================================================================150
- // WHAT IS CONSTANT IN COMPUTATION LOOP
- //======================================================================================================================================================150
- //====================================================================================================100
- // Prepare Kernel
- //====================================================================================================100
- error = clSetKernelArg( prepare_kernel,
- 0,
- sizeof(long)*2,
- (void *) &Ne);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( prepare_kernel,
- 1,
- sizeof(cl_mem),
- (void *) &d_I);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( prepare_kernel,
- 2,
- sizeof(cl_mem),
- (void *) &d_sums);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( prepare_kernel,
- 3,
- sizeof(cl_mem),
- (void *) &d_sums2);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // Reduce Kernel
- //====================================================================================================100
- int blocks2_x;
- int blocks2_work_size;
- size_t global_work_size2[1];
- long no;
- int mul;
- int mem_size_single = sizeof(fp) * 1;
- fp total;
- fp total2;
- fp meanROI;
- fp meanROI2;
- fp varROI;
- fp q0sqr;
- error = clSetKernelArg( reduce_kernel,
- 0,
- sizeof(long)*2,
- (void *) &Ne);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( reduce_kernel,
- 3,
- sizeof(cl_mem),
- (void *) &d_sums);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( reduce_kernel,
- 4,
- sizeof(cl_mem),
- (void *) &d_sums2);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // SRAD Kernel
- //====================================================================================================100
- error = clSetKernelArg( srad_kernel,
- 0,
- sizeof(fp),
- (void *) &lambda);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( srad_kernel,
- 1,
- sizeof(int),
- (void *) &Nr);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( srad_kernel,
- 2,
- sizeof(int),
- (void *) &Nc);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( srad_kernel,
- 3,
- sizeof(long)*2,
- (void *) &Ne);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( srad_kernel,
- 4,
- sizeof(cl_mem),
- (void *) &d_iN);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( srad_kernel,
- 5,
- sizeof(cl_mem),
- (void *) &d_iS);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( srad_kernel,
- 6,
- sizeof(cl_mem),
- (void *) &d_jE);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( srad_kernel,
- 7,
- sizeof(cl_mem),
- (void *) &d_jW);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( srad_kernel,
- 8,
- sizeof(cl_mem),
- (void *) &d_dN);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( srad_kernel,
- 9,
- sizeof(cl_mem),
- (void *) &d_dS);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( srad_kernel,
- 10,
- sizeof(cl_mem),
- (void *) &d_dW);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( srad_kernel,
- 11,
- sizeof(cl_mem),
- (void *) &d_dE);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( srad_kernel,
- 13,
- sizeof(cl_mem),
- (void *) &d_c);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( srad_kernel,
- 14,
- sizeof(cl_mem),
- (void *) &d_I);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // SRAD2 Kernel
- //====================================================================================================100
- error = clSetKernelArg( srad2_kernel,
- 0,
- sizeof(fp),
- (void *) &lambda);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( srad2_kernel,
- 1,
- sizeof(int),
- (void *) &Nr);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( srad2_kernel,
- 2,
- sizeof(int),
- (void *) &Nc);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( srad2_kernel,
- 3,
- sizeof(long)*2,
- (void *) &Ne);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( srad2_kernel,
- 4,
- sizeof(cl_mem),
- (void *) &d_iN);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( srad2_kernel,
- 5,
- sizeof(cl_mem),
- (void *) &d_iS);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( srad2_kernel,
- 6,
- sizeof(cl_mem),
- (void *) &d_jE);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( srad2_kernel,
- 7,
- sizeof(cl_mem),
- (void *) &d_jW);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( srad2_kernel,
- 8,
- sizeof(cl_mem),
- (void *) &d_dN);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( srad2_kernel,
- 9,
- sizeof(cl_mem),
- (void *) &d_dS);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( srad2_kernel,
- 10,
- sizeof(cl_mem),
- (void *) &d_dW);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( srad2_kernel,
- 11,
- sizeof(cl_mem),
- (void *) &d_dE);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( srad2_kernel,
- 12,
- sizeof(cl_mem),
- (void *) &d_c);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( srad2_kernel,
- 13,
- sizeof(cl_mem),
- (void *) &d_I);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // End
- //====================================================================================================100
- //======================================================================================================================================================150
- // COMPUTATION
- //======================================================================================================================================================150
- printf("Iterations Progress: ");
- // execute main loop
- for (iter=0; iter<niter; iter++){ // do for the number of iterations input parameter
- printf("%d ", iter);
- fflush(NULL);
- //====================================================================================================100
- // Prepare kernel
- //====================================================================================================100
- // launch kernel
- error = clEnqueueNDRangeKernel( command_queue,
- prepare_kernel,
- 1,
- NULL,
- global_work_size,
- local_work_size,
- 0,
- NULL,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // synchronize
- // error = clFinish(command_queue);
- // if (error != CL_SUCCESS)
- // fatal_CL(error, __LINE__);
- //====================================================================================================100
- // Reduce Kernel - performs subsequent reductions of sums
- //====================================================================================================100
- // initial values
- blocks2_work_size = blocks_work_size; // original number of blocks
- global_work_size2[0] = global_work_size[0];
- no = Ne; // original number of sum elements
- mul = 1; // original multiplier
- // loop
- while(blocks2_work_size != 0){
- // set arguments that were uptaded in this loop
- error = clSetKernelArg( reduce_kernel,
- 1,
- sizeof(long)*2,
- (void *) &no);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( reduce_kernel,
- 2,
- sizeof(int),
- (void *) &mul);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( reduce_kernel,
- 5,
- sizeof(int),
- (void *) &blocks2_work_size);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // launch kernel
- error = clEnqueueNDRangeKernel( command_queue,
- reduce_kernel,
- 1,
- NULL,
- global_work_size2,
- local_work_size,
- 0,
- NULL,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // synchronize
- // error = clFinish(command_queue);
- // if (error != CL_SUCCESS)
- // fatal_CL(error, __LINE__);
- // update execution parameters
- no = blocks2_work_size; // get current number of elements
- if(blocks2_work_size == 1){
- blocks2_work_size = 0;
- }
- else{
- mul = mul * NUMBER_THREADS; // update the increment
- blocks_x = blocks2_work_size/(int)local_work_size[0]; // number of blocks
- if (blocks2_work_size % (int)local_work_size[0] != 0){ // compensate for division remainder above by adding one grid
- blocks_x = blocks_x + 1;
- }
- blocks2_work_size = blocks_x;
- global_work_size2[0] = blocks2_work_size * (int)local_work_size[0];
- }
- }
- // copy total sums to device
- error = clEnqueueReadBuffer(command_queue,
- d_sums,
- CL_TRUE,
- 0,
- mem_size_single,
- &total,
- 0,
- NULL,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clEnqueueReadBuffer(command_queue,
- d_sums2,
- CL_TRUE,
- 0,
- mem_size_single,
- &total2,
- 0,
- NULL,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // calculate statistics
- //====================================================================================================100
-
- meanROI = total / (fp)(NeROI); // gets mean (average) value of element in ROI
- meanROI2 = meanROI * meanROI; //
- varROI = (total2 / (fp)(NeROI)) - meanROI2; // gets variance of ROI
- q0sqr = varROI / meanROI2; // gets standard deviation of ROI
- //====================================================================================================100
- // execute srad kernel
- //====================================================================================================100
- // set arguments that were uptaded in this loop
- error = clSetKernelArg( srad_kernel,
- 12,
- sizeof(fp),
- (void *) &q0sqr);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // launch kernel
- error = clEnqueueNDRangeKernel( command_queue,
- srad_kernel,
- 1,
- NULL,
- global_work_size,
- local_work_size,
- 0,
- NULL,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // synchronize
- // error = clFinish(command_queue);
- // if (error != CL_SUCCESS)
- // fatal_CL(error, __LINE__);
- //====================================================================================================100
- // execute srad2 kernel
- //====================================================================================================100
- // launch kernel
- error = clEnqueueNDRangeKernel( command_queue,
- srad2_kernel,
- 1,
- NULL,
- global_work_size,
- local_work_size,
- 0,
- NULL,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // synchronize
- // error = clFinish(command_queue);
- // if (error != CL_SUCCESS)
- // fatal_CL(error, __LINE__);
- //====================================================================================================100
- // End
- //====================================================================================================100
- }
- printf("\n");
- //======================================================================================================================================================150
- // Compress Kernel - SCALE IMAGE UP FROM 0-1 TO 0-255 AND COMPRESS
- //======================================================================================================================================================150
- //====================================================================================================100
- // set parameters
- //====================================================================================================100
- error = clSetKernelArg( compress_kernel,
- 0,
- sizeof(long)*2,
- (void *) &Ne);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( compress_kernel,
- 1,
- sizeof(cl_mem),
- (void *) &d_I);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // launch kernel
- //====================================================================================================100
- error = clEnqueueNDRangeKernel( command_queue,
- compress_kernel,
- 1,
- NULL,
- global_work_size,
- local_work_size,
- 0,
- NULL,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // synchronize
- //====================================================================================================100
- error = clFinish(command_queue);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // End
- //====================================================================================================100
- //======================================================================================================================================================150
- // COPY RESULTS BACK TO CPU
- //======================================================================================================================================================150
- error = clEnqueueReadBuffer(command_queue,
- d_I,
- CL_TRUE,
- 0,
- mem_size,
- image,
- 0,
- NULL,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // int i;
- // for(i=0; i<100; i++){
- // printf("%f ", image[i]);
- // }
- //======================================================================================================================================================150
- // FREE MEMORY
- //======================================================================================================================================================150
- // OpenCL structures
- error = clReleaseKernel(extract_kernel);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseKernel(prepare_kernel);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseKernel(reduce_kernel);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseKernel(srad_kernel);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseKernel(srad2_kernel);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseKernel(compress_kernel);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseProgram(program);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // common_change
- error = clReleaseMemObject(d_I);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_c);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_iN);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_iS);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_jE);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_jW);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_dN);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_dS);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_dE);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_dW);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_sums);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_sums2);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // OpenCL structures
- error = clFlush(command_queue);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseCommandQueue(command_queue);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseContext(context);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //======================================================================================================================================================150
- // End
- //======================================================================================================================================================150
- }
- //========================================================================================================================================================================================================200
- // End
- //========================================================================================================================================================================================================200
|