123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467 |
- //========================================================================================================================================================================================================200
- // DEFINE/INCLUDE
- //========================================================================================================================================================================================================200
- //======================================================================================================================================================150
- // LIBRARIES
- //======================================================================================================================================================150
- #include <CL/cl.h> // (in directory specified to compiler) needed by OpenCL types and functions
- //======================================================================================================================================================150
- // MAIN FUNCTION HEADER
- //======================================================================================================================================================150
- #include "./../main.h" // (in main directory) needed to recognized input parameters
- //======================================================================================================================================================150
- // DEFINE
- //======================================================================================================================================================150
- //======================================================================================================================================================150
- // UTILITIES
- //======================================================================================================================================================150
- #include "./../util/opencl/opencl.h" // (in directory) needed by device functions
- #include "./../util/avi/avilib.h" // (in directory) needed by avi functions
- #include "./../util/avi/avimod.h" // (in directory) needed by avi functions
- //======================================================================================================================================================150
- // KERNEL_GPU_CUDA_WRAPPER FUNCTION HEADER
- //======================================================================================================================================================150
- #include "./kernel_gpu_opencl_wrapper.h" // (in directory)
- //======================================================================================================================================================150
- // END
- //======================================================================================================================================================150
- //========================================================================================================================================================================================================200
- // KERNEL_GPU_CUDA_WRAPPER FUNCTION
- //========================================================================================================================================================================================================200
- void
- kernel_gpu_opencl_wrapper( params_common common,
- int* endoRow,
- int* endoCol,
- int* tEndoRowLoc,
- int* tEndoColLoc,
- int* epiRow,
- int* epiCol,
- int* tEpiRowLoc,
- int* tEpiColLoc,
- avi_t* frames,
- int platform_id,
- int device_id,
- int use_gpu)
- {
- //======================================================================================================================================================150
- // CPU VARIABLES
- //======================================================================================================================================================150
- // common variables
- int i;
- //======================================================================================================================================================150
- // GPU SETUP
- //======================================================================================================================================================150
- //====================================================================================================100
- // COMMON VARIABLES
- //====================================================================================================100
- // common variables
- int error;
- //====================================================================================================100
- // GET PLATFORMS (Intel, AMD, NVIDIA, based on provided library), SELECT ONE
- //====================================================================================================100
- // Get number of available platforms
- cl_uint num_platforms;
- error = clGetPlatformIDs( 0,
- NULL,
- &num_platforms); // # of platforms
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- printf("# of platforms %d\n", num_platforms);
- // Get list of available platforms
- cl_platform_id *platforms = (cl_platform_id *)malloc(sizeof(cl_platform_id) * num_platforms);
- error = clGetPlatformIDs( num_platforms,
- platforms,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // Get names of platforms and print them
- cl_char pbuf[100];
- int plat_count;
- cl_platform_id platform;
- for(plat_count = 0; plat_count < num_platforms; plat_count++){
- platform = platforms[plat_count];
- error = clGetPlatformInfo( platform,
- CL_PLATFORM_VENDOR,
- sizeof(pbuf),
- pbuf,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- printf("\tPlatform %d: %s\n", plat_count, pbuf);
- }
- // Select platform on the basis of the passed as function argument
- platform = platforms[platform_id];
- printf("Selecting platform %d\n", platform_id);
- //====================================================================================================100
- // CREATE CONTEXT FOR THE PLATFORM
- //====================================================================================================100
- // Create context properties for selected platform
- cl_context_properties context_properties[3] = { CL_CONTEXT_PLATFORM,
- (cl_context_properties) platform,
- 0};
-
- // Create the device_type selector in the basis of the argument passed (use_gpu)
- cl_device_type device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
-
- // Create context for selected platform being GPU or CPU
- cl_context context;
- context = clCreateContextFromType( context_properties,
- device_type,
- NULL,
- NULL,
- &error);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // GET DEVICES AVAILABLE FOR THE CONTEXT, SELECT ONE
- //====================================================================================================100
- // Get number of devices (previousely selected for the context)
- size_t devices_size;
- error = clGetContextInfo( context,
- CL_CONTEXT_DEVICES,
- 0,
- NULL,
- &devices_size); // number of bytes (devices * sizeof(cl_device_id))
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- int num_devices = devices_size / sizeof(cl_device_id);
- printf("# of devices %d\n", num_devices);
- // Get the list of devices (previousely selected for the context)
- cl_device_id* devices = (cl_device_id*)malloc(num_devices*sizeof(cl_device_id));
- error = clGetContextInfo( context,
- CL_CONTEXT_DEVICES,
- devices_size,
- devices,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // Get names of devices and print them
- cl_char dbuf[100];
- int devi_count;
- cl_device_id device;
- for(devi_count = 0; devi_count < num_devices; devi_count++){
- device = devices[devi_count];
- error = clGetDeviceInfo(device,
- CL_DEVICE_NAME,
- sizeof(dbuf),
- dbuf,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- printf("\tDevice %d: %s\n", devi_count, dbuf);
- }
- // Select device on the basis of the argument passed as function argument
- device = devices[device_id];
- printf("Selecting device %d\n", device_id);
- //====================================================================================================100
- // CREATE COMMAND QUEUE FOR THE DEVICE
- //====================================================================================================100
- // Create a command queue
- cl_command_queue command_queue;
- command_queue = clCreateCommandQueue( context,
- device,
- 0,
- &error);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // CREATE PROGRAM, COMPILE IT
- //====================================================================================================100
- // Load kernel source code from file
- const char *source = load_kernel_source("./kernel/kernel_gpu_opencl.cl");
- size_t sourceSize = strlen(source);
- // Create the program
- cl_program program = clCreateProgramWithSource( context,
- 1,
- &source,
- &sourceSize,
- &error);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- char clOptions[150];
- // sprintf(clOptions,"-I../../src");
- sprintf(clOptions,"-I.");
- #ifdef RD_WG_SIZE
- sprintf(clOptions + strlen(clOptions), " -DRD_WG_SIZE=%d", RD_WG_SIZE);
- #endif
- #ifdef RD_WG_SIZE_0
- sprintf(clOptions + strlen(clOptions), " -DRD_WG_SIZE_0=%d", RD_WG_SIZE_0);
- #endif
- #ifdef RD_WG_SIZE_0_0
- sprintf(clOptions + strlen(clOptions), " -DRD_WG_SIZE_0_0=%d", RD_WG_SIZE_0_0);
- #endif
- // Compile the program
- error = clBuildProgram( program,
- 1,
- &device,
- clOptions,
- NULL,
- NULL);
- // Print warnings and errors from compilation
- static cl_char log[65536];
- memset(log, 0, sizeof(log));
- clGetProgramBuildInfo( program,
- device,
- CL_PROGRAM_BUILD_LOG,
- sizeof(log)-1,
- log,
- NULL);
- printf("-----OpenCL Compiler Output-----\n");
- if (strstr(log,"warning:") || strstr(log, "error:"))
- printf("<<<<\n%s\n>>>>\n", log);
- printf("--------------------------------\n");
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // Create kernel
- cl_kernel kernel;
- kernel = clCreateKernel(program,
- "kernel_gpu_opencl",
- &error);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // TRIGGERING INITIAL DRIVER OVERHEAD
- //====================================================================================================100
- // cudaThreadSynchronize(); // the above does it
- //======================================================================================================================================================150
- // GPU MEMORY ALLOCATION
- //======================================================================================================================================================150
- //====================================================================================================100
- // Common (COPY IN)
- //====================================================================================================100
- cl_mem d_common;
- d_common = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- common.common_mem,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // Frame (COPY IN)
- //====================================================================================================100
- // common
- cl_mem d_frame;
- d_frame = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- common.frame_mem,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // Inputs (COPY IN)
- //====================================================================================================100
- //==================================================50
- // endo points
- //==================================================50
- // common
- cl_mem d_endoRow;
- d_endoRow = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- common.endo_mem,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- cl_mem d_endoCol;
- d_endoCol = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- common.endo_mem,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- cl_mem d_tEndoRowLoc;
- d_tEndoRowLoc = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- common.endo_mem * common.no_frames,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- cl_mem d_tEndoColLoc;
- d_tEndoColLoc = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- common.endo_mem * common.no_frames,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // epi points
- //==================================================50
- // common
- cl_mem d_epiRow;
- d_epiRow = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- common.epi_mem,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- cl_mem d_epiCol;
- d_epiCol = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- common.epi_mem,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- cl_mem d_tEpiRowLoc;
- d_tEpiRowLoc = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- common.epi_mem * common.no_frames,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- cl_mem d_tEpiColLoc;
- d_tEpiColLoc = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- common.epi_mem * common.no_frames,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // Array of Templates for All Points (COPY IN)
- //====================================================================================================100
- //==================================================50
- // template sizes
- //==================================================50
- // common
- printf("tSize is %d, sSize is %d\n", common.tSize, common.sSize);
- common.in_rows = common.tSize + 1 + common.tSize;
- common.in_cols = common.in_rows;
- common.in_elem = common.in_rows * common.in_cols;
- common.in_mem = sizeof(fp) * common.in_elem;
- //==================================================50
- // endo points templates
- //==================================================50
- // common
- cl_mem d_endoT;
- d_endoT = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- common.in_mem * common.endoPoints,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // epi points templates
- //==================================================50
- // common
- cl_mem d_epiT;
- d_epiT = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- common.in_mem * common.epiPoints,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // AREA AROUND POINT FROM FRAME (LOCAL)
- //====================================================================================================100
- // common
- common.in2_rows = common.sSize + 1 + common.sSize;
- common.in2_cols = common.in2_rows;
- common.in2_elem = common.in2_rows * common.in2_cols;
- common.in2_mem = sizeof(fp) * common.in2_elem;
- // unique
- cl_mem d_in2;
- d_in2 = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- common.in2_mem * common.allPoints,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // CONVOLUTION (LOCAL)
- //====================================================================================================100
- // common
- common.conv_rows = common.in_rows + common.in2_rows - 1; // number of rows in I
- common.conv_cols = common.in_cols + common.in2_cols - 1; // number of columns in I
- common.conv_elem = common.conv_rows * common.conv_cols; // number of elements
- common.conv_mem = sizeof(fp) * common.conv_elem;
- common.ioffset = 0;
- common.joffset = 0;
- // unique
- cl_mem d_conv;
- d_conv = clCreateBuffer(context,
- CL_MEM_READ_WRITE,
- common.conv_mem * common.allPoints,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // CUMULATIVE SUM (LOCAL)
- //====================================================================================================100
- //==================================================50
- // PADDING OF ARRAY, VERTICAL CUMULATIVE SUM
- //==================================================50
- // common
- common.in2_pad_add_rows = common.in_rows;
- common.in2_pad_add_cols = common.in_cols;
- common.in2_pad_cumv_rows = common.in2_rows + 2*common.in2_pad_add_rows;
- common.in2_pad_cumv_cols = common.in2_cols + 2*common.in2_pad_add_cols;
- common.in2_pad_cumv_elem = common.in2_pad_cumv_rows * common.in2_pad_cumv_cols;
- common.in2_pad_cumv_mem = sizeof(fp) * common.in2_pad_cumv_elem;
- // unique
- cl_mem d_in2_pad_cumv;
- d_in2_pad_cumv = clCreateBuffer(context,
- CL_MEM_READ_WRITE,
- common.in2_pad_cumv_mem * common.allPoints,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // SELECTION
- //==================================================50
- // common
- common.in2_pad_cumv_sel_rowlow = 1 + common.in_rows; // (1 to n+1)
- common.in2_pad_cumv_sel_rowhig = common.in2_pad_cumv_rows - 1;
- common.in2_pad_cumv_sel_collow = 1;
- common.in2_pad_cumv_sel_colhig = common.in2_pad_cumv_cols;
- common.in2_pad_cumv_sel_rows = common.in2_pad_cumv_sel_rowhig - common.in2_pad_cumv_sel_rowlow + 1;
- common.in2_pad_cumv_sel_cols = common.in2_pad_cumv_sel_colhig - common.in2_pad_cumv_sel_collow + 1;
- common.in2_pad_cumv_sel_elem = common.in2_pad_cumv_sel_rows * common.in2_pad_cumv_sel_cols;
- common.in2_pad_cumv_sel_mem = sizeof(fp) * common.in2_pad_cumv_sel_elem;
- // unique
- cl_mem d_in2_pad_cumv_sel;
- d_in2_pad_cumv_sel = clCreateBuffer(context,
- CL_MEM_READ_WRITE,
- common.in2_pad_cumv_sel_mem * common.allPoints,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // SELECTION 2, SUBTRACTION, HORIZONTAL CUMULATIVE SUM
- //==================================================50
- // common
- common.in2_pad_cumv_sel2_rowlow = 1;
- common.in2_pad_cumv_sel2_rowhig = common.in2_pad_cumv_rows - common.in_rows - 1;
- common.in2_pad_cumv_sel2_collow = 1;
- common.in2_pad_cumv_sel2_colhig = common.in2_pad_cumv_cols;
- common.in2_sub_cumh_rows = common.in2_pad_cumv_sel2_rowhig - common.in2_pad_cumv_sel2_rowlow + 1;
- common.in2_sub_cumh_cols = common.in2_pad_cumv_sel2_colhig - common.in2_pad_cumv_sel2_collow + 1;
- common.in2_sub_cumh_elem = common.in2_sub_cumh_rows * common.in2_sub_cumh_cols;
- common.in2_sub_cumh_mem = sizeof(fp) * common.in2_sub_cumh_elem;
- // unique
- cl_mem d_in2_sub_cumh;
- d_in2_sub_cumh = clCreateBuffer(context,
- CL_MEM_READ_WRITE,
- common.in2_sub_cumh_mem * common.allPoints,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // SELECTION
- //==================================================50
- // common
- common.in2_sub_cumh_sel_rowlow = 1;
- common.in2_sub_cumh_sel_rowhig = common.in2_sub_cumh_rows;
- common.in2_sub_cumh_sel_collow = 1 + common.in_cols;
- common.in2_sub_cumh_sel_colhig = common.in2_sub_cumh_cols - 1;
- common.in2_sub_cumh_sel_rows = common.in2_sub_cumh_sel_rowhig - common.in2_sub_cumh_sel_rowlow + 1;
- common.in2_sub_cumh_sel_cols = common.in2_sub_cumh_sel_colhig - common.in2_sub_cumh_sel_collow + 1;
- common.in2_sub_cumh_sel_elem = common.in2_sub_cumh_sel_rows * common.in2_sub_cumh_sel_cols;
- common.in2_sub_cumh_sel_mem = sizeof(fp) * common.in2_sub_cumh_sel_elem;
- // unique
- cl_mem d_in2_sub_cumh_sel;
- d_in2_sub_cumh_sel = clCreateBuffer(context,
- CL_MEM_READ_WRITE,
- common.in2_sub_cumh_sel_mem * common.allPoints,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // SELECTION 2, SUBTRACTION
- //==================================================50
- // common
- common.in2_sub_cumh_sel2_rowlow = 1;
- common.in2_sub_cumh_sel2_rowhig = common.in2_sub_cumh_rows;
- common.in2_sub_cumh_sel2_collow = 1;
- common.in2_sub_cumh_sel2_colhig = common.in2_sub_cumh_cols - common.in_cols - 1;
- common.in2_sub2_rows = common.in2_sub_cumh_sel2_rowhig - common.in2_sub_cumh_sel2_rowlow + 1;
- common.in2_sub2_cols = common.in2_sub_cumh_sel2_colhig - common.in2_sub_cumh_sel2_collow + 1;
- common.in2_sub2_elem = common.in2_sub2_rows * common.in2_sub2_cols;
- common.in2_sub2_mem = sizeof(fp) * common.in2_sub2_elem;
- // unique
- cl_mem d_in2_sub2;
- d_in2_sub2 = clCreateBuffer(context,
- CL_MEM_READ_WRITE,
- common.in2_sub2_mem * common.allPoints,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // CUMULATIVE SUM 2 (LOCAL)
- //====================================================================================================100
- //==================================================50
- // MULTIPLICATION
- //==================================================50
- // common
- common.in2_sqr_rows = common.in2_rows;
- common.in2_sqr_cols = common.in2_cols;
- common.in2_sqr_elem = common.in2_elem;
- common.in2_sqr_mem = common.in2_mem;
- // unique
- cl_mem d_in2_sqr;
- d_in2_sqr = clCreateBuffer(context,
- CL_MEM_READ_WRITE,
- common.in2_sqr_mem * common.allPoints,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // SELECTION 2, SUBTRACTION
- //==================================================50
- // common
- common.in2_sqr_sub2_rows = common.in2_sub2_rows;
- common.in2_sqr_sub2_cols = common.in2_sub2_cols;
- common.in2_sqr_sub2_elem = common.in2_sub2_elem;
- common.in2_sqr_sub2_mem = common.in2_sub2_mem;
- // unique
- cl_mem d_in2_sqr_sub2;
- d_in2_sqr_sub2 = clCreateBuffer(context,
- CL_MEM_READ_WRITE,
- common.in2_sqr_sub2_mem * common.allPoints,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // FINAL (LOCAL)
- //====================================================================================================100
- // common
- common.in_sqr_rows = common.in_rows;
- common.in_sqr_cols = common.in_cols;
- common.in_sqr_elem = common.in_elem;
- common.in_sqr_mem = common.in_mem;
- // unique
- cl_mem d_in_sqr;
- d_in_sqr = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- common.in_sqr_mem * common.allPoints,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // TEMPLATE MASK CREATE (LOCAL)
- //====================================================================================================100
- // common
- common.tMask_rows = common.in_rows + (common.sSize+1+common.sSize) - 1;
- common.tMask_cols = common.tMask_rows;
- common.tMask_elem = common.tMask_rows * common.tMask_cols;
- common.tMask_mem = sizeof(fp) * common.tMask_elem;
- // unique
- cl_mem d_tMask;
- d_tMask = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- common.tMask_mem * common.allPoints,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // POINT MASK INITIALIZE (LOCAL)
- //====================================================================================================100
- // common
- common.mask_rows = common.maxMove;
- common.mask_cols = common.mask_rows;
- common.mask_elem = common.mask_rows * common.mask_cols;
- common.mask_mem = sizeof(fp) * common.mask_elem;
- //====================================================================================================100
- // MASK CONVOLUTION (LOCAL)
- //====================================================================================================100
- // common
- common.mask_conv_rows = common.tMask_rows; // number of rows in I
- common.mask_conv_cols = common.tMask_cols; // number of columns in I
- common.mask_conv_elem = common.mask_conv_rows * common.mask_conv_cols; // number of elements
- common.mask_conv_mem = sizeof(fp) * common.mask_conv_elem;
- common.mask_conv_ioffset = (common.mask_rows-1)/2;
- if((common.mask_rows-1) % 2 > 0.5){
- common.mask_conv_ioffset = common.mask_conv_ioffset + 1;
- }
- common.mask_conv_joffset = (common.mask_cols-1)/2;
- if((common.mask_cols-1) % 2 > 0.5){
- common.mask_conv_joffset = common.mask_conv_joffset + 1;
- }
- // unique
- cl_mem d_mask_conv;
- d_mask_conv = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- common.mask_conv_mem * common.allPoints,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // END
- //====================================================================================================100
- //======================================================================================================================================================150
- // GPU MEMORY COPY
- //======================================================================================================================================================150
- //====================================================================================================100
- // Inputs
- //====================================================================================================100
- //==================================================50
- // endo points
- //==================================================50
- error = clEnqueueWriteBuffer( command_queue,
- d_endoRow,
- 1,
- 0,
- common.endo_mem,
- endoRow,
- 0,
- 0,
- 0);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clEnqueueWriteBuffer( command_queue,
- d_endoCol,
- 1,
- 0,
- common.endo_mem,
- endoCol,
- 0,
- 0,
- 0);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // epi points
- //==================================================50
- error = clEnqueueWriteBuffer( command_queue,
- d_epiRow,
- 1,
- 0,
- common.epi_mem,
- epiRow,
- 0,
- 0,
- 0);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clEnqueueWriteBuffer( command_queue,
- d_epiCol,
- 1,
- 0,
- common.epi_mem,
- epiCol,
- 0,
- 0,
- 0);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // END
- //==================================================50
- //====================================================================================================100
- // END
- //====================================================================================================100
- //======================================================================================================================================================150
- // KERNEL
- //======================================================================================================================================================150
- //====================================================================================================100
- // EXECUTION PARAMETERS
- //====================================================================================================100
- // All kernels operations within kernel use same max size of threads. Size of block size is set to the size appropriate for max size operation (on padded matrix). Other use subsets of that.
- size_t local_work_size[1];
- local_work_size[0] = NUMBER_THREADS;
- size_t global_work_size[1];
- global_work_size[0] = common.allPoints * local_work_size[0];
- printf("# of workgroups = %d, # of threads/workgroup = %d (ensure that device can handle)\n", (int)(global_work_size[0]/local_work_size[0]), (int)local_work_size[0]);
- //====================================================================================================100
- // COPY ARGUMENTS
- //====================================================================================================100
- error = clEnqueueWriteBuffer( command_queue,
- d_common,
- 1,
- 0,
- common.common_mem,
- &common,
- 0,
- 0,
- 0);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // set kernel arguments
- //====================================================================================================100
- // structures
- error = clSetKernelArg( kernel,
- 0,
- sizeof(params_common),
- (void *) &common);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // common
- error = clSetKernelArg( kernel,
- 3,
- sizeof(cl_mem),
- (void *) &d_endoRow);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( kernel,
- 4,
- sizeof(cl_mem),
- (void *) &d_endoCol);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( kernel,
- 5,
- sizeof(cl_mem),
- (void *) &d_tEndoRowLoc);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( kernel,
- 6,
- sizeof(cl_mem),
- (void *) &d_tEndoColLoc);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( kernel,
- 7,
- sizeof(cl_mem),
- (void *) &d_epiRow);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( kernel,
- 8,
- sizeof(cl_mem),
- (void *) &d_epiCol);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( kernel,
- 9,
- sizeof(cl_mem),
- (void *) &d_tEpiRowLoc);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( kernel,
- 10,
- sizeof(cl_mem),
- (void *) &d_tEpiColLoc);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // common_unique
- error = clSetKernelArg( kernel,
- 11,
- sizeof(cl_mem),
- (void *) &d_endoT);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( kernel,
- 12,
- sizeof(cl_mem),
- (void *) &d_epiT);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( kernel,
- 13,
- sizeof(cl_mem),
- (void *) &d_in2);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( kernel,
- 14,
- sizeof(cl_mem),
- (void *) &d_conv);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( kernel,
- 15,
- sizeof(cl_mem),
- (void *) &d_in2_pad_cumv);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( kernel,
- 16,
- sizeof(cl_mem),
- (void *) &d_in2_pad_cumv_sel);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( kernel,
- 17,
- sizeof(cl_mem),
- (void *) &d_in2_sub_cumh);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( kernel,
- 18,
- sizeof(cl_mem),
- (void *) &d_in2_sub_cumh_sel);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( kernel,
- 19,
- sizeof(cl_mem),
- (void *) &d_in2_sub2);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( kernel,
- 20,
- sizeof(cl_mem),
- (void *) &d_in2_sqr);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( kernel,
- 21,
- sizeof(cl_mem),
- (void *) &d_in2_sqr_sub2);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( kernel,
- 22,
- sizeof(cl_mem),
- (void *) &d_in_sqr);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( kernel,
- 23,
- sizeof(cl_mem),
- (void *) &d_tMask);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( kernel,
- 24,
- sizeof(cl_mem),
- (void *) &d_mask_conv);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // // local
- // // int local_size_one;
- // // local_size_one = common.in_rows;
- // error = clSetKernelArg( kernel,
- // 25,
- // // sizeof(fp) * local_size_one, // size 51
- // sizeof(fp) * common.in_elem,
- // NULL);
- // if (error != CL_SUCCESS)
- // fatal_CL(error, __LINE__);
- // error = clSetKernelArg( kernel,
- // 26,
- // // sizeof(fp) * local_size_one, // size 51
- // sizeof(fp) * common.in_cols,
- // NULL);
- // if (error != CL_SUCCESS)
- // fatal_CL(error, __LINE__);
- // // int local_size_two;
- // // local_size_two = common.in_rows + common.in2_rows - 1;
- // error = clSetKernelArg( kernel,
- // 27,
- // // sizeof(fp) * local_size_two, // size 51+81-1=131
- // sizeof(fp) * common.in_sqr_rows,
- // NULL);
- // if (error != CL_SUCCESS)
- // fatal_CL(error, __LINE__);
- // error = clSetKernelArg( kernel,
- // 28,
- // // sizeof(fp) * local_size_two, // size 51+81-1=131
- // sizeof(fp) * common.mask_conv_rows,
- // NULL);
- // if (error != CL_SUCCESS)
- // fatal_CL(error, __LINE__);
- // // int local_size_three;
- // // local_size_three = common.in_rows * common.in_rows;
- // error = clSetKernelArg( kernel,
- // 29,
- // // sizeof(fp) * local_size_three, // size 51*51=2601
- // sizeof(int) * common.mask_conv_rows,
- // NULL);
- // if (error != CL_SUCCESS)
- // fatal_CL(error, __LINE__);
- // int local_size;
- // local_size = (common.in_elem + common.in_cols + common.in_sqr_rows + common.mask_conv_rows) * 4 + common.mask_conv_rows * 2;
- // printf("size of used local memory/workgroup = %dB (ensure that device can handle)\n", local_size);
- cl_mem d_in_mod_temp;
- d_in_mod_temp = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- sizeof(fp) * common.in_elem * common.allPoints,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- cl_mem in_partial_sum;
- in_partial_sum = clCreateBuffer(context,
- CL_MEM_READ_WRITE,
- sizeof(fp) * common.in_cols * common.allPoints,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- cl_mem in_sqr_partial_sum;
- in_sqr_partial_sum = clCreateBuffer(context,
- CL_MEM_READ_WRITE,
- sizeof(fp) * common.in_sqr_rows * common.allPoints,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- cl_mem par_max_val;
- par_max_val = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- sizeof(fp) * common.mask_conv_rows * common.allPoints,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- cl_mem par_max_coo;
- par_max_coo = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- sizeof(int) * common.mask_conv_rows * common.allPoints,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- cl_mem in_final_sum;
- in_final_sum = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- sizeof(fp) * common.allPoints,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- cl_mem in_sqr_final_sum;
- in_sqr_final_sum = clCreateBuffer( context,
- CL_MEM_READ_WRITE,
- sizeof(fp) * common.allPoints,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- cl_mem denomT;
- denomT = clCreateBuffer(context,
- CL_MEM_READ_WRITE,
- sizeof(fp) * common.allPoints,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( kernel,
- 25,
- sizeof(cl_mem),
- (void *) &d_in_mod_temp);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( kernel,
- 26,
- sizeof(cl_mem),
- (void *) &in_partial_sum);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( kernel,
- 27,
- sizeof(cl_mem),
- (void *) &in_sqr_partial_sum);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( kernel,
- 28,
- sizeof(cl_mem),
- (void *) &par_max_val);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( kernel,
- 29,
- sizeof(cl_mem),
- (void *) &par_max_coo);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( kernel,
- 30,
- sizeof(cl_mem),
- (void *) &in_final_sum);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( kernel,
- 31,
- sizeof(cl_mem),
- (void *) &in_sqr_final_sum);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( kernel,
- 32,
- sizeof(cl_mem),
- (void *) &denomT);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- cl_mem d_checksum;
- d_checksum = clCreateBuffer(context,
- CL_MEM_READ_WRITE,
- sizeof(fp) * CHECK,
- NULL,
- &error );
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( kernel,
- 33,
- sizeof(cl_mem),
- (void *) &d_checksum);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // PRINT FRAME PROGRESS START
- //====================================================================================================100
- printf("frame progress: ");
- fflush(NULL);
- //====================================================================================================100
- // LAUNCH
- //====================================================================================================100
- // variables
- fp* frame;
- int frame_no;
- for(frame_no=0; frame_no<common.frames_processed; frame_no++){
- //==================================================50
- // get and write current frame to GPU buffer
- //==================================================50
- // Extract a cropped version of the first frame from the video file
- frame = get_frame( frames, // pointer to video file
- frame_no, // number of frame that needs to be returned
- 0, // cropped?
- 0, // scaled?
- 1); // converted
- // copy frame to GPU memory
- error = clEnqueueWriteBuffer( command_queue,
- d_frame,
- 1,
- 0,
- common.frame_mem,
- frame,
- 0,
- 0,
- 0);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // kernel arguments that change inside this loop
- //==================================================50
- // common_change
- error = clSetKernelArg( kernel,
- 1,
- sizeof(cl_mem),
- (void *) &d_frame);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clSetKernelArg( kernel,
- 2,
- sizeof(int),
- (void *) &frame_no);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // launch kernel
- //==================================================50
- error = clEnqueueNDRangeKernel( command_queue,
- kernel,
- 1,
- NULL,
- global_work_size,
- local_work_size,
- 0,
- NULL,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //==================================================50
- // finish iteration
- //==================================================50
- // Synchronization, wait for all operations in the command queue so far to finish
- error = clFinish(command_queue);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // free frame after each loop iteration, since AVI library allocates memory for every frame fetched
- free(frame);
- //==================================================50
- // print frame progress
- //==================================================50
- // print frame progress
- printf("%d ", frame_no);
- fflush(NULL);
- //==================================================50
- // DISPLAY CHECKSUM (TESTING)
- //==================================================50
- #ifdef TEST_CHECKSUM
- fp* checksum;
- checksum = (fp*)malloc(sizeof(fp) * CHECK);
- error = clEnqueueReadBuffer(command_queue,
- d_checksum,
- CL_TRUE,
- 0,
- sizeof(fp)*CHECK,
- checksum,
- 0,
- NULL,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- printf("CHECKSUM:\n");
- for(i=0; i<CHECK; i++){
- printf("%f ", checksum[i]);
- }
- printf("\n\n");
- #endif
- //==================================================50
- // End
- //==================================================50
- }
- //====================================================================================================100
- // PRINT FRAME PROGRESS END
- //====================================================================================================100
- printf("\n");
- fflush(NULL);
- //======================================================================================================================================================150
- // OUTPUT
- //======================================================================================================================================================150
- //====================================================================================================100
- // endo points
- //====================================================================================================100
- error = clEnqueueReadBuffer(command_queue,
- d_tEndoRowLoc,
- CL_TRUE,
- 0,
- common.endo_mem * common.no_frames,
- tEndoRowLoc,
- 0,
- NULL,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // for testing of the output
- #ifdef TEST_OUTPUT
- int j;
- for(i=0; i<common.frames_processed; i++){
- printf("%d: ", i);
- for(j=0; j<common.endoPoints; j++){
- printf("%d ", tEndoRowLoc[j*common.no_frames+i]);
- }
- printf("\n\n");
- }
- #endif
- error = clEnqueueReadBuffer(command_queue,
- d_tEndoColLoc,
- CL_TRUE,
- 0,
- common.endo_mem * common.no_frames,
- tEndoColLoc,
- 0,
- NULL,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //====================================================================================================100
- // epi points
- //====================================================================================================100
- error = clEnqueueReadBuffer(command_queue,
- d_tEpiRowLoc,
- CL_TRUE,
- 0,
- common.epi_mem * common.no_frames,
- tEpiRowLoc,
- 0,
- NULL,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);;
- error = clEnqueueReadBuffer(command_queue,
- d_tEpiColLoc,
- CL_TRUE,
- 0,
- common.epi_mem * common.no_frames,
- tEpiColLoc,
- 0,
- NULL,
- NULL);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //======================================================================================================================================================150
- // DEALLOCATION
- //======================================================================================================================================================150
- // OpenCL structures
- error = clReleaseKernel(kernel);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseProgram(program);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // common_change
- error = clReleaseMemObject(d_frame);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // common
- error = clReleaseMemObject(d_endoRow);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_endoCol);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_tEndoRowLoc);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_tEndoColLoc);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_epiRow);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_epiCol);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_tEpiRowLoc);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_tEpiColLoc);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // common_unique
- error = clReleaseMemObject(d_endoT);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_epiT);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_in2);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_conv);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_in2_pad_cumv);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_in2_pad_cumv_sel);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_in2_sub_cumh);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_in2_sub_cumh_sel);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_in2_sub2);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_in2_sqr);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_in2_sqr_sub2);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_in_sqr);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_tMask);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseMemObject(d_mask_conv);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- // OpenCL structures
- error = clFlush(command_queue);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseCommandQueue(command_queue);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- error = clReleaseContext(context);
- if (error != CL_SUCCESS)
- fatal_CL(error, __LINE__);
- //======================================================================================================================================================150
- // End
- //======================================================================================================================================================150
- }
- //========================================================================================================================================================================================================200
- // END
- //========================================================================================================================================================================================================200
|