kernel_gpu_opencl_wrapper.c 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463
  1. //========================================================================================================================================================================================================200
  2. // DEFINE/INCLUDE
  3. //========================================================================================================================================================================================================200
  4. //======================================================================================================================================================150
  5. // LIBRARIES
  6. //======================================================================================================================================================150
  7. #include <CL/cl.h> // (in directory specified to compiler) needed by OpenCL types and functions
  8. //======================================================================================================================================================150
  9. // MAIN FUNCTION HEADER
  10. //======================================================================================================================================================150
  11. #include "./../main.h" // (in main directory) needed to recognized input parameters
  12. //======================================================================================================================================================150
  13. // DEFINE
  14. //======================================================================================================================================================150
  15. //======================================================================================================================================================150
  16. // UTILITIES
  17. //======================================================================================================================================================150
  18. #include "./../util/opencl/opencl.h" // (in directory) needed by device functions
  19. #include "./../util/avi/avilib.h" // (in directory) needed by avi functions
  20. #include "./../util/avi/avimod.h" // (in directory) needed by avi functions
  21. //======================================================================================================================================================150
  22. // KERNEL_GPU_CUDA_WRAPPER FUNCTION HEADER
  23. //======================================================================================================================================================150
  24. #include "./kernel_gpu_opencl_wrapper.h" // (in directory)
  25. //======================================================================================================================================================150
  26. // END
  27. //======================================================================================================================================================150
  28. //========================================================================================================================================================================================================200
  29. // KERNEL_GPU_CUDA_WRAPPER FUNCTION
  30. //========================================================================================================================================================================================================200
  31. void
  32. kernel_gpu_opencl_wrapper( params_common common,
  33. int* endoRow,
  34. int* endoCol,
  35. int* tEndoRowLoc,
  36. int* tEndoColLoc,
  37. int* epiRow,
  38. int* epiCol,
  39. int* tEpiRowLoc,
  40. int* tEpiColLoc,
  41. avi_t* frames)
  42. {
  43. //======================================================================================================================================================150
  44. // CPU VARIABLES
  45. //======================================================================================================================================================150
  46. // common variables
  47. int i;
  48. //======================================================================================================================================================150
  49. // GPU SETUP
  50. //======================================================================================================================================================150
  51. //====================================================================================================100
  52. // COMMON VARIABLES
  53. //====================================================================================================100
  54. // common variables
  55. int error;
  56. //====================================================================================================100
  57. // GET PLATFORMS (Intel, AMD, NVIDIA, based on provided library), SELECT ONE
  58. //====================================================================================================100
  59. // Get number of available platforms
  60. cl_uint num_platforms;
  61. error = clGetPlatformIDs( 0,
  62. NULL,
  63. &num_platforms); // # of platforms
  64. if (error != CL_SUCCESS)
  65. fatal_CL(error, __LINE__);
  66. printf("# of platforms %d\n", num_platforms);
  67. // Get list of available platforms
  68. cl_platform_id *platforms = (cl_platform_id *)malloc(sizeof(cl_platform_id) * num_platforms);
  69. error = clGetPlatformIDs( num_platforms,
  70. platforms,
  71. NULL);
  72. if (error != CL_SUCCESS)
  73. fatal_CL(error, __LINE__);
  74. // Get names of platforms and print them
  75. cl_char pbuf[100];
  76. int plat_count;
  77. cl_platform_id platform;
  78. for(plat_count = 0; plat_count < num_platforms; plat_count++){
  79. platform = platforms[plat_count];
  80. error = clGetPlatformInfo( platform,
  81. CL_PLATFORM_VENDOR,
  82. sizeof(pbuf),
  83. pbuf,
  84. NULL);
  85. if (error != CL_SUCCESS)
  86. fatal_CL(error, __LINE__);
  87. printf("\tPlatform %d: %s\n", plat_count, pbuf);
  88. }
  89. // Select platform
  90. int plat = 1;
  91. platform = platforms[plat];
  92. printf("Selecting platform %d\n", plat);
  93. //====================================================================================================100
  94. // CREATE CONTEXT FOR THE PLATFORM
  95. //====================================================================================================100
  96. // Create context properties for selected platform
  97. cl_context_properties context_properties[3] = { CL_CONTEXT_PLATFORM,
  98. (cl_context_properties) platform,
  99. 0};
  100. // Create context for selected platform being GPU
  101. cl_context context;
  102. context = clCreateContextFromType( context_properties,
  103. CL_DEVICE_TYPE_CPU,
  104. NULL,
  105. NULL,
  106. &error);
  107. if (error != CL_SUCCESS)
  108. fatal_CL(error, __LINE__);
  109. //====================================================================================================100
  110. // GET DEVICES AVAILABLE FOR THE CONTEXT, SELECT ONE
  111. //====================================================================================================100
  112. // Get number of devices (previousely selected for the context)
  113. size_t devices_size;
  114. error = clGetContextInfo( context,
  115. CL_CONTEXT_DEVICES,
  116. 0,
  117. NULL,
  118. &devices_size); // number of bytes (devices * sizeof(cl_device_id))
  119. if (error != CL_SUCCESS)
  120. fatal_CL(error, __LINE__);
  121. int num_devices = devices_size / sizeof(cl_device_id);
  122. printf("# of devices %d\n", num_devices);
  123. // Get the list of devices (previousely selected for the context)
  124. cl_device_id* devices = (cl_device_id*)malloc(num_devices*sizeof(cl_device_id));
  125. error = clGetContextInfo( context,
  126. CL_CONTEXT_DEVICES,
  127. devices_size,
  128. devices,
  129. NULL);
  130. if (error != CL_SUCCESS)
  131. fatal_CL(error, __LINE__);
  132. // Get names of devices and print them
  133. cl_char dbuf[100];
  134. int devi_count;
  135. cl_device_id device;
  136. for(devi_count = 0; devi_count < num_devices; devi_count++){
  137. device = devices[devi_count];
  138. error = clGetDeviceInfo(device,
  139. CL_DEVICE_NAME,
  140. sizeof(dbuf),
  141. dbuf,
  142. NULL);
  143. if (error != CL_SUCCESS)
  144. fatal_CL(error, __LINE__);
  145. printf("\tDevice %d: %s\n", devi_count, dbuf);
  146. }
  147. // Select device (previousely selected for the context) (if there are multiple devices, choose the first one)
  148. int devi = 0;
  149. device = devices[devi];
  150. printf("Selecting device %d\n", devi);
  151. //====================================================================================================100
  152. // CREATE COMMAND QUEUE FOR THE DEVICE
  153. //====================================================================================================100
  154. // Create a command queue
  155. cl_command_queue command_queue;
  156. command_queue = clCreateCommandQueue( context,
  157. device,
  158. 0,
  159. &error);
  160. if (error != CL_SUCCESS)
  161. fatal_CL(error, __LINE__);
  162. //====================================================================================================100
  163. // CREATE PROGRAM, COMPILE IT
  164. //====================================================================================================100
  165. // Load kernel source code from file
  166. const char *source = load_kernel_source("./kernel/kernel_gpu_opencl.cl");
  167. size_t sourceSize = strlen(source);
  168. // Create the program
  169. cl_program program = clCreateProgramWithSource( context,
  170. 1,
  171. &source,
  172. &sourceSize,
  173. &error);
  174. if (error != CL_SUCCESS)
  175. fatal_CL(error, __LINE__);
  176. char clOptions[150];
  177. // sprintf(clOptions,"-I../../src");
  178. sprintf(clOptions,"-I.");
  179. #ifdef RD_WG_SIZE
  180. sprintf(clOptions + strlen(clOptions), " -DRD_WG_SIZE=%d", RD_WG_SIZE);
  181. #endif
  182. #ifdef RD_WG_SIZE_0
  183. sprintf(clOptions + strlen(clOptions), " -DRD_WG_SIZE_0=%d", RD_WG_SIZE_0);
  184. #endif
  185. #ifdef RD_WG_SIZE_0_0
  186. sprintf(clOptions + strlen(clOptions), " -DRD_WG_SIZE_0_0=%d", RD_WG_SIZE_0_0);
  187. #endif
  188. // Compile the program
  189. error = clBuildProgram( program,
  190. 1,
  191. &device,
  192. clOptions,
  193. NULL,
  194. NULL);
  195. // Print warnings and errors from compilation
  196. static cl_char log[65536];
  197. memset(log, 0, sizeof(log));
  198. clGetProgramBuildInfo( program,
  199. device,
  200. CL_PROGRAM_BUILD_LOG,
  201. sizeof(log)-1,
  202. log,
  203. NULL);
  204. printf("-----OpenCL Compiler Output-----\n");
  205. if (strstr(log,"warning:") || strstr(log, "error:"))
  206. printf("<<<<\n%s\n>>>>\n", log);
  207. printf("--------------------------------\n");
  208. if (error != CL_SUCCESS)
  209. fatal_CL(error, __LINE__);
  210. // Create kernel
  211. cl_kernel kernel;
  212. kernel = clCreateKernel(program,
  213. "kernel_gpu_opencl",
  214. &error);
  215. if (error != CL_SUCCESS)
  216. fatal_CL(error, __LINE__);
  217. //====================================================================================================100
  218. // TRIGGERING INITIAL DRIVER OVERHEAD
  219. //====================================================================================================100
  220. // cudaThreadSynchronize(); // the above does it
  221. //======================================================================================================================================================150
  222. // GPU MEMORY ALLOCATION
  223. //======================================================================================================================================================150
  224. //====================================================================================================100
  225. // Common (COPY IN)
  226. //====================================================================================================100
  227. cl_mem d_common;
  228. d_common = clCreateBuffer( context,
  229. CL_MEM_READ_WRITE,
  230. common.common_mem,
  231. NULL,
  232. &error );
  233. if (error != CL_SUCCESS)
  234. fatal_CL(error, __LINE__);
  235. //====================================================================================================100
  236. // Frame (COPY IN)
  237. //====================================================================================================100
  238. // common
  239. cl_mem d_frame;
  240. d_frame = clCreateBuffer( context,
  241. CL_MEM_READ_WRITE,
  242. common.frame_mem,
  243. NULL,
  244. &error );
  245. if (error != CL_SUCCESS)
  246. fatal_CL(error, __LINE__);
  247. //====================================================================================================100
  248. // Inputs (COPY IN)
  249. //====================================================================================================100
  250. //==================================================50
  251. // endo points
  252. //==================================================50
  253. // common
  254. cl_mem d_endoRow;
  255. d_endoRow = clCreateBuffer( context,
  256. CL_MEM_READ_WRITE,
  257. common.endo_mem,
  258. NULL,
  259. &error );
  260. if (error != CL_SUCCESS)
  261. fatal_CL(error, __LINE__);
  262. cl_mem d_endoCol;
  263. d_endoCol = clCreateBuffer( context,
  264. CL_MEM_READ_WRITE,
  265. common.endo_mem,
  266. NULL,
  267. &error );
  268. if (error != CL_SUCCESS)
  269. fatal_CL(error, __LINE__);
  270. cl_mem d_tEndoRowLoc;
  271. d_tEndoRowLoc = clCreateBuffer( context,
  272. CL_MEM_READ_WRITE,
  273. common.endo_mem * common.no_frames,
  274. NULL,
  275. &error );
  276. if (error != CL_SUCCESS)
  277. fatal_CL(error, __LINE__);
  278. cl_mem d_tEndoColLoc;
  279. d_tEndoColLoc = clCreateBuffer( context,
  280. CL_MEM_READ_WRITE,
  281. common.endo_mem * common.no_frames,
  282. NULL,
  283. &error );
  284. if (error != CL_SUCCESS)
  285. fatal_CL(error, __LINE__);
  286. //==================================================50
  287. // epi points
  288. //==================================================50
  289. // common
  290. cl_mem d_epiRow;
  291. d_epiRow = clCreateBuffer( context,
  292. CL_MEM_READ_WRITE,
  293. common.epi_mem,
  294. NULL,
  295. &error );
  296. if (error != CL_SUCCESS)
  297. fatal_CL(error, __LINE__);
  298. cl_mem d_epiCol;
  299. d_epiCol = clCreateBuffer( context,
  300. CL_MEM_READ_WRITE,
  301. common.epi_mem,
  302. NULL,
  303. &error );
  304. if (error != CL_SUCCESS)
  305. fatal_CL(error, __LINE__);
  306. cl_mem d_tEpiRowLoc;
  307. d_tEpiRowLoc = clCreateBuffer( context,
  308. CL_MEM_READ_WRITE,
  309. common.epi_mem * common.no_frames,
  310. NULL,
  311. &error );
  312. if (error != CL_SUCCESS)
  313. fatal_CL(error, __LINE__);
  314. cl_mem d_tEpiColLoc;
  315. d_tEpiColLoc = clCreateBuffer( context,
  316. CL_MEM_READ_WRITE,
  317. common.epi_mem * common.no_frames,
  318. NULL,
  319. &error );
  320. if (error != CL_SUCCESS)
  321. fatal_CL(error, __LINE__);
  322. //====================================================================================================100
  323. // Array of Templates for All Points (COPY IN)
  324. //====================================================================================================100
  325. //==================================================50
  326. // template sizes
  327. //==================================================50
  328. // common
  329. printf("tSize is %d, sSize is %d\n", common.tSize, common.sSize);
  330. common.in_rows = common.tSize + 1 + common.tSize;
  331. common.in_cols = common.in_rows;
  332. common.in_elem = common.in_rows * common.in_cols;
  333. common.in_mem = sizeof(fp) * common.in_elem;
  334. //==================================================50
  335. // endo points templates
  336. //==================================================50
  337. // common
  338. cl_mem d_endoT;
  339. d_endoT = clCreateBuffer( context,
  340. CL_MEM_READ_WRITE,
  341. common.in_mem * common.endoPoints,
  342. NULL,
  343. &error );
  344. if (error != CL_SUCCESS)
  345. fatal_CL(error, __LINE__);
  346. //==================================================50
  347. // epi points templates
  348. //==================================================50
  349. // common
  350. cl_mem d_epiT;
  351. d_epiT = clCreateBuffer( context,
  352. CL_MEM_READ_WRITE,
  353. common.in_mem * common.epiPoints,
  354. NULL,
  355. &error );
  356. if (error != CL_SUCCESS)
  357. fatal_CL(error, __LINE__);
  358. //====================================================================================================100
  359. // AREA AROUND POINT FROM FRAME (LOCAL)
  360. //====================================================================================================100
  361. // common
  362. common.in2_rows = common.sSize + 1 + common.sSize;
  363. common.in2_cols = common.in2_rows;
  364. common.in2_elem = common.in2_rows * common.in2_cols;
  365. common.in2_mem = sizeof(fp) * common.in2_elem;
  366. // unique
  367. cl_mem d_in2;
  368. d_in2 = clCreateBuffer( context,
  369. CL_MEM_READ_WRITE,
  370. common.in2_mem * common.allPoints,
  371. NULL,
  372. &error );
  373. if (error != CL_SUCCESS)
  374. fatal_CL(error, __LINE__);
  375. //====================================================================================================100
  376. // CONVOLUTION (LOCAL)
  377. //====================================================================================================100
  378. // common
  379. common.conv_rows = common.in_rows + common.in2_rows - 1; // number of rows in I
  380. common.conv_cols = common.in_cols + common.in2_cols - 1; // number of columns in I
  381. common.conv_elem = common.conv_rows * common.conv_cols; // number of elements
  382. common.conv_mem = sizeof(fp) * common.conv_elem;
  383. common.ioffset = 0;
  384. common.joffset = 0;
  385. // unique
  386. cl_mem d_conv;
  387. d_conv = clCreateBuffer(context,
  388. CL_MEM_READ_WRITE,
  389. common.conv_mem * common.allPoints,
  390. NULL,
  391. &error );
  392. if (error != CL_SUCCESS)
  393. fatal_CL(error, __LINE__);
  394. //====================================================================================================100
  395. // CUMULATIVE SUM (LOCAL)
  396. //====================================================================================================100
  397. //==================================================50
  398. // PADDING OF ARRAY, VERTICAL CUMULATIVE SUM
  399. //==================================================50
  400. // common
  401. common.in2_pad_add_rows = common.in_rows;
  402. common.in2_pad_add_cols = common.in_cols;
  403. common.in2_pad_cumv_rows = common.in2_rows + 2*common.in2_pad_add_rows;
  404. common.in2_pad_cumv_cols = common.in2_cols + 2*common.in2_pad_add_cols;
  405. common.in2_pad_cumv_elem = common.in2_pad_cumv_rows * common.in2_pad_cumv_cols;
  406. common.in2_pad_cumv_mem = sizeof(fp) * common.in2_pad_cumv_elem;
  407. // unique
  408. cl_mem d_in2_pad_cumv;
  409. d_in2_pad_cumv = clCreateBuffer(context,
  410. CL_MEM_READ_WRITE,
  411. common.in2_pad_cumv_mem * common.allPoints,
  412. NULL,
  413. &error );
  414. if (error != CL_SUCCESS)
  415. fatal_CL(error, __LINE__);
  416. //==================================================50
  417. // SELECTION
  418. //==================================================50
  419. // common
  420. common.in2_pad_cumv_sel_rowlow = 1 + common.in_rows; // (1 to n+1)
  421. common.in2_pad_cumv_sel_rowhig = common.in2_pad_cumv_rows - 1;
  422. common.in2_pad_cumv_sel_collow = 1;
  423. common.in2_pad_cumv_sel_colhig = common.in2_pad_cumv_cols;
  424. common.in2_pad_cumv_sel_rows = common.in2_pad_cumv_sel_rowhig - common.in2_pad_cumv_sel_rowlow + 1;
  425. common.in2_pad_cumv_sel_cols = common.in2_pad_cumv_sel_colhig - common.in2_pad_cumv_sel_collow + 1;
  426. common.in2_pad_cumv_sel_elem = common.in2_pad_cumv_sel_rows * common.in2_pad_cumv_sel_cols;
  427. common.in2_pad_cumv_sel_mem = sizeof(fp) * common.in2_pad_cumv_sel_elem;
  428. // unique
  429. cl_mem d_in2_pad_cumv_sel;
  430. d_in2_pad_cumv_sel = clCreateBuffer(context,
  431. CL_MEM_READ_WRITE,
  432. common.in2_pad_cumv_sel_mem * common.allPoints,
  433. NULL,
  434. &error );
  435. if (error != CL_SUCCESS)
  436. fatal_CL(error, __LINE__);
  437. //==================================================50
  438. // SELECTION 2, SUBTRACTION, HORIZONTAL CUMULATIVE SUM
  439. //==================================================50
  440. // common
  441. common.in2_pad_cumv_sel2_rowlow = 1;
  442. common.in2_pad_cumv_sel2_rowhig = common.in2_pad_cumv_rows - common.in_rows - 1;
  443. common.in2_pad_cumv_sel2_collow = 1;
  444. common.in2_pad_cumv_sel2_colhig = common.in2_pad_cumv_cols;
  445. common.in2_sub_cumh_rows = common.in2_pad_cumv_sel2_rowhig - common.in2_pad_cumv_sel2_rowlow + 1;
  446. common.in2_sub_cumh_cols = common.in2_pad_cumv_sel2_colhig - common.in2_pad_cumv_sel2_collow + 1;
  447. common.in2_sub_cumh_elem = common.in2_sub_cumh_rows * common.in2_sub_cumh_cols;
  448. common.in2_sub_cumh_mem = sizeof(fp) * common.in2_sub_cumh_elem;
  449. // unique
  450. cl_mem d_in2_sub_cumh;
  451. d_in2_sub_cumh = clCreateBuffer(context,
  452. CL_MEM_READ_WRITE,
  453. common.in2_sub_cumh_mem * common.allPoints,
  454. NULL,
  455. &error );
  456. if (error != CL_SUCCESS)
  457. fatal_CL(error, __LINE__);
  458. //==================================================50
  459. // SELECTION
  460. //==================================================50
  461. // common
  462. common.in2_sub_cumh_sel_rowlow = 1;
  463. common.in2_sub_cumh_sel_rowhig = common.in2_sub_cumh_rows;
  464. common.in2_sub_cumh_sel_collow = 1 + common.in_cols;
  465. common.in2_sub_cumh_sel_colhig = common.in2_sub_cumh_cols - 1;
  466. common.in2_sub_cumh_sel_rows = common.in2_sub_cumh_sel_rowhig - common.in2_sub_cumh_sel_rowlow + 1;
  467. common.in2_sub_cumh_sel_cols = common.in2_sub_cumh_sel_colhig - common.in2_sub_cumh_sel_collow + 1;
  468. common.in2_sub_cumh_sel_elem = common.in2_sub_cumh_sel_rows * common.in2_sub_cumh_sel_cols;
  469. common.in2_sub_cumh_sel_mem = sizeof(fp) * common.in2_sub_cumh_sel_elem;
  470. // unique
  471. cl_mem d_in2_sub_cumh_sel;
  472. d_in2_sub_cumh_sel = clCreateBuffer(context,
  473. CL_MEM_READ_WRITE,
  474. common.in2_sub_cumh_sel_mem * common.allPoints,
  475. NULL,
  476. &error );
  477. if (error != CL_SUCCESS)
  478. fatal_CL(error, __LINE__);
  479. //==================================================50
  480. // SELECTION 2, SUBTRACTION
  481. //==================================================50
  482. // common
  483. common.in2_sub_cumh_sel2_rowlow = 1;
  484. common.in2_sub_cumh_sel2_rowhig = common.in2_sub_cumh_rows;
  485. common.in2_sub_cumh_sel2_collow = 1;
  486. common.in2_sub_cumh_sel2_colhig = common.in2_sub_cumh_cols - common.in_cols - 1;
  487. common.in2_sub2_rows = common.in2_sub_cumh_sel2_rowhig - common.in2_sub_cumh_sel2_rowlow + 1;
  488. common.in2_sub2_cols = common.in2_sub_cumh_sel2_colhig - common.in2_sub_cumh_sel2_collow + 1;
  489. common.in2_sub2_elem = common.in2_sub2_rows * common.in2_sub2_cols;
  490. common.in2_sub2_mem = sizeof(fp) * common.in2_sub2_elem;
  491. // unique
  492. cl_mem d_in2_sub2;
  493. d_in2_sub2 = clCreateBuffer(context,
  494. CL_MEM_READ_WRITE,
  495. common.in2_sub2_mem * common.allPoints,
  496. NULL,
  497. &error );
  498. if (error != CL_SUCCESS)
  499. fatal_CL(error, __LINE__);
  500. //====================================================================================================100
  501. // CUMULATIVE SUM 2 (LOCAL)
  502. //====================================================================================================100
  503. //==================================================50
  504. // MULTIPLICATION
  505. //==================================================50
  506. // common
  507. common.in2_sqr_rows = common.in2_rows;
  508. common.in2_sqr_cols = common.in2_cols;
  509. common.in2_sqr_elem = common.in2_elem;
  510. common.in2_sqr_mem = common.in2_mem;
  511. // unique
  512. cl_mem d_in2_sqr;
  513. d_in2_sqr = clCreateBuffer(context,
  514. CL_MEM_READ_WRITE,
  515. common.in2_sqr_mem * common.allPoints,
  516. NULL,
  517. &error );
  518. if (error != CL_SUCCESS)
  519. fatal_CL(error, __LINE__);
  520. //==================================================50
  521. // SELECTION 2, SUBTRACTION
  522. //==================================================50
  523. // common
  524. common.in2_sqr_sub2_rows = common.in2_sub2_rows;
  525. common.in2_sqr_sub2_cols = common.in2_sub2_cols;
  526. common.in2_sqr_sub2_elem = common.in2_sub2_elem;
  527. common.in2_sqr_sub2_mem = common.in2_sub2_mem;
  528. // unique
  529. cl_mem d_in2_sqr_sub2;
  530. d_in2_sqr_sub2 = clCreateBuffer(context,
  531. CL_MEM_READ_WRITE,
  532. common.in2_sqr_sub2_mem * common.allPoints,
  533. NULL,
  534. &error );
  535. if (error != CL_SUCCESS)
  536. fatal_CL(error, __LINE__);
  537. //====================================================================================================100
  538. // FINAL (LOCAL)
  539. //====================================================================================================100
  540. // common
  541. common.in_sqr_rows = common.in_rows;
  542. common.in_sqr_cols = common.in_cols;
  543. common.in_sqr_elem = common.in_elem;
  544. common.in_sqr_mem = common.in_mem;
  545. // unique
  546. cl_mem d_in_sqr;
  547. d_in_sqr = clCreateBuffer( context,
  548. CL_MEM_READ_WRITE,
  549. common.in_sqr_mem * common.allPoints,
  550. NULL,
  551. &error );
  552. if (error != CL_SUCCESS)
  553. fatal_CL(error, __LINE__);
  554. //====================================================================================================100
  555. // TEMPLATE MASK CREATE (LOCAL)
  556. //====================================================================================================100
  557. // common
  558. common.tMask_rows = common.in_rows + (common.sSize+1+common.sSize) - 1;
  559. common.tMask_cols = common.tMask_rows;
  560. common.tMask_elem = common.tMask_rows * common.tMask_cols;
  561. common.tMask_mem = sizeof(fp) * common.tMask_elem;
  562. // unique
  563. cl_mem d_tMask;
  564. d_tMask = clCreateBuffer( context,
  565. CL_MEM_READ_WRITE,
  566. common.tMask_mem * common.allPoints,
  567. NULL,
  568. &error );
  569. if (error != CL_SUCCESS)
  570. fatal_CL(error, __LINE__);
  571. //====================================================================================================100
  572. // POINT MASK INITIALIZE (LOCAL)
  573. //====================================================================================================100
  574. // common
  575. common.mask_rows = common.maxMove;
  576. common.mask_cols = common.mask_rows;
  577. common.mask_elem = common.mask_rows * common.mask_cols;
  578. common.mask_mem = sizeof(fp) * common.mask_elem;
  579. //====================================================================================================100
  580. // MASK CONVOLUTION (LOCAL)
  581. //====================================================================================================100
  582. // common
  583. common.mask_conv_rows = common.tMask_rows; // number of rows in I
  584. common.mask_conv_cols = common.tMask_cols; // number of columns in I
  585. common.mask_conv_elem = common.mask_conv_rows * common.mask_conv_cols; // number of elements
  586. common.mask_conv_mem = sizeof(fp) * common.mask_conv_elem;
  587. common.mask_conv_ioffset = (common.mask_rows-1)/2;
  588. if((common.mask_rows-1) % 2 > 0.5){
  589. common.mask_conv_ioffset = common.mask_conv_ioffset + 1;
  590. }
  591. common.mask_conv_joffset = (common.mask_cols-1)/2;
  592. if((common.mask_cols-1) % 2 > 0.5){
  593. common.mask_conv_joffset = common.mask_conv_joffset + 1;
  594. }
  595. // unique
  596. cl_mem d_mask_conv;
  597. d_mask_conv = clCreateBuffer( context,
  598. CL_MEM_READ_WRITE,
  599. common.mask_conv_mem * common.allPoints,
  600. NULL,
  601. &error );
  602. if (error != CL_SUCCESS)
  603. fatal_CL(error, __LINE__);
  604. //====================================================================================================100
  605. // END
  606. //====================================================================================================100
  607. //======================================================================================================================================================150
  608. // GPU MEMORY COPY
  609. //======================================================================================================================================================150
  610. //====================================================================================================100
  611. // Inputs
  612. //====================================================================================================100
  613. //==================================================50
  614. // endo points
  615. //==================================================50
  616. error = clEnqueueWriteBuffer( command_queue,
  617. d_endoRow,
  618. 1,
  619. 0,
  620. common.endo_mem,
  621. endoRow,
  622. 0,
  623. 0,
  624. 0);
  625. if (error != CL_SUCCESS)
  626. fatal_CL(error, __LINE__);
  627. error = clEnqueueWriteBuffer( command_queue,
  628. d_endoCol,
  629. 1,
  630. 0,
  631. common.endo_mem,
  632. endoCol,
  633. 0,
  634. 0,
  635. 0);
  636. if (error != CL_SUCCESS)
  637. fatal_CL(error, __LINE__);
  638. //==================================================50
  639. // epi points
  640. //==================================================50
  641. error = clEnqueueWriteBuffer( command_queue,
  642. d_epiRow,
  643. 1,
  644. 0,
  645. common.epi_mem,
  646. epiRow,
  647. 0,
  648. 0,
  649. 0);
  650. if (error != CL_SUCCESS)
  651. fatal_CL(error, __LINE__);
  652. error = clEnqueueWriteBuffer( command_queue,
  653. d_epiCol,
  654. 1,
  655. 0,
  656. common.epi_mem,
  657. epiCol,
  658. 0,
  659. 0,
  660. 0);
  661. if (error != CL_SUCCESS)
  662. fatal_CL(error, __LINE__);
  663. //==================================================50
  664. // END
  665. //==================================================50
  666. //====================================================================================================100
  667. // END
  668. //====================================================================================================100
  669. //======================================================================================================================================================150
  670. // KERNEL
  671. //======================================================================================================================================================150
  672. //====================================================================================================100
  673. // EXECUTION PARAMETERS
  674. //====================================================================================================100
  675. // All kernels operations within kernel use same max size of threads. Size of block size is set to the size appropriate for max size operation (on padded matrix). Other use subsets of that.
  676. size_t local_work_size[1];
  677. local_work_size[0] = NUMBER_THREADS;
  678. size_t global_work_size[1];
  679. global_work_size[0] = common.allPoints * local_work_size[0];
  680. printf("# of workgroups = %d, # of threads/workgroup = %d (ensure that device can handle)\n", (int)(global_work_size[0]/local_work_size[0]), (int)local_work_size[0]);
  681. //====================================================================================================100
  682. // COPY ARGUMENTS
  683. //====================================================================================================100
  684. error = clEnqueueWriteBuffer( command_queue,
  685. d_common,
  686. 1,
  687. 0,
  688. common.common_mem,
  689. &common,
  690. 0,
  691. 0,
  692. 0);
  693. if (error != CL_SUCCESS)
  694. fatal_CL(error, __LINE__);
  695. //====================================================================================================100
  696. // set kernel arguments
  697. //====================================================================================================100
  698. // structures
  699. error = clSetKernelArg( kernel,
  700. 0,
  701. sizeof(params_common),
  702. (void *) &common);
  703. if (error != CL_SUCCESS)
  704. fatal_CL(error, __LINE__);
  705. // common
  706. error = clSetKernelArg( kernel,
  707. 3,
  708. sizeof(cl_mem),
  709. (void *) &d_endoRow);
  710. if (error != CL_SUCCESS)
  711. fatal_CL(error, __LINE__);
  712. error = clSetKernelArg( kernel,
  713. 4,
  714. sizeof(cl_mem),
  715. (void *) &d_endoCol);
  716. if (error != CL_SUCCESS)
  717. fatal_CL(error, __LINE__);
  718. error = clSetKernelArg( kernel,
  719. 5,
  720. sizeof(cl_mem),
  721. (void *) &d_tEndoRowLoc);
  722. if (error != CL_SUCCESS)
  723. fatal_CL(error, __LINE__);
  724. error = clSetKernelArg( kernel,
  725. 6,
  726. sizeof(cl_mem),
  727. (void *) &d_tEndoColLoc);
  728. if (error != CL_SUCCESS)
  729. fatal_CL(error, __LINE__);
  730. error = clSetKernelArg( kernel,
  731. 7,
  732. sizeof(cl_mem),
  733. (void *) &d_epiRow);
  734. if (error != CL_SUCCESS)
  735. fatal_CL(error, __LINE__);
  736. error = clSetKernelArg( kernel,
  737. 8,
  738. sizeof(cl_mem),
  739. (void *) &d_epiCol);
  740. if (error != CL_SUCCESS)
  741. fatal_CL(error, __LINE__);
  742. error = clSetKernelArg( kernel,
  743. 9,
  744. sizeof(cl_mem),
  745. (void *) &d_tEpiRowLoc);
  746. if (error != CL_SUCCESS)
  747. fatal_CL(error, __LINE__);
  748. error = clSetKernelArg( kernel,
  749. 10,
  750. sizeof(cl_mem),
  751. (void *) &d_tEpiColLoc);
  752. if (error != CL_SUCCESS)
  753. fatal_CL(error, __LINE__);
  754. // common_unique
  755. error = clSetKernelArg( kernel,
  756. 11,
  757. sizeof(cl_mem),
  758. (void *) &d_endoT);
  759. if (error != CL_SUCCESS)
  760. fatal_CL(error, __LINE__);
  761. error = clSetKernelArg( kernel,
  762. 12,
  763. sizeof(cl_mem),
  764. (void *) &d_epiT);
  765. if (error != CL_SUCCESS)
  766. fatal_CL(error, __LINE__);
  767. error = clSetKernelArg( kernel,
  768. 13,
  769. sizeof(cl_mem),
  770. (void *) &d_in2);
  771. if (error != CL_SUCCESS)
  772. fatal_CL(error, __LINE__);
  773. error = clSetKernelArg( kernel,
  774. 14,
  775. sizeof(cl_mem),
  776. (void *) &d_conv);
  777. if (error != CL_SUCCESS)
  778. fatal_CL(error, __LINE__);
  779. error = clSetKernelArg( kernel,
  780. 15,
  781. sizeof(cl_mem),
  782. (void *) &d_in2_pad_cumv);
  783. if (error != CL_SUCCESS)
  784. fatal_CL(error, __LINE__);
  785. error = clSetKernelArg( kernel,
  786. 16,
  787. sizeof(cl_mem),
  788. (void *) &d_in2_pad_cumv_sel);
  789. if (error != CL_SUCCESS)
  790. fatal_CL(error, __LINE__);
  791. error = clSetKernelArg( kernel,
  792. 17,
  793. sizeof(cl_mem),
  794. (void *) &d_in2_sub_cumh);
  795. if (error != CL_SUCCESS)
  796. fatal_CL(error, __LINE__);
  797. error = clSetKernelArg( kernel,
  798. 18,
  799. sizeof(cl_mem),
  800. (void *) &d_in2_sub_cumh_sel);
  801. if (error != CL_SUCCESS)
  802. fatal_CL(error, __LINE__);
  803. error = clSetKernelArg( kernel,
  804. 19,
  805. sizeof(cl_mem),
  806. (void *) &d_in2_sub2);
  807. if (error != CL_SUCCESS)
  808. fatal_CL(error, __LINE__);
  809. error = clSetKernelArg( kernel,
  810. 20,
  811. sizeof(cl_mem),
  812. (void *) &d_in2_sqr);
  813. if (error != CL_SUCCESS)
  814. fatal_CL(error, __LINE__);
  815. error = clSetKernelArg( kernel,
  816. 21,
  817. sizeof(cl_mem),
  818. (void *) &d_in2_sqr_sub2);
  819. if (error != CL_SUCCESS)
  820. fatal_CL(error, __LINE__);
  821. error = clSetKernelArg( kernel,
  822. 22,
  823. sizeof(cl_mem),
  824. (void *) &d_in_sqr);
  825. if (error != CL_SUCCESS)
  826. fatal_CL(error, __LINE__);
  827. error = clSetKernelArg( kernel,
  828. 23,
  829. sizeof(cl_mem),
  830. (void *) &d_tMask);
  831. if (error != CL_SUCCESS)
  832. fatal_CL(error, __LINE__);
  833. error = clSetKernelArg( kernel,
  834. 24,
  835. sizeof(cl_mem),
  836. (void *) &d_mask_conv);
  837. if (error != CL_SUCCESS)
  838. fatal_CL(error, __LINE__);
  839. // // local
  840. // // int local_size_one;
  841. // // local_size_one = common.in_rows;
  842. // error = clSetKernelArg( kernel,
  843. // 25,
  844. // // sizeof(fp) * local_size_one, // size 51
  845. // sizeof(fp) * common.in_elem,
  846. // NULL);
  847. // if (error != CL_SUCCESS)
  848. // fatal_CL(error, __LINE__);
  849. // error = clSetKernelArg( kernel,
  850. // 26,
  851. // // sizeof(fp) * local_size_one, // size 51
  852. // sizeof(fp) * common.in_cols,
  853. // NULL);
  854. // if (error != CL_SUCCESS)
  855. // fatal_CL(error, __LINE__);
  856. // // int local_size_two;
  857. // // local_size_two = common.in_rows + common.in2_rows - 1;
  858. // error = clSetKernelArg( kernel,
  859. // 27,
  860. // // sizeof(fp) * local_size_two, // size 51+81-1=131
  861. // sizeof(fp) * common.in_sqr_rows,
  862. // NULL);
  863. // if (error != CL_SUCCESS)
  864. // fatal_CL(error, __LINE__);
  865. // error = clSetKernelArg( kernel,
  866. // 28,
  867. // // sizeof(fp) * local_size_two, // size 51+81-1=131
  868. // sizeof(fp) * common.mask_conv_rows,
  869. // NULL);
  870. // if (error != CL_SUCCESS)
  871. // fatal_CL(error, __LINE__);
  872. // // int local_size_three;
  873. // // local_size_three = common.in_rows * common.in_rows;
  874. // error = clSetKernelArg( kernel,
  875. // 29,
  876. // // sizeof(fp) * local_size_three, // size 51*51=2601
  877. // sizeof(int) * common.mask_conv_rows,
  878. // NULL);
  879. // if (error != CL_SUCCESS)
  880. // fatal_CL(error, __LINE__);
  881. // int local_size;
  882. // local_size = (common.in_elem + common.in_cols + common.in_sqr_rows + common.mask_conv_rows) * 4 + common.mask_conv_rows * 2;
  883. // printf("size of used local memory/workgroup = %dB (ensure that device can handle)\n", local_size);
  884. cl_mem d_in_mod_temp;
  885. d_in_mod_temp = clCreateBuffer( context,
  886. CL_MEM_READ_WRITE,
  887. sizeof(fp) * common.in_elem * common.allPoints,
  888. NULL,
  889. &error );
  890. if (error != CL_SUCCESS)
  891. fatal_CL(error, __LINE__);
  892. cl_mem in_partial_sum;
  893. in_partial_sum = clCreateBuffer(context,
  894. CL_MEM_READ_WRITE,
  895. sizeof(fp) * common.in_cols * common.allPoints,
  896. NULL,
  897. &error );
  898. if (error != CL_SUCCESS)
  899. fatal_CL(error, __LINE__);
  900. cl_mem in_sqr_partial_sum;
  901. in_sqr_partial_sum = clCreateBuffer(context,
  902. CL_MEM_READ_WRITE,
  903. sizeof(fp) * common.in_sqr_rows * common.allPoints,
  904. NULL,
  905. &error );
  906. if (error != CL_SUCCESS)
  907. fatal_CL(error, __LINE__);
  908. cl_mem par_max_val;
  909. par_max_val = clCreateBuffer( context,
  910. CL_MEM_READ_WRITE,
  911. sizeof(fp) * common.mask_conv_rows * common.allPoints,
  912. NULL,
  913. &error );
  914. if (error != CL_SUCCESS)
  915. fatal_CL(error, __LINE__);
  916. cl_mem par_max_coo;
  917. par_max_coo = clCreateBuffer( context,
  918. CL_MEM_READ_WRITE,
  919. sizeof(int) * common.mask_conv_rows * common.allPoints,
  920. NULL,
  921. &error );
  922. if (error != CL_SUCCESS)
  923. fatal_CL(error, __LINE__);
  924. cl_mem in_final_sum;
  925. in_final_sum = clCreateBuffer( context,
  926. CL_MEM_READ_WRITE,
  927. sizeof(fp) * common.allPoints,
  928. NULL,
  929. &error );
  930. if (error != CL_SUCCESS)
  931. fatal_CL(error, __LINE__);
  932. cl_mem in_sqr_final_sum;
  933. in_sqr_final_sum = clCreateBuffer( context,
  934. CL_MEM_READ_WRITE,
  935. sizeof(fp) * common.allPoints,
  936. NULL,
  937. &error );
  938. if (error != CL_SUCCESS)
  939. fatal_CL(error, __LINE__);
  940. cl_mem denomT;
  941. denomT = clCreateBuffer(context,
  942. CL_MEM_READ_WRITE,
  943. sizeof(fp) * common.allPoints,
  944. NULL,
  945. &error );
  946. if (error != CL_SUCCESS)
  947. fatal_CL(error, __LINE__);
  948. error = clSetKernelArg( kernel,
  949. 25,
  950. sizeof(cl_mem),
  951. (void *) &d_in_mod_temp);
  952. if (error != CL_SUCCESS)
  953. fatal_CL(error, __LINE__);
  954. error = clSetKernelArg( kernel,
  955. 26,
  956. sizeof(cl_mem),
  957. (void *) &in_partial_sum);
  958. if (error != CL_SUCCESS)
  959. fatal_CL(error, __LINE__);
  960. error = clSetKernelArg( kernel,
  961. 27,
  962. sizeof(cl_mem),
  963. (void *) &in_sqr_partial_sum);
  964. if (error != CL_SUCCESS)
  965. fatal_CL(error, __LINE__);
  966. error = clSetKernelArg( kernel,
  967. 28,
  968. sizeof(cl_mem),
  969. (void *) &par_max_val);
  970. if (error != CL_SUCCESS)
  971. fatal_CL(error, __LINE__);
  972. error = clSetKernelArg( kernel,
  973. 29,
  974. sizeof(cl_mem),
  975. (void *) &par_max_coo);
  976. if (error != CL_SUCCESS)
  977. fatal_CL(error, __LINE__);
  978. error = clSetKernelArg( kernel,
  979. 30,
  980. sizeof(cl_mem),
  981. (void *) &in_final_sum);
  982. if (error != CL_SUCCESS)
  983. fatal_CL(error, __LINE__);
  984. error = clSetKernelArg( kernel,
  985. 31,
  986. sizeof(cl_mem),
  987. (void *) &in_sqr_final_sum);
  988. if (error != CL_SUCCESS)
  989. fatal_CL(error, __LINE__);
  990. error = clSetKernelArg( kernel,
  991. 32,
  992. sizeof(cl_mem),
  993. (void *) &denomT);
  994. if (error != CL_SUCCESS)
  995. fatal_CL(error, __LINE__);
  996. cl_mem d_checksum;
  997. d_checksum = clCreateBuffer(context,
  998. CL_MEM_READ_WRITE,
  999. sizeof(fp) * CHECK,
  1000. NULL,
  1001. &error );
  1002. if (error != CL_SUCCESS)
  1003. fatal_CL(error, __LINE__);
  1004. error = clSetKernelArg( kernel,
  1005. 33,
  1006. sizeof(cl_mem),
  1007. (void *) &d_checksum);
  1008. if (error != CL_SUCCESS)
  1009. fatal_CL(error, __LINE__);
  1010. //====================================================================================================100
  1011. // PRINT FRAME PROGRESS START
  1012. //====================================================================================================100
  1013. printf("frame progress: ");
  1014. fflush(NULL);
  1015. //====================================================================================================100
  1016. // LAUNCH
  1017. //====================================================================================================100
  1018. // variables
  1019. fp* frame;
  1020. int frame_no;
  1021. for(frame_no=0; frame_no<common.frames_processed; frame_no++){
  1022. //==================================================50
  1023. // get and write current frame to GPU buffer
  1024. //==================================================50
  1025. // Extract a cropped version of the first frame from the video file
  1026. frame = get_frame( frames, // pointer to video file
  1027. frame_no, // number of frame that needs to be returned
  1028. 0, // cropped?
  1029. 0, // scaled?
  1030. 1); // converted
  1031. // copy frame to GPU memory
  1032. error = clEnqueueWriteBuffer( command_queue,
  1033. d_frame,
  1034. 1,
  1035. 0,
  1036. common.frame_mem,
  1037. frame,
  1038. 0,
  1039. 0,
  1040. 0);
  1041. if (error != CL_SUCCESS)
  1042. fatal_CL(error, __LINE__);
  1043. //==================================================50
  1044. // kernel arguments that change inside this loop
  1045. //==================================================50
  1046. // common_change
  1047. error = clSetKernelArg( kernel,
  1048. 1,
  1049. sizeof(cl_mem),
  1050. (void *) &d_frame);
  1051. if (error != CL_SUCCESS)
  1052. fatal_CL(error, __LINE__);
  1053. error = clSetKernelArg( kernel,
  1054. 2,
  1055. sizeof(int),
  1056. (void *) &frame_no);
  1057. if (error != CL_SUCCESS)
  1058. fatal_CL(error, __LINE__);
  1059. //==================================================50
  1060. // launch kernel
  1061. //==================================================50
  1062. error = clEnqueueNDRangeKernel( command_queue,
  1063. kernel,
  1064. 1,
  1065. NULL,
  1066. global_work_size,
  1067. local_work_size,
  1068. 0,
  1069. NULL,
  1070. NULL);
  1071. if (error != CL_SUCCESS)
  1072. fatal_CL(error, __LINE__);
  1073. //==================================================50
  1074. // finish iteration
  1075. //==================================================50
  1076. // Synchronization, wait for all operations in the command queue so far to finish
  1077. error = clFinish(command_queue);
  1078. if (error != CL_SUCCESS)
  1079. fatal_CL(error, __LINE__);
  1080. // free frame after each loop iteration, since AVI library allocates memory for every frame fetched
  1081. free(frame);
  1082. //==================================================50
  1083. // print frame progress
  1084. //==================================================50
  1085. // print frame progress
  1086. printf("%d ", frame_no);
  1087. fflush(NULL);
  1088. //==================================================50
  1089. // DISPLAY CHECKSUM (TESTING)
  1090. //==================================================50
  1091. #ifdef TEST_CHECKSUM
  1092. fp* checksum;
  1093. checksum = (fp*)malloc(sizeof(fp) * CHECK);
  1094. error = clEnqueueReadBuffer(command_queue,
  1095. d_checksum,
  1096. CL_TRUE,
  1097. 0,
  1098. sizeof(fp)*CHECK,
  1099. checksum,
  1100. 0,
  1101. NULL,
  1102. NULL);
  1103. if (error != CL_SUCCESS)
  1104. fatal_CL(error, __LINE__);
  1105. printf("CHECKSUM:\n");
  1106. for(i=0; i<CHECK; i++){
  1107. printf("%f ", checksum[i]);
  1108. }
  1109. printf("\n\n");
  1110. #endif
  1111. //==================================================50
  1112. // End
  1113. //==================================================50
  1114. }
  1115. //====================================================================================================100
  1116. // PRINT FRAME PROGRESS END
  1117. //====================================================================================================100
  1118. printf("\n");
  1119. fflush(NULL);
  1120. //======================================================================================================================================================150
  1121. // OUTPUT
  1122. //======================================================================================================================================================150
  1123. //====================================================================================================100
  1124. // endo points
  1125. //====================================================================================================100
  1126. error = clEnqueueReadBuffer(command_queue,
  1127. d_tEndoRowLoc,
  1128. CL_TRUE,
  1129. 0,
  1130. common.endo_mem * common.no_frames,
  1131. tEndoRowLoc,
  1132. 0,
  1133. NULL,
  1134. NULL);
  1135. if (error != CL_SUCCESS)
  1136. fatal_CL(error, __LINE__);
  1137. // for testing of the output
  1138. #ifdef TEST_OUTPUT
  1139. int j;
  1140. for(i=0; i<common.frames_processed; i++){
  1141. printf("%d: ", i);
  1142. for(j=0; j<common.endoPoints; j++){
  1143. printf("%d ", tEndoRowLoc[j*common.no_frames+i]);
  1144. }
  1145. printf("\n\n");
  1146. }
  1147. #endif
  1148. error = clEnqueueReadBuffer(command_queue,
  1149. d_tEndoColLoc,
  1150. CL_TRUE,
  1151. 0,
  1152. common.endo_mem * common.no_frames,
  1153. tEndoColLoc,
  1154. 0,
  1155. NULL,
  1156. NULL);
  1157. if (error != CL_SUCCESS)
  1158. fatal_CL(error, __LINE__);
  1159. //====================================================================================================100
  1160. // epi points
  1161. //====================================================================================================100
  1162. error = clEnqueueReadBuffer(command_queue,
  1163. d_tEpiRowLoc,
  1164. CL_TRUE,
  1165. 0,
  1166. common.epi_mem * common.no_frames,
  1167. tEpiRowLoc,
  1168. 0,
  1169. NULL,
  1170. NULL);
  1171. if (error != CL_SUCCESS)
  1172. fatal_CL(error, __LINE__);;
  1173. error = clEnqueueReadBuffer(command_queue,
  1174. d_tEpiColLoc,
  1175. CL_TRUE,
  1176. 0,
  1177. common.epi_mem * common.no_frames,
  1178. tEpiColLoc,
  1179. 0,
  1180. NULL,
  1181. NULL);
  1182. if (error != CL_SUCCESS)
  1183. fatal_CL(error, __LINE__);
  1184. //======================================================================================================================================================150
  1185. // DEALLOCATION
  1186. //======================================================================================================================================================150
  1187. // OpenCL structures
  1188. error = clReleaseKernel(kernel);
  1189. if (error != CL_SUCCESS)
  1190. fatal_CL(error, __LINE__);
  1191. error = clReleaseProgram(program);
  1192. if (error != CL_SUCCESS)
  1193. fatal_CL(error, __LINE__);
  1194. // common_change
  1195. error = clReleaseMemObject(d_frame);
  1196. if (error != CL_SUCCESS)
  1197. fatal_CL(error, __LINE__);
  1198. // common
  1199. error = clReleaseMemObject(d_endoRow);
  1200. if (error != CL_SUCCESS)
  1201. fatal_CL(error, __LINE__);
  1202. error = clReleaseMemObject(d_endoCol);
  1203. if (error != CL_SUCCESS)
  1204. fatal_CL(error, __LINE__);
  1205. error = clReleaseMemObject(d_tEndoRowLoc);
  1206. if (error != CL_SUCCESS)
  1207. fatal_CL(error, __LINE__);
  1208. error = clReleaseMemObject(d_tEndoColLoc);
  1209. if (error != CL_SUCCESS)
  1210. fatal_CL(error, __LINE__);
  1211. error = clReleaseMemObject(d_epiRow);
  1212. if (error != CL_SUCCESS)
  1213. fatal_CL(error, __LINE__);
  1214. error = clReleaseMemObject(d_epiCol);
  1215. if (error != CL_SUCCESS)
  1216. fatal_CL(error, __LINE__);
  1217. error = clReleaseMemObject(d_tEpiRowLoc);
  1218. if (error != CL_SUCCESS)
  1219. fatal_CL(error, __LINE__);
  1220. error = clReleaseMemObject(d_tEpiColLoc);
  1221. if (error != CL_SUCCESS)
  1222. fatal_CL(error, __LINE__);
  1223. // common_unique
  1224. error = clReleaseMemObject(d_endoT);
  1225. if (error != CL_SUCCESS)
  1226. fatal_CL(error, __LINE__);
  1227. error = clReleaseMemObject(d_epiT);
  1228. if (error != CL_SUCCESS)
  1229. fatal_CL(error, __LINE__);
  1230. error = clReleaseMemObject(d_in2);
  1231. if (error != CL_SUCCESS)
  1232. fatal_CL(error, __LINE__);
  1233. error = clReleaseMemObject(d_conv);
  1234. if (error != CL_SUCCESS)
  1235. fatal_CL(error, __LINE__);
  1236. error = clReleaseMemObject(d_in2_pad_cumv);
  1237. if (error != CL_SUCCESS)
  1238. fatal_CL(error, __LINE__);
  1239. error = clReleaseMemObject(d_in2_pad_cumv_sel);
  1240. if (error != CL_SUCCESS)
  1241. fatal_CL(error, __LINE__);
  1242. error = clReleaseMemObject(d_in2_sub_cumh);
  1243. if (error != CL_SUCCESS)
  1244. fatal_CL(error, __LINE__);
  1245. error = clReleaseMemObject(d_in2_sub_cumh_sel);
  1246. if (error != CL_SUCCESS)
  1247. fatal_CL(error, __LINE__);
  1248. error = clReleaseMemObject(d_in2_sub2);
  1249. if (error != CL_SUCCESS)
  1250. fatal_CL(error, __LINE__);
  1251. error = clReleaseMemObject(d_in2_sqr);
  1252. if (error != CL_SUCCESS)
  1253. fatal_CL(error, __LINE__);
  1254. error = clReleaseMemObject(d_in2_sqr_sub2);
  1255. if (error != CL_SUCCESS)
  1256. fatal_CL(error, __LINE__);
  1257. error = clReleaseMemObject(d_in_sqr);
  1258. if (error != CL_SUCCESS)
  1259. fatal_CL(error, __LINE__);
  1260. error = clReleaseMemObject(d_tMask);
  1261. if (error != CL_SUCCESS)
  1262. fatal_CL(error, __LINE__);
  1263. error = clReleaseMemObject(d_mask_conv);
  1264. if (error != CL_SUCCESS)
  1265. fatal_CL(error, __LINE__);
  1266. // OpenCL structures
  1267. error = clFlush(command_queue);
  1268. if (error != CL_SUCCESS)
  1269. fatal_CL(error, __LINE__);
  1270. error = clReleaseCommandQueue(command_queue);
  1271. if (error != CL_SUCCESS)
  1272. fatal_CL(error, __LINE__);
  1273. error = clReleaseContext(context);
  1274. if (error != CL_SUCCESS)
  1275. fatal_CL(error, __LINE__);
  1276. //======================================================================================================================================================150
  1277. // End
  1278. //======================================================================================================================================================150
  1279. }
  1280. //========================================================================================================================================================================================================200
  1281. // END
  1282. //========================================================================================================================================================================================================200