mergesort.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406
  1. ////////////////////////////////////////////////////////////////////////////////
  2. // Includes
  3. ////////////////////////////////////////////////////////////////////////////////
  4. #include <fcntl.h>
  5. #include <float.h>
  6. #include <stdio.h>
  7. #include <stdlib.h>
  8. #include <string.h>
  9. #include <math.h>
  10. #include <unistd.h>
  11. #include <sys/types.h>
  12. #include <sys/stat.h>
  13. #include <CL/cl.h>
  14. #include "mergesort.h"
  15. #include <time.h>
  16. ////////////////////////////////////////////////////////////////////////////////
  17. // Defines
  18. ////////////////////////////////////////////////////////////////////////////////
  19. #define BLOCKSIZE 256
  20. #define ROW_LENGTH BLOCKSIZE * 4
  21. #define ROWS 4096
  22. #define DATA_SIZE (1024)
  23. #define MAX_SOURCE_SIZE (0x100000)
  24. cl_device_id device_id; // compute device id
  25. cl_context mergeContext; // compute context
  26. cl_command_queue mergeCommands;
  27. cl_program mergeProgram; // compute program
  28. cl_kernel mergeFirstKernel; // compute kernel
  29. cl_kernel mergePassKernel;
  30. cl_kernel mergePackKernel;
  31. cl_int err;
  32. cl_mem d_resultList_first_buff;
  33. cl_mem d_origList_first_buff;
  34. cl_mem constStartAddr;
  35. cl_mem finalStartAddr;
  36. cl_mem nullElems;
  37. cl_mem d_orig;
  38. cl_mem d_res;
  39. cl_float4 *d_resultList_first_altered = NULL;
  40. cl_event mergeFirstEvent;
  41. cl_event mergePassEvent;
  42. cl_event mergePackEvent;
  43. double mergesum = 0;
  44. ////////////////////////////////////////////////////////////////////////////////
  45. // The mergesort algorithm
  46. ////////////////////////////////////////////////////////////////////////////////
  47. void init_mergesort(int listsize){
  48. cl_uint num = 0;
  49. clGetPlatformIDs(0,NULL,&num);
  50. cl_platform_id platformID[num];
  51. clGetPlatformIDs(num,platformID,NULL);
  52. clGetDeviceIDs(platformID[1],CL_DEVICE_TYPE_CPU,0,NULL,&num);
  53. cl_device_id devices[num];
  54. err = clGetDeviceIDs(platformID[1],CL_DEVICE_TYPE_CPU,num,devices,NULL);
  55. if (err != CL_SUCCESS)
  56. {
  57. printf("Error: Failed to create a device group!\n");
  58. exit(1);
  59. }
  60. char name[128];
  61. clGetDeviceInfo(devices[0],CL_DEVICE_NAME,128,name,NULL);
  62. mergeContext = clCreateContext(0, 1, &devices[0], NULL, NULL, &err);
  63. mergeCommands = clCreateCommandQueue(mergeContext, devices[0], CL_QUEUE_PROFILING_ENABLE, &err);
  64. d_resultList_first_altered = (cl_float4 *)malloc(listsize*sizeof(float));
  65. d_resultList_first_buff = clCreateBuffer(mergeContext,CL_MEM_READ_WRITE, listsize * sizeof(float),NULL,NULL);
  66. d_origList_first_buff = clCreateBuffer(mergeContext,CL_MEM_READ_WRITE, listsize * sizeof(float),NULL,NULL);
  67. d_orig = clCreateBuffer(mergeContext,CL_MEM_READ_WRITE, listsize * sizeof(float),NULL,NULL);
  68. d_res = clCreateBuffer(mergeContext,CL_MEM_READ_WRITE, listsize * sizeof(float),NULL,NULL);
  69. FILE *fp;
  70. const char fileName[]="./mergesort.cl";
  71. size_t source_size;
  72. char *source_str;
  73. fp = fopen(fileName, "r");
  74. if (!fp) {
  75. fprintf(stderr, "Failed to load mergesort kernel.\n");
  76. exit(1);
  77. }
  78. source_str = (char *)malloc(MAX_SOURCE_SIZE);
  79. source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
  80. fclose(fp);
  81. mergeProgram = clCreateProgramWithSource(mergeContext, 1, (const char **) &source_str, (const size_t)&source_size, &err);
  82. if (!mergeProgram)
  83. {
  84. printf("Error: Failed to create merge compute program!\n");
  85. exit(1);
  86. }
  87. err = clBuildProgram(mergeProgram, 0, NULL, NULL, NULL, NULL);
  88. if (err != CL_SUCCESS)
  89. {
  90. size_t len;
  91. char buffer[2048];
  92. printf("Error: Failed to build merge program executable!\n");
  93. clGetProgramBuildInfo(mergeProgram, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
  94. printf("%s\n", buffer);
  95. exit(1);
  96. }
  97. }
  98. void finish_mergesort() {
  99. clReleaseMemObject(constStartAddr);
  100. clReleaseMemObject(nullElems);
  101. clReleaseMemObject(finalStartAddr);
  102. clReleaseMemObject(d_orig);
  103. clReleaseMemObject(d_res);
  104. clReleaseMemObject(d_origList_first_buff);
  105. clReleaseMemObject(d_resultList_first_buff);
  106. clReleaseProgram(mergeProgram);
  107. clReleaseKernel(mergeFirstKernel);
  108. clReleaseCommandQueue(mergeCommands);
  109. clReleaseContext(mergeContext);
  110. }
  111. cl_float4* runMergeSort(int listsize, int divisions,
  112. cl_float4 *d_origList, cl_float4 *d_resultList,
  113. int *sizes, int *nullElements,
  114. unsigned int *origOffsets){
  115. int *startaddr = (int *)malloc((divisions + 1)*sizeof(int));
  116. int largestSize = -1;
  117. startaddr[0] = 0;
  118. for(int i=1; i<=divisions; i++)
  119. {
  120. startaddr[i] = startaddr[i-1] + sizes[i-1];
  121. if(sizes[i-1] > largestSize) largestSize = sizes[i-1];
  122. }
  123. largestSize *= 4;
  124. mergeFirstKernel = clCreateKernel(mergeProgram, "mergeSortFirst", &err);
  125. if (!mergeFirstKernel || err != CL_SUCCESS)
  126. {
  127. printf("Error: Failed to create merge sort first compute kernel!\n");
  128. exit(1);
  129. }
  130. err = clEnqueueWriteBuffer(mergeCommands, d_resultList_first_buff, CL_TRUE, 0, listsize*sizeof(float), d_resultList, 0, NULL, NULL);
  131. if (err != CL_SUCCESS)
  132. {
  133. printf("Error: Failed to write to d_resultList_first_buff source array!\n");
  134. exit(1);
  135. }
  136. err = clEnqueueWriteBuffer(mergeCommands, d_origList_first_buff, CL_TRUE, 0, listsize*sizeof(float), d_origList, 0, NULL, NULL);
  137. if (err != CL_SUCCESS)
  138. {
  139. printf("Error: Failed to write to d_origList_first_buff source array!\n");
  140. exit(1);
  141. }
  142. err = 0;
  143. err = clSetKernelArg(mergeFirstKernel, 0, sizeof(cl_mem), &d_origList_first_buff);
  144. err = clSetKernelArg(mergeFirstKernel, 1, sizeof(cl_mem), &d_resultList_first_buff);
  145. err = clSetKernelArg(mergeFirstKernel, 2, sizeof(cl_int), &listsize);
  146. if (err != CL_SUCCESS)
  147. {
  148. printf("Error: Failed to set merge first kernel arguments! %d\n", err);
  149. exit(1);
  150. }
  151. #ifdef MERGE_WG_SIZE_0
  152. const int THREADS = MERGE_WG_SIZE_0;
  153. #else
  154. const int THREADS = 256;
  155. #endif
  156. size_t local[] = {THREADS,1,1};
  157. int blocks = ((listsize/4)%THREADS == 0) ? (listsize/4)/THREADS : (listsize/4)/THREADS + 1;
  158. size_t global[] = {blocks*THREADS,1,1};
  159. size_t grid[] = {blocks,1,1,1};
  160. err = clEnqueueNDRangeKernel(mergeCommands, mergeFirstKernel, 3, NULL, global, local, 0, NULL, &mergeFirstEvent);
  161. if (err)
  162. {
  163. printf("Error: Failed to execute mergeFirst kernel!\n");
  164. exit(1);
  165. }
  166. clWaitForEvents(1 , &mergeFirstEvent);
  167. clFinish(mergeCommands);
  168. err = clEnqueueReadBuffer( mergeCommands, d_resultList_first_buff, CL_TRUE, 0, listsize*sizeof(float), d_resultList, 0, NULL, NULL );
  169. if (err != CL_SUCCESS)
  170. {
  171. printf("Error: Failed to read prefix output array! %d\n", err);
  172. exit(1);
  173. }
  174. // for(int i = 0; i < listsize/4;i++) {
  175. // printf("RESULT %f \n", d_resultList[i].s[0]);
  176. // printf("RESULT %f \n", d_resultList[i].s[1]);
  177. // printf("RESULT %f \n", d_resultList[i].s[2]);
  178. // printf("RESULT %f \n", d_resultList[i].s[3]);
  179. // }
  180. cl_ulong time_start, time_end;
  181. double total_time;
  182. clGetEventProfilingInfo(mergeFirstEvent, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, NULL);
  183. clGetEventProfilingInfo(mergeFirstEvent, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, NULL);
  184. total_time = time_end - time_start;
  185. mergesum+= total_time / 1000000;
  186. printf("Merge First Kernel Time: %0.3f \n", total_time/1000000);
  187. // for(int i =0; i < listsize/4; i++) {
  188. // printf("TEST %f \n", d_resultList[i].s[0]);
  189. // printf("TEST %f \n", d_resultList[i].s[1]);
  190. // printf("TEST %f \n", d_resultList[i].s[2]);
  191. // printf("TEST %f \n", d_resultList[i].s[3]);
  192. // }
  193. constStartAddr = clCreateBuffer(mergeContext,CL_MEM_READ_WRITE, (divisions+1)*sizeof(int),NULL,NULL);
  194. err = clEnqueueWriteBuffer(mergeCommands, constStartAddr, CL_TRUE, 0, (divisions+1)*sizeof(int), startaddr, 0, NULL, NULL);
  195. if (err != CL_SUCCESS)
  196. {
  197. printf("Error: Failed to write to constStartAddr source array!\n");
  198. exit(1);
  199. }
  200. mergePassKernel = clCreateKernel(mergeProgram, "mergeSortPass", &err);
  201. if (!mergePassKernel || err != CL_SUCCESS)
  202. {
  203. printf("Error: Failed to create merge sort pass compute kernel!\n");
  204. exit(1);
  205. }
  206. double mergePassTime = 0;
  207. int nrElems = 2;
  208. while(1==1){
  209. int floatsperthread = (nrElems*4);
  210. // printf("FPT %d \n", floatsperthread);
  211. int threadsPerDiv = (int)ceil(largestSize/(float)floatsperthread);
  212. // printf("TPD %d \n",threadsPerDiv);
  213. int threadsNeeded = threadsPerDiv * divisions;
  214. // printf("TN %d \n", threadsNeeded);
  215. #ifdef MERGE_WG_SIZE_1
  216. local[0] = MERGE_WG_SIZE_1;
  217. #else
  218. local[0] = 208;
  219. #endif
  220. grid[0] = ((threadsNeeded%local[0]) == 0) ?
  221. threadsNeeded/local[0] :
  222. (threadsNeeded/local[0]) + 1;
  223. if(grid[0] < 8){
  224. grid[0] = 8;
  225. local[0] = ((threadsNeeded%grid[0]) == 0) ?
  226. threadsNeeded / grid[0] :
  227. (threadsNeeded / grid[0]) + 1;
  228. }
  229. // Swap orig/result list
  230. cl_float4 *tempList = d_origList;
  231. d_origList = d_resultList;
  232. d_resultList = tempList;
  233. err = clEnqueueWriteBuffer(mergeCommands, d_resultList_first_buff, CL_TRUE, 0, listsize*sizeof(float), d_resultList, 0, NULL, NULL);
  234. if (err != CL_SUCCESS)
  235. {
  236. printf("Error: Failed to write to d_resultList_first_buff source array!\n");
  237. exit(1);
  238. }
  239. err = clEnqueueWriteBuffer(mergeCommands, d_origList_first_buff, CL_TRUE, 0, listsize*sizeof(float), d_origList, 0, NULL, NULL);
  240. if (err != CL_SUCCESS)
  241. {
  242. printf("Error: Failed to write to d_origList_first_buff source array!\n");
  243. exit(1);
  244. }
  245. err = 0;
  246. err = clSetKernelArg(mergePassKernel, 0, sizeof(cl_mem), &d_origList_first_buff);
  247. err = clSetKernelArg(mergePassKernel, 1, sizeof(cl_mem), &d_resultList_first_buff);
  248. err = clSetKernelArg(mergePassKernel, 2, sizeof(cl_int), &nrElems);
  249. err = clSetKernelArg(mergePassKernel, 3, sizeof(int), &threadsPerDiv);
  250. err = clSetKernelArg(mergePassKernel, 4, sizeof(cl_mem), &constStartAddr);
  251. if (err != CL_SUCCESS)
  252. {
  253. printf("Error: Failed to set merge pass kernel arguments! %d\n", err);
  254. exit(1);
  255. }
  256. global[0] = grid[0]*local[0];
  257. err = clEnqueueNDRangeKernel(mergeCommands, mergePassKernel, 3, NULL, global, local, 0, NULL, &mergePassEvent);
  258. if (err)
  259. {
  260. printf("Error: Failed to execute mergePass kernel!\n");
  261. exit(1);
  262. }
  263. clFinish(mergeCommands);
  264. err = clEnqueueReadBuffer( mergeCommands, d_resultList_first_buff, CL_TRUE, 0, listsize*sizeof(float), d_resultList, 0, NULL, NULL );
  265. if (err != CL_SUCCESS)
  266. {
  267. printf("Error: Failed to read prefix output array! %d\n", err);
  268. exit(1);
  269. }
  270. clFinish(mergeCommands);
  271. clGetEventProfilingInfo(mergePassEvent, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, NULL);
  272. clGetEventProfilingInfo(mergePassEvent, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, NULL);
  273. total_time = time_end - time_start;
  274. mergesum+= total_time /1000000;
  275. mergePassTime+= total_time/1000000;
  276. printf("Merge Pass Kernel Iteration Time: %0.3f \n", total_time/1000000);
  277. nrElems *= 2;
  278. floatsperthread = (nrElems*4);
  279. if(threadsPerDiv == 1) break;
  280. }
  281. printf("Merge Pass Kernel Time: %0.3f \n", mergePassTime);
  282. finalStartAddr = clCreateBuffer(mergeContext,CL_MEM_READ_WRITE, (divisions+1)*sizeof(int),NULL,NULL);
  283. err = clEnqueueWriteBuffer(mergeCommands, finalStartAddr, CL_TRUE, 0, (divisions+1)*sizeof(int), origOffsets, 0, NULL, NULL);
  284. if (err != CL_SUCCESS)
  285. {
  286. printf("Error: Failed to write to finalStartAddr source array!\n");
  287. exit(1);
  288. }
  289. nullElems = clCreateBuffer(mergeContext,CL_MEM_READ_WRITE, (divisions)*sizeof(int),NULL,NULL);
  290. err = clEnqueueWriteBuffer(mergeCommands, nullElems, CL_TRUE, 0, (divisions)*sizeof(int), nullElements, 0, NULL, NULL);
  291. if (err != CL_SUCCESS)
  292. {
  293. printf("Error: Failed to write to nullElements source array!\n");
  294. exit(1);
  295. }
  296. #ifdef MERGE_WG_SIZE_0
  297. local[0] = MERGE_WG_SIZE_0;
  298. #else
  299. local[0] = 256;
  300. #endif
  301. grid[0] = ((largestSize%local[0]) == 0) ?
  302. largestSize/local[0] :
  303. (largestSize/local[0]) + 1;
  304. grid[1] = divisions;
  305. // grid[0] = 17;
  306. global[0] = grid[0]*local[0];
  307. global[1] = grid[1]*local[1];
  308. mergePackKernel = clCreateKernel(mergeProgram, "mergepack", &err);
  309. if (!mergePackKernel || err != CL_SUCCESS)
  310. {
  311. printf("Error: Failed to create merge sort pack compute kernel!\n");
  312. exit(1);
  313. }
  314. err = clEnqueueWriteBuffer(mergeCommands, d_res, CL_TRUE, 0, listsize*sizeof(float), d_resultList, 0, NULL, NULL);
  315. if (err != CL_SUCCESS)
  316. {
  317. printf("Error: Failed to write to d_resultList_first_buff source array!\n");
  318. exit(1);
  319. }
  320. err = clEnqueueWriteBuffer(mergeCommands, d_orig, CL_TRUE, 0, listsize*sizeof(float), d_origList, 0, NULL, NULL);
  321. if (err != CL_SUCCESS)
  322. {
  323. printf("Error: Failed to write to d_origList_first_buff source array!\n");
  324. exit(1);
  325. }
  326. err = 0;
  327. err = clSetKernelArg(mergePackKernel, 0, sizeof(cl_mem), &d_res);
  328. err = clSetKernelArg(mergePackKernel, 1, sizeof(cl_mem), &d_orig);
  329. err = clSetKernelArg(mergePackKernel, 2, sizeof(cl_mem), &constStartAddr);
  330. err = clSetKernelArg(mergePackKernel, 3, sizeof(cl_mem), &nullElems);
  331. err = clSetKernelArg(mergePackKernel, 4, sizeof(cl_mem), &finalStartAddr);
  332. if (err != CL_SUCCESS)
  333. {
  334. printf("Error: Failed to set merge pack kernel arguments! %d\n", err);
  335. exit(1);
  336. }
  337. err = clEnqueueNDRangeKernel(mergeCommands, mergePackKernel, 3, NULL, global, local, 0, NULL, &mergePackEvent);
  338. if (err)
  339. {
  340. printf("Error: Failed to execute merge pack kernel!\n");
  341. exit(1);
  342. }
  343. clFinish(mergeCommands);
  344. clGetEventProfilingInfo(mergePackEvent, CL_PROFILING_COMMAND_START, sizeof(time_start), &time_start, NULL);
  345. clGetEventProfilingInfo(mergePackEvent, CL_PROFILING_COMMAND_END, sizeof(time_end), &time_end, NULL);
  346. total_time = time_end - time_start;
  347. mergesum+= total_time / 1000000;
  348. printf("Merge Pack Kernel Time: %0.3f \n", total_time/1000000);
  349. err = clEnqueueReadBuffer( mergeCommands, d_orig, CL_TRUE, 0, listsize*sizeof(float), d_origList, 0, NULL, NULL );
  350. if (err != CL_SUCCESS)
  351. {
  352. printf("Error: Failed to read origList array! %d\n", err);
  353. exit(1);
  354. }
  355. free(startaddr);
  356. return d_origList;
  357. }
  358. double getMergeTime() {
  359. return mergesum;
  360. }