hybridsort.c 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255
  1. #ifdef _WIN32
  2. # define WINDOWS_LEAN_AND_MEAN
  3. # define NOMINMAX
  4. # include <windows.h>
  5. #endif
  6. #include <fcntl.h>
  7. #include <float.h>
  8. #include <stdio.h>
  9. #include <stdlib.h>
  10. #include <string.h>
  11. #include <math.h>
  12. #include <unistd.h>
  13. #include <sys/types.h>
  14. #include <sys/stat.h>
  15. #include <CL/cl.h>
  16. #include "bucketsort.h"
  17. #include "mergesort.h"
  18. #include <time.h>
  19. /* #define VERIFY Y */
  20. /* #define TIMER Y */
  21. ////////////////////////////////////////////////////////////////////////////////
  22. // Use a static data size for simplicity
  23. //
  24. #define SIZE (1000000)
  25. #define DATA_SIZE (1024)
  26. #define MAX_SOURCE_SIZE (0x100000)
  27. #define HISTOGRAM_SIZE (1024 * sizeof(unsigned int))
  28. ////////////////////////////////////////////////////////////////////////////////
  29. int compare(const void *a, const void *b) {
  30. if(*((float *)a) < *((float *)b)) return -1;
  31. else if(*((float *)a) > *((float *)b)) return 1;
  32. else return 0;
  33. }
  34. ////////////////////////////////////////////////////////////////////////////////
  35. cl_float4*runMergeSort(int listsize, int divisions,
  36. cl_float4 *d_origList, cl_float4 *d_resultList,
  37. int *sizes, int *nullElements,
  38. unsigned int *origOffsets);
  39. int parseCommandline(int argc, char *argv[], int *platform_id, int *device_id, int *use_gpu){
  40. int i;
  41. printf("%d", argc);
  42. if (argc < 5) return 1; // error
  43. char flag;
  44. for(i=1;i<argc;i++) {
  45. if (argv[i][0]=='-') {// flag
  46. flag = argv[i][1];
  47. switch (flag) {
  48. case 'p': // platform
  49. i++;
  50. *platform_id = atoi(argv[i]);
  51. break;
  52. case 'd': // device
  53. i++;
  54. *device_id = atoi(argv[i]);
  55. break;
  56. case 'g': // device
  57. i++;
  58. *use_gpu = atoi(argv[i]);
  59. break;
  60. }
  61. }
  62. }
  63. if ((*device_id >= 0 && *platform_id<0) || (*platform_id>=0 && *device_id<0)) // both p and d must be specified if either are specified
  64. return 1;
  65. return 0;
  66. }
  67. void printUsage(){
  68. printf("Hybridsort Usage\n");
  69. printf("\n");
  70. printf("hybridsort r -p [int] -d [int] -g [int]\n");
  71. printf("\n");
  72. printf("example:\n");
  73. printf("$ ./hybridsort r -p 0 -d 0 -g 1\n");
  74. printf("\n");
  75. printf("-p [int] Choose the platform (must choose both platform and device)\n");
  76. printf("-d [int] Choose the device (must choose both platform and device)\n");
  77. printf("-g [int] 1 for gpu and 0 for cpu\n");
  78. printf("\n");
  79. }
  80. int main(int argc, char** argv)
  81. {
  82. int err; // error code returned from api calls
  83. unsigned int correct; // number of correct results returned
  84. size_t global; // global domain size for our calculation
  85. size_t local; // local domain size for our calculation
  86. unsigned int *results;
  87. int platform_id=-1,device_id=-1,use_gpu=-1;
  88. // parse command line
  89. if (parseCommandline(argc, argv, &platform_id, &device_id, &use_gpu)) {
  90. printUsage();
  91. return 0;
  92. }
  93. // Fill our data set with random float values
  94. //
  95. int numElements = 0 ;
  96. if(strcmp(argv[1],"r") == 0) {
  97. numElements = SIZE;
  98. }
  99. else {
  100. FILE *fp;
  101. fp = fopen(argv[1],"r");
  102. if(fp == NULL) {
  103. printf("Error reading file \n");
  104. exit(EXIT_FAILURE);
  105. }
  106. int count = 0;
  107. float c;
  108. while(fscanf(fp,"%f",&c) != EOF) {
  109. count++;
  110. }
  111. fclose(fp);
  112. numElements = count;
  113. }
  114. printf("Sorting list of %d floats.\n", numElements);
  115. int mem_size = (numElements + (DIVISIONS*4))*sizeof(float);
  116. // Allocate enough for the input list
  117. float *cpu_idata = (float *)malloc(mem_size);
  118. float *cpu_odata = (float *)malloc(mem_size);
  119. // Allocate enough for the output list on the cpu side
  120. float *d_output = (float *)malloc(mem_size);
  121. // Allocate enough memory for the output list on the gpu side
  122. float *gpu_odata = (float *)malloc(mem_size);
  123. float datamin = FLT_MAX;
  124. float datamax = -FLT_MAX;
  125. if(strcmp(argv[1],"r")==0) {
  126. for (int i = 0; i < numElements; i++) {
  127. // Generate random floats between 0 and 1 for the input data
  128. cpu_idata[i] = ((float) rand() / RAND_MAX);
  129. //Compare data at index to data minimum, if less than current minimum, set that element as new minimum
  130. datamin = fminf(cpu_idata[i], datamin);
  131. //Same as above but for maximum
  132. datamax = fmaxf(cpu_idata[i], datamax);
  133. }
  134. }
  135. else {
  136. FILE *fp;
  137. fp = fopen(argv[1],"r");
  138. for(int i = 0; i < numElements; i++) {
  139. fscanf(fp,"%f",&cpu_idata[i]);
  140. datamin = fminf(cpu_idata[i], datamin);
  141. datamax = fmaxf(cpu_idata[i],datamax);
  142. }
  143. }
  144. FILE *tp;
  145. const char filename2[]="./hybridinput.txt";
  146. tp = fopen(filename2,"w");
  147. for(int i = 0; i < SIZE; i++) {
  148. fprintf(tp,"%f ",cpu_idata[i]);
  149. }
  150. fclose(tp);
  151. memcpy(cpu_odata, cpu_idata, mem_size);
  152. clock_t gpu_start = clock();
  153. init_bucketsort(numElements, platform_id, device_id, use_gpu);
  154. int *sizes = (int*) malloc(DIVISIONS * sizeof(int));
  155. int *nullElements = (int*) malloc(DIVISIONS * sizeof(int));
  156. unsigned int *origOffsets = (unsigned int *) malloc((DIVISIONS + 1) * sizeof(int));
  157. clock_t bucketsort_start = clock();
  158. bucketSort(cpu_idata,d_output,numElements,sizes,nullElements,datamin,datamax,origOffsets,platform_id,device_id,use_gpu);
  159. clock_t bucketsort_diff = clock() - bucketsort_start;
  160. finish_bucketsort();
  161. double bucketTime = getBucketTime();
  162. cl_float4 *d_origList = (cl_float4*) d_output;
  163. cl_float4 *d_resultList = (cl_float4*) cpu_idata;
  164. int newlistsize = 0;
  165. for(int i = 0; i < DIVISIONS; i++){
  166. newlistsize += sizes[i] * 4;
  167. }
  168. init_mergesort(newlistsize, platform_id, device_id, use_gpu);
  169. clock_t mergesort_start = clock();
  170. cl_float4 *mergeresult = runMergeSort(newlistsize,DIVISIONS,d_origList,d_resultList,sizes,nullElements,origOffsets);
  171. clock_t mergesort_diff = clock() - mergesort_start;
  172. finish_mergesort();
  173. gpu_odata = (float*)mergeresult;
  174. #ifdef TIMER
  175. clock_t gpu_diff = clock() - gpu_start;
  176. int gpu_msec = gpu_diff * 1000 / CLOCKS_PER_SEC;
  177. int bucketsort_msec = bucketsort_diff * 1000 / CLOCKS_PER_SEC;
  178. int mergesort_msec = mergesort_diff * 1000 / CLOCKS_PER_SEC;
  179. double mergeTime = getMergeTime();
  180. printf("GPU execution time: %0.3f ms \n", bucketsort_msec+mergesort_msec+bucketTime+mergeTime);
  181. printf(" --Bucketsort execution time: %0.3f ms \n", bucketsort_msec+bucketTime);
  182. printf(" --Mergesort execution time: %0.3f ms \n", mergesort_msec+mergeTime);
  183. #endif
  184. #ifdef VERIFY
  185. clock_t cpu_start = clock(), cpu_diff;
  186. qsort(cpu_odata, numElements, sizeof(float), compare);
  187. cpu_diff = clock() - cpu_start;
  188. int cpu_msec = cpu_diff * 1000 / CLOCKS_PER_SEC;
  189. printf("CPU execution time: %d ms \n", cpu_msec);
  190. printf("Checking result...");
  191. // Result checking
  192. int count = 0;
  193. for(int i = 0; i < numElements; i++){
  194. if(cpu_odata[i] != gpu_odata[i])
  195. {
  196. printf("Sort missmatch on element %d: \n", i);
  197. printf("CPU = %f : GPU = %f\n", cpu_odata[i], gpu_odata[i]);
  198. count++;
  199. break;
  200. }
  201. }
  202. if(count == 0) printf("PASSED.\n");
  203. else printf("FAILED.\n");
  204. #endif
  205. #ifdef OUTPUT
  206. FILE *tp1;
  207. const char filename3[]="./hybridoutput.txt";
  208. tp1 = fopen(filename3,"w");
  209. for(int i = 0; i < SIZE; i++) {
  210. fprintf(tp1,"%f ",cpu_idata[i]);
  211. }
  212. fclose(tp1);
  213. #endif
  214. // printf("%d \n",cpu_odata[1]);
  215. // int summy = 0;
  216. // for(int i =0; i < HISTOGRAM_SIZE; i++)
  217. // summy+=cpu_odata[i];
  218. // printf("%d \n", summy);
  219. return 0;
  220. }