hybridsort.c 6.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212
  1. #ifdef _WIN32
  2. # define WINDOWS_LEAN_AND_MEAN
  3. # define NOMINMAX
  4. # include <windows.h>
  5. #endif
  6. #include <fcntl.h>
  7. #include <float.h>
  8. #include <stdio.h>
  9. #include <stdlib.h>
  10. #include <string.h>
  11. #include <math.h>
  12. #include <unistd.h>
  13. #include <sys/types.h>
  14. #include <sys/stat.h>
  15. #include <CL/cl.h>
  16. #include "bucketsort.h"
  17. #include "mergesort.h"
  18. #include <time.h>
  19. /* #define VERIFY Y */
  20. /* #define TIMER Y */
  21. ////////////////////////////////////////////////////////////////////////////////
  22. // Use a static data size for simplicity
  23. //
  24. #define SIZE (1000000)
  25. #define DATA_SIZE (1024)
  26. #define MAX_SOURCE_SIZE (0x100000)
  27. #define HISTOGRAM_SIZE (1024 * sizeof(unsigned int))
  28. ////////////////////////////////////////////////////////////////////////////////
  29. int compare(const void *a, const void *b) {
  30. if(*((float *)a) < *((float *)b)) return -1;
  31. else if(*((float *)a) > *((float *)b)) return 1;
  32. else return 0;
  33. }
  34. ////////////////////////////////////////////////////////////////////////////////
  35. cl_float4*runMergeSort(int listsize, int divisions,
  36. cl_float4 *d_origList, cl_float4 *d_resultList,
  37. int *sizes, int *nullElements,
  38. unsigned int *origOffsets);
  39. int main(int argc, char** argv)
  40. {
  41. int err; // error code returned from api calls
  42. unsigned int correct; // number of correct results returned
  43. size_t global; // global domain size for our calculation
  44. size_t local; // local domain size for our calculation
  45. unsigned int *results;
  46. cl_device_id device_id; // compute device id
  47. cl_context context; // compute context
  48. cl_command_queue commands; // compute command queue
  49. cl_program program; // compute program
  50. cl_kernel kernel; // compute kernel
  51. cl_mem input; // device memory used for the input array
  52. cl_mem output; // device memory used for the output array
  53. // Fill our data set with random float values
  54. //
  55. int numElements = 0 ;
  56. if(strcmp(argv[1],"r") ==0) {
  57. numElements = SIZE;
  58. }
  59. else {
  60. FILE *fp;
  61. fp = fopen(argv[1],"r");
  62. if(fp == NULL) {
  63. printf("Error reading file \n");
  64. exit(EXIT_FAILURE);
  65. }
  66. int count = 0;
  67. float c;
  68. while(fscanf(fp,"%f",&c) != EOF) {
  69. count++;
  70. }
  71. fclose(fp);
  72. numElements = count;
  73. }
  74. printf("Sorting list of %d floats.\n", numElements);
  75. int mem_size = (numElements + (DIVISIONS*4))*sizeof(float);
  76. // Allocate enough for the input list
  77. float *cpu_idata = (float *)malloc(mem_size);
  78. float *cpu_odata = (float *)malloc(mem_size);
  79. // Allocate enough for the output list on the cpu side
  80. float *d_output = (float *)malloc(mem_size);
  81. // Allocate enough memory for the output list on the gpu side
  82. float *gpu_odata = (float *)malloc(mem_size);
  83. float datamin = FLT_MAX;
  84. float datamax = -FLT_MAX;
  85. if(strcmp(argv[1],"r")==0) {
  86. for (int i = 0; i < numElements; i++) {
  87. // Generate random floats between 0 and 1 for the input data
  88. cpu_idata[i] = ((float) rand() / RAND_MAX);
  89. //Compare data at index to data minimum, if less than current minimum, set that element as new minimum
  90. datamin = fminf(cpu_idata[i], datamin);
  91. //Same as above but for maximum
  92. datamax = fmaxf(cpu_idata[i], datamax);
  93. }
  94. }
  95. else {
  96. FILE *fp;
  97. fp = fopen(argv[1],"r");
  98. for(int i = 0; i < numElements; i++) {
  99. fscanf(fp,"%f",&cpu_idata[i]);
  100. datamin = fminf(cpu_idata[i], datamin);
  101. datamax = fmaxf(cpu_idata[i],datamax);
  102. }
  103. }
  104. FILE *tp;
  105. const char filename2[]="./hybridinput.txt";
  106. tp = fopen(filename2,"w");
  107. for(int i = 0; i < SIZE; i++) {
  108. fprintf(tp,"%f ",cpu_idata[i]);
  109. }
  110. fclose(tp);
  111. memcpy(cpu_odata, cpu_idata, mem_size);
  112. clock_t gpu_start = clock();
  113. init_bucketsort(numElements);
  114. int *sizes = (int*) malloc(DIVISIONS * sizeof(int));
  115. int *nullElements = (int*) malloc(DIVISIONS * sizeof(int));
  116. unsigned int *origOffsets = (unsigned int *) malloc((DIVISIONS + 1) * sizeof(int));
  117. clock_t bucketsort_start = clock();
  118. bucketSort(cpu_idata,d_output,numElements,sizes,nullElements,datamin,datamax, origOffsets);
  119. clock_t bucketsort_diff = clock() - bucketsort_start;
  120. finish_bucketsort();
  121. double bucketTime = getBucketTime();
  122. cl_float4 *d_origList = (cl_float4*) d_output;
  123. cl_float4 *d_resultList = (cl_float4*) cpu_idata;
  124. int newlistsize = 0;
  125. for(int i = 0; i < DIVISIONS; i++){
  126. newlistsize += sizes[i] * 4;
  127. }
  128. init_mergesort(newlistsize);
  129. clock_t mergesort_start = clock();
  130. cl_float4 *mergeresult = runMergeSort(newlistsize,DIVISIONS,d_origList,d_resultList,sizes,nullElements,origOffsets);
  131. clock_t mergesort_diff = clock() - mergesort_start;
  132. finish_mergesort();
  133. gpu_odata = (float*)mergeresult;
  134. #ifdef TIMER
  135. clock_t gpu_diff = clock() - gpu_start;
  136. int gpu_msec = gpu_diff * 1000 / CLOCKS_PER_SEC;
  137. int bucketsort_msec = bucketsort_diff * 1000 / CLOCKS_PER_SEC;
  138. int mergesort_msec = mergesort_diff * 1000 / CLOCKS_PER_SEC;
  139. double mergeTime = getMergeTime();
  140. printf("GPU execution time: %0.3f ms \n", bucketsort_msec+mergesort_msec+bucketTime+mergeTime);
  141. printf(" --Bucketsort execution time: %0.3f ms \n", bucketsort_msec+bucketTime);
  142. printf(" --Mergesort execution time: %0.3f ms \n", mergesort_msec+mergeTime);
  143. #endif
  144. #ifdef VERIFY
  145. clock_t cpu_start = clock(), cpu_diff;
  146. qsort(cpu_odata, numElements, sizeof(float), compare);
  147. cpu_diff = clock() - cpu_start;
  148. int cpu_msec = cpu_diff * 1000 / CLOCKS_PER_SEC;
  149. printf("CPU execution time: %d ms \n", cpu_msec);
  150. printf("Checking result...");
  151. // Result checking
  152. int count = 0;
  153. for(int i = 0; i < numElements; i++){
  154. if(cpu_odata[i] != gpu_odata[i])
  155. {
  156. printf("Sort missmatch on element %d: \n", i);
  157. printf("CPU = %f : GPU = %f\n", cpu_odata[i], gpu_odata[i]);
  158. count++;
  159. break;
  160. }
  161. }
  162. if(count == 0) printf("PASSED.\n");
  163. else printf("FAILED.\n");
  164. #endif
  165. #ifdef OUTPUT
  166. FILE *tp1;
  167. const char filename3[]="./hybridoutput.txt";
  168. tp1 = fopen(filename3,"w");
  169. for(int i = 0; i < SIZE; i++) {
  170. fprintf(tp1,"%f ",cpu_idata[i]);
  171. }
  172. fclose(tp1);
  173. #endif
  174. // printf("%d \n",cpu_odata[1]);
  175. // int summy = 0;
  176. // for(int i =0; i < HISTOGRAM_SIZE; i++)
  177. // summy+=cpu_odata[i];
  178. // printf("%d \n", summy);
  179. return 0;
  180. }