123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255 |
- #ifdef _WIN32
- # define WINDOWS_LEAN_AND_MEAN
- # define NOMINMAX
- # include <windows.h>
- #endif
- #include <fcntl.h>
- #include <float.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <math.h>
- #include <unistd.h>
- #include <sys/types.h>
- #include <sys/stat.h>
- #include <CL/cl.h>
- #include "bucketsort.h"
- #include "mergesort.h"
- #include <time.h>
- /* #define VERIFY Y */
- /* #define TIMER Y */
- ////////////////////////////////////////////////////////////////////////////////
- // Use a static data size for simplicity
- //
- #define SIZE (1000000)
- #define DATA_SIZE (1024)
- #define MAX_SOURCE_SIZE (0x100000)
- #define HISTOGRAM_SIZE (1024 * sizeof(unsigned int))
- ////////////////////////////////////////////////////////////////////////////////
// qsort() comparator that orders two floats ascending.
//
// a, b: pointers to the float elements under comparison.
// Returns -1 when *a < *b, 1 when *a > *b and 0 otherwise. "Otherwise"
// includes any comparison involving NaN, because both relational tests
// are false in that case.
int compare(const void *a, const void *b) {
    const float lhs = *(const float *)a;
    const float rhs = *(const float *)b;

    // Each relational test yields 0 or 1, so the difference is -1, 0 or 1.
    return (lhs > rhs) - (lhs < rhs);
}
- ////////////////////////////////////////////////////////////////////////////////
- cl_float4*runMergeSort(int listsize, int divisions,
- cl_float4 *d_origList, cl_float4 *d_resultList,
- int *sizes, int *nullElements,
- unsigned int *origOffsets);
// Convert one option value to int.
//
// text: the argv entry following a flag.
// out:  receives the parsed value on success; untouched on failure.
// Returns 0 on success, 1 when text is NULL/empty, is not entirely a
// base-10 integer, or does not survive a round trip through int.
static int parseIntValue(const char *text, int *out) {
    char *end = NULL;
    long value;

    if (text == NULL || *text == '\0') return 1;

    value = strtol(text, &end, 10);
    if (*end != '\0') return 1;               // trailing junk or no digits at all
    if ((long)(int)value != value) return 1;  // would not fit in an int

    *out = (int)value;
    return 0;
}

// Parse the -p / -d / -g options.
//
// argc, argv:   the program's command line. At least 5 entries are required.
// platform_id:  receives the value following "-p".
// device_id:    receives the value following "-d".
// use_gpu:      receives the value following "-g".
// Outputs that are not mentioned on the command line keep whatever value
// the caller stored in them beforehand.
//
// Returns 0 on success and 1 on error. Errors are: fewer than 5 arguments,
// a recognised flag with no value after it, a value that is not an integer,
// or only one of platform/device ending up non-negative (they must be given
// together). Arguments that do not start with '-' and unrecognised flags are
// skipped.
int parseCommandline(int argc, char *argv[], int *platform_id, int *device_id, int *use_gpu){
    if (argc < 5) return 1; // error

    for (int i = 1; i < argc; i++) {
        int *target = NULL;

        if (argv[i][0] != '-') continue; // not a flag

        switch (argv[i][1]) {
            case 'p': // platform
                target = platform_id;
                break;
            case 'd': // device
                target = device_id;
                break;
            case 'g': // gpu (1) or cpu (0)
                target = use_gpu;
                break;
            default:  // unknown flag: ignored, and it does not consume a value
                break;
        }
        if (target == NULL) continue;

        // The flag needs a value. Without this check a trailing flag would
        // read argv[argc], which is a null pointer.
        if (i + 1 >= argc) return 1;
        i++;
        if (parseIntValue(argv[i], target)) return 1;
    }

    // both p and d must be specified if either are specified
    if ((*device_id >= 0 && *platform_id < 0) || (*platform_id >= 0 && *device_id < 0))
        return 1;
    return 0;
}
// Write the command-line help text to stdout, one table entry per line.
void printUsage(){
    static const char *const usageLines[] = {
        "Hybridsort Usage\n",
        "\n",
        "hybridsort r -p [int] -d [int] -g [int]\n",
        "\n",
        "example:\n",
        "$ ./hybridsort r -p 0 -d 0 -g 1\n",
        "\n",
        "-p [int] Choose the platform (must choose both platform and device)\n",
        "-d [int] Choose the device (must choose both platform and device)\n",
        "-g [int] 1 for gpu and 0 for cpu\n",
        "\n",
    };
    const size_t lineCount = sizeof(usageLines) / sizeof(usageLines[0]);

    // None of the lines contains a conversion specifier, so emitting them
    // verbatim matches what a printf of each literal produces.
    for (size_t k = 0; k < lineCount; k++) {
        fputs(usageLines[k], stdout);
    }
}
- int main(int argc, char** argv)
- {
- int err; // error code returned from api calls
-
- unsigned int correct; // number of correct results returned
- size_t global; // global domain size for our calculation
- size_t local; // local domain size for our calculation
- unsigned int *results;
-
- int platform_id=-1,device_id=-1,use_gpu=-1;
-
- // parse command line
- if (parseCommandline(argc, argv, &platform_id, &device_id, &use_gpu)) {
- printUsage();
- return 0;
- }
-
- // Fill our data set with random float values
- //
-
- int numElements = 0 ;
-
- if(strcmp(argv[1],"r") == 0) {
- numElements = SIZE;
- }
- else {
- FILE *fp;
- fp = fopen(argv[1],"r");
- if(fp == NULL) {
- printf("Error reading file \n");
- exit(EXIT_FAILURE);
- }
- int count = 0;
- float c;
-
- while(fscanf(fp,"%f",&c) != EOF) {
- count++;
- }
- fclose(fp);
-
- numElements = count;
- }
- printf("Sorting list of %d floats.\n", numElements);
- int mem_size = (numElements + (DIVISIONS*4))*sizeof(float);
- // Allocate enough for the input list
- float *cpu_idata = (float *)malloc(mem_size);
- float *cpu_odata = (float *)malloc(mem_size);
- // Allocate enough for the output list on the cpu side
- float *d_output = (float *)malloc(mem_size);
- // Allocate enough memory for the output list on the gpu side
- float *gpu_odata = (float *)malloc(mem_size);
- float datamin = FLT_MAX;
- float datamax = -FLT_MAX;
-
- if(strcmp(argv[1],"r")==0) {
- for (int i = 0; i < numElements; i++) {
- // Generate random floats between 0 and 1 for the input data
- cpu_idata[i] = ((float) rand() / RAND_MAX);
-
- //Compare data at index to data minimum, if less than current minimum, set that element as new minimum
- datamin = fminf(cpu_idata[i], datamin);
- //Same as above but for maximum
- datamax = fmaxf(cpu_idata[i], datamax);
- }
- }
- else {
- FILE *fp;
- fp = fopen(argv[1],"r");
- for(int i = 0; i < numElements; i++) {
- fscanf(fp,"%f",&cpu_idata[i]);
- datamin = fminf(cpu_idata[i], datamin);
- datamax = fmaxf(cpu_idata[i],datamax);
- }
- }
- FILE *tp;
- const char filename2[]="./hybridinput.txt";
- tp = fopen(filename2,"w");
- for(int i = 0; i < SIZE; i++) {
- fprintf(tp,"%f ",cpu_idata[i]);
- }
-
- fclose(tp);
- memcpy(cpu_odata, cpu_idata, mem_size);
- clock_t gpu_start = clock();
- init_bucketsort(numElements, platform_id, device_id, use_gpu);
- int *sizes = (int*) malloc(DIVISIONS * sizeof(int));
- int *nullElements = (int*) malloc(DIVISIONS * sizeof(int));
- unsigned int *origOffsets = (unsigned int *) malloc((DIVISIONS + 1) * sizeof(int));
- clock_t bucketsort_start = clock();
- bucketSort(cpu_idata,d_output,numElements,sizes,nullElements,datamin,datamax,origOffsets,platform_id,device_id,use_gpu);
- clock_t bucketsort_diff = clock() - bucketsort_start;
- finish_bucketsort();
- double bucketTime = getBucketTime();
- cl_float4 *d_origList = (cl_float4*) d_output;
- cl_float4 *d_resultList = (cl_float4*) cpu_idata;
-
- int newlistsize = 0;
- for(int i = 0; i < DIVISIONS; i++){
- newlistsize += sizes[i] * 4;
- }
-
- init_mergesort(newlistsize, platform_id, device_id, use_gpu);
- clock_t mergesort_start = clock();
- cl_float4 *mergeresult = runMergeSort(newlistsize,DIVISIONS,d_origList,d_resultList,sizes,nullElements,origOffsets);
- clock_t mergesort_diff = clock() - mergesort_start;
- finish_mergesort();
- gpu_odata = (float*)mergeresult;
- #ifdef TIMER
- clock_t gpu_diff = clock() - gpu_start;
- int gpu_msec = gpu_diff * 1000 / CLOCKS_PER_SEC;
- int bucketsort_msec = bucketsort_diff * 1000 / CLOCKS_PER_SEC;
- int mergesort_msec = mergesort_diff * 1000 / CLOCKS_PER_SEC;
- double mergeTime = getMergeTime();
- printf("GPU execution time: %0.3f ms \n", bucketsort_msec+mergesort_msec+bucketTime+mergeTime);
- printf(" --Bucketsort execution time: %0.3f ms \n", bucketsort_msec+bucketTime);
- printf(" --Mergesort execution time: %0.3f ms \n", mergesort_msec+mergeTime);
- #endif
- #ifdef VERIFY
- clock_t cpu_start = clock(), cpu_diff;
-
- qsort(cpu_odata, numElements, sizeof(float), compare);
- cpu_diff = clock() - cpu_start;
- int cpu_msec = cpu_diff * 1000 / CLOCKS_PER_SEC;
- printf("CPU execution time: %d ms \n", cpu_msec);
- printf("Checking result...");
-
- // Result checking
- int count = 0;
- for(int i = 0; i < numElements; i++){
- if(cpu_odata[i] != gpu_odata[i])
- {
- printf("Sort missmatch on element %d: \n", i);
- printf("CPU = %f : GPU = %f\n", cpu_odata[i], gpu_odata[i]);
- count++;
- break;
- }
- }
- if(count == 0) printf("PASSED.\n");
- else printf("FAILED.\n");
- #endif
-
- #ifdef OUTPUT
- FILE *tp1;
- const char filename3[]="./hybridoutput.txt";
- tp1 = fopen(filename3,"w");
- for(int i = 0; i < SIZE; i++) {
- fprintf(tp1,"%f ",cpu_idata[i]);
- }
-
- fclose(tp1);
- #endif
-
- // printf("%d \n",cpu_odata[1]);
- // int summy = 0;
- // for(int i =0; i < HISTOGRAM_SIZE; i++)
- // summy+=cpu_odata[i];
- // printf("%d \n", summy);
- return 0;
- }
|