123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342 |
- #include "CL_helper.h"
- #define STR_SIZE 256
/*
 * Return the current wall-clock time in microseconds, as reported by
 * gettimeofday().
 *
 * The seconds field is widened to long long before it is scaled, so the
 * multiplication cannot overflow on platforms where time_t/long is only
 * 32 bits wide. tv is zero-initialised so that a failing gettimeofday()
 * yields 0 instead of an indeterminate value.
 */
long long get_time() {
    struct timeval tv = {0, 0};

    gettimeofday(&tv, NULL);
    return ((long long)tv.tv_sec * 1000000LL) + (long long)tv.tv_usec;
}
- char *err_code (cl_int err_in)
- {
- switch (err_in) {
- case CL_SUCCESS :
- return (char*)" CL_SUCCESS ";
- case CL_DEVICE_NOT_FOUND :
- return (char*)" CL_DEVICE_NOT_FOUND ";
- case CL_DEVICE_NOT_AVAILABLE :
- return (char*)" CL_DEVICE_NOT_AVAILABLE ";
- case CL_COMPILER_NOT_AVAILABLE :
- return (char*)" CL_COMPILER_NOT_AVAILABLE ";
- case CL_MEM_OBJECT_ALLOCATION_FAILURE :
- return (char*)" CL_MEM_OBJECT_ALLOCATION_FAILURE ";
- case CL_OUT_OF_RESOURCES :
- return (char*)" CL_OUT_OF_RESOURCES ";
- case CL_OUT_OF_HOST_MEMORY :
- return (char*)" CL_OUT_OF_HOST_MEMORY ";
- case CL_PROFILING_INFO_NOT_AVAILABLE :
- return (char*)" CL_PROFILING_INFO_NOT_AVAILABLE ";
- case CL_MEM_COPY_OVERLAP :
- return (char*)" CL_MEM_COPY_OVERLAP ";
- case CL_IMAGE_FORMAT_MISMATCH :
- return (char*)" CL_IMAGE_FORMAT_MISMATCH ";
- case CL_IMAGE_FORMAT_NOT_SUPPORTED :
- return (char*)" CL_IMAGE_FORMAT_NOT_SUPPORTED ";
- case CL_BUILD_PROGRAM_FAILURE :
- return (char*)" CL_BUILD_PROGRAM_FAILURE ";
- case CL_MAP_FAILURE :
- return (char*)" CL_MAP_FAILURE ";
- case CL_MISALIGNED_SUB_BUFFER_OFFSET :
- return (char*)" CL_MISALIGNED_SUB_BUFFER_OFFSET ";
- case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST :
- return (char*)" CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST ";
- case CL_INVALID_VALUE :
- return (char*)" CL_INVALID_VALUE ";
- case CL_INVALID_DEVICE_TYPE :
- return (char*)" CL_INVALID_DEVICE_TYPE ";
- case CL_INVALID_PLATFORM :
- return (char*)" CL_INVALID_PLATFORM ";
- case CL_INVALID_DEVICE :
- return (char*)" CL_INVALID_DEVICE ";
- case CL_INVALID_CONTEXT :
- return (char*)" CL_INVALID_CONTEXT ";
- case CL_INVALID_QUEUE_PROPERTIES :
- return (char*)" CL_INVALID_QUEUE_PROPERTIES ";
- case CL_INVALID_COMMAND_QUEUE :
- return (char*)" CL_INVALID_COMMAND_QUEUE ";
- case CL_INVALID_HOST_PTR :
- return (char*)" CL_INVALID_HOST_PTR ";
- case CL_INVALID_MEM_OBJECT :
- return (char*)" CL_INVALID_MEM_OBJECT ";
- case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR :
- return (char*)" CL_INVALID_IMAGE_FORMAT_DESCRIPTOR ";
- case CL_INVALID_IMAGE_SIZE :
- return (char*)" CL_INVALID_IMAGE_SIZE ";
- case CL_INVALID_SAMPLER :
- return (char*)" CL_INVALID_SAMPLER ";
- case CL_INVALID_BINARY :
- return (char*)" CL_INVALID_BINARY ";
- case CL_INVALID_BUILD_OPTIONS :
- return (char*)" CL_INVALID_BUILD_OPTIONS ";
- case CL_INVALID_PROGRAM :
- return (char*)" CL_INVALID_PROGRAM ";
- case CL_INVALID_PROGRAM_EXECUTABLE :
- return (char*)" CL_INVALID_PROGRAM_EXECUTABLE ";
- case CL_INVALID_KERNEL_NAME :
- return (char*)" CL_INVALID_KERNEL_NAME ";
- case CL_INVALID_KERNEL_DEFINITION :
- return (char*)" CL_INVALID_KERNEL_DEFINITION ";
- case CL_INVALID_KERNEL :
- return (char*)" CL_INVALID_KERNEL ";
- case CL_INVALID_ARG_INDEX :
- return (char*)" CL_INVALID_ARG_INDEX ";
- case CL_INVALID_ARG_VALUE :
- return (char*)" CL_INVALID_ARG_VALUE ";
- case CL_INVALID_ARG_SIZE :
- return (char*)" CL_INVALID_ARG_SIZE ";
- case CL_INVALID_KERNEL_ARGS :
- return (char*)" CL_INVALID_KERNEL_ARGS ";
- case CL_INVALID_WORK_DIMENSION :
- return (char*)" CL_INVALID_WORK_DIMENSION ";
- case CL_INVALID_WORK_GROUP_SIZE :
- return (char*)" CL_INVALID_WORK_GROUP_SIZE ";
- case CL_INVALID_WORK_ITEM_SIZE :
- return (char*)" CL_INVALID_WORK_ITEM_SIZE ";
- case CL_INVALID_GLOBAL_OFFSET :
- return (char*)" CL_INVALID_GLOBAL_OFFSET ";
- case CL_INVALID_EVENT_WAIT_LIST :
- return (char*)" CL_INVALID_EVENT_WAIT_LIST ";
- case CL_INVALID_EVENT :
- return (char*)" CL_INVALID_EVENT ";
- case CL_INVALID_OPERATION :
- return (char*)" CL_INVALID_OPERATION ";
- case CL_INVALID_GL_OBJECT :
- return (char*)" CL_INVALID_GL_OBJECT ";
- case CL_INVALID_BUFFER_SIZE :
- return (char*)" CL_INVALID_BUFFER_SIZE ";
- case CL_INVALID_MIP_LEVEL :
- return (char*)" CL_INVALID_MIP_LEVEL ";
- case CL_INVALID_GLOBAL_WORK_SIZE :
- return (char*)" CL_INVALID_GLOBAL_WORK_SIZE ";
- case CL_INVALID_PROPERTY :
- return (char*)" CL_INVALID_PROPERTY ";
- default:
- return (char*)"UNKNOWN ERROR";
- }
- }
- int output_device_info(cl_device_id device_id)
- {
- int err;
- cl_device_type device_type;
- cl_uint comp_units;
- cl_char vendor_name[1024] = {0};
- cl_char device_name[1024] = {0};
- #ifdef VERBOSE
- cl_uint max_work_itm_dims;
- size_t max_wrkgrp_size;
- size_t *max_loc_size;
- #endif
- err = clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(device_name), &device_name, NULL);
- if (err != CL_SUCCESS)
- {
- printf("Error: Failed to access device name!\n");
- return EXIT_FAILURE;
- }
- printf(" \n Device is %s \n",device_name);
- err = clGetDeviceInfo(device_id, CL_DEVICE_TYPE, sizeof(device_type), &device_type, NULL);
- if (err != CL_SUCCESS)
- {
- printf("Error: Failed to access device type information!\n");
- return EXIT_FAILURE;
- }
- err = clGetDeviceInfo(device_id, CL_DEVICE_VENDOR, sizeof(vendor_name), &vendor_name, NULL);
- if (err != CL_SUCCESS)
- {
- printf("Error: Failed to access device vendor name!\n");
- return EXIT_FAILURE;
- }
- printf(" %s \n",vendor_name);
- err = clGetDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &comp_units, NULL);
- if (err != CL_SUCCESS)
- {
- printf("Error: Failed to access device number of compute units !\n");
- return EXIT_FAILURE;
- }
- #ifdef VERBOSE
- err = clGetDeviceInfo( device_id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint),
- &max_work_itm_dims, NULL);
- if (err != CL_SUCCESS)
- {
- printf("Error: Failed to get device Info (CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS)!\n",
- err_code(err));
- return EXIT_FAILURE;
- }
-
- max_loc_size = (size_t*)malloc(max_work_itm_dims * sizeof(size_t));
- if(max_loc_size == NULL){
- printf(" malloc failed\n");
- return EXIT_FAILURE;
- }
- err = clGetDeviceInfo( device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES, max_work_itm_dims* sizeof(size_t),
- max_loc_size, NULL);
- if (err != CL_SUCCESS)
- {
- printf("Error: Failed to get device Info (CL_DEVICE_MAX_WORK_ITEM_SIZES)!\n",err_code(err));
- return EXIT_FAILURE;
- }
- err = clGetDeviceInfo( device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t),
- &max_wrkgrp_size, NULL);
- if (err != CL_SUCCESS)
- {
- printf("Error: Failed to get device Info (CL_DEVICE_MAX_WORK_GROUP_SIZE)!\n",err_code(err));
- return EXIT_FAILURE;
- }
- printf("work group, work item information");
- printf("\n max loc dim ");
- for(int i=0; i< max_work_itm_dims; i++)
- printf(" %d ",(int)(*(max_loc_size+i)));
- printf("\n");
- printf(" Max work group size = %d\n",(int)max_wrkgrp_size);
- #endif
- return CL_SUCCESS;
- }
/*
 * Write an error message to stderr in the form "Error: <s>" followed by a
 * newline.
 *
 * NOTE: despite its name, this routine only prints. It does not terminate
 * the process and control returns to the caller, which has to stop or
 * recover on its own.
 */
void fatal(char *s)
{
    (void)fprintf(stderr, "Error: %s\n", s);
}
/*
 * Read a whole text file into a freshly allocated, NUL-terminated buffer.
 *
 * filename: path of the file to read.
 *
 * Returns a malloc'ed buffer holding the file contents; the caller owns it
 * and releases it with free(). Returns NULL, after reporting the problem
 * through fatal(), when the file cannot be opened, measured, read, or when
 * memory cannot be allocated. fatal() does not terminate the process, so
 * every failure path returns explicitly. The file is closed on every path.
 */
char *load_kernel_source(const char *filename) {
    FILE *file;
    char *source = NULL;
    long length;
    size_t size;
    size_t got;

    // Open the source file
    file = fopen(filename, "r");
    if (file == NULL) {
        fatal("Error opening kernel source file\n");
        return NULL;
    }

    // Determine the size of the file, then rewind for reading
    if (fseek(file, 0, SEEK_END) != 0) goto read_error;
    length = ftell(file);
    if (length < 0) goto read_error;   // ftell reports failure as -1
    if (fseek(file, 0, SEEK_SET) != 0) goto read_error;
    size = (size_t)length;

    // Allocate space for the source code (plus one for null-terminator)
    source = (char *) malloc(size + 1);
    if (source == NULL) {
        fatal("Error allocating memory for kernel source\n");
        fclose(file);
        return NULL;
    }

    // Read the source code into the string. In text mode the number of
    // bytes delivered may be smaller than the measured size (newline
    // translation), so a short count is an error only if the stream says so.
    got = fread(source, 1, size, file);
    if (got != size && ferror(file)) goto read_error;

    // Null-terminate right after the bytes that were actually read
    source[got] = '\0';

    fclose(file);
    return source;

read_error:
    fatal("Error reading kernel source file\n");
    free(source);   // free(NULL) is a no-op when nothing was allocated yet
    fclose(file);
    return NULL;
}
- void readinput(float *vect, int grid_rows, int grid_cols, int layers, char *file) {
- int i,j,k;
- FILE *fp;
- char str[STR_SIZE];
- float val;
- if( (fp = fopen(file, "r" )) ==0 )
- fatal( "The file was not opened" );
- for (i=0; i <= grid_rows-1; i++)
- for (j=0; j <= grid_cols-1; j++)
- for (k=0; k <= layers-1; k++)
- {
- if (fgets(str, STR_SIZE, fp) == NULL) fatal("Error reading file\n");
- if (feof(fp))
- fatal("not enough lines in file");
- //if ((sscanf(str, "%d%f", &index, &val) != 2) || (index != ((i-1)*(grid_cols-2)+j-1)))
- if ((sscanf(str, "%f", &val) != 1))
- fatal("invalid file format");
- vect[i*grid_cols+j+k*grid_rows*grid_cols] = val;
- }
- fclose(fp);
- }
/*
 * Write a 3-D grid of floats to a text file, one "<index>\t<value>" line
 * per element.
 *
 * Elements are emitted with the row index outermost, then the column, then
 * the layer; the value for (row i, column j, layer k) is read from
 * vect[i*grid_cols + j + k*grid_rows*grid_cols]. <index> is a running
 * counter that starts at 0 and increases by one per line.
 *
 * vect:      source array.
 * grid_rows: number of rows.
 * grid_cols: number of columns.
 * layers:    number of layers.
 * file:      path of the output file (created or truncated).
 *
 * If the file cannot be opened, a message is printed to stdout and the
 * function returns without writing anything.
 */
void writeoutput(float *vect, int grid_rows, int grid_cols, int layers, char *file) {
    int i, j, k, index = 0;
    FILE *fp;

    if ((fp = fopen(file, "w")) == NULL) {
        printf("The file was not opened\n");
        return;   // nothing to write to; do not touch the NULL stream
    }

    for (i = 0; i < grid_rows; i++)
        for (j = 0; j < grid_cols; j++)
            for (k = 0; k < layers; k++)
            {
                // Formatted straight into the stream; no fixed-size
                // intermediate buffer is needed.
                fprintf(fp, "%d\t%g\n", index, vect[i*grid_cols + j + k*grid_rows*grid_cols]);
                index++;
            }

    fclose(fp);
}
/*
 * Run a 7-point stencil update over an nx * ny * nz grid on the CPU.
 *
 * Cell (x, y, z) lives at index x + y*nx + z*nx*ny. In each sweep every
 * output cell is a weighted sum of the input cell, its six axis neighbours
 * (a neighbour that would fall outside the grid is replaced by the cell
 * itself), the matching pIn entry scaled by dt/Cap, and amb_temp scaled by
 * the vertical weight.
 *
 * pIn:      per-cell source term (read only).
 * tIn:      input field for the first sweep.
 * tOut:     output field for the first sweep.
 * nx,ny,nz: grid extents.
 * Cap, Rx, Ry, Rz, dt: scalars from which the stencil weights are derived
 *           (dt/Cap divided by Rx, Ry and Rz respectively).
 * amb_temp: constant added through the vertical weight.
 * numiter:  requested number of sweeps; at least one sweep is always run.
 *
 * The two field buffers swap roles after every sweep. Only the local
 * pointer copies are swapped, so after an odd number of sweeps the newest
 * field is in the buffer passed as tOut, and after an even number it is in
 * the buffer passed as tIn.
 */
void computeTempCPU(float *pIn, float* tIn, float *tOut,
        int nx, int ny, int nz, float Cap,
        float Rx, float Ry, float Rz,
        float dt, float amb_temp, int numiter)
{
    float wEast, wWest, wNorth, wSouth, wTop, wBottom, wCenter;
    float stepDivCap = dt / Cap;
    int plane = nx * ny;        /* cells in one z-layer */
    int sweeps = 0;
    int x, y, z;

    wEast = wWest = stepDivCap / Rx;
    wNorth = wSouth = stepDivCap / Ry;
    wTop = wBottom = stepDivCap / Rz;
    wCenter = 1.0 - (2.0*wEast + 2.0*wNorth + 3.0*wTop);

    for (;;) {
        for (z = 0; z < nz; z++) {
            for (y = 0; y < ny; y++) {
                for (x = 0; x < nx; x++) {
                    int c = x + y * nx + z * plane;
                    int w = c, e = c, n = c, s = c, b = c, t = c;

                    /* Step to a neighbour only when it is inside the grid. */
                    if (x != 0)      w = c - 1;
                    if (x != nx - 1) e = c + 1;
                    if (y != 0)      n = c - nx;
                    if (y != ny - 1) s = c + nx;
                    if (z != 0)      b = c - plane;
                    if (z != nz - 1) t = c + plane;

                    tOut[c] = tIn[c]*wCenter + tIn[n]*wNorth + tIn[s]*wSouth + tIn[e]*wEast + tIn[w]*wWest + tIn[t]*wTop + tIn[b]*wBottom + (dt/Cap) * pIn[c] + wTop*amb_temp;
                }
            }
        }

        /* The output of this sweep becomes the input of the next one. */
        {
            float *swap = tIn;
            tIn = tOut;
            tOut = swap;
        }

        sweeps++;
        if (!(sweeps < numiter)) {
            break;
        }
    }
}
- float accuracy(float *arr1, float *arr2, int len)
- {
- float err = 0.0;
- int i;
- for(i = 0; i < len; i++)
- {
- err += (arr1[i]-arr2[i]) * (arr1[i]-arr2[i]);
- }
- return (float)sqrt(err/len);
- }
-
|