CL_helper.c 11 KB


  1. #include "CL_helper.h"
  2. #define STR_SIZE 256
  /*
   * Return the current wall-clock time, in microseconds, as reported by
   * gettimeofday().
   *
   * The seconds field is widened to long long BEFORE it is scaled: the
   * multiplication would otherwise be carried out in the width of time_t and
   * overflow on platforms where that type is only 32 bits wide.
   */
  long long get_time() {
      struct timeval tv;

      gettimeofday(&tv, NULL);
      return ((long long)tv.tv_sec * 1000000LL) + (long long)tv.tv_usec;
  }
  8. char *err_code (cl_int err_in)
  9. {
  10. switch (err_in) {
  11. case CL_SUCCESS :
  12. return (char*)" CL_SUCCESS ";
  13. case CL_DEVICE_NOT_FOUND :
  14. return (char*)" CL_DEVICE_NOT_FOUND ";
  15. case CL_DEVICE_NOT_AVAILABLE :
  16. return (char*)" CL_DEVICE_NOT_AVAILABLE ";
  17. case CL_COMPILER_NOT_AVAILABLE :
  18. return (char*)" CL_COMPILER_NOT_AVAILABLE ";
  19. case CL_MEM_OBJECT_ALLOCATION_FAILURE :
  20. return (char*)" CL_MEM_OBJECT_ALLOCATION_FAILURE ";
  21. case CL_OUT_OF_RESOURCES :
  22. return (char*)" CL_OUT_OF_RESOURCES ";
  23. case CL_OUT_OF_HOST_MEMORY :
  24. return (char*)" CL_OUT_OF_HOST_MEMORY ";
  25. case CL_PROFILING_INFO_NOT_AVAILABLE :
  26. return (char*)" CL_PROFILING_INFO_NOT_AVAILABLE ";
  27. case CL_MEM_COPY_OVERLAP :
  28. return (char*)" CL_MEM_COPY_OVERLAP ";
  29. case CL_IMAGE_FORMAT_MISMATCH :
  30. return (char*)" CL_IMAGE_FORMAT_MISMATCH ";
  31. case CL_IMAGE_FORMAT_NOT_SUPPORTED :
  32. return (char*)" CL_IMAGE_FORMAT_NOT_SUPPORTED ";
  33. case CL_BUILD_PROGRAM_FAILURE :
  34. return (char*)" CL_BUILD_PROGRAM_FAILURE ";
  35. case CL_MAP_FAILURE :
  36. return (char*)" CL_MAP_FAILURE ";
  37. case CL_MISALIGNED_SUB_BUFFER_OFFSET :
  38. return (char*)" CL_MISALIGNED_SUB_BUFFER_OFFSET ";
  39. case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST :
  40. return (char*)" CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST ";
  41. case CL_INVALID_VALUE :
  42. return (char*)" CL_INVALID_VALUE ";
  43. case CL_INVALID_DEVICE_TYPE :
  44. return (char*)" CL_INVALID_DEVICE_TYPE ";
  45. case CL_INVALID_PLATFORM :
  46. return (char*)" CL_INVALID_PLATFORM ";
  47. case CL_INVALID_DEVICE :
  48. return (char*)" CL_INVALID_DEVICE ";
  49. case CL_INVALID_CONTEXT :
  50. return (char*)" CL_INVALID_CONTEXT ";
  51. case CL_INVALID_QUEUE_PROPERTIES :
  52. return (char*)" CL_INVALID_QUEUE_PROPERTIES ";
  53. case CL_INVALID_COMMAND_QUEUE :
  54. return (char*)" CL_INVALID_COMMAND_QUEUE ";
  55. case CL_INVALID_HOST_PTR :
  56. return (char*)" CL_INVALID_HOST_PTR ";
  57. case CL_INVALID_MEM_OBJECT :
  58. return (char*)" CL_INVALID_MEM_OBJECT ";
  59. case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR :
  60. return (char*)" CL_INVALID_IMAGE_FORMAT_DESCRIPTOR ";
  61. case CL_INVALID_IMAGE_SIZE :
  62. return (char*)" CL_INVALID_IMAGE_SIZE ";
  63. case CL_INVALID_SAMPLER :
  64. return (char*)" CL_INVALID_SAMPLER ";
  65. case CL_INVALID_BINARY :
  66. return (char*)" CL_INVALID_BINARY ";
  67. case CL_INVALID_BUILD_OPTIONS :
  68. return (char*)" CL_INVALID_BUILD_OPTIONS ";
  69. case CL_INVALID_PROGRAM :
  70. return (char*)" CL_INVALID_PROGRAM ";
  71. case CL_INVALID_PROGRAM_EXECUTABLE :
  72. return (char*)" CL_INVALID_PROGRAM_EXECUTABLE ";
  73. case CL_INVALID_KERNEL_NAME :
  74. return (char*)" CL_INVALID_KERNEL_NAME ";
  75. case CL_INVALID_KERNEL_DEFINITION :
  76. return (char*)" CL_INVALID_KERNEL_DEFINITION ";
  77. case CL_INVALID_KERNEL :
  78. return (char*)" CL_INVALID_KERNEL ";
  79. case CL_INVALID_ARG_INDEX :
  80. return (char*)" CL_INVALID_ARG_INDEX ";
  81. case CL_INVALID_ARG_VALUE :
  82. return (char*)" CL_INVALID_ARG_VALUE ";
  83. case CL_INVALID_ARG_SIZE :
  84. return (char*)" CL_INVALID_ARG_SIZE ";
  85. case CL_INVALID_KERNEL_ARGS :
  86. return (char*)" CL_INVALID_KERNEL_ARGS ";
  87. case CL_INVALID_WORK_DIMENSION :
  88. return (char*)" CL_INVALID_WORK_DIMENSION ";
  89. case CL_INVALID_WORK_GROUP_SIZE :
  90. return (char*)" CL_INVALID_WORK_GROUP_SIZE ";
  91. case CL_INVALID_WORK_ITEM_SIZE :
  92. return (char*)" CL_INVALID_WORK_ITEM_SIZE ";
  93. case CL_INVALID_GLOBAL_OFFSET :
  94. return (char*)" CL_INVALID_GLOBAL_OFFSET ";
  95. case CL_INVALID_EVENT_WAIT_LIST :
  96. return (char*)" CL_INVALID_EVENT_WAIT_LIST ";
  97. case CL_INVALID_EVENT :
  98. return (char*)" CL_INVALID_EVENT ";
  99. case CL_INVALID_OPERATION :
  100. return (char*)" CL_INVALID_OPERATION ";
  101. case CL_INVALID_GL_OBJECT :
  102. return (char*)" CL_INVALID_GL_OBJECT ";
  103. case CL_INVALID_BUFFER_SIZE :
  104. return (char*)" CL_INVALID_BUFFER_SIZE ";
  105. case CL_INVALID_MIP_LEVEL :
  106. return (char*)" CL_INVALID_MIP_LEVEL ";
  107. case CL_INVALID_GLOBAL_WORK_SIZE :
  108. return (char*)" CL_INVALID_GLOBAL_WORK_SIZE ";
  109. case CL_INVALID_PROPERTY :
  110. return (char*)" CL_INVALID_PROPERTY ";
  111. default:
  112. return (char*)"UNKNOWN ERROR";
  113. }
  114. }
  115. int output_device_info(cl_device_id device_id)
  116. {
  117. int err;
  118. cl_device_type device_type;
  119. cl_uint comp_units;
  120. cl_char vendor_name[1024] = {0};
  121. cl_char device_name[1024] = {0};
  122. #ifdef VERBOSE
  123. cl_uint max_work_itm_dims;
  124. size_t max_wrkgrp_size;
  125. size_t *max_loc_size;
  126. #endif
  127. err = clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(device_name), &device_name, NULL);
  128. if (err != CL_SUCCESS)
  129. {
  130. printf("Error: Failed to access device name!\n");
  131. return EXIT_FAILURE;
  132. }
  133. printf(" \n Device is %s \n",device_name);
  134. err = clGetDeviceInfo(device_id, CL_DEVICE_TYPE, sizeof(device_type), &device_type, NULL);
  135. if (err != CL_SUCCESS)
  136. {
  137. printf("Error: Failed to access device type information!\n");
  138. return EXIT_FAILURE;
  139. }
  140. err = clGetDeviceInfo(device_id, CL_DEVICE_VENDOR, sizeof(vendor_name), &vendor_name, NULL);
  141. if (err != CL_SUCCESS)
  142. {
  143. printf("Error: Failed to access device vendor name!\n");
  144. return EXIT_FAILURE;
  145. }
  146. printf(" %s \n",vendor_name);
  147. err = clGetDeviceInfo(device_id, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cl_uint), &comp_units, NULL);
  148. if (err != CL_SUCCESS)
  149. {
  150. printf("Error: Failed to access device number of compute units !\n");
  151. return EXIT_FAILURE;
  152. }
  153. #ifdef VERBOSE
  154. err = clGetDeviceInfo( device_id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint),
  155. &max_work_itm_dims, NULL);
  156. if (err != CL_SUCCESS)
  157. {
  158. printf("Error: Failed to get device Info (CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS)!\n",
  159. err_code(err));
  160. return EXIT_FAILURE;
  161. }
  162. max_loc_size = (size_t*)malloc(max_work_itm_dims * sizeof(size_t));
  163. if(max_loc_size == NULL){
  164. printf(" malloc failed\n");
  165. return EXIT_FAILURE;
  166. }
  167. err = clGetDeviceInfo( device_id, CL_DEVICE_MAX_WORK_ITEM_SIZES, max_work_itm_dims* sizeof(size_t),
  168. max_loc_size, NULL);
  169. if (err != CL_SUCCESS)
  170. {
  171. printf("Error: Failed to get device Info (CL_DEVICE_MAX_WORK_ITEM_SIZES)!\n",err_code(err));
  172. return EXIT_FAILURE;
  173. }
  174. err = clGetDeviceInfo( device_id, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t),
  175. &max_wrkgrp_size, NULL);
  176. if (err != CL_SUCCESS)
  177. {
  178. printf("Error: Failed to get device Info (CL_DEVICE_MAX_WORK_GROUP_SIZE)!\n",err_code(err));
  179. return EXIT_FAILURE;
  180. }
  181. printf("work group, work item information");
  182. printf("\n max loc dim ");
  183. for(int i=0; i< max_work_itm_dims; i++)
  184. printf(" %d ",(int)(*(max_loc_size+i)));
  185. printf("\n");
  186. printf(" Max work group size = %d\n",(int)max_wrkgrp_size);
  187. #endif
  188. return CL_SUCCESS;
  189. }
  /*
   * Report an error on the standard error stream.
   *
   * s: message text; it is printed after an "Error: " prefix and followed by
   *    a newline.
   *
   * NOTE: despite its name this routine only prints -- it does not terminate
   * the program, so control returns to the caller afterwards.
   */
  void fatal(char *s)
  {
      FILE *sink = stderr;

      (void)fprintf(sink, "Error: %s\n", s);
  }
  /*
   * Read an entire kernel source file into a freshly allocated,
   * NUL-terminated string.
   *
   * filename: path of the file to load.
   *
   * Returns a malloc()ed buffer that the caller must free(), or NULL when the
   * file cannot be opened, sized, read, or when memory cannot be allocated.
   * Every failure is reported through fatal() first.  The file handle is
   * closed on all paths.
   */
  char *load_kernel_source(const char *filename) {
      FILE *file = fopen(filename, "r");
      if (file == NULL) {
          fatal("Error opening kernel source file\n");
          return NULL;
      }

      /* Determine the size of the file.  ftell() yields a long and signals
       * failure with -1, so it is checked before being used as a size. */
      long length = -1;
      if (fseek(file, 0, SEEK_END) == 0) {
          length = ftell(file);
      }
      if (length < 0 || fseek(file, 0, SEEK_SET) != 0) {
          fatal("Error reading kernel source file\n");
          fclose(file);
          return NULL;
      }

      /* Allocate space for the source code (plus one for the terminator). */
      size_t size = (size_t)length;
      char *source = (char *)malloc(size + 1);
      if (source == NULL) {
          fatal("Error allocating memory for kernel source file\n");
          fclose(file);
          return NULL;
      }

      /* A text-mode stream may legitimately deliver fewer bytes than ftell()
       * reported (newline translation), so only a stream error is treated as
       * a failure, and the string is terminated after the bytes actually
       * read rather than after uninitialised memory. */
      size_t got = fread(source, 1, size, file);
      if (got != size && ferror(file)) {
          fatal("Error reading kernel source file\n");
          free(source);
          fclose(file);
          return NULL;
      }
      source[got] = '\0';

      fclose(file);
      return source;
  }
  213. void readinput(float *vect, int grid_rows, int grid_cols, int layers, char *file) {
  214. int i,j,k;
  215. FILE *fp;
  216. char str[STR_SIZE];
  217. float val;
  218. if( (fp = fopen(file, "r" )) ==0 )
  219. fatal( "The file was not opened" );
  220. for (i=0; i <= grid_rows-1; i++)
  221. for (j=0; j <= grid_cols-1; j++)
  222. for (k=0; k <= layers-1; k++)
  223. {
  224. if (fgets(str, STR_SIZE, fp) == NULL) fatal("Error reading file\n");
  225. if (feof(fp))
  226. fatal("not enough lines in file");
  227. //if ((sscanf(str, "%d%f", &index, &val) != 2) || (index != ((i-1)*(grid_cols-2)+j-1)))
  228. if ((sscanf(str, "%f", &val) != 1))
  229. fatal("invalid file format");
  230. vect[i*grid_cols+j+k*grid_rows*grid_cols] = val;
  231. }
  232. fclose(fp);
  233. }
  /*
   * Write a 3-D grid to a text file, one "<index>\t<value>" line per element.
   *
   * vect:      source array with at least grid_rows*grid_cols*layers elements.
   * grid_rows, grid_cols, layers: grid dimensions.
   * file:      path of the output file (created or truncated).
   *
   * Elements are emitted with the layer index varying fastest, then the
   * column, then the row; <index> is a running counter starting at 0 and the
   * value is read from vect[row*grid_cols + col + layer*grid_rows*grid_cols].
   *
   * If the file cannot be opened a message is printed to stdout and nothing
   * is written.
   */
  void writeoutput(float *vect, int grid_rows, int grid_cols, int layers, char *file) {
      FILE *fp = fopen(file, "w");

      if (fp == NULL) {
          printf( "The file was not opened\n" );
          /* Nothing can be written; bail out instead of using a NULL stream. */
          return;
      }

      int index = 0;
      for (int i = 0; i < grid_rows; i++) {
          for (int j = 0; j < grid_cols; j++) {
              for (int k = 0; k < layers; k++) {
                  size_t at = (size_t)i * grid_cols + j
                            + (size_t)k * grid_rows * grid_cols;

                  fprintf(fp, "%d\t%g\n", index, vect[at]);
                  index++;
              }
          }
      }

      /* fclose() flushes buffered data, so a failure here means lost output. */
      if (fclose(fp) != 0) {
          printf( "Error writing the output file\n" );
      }
  }
  /*
   * CPU reference implementation of the 3-D 7-point temperature stencil.
   *
   * pIn:        per-cell power input (nx*ny*nz floats).
   * tIn, tOut:  temperature buffers (nx*ny*nz floats each).  They are used
   *             in ping-pong fashion: the first sweep reads tIn and writes
   *             tOut, and the roles swap after every sweep, so which buffer
   *             holds the final result depends on how many sweeps ran.
   * nx, ny, nz: grid dimensions; a cell lives at x + y*nx + z*nx*ny.
   * Cap, Rx, Ry, Rz, dt: values from which the stencil weights are derived.
   * amb_temp:   ambient temperature term.
   * numiter:    requested number of sweeps; at least one sweep is always
   *             performed, even when numiter is zero or negative.
   *
   * A neighbour that would fall outside the grid is replaced by the cell
   * itself.
   */
  void computeTempCPU(float *pIn, float* tIn, float *tOut,
          int nx, int ny, int nz, float Cap,
          float Rx, float Ry, float Rz,
          float dt, float amb_temp, int numiter)
  {
      const float stepDivCap = dt / Cap;
      const float ce = stepDivCap / Rx;
      const float cw = ce;
      const float cn = stepDivCap / Ry;
      const float cs = cn;
      const float ct = stepDivCap / Rz;
      const float cb = ct;
      /* Evaluated in double precision, then narrowed to float. */
      const float cc = 1.0 - (2.0*ce + 2.0*cn + 3.0*ct);

      const int plane = nx * ny;
      int sweeps_done = 0;

      for (;;) {
          for (int z = 0; z < nz; z++) {
              for (int y = 0; y < ny; y++) {
                  for (int x = 0; x < nx; x++) {
                      const int c = x + y * nx + z * plane;

                      /* Start with every neighbour clamped to the centre
                       * cell, then step outwards where the grid allows. */
                      int w = c, e = c, n = c, s = c, b = c, t = c;
                      if (x != 0)      w = c - 1;
                      if (x != nx - 1) e = c + 1;
                      if (y != 0)      n = c - nx;
                      if (y != ny - 1) s = c + nx;
                      if (z != 0)      b = c - plane;
                      if (z != nz - 1) t = c + plane;

                      tOut[c] = tIn[c]*cc + tIn[n]*cn + tIn[s]*cs + tIn[e]*ce + tIn[w]*cw + tIn[t]*ct + tIn[b]*cb + (dt/Cap) * pIn[c] + ct*amb_temp;
                  }
              }
          }

          /* Swap the local buffer pointers for the next sweep. */
          float *previous_in = tIn;
          tIn = tOut;
          tOut = previous_in;

          sweeps_done++;
          if (!(sweeps_done < numiter)) {
              break;
          }
      }
  }
  284. float accuracy(float *arr1, float *arr2, int len)
  285. {
  286. float err = 0.0;
  287. int i;
  288. for(i = 0; i < len; i++)
  289. {
  290. err += (arr1[i]-arr2[i]) * (arr1[i]-arr2[i]);
  291. }
  292. return (float)sqrt(err/len);
  293. }