CLHelper.h 46 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432
  1. /********************************************************************
  2. //--cambine:helper function for OpenCL
  3. //--programmer: Jianbin Fang
  4. //--date: 27/12/2010
  5. ********************************************************************/
  6. #ifndef _CL_HELPER_
  7. #define _CL_HELPER_
  #include <CL/cl.h>
  #include <cstdio>
  #include <cstdlib>
  #include <fstream>
  #include <iostream>
  #include <string>
  #include <vector>
  #include "util.h"
  using std::string;
  using std::ifstream;
  using std::cerr;
  using std::endl;
  using std::cout;
  // Profiling switch. Every timer below, and every gettime() call in the code
  // visible in this header, is guarded by PROFILE_.
  #define PROFILE_
  #if defined(PROFILE_)
  // Accumulated timings; _clInit() resets all of them to 0.
  // NOTE(review): the unit is whatever gettime() from util.h returns - confirm.
  double TE;   // total execution time
  double CC;   // context creation time
  double CR;   // context release time
  double MA;   // GPU memory allocation time
  double MF;   // GPU memory free time
  double H2D;  // time to transfer data from host to device
  double D2H;  // time to transfer data from device to host
  double D2D;  // time to transfer data from device to device
  double KE;   // kernel execution time
  double KC;   // kernel compilation time
  #endif
  //#pragma OPENCL EXTENSION cl_nv_compiler_options:enable
  #define WORK_DIM 2  // work-items dimensions
  /*------------------------------------------------------------
    @struct: device properties filled in by _clGetDeviceProperties
    @date: 24/03/2011
  ------------------------------------------------------------*/
  struct _clDeviceProp{
      /* Only the device name is stored. The clGetDeviceInfo parameters
         below were listed by the original author but have no field here:
           CL_DEVICE_ADDRESS_BITS, CL_DEVICE_AVAILABLE,
           CL_DEVICE_COMPILER_AVAILABLE, CL_DEVICE_ENDIAN_LITTLE,
           CL_DEVICE_ERROR_CORRECTION_SUPPORT,
           CL_DEVICE_EXECUTION_CAPABILITIES, CL_DEVICE_EXTENSIONS,
           CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE,
           CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, CL_DEVICE_GLOBAL_MEM_SIZE,
           CL_DEVICE_IMAGE_SUPPORT, CL_DEVICE_IMAGE2D_MAX_HEIGHT,
           CL_DEVICE_IMAGE2D_MAX_WIDTH, CL_DEVICE_IMAGE3D_MAX_DEPTH,
           CL_DEVICE_IMAGE3D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_WIDTH,
           CL_DEVICE_LOCAL_MEM_SIZE, CL_DEVICE_LOCAL_MEM_TYPE,
           CL_DEVICE_MAX_CLOCK_FREQUENCY, CL_DEVICE_MAX_COMPUTE_UNITS,
           CL_DEVICE_MAX_CONSTANT_ARGS, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE,
           CL_DEVICE_MAX_MEM_ALLOC_SIZE, CL_DEVICE_MAX_PARAMETER_SIZE,
           CL_DEVICE_MAX_READ_IMAGE_ARGS, CL_DEVICE_MAX_SAMPLERS,
           CL_DEVICE_MAX_WORK_GROUP_SIZE, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
           CL_DEVICE_MAX_WORK_ITEM_SIZES, CL_DEVICE_MAX_WRITE_IMAGE_ARGS,
           CL_DEVICE_MEM_BASE_ADDR_ALIGN, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE,
           CL_DEVICE_NAME, CL_DEVICE_PLATFORM,
           CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR,
           CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE,
           CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT,
           CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT,
           CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG,
           CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT,
           CL_DEVICE_PROFILE, CL_DEVICE_PROFILING_TIMER_RESOLUTION,
           CL_DEVICE_QUEUE_PROPERTIES, CL_DEVICE_SINGLE_FP_CONFIG,
           CL_DEVICE_TYPE, CL_DEVICE_VENDOR_ID, CL_DEVICE_VENDOR,
           CL_DEVICE_VERSION, CL_DRIVER_VERSION */
      char device_name[100];  // receives the CL_DEVICE_NAME query result
  };
  91. struct oclHandleStruct{
  92. cl_context context;
  93. cl_device_id *devices;
  94. cl_command_queue queue;
  95. cl_program program;
  96. cl_int cl_status;
  97. std::string error_str;
  98. std::vector<cl_kernel> kernel;
  99. cl_mem pinned_mem_out;
  100. cl_mem pinned_mem_in;
  101. };
  102. struct oclHandleStruct oclHandles;
  103. char kernel_file[100] = "Kernels.cl";
  104. int total_kernels = 5;
  105. //string kernel_names[9] = {"memset_kernel", "initialize_variables", "compute_step_factor", "compute_flux", "time_step", "compute_speed_sqd", "compute_velocity", "compute_pressure", "compute_speed_of_sound"};
  106. string kernel_names[5] = {"memset_kernel", "initialize_variables", "compute_step_factor", "compute_flux", "time_step"};
  107. int work_group_size = BLOCK_SIZE_0;
  108. int device_id_inused = 0; //deviced id used (default : 0)
  109. int number_devices = 0;
  110. /*------------------------------------------------------------
  111. @function: select device to use
  112. @params:
  113. size: the index of device to be used
  114. @return: NULL
  115. @date: 24/03/2011
  116. ------------------------------------------------------------*/
  117. void _clSetDevice(int idx) throw(string){
  118. cl_int resultCL;
  119. oclHandles.context = NULL;
  120. oclHandles.devices = NULL;
  121. oclHandles.queue = NULL;
  122. oclHandles.program = NULL;
  123. cl_uint deviceListSize;
  124. cl_uint numPlatforms;
  125. cl_platform_id targetPlatform = NULL;
  126. resultCL = clGetPlatformIDs(0, NULL, &numPlatforms);
  127. if (resultCL != CL_SUCCESS)
  128. throw (string("InitCL()::Error: Getting number of platforms (clGetPlatformIDs)"));
  129. if (!(numPlatforms > 0))
  130. throw (string("InitCL()::Error: No platforms found (clGetPlatformIDs)"));
  131. cl_platform_id* allPlatforms = (cl_platform_id*) malloc(numPlatforms * sizeof(cl_platform_id));
  132. resultCL = clGetPlatformIDs(numPlatforms, allPlatforms, NULL);
  133. if (resultCL != CL_SUCCESS)
  134. throw (string("InitCL()::Error: Getting platform ids (clGetPlatformIDs)"));
  135. /* Select the target platform. Default: first platform */
  136. targetPlatform = allPlatforms[0];
  137. free(allPlatforms);
  138. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ALL, 0, NULL, &deviceListSize);
  139. if(oclHandles.cl_status!=CL_SUCCESS){
  140. throw(string("exception in _clInit -> clGetDeviceIDs"));
  141. }
  142. if (deviceListSize == 0)
  143. throw(string("InitCL()::Error: No devices found."));
  144. if(idx>(deviceListSize-1))
  145. throw(string(":invalid device ID:"));
  146. device_id_inused = idx;
  147. }
  148. /*------------------------------------------------------------
  149. @function: get device properties indexed by 'idx'
  150. @params:
  151. idx: device index
  152. prop: output properties
  153. @return: prop
  154. @date: 24/03/2011
  155. ------------------------------------------------------------*/
  156. void _clGetDeviceProperties(int idx, _clDeviceProp *prop) throw(string){
  157. oclHandles.cl_status= clGetDeviceInfo(oclHandles.devices[idx], CL_DEVICE_NAME, 100, prop->device_name, NULL);
  158. #ifdef ERRMSG
  159. if(oclHandles.cl_status != CL_SUCCESS){
  160. oclHandles.error_str = "exception in _clGetDeviceProperties-> ";
  161. switch(oclHandles.cl_status){
  162. case CL_INVALID_DEVICE:
  163. oclHandles.error_str += "CL_INVALID_DEVICE";
  164. break;
  165. case CL_INVALID_VALUE:
  166. oclHandles.error_str += "CL_INVALID_VALUE";
  167. break;
  168. default:
  169. oclHandles.error_str += "unknown reasons";
  170. break;
  171. }
  172. throw(oclHandles.error_str);
  173. }
  174. #endif
  175. }
  /*
   * Converts the contents of a file into a string.
   *
   * The file is opened in binary mode and returned byte-for-byte, including
   * any embedded NUL bytes (the previous implementation truncated the result
   * at the first NUL).
   *
   * @param fileName  path of the file to read
   * @return the complete file contents; an empty string for an empty file
   * @throws std::string
   *     "FileToString()::Error: Unable to open file <fileName>" when the file
   *     cannot be opened, or
   *     "FileToString()::Error: Unable to read file <fileName>" when its size
   *     cannot be determined, the read comes up short, or the buffer cannot
   *     be allocated. Read failures are also reported on std::cerr.
   */
  std::string FileToString(const std::string fileName){
      std::ifstream f(fileName.c_str(), std::ifstream::in | std::ifstream::binary);
      if (!f.is_open())
          throw(std::string("FileToString()::Error: Unable to open file ") + fileName);

      try{
          f.seekg(0, std::ifstream::end);
          const std::streamoff fileSize = f.tellg();
          // tellg() reports failure as -1, which must not become a size_t.
          if (fileSize < 0)
              throw(std::string("Could not determine file size"));
          f.seekg(0, std::ifstream::beg);

          // Read straight into the string: no raw buffer to leak.
          std::string contents(static_cast<std::size_t>(fileSize), '\0');
          if (fileSize > 0){
              f.read(&contents[0], static_cast<std::streamsize>(fileSize));
              if (f.gcount() != static_cast<std::streamsize>(fileSize))
                  throw(std::string("Short read"));
          }
          return contents;
      }
      catch(const std::string& msg){
          std::cerr << "Exception caught in FileToString(): " << msg << std::endl;
      }
      catch(...){
          // e.g. std::bad_alloc / std::length_error from the buffer allocation
          std::cerr << "Exception caught in FileToString()" << std::endl;
      }
      // Callers only ever see std::string exceptions from this function.
      throw(std::string("FileToString()::Error: Unable to read file ") + fileName);
  }
  /*------------------------------------------------------------
    @function: Read command line parameters
    @params:
        argc, argv: the program's command line
    @return: nothing; results are stored in the globals below
        -t <type>  -> device_type (at most 15 characters are kept)
        -d <id>    -> device_id
        -p <id>    -> platform_id
    @throws: std::string when an option is the last argument and
             therefore has no value
    @note: only arguments starting with '-' are treated as options and
           only the character after the '-' is examined; everything else
           is skipped. A value that is not a number leaves device_id /
           platform_id unchanged.
    @date: 24/03/2011
  ------------------------------------------------------------*/
  // Was char[3], which "cpu"/"gpu"/"acc" plus the terminating NUL overflowed.
  // Keep the size in step with the "%15s" width used below.
  char device_type[16];
  int device_id = 0;
  int platform_id = 0;

  // Returns the value following option argv[i] and advances i onto it;
  // reports and throws std::string when the option is the last argument.
  static const char* clCmdOptionValue(int argc, char* argv[], int &i){
      if (i + 1 < argc && argv[i + 1] != NULL){
          ++i;
          return argv[i];
      }
      std::cerr << "Could not read argument after option " << argv[i] << std::endl;
      // The original used a bare `throw;` here, which calls std::terminate
      // because no exception is active.
      throw std::string("_clCmdParams()::Error: Could not read argument after option ") + argv[i];
  }

  void _clCmdParams(int argc, char* argv[]){
      for (int i = 0; i < argc; ++i){
          const char* opt = argv[i];
          // Requiring the leading '-' also guarantees opt[1] is inside the string.
          if (opt == NULL || opt[0] != '-')
              continue;
          switch (opt[1]){
              case 't': //--t stands for device type
                  std::sscanf(clCmdOptionValue(argc, argv, i), "%15s", device_type);
                  break;
              case 'd': //--d stands for device id
                  std::sscanf(clCmdOptionValue(argc, argv, i), "%d", &device_id);
                  break;
              case 'p': //--p stands for platform id
                  std::sscanf(clCmdOptionValue(argc, argv, i), "%d", &platform_id);
                  break;
              default:
                  break;
          }
      }
  }
  258. /*------------------------------------------------------------
  259. @function: Initlize CL objects
  260. @params:
  261. device_id: device id
  262. device_type: the types of devices, e.g. CPU, GPU, ACCERLERATOR,...
  263. (1) -t cpu/gpu/acc -d 0/1/2/...
  264. (2) -t cpu/gpu/acc [-d 0]
  265. (3) [-t default] -d 0/1/2/...
  266. (4) NULL [-d 0]
  267. @return:
  268. @description:
  269. there are 5 steps to initialize all the OpenCL objects needed,
  270. @revised:
  271. get the number of devices and devices have no relationship with context
  272. @date: 24/03/2011
  273. ------------------------------------------------------------*/
  274. void _clInit(string device_type, int device_id)throw(string){
  275. #ifdef PROFILE_
  276. TE = 0;
  277. CC = 0;
  278. CR = 0;
  279. MA = 0;
  280. MF = 0;
  281. H2D = 0;
  282. D2H = 0;
  283. D2D = 0;
  284. KE = 0;
  285. KC = 0;
  286. #endif
  287. int DEVICE_ID_INUSED = 0;
  288. _clDeviceProp prop;
  289. #ifdef PROFILE_
  290. double t1 = gettime();
  291. #endif
  292. cl_int resultCL;
  293. oclHandles.context = NULL;
  294. oclHandles.devices = NULL;
  295. oclHandles.queue = NULL;
  296. oclHandles.program = NULL;
  297. cl_uint deviceListSize;
  298. //-----------------------------------------------
  299. //--cambine-1: find the available platforms and select one
  300. cl_uint numPlatforms;
  301. cl_platform_id targetPlatform = NULL;
  302. resultCL = clGetPlatformIDs(0, NULL, &numPlatforms);
  303. if (resultCL != CL_SUCCESS)
  304. throw (string("InitCL()::Error: Getting number of platforms (clGetPlatformIDs)"));
  305. //printf("number of platforms:%d\n",numPlatforms); //by cambine
  306. #ifdef DEV_INFO
  307. std::cout<<"--cambine: number of platforms: "<<numPlatforms<<std::endl;
  308. #endif
  309. if (!(numPlatforms > 0))
  310. throw (string("InitCL()::Error: No platforms found (clGetPlatformIDs)"));
  311. cl_platform_id* allPlatforms = (cl_platform_id*) malloc(numPlatforms * sizeof(cl_platform_id));
  312. resultCL = clGetPlatformIDs(numPlatforms, allPlatforms, NULL);
  313. if (resultCL != CL_SUCCESS)
  314. throw (string("InitCL()::Error: Getting platform ids (clGetPlatformIDs)"));
  315. // Select the target platform. Default: first platform
  316. targetPlatform = allPlatforms[platform_id];
  317. for (int i = 0; i < numPlatforms; i++)
  318. {
  319. char pbuff[128];
  320. resultCL = clGetPlatformInfo( allPlatforms[i],
  321. CL_PLATFORM_VENDOR,
  322. sizeof(pbuff),
  323. pbuff,
  324. NULL);
  325. if (resultCL != CL_SUCCESS)
  326. throw (string("InitCL()::Error: Getting platform info (clGetPlatformInfo)"));
  327. printf("vedor is %s\n",pbuff);
  328. #ifdef DEV_INFO
  329. std::cout<<"--cambine: vedor is: "<<pbuff<<std::endl;
  330. #endif
  331. }
  332. free(allPlatforms);
  333. //-----------------------------------------------
  334. //--cambine-2: detect OpenCL devices
  335. // First, get the size of device list
  336. if(device_type.compare("")!=0){
  337. if(device_type.compare("cpu")==0){
  338. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_CPU, 0, NULL, &deviceListSize);
  339. if(oclHandles.cl_status!=CL_SUCCESS){
  340. throw(string("exception in _clInit -> clGetDeviceIDs -> CPU"));
  341. }
  342. }
  343. if(device_type.compare("gpu")==0){
  344. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_GPU, 0, NULL, &deviceListSize);
  345. if(oclHandles.cl_status!=CL_SUCCESS){
  346. throw(string("exception in _clInit -> clGetDeviceIDs -> GPU"));
  347. }
  348. }
  349. if(device_type.compare("acc")==0){
  350. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ACCELERATOR, 0, NULL, &deviceListSize);
  351. if(oclHandles.cl_status!=CL_SUCCESS){
  352. throw(string("exception in _clInit -> clGetDeviceIDs -> ACCELERATOR"));
  353. }
  354. }
  355. }
  356. else{
  357. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ALL, 0, NULL, &deviceListSize);
  358. if(oclHandles.cl_status!=CL_SUCCESS){
  359. throw(string("exception in _clInit -> clGetDeviceIDs -> ALL"));
  360. }
  361. }
  362. if (deviceListSize == 0)
  363. throw(string("InitCL()::Error: No devices found."));
  364. #ifdef DEV_INFO
  365. std::cout<<"--cambine: number of device="<<deviceListSize<<std::endl;
  366. #endif
  367. number_devices = deviceListSize;
  368. // Now, allocate the device list
  369. // oclHandles.devices = (cl_device_id *)malloc(deviceListSize);
  370. oclHandles.devices = (cl_device_id *)malloc(sizeof(cl_device_id) * deviceListSize);
  371. if (oclHandles.devices == 0)
  372. throw(string("InitCL()::Error: Could not allocate memory."));
  373. // Next, get the device list data
  374. if(device_type.compare("")!=0){
  375. if(device_type.compare("cpu")==0){
  376. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_CPU, deviceListSize, oclHandles.devices, NULL);
  377. if(oclHandles.cl_status!=CL_SUCCESS){
  378. throw(string("exception in _clInit -> clGetDeviceIDs -> CPU ->2"));
  379. }
  380. }
  381. if(device_type.compare("gpu")==0){
  382. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_GPU, deviceListSize, oclHandles.devices, NULL);
  383. if(oclHandles.cl_status!=CL_SUCCESS){
  384. throw(string("exception in _clInit -> clGetDeviceIDs -> GPU -> 2"));
  385. }
  386. }
  387. if(device_type.compare("acc")==0){
  388. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ACCELERATOR, deviceListSize, oclHandles.devices, NULL);
  389. if(oclHandles.cl_status!=CL_SUCCESS){
  390. throw(string("exception in _clInit -> clGetDeviceIDs -> ACCELERATOR -> 2"));
  391. }
  392. }
  393. }
  394. else{
  395. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ALL, deviceListSize, oclHandles.devices, NULL);
  396. if(oclHandles.cl_status!=CL_SUCCESS){
  397. throw(string("exception in _clInit -> clGetDeviceIDs -> ALL -> 2"));
  398. }
  399. }
  400. if(device_id!=0){
  401. if(device_id>(deviceListSize-1))
  402. throw(string("Invalidate device id"));
  403. DEVICE_ID_INUSED = device_id;
  404. }
  405. _clGetDeviceProperties(DEVICE_ID_INUSED, &prop);
  406. std::cout<<"--cambine: device name="<<prop.device_name<<std::endl;
  407. #ifdef DEV_INFO
  408. std::cout<<"--cambine: return device list successfully!"<<std::endl;
  409. #endif
  410. //-----------------------------------------------
  411. //--cambine-3: create an OpenCL context
  412. #ifdef DEV_INFO
  413. std::cout<<"--cambine: before creating context"<<std::endl;
  414. #endif
  415. cl_context_properties cprops[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)targetPlatform, 0 };
  416. oclHandles.context = clCreateContext(0,
  417. deviceListSize,
  418. oclHandles.devices,
  419. NULL,
  420. NULL,
  421. &resultCL);
  422. if ((resultCL != CL_SUCCESS) || (oclHandles.context == NULL))
  423. throw (string("InitCL()::Error: Creating Context (clCreateContextFromType)"));
  424. #ifdef DEV_INFO
  425. std::cout<<"--cambine: create OCL context successfully!"<<std::endl;
  426. #endif
  427. //-----------------------------------------------
  428. //--cambine-4: Create an OpenCL command queue
  429. oclHandles.queue = clCreateCommandQueue(oclHandles.context,
  430. oclHandles.devices[DEVICE_ID_INUSED],
  431. 0,
  432. &resultCL);
  433. if ((resultCL != CL_SUCCESS) || (oclHandles.queue == NULL))
  434. throw(string("InitCL()::Creating Command Queue. (clCreateCommandQueue)"));
  435. #ifdef PROFILE_
  436. double t2 = gettime();
  437. CC += t2 - t1;
  438. #endif
  439. //-----------------------------------------------
  440. //--cambine-5: Load CL file, build CL program object, create CL kernel object
  441. std::string source_str = FileToString(kernel_file);
  442. const char * source = source_str.c_str();
  443. size_t sourceSize[] = { source_str.length() };
  444. oclHandles.program = clCreateProgramWithSource(oclHandles.context,
  445. 1,
  446. &source,
  447. sourceSize,
  448. &resultCL);
  449. if ((resultCL != CL_SUCCESS) || (oclHandles.program == NULL))
  450. throw(string("InitCL()::Error: Loading Binary into cl_program. (clCreateProgramWithBinary)"));
  451. //insert debug information
  452. std::string options= "";
  453. //options += " -cl-nv-opt-level=3";
  454. resultCL = clBuildProgram(oclHandles.program, deviceListSize, oclHandles.devices, options.c_str(), NULL, NULL);
  455. if ((resultCL != CL_SUCCESS) || (oclHandles.program == NULL)){
  456. cerr << "InitCL()::Error: In clBuildProgram" << endl;
  457. size_t length;
  458. resultCL = clGetProgramBuildInfo(oclHandles.program,
  459. oclHandles.devices[DEVICE_ID_INUSED],
  460. CL_PROGRAM_BUILD_LOG,
  461. 0,
  462. NULL,
  463. &length);
  464. if(resultCL != CL_SUCCESS)
  465. throw(string("InitCL()::Error: Getting Program build info(clGetProgramBuildInfo)"));
  466. char* buffer = (char*)malloc(length);
  467. resultCL = clGetProgramBuildInfo(oclHandles.program,
  468. oclHandles.devices[DEVICE_ID_INUSED],
  469. CL_PROGRAM_BUILD_LOG,
  470. length,
  471. buffer,
  472. NULL);
  473. if(resultCL != CL_SUCCESS)
  474. throw(string("InitCL()::Error: Getting Program build info(clGetProgramBuildInfo)"));
  475. cerr << buffer << endl;
  476. FILE * fp = fopen("errinfo", "w");
  477. fprintf(fp, "%s\n", buffer);
  478. fclose(fp);
  479. free(buffer);
  480. throw(string("InitCL()::Error: Building Program (clBuildProgram)"));
  481. }
  482. #ifdef PROFILE_
  483. double t3 = gettime();
  484. KC += t3 - t2;
  485. #endif
  486. //get program information in intermediate representation
  487. #ifdef PTX_MSG
  488. size_t binary_sizes[deviceListSize];
  489. char * binaries[deviceListSize];
  490. //figure out number of devices and the sizes of the binary for each device.
  491. oclHandles.cl_status = clGetProgramInfo(oclHandles.program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t)*deviceListSize, &binary_sizes, NULL );
  492. if(oclHandles.cl_status!=CL_SUCCESS){
  493. throw(string("--cambine:exception in _InitCL -> clGetProgramInfo-2"));
  494. }
  495. std::cout<<"--cambine:"<<binary_sizes<<std::endl;
  496. //copy over all of the generated binaries.
  497. for(int i=0;i<deviceListSize;i++)
  498. binaries[i] = (char *)malloc( sizeof(char)*(binary_sizes[i]+1));
  499. oclHandles.cl_status = clGetProgramInfo(oclHandles.program, CL_PROGRAM_BINARIES, sizeof(char *)*deviceListSize, binaries, NULL );
  500. if(oclHandles.cl_status!=CL_SUCCESS){
  501. throw(string("--cambine:exception in _InitCL -> clGetProgramInfo-3"));
  502. }
  503. for(int i=0;i<deviceListSize;i++)
  504. binaries[i][binary_sizes[i]] = '\0';
  505. std::cout<<"--cambine:writing ptd information..."<<std::endl;
  506. FILE * ptx_file = fopen("cl.ptx","w");
  507. if(ptx_file==NULL){
  508. throw(string("exceptions in allocate ptx file."));
  509. }
  510. fprintf(ptx_file,"%s",binaries[DEVICE_ID_INUSED]);
  511. fclose(ptx_file);
  512. std::cout<<"--cambine:writing ptd information done."<<std::endl;
  513. for(int i=0;i<deviceListSize;i++)
  514. free(binaries[i]);
  515. #endif
  516. for (int nKernel = 0; nKernel < total_kernels; nKernel++)
  517. {
  518. // get a kernel object handle for a kernel with the given name
  519. cl_kernel kernel = clCreateKernel(oclHandles.program,
  520. (kernel_names[nKernel]).c_str(),
  521. &resultCL);
  522. if ((resultCL != CL_SUCCESS) || (kernel == NULL))
  523. {
  524. string errorMsg = "InitCL()::Error: Creating Kernel (clCreateKernel) \"" + kernel_names[nKernel] + "\"";
  525. throw(errorMsg);
  526. }
  527. oclHandles.kernel.push_back(kernel);
  528. }
  529. //get resource alocation information
  530. #ifdef RES_MSG
  531. char * build_log;
  532. size_t ret_val_size;
  533. oclHandles.cl_status = clGetProgramBuildInfo(oclHandles.program, oclHandles.devices[DEVICE_ID_INUSED], CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
  534. if(oclHandles.cl_status!=CL_SUCCESS){
  535. throw(string("exceptions in _InitCL -> getting resource information"));
  536. }
  537. build_log = (char *)malloc(ret_val_size+1);
  538. oclHandles.cl_status = clGetProgramBuildInfo(oclHandles.program, oclHandles.devices[DEVICE_ID_INUSED], CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
  539. if(oclHandles.cl_status!=CL_SUCCESS){
  540. throw(string("exceptions in _InitCL -> getting resources allocation information-2"));
  541. }
  542. build_log[ret_val_size] = '\0';
  543. std::cout<<"--cambine:"<<build_log<<std::endl;
  544. free(build_log);
  545. #endif
  546. #ifdef PROFILE_
  547. double t4 = gettime();
  548. CC += t4 - t3;
  549. #endif
  550. }
  551. /*------------------------------------------------------------
  552. @function: release CL objects
  553. @params: NULL
  554. @return:
  555. @date: 24/03/2011
  556. ------------------------------------------------------------*/
  557. void _clRelease()
  558. {
  559. #ifdef PROFILE_
  560. double t1 = gettime();
  561. #endif
  562. bool errorFlag = false;
  563. for (int nKernel = 0; nKernel < oclHandles.kernel.size(); nKernel++){
  564. if (oclHandles.kernel[nKernel] != NULL){
  565. cl_int resultCL = clReleaseKernel(oclHandles.kernel[nKernel]);
  566. if (resultCL != CL_SUCCESS){
  567. cerr << "ReleaseCL()::Error: In clReleaseKernel" << endl;
  568. errorFlag = true;
  569. }
  570. oclHandles.kernel[nKernel] = NULL;
  571. }
  572. oclHandles.kernel.clear();
  573. }
  574. if (oclHandles.program != NULL){
  575. cl_int resultCL = clReleaseProgram(oclHandles.program);
  576. if (resultCL != CL_SUCCESS){
  577. cerr << "ReleaseCL()::Error: In clReleaseProgram" << endl;
  578. errorFlag = true;
  579. }
  580. oclHandles.program = NULL;
  581. }
  582. if (oclHandles.queue != NULL){
  583. cl_int resultCL = clReleaseCommandQueue(oclHandles.queue);
  584. if (resultCL != CL_SUCCESS)
  585. {
  586. cerr << "ReleaseCL()::Error: In clReleaseCommandQueue" << endl;
  587. errorFlag = true;
  588. }
  589. oclHandles.queue = NULL;
  590. }
  591. free(oclHandles.devices);
  592. if (oclHandles.context != NULL){
  593. cl_int resultCL = clReleaseContext(oclHandles.context);
  594. if (resultCL != CL_SUCCESS){
  595. cerr << "ReleaseCL()::Error: In clReleaseContext" << endl;
  596. errorFlag = true;
  597. }
  598. oclHandles.context = NULL;
  599. }
  600. if (errorFlag) throw(string("ReleaseCL()::Error encountered."));
  601. #ifdef PROFILE_
  602. double t2 = gettime();
  603. CR += t2 - t1;
  604. #endif
  605. }
  606. /*------------------------------------------------------------
  607. @function: create read and write buffer for devices
  608. @params:
  609. size: the size of device memory to be allocated
  610. @return: mem_d
  611. @date: 24/03/2011
  612. ------------------------------------------------------------*/
  613. cl_mem _clMalloc(int size) throw(string){
  614. #ifdef PROFILE_
  615. double t1 = gettime();
  616. #endif
  617. cl_mem d_mem;
  618. d_mem = clCreateBuffer(oclHandles.context, CL_MEM_READ_WRITE, size, NULL, &oclHandles.cl_status);
  619. #ifdef ERRMSG
  620. if(oclHandles.cl_status != CL_SUCCESS){
  621. oclHandles.error_str = "excpetion in _clMalloc -> ";
  622. switch(oclHandles.cl_status){
  623. case CL_INVALID_CONTEXT:
  624. oclHandles.error_str += "CL_INVALID_CONTEXT";
  625. break;
  626. case CL_INVALID_VALUE:
  627. oclHandles.error_str += "CL_INVALID_VALUE";
  628. break;
  629. case CL_INVALID_BUFFER_SIZE:
  630. oclHandles.error_str += "CL_INVALID_BUFFER_SIZE";
  631. break;
  632. case CL_INVALID_HOST_PTR:
  633. oclHandles.error_str += "CL_INVALID_HOST_PTR";
  634. break;
  635. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  636. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  637. break;
  638. case CL_OUT_OF_HOST_MEMORY:
  639. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  640. break;
  641. default:
  642. oclHandles.error_str += "unknown reasons";
  643. break;
  644. }
  645. throw(oclHandles.error_str);
  646. }
  647. #endif
  648. #ifdef PROFILE_
  649. double t2 = gettime();
  650. MA += t2 - t1;
  651. #endif
  652. return d_mem;
  653. }
  654. /*------------------------------------------------------------
  655. @function: malloc pinned memoty
  656. @params:
  657. size: the size of data to be transferred in bytes
  658. @return: the pointer of host adress
  659. @date: 06/04/2011
  660. ------------------------------------------------------------*/
  661. void* _clMallocHost(int size)throw(string){
  662. void * mem_h;
  663. oclHandles.pinned_mem_out = clCreateBuffer(oclHandles.context, CL_MEM_READ_WRITE|CL_MEM_ALLOC_HOST_PTR, size, NULL, &oclHandles.cl_status);
  664. #ifdef ERRMSG
  665. if(oclHandles.cl_status != CL_SUCCESS){
  666. oclHandles.error_str = "excpetion in _clMallocHost -> clCreateBuffer";
  667. switch(oclHandles.cl_status){
  668. case CL_INVALID_CONTEXT:
  669. oclHandles.error_str += "CL_INVALID_CONTEXT";
  670. break;
  671. case CL_INVALID_VALUE:
  672. oclHandles.error_str += "CL_INVALID_VALUE";
  673. break;
  674. case CL_INVALID_BUFFER_SIZE:
  675. oclHandles.error_str += "CL_INVALID_BUFFER_SIZE";
  676. break;
  677. case CL_INVALID_HOST_PTR:
  678. oclHandles.error_str += "CL_INVALID_HOST_PTR";
  679. break;
  680. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  681. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  682. break;
  683. case CL_OUT_OF_HOST_MEMORY:
  684. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  685. break;
  686. default:
  687. oclHandles.error_str += "unknown reasons";
  688. break;
  689. }
  690. throw(oclHandles.error_str);
  691. }
  692. #endif
  693. mem_h = clEnqueueMapBuffer(oclHandles.queue, oclHandles.pinned_mem_out, CL_TRUE, CL_MAP_READ, 0, size, 0, NULL, NULL, &oclHandles.cl_status);
  694. #ifdef ERRMSG
  695. if(oclHandles.cl_status != CL_SUCCESS||mem_h==NULL){
  696. oclHandles.error_str = "excpetion in _clMallocHost -> clEnqueueMapBuffer";
  697. switch(oclHandles.cl_status){
  698. case CL_INVALID_COMMAND_QUEUE:
  699. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  700. break;
  701. case CL_INVALID_CONTEXT:
  702. oclHandles.error_str += "CL_INVALID_CONTEXT";
  703. break;
  704. case CL_INVALID_MEM_OBJECT:
  705. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  706. break;
  707. case CL_INVALID_VALUE:
  708. oclHandles.error_str += "CL_INVALID_VALUE";
  709. break;
  710. case CL_INVALID_EVENT_WAIT_LIST:
  711. oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
  712. break;
  713. case CL_MAP_FAILURE:
  714. oclHandles.error_str += "CL_MAP_FAILURE";
  715. break;
  716. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  717. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  718. break;
  719. case CL_OUT_OF_HOST_MEMORY:
  720. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  721. break;
  722. default:
  723. oclHandles.error_str += "unknown reasons";
  724. break;
  725. }
  726. throw(oclHandles.error_str);
  727. }
  728. #endif
  729. return mem_h;
  730. }
  731. /*------------------------------------------------------------
  732. @function: free pinned memory
  733. @params:
  734. io: to free pinned-in or pinned-out memory
  735. mem_h: the host address
  736. @return: NULL
  737. @date: 06/04/2011
  738. ------------------------------------------------------------*/
  739. void _clFreeHost(int io, void * mem_h){
  740. if(io==0){ //in
  741. if(mem_h){
  742. oclHandles.cl_status = clEnqueueUnmapMemObject(oclHandles.queue, oclHandles.pinned_mem_in, (void*)mem_h, 0, NULL, NULL);
  743. #ifdef ERRMSG
  744. if(oclHandles.cl_status != CL_SUCCESS){
  745. oclHandles.error_str = "excpetion in _clFreeHost -> clEnqueueUnmapMemObject(in)";
  746. switch(oclHandles.cl_status){
  747. case CL_INVALID_COMMAND_QUEUE:
  748. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  749. break;
  750. case CL_INVALID_MEM_OBJECT:
  751. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  752. break;
  753. case CL_INVALID_VALUE:
  754. oclHandles.error_str += "CL_INVALID_VALUE";
  755. break;
  756. case CL_OUT_OF_RESOURCES:
  757. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  758. break;
  759. case CL_OUT_OF_HOST_MEMORY:
  760. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  761. break;
  762. case CL_INVALID_CONTEXT:
  763. oclHandles.error_str += "CL_INVALID_CONTEXT";
  764. break;
  765. default:
  766. oclHandles.error_str += "unknown reasons";
  767. break;
  768. }
  769. throw(oclHandles.error_str);
  770. }
  771. #endif
  772. }
  773. }
  774. else if(io==1){ //out
  775. if(mem_h){
  776. oclHandles.cl_status = clEnqueueUnmapMemObject(oclHandles.queue, oclHandles.pinned_mem_out, (void*)mem_h, 0, NULL, NULL);
  777. #ifdef ERRMSG
  778. if(oclHandles.cl_status != CL_SUCCESS){
  779. oclHandles.error_str = "excpetion in _clFreeHost -> clEnqueueUnmapMemObject(in)";
  780. switch(oclHandles.cl_status){
  781. case CL_INVALID_COMMAND_QUEUE:
  782. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  783. break;
  784. case CL_INVALID_MEM_OBJECT:
  785. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  786. break;
  787. case CL_INVALID_VALUE:
  788. oclHandles.error_str += "CL_INVALID_VALUE";
  789. break;
  790. case CL_OUT_OF_RESOURCES:
  791. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  792. break;
  793. case CL_OUT_OF_HOST_MEMORY:
  794. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  795. break;
  796. case CL_INVALID_CONTEXT:
  797. oclHandles.error_str += "CL_INVALID_CONTEXT";
  798. break;
  799. default:
  800. oclHandles.error_str += "unknown reasons";
  801. break;
  802. }
  803. throw(oclHandles.error_str);
  804. }
  805. #endif
  806. }
  807. }
  808. else
  809. throw(string("encounter invalid choice when freeing pinned memmory"));
  810. }
  811. /*------------------------------------------------------------
  812. @function: transfer data from host to device
  813. @params:
  814. dest: the destination device memory
  815. src: the source host memory
  816. size: the size of data to be transferred in bytes
  817. @return: NULL
  818. @date: 17/01/2011
  819. ------------------------------------------------------------*/
  820. void _clMemcpyH2D(cl_mem dst, const void *src, int size) throw(string){
  821. #ifdef PROFILE_
  822. double t1 = gettime();
  823. #endif
  824. oclHandles.cl_status = clEnqueueWriteBuffer(oclHandles.queue, dst, CL_TRUE, 0, size, src, 0, NULL, NULL);
  825. #ifdef ERRMSG
  826. if(oclHandles.cl_status != CL_SUCCESS){
  827. oclHandles.error_str = "excpetion in _clMemcpyH2D -> ";
  828. switch(oclHandles.cl_status){
  829. case CL_INVALID_COMMAND_QUEUE:
  830. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  831. break;
  832. case CL_INVALID_CONTEXT:
  833. oclHandles.error_str += "CL_INVALID_CONTEXT";
  834. break;
  835. case CL_INVALID_MEM_OBJECT:
  836. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  837. break;
  838. case CL_INVALID_VALUE:
  839. oclHandles.error_str += "CL_INVALID_VALUE";
  840. break;
  841. case CL_INVALID_EVENT_WAIT_LIST:
  842. oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
  843. break;
  844. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  845. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  846. break;
  847. case CL_OUT_OF_HOST_MEMORY:
  848. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  849. break;
  850. default:
  851. oclHandles.error_str += "Unknown reason";
  852. break;
  853. }
  854. throw(oclHandles.error_str);
  855. }
  856. #endif
  857. #ifdef PROFILE_
  858. double t2 = gettime();
  859. H2D += t2 - t1;
  860. #endif
  861. }
  862. /*------------------------------------------------------------
  863. @function: transfer data from device to host
  864. @params:
  865. dest: the destination device memory
  866. src: the source host memory
  867. size: the size of data to be transferred in bytes
  868. @return: NULL
  869. @date: 17/01/2011
  870. ------------------------------------------------------------*/
  871. void _clMemcpyD2H(void * dst, cl_mem src, int size) throw(string){
  872. #ifdef PROFILE_
  873. double t1 = gettime();
  874. #endif
  875. oclHandles.cl_status = clEnqueueReadBuffer(oclHandles.queue, src, CL_TRUE, 0, size, dst, 0,0,0);
  876. #ifdef ERRMSG
  877. if(oclHandles.cl_status != CL_SUCCESS){
  878. oclHandles.error_str = "excpetion in _clMemCpyD2H -> ";
  879. switch(oclHandles.cl_status){
  880. case CL_INVALID_COMMAND_QUEUE:
  881. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  882. break;
  883. case CL_INVALID_CONTEXT:
  884. oclHandles.error_str += "CL_INVALID_CONTEXT";
  885. break;
  886. case CL_INVALID_MEM_OBJECT:
  887. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  888. break;
  889. case CL_INVALID_VALUE:
  890. oclHandles.error_str += "CL_INVALID_VALUE";
  891. break;
  892. case CL_INVALID_EVENT_WAIT_LIST:
  893. oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
  894. break;
  895. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  896. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  897. break;
  898. case CL_OUT_OF_HOST_MEMORY:
  899. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  900. break;
  901. default:
  902. oclHandles.error_str += "Unknown reason";
  903. break;
  904. }
  905. throw(oclHandles.error_str);
  906. }
  907. #endif
  908. #ifdef PROFILE_
  909. double t2 = gettime();
  910. D2H += t2 - t1;
  911. #endif
  912. }
  913. /*------------------------------------------------------------
  914. @function: transfer data from device to device
  915. @params:
  916. dest: the destination device memory
  917. src: the source device memory
  918. size: the size of data to be transferred in bytes
  919. @return: NULL
  920. @date: 27/03/2011
  921. ------------------------------------------------------------*/
  922. void _clMemcpyD2D(cl_mem dst, cl_mem src, int size) throw(string){
  923. #ifdef PROFILE_
  924. double t1 = gettime();
  925. #endif
  926. oclHandles.cl_status = clEnqueueCopyBuffer(oclHandles.queue, src, dst, 0, 0, size, 0, NULL, NULL);
  927. #ifdef ERRMSG
  928. if(oclHandles.cl_status != CL_SUCCESS){
  929. oclHandles.error_str = "excpetion in _clCpyMemD2D -> ";
  930. switch(oclHandles.cl_status){
  931. case CL_INVALID_COMMAND_QUEUE:
  932. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  933. break;
  934. case CL_INVALID_CONTEXT:
  935. oclHandles.error_str += "CL_INVALID_CONTEXT";
  936. break;
  937. case CL_INVALID_MEM_OBJECT:
  938. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  939. break;
  940. case CL_INVALID_VALUE:
  941. oclHandles.error_str += "CL_INVALID_VALUE";
  942. break;
  943. case CL_INVALID_EVENT_WAIT_LIST:
  944. oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
  945. break;
  946. case CL_MISALIGNED_SUB_BUFFER_OFFSET:
  947. oclHandles.error_str += "CL_MISALIGNED_SUB_BUFFER_OFFSET";
  948. break;
  949. case CL_MEM_COPY_OVERLAP:
  950. oclHandles.error_str += "CL_MEM_COPY_OVERLAP";
  951. break;
  952. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  953. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  954. break;
  955. case CL_OUT_OF_RESOURCES:
  956. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  957. break;
  958. case CL_OUT_OF_HOST_MEMORY:
  959. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  960. break;
  961. default:
  962. oclHandles.error_str += "Unknown reason";
  963. break;
  964. }
  965. throw(oclHandles.error_str);
  966. }
  967. #endif
  968. #ifdef PROFILE_
  969. double t2 = gettime();
  970. D2D += t2 - t1;
  971. #endif
  972. }
  973. /*------------------------------------------------------------
  974. @function: set kernel arguments
  975. @params:
  976. kernel_id: the index of kernel to set
  977. arg_idx: the index of argument
  978. d_mem: the variable of device memory
  979. size: the size of device memory
  980. @return: NULL
  981. @date: 03/04/2011
  982. ------------------------------------------------------------*/
  983. void _clSetArgs(int kernel_id, int arg_idx, void * d_mem, int size = 0) throw(string){
  984. if(!size){
  985. oclHandles.cl_status = clSetKernelArg(oclHandles.kernel[kernel_id], arg_idx, sizeof(d_mem), &d_mem);
  986. #ifdef ERRMSG
  987. oclHandles.error_str = "excpetion in _clSetKernelArg()-1 ";
  988. switch(oclHandles.cl_status){
  989. case CL_INVALID_KERNEL:
  990. oclHandles.error_str += "CL_INVALID_KERNEL";
  991. break;
  992. case CL_INVALID_ARG_INDEX:
  993. oclHandles.error_str += "CL_INVALID_ARG_INDEX";
  994. break;
  995. case CL_INVALID_ARG_VALUE:
  996. oclHandles.error_str += "CL_INVALID_ARG_VALUE";
  997. break;
  998. case CL_INVALID_MEM_OBJECT:
  999. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  1000. break;
  1001. case CL_INVALID_SAMPLER:
  1002. oclHandles.error_str += "CL_INVALID_SAMPLER";
  1003. break;
  1004. case CL_INVALID_ARG_SIZE:
  1005. oclHandles.error_str += "CL_INVALID_ARG_SIZE";
  1006. break;
  1007. case CL_OUT_OF_RESOURCES:
  1008. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1009. break;
  1010. case CL_OUT_OF_HOST_MEMORY:
  1011. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1012. break;
  1013. default:
  1014. oclHandles.error_str += "Unknown reason";
  1015. break;
  1016. }
  1017. if(oclHandles.cl_status != CL_SUCCESS)
  1018. throw(oclHandles.error_str);
  1019. #endif
  1020. }
  1021. else{
  1022. oclHandles.cl_status = clSetKernelArg(oclHandles.kernel[kernel_id], arg_idx, size, d_mem);
  1023. #ifdef ERRMSG
  1024. oclHandles.error_str = "excpetion in _clSetKernelArg()-2 ";
  1025. switch(oclHandles.cl_status){
  1026. case CL_INVALID_KERNEL:
  1027. oclHandles.error_str += "CL_INVALID_KERNEL";
  1028. break;
  1029. case CL_INVALID_ARG_INDEX:
  1030. oclHandles.error_str += "CL_INVALID_ARG_INDEX";
  1031. break;
  1032. case CL_INVALID_ARG_VALUE:
  1033. oclHandles.error_str += "CL_INVALID_ARG_VALUE";
  1034. break;
  1035. case CL_INVALID_MEM_OBJECT:
  1036. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  1037. break;
  1038. case CL_INVALID_SAMPLER:
  1039. oclHandles.error_str += "CL_INVALID_SAMPLER";
  1040. break;
  1041. case CL_INVALID_ARG_SIZE:
  1042. oclHandles.error_str += "CL_INVALID_ARG_SIZE";
  1043. break;
  1044. case CL_OUT_OF_RESOURCES:
  1045. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1046. break;
  1047. case CL_OUT_OF_HOST_MEMORY:
  1048. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1049. break;
  1050. default:
  1051. oclHandles.error_str += "Unknown reason";
  1052. break;
  1053. }
  1054. if(oclHandles.cl_status != CL_SUCCESS)
  1055. throw(oclHandles.error_str);
  1056. #endif
  1057. }
  1058. }
  1059. void _clFinish() throw(string){
  1060. oclHandles.cl_status = clFinish(oclHandles.queue);
  1061. #ifdef ERRMSG
  1062. if(oclHandles.cl_status!=CL_SUCCESS){
  1063. oclHandles.error_str = "excpetion in _clFinish";
  1064. switch(oclHandles.cl_status){
  1065. case CL_INVALID_COMMAND_QUEUE:
  1066. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  1067. break;
  1068. case CL_OUT_OF_RESOURCES:
  1069. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1070. break;
  1071. case CL_OUT_OF_HOST_MEMORY:
  1072. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1073. break;
  1074. default:
  1075. oclHandles.error_str += "Unknown reasons";
  1076. break;
  1077. }
  1078. throw(oclHandles.error_str);
  1079. }
  1080. #endif
  1081. }
  1082. /*------------------------------------------------------------
  1083. @function: entry of invoke the kernel function
  1084. @params:
  1085. kernel_id: the index of kernel to set
  1086. work_items: the number of working items
  1087. work_group_size: the size of each work group
  1088. @return: NULL
  1089. @date: 03/04/2011
  1090. ------------------------------------------------------------*/
  1091. void _clInvokeKernel(int kernel_id, int work_items, int work_group_size) throw(string){
  1092. #ifdef PROFILE_
  1093. double t1 = gettime();
  1094. #endif
  1095. cl_uint work_dim = WORK_DIM;
  1096. cl_event e[1];
  1097. if(work_items%work_group_size != 0) //process situations that work_items cannot be divided by work_group_size
  1098. work_items = work_items + (work_group_size-(work_items%work_group_size));
  1099. size_t local_work_size[] = {work_group_size, 1};
  1100. size_t global_work_size[] = {work_items, 1};
  1101. oclHandles.cl_status = clEnqueueNDRangeKernel(oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0, \
  1102. global_work_size, local_work_size, 0 , 0, &(e[0]) );
  1103. #ifdef ERRMSG
  1104. if(oclHandles.cl_status != CL_SUCCESS){
  1105. oclHandles.error_str = "excpetion in _clInvokeKernel() -> ";
  1106. switch(oclHandles.cl_status){
  1107. case CL_INVALID_PROGRAM_EXECUTABLE:
  1108. oclHandles.error_str += "CL_INVALID_PROGRAM_EXECUTABLE";
  1109. break;
  1110. case CL_INVALID_COMMAND_QUEUE:
  1111. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  1112. break;
  1113. case CL_INVALID_KERNEL:
  1114. oclHandles.error_str += "CL_INVALID_KERNEL";
  1115. break;
  1116. case CL_INVALID_CONTEXT:
  1117. oclHandles.error_str += "CL_INVALID_CONTEXT";
  1118. break;
  1119. case CL_INVALID_KERNEL_ARGS:
  1120. oclHandles.error_str += "CL_INVALID_KERNEL_ARGS";
  1121. break;
  1122. case CL_INVALID_WORK_DIMENSION:
  1123. oclHandles.error_str += "CL_INVALID_WORK_DIMENSION";
  1124. break;
  1125. case CL_INVALID_GLOBAL_WORK_SIZE:
  1126. oclHandles.error_str += "CL_INVALID_GLOBAL_WORK_SIZE";
  1127. break;
  1128. case CL_INVALID_WORK_GROUP_SIZE:
  1129. oclHandles.error_str += "CL_INVALID_WORK_GROUP_SIZE";
  1130. break;
  1131. case CL_INVALID_WORK_ITEM_SIZE:
  1132. oclHandles.error_str += "CL_INVALID_WORK_ITEM_SIZE";
  1133. break;
  1134. case CL_INVALID_GLOBAL_OFFSET:
  1135. oclHandles.error_str += "CL_INVALID_GLOBAL_OFFSET";
  1136. break;
  1137. case CL_OUT_OF_RESOURCES:
  1138. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1139. break;
  1140. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  1141. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  1142. break;
  1143. case CL_INVALID_EVENT_WAIT_LIST:
  1144. oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
  1145. break;
  1146. case CL_OUT_OF_HOST_MEMORY:
  1147. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1148. break;
  1149. default:
  1150. oclHandles.error_str += "Unkown reseason";
  1151. break;
  1152. }
  1153. throw(oclHandles.error_str);
  1154. }
  1155. #endif
  1156. //_clFinish();
  1157. // oclHandles.cl_status = clWaitForEvents(1, &e[0]);
  1158. #ifdef ERRMSG
  1159. if (oclHandles.cl_status!= CL_SUCCESS){
  1160. oclHandles.error_str = "excpetion in _clEnqueueNDRange() -> clWaitForEvents ->";
  1161. switch(oclHandles.cl_status){
  1162. case CL_INVALID_VALUE:
  1163. oclHandles.error_str += "CL_INVALID_VALUE";
  1164. break;
  1165. case CL_INVALID_CONTEXT:
  1166. oclHandles.error_str += "CL_INVALID_CONTEXT";
  1167. break;
  1168. case CL_INVALID_EVENT:
  1169. oclHandles.error_str += "CL_INVALID_EVENT";
  1170. break;
  1171. case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
  1172. oclHandles.error_str += "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
  1173. break;
  1174. case CL_OUT_OF_RESOURCES:
  1175. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1176. break;
  1177. case CL_OUT_OF_HOST_MEMORY:
  1178. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1179. break;
  1180. default:
  1181. oclHandles.error_str += "Unkown Reason";
  1182. break;
  1183. }
  1184. throw(oclHandles.error_str);
  1185. }
  1186. #endif
  1187. #ifdef PROFILE_
  1188. double t2 = gettime();
  1189. KE += t2 - t1;
  1190. #endif
  1191. }
  1192. /*------------------------------------------------------------
  1193. @function: set device memory in an easy manner
  1194. @params:
  1195. mem_d: the device memory to be set;
  1196. val: set the selected memory to 'val';
  1197. number_elements: the number of elements in the selected memory
  1198. @return: NULL
  1199. @date: 03/04/2011
  1200. ------------------------------------------------------------*/
  1201. void _clMemset(cl_mem mem_d, short val, int number_bytes)throw(string){
  1202. int kernel_id = 0;
  1203. int arg_idx = 0;
  1204. _clSetArgs(kernel_id, arg_idx++, mem_d);
  1205. _clSetArgs(kernel_id, arg_idx++, &val, sizeof(short));
  1206. _clSetArgs(kernel_id, arg_idx++, &number_bytes, sizeof(int));
  1207. _clInvokeKernel(kernel_id, number_bytes, work_group_size);
  1208. }
  1209. /*------------------------------------------------------------
  1210. @function: entry of invoke the kernel function using 2d working items
  1211. @params:
  1212. kernel_id: the index of kernel to set
  1213. range_x: the number of working items in x direction
  1214. range_y: the number of working items in y direction
  1215. group_x: the number of working items in each work group in x direction
  1216. group_y: the number of working items in each work group in y direction
  1217. @return: NULL
  1218. @date: 03/04/2011
  1219. ------------------------------------------------------------*/
  1220. void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, int group_y) throw(string){
  1221. #ifdef PROFILE_
  1222. double t1 = gettime();
  1223. #endif
  1224. cl_uint work_dim = WORK_DIM;
  1225. size_t local_work_size[] = {group_x, group_y};
  1226. size_t global_work_size[] = {range_x, range_y};
  1227. cl_event e[1];
  1228. /*if(work_items%work_group_size != 0) //process situations that work_items cannot be divided by work_group_size
  1229. work_items = work_items + (work_group_size-(work_items%work_group_size));*/
  1230. oclHandles.cl_status = clEnqueueNDRangeKernel(oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0, \
  1231. global_work_size, local_work_size, 0 , 0, &(e[0]) );
  1232. #ifdef ERRMSG
  1233. if(oclHandles.cl_status != CL_SUCCESS){
  1234. oclHandles.error_str = "excpetion in _clInvokeKernel() -> ";
  1235. switch(oclHandles.cl_status){
  1236. case CL_INVALID_PROGRAM_EXECUTABLE:
  1237. oclHandles.error_str += "CL_INVALID_PROGRAM_EXECUTABLE";
  1238. break;
  1239. case CL_INVALID_COMMAND_QUEUE:
  1240. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  1241. break;
  1242. case CL_INVALID_KERNEL:
  1243. oclHandles.error_str += "CL_INVALID_KERNEL";
  1244. break;
  1245. case CL_INVALID_CONTEXT:
  1246. oclHandles.error_str += "CL_INVALID_CONTEXT";
  1247. break;
  1248. case CL_INVALID_KERNEL_ARGS:
  1249. oclHandles.error_str += "CL_INVALID_KERNEL_ARGS";
  1250. break;
  1251. case CL_INVALID_WORK_DIMENSION:
  1252. oclHandles.error_str += "CL_INVALID_WORK_DIMENSION";
  1253. break;
  1254. case CL_INVALID_GLOBAL_WORK_SIZE:
  1255. oclHandles.error_str += "CL_INVALID_GLOBAL_WORK_SIZE";
  1256. break;
  1257. case CL_INVALID_WORK_GROUP_SIZE:
  1258. oclHandles.error_str += "CL_INVALID_WORK_GROUP_SIZE";
  1259. break;
  1260. case CL_INVALID_WORK_ITEM_SIZE:
  1261. oclHandles.error_str += "CL_INVALID_WORK_ITEM_SIZE";
  1262. break;
  1263. case CL_INVALID_GLOBAL_OFFSET:
  1264. oclHandles.error_str += "CL_INVALID_GLOBAL_OFFSET";
  1265. break;
  1266. case CL_OUT_OF_RESOURCES:
  1267. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1268. break;
  1269. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  1270. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  1271. break;
  1272. case CL_INVALID_EVENT_WAIT_LIST:
  1273. oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
  1274. break;
  1275. case CL_OUT_OF_HOST_MEMORY:
  1276. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1277. break;
  1278. default:
  1279. oclHandles.error_str += "Unkown reseason";
  1280. break;
  1281. }
  1282. throw(oclHandles.error_str);
  1283. }
  1284. #endif
  1285. // oclHandles.cl_status = clWaitForEvents(1, &e[0]);
  1286. #ifdef ERRMSG
  1287. if (oclHandles.cl_status!= CL_SUCCESS)
  1288. throw(string("excpetion in _clEnqueueNDRange() -> clWaitForEvents"));
  1289. #endif
  1290. #ifdef PROFILE_
  1291. double t2 = gettime();
  1292. KE += t2 - t1;
  1293. #endif
  1294. }
  1295. /*------------------------------------------------------------
  1296. @function: release OpenCL memory objects
  1297. @params:
  1298. ob: the memory object to free or release
  1299. @return: NULL
  1300. @date: 03/04/2011
  1301. ------------------------------------------------------------*/
  1302. void _clFree(cl_mem ob) throw(string){
  1303. #ifdef PROFILE_
  1304. double t1 = gettime();
  1305. #endif
  1306. if(ob!=NULL)
  1307. oclHandles.cl_status = clReleaseMemObject(ob);
  1308. #ifdef ERRMSG
  1309. if (oclHandles.cl_status!= CL_SUCCESS){
  1310. oclHandles.error_str = "excpetion in _clFree() ->";
  1311. switch(oclHandles.cl_status){
  1312. case CL_INVALID_MEM_OBJECT:
  1313. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  1314. break;
  1315. case CL_OUT_OF_RESOURCES:
  1316. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1317. break;
  1318. case CL_OUT_OF_HOST_MEMORY:
  1319. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1320. break;
  1321. default:
  1322. oclHandles.error_str += "Unkown reseason";
  1323. break;
  1324. }
  1325. throw(oclHandles.error_str);
  1326. }
  1327. #endif
  1328. #ifdef PROFILE_
  1329. double t2 = gettime();
  1330. MF += t2 - t1;
  1331. #endif
  1332. }
  1333. /*------------------------------------------------------------
  1334. @function: output time profiling information
  1335. @params: NULL
  1336. @return: NULL
  1337. @date: 03/04/2011
  1338. ------------------------------------------------------------*/
  1339. void _clStatistics(){
  1340. #ifdef PROFILE_
  1341. FILE *fp_pd = fopen("PD_OCL.txt", "a");
  1342. fprintf(fp_pd, "%lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf\n", CC, CR, MA, MF, H2D, D2H, D2D, KE, KC);
  1343. fclose(fp_pd);
  1344. #endif
  1345. return ;
  1346. }
  1347. #endif //_CL_HELPER_