CLHelper.h 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528
  1. /********************************************************************
  2. //--cambine:helper function for OpenCL
  3. //--programmer: Jianbin Fang
  4. //--date: 27/12/2010
  5. ********************************************************************/
  6. #ifndef _CL_HELPER_
  7. #define _CL_HELPER_
  #include <CL/cl.h>
  #include <sys/time.h>
  #include <cstdio>
  #include <cstdlib>
  #include <fstream>
  #include <iostream>
  #include <string>
  #include <vector>
  using std::string;
  using std::ifstream;
  using std::cerr;
  using std::endl;
  using std::cout;
  // Profiling switch: while PROFILE_ is defined the helpers in this file
  // accumulate gettime() differences (seconds) into the counters below.
  #define PROFILE_
  #ifdef PROFILE_
  double TE  = 0.0; // total execution time
  double CC  = 0.0; // context creation time
  double CR  = 0.0; // context release time
  double MA  = 0.0; // GPU memory allocation time
  double MF  = 0.0; // GPU memory free time
  double H2D = 0.0; // host-to-device transfer time
  double D2H = 0.0; // device-to-host transfer time
  double D2D = 0.0; // device-to-device transfer time
  double KE  = 0.0; // kernel execution time
  double KC  = 0.0; // kernel compilation time
  #endif
  //#pragma OPENCL EXTENSION cl_nv_compiler_options:enable
  #define WORK_DIM 2 // number of work-item dimensions
  /*------------------------------------------------------------
  @struct: device properties filled in by _clGetDeviceProperties
  @date: 24/03/2011
  ------------------------------------------------------------*/
  struct _clDeviceProp{
      /* Only the device name is stored. Other clGetDeviceInfo queries
         listed by the original author as candidates for this struct:
           CL_DEVICE_ADDRESS_BITS, CL_DEVICE_AVAILABLE,
           CL_DEVICE_COMPILER_AVAILABLE, CL_DEVICE_ENDIAN_LITTLE,
           CL_DEVICE_ERROR_CORRECTION_SUPPORT,
           CL_DEVICE_EXECUTION_CAPABILITIES, CL_DEVICE_EXTENSIONS,
           CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE,
           CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, CL_DEVICE_GLOBAL_MEM_SIZE,
           CL_DEVICE_IMAGE_SUPPORT, CL_DEVICE_IMAGE2D_MAX_HEIGHT,
           CL_DEVICE_IMAGE2D_MAX_WIDTH, CL_DEVICE_IMAGE3D_MAX_DEPTH,
           CL_DEVICE_IMAGE3D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_WIDTH,
           CL_DEVICE_LOCAL_MEM_SIZE, CL_DEVICE_LOCAL_MEM_TYPE,
           CL_DEVICE_MAX_CLOCK_FREQUENCY, CL_DEVICE_MAX_COMPUTE_UNITS,
           CL_DEVICE_MAX_CONSTANT_ARGS, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE,
           CL_DEVICE_MAX_MEM_ALLOC_SIZE, CL_DEVICE_MAX_PARAMETER_SIZE,
           CL_DEVICE_MAX_READ_IMAGE_ARGS, CL_DEVICE_MAX_SAMPLERS,
           CL_DEVICE_MAX_WORK_GROUP_SIZE, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
           CL_DEVICE_MAX_WORK_ITEM_SIZES, CL_DEVICE_MAX_WRITE_IMAGE_ARGS,
           CL_DEVICE_MEM_BASE_ADDR_ALIGN, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE,
           CL_DEVICE_NAME, CL_DEVICE_PLATFORM,
           CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR,
           CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE,
           CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT,
           CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT,
           CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG,
           CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT,
           CL_DEVICE_PROFILE, CL_DEVICE_PROFILING_TIMER_RESOLUTION,
           CL_DEVICE_QUEUE_PROPERTIES, CL_DEVICE_SINGLE_FP_CONFIG,
           CL_DEVICE_TYPE, CL_DEVICE_VENDOR_ID, CL_DEVICE_VENDOR,
           CL_DEVICE_VERSION, CL_DRIVER_VERSION */
      char device_name[100]; // receives the CL_DEVICE_NAME string
  };
  91. struct oclHandleStruct{
  92. cl_context context;
  93. cl_device_id *devices;
  94. cl_command_queue queue;
  95. cl_program program;
  96. cl_int cl_status;
  97. std::string error_str;
  98. std::vector<cl_kernel> kernel;
  99. cl_mem pinned_mem_out;
  100. cl_mem pinned_mem_in;
  101. };
  102. struct oclHandleStruct oclHandles;
  // OpenCL source file loaded by _clInit and the kernels created from it.
  char kernel_file[100] = "Kernels.cl";
  std::string kernel_names[2] = {"memset_kernel", "pgain_kernel"};
  int total_kernels = sizeof(kernel_names) / sizeof(kernel_names[0]); // == 2

  int work_group_size = 256;
  int device_id_inused = 0; // index of the device in use (default: 0)
  int number_devices = 0;   // set by _clInit to the number of devices found
  // Returns the current gettimeofday() reading as seconds, with the
  // microsecond part folded in as a fraction.
  double gettime() {
      timeval now;
      gettimeofday(&now, NULL);
      const double whole_seconds = now.tv_sec;
      const double fraction = now.tv_usec * 1e-6;
      return whole_seconds + fraction;
  }
  114. /*------------------------------------------------------------
  115. @function: select device to use
  116. @params:
  117. size: the index of device to be used
  118. @return: NULL
  119. @date: 24/03/2011
  120. ------------------------------------------------------------*/
  121. void _clSetDevice(int idx) throw(string){
  122. if(idx>(number_devices-1))
  123. throw(string(":invalid device ID:"));
  124. device_id_inused = idx;
  125. }
  126. /*------------------------------------------------------------
  127. @function: get device properties indexed by 'idx'
  128. @params:
  129. idx: device index
  130. prop: output properties
  131. @return: prop
  132. @date: 24/03/2011
  133. ------------------------------------------------------------*/
  134. void _clGetDeviceProperties(int idx, _clDeviceProp *prop) throw(string){
  135. oclHandles.cl_status= clGetDeviceInfo(oclHandles.devices[idx], CL_DEVICE_NAME, 100, prop->device_name, NULL);
  136. #ifdef ERRMSG
  137. if(oclHandles.cl_status != CL_SUCCESS){
  138. oclHandles.error_str = "exception in _clGetDeviceProperties-> ";
  139. switch(oclHandles.cl_status){
  140. case CL_INVALID_DEVICE:
  141. oclHandles.error_str += "CL_INVALID_DEVICE";
  142. break;
  143. case CL_INVALID_VALUE:
  144. oclHandles.error_str += "CL_INVALID_VALUE";
  145. break;
  146. default:
  147. oclHandles.error_str += "unknown reasons";
  148. break;
  149. }
  150. throw(oclHandles.error_str);
  151. }
  152. #endif
  153. }
  /*
  * Reads the whole content of a file (opened in binary mode) into a string.
  *
  * fileName: path of the file to read.
  * returns:  the file content, byte for byte (embedded NUL bytes included).
  * throws:   std::string("FileToString()::Error: Unable to open file " + fileName)
  *           when the file cannot be opened, sized or read completely.
  */
  std::string FileToString(const std::string fileName){
      std::ifstream f(fileName.c_str(), std::ifstream::in | std::ifstream::binary);
      if(f.is_open()){
          try{
              // Determine the size by seeking to the end; tellg() yields -1 on failure.
              f.seekg(0, std::ifstream::end);
              const std::streamoff fileSize = f.tellg();
              f.seekg(0, std::ifstream::beg);
              if(fileSize >= 0 && f.good()){
                  // Read straight into the string: no manual buffer to leak, and
                  // the explicit length keeps data after an embedded NUL.
                  std::string contents(static_cast<std::string::size_type>(fileSize), '\0');
                  if(fileSize > 0)
                      f.read(&contents[0], static_cast<std::streamsize>(fileSize));
                  if(!f.fail())
                      return contents;
              }
              std::cerr << "Exception caught in FileToString(): could not read " << fileName << std::endl;
          }
          catch(...){
              // e.g. allocation failure for an absurd size
              std::cerr << "Exception caught in FileToString()" << std::endl;
          }
      }
      // The stream is closed by its destructor on every path.
      std::string errorMsg = "FileToString()::Error: Unable to open file "
                             + fileName;
      throw(errorMsg);
  }
  /*------------------------------------------------------------
  @function: Read command line parameters
  @params:
      argc, argv: the arguments as received by main()
      Recognised options (each followed by its value):
        -t <type>  device type string, stored in device_type
        -p <n>     platform index, stored in platform_idx
        -d <n>     device index, stored in device_idx
      Every other argument is ignored.
  @return: nothing; results are left in the three globals below
  @throws: std::string when an option is the last argument and
      therefore has no value
  @date: 24/03/2011
  ------------------------------------------------------------*/
  // Large enough for "cpu"/"gpu"/"acc" plus the terminating NUL (the former
  // 3-byte buffer overflowed on every valid value).
  char device_type[16];
  static int platform_idx;
  static int device_idx = 0;
  void _clCmdParams(int argc, char* argv[]){
      for (int i = 0; i < argc; ++i){
          const char *arg = argv[i];
          // Only "-x" style arguments are options. Without this test the
          // program name or any positional argument whose second character
          // happens to be t/p/d was mistaken for an option.
          if (arg == NULL || arg[0] != '-')
              continue;
          const char option = arg[1];
          if (option != 't' && option != 'p' && option != 'd')
              continue;
          if (++i >= argc){
              std::cerr << "Could not read argument after option " << argv[i-1] << std::endl;
              // A bare `throw;` here had no active exception and terminated the process.
              throw std::string("_clCmdParams()::Error: missing value after option ") + argv[i-1];
          }
          switch (option){
          case 't': //--t stands for device type
              sscanf(argv[i], "%15s", device_type); // width keeps the write inside device_type
              break;
          case 'p': //--p stands for platform id
              sscanf(argv[i], "%d", &platform_idx);
              break;
          case 'd': //--d stands for device id
              sscanf(argv[i], "%d", &device_idx);
              break;
          default:
              break;
          }
      }
  }
  236. /*------------------------------------------------------------
  237. @function: Initlize CL objects
  238. @params:
  239. device_id: device id
  240. device_type: the types of devices, e.g. CPU, GPU, ACCERLERATOR,...
  241. (1) -t cpu/gpu/acc -d 0/1/2/...
  242. (2) -t cpu/gpu/acc [-d 0]
  243. (3) [-t default] -d 0/1/2/...
  244. (4) NULL [-d 0]
  245. @return:
  246. @description:
  247. there are 5 steps to initialize all the OpenCL objects needed,
  248. @revised:
  249. get the number of devices and devices have no relationship with context
  250. @date: 24/03/2011
  251. ------------------------------------------------------------*/
  252. void _clInit(int platform_idx, int device_idx, string device_type)throw(string){
  253. #ifdef PROFILE_
  254. TE = 0;
  255. CC = 0;
  256. CR = 0;
  257. MA = 0;
  258. MF = 0;
  259. H2D = 0;
  260. D2H = 0;
  261. D2D = 0;
  262. KE = 0;
  263. KC = 0;
  264. #endif
  265. int DEVICE_ID_INUSED = 0;
  266. _clDeviceProp prop;
  267. #ifdef PROFILE_
  268. double t1 = gettime();
  269. #endif
  270. cl_int resultCL;
  271. oclHandles.context = NULL;
  272. oclHandles.devices = NULL;
  273. oclHandles.queue = NULL;
  274. oclHandles.program = NULL;
  275. cl_uint deviceListSize;
  276. //-----------------------------------------------
  277. //--cambine-1: find the available platforms and select one
  278. cl_uint numPlatforms;
  279. cl_platform_id targetPlatform = NULL;
  280. resultCL = clGetPlatformIDs(0, NULL, &numPlatforms);
  281. if (resultCL != CL_SUCCESS)
  282. throw (string("InitCL()::Error: Getting number of platforms (clGetPlatformIDs)"));
  283. //printf("number of platforms:%d\n",numPlatforms); //by cambine
  284. #ifdef DEV_INFO
  285. std::cout<<"--cambine: number of platforms: "<<numPlatforms<<std::endl;
  286. #endif
  287. if (!(numPlatforms > 0))
  288. throw (string("InitCL()::Error: No platforms found (clGetPlatformIDs)"));
  289. cl_platform_id* allPlatforms = (cl_platform_id*) malloc(numPlatforms * sizeof(cl_platform_id));
  290. resultCL = clGetPlatformIDs(numPlatforms, allPlatforms, NULL);
  291. if (resultCL != CL_SUCCESS)
  292. throw (string("InitCL()::Error: Getting platform ids (clGetPlatformIDs)"));
  293. // Select the target platform.
  294. printf("DEBUG %d \n", platform_idx);
  295. targetPlatform = allPlatforms[platform_idx];
  296. for (int i = 0; i < numPlatforms; i++)
  297. {
  298. char pbuff[128];
  299. resultCL = clGetPlatformInfo( allPlatforms[i],
  300. CL_PLATFORM_VENDOR,
  301. sizeof(pbuff),
  302. pbuff,
  303. NULL);
  304. if (resultCL != CL_SUCCESS)
  305. throw (string("InitCL()::Error: Getting platform info (clGetPlatformInfo)"));
  306. //printf("vedor is %s\n",pbuff);
  307. #ifdef DEV_INFO
  308. std::cout<<"--cambine: vedor is: "<<pbuff<<std::endl;
  309. #endif
  310. }
  311. free(allPlatforms);
  312. //-----------------------------------------------
  313. //--cambine-2: detect OpenCL devices
  314. // First, get the size of device list
  315. if(device_type.compare("")!=0){
  316. if(device_type.compare("cpu")==0){
  317. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_CPU, 0, NULL, &deviceListSize);
  318. if(oclHandles.cl_status!=CL_SUCCESS){
  319. throw(string("exception in _clInit -> clGetDeviceIDs -> CPU"));
  320. }
  321. }
  322. if(device_type.compare("gpu")==0){
  323. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_GPU, 0, NULL, &deviceListSize);
  324. if(oclHandles.cl_status!=CL_SUCCESS){
  325. throw(string("exception in _clInit -> clGetDeviceIDs -> GPU"));
  326. }
  327. }
  328. if(device_type.compare("acc")==0){
  329. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ACCELERATOR, 0, NULL, &deviceListSize);
  330. if(oclHandles.cl_status!=CL_SUCCESS){
  331. throw(string("exception in _clInit -> clGetDeviceIDs -> ACCELERATOR"));
  332. }
  333. }
  334. }
  335. else{
  336. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ALL, 0, NULL, &deviceListSize);
  337. if(oclHandles.cl_status!=CL_SUCCESS){
  338. throw(string("exception in _clInit -> clGetDeviceIDs -> ALL"));
  339. }
  340. }
  341. if (deviceListSize == 0)
  342. throw(string("InitCL()::Error: No devices found."));
  343. #ifdef DEV_INFO
  344. std::cout<<"--cambine: number of device="<<deviceListSize<<std::endl;
  345. #endif
  346. number_devices = deviceListSize;
  347. // Now, allocate the device list
  348. // oclHandles.devices = (cl_device_id *)malloc(deviceListSize);
  349. oclHandles.devices = (cl_device_id *)malloc(sizeof(cl_device_id)*deviceListSize);
  350. if (oclHandles.devices == 0)
  351. throw(string("InitCL()::Error: Could not allocate memory."));
  352. // Next, get the device list data
  353. if(device_type.compare("")!=0){
  354. if(device_type.compare("cpu")==0){
  355. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_CPU, deviceListSize, oclHandles.devices, NULL);
  356. if(oclHandles.cl_status!=CL_SUCCESS){
  357. throw(string("exception in _clInit -> clGetDeviceIDs -> CPU ->2"));
  358. }
  359. }
  360. if(device_type.compare("gpu")==0){
  361. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_GPU, deviceListSize, oclHandles.devices, NULL);
  362. if(oclHandles.cl_status!=CL_SUCCESS){
  363. throw(string("exception in _clInit -> clGetDeviceIDs -> GPU -> 2"));
  364. }
  365. }
  366. if(device_type.compare("acc")==0){
  367. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ACCELERATOR, deviceListSize, oclHandles.devices, NULL);
  368. if(oclHandles.cl_status!=CL_SUCCESS){
  369. throw(string("exception in _clInit -> clGetDeviceIDs -> ACCELERATOR -> 2"));
  370. }
  371. }
  372. }
  373. else{
  374. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ALL, deviceListSize, oclHandles.devices, NULL);
  375. if(oclHandles.cl_status!=CL_SUCCESS){
  376. throw(string("exception in _clInit -> clGetDeviceIDs -> ALL -> 2"));
  377. }
  378. }
  379. if(device_idx!=0){
  380. if(device_idx>(deviceListSize-1))
  381. throw(string("Invalidate device id"));
  382. DEVICE_ID_INUSED = device_idx;
  383. }
  384. _clGetDeviceProperties(DEVICE_ID_INUSED, &prop);
  385. std::cout<<"--cambine: device name="<<prop.device_name<<std::endl;
  386. #ifdef DEV_INFO
  387. std::cout<<"--cambine: return device list successfully!"<<std::endl;
  388. #endif
  389. //-----------------------------------------------
  390. //--cambine-3: create an OpenCL context
  391. #ifdef DEV_INFO
  392. std::cout<<"--cambine: before creating context"<<std::endl;
  393. #endif
  394. cl_device_type device_type_cl = (device_type == "gpu") ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
  395. cl_context_properties cprops[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)targetPlatform, 0 };
  396. oclHandles.context = clCreateContextFromType(cprops, device_type_cl, NULL, NULL, NULL);
  397. if (!oclHandles.context) {
  398. printf("ERROR: clCreateContextFromType failed\n");
  399. exit(1);
  400. }
  401. #ifdef DEV_INFO
  402. std::cout<<"--cambine: create OCL context successfully!"<<std::endl;
  403. #endif
  404. //-----------------------------------------------
  405. //--cambine-4: Create an OpenCL command queue
  406. oclHandles.queue = clCreateCommandQueue(oclHandles.context,
  407. oclHandles.devices[DEVICE_ID_INUSED],
  408. 0,
  409. &resultCL);
  410. if ((resultCL != CL_SUCCESS) || (oclHandles.queue == NULL))
  411. throw(string("InitCL()::Creating Command Queue. (clCreateCommandQueue)"));
  412. #ifdef PROFILE_
  413. double t2 = gettime();
  414. CC += t2 - t1;
  415. #endif
  416. //-----------------------------------------------
  417. //--cambine-5: Load CL file, build CL program object, create CL kernel object
  418. std::string source_str = FileToString(kernel_file);
  419. const char * source = source_str.c_str();
  420. size_t sourceSize[] = { source_str.length() };
  421. oclHandles.program = clCreateProgramWithSource(oclHandles.context,
  422. 1,
  423. &source,
  424. sourceSize,
  425. &resultCL);
  426. if ((resultCL != CL_SUCCESS) || (oclHandles.program == NULL))
  427. throw(string("InitCL()::Error: Loading Binary into cl_program. (clCreateProgramWithBinary)"));
  428. //insert debug information
  429. std::string options= "";
  430. //options += " -cl-nv-opt-level=3";
  431. resultCL = clBuildProgram(oclHandles.program, deviceListSize, oclHandles.devices, options.c_str(), NULL, NULL);
  432. if ((resultCL != CL_SUCCESS) || (oclHandles.program == NULL)){
  433. cerr << "InitCL()::Error: In clBuildProgram" << endl;
  434. size_t length;
  435. resultCL = clGetProgramBuildInfo(oclHandles.program,
  436. oclHandles.devices[DEVICE_ID_INUSED],
  437. CL_PROGRAM_BUILD_LOG,
  438. 0,
  439. NULL,
  440. &length);
  441. if(resultCL != CL_SUCCESS)
  442. throw(string("InitCL()::Error: Getting Program build info(clGetProgramBuildInfo)"));
  443. char* buffer = (char*)malloc(length);
  444. resultCL = clGetProgramBuildInfo(oclHandles.program,
  445. oclHandles.devices[DEVICE_ID_INUSED],
  446. CL_PROGRAM_BUILD_LOG,
  447. length,
  448. buffer,
  449. NULL);
  450. if(resultCL != CL_SUCCESS)
  451. throw(string("InitCL()::Error: Getting Program build info(clGetProgramBuildInfo)"));
  452. cerr << buffer << endl;
  453. FILE * fp = fopen("errinfo", "w");
  454. fprintf(fp, "%s\n", buffer);
  455. fclose(fp);
  456. free(buffer);
  457. throw(string("InitCL()::Error: Building Program (clBuildProgram)"));
  458. }
  459. #ifdef PROFILE_
  460. double t3 = gettime();
  461. KC += t3 - t2;
  462. #endif
  463. //get program information in intermediate representation
  464. #ifdef PTX_MSG
  465. size_t binary_sizes[deviceListSize];
  466. char * binaries[deviceListSize];
  467. //figure out number of devices and the sizes of the binary for each device.
  468. oclHandles.cl_status = clGetProgramInfo(oclHandles.program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t)*deviceListSize, &binary_sizes, NULL );
  469. if(oclHandles.cl_status!=CL_SUCCESS){
  470. throw(string("--cambine:exception in _InitCL -> clGetProgramInfo-2"));
  471. }
  472. std::cout<<"--cambine:"<<binary_sizes<<std::endl;
  473. //copy over all of the generated binaries.
  474. for(int i=0;i<deviceListSize;i++)
  475. binaries[i] = (char *)malloc( sizeof(char)*(binary_sizes[i]+1));
  476. oclHandles.cl_status = clGetProgramInfo(oclHandles.program, CL_PROGRAM_BINARIES, sizeof(char *)*deviceListSize, binaries, NULL );
  477. if(oclHandles.cl_status!=CL_SUCCESS){
  478. throw(string("--cambine:exception in _InitCL -> clGetProgramInfo-3"));
  479. }
  480. for(int i=0;i<deviceListSize;i++)
  481. binaries[i][binary_sizes[i]] = '\0';
  482. std::cout<<"--cambine:writing ptd information..."<<std::endl;
  483. FILE * ptx_file = fopen("cl.ptx","w");
  484. if(ptx_file==NULL){
  485. throw(string("exceptions in allocate ptx file."));
  486. }
  487. fprintf(ptx_file,"%s",binaries[DEVICE_ID_INUSED]);
  488. fclose(ptx_file);
  489. std::cout<<"--cambine:writing ptd information done."<<std::endl;
  490. for(int i=0;i<deviceListSize;i++)
  491. free(binaries[i]);
  492. #endif
  493. for (int nKernel = 0; nKernel < total_kernels; nKernel++)
  494. {
  495. // get a kernel object handle for a kernel with the given name
  496. cl_kernel kernel = clCreateKernel(oclHandles.program,
  497. (kernel_names[nKernel]).c_str(),
  498. &resultCL);
  499. if ((resultCL != CL_SUCCESS) || (kernel == NULL))
  500. {
  501. string errorMsg = "InitCL()::Error: Creating Kernel (clCreateKernel) \"" + kernel_names[nKernel] + "\"";
  502. throw(errorMsg);
  503. }
  504. oclHandles.kernel.push_back(kernel);
  505. }
  506. //get resource alocation information
  507. #ifdef RES_MSG
  508. char * build_log;
  509. size_t ret_val_size;
  510. oclHandles.cl_status = clGetProgramBuildInfo(oclHandles.program, oclHandles.devices[DEVICE_ID_INUSED], CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
  511. if(oclHandles.cl_status!=CL_SUCCESS){
  512. throw(string("exceptions in _InitCL -> getting resource information"));
  513. }
  514. build_log = (char *)malloc(ret_val_size+1);
  515. oclHandles.cl_status = clGetProgramBuildInfo(oclHandles.program, oclHandles.devices[DEVICE_ID_INUSED], CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
  516. if(oclHandles.cl_status!=CL_SUCCESS){
  517. throw(string("exceptions in _InitCL -> getting resources allocation information-2"));
  518. }
  519. build_log[ret_val_size] = '\0';
  520. std::cout<<"--cambine:"<<build_log<<std::endl;
  521. free(build_log);
  522. #endif
  523. #ifdef PROFILE_
  524. double t4 = gettime();
  525. CC += t4 - t3;
  526. #endif
  527. }
  528. /*------------------------------------------------------------
  529. @function: release CL objects
  530. @params: NULL
  531. @return:
  532. @date: 24/03/2011
  533. ------------------------------------------------------------*/
  534. void _clRelease()
  535. {
  536. #ifdef PROFILE_
  537. double t1 = gettime();
  538. #endif
  539. bool errorFlag = false;
  540. for (int nKernel = 0; nKernel < oclHandles.kernel.size(); nKernel++){
  541. if (oclHandles.kernel[nKernel] != NULL){
  542. cl_int resultCL = clReleaseKernel(oclHandles.kernel[nKernel]);
  543. if (resultCL != CL_SUCCESS){
  544. cerr << "ReleaseCL()::Error: In clReleaseKernel" << endl;
  545. errorFlag = true;
  546. }
  547. oclHandles.kernel[nKernel] = NULL;
  548. }
  549. oclHandles.kernel.clear();
  550. }
  551. if (oclHandles.program != NULL){
  552. cl_int resultCL = clReleaseProgram(oclHandles.program);
  553. if (resultCL != CL_SUCCESS){
  554. cerr << "ReleaseCL()::Error: In clReleaseProgram" << endl;
  555. errorFlag = true;
  556. }
  557. oclHandles.program = NULL;
  558. }
  559. if (oclHandles.queue != NULL){
  560. cl_int resultCL = clReleaseCommandQueue(oclHandles.queue);
  561. if (resultCL != CL_SUCCESS)
  562. {
  563. cerr << "ReleaseCL()::Error: In clReleaseCommandQueue" << endl;
  564. errorFlag = true;
  565. }
  566. oclHandles.queue = NULL;
  567. }
  568. free(oclHandles.devices);
  569. if (oclHandles.context != NULL){
  570. cl_int resultCL = clReleaseContext(oclHandles.context);
  571. if (resultCL != CL_SUCCESS){
  572. cerr << "ReleaseCL()::Error: In clReleaseContext" << endl;
  573. errorFlag = true;
  574. }
  575. oclHandles.context = NULL;
  576. }
  577. if (errorFlag) throw(string("ReleaseCL()::Error encountered."));
  578. #ifdef PROFILE_
  579. double t2 = gettime();
  580. CR += t2 - t1;
  581. #endif
  582. }
  583. /*------------------------------------------------------------
  584. @function: create read and write buffer for devices
  585. @params:
  586. size: the size of device memory to be allocated
  587. @return: mem_d
  588. @date: 24/03/2011
  589. ------------------------------------------------------------*/
  590. cl_mem _clMalloc(int size) throw(string){
  591. #ifdef PROFILE_
  592. double t1 = gettime();
  593. #endif
  594. cl_mem d_mem;
  595. d_mem = clCreateBuffer(oclHandles.context, CL_MEM_READ_WRITE, size, NULL, &oclHandles.cl_status);
  596. #ifdef ERRMSG
  597. if(oclHandles.cl_status != CL_SUCCESS){
  598. oclHandles.error_str = "excpetion in _clMalloc -> ";
  599. switch(oclHandles.cl_status){
  600. case CL_INVALID_CONTEXT:
  601. oclHandles.error_str += "CL_INVALID_CONTEXT";
  602. break;
  603. case CL_INVALID_VALUE:
  604. oclHandles.error_str += "CL_INVALID_VALUE";
  605. break;
  606. case CL_INVALID_BUFFER_SIZE:
  607. oclHandles.error_str += "CL_INVALID_BUFFER_SIZE";
  608. break;
  609. case CL_INVALID_HOST_PTR:
  610. oclHandles.error_str += "CL_INVALID_HOST_PTR";
  611. break;
  612. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  613. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  614. break;
  615. case CL_OUT_OF_HOST_MEMORY:
  616. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  617. break;
  618. default:
  619. oclHandles.error_str += "unknown reasons";
  620. break;
  621. }
  622. throw(oclHandles.error_str);
  623. }
  624. #endif
  625. #ifdef PROFILE_
  626. double t2 = gettime();
  627. MA += t2 - t1;
  628. #endif
  629. return d_mem;
  630. }
  631. /*------------------------------------------------------------
  632. @function: malloc pinned memoty
  633. @params:
  634. size: the size of data to be transferred in bytes
  635. @return: the pointer of host adress
  636. @date: 06/04/2011
  637. ------------------------------------------------------------*/
  638. void* _clMallocHost(int size)throw(string){
  639. void * mem_h;
  640. oclHandles.pinned_mem_out = clCreateBuffer(oclHandles.context, CL_MEM_READ_WRITE|CL_MEM_ALLOC_HOST_PTR, size, NULL, &oclHandles.cl_status);
  641. #ifdef ERRMSG
  642. if(oclHandles.cl_status != CL_SUCCESS){
  643. oclHandles.error_str = "excpetion in _clMallocHost -> clCreateBuffer";
  644. switch(oclHandles.cl_status){
  645. case CL_INVALID_CONTEXT:
  646. oclHandles.error_str += "CL_INVALID_CONTEXT";
  647. break;
  648. case CL_INVALID_VALUE:
  649. oclHandles.error_str += "CL_INVALID_VALUE";
  650. break;
  651. case CL_INVALID_BUFFER_SIZE:
  652. oclHandles.error_str += "CL_INVALID_BUFFER_SIZE";
  653. break;
  654. case CL_INVALID_HOST_PTR:
  655. oclHandles.error_str += "CL_INVALID_HOST_PTR";
  656. break;
  657. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  658. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  659. break;
  660. case CL_OUT_OF_HOST_MEMORY:
  661. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  662. break;
  663. default:
  664. oclHandles.error_str += "unknown reasons";
  665. break;
  666. }
  667. throw(oclHandles.error_str);
  668. }
  669. #endif
  670. mem_h = clEnqueueMapBuffer(oclHandles.queue, oclHandles.pinned_mem_out, CL_TRUE, CL_MAP_READ, 0, size, 0, NULL, NULL, &oclHandles.cl_status);
  671. #ifdef ERRMSG
  672. if(oclHandles.cl_status != CL_SUCCESS||mem_h==NULL){
  673. oclHandles.error_str = "excpetion in _clMallocHost -> clEnqueueMapBuffer";
  674. switch(oclHandles.cl_status){
  675. case CL_INVALID_COMMAND_QUEUE:
  676. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  677. break;
  678. case CL_INVALID_CONTEXT:
  679. oclHandles.error_str += "CL_INVALID_CONTEXT";
  680. break;
  681. case CL_INVALID_MEM_OBJECT:
  682. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  683. break;
  684. case CL_INVALID_VALUE:
  685. oclHandles.error_str += "CL_INVALID_VALUE";
  686. break;
  687. case CL_INVALID_EVENT_WAIT_LIST:
  688. oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
  689. break;
  690. case CL_MAP_FAILURE:
  691. oclHandles.error_str += "CL_MAP_FAILURE";
  692. break;
  693. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  694. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  695. break;
  696. case CL_OUT_OF_HOST_MEMORY:
  697. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  698. break;
  699. default:
  700. oclHandles.error_str += "unknown reasons";
  701. break;
  702. }
  703. throw(oclHandles.error_str);
  704. }
  705. #endif
  706. return mem_h;
  707. }
  708. /*------------------------------------------------------------
  709. @function: free pinned memory
  710. @params:
  711. io: to free pinned-in or pinned-out memory
  712. mem_h: the host address
  713. @return: NULL
  714. @date: 06/04/2011
  715. ------------------------------------------------------------*/
  716. void _clFreeHost(int io, void * mem_h){
  717. if(io==0){ //in
  718. if(mem_h){
  719. oclHandles.cl_status = clEnqueueUnmapMemObject(oclHandles.queue, oclHandles.pinned_mem_in, (void*)mem_h, 0, NULL, NULL);
  720. #ifdef ERRMSG
  721. if(oclHandles.cl_status != CL_SUCCESS){
  722. oclHandles.error_str = "excpetion in _clFreeHost -> clEnqueueUnmapMemObject(in)";
  723. switch(oclHandles.cl_status){
  724. case CL_INVALID_COMMAND_QUEUE:
  725. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  726. break;
  727. case CL_INVALID_MEM_OBJECT:
  728. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  729. break;
  730. case CL_INVALID_VALUE:
  731. oclHandles.error_str += "CL_INVALID_VALUE";
  732. break;
  733. case CL_OUT_OF_RESOURCES:
  734. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  735. break;
  736. case CL_OUT_OF_HOST_MEMORY:
  737. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  738. break;
  739. case CL_INVALID_CONTEXT:
  740. oclHandles.error_str += "CL_INVALID_CONTEXT";
  741. break;
  742. default:
  743. oclHandles.error_str += "unknown reasons";
  744. break;
  745. }
  746. throw(oclHandles.error_str);
  747. }
  748. #endif
  749. }
  750. }
  751. else if(io==1){ //out
  752. if(mem_h){
  753. oclHandles.cl_status = clEnqueueUnmapMemObject(oclHandles.queue, oclHandles.pinned_mem_out, (void*)mem_h, 0, NULL, NULL);
  754. #ifdef ERRMSG
  755. if(oclHandles.cl_status != CL_SUCCESS){
  756. oclHandles.error_str = "excpetion in _clFreeHost -> clEnqueueUnmapMemObject(in)";
  757. switch(oclHandles.cl_status){
  758. case CL_INVALID_COMMAND_QUEUE:
  759. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  760. break;
  761. case CL_INVALID_MEM_OBJECT:
  762. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  763. break;
  764. case CL_INVALID_VALUE:
  765. oclHandles.error_str += "CL_INVALID_VALUE";
  766. break;
  767. case CL_OUT_OF_RESOURCES:
  768. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  769. break;
  770. case CL_OUT_OF_HOST_MEMORY:
  771. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  772. break;
  773. case CL_INVALID_CONTEXT:
  774. oclHandles.error_str += "CL_INVALID_CONTEXT";
  775. break;
  776. default:
  777. oclHandles.error_str += "unknown reasons";
  778. break;
  779. }
  780. throw(oclHandles.error_str);
  781. }
  782. #endif
  783. }
  784. }
  785. else
  786. throw(string("encounter invalid choice when freeing pinned memmory"));
  787. }
  788. /*------------------------------------------------------------
  789. @function: transfer data from host to device
  790. @params:
  791. dest: the destination device memory
  792. src: the source host memory
  793. size: the size of data to be transferred in bytes
  794. @return: NULL
  795. @date: 17/01/2011
  796. ------------------------------------------------------------*/
  797. void _clMemcpyH2D(cl_mem dst, const void *src, int size) throw(string){
  798. #ifdef PROFILE_
  799. double t1 = gettime();
  800. #endif
  801. oclHandles.cl_status = clEnqueueWriteBuffer(oclHandles.queue, dst, CL_TRUE, 0, size, src, 0, NULL, NULL);
  802. #ifdef ERRMSG
  803. if(oclHandles.cl_status != CL_SUCCESS){
  804. oclHandles.error_str = "excpetion in _clMemcpyH2D -> ";
  805. switch(oclHandles.cl_status){
  806. case CL_INVALID_COMMAND_QUEUE:
  807. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  808. break;
  809. case CL_INVALID_CONTEXT:
  810. oclHandles.error_str += "CL_INVALID_CONTEXT";
  811. break;
  812. case CL_INVALID_MEM_OBJECT:
  813. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  814. break;
  815. case CL_INVALID_VALUE:
  816. oclHandles.error_str += "CL_INVALID_VALUE";
  817. break;
  818. case CL_INVALID_EVENT_WAIT_LIST:
  819. oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
  820. break;
  821. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  822. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  823. break;
  824. case CL_OUT_OF_HOST_MEMORY:
  825. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  826. break;
  827. default:
  828. oclHandles.error_str += "Unknown reason";
  829. break;
  830. }
  831. throw(oclHandles.error_str);
  832. }
  833. #endif
  834. #ifdef PROFILE_
  835. double t2 = gettime();
  836. H2D += t2 - t1;
  837. #endif
  838. }
  839. /*------------------------------------------------------------
  840. @function: transfer data from device to host
  841. @params:
  842. dest: the destination device memory
  843. src: the source host memory
  844. size: the size of data to be transferred in bytes
  845. @return: NULL
  846. @date: 17/01/2011
  847. ------------------------------------------------------------*/
  848. void _clMemcpyD2H(void * dst, cl_mem src, int size) throw(string){
  849. #ifdef PROFILE_
  850. double t1 = gettime();
  851. #endif
  852. oclHandles.cl_status = clEnqueueReadBuffer(oclHandles.queue, src, CL_TRUE, 0, size, dst, 0,0,0);
  853. #ifdef ERRMSG
  854. if(oclHandles.cl_status != CL_SUCCESS){
  855. oclHandles.error_str = "excpetion in _clMemCpyD2H -> ";
  856. switch(oclHandles.cl_status){
  857. case CL_INVALID_COMMAND_QUEUE:
  858. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  859. break;
  860. case CL_INVALID_CONTEXT:
  861. oclHandles.error_str += "CL_INVALID_CONTEXT";
  862. break;
  863. case CL_INVALID_MEM_OBJECT:
  864. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  865. break;
  866. case CL_INVALID_VALUE:
  867. oclHandles.error_str += "CL_INVALID_VALUE";
  868. break;
  869. case CL_INVALID_EVENT_WAIT_LIST:
  870. oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
  871. break;
  872. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  873. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  874. break;
  875. case CL_OUT_OF_HOST_MEMORY:
  876. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  877. break;
  878. default:
  879. oclHandles.error_str += "Unknown reason";
  880. break;
  881. }
  882. throw(oclHandles.error_str);
  883. }
  884. #endif
  885. #ifdef PROFILE_
  886. double t2 = gettime();
  887. D2H += t2 - t1;
  888. #endif
  889. }
  890. /*------------------------------------------------------------
  891. @function: transfer data from device to device
  892. @params:
  893. dest: the destination device memory
  894. src: the source device memory
  895. size: the size of data to be transferred in bytes
  896. @return: NULL
  897. @date: 27/03/2011
  898. ------------------------------------------------------------*/
  899. void _clMemcpyD2D(cl_mem dst, cl_mem src, int size) throw(string){
  900. #ifdef PROFILE_
  901. double t1 = gettime();
  902. #endif
  903. oclHandles.cl_status = clEnqueueCopyBuffer(oclHandles.queue, src, dst, 0, 0, size, 0, NULL, NULL);
  904. #ifdef ERRMSG
  905. if(oclHandles.cl_status != CL_SUCCESS){
  906. oclHandles.error_str = "excpetion in _clCpyMemD2D -> ";
  907. switch(oclHandles.cl_status){
  908. case CL_INVALID_COMMAND_QUEUE:
  909. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  910. break;
  911. case CL_INVALID_CONTEXT:
  912. oclHandles.error_str += "CL_INVALID_CONTEXT";
  913. break;
  914. case CL_INVALID_MEM_OBJECT:
  915. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  916. break;
  917. case CL_INVALID_VALUE:
  918. oclHandles.error_str += "CL_INVALID_VALUE";
  919. break;
  920. case CL_INVALID_EVENT_WAIT_LIST:
  921. oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
  922. break;
  923. case CL_MISALIGNED_SUB_BUFFER_OFFSET:
  924. oclHandles.error_str += "CL_MISALIGNED_SUB_BUFFER_OFFSET";
  925. break;
  926. case CL_MEM_COPY_OVERLAP:
  927. oclHandles.error_str += "CL_MEM_COPY_OVERLAP";
  928. break;
  929. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  930. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  931. break;
  932. case CL_OUT_OF_RESOURCES:
  933. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  934. break;
  935. case CL_OUT_OF_HOST_MEMORY:
  936. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  937. break;
  938. default:
  939. oclHandles.error_str += "Unknown reason";
  940. break;
  941. }
  942. throw(oclHandles.error_str);
  943. }
  944. #endif
  945. #ifdef PROFILE_
  946. double t2 = gettime();
  947. D2D += t2 - t1;
  948. #endif
  949. }
  950. /*------------------------------------------------------------
  951. @function: set kernel arguments
  952. @params:
  953. kernel_id: the index of kernel to set
  954. arg_idx: the index of argument
  955. d_mem: the variable of device memory
  956. size: the size of device memory
  957. @return: NULL
  958. @date: 03/04/2011
  959. ------------------------------------------------------------*/
  960. void _clSetArgs(int kernel_id, int arg_idx, void * d_mem, int size = 0) throw(string){
  961. if(!size){ // normal device memory object
  962. oclHandles.cl_status = clSetKernelArg(oclHandles.kernel[kernel_id], arg_idx, sizeof(d_mem), &d_mem);
  963. #ifdef ERRMSG
  964. if(oclHandles.cl_status != CL_SUCCESS){
  965. oclHandles.error_str = "excpetion in _clSetKernelArg() ";
  966. switch(oclHandles.cl_status){
  967. case CL_INVALID_KERNEL:
  968. oclHandles.error_str += "CL_INVALID_KERNEL";
  969. break;
  970. case CL_INVALID_ARG_INDEX:
  971. oclHandles.error_str += "CL_INVALID_ARG_INDEX";
  972. break;
  973. case CL_INVALID_ARG_VALUE:
  974. oclHandles.error_str += "CL_INVALID_ARG_VALUE";
  975. break;
  976. case CL_INVALID_MEM_OBJECT:
  977. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  978. break;
  979. case CL_INVALID_SAMPLER:
  980. oclHandles.error_str += "CL_INVALID_SAMPLER";
  981. break;
  982. case CL_INVALID_ARG_SIZE:
  983. oclHandles.error_str += "CL_INVALID_ARG_SIZE";
  984. break;
  985. case CL_OUT_OF_RESOURCES:
  986. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  987. break;
  988. case CL_OUT_OF_HOST_MEMORY:
  989. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  990. break;
  991. default:
  992. oclHandles.error_str += "Unknown reason";
  993. break;
  994. }
  995. throw(oclHandles.error_str);
  996. }
  997. #endif
  998. }
  999. else{ //special device object:(1) local memory; (2) single word
  1000. oclHandles.cl_status = clSetKernelArg(oclHandles.kernel[kernel_id], arg_idx, size, d_mem);
  1001. #ifdef ERRMSG
  1002. if(oclHandles.cl_status != CL_SUCCESS){
  1003. oclHandles.error_str = "excpetion in _clSetKernelArg() ";
  1004. switch(oclHandles.cl_status){
  1005. case CL_INVALID_KERNEL:
  1006. oclHandles.error_str += "CL_INVALID_KERNEL";
  1007. break;
  1008. case CL_INVALID_ARG_INDEX:
  1009. oclHandles.error_str += "CL_INVALID_ARG_INDEX";
  1010. break;
  1011. case CL_INVALID_ARG_VALUE:
  1012. oclHandles.error_str += "CL_INVALID_ARG_VALUE";
  1013. break;
  1014. case CL_INVALID_MEM_OBJECT:
  1015. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  1016. break;
  1017. case CL_INVALID_SAMPLER:
  1018. oclHandles.error_str += "CL_INVALID_SAMPLER";
  1019. break;
  1020. case CL_INVALID_ARG_SIZE:
  1021. oclHandles.error_str += "CL_INVALID_ARG_SIZE";
  1022. break;
  1023. case CL_OUT_OF_RESOURCES:
  1024. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1025. break;
  1026. case CL_OUT_OF_HOST_MEMORY:
  1027. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1028. break;
  1029. default:
  1030. oclHandles.error_str += "Unknown reason";
  1031. break;
  1032. }
  1033. throw(oclHandles.error_str);
  1034. }
  1035. #endif
  1036. }
  1037. }
  1038. void _clFinish() throw(string){
  1039. oclHandles.cl_status = clFinish(oclHandles.queue);
  1040. #ifdef ERRMSG
  1041. if(oclHandles.cl_status!=CL_SUCCESS){
  1042. oclHandles.error_str = "excpetion in _clFinish";
  1043. switch(oclHandles.cl_status){
  1044. case CL_INVALID_COMMAND_QUEUE:
  1045. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  1046. break;
  1047. case CL_OUT_OF_RESOURCES:
  1048. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1049. break;
  1050. case CL_OUT_OF_HOST_MEMORY:
  1051. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1052. break;
  1053. default:
  1054. oclHandles.error_str += "Unknown reasons";
  1055. break;
  1056. }
  1057. throw(oclHandles.error_str);
  1058. }
  1059. #endif
  1060. }
  1061. /*------------------------------------------------------------
  1062. @function: entry of invoke the kernel function
  1063. @params:
  1064. kernel_id: the index of kernel to set
  1065. work_items: the number of working items
  1066. work_group_size: the size of each work group
  1067. @return: NULL
  1068. @date: 03/04/2011
  1069. ------------------------------------------------------------*/
  1070. void _clInvokeKernel(int kernel_id, int work_items, int work_group_size) throw(string){
  1071. #ifdef PROFILE_
  1072. double t1 = gettime();
  1073. #endif
  1074. cl_uint work_dim = WORK_DIM;
  1075. cl_event e[1];
  1076. if(work_items%work_group_size != 0) //process situations that work_items cannot be divided by work_group_size
  1077. work_items = work_items + (work_group_size-(work_items%work_group_size));
  1078. size_t local_work_size[] = {work_group_size, 1};
  1079. size_t global_work_size[] = {work_items, 1};
  1080. oclHandles.cl_status = clEnqueueNDRangeKernel(oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0, \
  1081. global_work_size, local_work_size, 0 , 0, &(e[0]) );
  1082. #ifdef ERRMSG
  1083. if(oclHandles.cl_status != CL_SUCCESS){
  1084. oclHandles.error_str = "excpetion in _clInvokeKernel() -> ";
  1085. switch(oclHandles.cl_status){
  1086. case CL_INVALID_PROGRAM_EXECUTABLE:
  1087. oclHandles.error_str += "CL_INVALID_PROGRAM_EXECUTABLE";
  1088. break;
  1089. case CL_INVALID_COMMAND_QUEUE:
  1090. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  1091. break;
  1092. case CL_INVALID_KERNEL:
  1093. oclHandles.error_str += "CL_INVALID_KERNEL";
  1094. break;
  1095. case CL_INVALID_CONTEXT:
  1096. oclHandles.error_str += "CL_INVALID_CONTEXT";
  1097. break;
  1098. case CL_INVALID_KERNEL_ARGS:
  1099. oclHandles.error_str += "CL_INVALID_KERNEL_ARGS";
  1100. break;
  1101. case CL_INVALID_WORK_DIMENSION:
  1102. oclHandles.error_str += "CL_INVALID_WORK_DIMENSION";
  1103. break;
  1104. case CL_INVALID_GLOBAL_WORK_SIZE:
  1105. oclHandles.error_str += "CL_INVALID_GLOBAL_WORK_SIZE";
  1106. break;
  1107. case CL_INVALID_WORK_GROUP_SIZE:
  1108. oclHandles.error_str += "CL_INVALID_WORK_GROUP_SIZE";
  1109. break;
  1110. case CL_INVALID_WORK_ITEM_SIZE:
  1111. oclHandles.error_str += "CL_INVALID_WORK_ITEM_SIZE";
  1112. break;
  1113. case CL_INVALID_GLOBAL_OFFSET:
  1114. oclHandles.error_str += "CL_INVALID_GLOBAL_OFFSET";
  1115. break;
  1116. case CL_OUT_OF_RESOURCES:
  1117. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1118. break;
  1119. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  1120. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  1121. break;
  1122. case CL_INVALID_EVENT_WAIT_LIST:
  1123. oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
  1124. break;
  1125. case CL_OUT_OF_HOST_MEMORY:
  1126. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1127. break;
  1128. default:
  1129. oclHandles.error_str += "Unkown reseason";
  1130. break;
  1131. }
  1132. throw(oclHandles.error_str);
  1133. }
  1134. #endif
  1135. //_clFinish();
  1136. oclHandles.cl_status = clWaitForEvents(1, &e[0]);
  1137. #ifdef ERRMSG
  1138. if (oclHandles.cl_status!= CL_SUCCESS){
  1139. oclHandles.error_str = "excpetion in _clEnqueueNDRange() -> clWaitForEvents ->";
  1140. switch(oclHandles.cl_status){
  1141. case CL_INVALID_VALUE:
  1142. oclHandles.error_str += "CL_INVALID_VALUE";
  1143. break;
  1144. case CL_INVALID_CONTEXT:
  1145. oclHandles.error_str += "CL_INVALID_CONTEXT";
  1146. break;
  1147. case CL_INVALID_EVENT:
  1148. oclHandles.error_str += "CL_INVALID_EVENT";
  1149. break;
  1150. case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
  1151. oclHandles.error_str += "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
  1152. break;
  1153. case CL_OUT_OF_RESOURCES:
  1154. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1155. break;
  1156. case CL_OUT_OF_HOST_MEMORY:
  1157. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1158. break;
  1159. default:
  1160. oclHandles.error_str += "Unkown Reason";
  1161. break;
  1162. }
  1163. throw(oclHandles.error_str);
  1164. }
  1165. #endif
  1166. #ifdef PROFILE_
  1167. double t2 = gettime();
  1168. KE += t2 - t1;
  1169. #endif
  1170. }
  1171. /*------------------------------------------------------------
  1172. @function: set device memory in an easy manner
  1173. @params:
  1174. mem_d: the device memory to be set;
  1175. val: set the selected memory to 'val';
  1176. number_elements: the number of elements in the selected memory
  1177. @return: NULL
  1178. @date: 03/04/2011
  1179. ------------------------------------------------------------*/
  1180. void _clMemset(cl_mem mem_d, short val, int number_bytes)throw(string){
  1181. #ifdef PROFILE_
  1182. double t1 = gettime();
  1183. #endif
  1184. int kernel_id = 0;
  1185. int arg_idx = 0;
  1186. _clSetArgs(kernel_id, arg_idx++, mem_d);
  1187. _clSetArgs(kernel_id, arg_idx++, &val, sizeof(short));
  1188. _clSetArgs(kernel_id, arg_idx++, &number_bytes, sizeof(int));
  1189. cl_uint work_dim = WORK_DIM;
  1190. int work_items = number_bytes;
  1191. cl_event e[1];
  1192. if(work_items%work_group_size != 0) //process situations that work_items cannot be divided by work_group_size
  1193. work_items = work_items + (work_group_size-(work_items%work_group_size));
  1194. size_t local_work_size[] = {work_group_size, 1};
  1195. size_t global_work_size[] = {work_items, 1};
  1196. oclHandles.cl_status = clEnqueueNDRangeKernel(oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0, \
  1197. global_work_size, local_work_size, 0 , 0, &(e[0]) );
  1198. #ifdef ERRMSG
  1199. if(oclHandles.cl_status != CL_SUCCESS){
  1200. oclHandles.error_str = "excpetion in _clMemset() -> ";
  1201. switch(oclHandles.cl_status){
  1202. case CL_INVALID_PROGRAM_EXECUTABLE:
  1203. oclHandles.error_str += "CL_INVALID_PROGRAM_EXECUTABLE";
  1204. break;
  1205. case CL_INVALID_COMMAND_QUEUE:
  1206. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  1207. break;
  1208. case CL_INVALID_KERNEL:
  1209. oclHandles.error_str += "CL_INVALID_KERNEL";
  1210. break;
  1211. case CL_INVALID_CONTEXT:
  1212. oclHandles.error_str += "CL_INVALID_CONTEXT";
  1213. break;
  1214. case CL_INVALID_KERNEL_ARGS:
  1215. oclHandles.error_str += "CL_INVALID_KERNEL_ARGS";
  1216. break;
  1217. case CL_INVALID_WORK_DIMENSION:
  1218. oclHandles.error_str += "CL_INVALID_WORK_DIMENSION";
  1219. break;
  1220. case CL_INVALID_GLOBAL_WORK_SIZE:
  1221. oclHandles.error_str += "CL_INVALID_GLOBAL_WORK_SIZE";
  1222. break;
  1223. case CL_INVALID_WORK_GROUP_SIZE:
  1224. oclHandles.error_str += "CL_INVALID_WORK_GROUP_SIZE";
  1225. break;
  1226. case CL_INVALID_WORK_ITEM_SIZE:
  1227. oclHandles.error_str += "CL_INVALID_WORK_ITEM_SIZE";
  1228. break;
  1229. case CL_INVALID_GLOBAL_OFFSET:
  1230. oclHandles.error_str += "CL_INVALID_GLOBAL_OFFSET";
  1231. break;
  1232. case CL_OUT_OF_RESOURCES:
  1233. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1234. break;
  1235. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  1236. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  1237. break;
  1238. case CL_INVALID_EVENT_WAIT_LIST:
  1239. oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
  1240. break;
  1241. case CL_OUT_OF_HOST_MEMORY:
  1242. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1243. break;
  1244. default:
  1245. oclHandles.error_str += "Unkown reseason";
  1246. break;
  1247. }
  1248. throw(oclHandles.error_str);
  1249. }
  1250. #endif
  1251. //_clFinish();
  1252. oclHandles.cl_status = clWaitForEvents(1, &e[0]);
  1253. #ifdef ERRMSG
  1254. if (oclHandles.cl_status!= CL_SUCCESS){
  1255. oclHandles.error_str = "excpetion in _clMemset() -> clWaitForEvents ->";
  1256. switch(oclHandles.cl_status){
  1257. case CL_INVALID_VALUE:
  1258. oclHandles.error_str += "CL_INVALID_VALUE";
  1259. break;
  1260. case CL_INVALID_CONTEXT:
  1261. oclHandles.error_str += "CL_INVALID_CONTEXT";
  1262. break;
  1263. case CL_INVALID_EVENT:
  1264. oclHandles.error_str += "CL_INVALID_EVENT";
  1265. break;
  1266. case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
  1267. oclHandles.error_str += "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
  1268. break;
  1269. case CL_OUT_OF_RESOURCES:
  1270. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1271. break;
  1272. case CL_OUT_OF_HOST_MEMORY:
  1273. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1274. break;
  1275. default:
  1276. oclHandles.error_str += "Unkown Reason";
  1277. break;
  1278. }
  1279. throw(oclHandles.error_str);
  1280. }
  1281. #endif
  1282. #ifdef PROFILE_
  1283. double t2 = gettime();
  1284. H2D += t2 - t1;
  1285. #endif
  1286. }
  1287. /*------------------------------------------------------------
  1288. @function: entry of invoke the kernel function using 2d working items
  1289. @params:
  1290. kernel_id: the index of kernel to set
  1291. range_x: the number of working items in x direction
  1292. range_y: the number of working items in y direction
  1293. group_x: the number of working items in each work group in x direction
  1294. group_y: the number of working items in each work group in y direction
  1295. @return: NULL
  1296. @date: 03/04/2011
  1297. ------------------------------------------------------------*/
  1298. void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, int group_y) throw(string){
  1299. #ifdef PROFILE_
  1300. double t1 = gettime();
  1301. #endif
  1302. cl_uint work_dim = WORK_DIM;
  1303. size_t local_work_size[] = {group_x, group_y};
  1304. size_t global_work_size[] = {range_x, range_y};
  1305. cl_event e[1];
  1306. /*if(work_items%work_group_size != 0) //process situations that work_items cannot be divided by work_group_size
  1307. work_items = work_items + (work_group_size-(work_items%work_group_size));*/
  1308. oclHandles.cl_status = clEnqueueNDRangeKernel(oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0, \
  1309. global_work_size, local_work_size, 0 , 0, &(e[0]) );
  1310. #ifdef ERRMSG
  1311. if(oclHandles.cl_status != CL_SUCCESS){
  1312. oclHandles.error_str = "excpetion in _clInvokeKernel() -> ";
  1313. switch(oclHandles.cl_status){
  1314. case CL_INVALID_PROGRAM_EXECUTABLE:
  1315. oclHandles.error_str += "CL_INVALID_PROGRAM_EXECUTABLE";
  1316. break;
  1317. case CL_INVALID_COMMAND_QUEUE:
  1318. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  1319. break;
  1320. case CL_INVALID_KERNEL:
  1321. oclHandles.error_str += "CL_INVALID_KERNEL";
  1322. break;
  1323. case CL_INVALID_CONTEXT:
  1324. oclHandles.error_str += "CL_INVALID_CONTEXT";
  1325. break;
  1326. case CL_INVALID_KERNEL_ARGS:
  1327. oclHandles.error_str += "CL_INVALID_KERNEL_ARGS";
  1328. break;
  1329. case CL_INVALID_WORK_DIMENSION:
  1330. oclHandles.error_str += "CL_INVALID_WORK_DIMENSION";
  1331. break;
  1332. case CL_INVALID_GLOBAL_WORK_SIZE:
  1333. oclHandles.error_str += "CL_INVALID_GLOBAL_WORK_SIZE";
  1334. break;
  1335. case CL_INVALID_WORK_GROUP_SIZE:
  1336. oclHandles.error_str += "CL_INVALID_WORK_GROUP_SIZE";
  1337. break;
  1338. case CL_INVALID_WORK_ITEM_SIZE:
  1339. oclHandles.error_str += "CL_INVALID_WORK_ITEM_SIZE";
  1340. break;
  1341. case CL_INVALID_GLOBAL_OFFSET:
  1342. oclHandles.error_str += "CL_INVALID_GLOBAL_OFFSET";
  1343. break;
  1344. case CL_OUT_OF_RESOURCES:
  1345. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1346. break;
  1347. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  1348. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  1349. break;
  1350. case CL_INVALID_EVENT_WAIT_LIST:
  1351. oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
  1352. break;
  1353. case CL_OUT_OF_HOST_MEMORY:
  1354. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1355. break;
  1356. default:
  1357. oclHandles.error_str += "Unkown reseason";
  1358. break;
  1359. }
  1360. throw(oclHandles.error_str);
  1361. }
  1362. #endif
  1363. oclHandles.cl_status = clWaitForEvents(1, &e[0]);
  1364. #ifdef ERRMSG
  1365. if (oclHandles.cl_status!= CL_SUCCESS){
  1366. oclHandles.error_str = "excpetion in _clInvokeKernel2D() -> clWaitForEvents ->";
  1367. switch(oclHandles.cl_status){
  1368. case CL_INVALID_VALUE:
  1369. oclHandles.error_str += "CL_INVALID_VALUE";
  1370. break;
  1371. case CL_INVALID_CONTEXT:
  1372. oclHandles.error_str += "CL_INVALID_CONTEXT";
  1373. break;
  1374. case CL_INVALID_EVENT:
  1375. oclHandles.error_str += "CL_INVALID_EVENT";
  1376. break;
  1377. case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
  1378. oclHandles.error_str += "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
  1379. break;
  1380. case CL_OUT_OF_RESOURCES:
  1381. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1382. break;
  1383. case CL_OUT_OF_HOST_MEMORY:
  1384. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1385. break;
  1386. default:
  1387. oclHandles.error_str += "Unkown Reason";
  1388. break;
  1389. }
  1390. throw(oclHandles.error_str);
  1391. }
  1392. #endif
  1393. #ifdef PROFILE_
  1394. double t2 = gettime();
  1395. KE += t2 - t1;
  1396. #endif
  1397. }
  1398. /*------------------------------------------------------------
  1399. @function: release OpenCL memory objects
  1400. @params:
  1401. ob: the memory object to free or release
  1402. @return: NULL
  1403. @date: 03/04/2011
  1404. ------------------------------------------------------------*/
  1405. void _clFree(cl_mem ob) throw(string){
  1406. #ifdef PROFILE_
  1407. double t1 = gettime();
  1408. #endif
  1409. if(ob!=NULL)
  1410. oclHandles.cl_status = clReleaseMemObject(ob);
  1411. #ifdef ERRMSG
  1412. if (oclHandles.cl_status!= CL_SUCCESS){
  1413. oclHandles.error_str = "excpetion in _clFree() ->";
  1414. switch(oclHandles.cl_status){
  1415. case CL_INVALID_MEM_OBJECT:
  1416. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  1417. break;
  1418. case CL_OUT_OF_RESOURCES:
  1419. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1420. break;
  1421. case CL_OUT_OF_HOST_MEMORY:
  1422. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1423. break;
  1424. default:
  1425. oclHandles.error_str += "Unkown reseason";
  1426. break;
  1427. }
  1428. throw(oclHandles.error_str);
  1429. }
  1430. #endif
  1431. #ifdef PROFILE_
  1432. double t2 = gettime();
  1433. MF += t2 - t1;
  1434. #endif
  1435. }
  1436. /*------------------------------------------------------------
  1437. @function: output time profiling information
  1438. @params: NULL
  1439. @return: NULL
  1440. @date: 03/04/2011
  1441. ------------------------------------------------------------*/
  1442. void _clStatistics(){
  1443. #ifdef PROFILE_
  1444. FILE *fp_pd = fopen("PD_OCL.txt", "a");
  1445. fprintf(fp_pd, "%lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf\n", CC, CR, MA, MF, H2D, D2H, D2D, KE, KC);
  1446. fclose(fp_pd);
  1447. #endif
  1448. return ;
  1449. }
  1450. #endif //_CL_HELPER_