CLHelper.h 47 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421
  1. /********************************************************************
  2. //--cambine:helper function for OpenCL
  3. //--programmer: Jianbin Fang
  4. //--date: 27/12/2010
  5. ********************************************************************/
  6. #ifndef _CL_HELPER_
  7. #define _CL_HELPER_
  8. #include <CL/cl.h>
  9. #include <vector>
  10. #include <iostream>
  11. #include <fstream>
  12. #include <string>
  13. #include "util.h"
  14. using std::string;
  15. using std::ifstream;
  16. using std::cerr;
  17. using std::endl;
  18. using std::cout;
  19. #define PROFILE_
  20. #ifdef PROFILE_
  21. double TE; //: total execution time;
  22. double CC; //: Context creation time;
  23. double CR; //: Context release time;
  24. double MA; //: GPU memory allocation time;
  25. double MF; //: GPU memory free time;
  26. double H2D; //: the time to transfer data from host to device;
  27. double D2H; //: the time to transfer data from device to host;
  28. double D2D; //: the time to transfer data from device to device;
  29. double KE; //: the kernel execution time
  30. double KC; //: the kernel compilation time
  31. #endif
  32. //#pragma OPENCL EXTENSION cl_nv_compiler_options:enable
  33. #define WORK_DIM 2 //work-items dimensions
  34. /*------------------------------------------------------------
  35. @struct: the structure of device properties
  36. @date: 24/03/2011
  37. ------------------------------------------------------------*/
  38. struct _clDeviceProp{
  39. /*CL_DEVICE_ADDRESS_BITS
  40. CL_DEVICE_AVAILABLE
  41. CL_DEVICE_COMPILER_AVAILABLE
  42. CL_DEVICE_ENDIAN_LITTLE
  43. CL_DEVICE_ERROR_CORRECTION_SUPPORT
  44. CL_DEVICE_EXECUTION_CAPABILITIES
  45. CL_DEVICE_EXTENSIONS
  46. CL_DEVICE_GLOBAL_MEM_CACHE_SIZE
  47. CL_DEVICE_GLOBAL_MEM_CACHE_TYPE
  48. CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE
  49. CL_DEVICE_GLOBAL_MEM_SIZE
  50. CL_DEVICE_IMAGE_SUPPORT
  51. CL_DEVICE_IMAGE2D_MAX_HEIGHT
  52. CL_DEVICE_IMAGE2D_MAX_WIDTH
  53. CL_DEVICE_IMAGE3D_MAX_DEPTH
  54. CL_DEVICE_IMAGE3D_MAX_HEIGHT
  55. CL_DEVICE_IMAGE3D_MAX_WIDTH
  56. CL_DEVICE_LOCAL_MEM_SIZE
  57. CL_DEVICE_LOCAL_MEM_TYPE
  58. CL_DEVICE_MAX_CLOCK_FREQUENCY
  59. CL_DEVICE_MAX_COMPUTE_UNITS
  60. CL_DEVICE_MAX_CONSTANT_ARGS
  61. CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE
  62. CL_DEVICE_MAX_MEM_ALLOC_SIZE
  63. CL_DEVICE_MAX_PARAMETER_SIZE
  64. CL_DEVICE_MAX_READ_IMAGE_ARGS
  65. CL_DEVICE_MAX_SAMPLERS
  66. CL_DEVICE_MAX_WORK_GROUP_SIZE
  67. CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS
  68. CL_DEVICE_MAX_WORK_ITEM_SIZES
  69. CL_DEVICE_MAX_WRITE_IMAGE_ARGS
  70. CL_DEVICE_MEM_BASE_ADDR_ALIGN
  71. CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE
  72. CL_DEVICE_NAME
  73. CL_DEVICE_PLATFORM
  74. CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR
  75. CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE
  76. CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT
  77. CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT
  78. CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG
  79. CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT
  80. CL_DEVICE_PROFILE
  81. CL_DEVICE_PROFILING_TIMER_RESOLUTION
  82. CL_DEVICE_QUEUE_PROPERTIES
  83. CL_DEVICE_SINGLE_FP_CONFIG
  84. CL_DEVICE_TYPE
  85. CL_DEVICE_VENDOR_ID
  86. CL_DEVICE_VENDOR
  87. CL_DEVICE_VERSION
  88. CL_DRIVER_VERSION*/
  89. char device_name[100];
  90. };
  91. struct oclHandleStruct{
  92. cl_context context;
  93. cl_device_id *devices;
  94. cl_command_queue queue;
  95. cl_program program;
  96. cl_int cl_status;
  97. std::string error_str;
  98. std::vector<cl_kernel> kernel;
  99. cl_mem pinned_mem_out;
  100. cl_mem pinned_mem_in;
  101. };
  102. struct oclHandleStruct oclHandles;
  103. char kernel_file[100] = "Kernels.cl";
  104. int total_kernels = 5;
  105. //string kernel_names[9] = {"memset_kernel", "initialize_variables", "compute_step_factor", "compute_flux", "time_step", "compute_speed_sqd", "compute_velocity", "compute_pressure", "compute_speed_of_sound"};
  106. string kernel_names[5] = {"memset_kernel", "initialize_variables", "compute_step_factor", "compute_flux", "time_step"};
  107. int work_group_size = BLOCK_SIZE_0;
  108. int device_id_inused = 0; //deviced id used (default : 0)
  109. int number_devices = 0;
  110. /*------------------------------------------------------------
  111. @function: select device to use
  112. @params:
  113. size: the index of device to be used
  114. @return: NULL
  115. @date: 24/03/2011
  116. ------------------------------------------------------------*/
  117. void _clSetDevice(int idx) throw(string){
  118. cl_int resultCL;
  119. oclHandles.context = NULL;
  120. oclHandles.devices = NULL;
  121. oclHandles.queue = NULL;
  122. oclHandles.program = NULL;
  123. cl_uint deviceListSize;
  124. cl_uint numPlatforms;
  125. cl_platform_id targetPlatform = NULL;
  126. resultCL = clGetPlatformIDs(0, NULL, &numPlatforms);
  127. if (resultCL != CL_SUCCESS)
  128. throw (string("InitCL()::Error: Getting number of platforms (clGetPlatformIDs)"));
  129. if (!(numPlatforms > 0))
  130. throw (string("InitCL()::Error: No platforms found (clGetPlatformIDs)"));
  131. cl_platform_id* allPlatforms = (cl_platform_id*) malloc(numPlatforms * sizeof(cl_platform_id));
  132. resultCL = clGetPlatformIDs(numPlatforms, allPlatforms, NULL);
  133. if (resultCL != CL_SUCCESS)
  134. throw (string("InitCL()::Error: Getting platform ids (clGetPlatformIDs)"));
  135. /* Select the target platform. Default: first platform */
  136. targetPlatform = allPlatforms[0];
  137. free(allPlatforms);
  138. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ALL, 0, NULL, &deviceListSize);
  139. if(oclHandles.cl_status!=CL_SUCCESS){
  140. throw(string("exception in _clInit -> clGetDeviceIDs"));
  141. }
  142. if (deviceListSize == 0)
  143. throw(string("InitCL()::Error: No devices found."));
  144. if(idx>(deviceListSize-1))
  145. throw(string(":invalid device ID:"));
  146. device_id_inused = idx;
  147. }
  148. /*------------------------------------------------------------
  149. @function: get device properties indexed by 'idx'
  150. @params:
  151. idx: device index
  152. prop: output properties
  153. @return: prop
  154. @date: 24/03/2011
  155. ------------------------------------------------------------*/
  156. void _clGetDeviceProperties(int idx, _clDeviceProp *prop) throw(string){
  157. oclHandles.cl_status= clGetDeviceInfo(oclHandles.devices[idx], CL_DEVICE_NAME, 100, prop->device_name, NULL);
  158. #ifdef ERRMSG
  159. if(oclHandles.cl_status != CL_SUCCESS){
  160. oclHandles.error_str = "exception in _clGetDeviceProperties-> ";
  161. switch(oclHandles.cl_status){
  162. case CL_INVALID_DEVICE:
  163. oclHandles.error_str += "CL_INVALID_DEVICE";
  164. break;
  165. case CL_INVALID_VALUE:
  166. oclHandles.error_str += "CL_INVALID_VALUE";
  167. break;
  168. default:
  169. oclHandles.error_str += "unknown reasons";
  170. break;
  171. }
  172. throw(oclHandles.error_str);
  173. }
  174. #endif
  175. }
  176. /*
  177. * Converts the contents of a file into a string
  178. */
  179. string FileToString(const string fileName){
  180. ifstream f(fileName.c_str(), ifstream::in | ifstream::binary);
  181. try{
  182. size_t size;
  183. char* str;
  184. string s;
  185. if(f.is_open()){
  186. size_t fileSize;
  187. f.seekg(0, ifstream::end);
  188. size = fileSize = f.tellg();
  189. f.seekg(0, ifstream::beg);
  190. str = new char[size+1];
  191. if (!str) throw(string("Could not allocate memory"));
  192. f.read(str, fileSize);
  193. f.close();
  194. str[size] = '\0';
  195. s = str;
  196. delete [] str;
  197. return s;
  198. }
  199. }
  200. catch(std::string msg){
  201. cerr << "Exception caught in FileToString(): " << msg << endl;
  202. if(f.is_open())
  203. f.close();
  204. }
  205. catch(...){
  206. cerr << "Exception caught in FileToString()" << endl;
  207. if(f.is_open())
  208. f.close();
  209. }
  210. string errorMsg = "FileToString()::Error: Unable to open file "
  211. + fileName;
  212. throw(errorMsg);
  213. }
  214. /*------------------------------------------------------------
  215. @function: Read command line parameters
  216. @params: NULL
  217. @return:
  218. @date: 24/03/2011
  219. ------------------------------------------------------------*/
  220. char device_type[3];
  221. int device_id = 0;
  222. void _clCmdParams(int argc, char* argv[]){
  223. for (int i = 0; i < argc; ++i){
  224. switch (argv[i][1]){
  225. case 't': //--t stands for device type
  226. if (++i < argc){
  227. sscanf(argv[i], "%s", device_type);
  228. }
  229. else{
  230. std::cerr << "Could not read argument after option " << argv[i-1] << std::endl;
  231. throw;
  232. }
  233. break;
  234. case 'd': //--d stands for device id
  235. if (++i < argc){
  236. sscanf(argv[i], "%d", &device_id);
  237. }
  238. else{
  239. std::cerr << "Could not read argument after option " << argv[i-1] << std::endl;
  240. throw;
  241. }
  242. break;
  243. default:
  244. ;
  245. }
  246. }
  247. }
  248. /*------------------------------------------------------------
  249. @function: Initlize CL objects
  250. @params:
  251. device_id: device id
  252. device_type: the types of devices, e.g. CPU, GPU, ACCERLERATOR,...
  253. (1) -t cpu/gpu/acc -d 0/1/2/...
  254. (2) -t cpu/gpu/acc [-d 0]
  255. (3) [-t default] -d 0/1/2/...
  256. (4) NULL [-d 0]
  257. @return:
  258. @description:
  259. there are 5 steps to initialize all the OpenCL objects needed,
  260. @revised:
  261. get the number of devices and devices have no relationship with context
  262. @date: 24/03/2011
  263. ------------------------------------------------------------*/
  264. void _clInit(string device_type, int device_id)throw(string){
  265. #ifdef PROFILE_
  266. TE = 0;
  267. CC = 0;
  268. CR = 0;
  269. MA = 0;
  270. MF = 0;
  271. H2D = 0;
  272. D2H = 0;
  273. D2D = 0;
  274. KE = 0;
  275. KC = 0;
  276. #endif
  277. int DEVICE_ID_INUSED = 0;
  278. _clDeviceProp prop;
  279. #ifdef PROFILE_
  280. double t1 = gettime();
  281. #endif
  282. cl_int resultCL;
  283. oclHandles.context = NULL;
  284. oclHandles.devices = NULL;
  285. oclHandles.queue = NULL;
  286. oclHandles.program = NULL;
  287. cl_uint deviceListSize;
  288. //-----------------------------------------------
  289. //--cambine-1: find the available platforms and select one
  290. cl_uint numPlatforms;
  291. cl_platform_id targetPlatform = NULL;
  292. resultCL = clGetPlatformIDs(0, NULL, &numPlatforms);
  293. if (resultCL != CL_SUCCESS)
  294. throw (string("InitCL()::Error: Getting number of platforms (clGetPlatformIDs)"));
  295. //printf("number of platforms:%d\n",numPlatforms); //by cambine
  296. #ifdef DEV_INFO
  297. std::cout<<"--cambine: number of platforms: "<<numPlatforms<<std::endl;
  298. #endif
  299. if (!(numPlatforms > 0))
  300. throw (string("InitCL()::Error: No platforms found (clGetPlatformIDs)"));
  301. cl_platform_id* allPlatforms = (cl_platform_id*) malloc(numPlatforms * sizeof(cl_platform_id));
  302. resultCL = clGetPlatformIDs(numPlatforms, allPlatforms, NULL);
  303. if (resultCL != CL_SUCCESS)
  304. throw (string("InitCL()::Error: Getting platform ids (clGetPlatformIDs)"));
  305. // Select the target platform. Default: first platform
  306. targetPlatform = allPlatforms[1];
  307. for (int i = 0; i < numPlatforms; i++)
  308. {
  309. char pbuff[128];
  310. resultCL = clGetPlatformInfo( allPlatforms[i],
  311. CL_PLATFORM_VENDOR,
  312. sizeof(pbuff),
  313. pbuff,
  314. NULL);
  315. if (resultCL != CL_SUCCESS)
  316. throw (string("InitCL()::Error: Getting platform info (clGetPlatformInfo)"));
  317. printf("vedor is %s\n",pbuff);
  318. #ifdef DEV_INFO
  319. std::cout<<"--cambine: vedor is: "<<pbuff<<std::endl;
  320. #endif
  321. }
  322. free(allPlatforms);
  323. //-----------------------------------------------
  324. //--cambine-2: detect OpenCL devices
  325. // First, get the size of device list
  326. if(device_type.compare("")!=0){
  327. if(device_type.compare("cpu")==0){
  328. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_CPU, 0, NULL, &deviceListSize);
  329. if(oclHandles.cl_status!=CL_SUCCESS){
  330. throw(string("exception in _clInit -> clGetDeviceIDs -> CPU"));
  331. }
  332. }
  333. if(device_type.compare("gpu")==0){
  334. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_GPU, 0, NULL, &deviceListSize);
  335. if(oclHandles.cl_status!=CL_SUCCESS){
  336. throw(string("exception in _clInit -> clGetDeviceIDs -> GPU"));
  337. }
  338. }
  339. if(device_type.compare("acc")==0){
  340. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ACCELERATOR, 0, NULL, &deviceListSize);
  341. if(oclHandles.cl_status!=CL_SUCCESS){
  342. throw(string("exception in _clInit -> clGetDeviceIDs -> ACCELERATOR"));
  343. }
  344. }
  345. }
  346. else{
  347. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ALL, 0, NULL, &deviceListSize);
  348. if(oclHandles.cl_status!=CL_SUCCESS){
  349. throw(string("exception in _clInit -> clGetDeviceIDs -> ALL"));
  350. }
  351. }
  352. if (deviceListSize == 0)
  353. throw(string("InitCL()::Error: No devices found."));
  354. #ifdef DEV_INFO
  355. std::cout<<"--cambine: number of device="<<deviceListSize<<std::endl;
  356. #endif
  357. number_devices = deviceListSize;
  358. // Now, allocate the device list
  359. // oclHandles.devices = (cl_device_id *)malloc(deviceListSize);
  360. oclHandles.devices = (cl_device_id *)malloc(sizeof(cl_device_id) * deviceListSize);
  361. if (oclHandles.devices == 0)
  362. throw(string("InitCL()::Error: Could not allocate memory."));
  363. // Next, get the device list data
  364. if(device_type.compare("")!=0){
  365. if(device_type.compare("cpu")==0){
  366. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_CPU, deviceListSize, oclHandles.devices, NULL);
  367. if(oclHandles.cl_status!=CL_SUCCESS){
  368. throw(string("exception in _clInit -> clGetDeviceIDs -> CPU ->2"));
  369. }
  370. }
  371. if(device_type.compare("gpu")==0){
  372. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_GPU, deviceListSize, oclHandles.devices, NULL);
  373. if(oclHandles.cl_status!=CL_SUCCESS){
  374. throw(string("exception in _clInit -> clGetDeviceIDs -> GPU -> 2"));
  375. }
  376. }
  377. if(device_type.compare("acc")==0){
  378. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ACCELERATOR, deviceListSize, oclHandles.devices, NULL);
  379. if(oclHandles.cl_status!=CL_SUCCESS){
  380. throw(string("exception in _clInit -> clGetDeviceIDs -> ACCELERATOR -> 2"));
  381. }
  382. }
  383. }
  384. else{
  385. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ALL, deviceListSize, oclHandles.devices, NULL);
  386. if(oclHandles.cl_status!=CL_SUCCESS){
  387. throw(string("exception in _clInit -> clGetDeviceIDs -> ALL -> 2"));
  388. }
  389. }
  390. if(device_id!=0){
  391. if(device_id>(deviceListSize-1))
  392. throw(string("Invalidate device id"));
  393. DEVICE_ID_INUSED = device_id;
  394. }
  395. _clGetDeviceProperties(DEVICE_ID_INUSED, &prop);
  396. std::cout<<"--cambine: device name="<<prop.device_name<<std::endl;
  397. #ifdef DEV_INFO
  398. std::cout<<"--cambine: return device list successfully!"<<std::endl;
  399. #endif
  400. //-----------------------------------------------
  401. //--cambine-3: create an OpenCL context
  402. #ifdef DEV_INFO
  403. std::cout<<"--cambine: before creating context"<<std::endl;
  404. #endif
  405. cl_context_properties cprops[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)targetPlatform, 0 };
  406. oclHandles.context = clCreateContext(0,
  407. deviceListSize,
  408. oclHandles.devices,
  409. NULL,
  410. NULL,
  411. &resultCL);
  412. if ((resultCL != CL_SUCCESS) || (oclHandles.context == NULL))
  413. throw (string("InitCL()::Error: Creating Context (clCreateContextFromType)"));
  414. #ifdef DEV_INFO
  415. std::cout<<"--cambine: create OCL context successfully!"<<std::endl;
  416. #endif
  417. //-----------------------------------------------
  418. //--cambine-4: Create an OpenCL command queue
  419. oclHandles.queue = clCreateCommandQueue(oclHandles.context,
  420. oclHandles.devices[DEVICE_ID_INUSED],
  421. 0,
  422. &resultCL);
  423. if ((resultCL != CL_SUCCESS) || (oclHandles.queue == NULL))
  424. throw(string("InitCL()::Creating Command Queue. (clCreateCommandQueue)"));
  425. #ifdef PROFILE_
  426. double t2 = gettime();
  427. CC += t2 - t1;
  428. #endif
  429. //-----------------------------------------------
  430. //--cambine-5: Load CL file, build CL program object, create CL kernel object
  431. std::string source_str = FileToString(kernel_file);
  432. const char * source = source_str.c_str();
  433. size_t sourceSize[] = { source_str.length() };
  434. oclHandles.program = clCreateProgramWithSource(oclHandles.context,
  435. 1,
  436. &source,
  437. sourceSize,
  438. &resultCL);
  439. if ((resultCL != CL_SUCCESS) || (oclHandles.program == NULL))
  440. throw(string("InitCL()::Error: Loading Binary into cl_program. (clCreateProgramWithBinary)"));
  441. //insert debug information
  442. std::string options= "";
  443. //options += " -cl-nv-opt-level=3";
  444. resultCL = clBuildProgram(oclHandles.program, deviceListSize, oclHandles.devices, options.c_str(), NULL, NULL);
  445. if ((resultCL != CL_SUCCESS) || (oclHandles.program == NULL)){
  446. cerr << "InitCL()::Error: In clBuildProgram" << endl;
  447. size_t length;
  448. resultCL = clGetProgramBuildInfo(oclHandles.program,
  449. oclHandles.devices[DEVICE_ID_INUSED],
  450. CL_PROGRAM_BUILD_LOG,
  451. 0,
  452. NULL,
  453. &length);
  454. if(resultCL != CL_SUCCESS)
  455. throw(string("InitCL()::Error: Getting Program build info(clGetProgramBuildInfo)"));
  456. char* buffer = (char*)malloc(length);
  457. resultCL = clGetProgramBuildInfo(oclHandles.program,
  458. oclHandles.devices[DEVICE_ID_INUSED],
  459. CL_PROGRAM_BUILD_LOG,
  460. length,
  461. buffer,
  462. NULL);
  463. if(resultCL != CL_SUCCESS)
  464. throw(string("InitCL()::Error: Getting Program build info(clGetProgramBuildInfo)"));
  465. cerr << buffer << endl;
  466. FILE * fp = fopen("errinfo", "w");
  467. fprintf(fp, "%s\n", buffer);
  468. fclose(fp);
  469. free(buffer);
  470. throw(string("InitCL()::Error: Building Program (clBuildProgram)"));
  471. }
  472. #ifdef PROFILE_
  473. double t3 = gettime();
  474. KC += t3 - t2;
  475. #endif
  476. //get program information in intermediate representation
  477. #ifdef PTX_MSG
  478. size_t binary_sizes[deviceListSize];
  479. char * binaries[deviceListSize];
  480. //figure out number of devices and the sizes of the binary for each device.
  481. oclHandles.cl_status = clGetProgramInfo(oclHandles.program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t)*deviceListSize, &binary_sizes, NULL );
  482. if(oclHandles.cl_status!=CL_SUCCESS){
  483. throw(string("--cambine:exception in _InitCL -> clGetProgramInfo-2"));
  484. }
  485. std::cout<<"--cambine:"<<binary_sizes<<std::endl;
  486. //copy over all of the generated binaries.
  487. for(int i=0;i<deviceListSize;i++)
  488. binaries[i] = (char *)malloc( sizeof(char)*(binary_sizes[i]+1));
  489. oclHandles.cl_status = clGetProgramInfo(oclHandles.program, CL_PROGRAM_BINARIES, sizeof(char *)*deviceListSize, binaries, NULL );
  490. if(oclHandles.cl_status!=CL_SUCCESS){
  491. throw(string("--cambine:exception in _InitCL -> clGetProgramInfo-3"));
  492. }
  493. for(int i=0;i<deviceListSize;i++)
  494. binaries[i][binary_sizes[i]] = '\0';
  495. std::cout<<"--cambine:writing ptd information..."<<std::endl;
  496. FILE * ptx_file = fopen("cl.ptx","w");
  497. if(ptx_file==NULL){
  498. throw(string("exceptions in allocate ptx file."));
  499. }
  500. fprintf(ptx_file,"%s",binaries[DEVICE_ID_INUSED]);
  501. fclose(ptx_file);
  502. std::cout<<"--cambine:writing ptd information done."<<std::endl;
  503. for(int i=0;i<deviceListSize;i++)
  504. free(binaries[i]);
  505. #endif
  506. for (int nKernel = 0; nKernel < total_kernels; nKernel++)
  507. {
  508. // get a kernel object handle for a kernel with the given name
  509. cl_kernel kernel = clCreateKernel(oclHandles.program,
  510. (kernel_names[nKernel]).c_str(),
  511. &resultCL);
  512. if ((resultCL != CL_SUCCESS) || (kernel == NULL))
  513. {
  514. string errorMsg = "InitCL()::Error: Creating Kernel (clCreateKernel) \"" + kernel_names[nKernel] + "\"";
  515. throw(errorMsg);
  516. }
  517. oclHandles.kernel.push_back(kernel);
  518. }
  519. //get resource alocation information
  520. #ifdef RES_MSG
  521. char * build_log;
  522. size_t ret_val_size;
  523. oclHandles.cl_status = clGetProgramBuildInfo(oclHandles.program, oclHandles.devices[DEVICE_ID_INUSED], CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
  524. if(oclHandles.cl_status!=CL_SUCCESS){
  525. throw(string("exceptions in _InitCL -> getting resource information"));
  526. }
  527. build_log = (char *)malloc(ret_val_size+1);
  528. oclHandles.cl_status = clGetProgramBuildInfo(oclHandles.program, oclHandles.devices[DEVICE_ID_INUSED], CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
  529. if(oclHandles.cl_status!=CL_SUCCESS){
  530. throw(string("exceptions in _InitCL -> getting resources allocation information-2"));
  531. }
  532. build_log[ret_val_size] = '\0';
  533. std::cout<<"--cambine:"<<build_log<<std::endl;
  534. free(build_log);
  535. #endif
  536. #ifdef PROFILE_
  537. double t4 = gettime();
  538. CC += t4 - t3;
  539. #endif
  540. }
  541. /*------------------------------------------------------------
  542. @function: release CL objects
  543. @params: NULL
  544. @return:
  545. @date: 24/03/2011
  546. ------------------------------------------------------------*/
  547. void _clRelease()
  548. {
  549. #ifdef PROFILE_
  550. double t1 = gettime();
  551. #endif
  552. bool errorFlag = false;
  553. for (int nKernel = 0; nKernel < oclHandles.kernel.size(); nKernel++){
  554. if (oclHandles.kernel[nKernel] != NULL){
  555. cl_int resultCL = clReleaseKernel(oclHandles.kernel[nKernel]);
  556. if (resultCL != CL_SUCCESS){
  557. cerr << "ReleaseCL()::Error: In clReleaseKernel" << endl;
  558. errorFlag = true;
  559. }
  560. oclHandles.kernel[nKernel] = NULL;
  561. }
  562. oclHandles.kernel.clear();
  563. }
  564. if (oclHandles.program != NULL){
  565. cl_int resultCL = clReleaseProgram(oclHandles.program);
  566. if (resultCL != CL_SUCCESS){
  567. cerr << "ReleaseCL()::Error: In clReleaseProgram" << endl;
  568. errorFlag = true;
  569. }
  570. oclHandles.program = NULL;
  571. }
  572. if (oclHandles.queue != NULL){
  573. cl_int resultCL = clReleaseCommandQueue(oclHandles.queue);
  574. if (resultCL != CL_SUCCESS)
  575. {
  576. cerr << "ReleaseCL()::Error: In clReleaseCommandQueue" << endl;
  577. errorFlag = true;
  578. }
  579. oclHandles.queue = NULL;
  580. }
  581. free(oclHandles.devices);
  582. if (oclHandles.context != NULL){
  583. cl_int resultCL = clReleaseContext(oclHandles.context);
  584. if (resultCL != CL_SUCCESS){
  585. cerr << "ReleaseCL()::Error: In clReleaseContext" << endl;
  586. errorFlag = true;
  587. }
  588. oclHandles.context = NULL;
  589. }
  590. if (errorFlag) throw(string("ReleaseCL()::Error encountered."));
  591. #ifdef PROFILE_
  592. double t2 = gettime();
  593. CR += t2 - t1;
  594. #endif
  595. }
  596. /*------------------------------------------------------------
  597. @function: create read and write buffer for devices
  598. @params:
  599. size: the size of device memory to be allocated
  600. @return: mem_d
  601. @date: 24/03/2011
  602. ------------------------------------------------------------*/
  603. cl_mem _clMalloc(int size) throw(string){
  604. #ifdef PROFILE_
  605. double t1 = gettime();
  606. #endif
  607. cl_mem d_mem;
  608. d_mem = clCreateBuffer(oclHandles.context, CL_MEM_READ_WRITE, size, NULL, &oclHandles.cl_status);
  609. #ifdef ERRMSG
  610. if(oclHandles.cl_status != CL_SUCCESS){
  611. oclHandles.error_str = "excpetion in _clMalloc -> ";
  612. switch(oclHandles.cl_status){
  613. case CL_INVALID_CONTEXT:
  614. oclHandles.error_str += "CL_INVALID_CONTEXT";
  615. break;
  616. case CL_INVALID_VALUE:
  617. oclHandles.error_str += "CL_INVALID_VALUE";
  618. break;
  619. case CL_INVALID_BUFFER_SIZE:
  620. oclHandles.error_str += "CL_INVALID_BUFFER_SIZE";
  621. break;
  622. case CL_INVALID_HOST_PTR:
  623. oclHandles.error_str += "CL_INVALID_HOST_PTR";
  624. break;
  625. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  626. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  627. break;
  628. case CL_OUT_OF_HOST_MEMORY:
  629. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  630. break;
  631. default:
  632. oclHandles.error_str += "unknown reasons";
  633. break;
  634. }
  635. throw(oclHandles.error_str);
  636. }
  637. #endif
  638. #ifdef PROFILE_
  639. double t2 = gettime();
  640. MA += t2 - t1;
  641. #endif
  642. return d_mem;
  643. }
  644. /*------------------------------------------------------------
  645. @function: malloc pinned memoty
  646. @params:
  647. size: the size of data to be transferred in bytes
  648. @return: the pointer of host adress
  649. @date: 06/04/2011
  650. ------------------------------------------------------------*/
  651. void* _clMallocHost(int size)throw(string){
  652. void * mem_h;
  653. oclHandles.pinned_mem_out = clCreateBuffer(oclHandles.context, CL_MEM_READ_WRITE|CL_MEM_ALLOC_HOST_PTR, size, NULL, &oclHandles.cl_status);
  654. #ifdef ERRMSG
  655. if(oclHandles.cl_status != CL_SUCCESS){
  656. oclHandles.error_str = "excpetion in _clMallocHost -> clCreateBuffer";
  657. switch(oclHandles.cl_status){
  658. case CL_INVALID_CONTEXT:
  659. oclHandles.error_str += "CL_INVALID_CONTEXT";
  660. break;
  661. case CL_INVALID_VALUE:
  662. oclHandles.error_str += "CL_INVALID_VALUE";
  663. break;
  664. case CL_INVALID_BUFFER_SIZE:
  665. oclHandles.error_str += "CL_INVALID_BUFFER_SIZE";
  666. break;
  667. case CL_INVALID_HOST_PTR:
  668. oclHandles.error_str += "CL_INVALID_HOST_PTR";
  669. break;
  670. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  671. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  672. break;
  673. case CL_OUT_OF_HOST_MEMORY:
  674. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  675. break;
  676. default:
  677. oclHandles.error_str += "unknown reasons";
  678. break;
  679. }
  680. throw(oclHandles.error_str);
  681. }
  682. #endif
  683. mem_h = clEnqueueMapBuffer(oclHandles.queue, oclHandles.pinned_mem_out, CL_TRUE, CL_MAP_READ, 0, size, 0, NULL, NULL, &oclHandles.cl_status);
  684. #ifdef ERRMSG
  685. if(oclHandles.cl_status != CL_SUCCESS||mem_h==NULL){
  686. oclHandles.error_str = "excpetion in _clMallocHost -> clEnqueueMapBuffer";
  687. switch(oclHandles.cl_status){
  688. case CL_INVALID_COMMAND_QUEUE:
  689. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  690. break;
  691. case CL_INVALID_CONTEXT:
  692. oclHandles.error_str += "CL_INVALID_CONTEXT";
  693. break;
  694. case CL_INVALID_MEM_OBJECT:
  695. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  696. break;
  697. case CL_INVALID_VALUE:
  698. oclHandles.error_str += "CL_INVALID_VALUE";
  699. break;
  700. case CL_INVALID_EVENT_WAIT_LIST:
  701. oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
  702. break;
  703. case CL_MAP_FAILURE:
  704. oclHandles.error_str += "CL_MAP_FAILURE";
  705. break;
  706. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  707. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  708. break;
  709. case CL_OUT_OF_HOST_MEMORY:
  710. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  711. break;
  712. default:
  713. oclHandles.error_str += "unknown reasons";
  714. break;
  715. }
  716. throw(oclHandles.error_str);
  717. }
  718. #endif
  719. return mem_h;
  720. }
  721. /*------------------------------------------------------------
  722. @function: free pinned memory
  723. @params:
  724. io: to free pinned-in or pinned-out memory
  725. mem_h: the host address
  726. @return: NULL
  727. @date: 06/04/2011
  728. ------------------------------------------------------------*/
  729. void _clFreeHost(int io, void * mem_h){
  730. if(io==0){ //in
  731. if(mem_h){
  732. oclHandles.cl_status = clEnqueueUnmapMemObject(oclHandles.queue, oclHandles.pinned_mem_in, (void*)mem_h, 0, NULL, NULL);
  733. #ifdef ERRMSG
  734. if(oclHandles.cl_status != CL_SUCCESS){
  735. oclHandles.error_str = "excpetion in _clFreeHost -> clEnqueueUnmapMemObject(in)";
  736. switch(oclHandles.cl_status){
  737. case CL_INVALID_COMMAND_QUEUE:
  738. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  739. break;
  740. case CL_INVALID_MEM_OBJECT:
  741. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  742. break;
  743. case CL_INVALID_VALUE:
  744. oclHandles.error_str += "CL_INVALID_VALUE";
  745. break;
  746. case CL_OUT_OF_RESOURCES:
  747. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  748. break;
  749. case CL_OUT_OF_HOST_MEMORY:
  750. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  751. break;
  752. case CL_INVALID_CONTEXT:
  753. oclHandles.error_str += "CL_INVALID_CONTEXT";
  754. break;
  755. default:
  756. oclHandles.error_str += "unknown reasons";
  757. break;
  758. }
  759. throw(oclHandles.error_str);
  760. }
  761. #endif
  762. }
  763. }
  764. else if(io==1){ //out
  765. if(mem_h){
  766. oclHandles.cl_status = clEnqueueUnmapMemObject(oclHandles.queue, oclHandles.pinned_mem_out, (void*)mem_h, 0, NULL, NULL);
  767. #ifdef ERRMSG
  768. if(oclHandles.cl_status != CL_SUCCESS){
  769. oclHandles.error_str = "excpetion in _clFreeHost -> clEnqueueUnmapMemObject(in)";
  770. switch(oclHandles.cl_status){
  771. case CL_INVALID_COMMAND_QUEUE:
  772. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  773. break;
  774. case CL_INVALID_MEM_OBJECT:
  775. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  776. break;
  777. case CL_INVALID_VALUE:
  778. oclHandles.error_str += "CL_INVALID_VALUE";
  779. break;
  780. case CL_OUT_OF_RESOURCES:
  781. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  782. break;
  783. case CL_OUT_OF_HOST_MEMORY:
  784. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  785. break;
  786. case CL_INVALID_CONTEXT:
  787. oclHandles.error_str += "CL_INVALID_CONTEXT";
  788. break;
  789. default:
  790. oclHandles.error_str += "unknown reasons";
  791. break;
  792. }
  793. throw(oclHandles.error_str);
  794. }
  795. #endif
  796. }
  797. }
  798. else
  799. throw(string("encounter invalid choice when freeing pinned memmory"));
  800. }
  801. /*------------------------------------------------------------
  802. @function: transfer data from host to device
  803. @params:
  804. dest: the destination device memory
  805. src: the source host memory
  806. size: the size of data to be transferred in bytes
  807. @return: NULL
  808. @date: 17/01/2011
  809. ------------------------------------------------------------*/
  810. void _clMemcpyH2D(cl_mem dst, const void *src, int size) throw(string){
  811. #ifdef PROFILE_
  812. double t1 = gettime();
  813. #endif
  814. oclHandles.cl_status = clEnqueueWriteBuffer(oclHandles.queue, dst, CL_TRUE, 0, size, src, 0, NULL, NULL);
  815. #ifdef ERRMSG
  816. if(oclHandles.cl_status != CL_SUCCESS){
  817. oclHandles.error_str = "excpetion in _clMemcpyH2D -> ";
  818. switch(oclHandles.cl_status){
  819. case CL_INVALID_COMMAND_QUEUE:
  820. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  821. break;
  822. case CL_INVALID_CONTEXT:
  823. oclHandles.error_str += "CL_INVALID_CONTEXT";
  824. break;
  825. case CL_INVALID_MEM_OBJECT:
  826. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  827. break;
  828. case CL_INVALID_VALUE:
  829. oclHandles.error_str += "CL_INVALID_VALUE";
  830. break;
  831. case CL_INVALID_EVENT_WAIT_LIST:
  832. oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
  833. break;
  834. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  835. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  836. break;
  837. case CL_OUT_OF_HOST_MEMORY:
  838. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  839. break;
  840. default:
  841. oclHandles.error_str += "Unknown reason";
  842. break;
  843. }
  844. throw(oclHandles.error_str);
  845. }
  846. #endif
  847. #ifdef PROFILE_
  848. double t2 = gettime();
  849. H2D += t2 - t1;
  850. #endif
  851. }
  852. /*------------------------------------------------------------
  853. @function: transfer data from device to host
  854. @params:
  855. dest: the destination device memory
  856. src: the source host memory
  857. size: the size of data to be transferred in bytes
  858. @return: NULL
  859. @date: 17/01/2011
  860. ------------------------------------------------------------*/
  861. void _clMemcpyD2H(void * dst, cl_mem src, int size) throw(string){
  862. #ifdef PROFILE_
  863. double t1 = gettime();
  864. #endif
  865. oclHandles.cl_status = clEnqueueReadBuffer(oclHandles.queue, src, CL_TRUE, 0, size, dst, 0,0,0);
  866. #ifdef ERRMSG
  867. if(oclHandles.cl_status != CL_SUCCESS){
  868. oclHandles.error_str = "excpetion in _clMemCpyD2H -> ";
  869. switch(oclHandles.cl_status){
  870. case CL_INVALID_COMMAND_QUEUE:
  871. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  872. break;
  873. case CL_INVALID_CONTEXT:
  874. oclHandles.error_str += "CL_INVALID_CONTEXT";
  875. break;
  876. case CL_INVALID_MEM_OBJECT:
  877. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  878. break;
  879. case CL_INVALID_VALUE:
  880. oclHandles.error_str += "CL_INVALID_VALUE";
  881. break;
  882. case CL_INVALID_EVENT_WAIT_LIST:
  883. oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
  884. break;
  885. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  886. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  887. break;
  888. case CL_OUT_OF_HOST_MEMORY:
  889. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  890. break;
  891. default:
  892. oclHandles.error_str += "Unknown reason";
  893. break;
  894. }
  895. throw(oclHandles.error_str);
  896. }
  897. #endif
  898. #ifdef PROFILE_
  899. double t2 = gettime();
  900. D2H += t2 - t1;
  901. #endif
  902. }
  903. /*------------------------------------------------------------
  904. @function: transfer data from device to device
  905. @params:
  906. dest: the destination device memory
  907. src: the source device memory
  908. size: the size of data to be transferred in bytes
  909. @return: NULL
  910. @date: 27/03/2011
  911. ------------------------------------------------------------*/
  912. void _clMemcpyD2D(cl_mem dst, cl_mem src, int size) throw(string){
  913. #ifdef PROFILE_
  914. double t1 = gettime();
  915. #endif
  916. oclHandles.cl_status = clEnqueueCopyBuffer(oclHandles.queue, src, dst, 0, 0, size, 0, NULL, NULL);
  917. #ifdef ERRMSG
  918. if(oclHandles.cl_status != CL_SUCCESS){
  919. oclHandles.error_str = "excpetion in _clCpyMemD2D -> ";
  920. switch(oclHandles.cl_status){
  921. case CL_INVALID_COMMAND_QUEUE:
  922. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  923. break;
  924. case CL_INVALID_CONTEXT:
  925. oclHandles.error_str += "CL_INVALID_CONTEXT";
  926. break;
  927. case CL_INVALID_MEM_OBJECT:
  928. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  929. break;
  930. case CL_INVALID_VALUE:
  931. oclHandles.error_str += "CL_INVALID_VALUE";
  932. break;
  933. case CL_INVALID_EVENT_WAIT_LIST:
  934. oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
  935. break;
  936. case CL_MISALIGNED_SUB_BUFFER_OFFSET:
  937. oclHandles.error_str += "CL_MISALIGNED_SUB_BUFFER_OFFSET";
  938. break;
  939. case CL_MEM_COPY_OVERLAP:
  940. oclHandles.error_str += "CL_MEM_COPY_OVERLAP";
  941. break;
  942. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  943. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  944. break;
  945. case CL_OUT_OF_RESOURCES:
  946. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  947. break;
  948. case CL_OUT_OF_HOST_MEMORY:
  949. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  950. break;
  951. default:
  952. oclHandles.error_str += "Unknown reason";
  953. break;
  954. }
  955. throw(oclHandles.error_str);
  956. }
  957. #endif
  958. #ifdef PROFILE_
  959. double t2 = gettime();
  960. D2D += t2 - t1;
  961. #endif
  962. }
  963. /*------------------------------------------------------------
  964. @function: set kernel arguments
  965. @params:
  966. kernel_id: the index of kernel to set
  967. arg_idx: the index of argument
  968. d_mem: the variable of device memory
  969. size: the size of device memory
  970. @return: NULL
  971. @date: 03/04/2011
  972. ------------------------------------------------------------*/
  973. void _clSetArgs(int kernel_id, int arg_idx, void * d_mem, int size = 0) throw(string){
  974. if(!size){
  975. oclHandles.cl_status = clSetKernelArg(oclHandles.kernel[kernel_id], arg_idx, sizeof(d_mem), &d_mem);
  976. #ifdef ERRMSG
  977. oclHandles.error_str = "excpetion in _clSetKernelArg()-1 ";
  978. switch(oclHandles.cl_status){
  979. case CL_INVALID_KERNEL:
  980. oclHandles.error_str += "CL_INVALID_KERNEL";
  981. break;
  982. case CL_INVALID_ARG_INDEX:
  983. oclHandles.error_str += "CL_INVALID_ARG_INDEX";
  984. break;
  985. case CL_INVALID_ARG_VALUE:
  986. oclHandles.error_str += "CL_INVALID_ARG_VALUE";
  987. break;
  988. case CL_INVALID_MEM_OBJECT:
  989. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  990. break;
  991. case CL_INVALID_SAMPLER:
  992. oclHandles.error_str += "CL_INVALID_SAMPLER";
  993. break;
  994. case CL_INVALID_ARG_SIZE:
  995. oclHandles.error_str += "CL_INVALID_ARG_SIZE";
  996. break;
  997. case CL_OUT_OF_RESOURCES:
  998. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  999. break;
  1000. case CL_OUT_OF_HOST_MEMORY:
  1001. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1002. break;
  1003. default:
  1004. oclHandles.error_str += "Unknown reason";
  1005. break;
  1006. }
  1007. if(oclHandles.cl_status != CL_SUCCESS)
  1008. throw(oclHandles.error_str);
  1009. #endif
  1010. }
  1011. else{
  1012. oclHandles.cl_status = clSetKernelArg(oclHandles.kernel[kernel_id], arg_idx, size, d_mem);
  1013. #ifdef ERRMSG
  1014. oclHandles.error_str = "excpetion in _clSetKernelArg()-2 ";
  1015. switch(oclHandles.cl_status){
  1016. case CL_INVALID_KERNEL:
  1017. oclHandles.error_str += "CL_INVALID_KERNEL";
  1018. break;
  1019. case CL_INVALID_ARG_INDEX:
  1020. oclHandles.error_str += "CL_INVALID_ARG_INDEX";
  1021. break;
  1022. case CL_INVALID_ARG_VALUE:
  1023. oclHandles.error_str += "CL_INVALID_ARG_VALUE";
  1024. break;
  1025. case CL_INVALID_MEM_OBJECT:
  1026. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  1027. break;
  1028. case CL_INVALID_SAMPLER:
  1029. oclHandles.error_str += "CL_INVALID_SAMPLER";
  1030. break;
  1031. case CL_INVALID_ARG_SIZE:
  1032. oclHandles.error_str += "CL_INVALID_ARG_SIZE";
  1033. break;
  1034. case CL_OUT_OF_RESOURCES:
  1035. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1036. break;
  1037. case CL_OUT_OF_HOST_MEMORY:
  1038. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1039. break;
  1040. default:
  1041. oclHandles.error_str += "Unknown reason";
  1042. break;
  1043. }
  1044. if(oclHandles.cl_status != CL_SUCCESS)
  1045. throw(oclHandles.error_str);
  1046. #endif
  1047. }
  1048. }
  1049. void _clFinish() throw(string){
  1050. oclHandles.cl_status = clFinish(oclHandles.queue);
  1051. #ifdef ERRMSG
  1052. if(oclHandles.cl_status!=CL_SUCCESS){
  1053. oclHandles.error_str = "excpetion in _clFinish";
  1054. switch(oclHandles.cl_status){
  1055. case CL_INVALID_COMMAND_QUEUE:
  1056. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  1057. break;
  1058. case CL_OUT_OF_RESOURCES:
  1059. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1060. break;
  1061. case CL_OUT_OF_HOST_MEMORY:
  1062. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1063. break;
  1064. default:
  1065. oclHandles.error_str += "Unknown reasons";
  1066. break;
  1067. }
  1068. throw(oclHandles.error_str);
  1069. }
  1070. #endif
  1071. }
  1072. /*------------------------------------------------------------
  1073. @function: entry of invoke the kernel function
  1074. @params:
  1075. kernel_id: the index of kernel to set
  1076. work_items: the number of working items
  1077. work_group_size: the size of each work group
  1078. @return: NULL
  1079. @date: 03/04/2011
  1080. ------------------------------------------------------------*/
  1081. void _clInvokeKernel(int kernel_id, int work_items, int work_group_size) throw(string){
  1082. #ifdef PROFILE_
  1083. double t1 = gettime();
  1084. #endif
  1085. cl_uint work_dim = WORK_DIM;
  1086. cl_event e[1];
  1087. if(work_items%work_group_size != 0) //process situations that work_items cannot be divided by work_group_size
  1088. work_items = work_items + (work_group_size-(work_items%work_group_size));
  1089. size_t local_work_size[] = {work_group_size, 1};
  1090. size_t global_work_size[] = {work_items, 1};
  1091. oclHandles.cl_status = clEnqueueNDRangeKernel(oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0, \
  1092. global_work_size, local_work_size, 0 , 0, &(e[0]) );
  1093. #ifdef ERRMSG
  1094. if(oclHandles.cl_status != CL_SUCCESS){
  1095. oclHandles.error_str = "excpetion in _clInvokeKernel() -> ";
  1096. switch(oclHandles.cl_status){
  1097. case CL_INVALID_PROGRAM_EXECUTABLE:
  1098. oclHandles.error_str += "CL_INVALID_PROGRAM_EXECUTABLE";
  1099. break;
  1100. case CL_INVALID_COMMAND_QUEUE:
  1101. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  1102. break;
  1103. case CL_INVALID_KERNEL:
  1104. oclHandles.error_str += "CL_INVALID_KERNEL";
  1105. break;
  1106. case CL_INVALID_CONTEXT:
  1107. oclHandles.error_str += "CL_INVALID_CONTEXT";
  1108. break;
  1109. case CL_INVALID_KERNEL_ARGS:
  1110. oclHandles.error_str += "CL_INVALID_KERNEL_ARGS";
  1111. break;
  1112. case CL_INVALID_WORK_DIMENSION:
  1113. oclHandles.error_str += "CL_INVALID_WORK_DIMENSION";
  1114. break;
  1115. case CL_INVALID_GLOBAL_WORK_SIZE:
  1116. oclHandles.error_str += "CL_INVALID_GLOBAL_WORK_SIZE";
  1117. break;
  1118. case CL_INVALID_WORK_GROUP_SIZE:
  1119. oclHandles.error_str += "CL_INVALID_WORK_GROUP_SIZE";
  1120. break;
  1121. case CL_INVALID_WORK_ITEM_SIZE:
  1122. oclHandles.error_str += "CL_INVALID_WORK_ITEM_SIZE";
  1123. break;
  1124. case CL_INVALID_GLOBAL_OFFSET:
  1125. oclHandles.error_str += "CL_INVALID_GLOBAL_OFFSET";
  1126. break;
  1127. case CL_OUT_OF_RESOURCES:
  1128. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1129. break;
  1130. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  1131. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  1132. break;
  1133. case CL_INVALID_EVENT_WAIT_LIST:
  1134. oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
  1135. break;
  1136. case CL_OUT_OF_HOST_MEMORY:
  1137. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1138. break;
  1139. default:
  1140. oclHandles.error_str += "Unkown reseason";
  1141. break;
  1142. }
  1143. throw(oclHandles.error_str);
  1144. }
  1145. #endif
  1146. //_clFinish();
  1147. // oclHandles.cl_status = clWaitForEvents(1, &e[0]);
  1148. #ifdef ERRMSG
  1149. if (oclHandles.cl_status!= CL_SUCCESS){
  1150. oclHandles.error_str = "excpetion in _clEnqueueNDRange() -> clWaitForEvents ->";
  1151. switch(oclHandles.cl_status){
  1152. case CL_INVALID_VALUE:
  1153. oclHandles.error_str += "CL_INVALID_VALUE";
  1154. break;
  1155. case CL_INVALID_CONTEXT:
  1156. oclHandles.error_str += "CL_INVALID_CONTEXT";
  1157. break;
  1158. case CL_INVALID_EVENT:
  1159. oclHandles.error_str += "CL_INVALID_EVENT";
  1160. break;
  1161. case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
  1162. oclHandles.error_str += "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
  1163. break;
  1164. case CL_OUT_OF_RESOURCES:
  1165. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1166. break;
  1167. case CL_OUT_OF_HOST_MEMORY:
  1168. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1169. break;
  1170. default:
  1171. oclHandles.error_str += "Unkown Reason";
  1172. break;
  1173. }
  1174. throw(oclHandles.error_str);
  1175. }
  1176. #endif
  1177. #ifdef PROFILE_
  1178. double t2 = gettime();
  1179. KE += t2 - t1;
  1180. #endif
  1181. }
  1182. /*------------------------------------------------------------
  1183. @function: set device memory in an easy manner
  1184. @params:
  1185. mem_d: the device memory to be set;
  1186. val: set the selected memory to 'val';
  1187. number_elements: the number of elements in the selected memory
  1188. @return: NULL
  1189. @date: 03/04/2011
  1190. ------------------------------------------------------------*/
  1191. void _clMemset(cl_mem mem_d, short val, int number_bytes)throw(string){
  1192. int kernel_id = 0;
  1193. int arg_idx = 0;
  1194. _clSetArgs(kernel_id, arg_idx++, mem_d);
  1195. _clSetArgs(kernel_id, arg_idx++, &val, sizeof(short));
  1196. _clSetArgs(kernel_id, arg_idx++, &number_bytes, sizeof(int));
  1197. _clInvokeKernel(kernel_id, number_bytes, work_group_size);
  1198. }
  1199. /*------------------------------------------------------------
  1200. @function: entry of invoke the kernel function using 2d working items
  1201. @params:
  1202. kernel_id: the index of kernel to set
  1203. range_x: the number of working items in x direction
  1204. range_y: the number of working items in y direction
  1205. group_x: the number of working items in each work group in x direction
  1206. group_y: the number of working items in each work group in y direction
  1207. @return: NULL
  1208. @date: 03/04/2011
  1209. ------------------------------------------------------------*/
  1210. void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, int group_y) throw(string){
  1211. #ifdef PROFILE_
  1212. double t1 = gettime();
  1213. #endif
  1214. cl_uint work_dim = WORK_DIM;
  1215. size_t local_work_size[] = {group_x, group_y};
  1216. size_t global_work_size[] = {range_x, range_y};
  1217. cl_event e[1];
  1218. /*if(work_items%work_group_size != 0) //process situations that work_items cannot be divided by work_group_size
  1219. work_items = work_items + (work_group_size-(work_items%work_group_size));*/
  1220. oclHandles.cl_status = clEnqueueNDRangeKernel(oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0, \
  1221. global_work_size, local_work_size, 0 , 0, &(e[0]) );
  1222. #ifdef ERRMSG
  1223. if(oclHandles.cl_status != CL_SUCCESS){
  1224. oclHandles.error_str = "excpetion in _clInvokeKernel() -> ";
  1225. switch(oclHandles.cl_status){
  1226. case CL_INVALID_PROGRAM_EXECUTABLE:
  1227. oclHandles.error_str += "CL_INVALID_PROGRAM_EXECUTABLE";
  1228. break;
  1229. case CL_INVALID_COMMAND_QUEUE:
  1230. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  1231. break;
  1232. case CL_INVALID_KERNEL:
  1233. oclHandles.error_str += "CL_INVALID_KERNEL";
  1234. break;
  1235. case CL_INVALID_CONTEXT:
  1236. oclHandles.error_str += "CL_INVALID_CONTEXT";
  1237. break;
  1238. case CL_INVALID_KERNEL_ARGS:
  1239. oclHandles.error_str += "CL_INVALID_KERNEL_ARGS";
  1240. break;
  1241. case CL_INVALID_WORK_DIMENSION:
  1242. oclHandles.error_str += "CL_INVALID_WORK_DIMENSION";
  1243. break;
  1244. case CL_INVALID_GLOBAL_WORK_SIZE:
  1245. oclHandles.error_str += "CL_INVALID_GLOBAL_WORK_SIZE";
  1246. break;
  1247. case CL_INVALID_WORK_GROUP_SIZE:
  1248. oclHandles.error_str += "CL_INVALID_WORK_GROUP_SIZE";
  1249. break;
  1250. case CL_INVALID_WORK_ITEM_SIZE:
  1251. oclHandles.error_str += "CL_INVALID_WORK_ITEM_SIZE";
  1252. break;
  1253. case CL_INVALID_GLOBAL_OFFSET:
  1254. oclHandles.error_str += "CL_INVALID_GLOBAL_OFFSET";
  1255. break;
  1256. case CL_OUT_OF_RESOURCES:
  1257. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1258. break;
  1259. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  1260. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  1261. break;
  1262. case CL_INVALID_EVENT_WAIT_LIST:
  1263. oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
  1264. break;
  1265. case CL_OUT_OF_HOST_MEMORY:
  1266. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1267. break;
  1268. default:
  1269. oclHandles.error_str += "Unkown reseason";
  1270. break;
  1271. }
  1272. throw(oclHandles.error_str);
  1273. }
  1274. #endif
  1275. // oclHandles.cl_status = clWaitForEvents(1, &e[0]);
  1276. #ifdef ERRMSG
  1277. if (oclHandles.cl_status!= CL_SUCCESS)
  1278. throw(string("excpetion in _clEnqueueNDRange() -> clWaitForEvents"));
  1279. #endif
  1280. #ifdef PROFILE_
  1281. double t2 = gettime();
  1282. KE += t2 - t1;
  1283. #endif
  1284. }
  1285. /*------------------------------------------------------------
  1286. @function: release OpenCL memory objects
  1287. @params:
  1288. ob: the memory object to free or release
  1289. @return: NULL
  1290. @date: 03/04/2011
  1291. ------------------------------------------------------------*/
  1292. void _clFree(cl_mem ob) throw(string){
  1293. #ifdef PROFILE_
  1294. double t1 = gettime();
  1295. #endif
  1296. if(ob!=NULL)
  1297. oclHandles.cl_status = clReleaseMemObject(ob);
  1298. #ifdef ERRMSG
  1299. if (oclHandles.cl_status!= CL_SUCCESS){
  1300. oclHandles.error_str = "excpetion in _clFree() ->";
  1301. switch(oclHandles.cl_status){
  1302. case CL_INVALID_MEM_OBJECT:
  1303. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  1304. break;
  1305. case CL_OUT_OF_RESOURCES:
  1306. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1307. break;
  1308. case CL_OUT_OF_HOST_MEMORY:
  1309. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1310. break;
  1311. default:
  1312. oclHandles.error_str += "Unkown reseason";
  1313. break;
  1314. }
  1315. throw(oclHandles.error_str);
  1316. }
  1317. #endif
  1318. #ifdef PROFILE_
  1319. double t2 = gettime();
  1320. MF += t2 - t1;
  1321. #endif
  1322. }
  1323. /*------------------------------------------------------------
  1324. @function: output time profiling information
  1325. @params: NULL
  1326. @return: NULL
  1327. @date: 03/04/2011
  1328. ------------------------------------------------------------*/
  1329. void _clStatistics(){
  1330. #ifdef PROFILE_
  1331. FILE *fp_pd = fopen("PD_OCL.txt", "a");
  1332. fprintf(fp_pd, "%lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf\n", CC, CR, MA, MF, H2D, D2H, D2D, KE, KC);
  1333. fclose(fp_pd);
  1334. #endif
  1335. return ;
  1336. }
  1337. #endif //_CL_HELPER_