CLHelper.h 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518
  1. /********************************************************************
  2. //--cambine:helper function for OpenCL
  3. //--programmer: Jianbin Fang
  4. //--date: 27/12/2010
  5. ********************************************************************/
  6. #ifndef _CL_HELPER_
  7. #define _CL_HELPER_
#include <CL/cl.h>
#include <sys/time.h>

#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
  14. using std::string;
  15. using std::ifstream;
  16. using std::cerr;
  17. using std::endl;
  18. using std::cout;
  19. #define PROFILE_
  20. #ifdef PROFILE_
  21. double TE; //: total execution time;
  22. double CC; //: Context creation time;
  23. double CR; //: Context release time;
  24. double MA; //: GPU memory allocation time;
  25. double MF; //: GPU memory free time;
  26. double H2D; //: the time to transfer data from host to device;
  27. double D2H; //: the time to transfer data from device to host;
  28. double D2D; //: the time to transfer data from device to device;
  29. double KE; //: the kernel execution time
  30. double KC; //: the kernel compilation time
  31. #endif
  32. //#pragma OPENCL EXTENSION cl_nv_compiler_options:enable
  33. #define WORK_DIM 2 //work-items dimensions
  34. /*------------------------------------------------------------
  35. @struct: the structure of device properties
  36. @date: 24/03/2011
  37. ------------------------------------------------------------*/
  38. struct _clDeviceProp{
  39. /*CL_DEVICE_ADDRESS_BITS
  40. CL_DEVICE_AVAILABLE
  41. CL_DEVICE_COMPILER_AVAILABLE
  42. CL_DEVICE_ENDIAN_LITTLE
  43. CL_DEVICE_ERROR_CORRECTION_SUPPORT
  44. CL_DEVICE_EXECUTION_CAPABILITIES
  45. CL_DEVICE_EXTENSIONS
  46. CL_DEVICE_GLOBAL_MEM_CACHE_SIZE
  47. CL_DEVICE_GLOBAL_MEM_CACHE_TYPE
  48. CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE
  49. CL_DEVICE_GLOBAL_MEM_SIZE
  50. CL_DEVICE_IMAGE_SUPPORT
  51. CL_DEVICE_IMAGE2D_MAX_HEIGHT
  52. CL_DEVICE_IMAGE2D_MAX_WIDTH
  53. CL_DEVICE_IMAGE3D_MAX_DEPTH
  54. CL_DEVICE_IMAGE3D_MAX_HEIGHT
  55. CL_DEVICE_IMAGE3D_MAX_WIDTH
  56. CL_DEVICE_LOCAL_MEM_SIZE
  57. CL_DEVICE_LOCAL_MEM_TYPE
  58. CL_DEVICE_MAX_CLOCK_FREQUENCY
  59. CL_DEVICE_MAX_COMPUTE_UNITS
  60. CL_DEVICE_MAX_CONSTANT_ARGS
  61. CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE
  62. CL_DEVICE_MAX_MEM_ALLOC_SIZE
  63. CL_DEVICE_MAX_PARAMETER_SIZE
  64. CL_DEVICE_MAX_READ_IMAGE_ARGS
  65. CL_DEVICE_MAX_SAMPLERS
  66. CL_DEVICE_MAX_WORK_GROUP_SIZE
  67. CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS
  68. CL_DEVICE_MAX_WORK_ITEM_SIZES
  69. CL_DEVICE_MAX_WRITE_IMAGE_ARGS
  70. CL_DEVICE_MEM_BASE_ADDR_ALIGN
  71. CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE
  72. CL_DEVICE_NAME
  73. CL_DEVICE_PLATFORM
  74. CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR
  75. CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE
  76. CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT
  77. CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT
  78. CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG
  79. CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT
  80. CL_DEVICE_PROFILE
  81. CL_DEVICE_PROFILING_TIMER_RESOLUTION
  82. CL_DEVICE_QUEUE_PROPERTIES
  83. CL_DEVICE_SINGLE_FP_CONFIG
  84. CL_DEVICE_TYPE
  85. CL_DEVICE_VENDOR_ID
  86. CL_DEVICE_VENDOR
  87. CL_DEVICE_VERSION
  88. CL_DRIVER_VERSION*/
  89. char device_name[100];
  90. };
  91. struct oclHandleStruct{
  92. cl_context context;
  93. cl_device_id *devices;
  94. cl_command_queue queue;
  95. cl_program program;
  96. cl_int cl_status;
  97. std::string error_str;
  98. std::vector<cl_kernel> kernel;
  99. cl_mem pinned_mem_out;
  100. cl_mem pinned_mem_in;
  101. };
  102. struct oclHandleStruct oclHandles;
  103. char kernel_file[100] = "Kernels.cl";
  104. int total_kernels = 2;
  105. string kernel_names[2] = {"memset_kernel", "pgain_kernel"};
  106. int work_group_size = 256;
  107. int device_id_inused = 0; //deviced id used (default : 0)
  108. int number_devices = 0;
  109. double gettime() {
  110. struct timeval t;
  111. gettimeofday(&t,NULL);
  112. return t.tv_sec+t.tv_usec*1e-6;
  113. }
  114. /*------------------------------------------------------------
  115. @function: select device to use
  116. @params:
  117. size: the index of device to be used
  118. @return: NULL
  119. @date: 24/03/2011
  120. ------------------------------------------------------------*/
  121. void _clSetDevice(int idx) throw(string){
  122. if(idx>(number_devices-1))
  123. throw(string(":invalid device ID:"));
  124. device_id_inused = idx;
  125. }
  126. /*------------------------------------------------------------
  127. @function: get device properties indexed by 'idx'
  128. @params:
  129. idx: device index
  130. prop: output properties
  131. @return: prop
  132. @date: 24/03/2011
  133. ------------------------------------------------------------*/
  134. void _clGetDeviceProperties(int idx, _clDeviceProp *prop) throw(string){
  135. oclHandles.cl_status= clGetDeviceInfo(oclHandles.devices[idx], CL_DEVICE_NAME, 100, prop->device_name, NULL);
  136. #ifdef ERRMSG
  137. if(oclHandles.cl_status != CL_SUCCESS){
  138. oclHandles.error_str = "exception in _clGetDeviceProperties-> ";
  139. switch(oclHandles.cl_status){
  140. case CL_INVALID_DEVICE:
  141. oclHandles.error_str += "CL_INVALID_DEVICE";
  142. break;
  143. case CL_INVALID_VALUE:
  144. oclHandles.error_str += "CL_INVALID_VALUE";
  145. break;
  146. default:
  147. oclHandles.error_str += "unknown reasons";
  148. break;
  149. }
  150. throw(oclHandles.error_str);
  151. }
  152. #endif
  153. }
  154. /*
  155. * Converts the contents of a file into a string
  156. */
  157. string FileToString(const string fileName){
  158. ifstream f(fileName.c_str(), ifstream::in | ifstream::binary);
  159. try{
  160. size_t size;
  161. char* str;
  162. string s;
  163. if(f.is_open()){
  164. size_t fileSize;
  165. f.seekg(0, ifstream::end);
  166. size = fileSize = f.tellg();
  167. f.seekg(0, ifstream::beg);
  168. str = new char[size+1];
  169. if (!str) throw(string("Could not allocate memory"));
  170. f.read(str, fileSize);
  171. f.close();
  172. str[size] = '\0';
  173. s = str;
  174. delete [] str;
  175. return s;
  176. }
  177. }
  178. catch(std::string msg){
  179. cerr << "Exception caught in FileToString(): " << msg << endl;
  180. if(f.is_open())
  181. f.close();
  182. }
  183. catch(...){
  184. cerr << "Exception caught in FileToString()" << endl;
  185. if(f.is_open())
  186. f.close();
  187. }
  188. string errorMsg = "FileToString()::Error: Unable to open file "
  189. + fileName;
  190. throw(errorMsg);
  191. }
  192. /*------------------------------------------------------------
  193. @function: Read command line parameters
  194. @params: NULL
  195. @return:
  196. @date: 24/03/2011
  197. ------------------------------------------------------------*/
  198. char device_type[3];
  199. int device_id = 0;
  200. void _clCmdParams(int argc, char* argv[]){
  201. for (int i = 0; i < argc; ++i){
  202. switch (argv[i][1]){
  203. case 't': //--t stands for device type
  204. if (++i < argc){
  205. sscanf(argv[i], "%s", device_type);
  206. }
  207. else{
  208. std::cerr << "Could not read argument after option " << argv[i-1] << std::endl;
  209. throw;
  210. }
  211. break;
  212. case 'd': //--d stands for device id
  213. if (++i < argc){
  214. sscanf(argv[i], "%d", &device_id);
  215. }
  216. else{
  217. std::cerr << "Could not read argument after option " << argv[i-1] << std::endl;
  218. throw;
  219. }
  220. break;
  221. default:
  222. ;
  223. }
  224. }
  225. }
  226. /*------------------------------------------------------------
  227. @function: Initlize CL objects
  228. @params:
  229. device_id: device id
  230. device_type: the types of devices, e.g. CPU, GPU, ACCERLERATOR,...
  231. (1) -t cpu/gpu/acc -d 0/1/2/...
  232. (2) -t cpu/gpu/acc [-d 0]
  233. (3) [-t default] -d 0/1/2/...
  234. (4) NULL [-d 0]
  235. @return:
  236. @description:
  237. there are 5 steps to initialize all the OpenCL objects needed,
  238. @revised:
  239. get the number of devices and devices have no relationship with context
  240. @date: 24/03/2011
  241. ------------------------------------------------------------*/
  242. void _clInit(string device_type, int device_id)throw(string){
  243. #ifdef PROFILE_
  244. TE = 0;
  245. CC = 0;
  246. CR = 0;
  247. MA = 0;
  248. MF = 0;
  249. H2D = 0;
  250. D2H = 0;
  251. D2D = 0;
  252. KE = 0;
  253. KC = 0;
  254. #endif
  255. int DEVICE_ID_INUSED = 0;
  256. _clDeviceProp prop;
  257. #ifdef PROFILE_
  258. double t1 = gettime();
  259. #endif
  260. cl_int resultCL;
  261. oclHandles.context = NULL;
  262. oclHandles.devices = NULL;
  263. oclHandles.queue = NULL;
  264. oclHandles.program = NULL;
  265. cl_uint deviceListSize;
  266. //-----------------------------------------------
  267. //--cambine-1: find the available platforms and select one
  268. cl_uint numPlatforms;
  269. cl_platform_id targetPlatform = NULL;
  270. resultCL = clGetPlatformIDs(0, NULL, &numPlatforms);
  271. if (resultCL != CL_SUCCESS)
  272. throw (string("InitCL()::Error: Getting number of platforms (clGetPlatformIDs)"));
  273. //printf("number of platforms:%d\n",numPlatforms); //by cambine
  274. #ifdef DEV_INFO
  275. std::cout<<"--cambine: number of platforms: "<<numPlatforms<<std::endl;
  276. #endif
  277. if (!(numPlatforms > 0))
  278. throw (string("InitCL()::Error: No platforms found (clGetPlatformIDs)"));
  279. cl_platform_id* allPlatforms = (cl_platform_id*) malloc(numPlatforms * sizeof(cl_platform_id));
  280. resultCL = clGetPlatformIDs(numPlatforms, allPlatforms, NULL);
  281. if (resultCL != CL_SUCCESS)
  282. throw (string("InitCL()::Error: Getting platform ids (clGetPlatformIDs)"));
  283. // Select the target platform. Default: first platform
  284. targetPlatform = allPlatforms[1];
  285. for (int i = 0; i < numPlatforms; i++)
  286. {
  287. char pbuff[128];
  288. resultCL = clGetPlatformInfo( allPlatforms[i],
  289. CL_PLATFORM_VENDOR,
  290. sizeof(pbuff),
  291. pbuff,
  292. NULL);
  293. if (resultCL != CL_SUCCESS)
  294. throw (string("InitCL()::Error: Getting platform info (clGetPlatformInfo)"));
  295. //printf("vedor is %s\n",pbuff);
  296. #ifdef DEV_INFO
  297. std::cout<<"--cambine: vedor is: "<<pbuff<<std::endl;
  298. #endif
  299. }
  300. free(allPlatforms);
  301. //-----------------------------------------------
  302. //--cambine-2: detect OpenCL devices
  303. // First, get the size of device list
  304. if(device_type.compare("")!=0){
  305. if(device_type.compare("cpu")==0){
  306. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_CPU, 0, NULL, &deviceListSize);
  307. if(oclHandles.cl_status!=CL_SUCCESS){
  308. throw(string("exception in _clInit -> clGetDeviceIDs -> CPU"));
  309. }
  310. }
  311. if(device_type.compare("gpu")==0){
  312. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_GPU, 0, NULL, &deviceListSize);
  313. if(oclHandles.cl_status!=CL_SUCCESS){
  314. throw(string("exception in _clInit -> clGetDeviceIDs -> GPU"));
  315. }
  316. }
  317. if(device_type.compare("acc")==0){
  318. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ACCELERATOR, 0, NULL, &deviceListSize);
  319. if(oclHandles.cl_status!=CL_SUCCESS){
  320. throw(string("exception in _clInit -> clGetDeviceIDs -> ACCELERATOR"));
  321. }
  322. }
  323. }
  324. else{
  325. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ALL, 0, NULL, &deviceListSize);
  326. if(oclHandles.cl_status!=CL_SUCCESS){
  327. throw(string("exception in _clInit -> clGetDeviceIDs -> ALL"));
  328. }
  329. }
  330. if (deviceListSize == 0)
  331. throw(string("InitCL()::Error: No devices found."));
  332. #ifdef DEV_INFO
  333. std::cout<<"--cambine: number of device="<<deviceListSize<<std::endl;
  334. #endif
  335. number_devices = deviceListSize;
  336. // Now, allocate the device list
  337. // oclHandles.devices = (cl_device_id *)malloc(deviceListSize);
  338. oclHandles.devices = (cl_device_id *)malloc(sizeof(cl_device_id)*deviceListSize);
  339. if (oclHandles.devices == 0)
  340. throw(string("InitCL()::Error: Could not allocate memory."));
  341. // Next, get the device list data
  342. if(device_type.compare("")!=0){
  343. if(device_type.compare("cpu")==0){
  344. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_CPU, deviceListSize, oclHandles.devices, NULL);
  345. if(oclHandles.cl_status!=CL_SUCCESS){
  346. throw(string("exception in _clInit -> clGetDeviceIDs -> CPU ->2"));
  347. }
  348. }
  349. if(device_type.compare("gpu")==0){
  350. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_GPU, deviceListSize, oclHandles.devices, NULL);
  351. if(oclHandles.cl_status!=CL_SUCCESS){
  352. throw(string("exception in _clInit -> clGetDeviceIDs -> GPU -> 2"));
  353. }
  354. }
  355. if(device_type.compare("acc")==0){
  356. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ACCELERATOR, deviceListSize, oclHandles.devices, NULL);
  357. if(oclHandles.cl_status!=CL_SUCCESS){
  358. throw(string("exception in _clInit -> clGetDeviceIDs -> ACCELERATOR -> 2"));
  359. }
  360. }
  361. }
  362. else{
  363. oclHandles.cl_status = clGetDeviceIDs(targetPlatform, CL_DEVICE_TYPE_ALL, deviceListSize, oclHandles.devices, NULL);
  364. if(oclHandles.cl_status!=CL_SUCCESS){
  365. throw(string("exception in _clInit -> clGetDeviceIDs -> ALL -> 2"));
  366. }
  367. }
  368. if(device_id!=0){
  369. if(device_id>(deviceListSize-1))
  370. throw(string("Invalidate device id"));
  371. DEVICE_ID_INUSED = device_id;
  372. }
  373. _clGetDeviceProperties(DEVICE_ID_INUSED, &prop);
  374. std::cout<<"--cambine: device name="<<prop.device_name<<std::endl;
  375. #ifdef DEV_INFO
  376. std::cout<<"--cambine: return device list successfully!"<<std::endl;
  377. #endif
  378. //-----------------------------------------------
  379. //--cambine-3: create an OpenCL context
  380. #ifdef DEV_INFO
  381. std::cout<<"--cambine: before creating context"<<std::endl;
  382. #endif
  383. cl_context_properties cprops[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)targetPlatform, 0 };
  384. oclHandles.context = clCreateContext(0,
  385. deviceListSize,
  386. oclHandles.devices,
  387. NULL,
  388. NULL,
  389. &resultCL);
  390. if ((resultCL != CL_SUCCESS) || (oclHandles.context == NULL))
  391. throw (string("InitCL()::Error: Creating Context (clCreateContextFromType)"));
  392. #ifdef DEV_INFO
  393. std::cout<<"--cambine: create OCL context successfully!"<<std::endl;
  394. #endif
  395. //-----------------------------------------------
  396. //--cambine-4: Create an OpenCL command queue
  397. oclHandles.queue = clCreateCommandQueue(oclHandles.context,
  398. oclHandles.devices[DEVICE_ID_INUSED],
  399. 0,
  400. &resultCL);
  401. if ((resultCL != CL_SUCCESS) || (oclHandles.queue == NULL))
  402. throw(string("InitCL()::Creating Command Queue. (clCreateCommandQueue)"));
  403. #ifdef PROFILE_
  404. double t2 = gettime();
  405. CC += t2 - t1;
  406. #endif
  407. //-----------------------------------------------
  408. //--cambine-5: Load CL file, build CL program object, create CL kernel object
  409. std::string source_str = FileToString(kernel_file);
  410. const char * source = source_str.c_str();
  411. size_t sourceSize[] = { source_str.length() };
  412. oclHandles.program = clCreateProgramWithSource(oclHandles.context,
  413. 1,
  414. &source,
  415. sourceSize,
  416. &resultCL);
  417. if ((resultCL != CL_SUCCESS) || (oclHandles.program == NULL))
  418. throw(string("InitCL()::Error: Loading Binary into cl_program. (clCreateProgramWithBinary)"));
  419. //insert debug information
  420. std::string options= "";
  421. //options += " -cl-nv-opt-level=3";
  422. resultCL = clBuildProgram(oclHandles.program, deviceListSize, oclHandles.devices, options.c_str(), NULL, NULL);
  423. if ((resultCL != CL_SUCCESS) || (oclHandles.program == NULL)){
  424. cerr << "InitCL()::Error: In clBuildProgram" << endl;
  425. size_t length;
  426. resultCL = clGetProgramBuildInfo(oclHandles.program,
  427. oclHandles.devices[DEVICE_ID_INUSED],
  428. CL_PROGRAM_BUILD_LOG,
  429. 0,
  430. NULL,
  431. &length);
  432. if(resultCL != CL_SUCCESS)
  433. throw(string("InitCL()::Error: Getting Program build info(clGetProgramBuildInfo)"));
  434. char* buffer = (char*)malloc(length);
  435. resultCL = clGetProgramBuildInfo(oclHandles.program,
  436. oclHandles.devices[DEVICE_ID_INUSED],
  437. CL_PROGRAM_BUILD_LOG,
  438. length,
  439. buffer,
  440. NULL);
  441. if(resultCL != CL_SUCCESS)
  442. throw(string("InitCL()::Error: Getting Program build info(clGetProgramBuildInfo)"));
  443. cerr << buffer << endl;
  444. FILE * fp = fopen("errinfo", "w");
  445. fprintf(fp, "%s\n", buffer);
  446. fclose(fp);
  447. free(buffer);
  448. throw(string("InitCL()::Error: Building Program (clBuildProgram)"));
  449. }
  450. #ifdef PROFILE_
  451. double t3 = gettime();
  452. KC += t3 - t2;
  453. #endif
  454. //get program information in intermediate representation
  455. #ifdef PTX_MSG
  456. size_t binary_sizes[deviceListSize];
  457. char * binaries[deviceListSize];
  458. //figure out number of devices and the sizes of the binary for each device.
  459. oclHandles.cl_status = clGetProgramInfo(oclHandles.program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t)*deviceListSize, &binary_sizes, NULL );
  460. if(oclHandles.cl_status!=CL_SUCCESS){
  461. throw(string("--cambine:exception in _InitCL -> clGetProgramInfo-2"));
  462. }
  463. std::cout<<"--cambine:"<<binary_sizes<<std::endl;
  464. //copy over all of the generated binaries.
  465. for(int i=0;i<deviceListSize;i++)
  466. binaries[i] = (char *)malloc( sizeof(char)*(binary_sizes[i]+1));
  467. oclHandles.cl_status = clGetProgramInfo(oclHandles.program, CL_PROGRAM_BINARIES, sizeof(char *)*deviceListSize, binaries, NULL );
  468. if(oclHandles.cl_status!=CL_SUCCESS){
  469. throw(string("--cambine:exception in _InitCL -> clGetProgramInfo-3"));
  470. }
  471. for(int i=0;i<deviceListSize;i++)
  472. binaries[i][binary_sizes[i]] = '\0';
  473. std::cout<<"--cambine:writing ptd information..."<<std::endl;
  474. FILE * ptx_file = fopen("cl.ptx","w");
  475. if(ptx_file==NULL){
  476. throw(string("exceptions in allocate ptx file."));
  477. }
  478. fprintf(ptx_file,"%s",binaries[DEVICE_ID_INUSED]);
  479. fclose(ptx_file);
  480. std::cout<<"--cambine:writing ptd information done."<<std::endl;
  481. for(int i=0;i<deviceListSize;i++)
  482. free(binaries[i]);
  483. #endif
  484. for (int nKernel = 0; nKernel < total_kernels; nKernel++)
  485. {
  486. // get a kernel object handle for a kernel with the given name
  487. cl_kernel kernel = clCreateKernel(oclHandles.program,
  488. (kernel_names[nKernel]).c_str(),
  489. &resultCL);
  490. if ((resultCL != CL_SUCCESS) || (kernel == NULL))
  491. {
  492. string errorMsg = "InitCL()::Error: Creating Kernel (clCreateKernel) \"" + kernel_names[nKernel] + "\"";
  493. throw(errorMsg);
  494. }
  495. oclHandles.kernel.push_back(kernel);
  496. }
  497. //get resource alocation information
  498. #ifdef RES_MSG
  499. char * build_log;
  500. size_t ret_val_size;
  501. oclHandles.cl_status = clGetProgramBuildInfo(oclHandles.program, oclHandles.devices[DEVICE_ID_INUSED], CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size);
  502. if(oclHandles.cl_status!=CL_SUCCESS){
  503. throw(string("exceptions in _InitCL -> getting resource information"));
  504. }
  505. build_log = (char *)malloc(ret_val_size+1);
  506. oclHandles.cl_status = clGetProgramBuildInfo(oclHandles.program, oclHandles.devices[DEVICE_ID_INUSED], CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL);
  507. if(oclHandles.cl_status!=CL_SUCCESS){
  508. throw(string("exceptions in _InitCL -> getting resources allocation information-2"));
  509. }
  510. build_log[ret_val_size] = '\0';
  511. std::cout<<"--cambine:"<<build_log<<std::endl;
  512. free(build_log);
  513. #endif
  514. #ifdef PROFILE_
  515. double t4 = gettime();
  516. CC += t4 - t3;
  517. #endif
  518. }
  519. /*------------------------------------------------------------
  520. @function: release CL objects
  521. @params: NULL
  522. @return:
  523. @date: 24/03/2011
  524. ------------------------------------------------------------*/
  525. void _clRelease()
  526. {
  527. #ifdef PROFILE_
  528. double t1 = gettime();
  529. #endif
  530. bool errorFlag = false;
  531. for (int nKernel = 0; nKernel < oclHandles.kernel.size(); nKernel++){
  532. if (oclHandles.kernel[nKernel] != NULL){
  533. cl_int resultCL = clReleaseKernel(oclHandles.kernel[nKernel]);
  534. if (resultCL != CL_SUCCESS){
  535. cerr << "ReleaseCL()::Error: In clReleaseKernel" << endl;
  536. errorFlag = true;
  537. }
  538. oclHandles.kernel[nKernel] = NULL;
  539. }
  540. oclHandles.kernel.clear();
  541. }
  542. if (oclHandles.program != NULL){
  543. cl_int resultCL = clReleaseProgram(oclHandles.program);
  544. if (resultCL != CL_SUCCESS){
  545. cerr << "ReleaseCL()::Error: In clReleaseProgram" << endl;
  546. errorFlag = true;
  547. }
  548. oclHandles.program = NULL;
  549. }
  550. if (oclHandles.queue != NULL){
  551. cl_int resultCL = clReleaseCommandQueue(oclHandles.queue);
  552. if (resultCL != CL_SUCCESS)
  553. {
  554. cerr << "ReleaseCL()::Error: In clReleaseCommandQueue" << endl;
  555. errorFlag = true;
  556. }
  557. oclHandles.queue = NULL;
  558. }
  559. free(oclHandles.devices);
  560. if (oclHandles.context != NULL){
  561. cl_int resultCL = clReleaseContext(oclHandles.context);
  562. if (resultCL != CL_SUCCESS){
  563. cerr << "ReleaseCL()::Error: In clReleaseContext" << endl;
  564. errorFlag = true;
  565. }
  566. oclHandles.context = NULL;
  567. }
  568. if (errorFlag) throw(string("ReleaseCL()::Error encountered."));
  569. #ifdef PROFILE_
  570. double t2 = gettime();
  571. CR += t2 - t1;
  572. #endif
  573. }
  574. /*------------------------------------------------------------
  575. @function: create read and write buffer for devices
  576. @params:
  577. size: the size of device memory to be allocated
  578. @return: mem_d
  579. @date: 24/03/2011
  580. ------------------------------------------------------------*/
  581. cl_mem _clMalloc(int size) throw(string){
  582. #ifdef PROFILE_
  583. double t1 = gettime();
  584. #endif
  585. cl_mem d_mem;
  586. d_mem = clCreateBuffer(oclHandles.context, CL_MEM_READ_WRITE, size, NULL, &oclHandles.cl_status);
  587. #ifdef ERRMSG
  588. if(oclHandles.cl_status != CL_SUCCESS){
  589. oclHandles.error_str = "excpetion in _clMalloc -> ";
  590. switch(oclHandles.cl_status){
  591. case CL_INVALID_CONTEXT:
  592. oclHandles.error_str += "CL_INVALID_CONTEXT";
  593. break;
  594. case CL_INVALID_VALUE:
  595. oclHandles.error_str += "CL_INVALID_VALUE";
  596. break;
  597. case CL_INVALID_BUFFER_SIZE:
  598. oclHandles.error_str += "CL_INVALID_BUFFER_SIZE";
  599. break;
  600. case CL_INVALID_HOST_PTR:
  601. oclHandles.error_str += "CL_INVALID_HOST_PTR";
  602. break;
  603. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  604. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  605. break;
  606. case CL_OUT_OF_HOST_MEMORY:
  607. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  608. break;
  609. default:
  610. oclHandles.error_str += "unknown reasons";
  611. break;
  612. }
  613. throw(oclHandles.error_str);
  614. }
  615. #endif
  616. #ifdef PROFILE_
  617. double t2 = gettime();
  618. MA += t2 - t1;
  619. #endif
  620. return d_mem;
  621. }
  622. /*------------------------------------------------------------
  623. @function: malloc pinned memoty
  624. @params:
  625. size: the size of data to be transferred in bytes
  626. @return: the pointer of host adress
  627. @date: 06/04/2011
  628. ------------------------------------------------------------*/
  629. void* _clMallocHost(int size)throw(string){
  630. void * mem_h;
  631. oclHandles.pinned_mem_out = clCreateBuffer(oclHandles.context, CL_MEM_READ_WRITE|CL_MEM_ALLOC_HOST_PTR, size, NULL, &oclHandles.cl_status);
  632. #ifdef ERRMSG
  633. if(oclHandles.cl_status != CL_SUCCESS){
  634. oclHandles.error_str = "excpetion in _clMallocHost -> clCreateBuffer";
  635. switch(oclHandles.cl_status){
  636. case CL_INVALID_CONTEXT:
  637. oclHandles.error_str += "CL_INVALID_CONTEXT";
  638. break;
  639. case CL_INVALID_VALUE:
  640. oclHandles.error_str += "CL_INVALID_VALUE";
  641. break;
  642. case CL_INVALID_BUFFER_SIZE:
  643. oclHandles.error_str += "CL_INVALID_BUFFER_SIZE";
  644. break;
  645. case CL_INVALID_HOST_PTR:
  646. oclHandles.error_str += "CL_INVALID_HOST_PTR";
  647. break;
  648. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  649. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  650. break;
  651. case CL_OUT_OF_HOST_MEMORY:
  652. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  653. break;
  654. default:
  655. oclHandles.error_str += "unknown reasons";
  656. break;
  657. }
  658. throw(oclHandles.error_str);
  659. }
  660. #endif
  661. mem_h = clEnqueueMapBuffer(oclHandles.queue, oclHandles.pinned_mem_out, CL_TRUE, CL_MAP_READ, 0, size, 0, NULL, NULL, &oclHandles.cl_status);
  662. #ifdef ERRMSG
  663. if(oclHandles.cl_status != CL_SUCCESS||mem_h==NULL){
  664. oclHandles.error_str = "excpetion in _clMallocHost -> clEnqueueMapBuffer";
  665. switch(oclHandles.cl_status){
  666. case CL_INVALID_COMMAND_QUEUE:
  667. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  668. break;
  669. case CL_INVALID_CONTEXT:
  670. oclHandles.error_str += "CL_INVALID_CONTEXT";
  671. break;
  672. case CL_INVALID_MEM_OBJECT:
  673. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  674. break;
  675. case CL_INVALID_VALUE:
  676. oclHandles.error_str += "CL_INVALID_VALUE";
  677. break;
  678. case CL_INVALID_EVENT_WAIT_LIST:
  679. oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
  680. break;
  681. case CL_MAP_FAILURE:
  682. oclHandles.error_str += "CL_MAP_FAILURE";
  683. break;
  684. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  685. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  686. break;
  687. case CL_OUT_OF_HOST_MEMORY:
  688. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  689. break;
  690. default:
  691. oclHandles.error_str += "unknown reasons";
  692. break;
  693. }
  694. throw(oclHandles.error_str);
  695. }
  696. #endif
  697. return mem_h;
  698. }
  699. /*------------------------------------------------------------
  700. @function: free pinned memory
  701. @params:
  702. io: to free pinned-in or pinned-out memory
  703. mem_h: the host address
  704. @return: NULL
  705. @date: 06/04/2011
  706. ------------------------------------------------------------*/
  707. void _clFreeHost(int io, void * mem_h){
  708. if(io==0){ //in
  709. if(mem_h){
  710. oclHandles.cl_status = clEnqueueUnmapMemObject(oclHandles.queue, oclHandles.pinned_mem_in, (void*)mem_h, 0, NULL, NULL);
  711. #ifdef ERRMSG
  712. if(oclHandles.cl_status != CL_SUCCESS){
  713. oclHandles.error_str = "excpetion in _clFreeHost -> clEnqueueUnmapMemObject(in)";
  714. switch(oclHandles.cl_status){
  715. case CL_INVALID_COMMAND_QUEUE:
  716. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  717. break;
  718. case CL_INVALID_MEM_OBJECT:
  719. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  720. break;
  721. case CL_INVALID_VALUE:
  722. oclHandles.error_str += "CL_INVALID_VALUE";
  723. break;
  724. case CL_OUT_OF_RESOURCES:
  725. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  726. break;
  727. case CL_OUT_OF_HOST_MEMORY:
  728. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  729. break;
  730. case CL_INVALID_CONTEXT:
  731. oclHandles.error_str += "CL_INVALID_CONTEXT";
  732. break;
  733. default:
  734. oclHandles.error_str += "unknown reasons";
  735. break;
  736. }
  737. throw(oclHandles.error_str);
  738. }
  739. #endif
  740. }
  741. }
  742. else if(io==1){ //out
  743. if(mem_h){
  744. oclHandles.cl_status = clEnqueueUnmapMemObject(oclHandles.queue, oclHandles.pinned_mem_out, (void*)mem_h, 0, NULL, NULL);
  745. #ifdef ERRMSG
  746. if(oclHandles.cl_status != CL_SUCCESS){
  747. oclHandles.error_str = "excpetion in _clFreeHost -> clEnqueueUnmapMemObject(in)";
  748. switch(oclHandles.cl_status){
  749. case CL_INVALID_COMMAND_QUEUE:
  750. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  751. break;
  752. case CL_INVALID_MEM_OBJECT:
  753. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  754. break;
  755. case CL_INVALID_VALUE:
  756. oclHandles.error_str += "CL_INVALID_VALUE";
  757. break;
  758. case CL_OUT_OF_RESOURCES:
  759. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  760. break;
  761. case CL_OUT_OF_HOST_MEMORY:
  762. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  763. break;
  764. case CL_INVALID_CONTEXT:
  765. oclHandles.error_str += "CL_INVALID_CONTEXT";
  766. break;
  767. default:
  768. oclHandles.error_str += "unknown reasons";
  769. break;
  770. }
  771. throw(oclHandles.error_str);
  772. }
  773. #endif
  774. }
  775. }
  776. else
  777. throw(string("encounter invalid choice when freeing pinned memmory"));
  778. }
  779. /*------------------------------------------------------------
  780. @function: transfer data from host to device
  781. @params:
  782. dest: the destination device memory
  783. src: the source host memory
  784. size: the size of data to be transferred in bytes
  785. @return: NULL
  786. @date: 17/01/2011
  787. ------------------------------------------------------------*/
  788. void _clMemcpyH2D(cl_mem dst, const void *src, int size) throw(string){
  789. #ifdef PROFILE_
  790. double t1 = gettime();
  791. #endif
  792. oclHandles.cl_status = clEnqueueWriteBuffer(oclHandles.queue, dst, CL_TRUE, 0, size, src, 0, NULL, NULL);
  793. #ifdef ERRMSG
  794. if(oclHandles.cl_status != CL_SUCCESS){
  795. oclHandles.error_str = "excpetion in _clMemcpyH2D -> ";
  796. switch(oclHandles.cl_status){
  797. case CL_INVALID_COMMAND_QUEUE:
  798. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  799. break;
  800. case CL_INVALID_CONTEXT:
  801. oclHandles.error_str += "CL_INVALID_CONTEXT";
  802. break;
  803. case CL_INVALID_MEM_OBJECT:
  804. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  805. break;
  806. case CL_INVALID_VALUE:
  807. oclHandles.error_str += "CL_INVALID_VALUE";
  808. break;
  809. case CL_INVALID_EVENT_WAIT_LIST:
  810. oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
  811. break;
  812. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  813. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  814. break;
  815. case CL_OUT_OF_HOST_MEMORY:
  816. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  817. break;
  818. default:
  819. oclHandles.error_str += "Unknown reason";
  820. break;
  821. }
  822. throw(oclHandles.error_str);
  823. }
  824. #endif
  825. #ifdef PROFILE_
  826. double t2 = gettime();
  827. H2D += t2 - t1;
  828. #endif
  829. }
  830. /*------------------------------------------------------------
  831. @function: transfer data from device to host
  832. @params:
  833. dest: the destination device memory
  834. src: the source host memory
  835. size: the size of data to be transferred in bytes
  836. @return: NULL
  837. @date: 17/01/2011
  838. ------------------------------------------------------------*/
  839. void _clMemcpyD2H(void * dst, cl_mem src, int size) throw(string){
  840. #ifdef PROFILE_
  841. double t1 = gettime();
  842. #endif
  843. oclHandles.cl_status = clEnqueueReadBuffer(oclHandles.queue, src, CL_TRUE, 0, size, dst, 0,0,0);
  844. #ifdef ERRMSG
  845. if(oclHandles.cl_status != CL_SUCCESS){
  846. oclHandles.error_str = "excpetion in _clMemCpyD2H -> ";
  847. switch(oclHandles.cl_status){
  848. case CL_INVALID_COMMAND_QUEUE:
  849. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  850. break;
  851. case CL_INVALID_CONTEXT:
  852. oclHandles.error_str += "CL_INVALID_CONTEXT";
  853. break;
  854. case CL_INVALID_MEM_OBJECT:
  855. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  856. break;
  857. case CL_INVALID_VALUE:
  858. oclHandles.error_str += "CL_INVALID_VALUE";
  859. break;
  860. case CL_INVALID_EVENT_WAIT_LIST:
  861. oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
  862. break;
  863. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  864. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  865. break;
  866. case CL_OUT_OF_HOST_MEMORY:
  867. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  868. break;
  869. default:
  870. oclHandles.error_str += "Unknown reason";
  871. break;
  872. }
  873. throw(oclHandles.error_str);
  874. }
  875. #endif
  876. #ifdef PROFILE_
  877. double t2 = gettime();
  878. D2H += t2 - t1;
  879. #endif
  880. }
  881. /*------------------------------------------------------------
  882. @function: transfer data from device to device
  883. @params:
  884. dest: the destination device memory
  885. src: the source device memory
  886. size: the size of data to be transferred in bytes
  887. @return: NULL
  888. @date: 27/03/2011
  889. ------------------------------------------------------------*/
  890. void _clMemcpyD2D(cl_mem dst, cl_mem src, int size) throw(string){
  891. #ifdef PROFILE_
  892. double t1 = gettime();
  893. #endif
  894. oclHandles.cl_status = clEnqueueCopyBuffer(oclHandles.queue, src, dst, 0, 0, size, 0, NULL, NULL);
  895. #ifdef ERRMSG
  896. if(oclHandles.cl_status != CL_SUCCESS){
  897. oclHandles.error_str = "excpetion in _clCpyMemD2D -> ";
  898. switch(oclHandles.cl_status){
  899. case CL_INVALID_COMMAND_QUEUE:
  900. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  901. break;
  902. case CL_INVALID_CONTEXT:
  903. oclHandles.error_str += "CL_INVALID_CONTEXT";
  904. break;
  905. case CL_INVALID_MEM_OBJECT:
  906. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  907. break;
  908. case CL_INVALID_VALUE:
  909. oclHandles.error_str += "CL_INVALID_VALUE";
  910. break;
  911. case CL_INVALID_EVENT_WAIT_LIST:
  912. oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
  913. break;
  914. case CL_MISALIGNED_SUB_BUFFER_OFFSET:
  915. oclHandles.error_str += "CL_MISALIGNED_SUB_BUFFER_OFFSET";
  916. break;
  917. case CL_MEM_COPY_OVERLAP:
  918. oclHandles.error_str += "CL_MEM_COPY_OVERLAP";
  919. break;
  920. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  921. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  922. break;
  923. case CL_OUT_OF_RESOURCES:
  924. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  925. break;
  926. case CL_OUT_OF_HOST_MEMORY:
  927. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  928. break;
  929. default:
  930. oclHandles.error_str += "Unknown reason";
  931. break;
  932. }
  933. throw(oclHandles.error_str);
  934. }
  935. #endif
  936. #ifdef PROFILE_
  937. double t2 = gettime();
  938. D2D += t2 - t1;
  939. #endif
  940. }
  941. /*------------------------------------------------------------
  942. @function: set kernel arguments
  943. @params:
  944. kernel_id: the index of kernel to set
  945. arg_idx: the index of argument
  946. d_mem: the variable of device memory
  947. size: the size of device memory
  948. @return: NULL
  949. @date: 03/04/2011
  950. ------------------------------------------------------------*/
  951. void _clSetArgs(int kernel_id, int arg_idx, void * d_mem, int size = 0) throw(string){
  952. if(!size){ // normal device memory object
  953. oclHandles.cl_status = clSetKernelArg(oclHandles.kernel[kernel_id], arg_idx, sizeof(d_mem), &d_mem);
  954. #ifdef ERRMSG
  955. if(oclHandles.cl_status != CL_SUCCESS){
  956. oclHandles.error_str = "excpetion in _clSetKernelArg() ";
  957. switch(oclHandles.cl_status){
  958. case CL_INVALID_KERNEL:
  959. oclHandles.error_str += "CL_INVALID_KERNEL";
  960. break;
  961. case CL_INVALID_ARG_INDEX:
  962. oclHandles.error_str += "CL_INVALID_ARG_INDEX";
  963. break;
  964. case CL_INVALID_ARG_VALUE:
  965. oclHandles.error_str += "CL_INVALID_ARG_VALUE";
  966. break;
  967. case CL_INVALID_MEM_OBJECT:
  968. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  969. break;
  970. case CL_INVALID_SAMPLER:
  971. oclHandles.error_str += "CL_INVALID_SAMPLER";
  972. break;
  973. case CL_INVALID_ARG_SIZE:
  974. oclHandles.error_str += "CL_INVALID_ARG_SIZE";
  975. break;
  976. case CL_OUT_OF_RESOURCES:
  977. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  978. break;
  979. case CL_OUT_OF_HOST_MEMORY:
  980. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  981. break;
  982. default:
  983. oclHandles.error_str += "Unknown reason";
  984. break;
  985. }
  986. throw(oclHandles.error_str);
  987. }
  988. #endif
  989. }
  990. else{ //special device object:(1) local memory; (2) single word
  991. oclHandles.cl_status = clSetKernelArg(oclHandles.kernel[kernel_id], arg_idx, size, d_mem);
  992. #ifdef ERRMSG
  993. if(oclHandles.cl_status != CL_SUCCESS){
  994. oclHandles.error_str = "excpetion in _clSetKernelArg() ";
  995. switch(oclHandles.cl_status){
  996. case CL_INVALID_KERNEL:
  997. oclHandles.error_str += "CL_INVALID_KERNEL";
  998. break;
  999. case CL_INVALID_ARG_INDEX:
  1000. oclHandles.error_str += "CL_INVALID_ARG_INDEX";
  1001. break;
  1002. case CL_INVALID_ARG_VALUE:
  1003. oclHandles.error_str += "CL_INVALID_ARG_VALUE";
  1004. break;
  1005. case CL_INVALID_MEM_OBJECT:
  1006. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  1007. break;
  1008. case CL_INVALID_SAMPLER:
  1009. oclHandles.error_str += "CL_INVALID_SAMPLER";
  1010. break;
  1011. case CL_INVALID_ARG_SIZE:
  1012. oclHandles.error_str += "CL_INVALID_ARG_SIZE";
  1013. break;
  1014. case CL_OUT_OF_RESOURCES:
  1015. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1016. break;
  1017. case CL_OUT_OF_HOST_MEMORY:
  1018. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1019. break;
  1020. default:
  1021. oclHandles.error_str += "Unknown reason";
  1022. break;
  1023. }
  1024. throw(oclHandles.error_str);
  1025. }
  1026. #endif
  1027. }
  1028. }
  1029. void _clFinish() throw(string){
  1030. oclHandles.cl_status = clFinish(oclHandles.queue);
  1031. #ifdef ERRMSG
  1032. if(oclHandles.cl_status!=CL_SUCCESS){
  1033. oclHandles.error_str = "excpetion in _clFinish";
  1034. switch(oclHandles.cl_status){
  1035. case CL_INVALID_COMMAND_QUEUE:
  1036. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  1037. break;
  1038. case CL_OUT_OF_RESOURCES:
  1039. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1040. break;
  1041. case CL_OUT_OF_HOST_MEMORY:
  1042. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1043. break;
  1044. default:
  1045. oclHandles.error_str += "Unknown reasons";
  1046. break;
  1047. }
  1048. throw(oclHandles.error_str);
  1049. }
  1050. #endif
  1051. }
  1052. /*------------------------------------------------------------
  1053. @function: entry of invoke the kernel function
  1054. @params:
  1055. kernel_id: the index of kernel to set
  1056. work_items: the number of working items
  1057. work_group_size: the size of each work group
  1058. @return: NULL
  1059. @date: 03/04/2011
  1060. ------------------------------------------------------------*/
  1061. void _clInvokeKernel(int kernel_id, int work_items, int work_group_size) throw(string){
  1062. #ifdef PROFILE_
  1063. double t1 = gettime();
  1064. #endif
  1065. cl_uint work_dim = WORK_DIM;
  1066. cl_event e[1];
  1067. if(work_items%work_group_size != 0) //process situations that work_items cannot be divided by work_group_size
  1068. work_items = work_items + (work_group_size-(work_items%work_group_size));
  1069. size_t local_work_size[] = {work_group_size, 1};
  1070. size_t global_work_size[] = {work_items, 1};
  1071. oclHandles.cl_status = clEnqueueNDRangeKernel(oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0, \
  1072. global_work_size, local_work_size, 0 , 0, &(e[0]) );
  1073. #ifdef ERRMSG
  1074. if(oclHandles.cl_status != CL_SUCCESS){
  1075. oclHandles.error_str = "excpetion in _clInvokeKernel() -> ";
  1076. switch(oclHandles.cl_status){
  1077. case CL_INVALID_PROGRAM_EXECUTABLE:
  1078. oclHandles.error_str += "CL_INVALID_PROGRAM_EXECUTABLE";
  1079. break;
  1080. case CL_INVALID_COMMAND_QUEUE:
  1081. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  1082. break;
  1083. case CL_INVALID_KERNEL:
  1084. oclHandles.error_str += "CL_INVALID_KERNEL";
  1085. break;
  1086. case CL_INVALID_CONTEXT:
  1087. oclHandles.error_str += "CL_INVALID_CONTEXT";
  1088. break;
  1089. case CL_INVALID_KERNEL_ARGS:
  1090. oclHandles.error_str += "CL_INVALID_KERNEL_ARGS";
  1091. break;
  1092. case CL_INVALID_WORK_DIMENSION:
  1093. oclHandles.error_str += "CL_INVALID_WORK_DIMENSION";
  1094. break;
  1095. case CL_INVALID_GLOBAL_WORK_SIZE:
  1096. oclHandles.error_str += "CL_INVALID_GLOBAL_WORK_SIZE";
  1097. break;
  1098. case CL_INVALID_WORK_GROUP_SIZE:
  1099. oclHandles.error_str += "CL_INVALID_WORK_GROUP_SIZE";
  1100. break;
  1101. case CL_INVALID_WORK_ITEM_SIZE:
  1102. oclHandles.error_str += "CL_INVALID_WORK_ITEM_SIZE";
  1103. break;
  1104. case CL_INVALID_GLOBAL_OFFSET:
  1105. oclHandles.error_str += "CL_INVALID_GLOBAL_OFFSET";
  1106. break;
  1107. case CL_OUT_OF_RESOURCES:
  1108. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1109. break;
  1110. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  1111. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  1112. break;
  1113. case CL_INVALID_EVENT_WAIT_LIST:
  1114. oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
  1115. break;
  1116. case CL_OUT_OF_HOST_MEMORY:
  1117. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1118. break;
  1119. default:
  1120. oclHandles.error_str += "Unkown reseason";
  1121. break;
  1122. }
  1123. throw(oclHandles.error_str);
  1124. }
  1125. #endif
  1126. //_clFinish();
  1127. oclHandles.cl_status = clWaitForEvents(1, &e[0]);
  1128. #ifdef ERRMSG
  1129. if (oclHandles.cl_status!= CL_SUCCESS){
  1130. oclHandles.error_str = "excpetion in _clEnqueueNDRange() -> clWaitForEvents ->";
  1131. switch(oclHandles.cl_status){
  1132. case CL_INVALID_VALUE:
  1133. oclHandles.error_str += "CL_INVALID_VALUE";
  1134. break;
  1135. case CL_INVALID_CONTEXT:
  1136. oclHandles.error_str += "CL_INVALID_CONTEXT";
  1137. break;
  1138. case CL_INVALID_EVENT:
  1139. oclHandles.error_str += "CL_INVALID_EVENT";
  1140. break;
  1141. case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
  1142. oclHandles.error_str += "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
  1143. break;
  1144. case CL_OUT_OF_RESOURCES:
  1145. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1146. break;
  1147. case CL_OUT_OF_HOST_MEMORY:
  1148. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1149. break;
  1150. default:
  1151. oclHandles.error_str += "Unkown Reason";
  1152. break;
  1153. }
  1154. throw(oclHandles.error_str);
  1155. }
  1156. #endif
  1157. #ifdef PROFILE_
  1158. double t2 = gettime();
  1159. KE += t2 - t1;
  1160. #endif
  1161. }
  1162. /*------------------------------------------------------------
  1163. @function: set device memory in an easy manner
  1164. @params:
  1165. mem_d: the device memory to be set;
  1166. val: set the selected memory to 'val';
  1167. number_elements: the number of elements in the selected memory
  1168. @return: NULL
  1169. @date: 03/04/2011
  1170. ------------------------------------------------------------*/
  1171. void _clMemset(cl_mem mem_d, short val, int number_bytes)throw(string){
  1172. #ifdef PROFILE_
  1173. double t1 = gettime();
  1174. #endif
  1175. int kernel_id = 0;
  1176. int arg_idx = 0;
  1177. _clSetArgs(kernel_id, arg_idx++, mem_d);
  1178. _clSetArgs(kernel_id, arg_idx++, &val, sizeof(short));
  1179. _clSetArgs(kernel_id, arg_idx++, &number_bytes, sizeof(int));
  1180. cl_uint work_dim = WORK_DIM;
  1181. int work_items = number_bytes;
  1182. cl_event e[1];
  1183. if(work_items%work_group_size != 0) //process situations that work_items cannot be divided by work_group_size
  1184. work_items = work_items + (work_group_size-(work_items%work_group_size));
  1185. size_t local_work_size[] = {work_group_size, 1};
  1186. size_t global_work_size[] = {work_items, 1};
  1187. oclHandles.cl_status = clEnqueueNDRangeKernel(oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0, \
  1188. global_work_size, local_work_size, 0 , 0, &(e[0]) );
  1189. #ifdef ERRMSG
  1190. if(oclHandles.cl_status != CL_SUCCESS){
  1191. oclHandles.error_str = "excpetion in _clMemset() -> ";
  1192. switch(oclHandles.cl_status){
  1193. case CL_INVALID_PROGRAM_EXECUTABLE:
  1194. oclHandles.error_str += "CL_INVALID_PROGRAM_EXECUTABLE";
  1195. break;
  1196. case CL_INVALID_COMMAND_QUEUE:
  1197. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  1198. break;
  1199. case CL_INVALID_KERNEL:
  1200. oclHandles.error_str += "CL_INVALID_KERNEL";
  1201. break;
  1202. case CL_INVALID_CONTEXT:
  1203. oclHandles.error_str += "CL_INVALID_CONTEXT";
  1204. break;
  1205. case CL_INVALID_KERNEL_ARGS:
  1206. oclHandles.error_str += "CL_INVALID_KERNEL_ARGS";
  1207. break;
  1208. case CL_INVALID_WORK_DIMENSION:
  1209. oclHandles.error_str += "CL_INVALID_WORK_DIMENSION";
  1210. break;
  1211. case CL_INVALID_GLOBAL_WORK_SIZE:
  1212. oclHandles.error_str += "CL_INVALID_GLOBAL_WORK_SIZE";
  1213. break;
  1214. case CL_INVALID_WORK_GROUP_SIZE:
  1215. oclHandles.error_str += "CL_INVALID_WORK_GROUP_SIZE";
  1216. break;
  1217. case CL_INVALID_WORK_ITEM_SIZE:
  1218. oclHandles.error_str += "CL_INVALID_WORK_ITEM_SIZE";
  1219. break;
  1220. case CL_INVALID_GLOBAL_OFFSET:
  1221. oclHandles.error_str += "CL_INVALID_GLOBAL_OFFSET";
  1222. break;
  1223. case CL_OUT_OF_RESOURCES:
  1224. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1225. break;
  1226. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  1227. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  1228. break;
  1229. case CL_INVALID_EVENT_WAIT_LIST:
  1230. oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
  1231. break;
  1232. case CL_OUT_OF_HOST_MEMORY:
  1233. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1234. break;
  1235. default:
  1236. oclHandles.error_str += "Unkown reseason";
  1237. break;
  1238. }
  1239. throw(oclHandles.error_str);
  1240. }
  1241. #endif
  1242. //_clFinish();
  1243. oclHandles.cl_status = clWaitForEvents(1, &e[0]);
  1244. #ifdef ERRMSG
  1245. if (oclHandles.cl_status!= CL_SUCCESS){
  1246. oclHandles.error_str = "excpetion in _clMemset() -> clWaitForEvents ->";
  1247. switch(oclHandles.cl_status){
  1248. case CL_INVALID_VALUE:
  1249. oclHandles.error_str += "CL_INVALID_VALUE";
  1250. break;
  1251. case CL_INVALID_CONTEXT:
  1252. oclHandles.error_str += "CL_INVALID_CONTEXT";
  1253. break;
  1254. case CL_INVALID_EVENT:
  1255. oclHandles.error_str += "CL_INVALID_EVENT";
  1256. break;
  1257. case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
  1258. oclHandles.error_str += "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
  1259. break;
  1260. case CL_OUT_OF_RESOURCES:
  1261. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1262. break;
  1263. case CL_OUT_OF_HOST_MEMORY:
  1264. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1265. break;
  1266. default:
  1267. oclHandles.error_str += "Unkown Reason";
  1268. break;
  1269. }
  1270. throw(oclHandles.error_str);
  1271. }
  1272. #endif
  1273. #ifdef PROFILE_
  1274. double t2 = gettime();
  1275. H2D += t2 - t1;
  1276. #endif
  1277. }
  1278. /*------------------------------------------------------------
  1279. @function: entry of invoke the kernel function using 2d working items
  1280. @params:
  1281. kernel_id: the index of kernel to set
  1282. range_x: the number of working items in x direction
  1283. range_y: the number of working items in y direction
  1284. group_x: the number of working items in each work group in x direction
  1285. group_y: the number of working items in each work group in y direction
  1286. @return: NULL
  1287. @date: 03/04/2011
  1288. ------------------------------------------------------------*/
  1289. void _clInvokeKernel2D(int kernel_id, int range_x, int range_y, int group_x, int group_y) throw(string){
  1290. #ifdef PROFILE_
  1291. double t1 = gettime();
  1292. #endif
  1293. cl_uint work_dim = WORK_DIM;
  1294. size_t local_work_size[] = {group_x, group_y};
  1295. size_t global_work_size[] = {range_x, range_y};
  1296. cl_event e[1];
  1297. /*if(work_items%work_group_size != 0) //process situations that work_items cannot be divided by work_group_size
  1298. work_items = work_items + (work_group_size-(work_items%work_group_size));*/
  1299. oclHandles.cl_status = clEnqueueNDRangeKernel(oclHandles.queue, oclHandles.kernel[kernel_id], work_dim, 0, \
  1300. global_work_size, local_work_size, 0 , 0, &(e[0]) );
  1301. #ifdef ERRMSG
  1302. if(oclHandles.cl_status != CL_SUCCESS){
  1303. oclHandles.error_str = "excpetion in _clInvokeKernel() -> ";
  1304. switch(oclHandles.cl_status){
  1305. case CL_INVALID_PROGRAM_EXECUTABLE:
  1306. oclHandles.error_str += "CL_INVALID_PROGRAM_EXECUTABLE";
  1307. break;
  1308. case CL_INVALID_COMMAND_QUEUE:
  1309. oclHandles.error_str += "CL_INVALID_COMMAND_QUEUE";
  1310. break;
  1311. case CL_INVALID_KERNEL:
  1312. oclHandles.error_str += "CL_INVALID_KERNEL";
  1313. break;
  1314. case CL_INVALID_CONTEXT:
  1315. oclHandles.error_str += "CL_INVALID_CONTEXT";
  1316. break;
  1317. case CL_INVALID_KERNEL_ARGS:
  1318. oclHandles.error_str += "CL_INVALID_KERNEL_ARGS";
  1319. break;
  1320. case CL_INVALID_WORK_DIMENSION:
  1321. oclHandles.error_str += "CL_INVALID_WORK_DIMENSION";
  1322. break;
  1323. case CL_INVALID_GLOBAL_WORK_SIZE:
  1324. oclHandles.error_str += "CL_INVALID_GLOBAL_WORK_SIZE";
  1325. break;
  1326. case CL_INVALID_WORK_GROUP_SIZE:
  1327. oclHandles.error_str += "CL_INVALID_WORK_GROUP_SIZE";
  1328. break;
  1329. case CL_INVALID_WORK_ITEM_SIZE:
  1330. oclHandles.error_str += "CL_INVALID_WORK_ITEM_SIZE";
  1331. break;
  1332. case CL_INVALID_GLOBAL_OFFSET:
  1333. oclHandles.error_str += "CL_INVALID_GLOBAL_OFFSET";
  1334. break;
  1335. case CL_OUT_OF_RESOURCES:
  1336. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1337. break;
  1338. case CL_MEM_OBJECT_ALLOCATION_FAILURE:
  1339. oclHandles.error_str += "CL_MEM_OBJECT_ALLOCATION_FAILURE";
  1340. break;
  1341. case CL_INVALID_EVENT_WAIT_LIST:
  1342. oclHandles.error_str += "CL_INVALID_EVENT_WAIT_LIST";
  1343. break;
  1344. case CL_OUT_OF_HOST_MEMORY:
  1345. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1346. break;
  1347. default:
  1348. oclHandles.error_str += "Unkown reseason";
  1349. break;
  1350. }
  1351. throw(oclHandles.error_str);
  1352. }
  1353. #endif
  1354. oclHandles.cl_status = clWaitForEvents(1, &e[0]);
  1355. #ifdef ERRMSG
  1356. if (oclHandles.cl_status!= CL_SUCCESS){
  1357. oclHandles.error_str = "excpetion in _clInvokeKernel2D() -> clWaitForEvents ->";
  1358. switch(oclHandles.cl_status){
  1359. case CL_INVALID_VALUE:
  1360. oclHandles.error_str += "CL_INVALID_VALUE";
  1361. break;
  1362. case CL_INVALID_CONTEXT:
  1363. oclHandles.error_str += "CL_INVALID_CONTEXT";
  1364. break;
  1365. case CL_INVALID_EVENT:
  1366. oclHandles.error_str += "CL_INVALID_EVENT";
  1367. break;
  1368. case CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST:
  1369. oclHandles.error_str += "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
  1370. break;
  1371. case CL_OUT_OF_RESOURCES:
  1372. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1373. break;
  1374. case CL_OUT_OF_HOST_MEMORY:
  1375. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1376. break;
  1377. default:
  1378. oclHandles.error_str += "Unkown Reason";
  1379. break;
  1380. }
  1381. throw(oclHandles.error_str);
  1382. }
  1383. #endif
  1384. #ifdef PROFILE_
  1385. double t2 = gettime();
  1386. KE += t2 - t1;
  1387. #endif
  1388. }
  1389. /*------------------------------------------------------------
  1390. @function: release OpenCL memory objects
  1391. @params:
  1392. ob: the memory object to free or release
  1393. @return: NULL
  1394. @date: 03/04/2011
  1395. ------------------------------------------------------------*/
  1396. void _clFree(cl_mem ob) throw(string){
  1397. #ifdef PROFILE_
  1398. double t1 = gettime();
  1399. #endif
  1400. if(ob!=NULL)
  1401. oclHandles.cl_status = clReleaseMemObject(ob);
  1402. #ifdef ERRMSG
  1403. if (oclHandles.cl_status!= CL_SUCCESS){
  1404. oclHandles.error_str = "excpetion in _clFree() ->";
  1405. switch(oclHandles.cl_status){
  1406. case CL_INVALID_MEM_OBJECT:
  1407. oclHandles.error_str += "CL_INVALID_MEM_OBJECT";
  1408. break;
  1409. case CL_OUT_OF_RESOURCES:
  1410. oclHandles.error_str += "CL_OUT_OF_RESOURCES";
  1411. break;
  1412. case CL_OUT_OF_HOST_MEMORY:
  1413. oclHandles.error_str += "CL_OUT_OF_HOST_MEMORY";
  1414. break;
  1415. default:
  1416. oclHandles.error_str += "Unkown reseason";
  1417. break;
  1418. }
  1419. throw(oclHandles.error_str);
  1420. }
  1421. #endif
  1422. #ifdef PROFILE_
  1423. double t2 = gettime();
  1424. MF += t2 - t1;
  1425. #endif
  1426. }
  1427. /*------------------------------------------------------------
  1428. @function: output time profiling information
  1429. @params: NULL
  1430. @return: NULL
  1431. @date: 03/04/2011
  1432. ------------------------------------------------------------*/
  1433. void _clStatistics(){
  1434. #ifdef PROFILE_
  1435. FILE *fp_pd = fopen("PD_OCL.txt", "a");
  1436. fprintf(fp_pd, "%lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf, %lf\n", CC, CR, MA, MF, H2D, D2H, D2D, KE, KC);
  1437. fclose(fp_pd);
  1438. #endif
  1439. return ;
  1440. }
  1441. #endif //_CL_HELPER_