clutils.cpp 41 KB


  1. /****************************************************************************\
  2. * Copyright (c) 2011, Advanced Micro Devices, Inc. *
  3. * All rights reserved. *
  4. * *
  5. * Redistribution and use in source and binary forms, with or without *
  6. * modification, are permitted provided that the following conditions *
  7. * are met: *
  8. * *
  9. * Redistributions of source code must retain the above copyright notice, *
  10. * this list of conditions and the following disclaimer. *
  11. * *
  12. * Redistributions in binary form must reproduce the above copyright notice, *
  13. * this list of conditions and the following disclaimer in the documentation *
  14. * and/or other materials provided with the distribution. *
  15. * *
  16. * Neither the name of the copyright holder nor the names of its contributors *
  17. * may be used to endorse or promote products derived from this software *
  18. * without specific prior written permission. *
  19. * *
  20. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS *
  21. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED *
  22. * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR *
  23. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR *
  24. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
  25. * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, *
  26. * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR *
  27. * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF *
  28. * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
  29. * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
  30. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
  31. * *
  32. * If you use the software (in whole or in part), you shall adhere to all *
  33. * applicable U.S., European, and other export laws, including but not *
  34. * limited to the U.S. Export Administration Regulations (“EAR”), (15 C.F.R. *
  35. * Sections 730 through 774), and E.U. Council Regulation (EC) No 1334/2000 *
  36. * of 22 June 2000. Further, pursuant to Section 740.6 of the EAR, you *
  37. * hereby certify that, except pursuant to a license granted by the United *
  38. * States Department of Commerce Bureau of Industry and Security or as *
  39. * otherwise permitted pursuant to a License Exception under the U.S. Export *
  40. * Administration Regulations ("EAR"), you will not (1) export, re-export or *
  41. * release to a national of a country in Country Groups D:1, E:1 or E:2 any *
  42. * restricted technology, software, or source code you receive hereunder, *
  43. * or (2) export to Country Groups D:1, E:1 or E:2 the direct product of such *
  44. * technology or software, if such foreign produced direct product is subject *
  45. * to national security controls as identified on the Commerce Control List *
  46. *(currently found in Supplement 1 to Part 774 of EAR). For the most current *
  47. * Country Group listings, or for additional information about the EAR or *
  48. * your obligations under those regulations, please refer to the U.S. Bureau *
  49. * of Industry and Security’s website at http://www.bis.doc.gov/. *
  50. \****************************************************************************/
  51. #include <stdlib.h>
  52. #include <stdio.h>
  53. #include <string.h>
  54. #include <math.h>
  55. #include <CL/cl.h>
  56. #include "clutils.h"
  57. #include "utils.h"
  58. // The following variables have file scope so that the utility functions
  59. // in this file can share one platform/device/context/queue without passing them around
  60. //! All discoverable OpenCL platforms (numPlatforms entries, filled in by cl_init)
  61. static cl_platform_id* platforms = NULL;
  62. static cl_uint numPlatforms;
  63. //! All discoverable OpenCL devices (one array per platform; numDevices[i] is the length of devices[i])
  64. static cl_device_id** devices = NULL;
  65. static cl_uint* numDevices;
  66. //! The chosen OpenCL platform (assigned in cl_init)
  67. static cl_platform_id platform = NULL;
  68. //! The chosen OpenCL device (assigned in cl_init, read by cl_compileProgram for build info)
  69. static cl_device_id device = NULL;
  70. //! OpenCL context used by the buffer, image and program helpers
  71. static cl_context context = NULL;
  72. //! Active command queue, followed by the profiling and non-profiling queues cl_init creates
  73. static cl_command_queue commandQueue = NULL;
  74. static cl_command_queue commandQueueProf = NULL;
  75. static cl_command_queue commandQueueNoProf = NULL;
  76. //! When true, cl_init makes the profiling queue the active one
  77. static bool eventsEnabled = false;
  78. //-------------------------------------------------------
  79. // Initialization and Cleanup
  80. //-------------------------------------------------------
  81. //! Initialize OpenCl environment on one device
  82. /*!
  83. Init function for one device. Looks for supported devices and creates a context
  84. \return returns a context initialized
  85. */
  86. cl_context cl_init(char devicePreference)
  87. {
  88. cl_int status;
  89. // Discover and populate the platforms
  90. status = clGetPlatformIDs(0, NULL, &numPlatforms);
  91. cl_errChk(status, "Getting platform IDs", true);
  92. if (numPlatforms > 0)
  93. {
  94. // Get all the platforms
  95. platforms = (cl_platform_id*)alloc(numPlatforms *
  96. sizeof(cl_platform_id));
  97. status = clGetPlatformIDs(numPlatforms, platforms, NULL);
  98. cl_errChk(status, "Getting platform IDs", true);
  99. }
  100. else
  101. {
  102. // If no platforms are available, we shouldn't continue
  103. printf("No OpenCL platforms found\n");
  104. exit(-1);
  105. }
  106. // Allocate space for the device lists and lengths
  107. numDevices = (cl_uint*)alloc(sizeof(cl_uint)*numPlatforms);
  108. devices = (cl_device_id**)alloc(sizeof(cl_device_id*)*numPlatforms);
  109. // If a device preference was supplied, we'll limit the search of devices
  110. // based on type
  111. cl_device_type deviceType = CL_DEVICE_TYPE_ALL;
  112. if(devicePreference == 'c') {
  113. deviceType = CL_DEVICE_TYPE_CPU;
  114. }
  115. if(devicePreference == 'g') {
  116. deviceType = CL_DEVICE_TYPE_GPU;
  117. }
  118. // Traverse the platforms array printing information and
  119. // populating devices
  120. for(unsigned int i = 0; i < numPlatforms ; i++)
  121. {
  122. // Print out some basic info about the platform
  123. char* platformName = NULL;
  124. char* platformVendor = NULL;
  125. platformName = cl_getPlatformName(platforms[i]);
  126. platformVendor = cl_getPlatformVendor(platforms[i]);
  127. status = clGetDeviceIDs(platforms[i], deviceType, 0, NULL, &numDevices[i]);
  128. cl_errChk(status, "Getting device IDs", false);
  129. if(status != CL_SUCCESS) {
  130. printf("This is a known NVIDIA bug (if platform == AMD then die)\n");
  131. printf("Setting number of devices to 0 and continuing\n");
  132. numDevices[i] = 0;
  133. }
  134. printf("Platform %d (%d devices):\n", i, numDevices[i]);
  135. printf("\tName: %s\n", platformName);
  136. printf("\tVendor: %s\n", platformVendor);
  137. free(platformName);
  138. free(platformVendor);
  139. // Populate OpenCL devices if any exist
  140. if(numDevices[i] != 0)
  141. {
  142. // Allocate an array of devices of size "numDevices"
  143. devices[i] = (cl_device_id*)alloc(sizeof(cl_device_id)*numDevices[i]);
  144. // Populate Arrray with devices
  145. status = clGetDeviceIDs(platforms[i], deviceType, numDevices[i],
  146. devices[i], NULL);
  147. cl_errChk(status, "Getting device IDs", true);
  148. }
  149. // Print some information about each device
  150. for( unsigned int j = 0; j < numDevices[i]; j++)
  151. {
  152. char* deviceName = NULL;
  153. char* deviceVendor = NULL;
  154. printf("\tDevice %d:\n", j);
  155. deviceName = cl_getDeviceName(devices[i][j]);
  156. deviceVendor = cl_getDeviceVendor(devices[i][j]);
  157. printf("\t\tName: %s\n", deviceName);
  158. printf("\t\tVendor: %s\n", deviceVendor);
  159. free(deviceName);
  160. free(deviceVendor);
  161. }
  162. }
  163. // Hard-code in the platform/device to use, or uncomment 'scanf'
  164. // to decide at runtime
  165. cl_uint chosen_platform, chosen_device;
  166. // UNCOMMENT the following two lines to manually select device each time
  167. //printf("Enter Platform and Device No (Seperated by Space) \n");
  168. //scanf("%d %d", &chosen_platform, &chosen_device);
  169. chosen_platform = 0;
  170. chosen_device = 0;
  171. printf("Using Platform %d, Device %d \n", chosen_platform, chosen_device);
  172. // Do a sanity check of platform/device selection
  173. if(chosen_platform >= numPlatforms ||
  174. chosen_device >= numDevices[chosen_platform]) {
  175. printf("Invalid platform/device combination\n");
  176. exit(-1);
  177. }
  178. // Set the selected platform and device
  179. platform = platforms[chosen_platform];
  180. device = devices[chosen_platform][chosen_device];
  181. // Create the context
  182. cl_context_properties cps[3] = {CL_CONTEXT_PLATFORM,
  183. (cl_context_properties)(platform), 0};
  184. context = clCreateContext(cps, 1, &device, NULL, NULL, &status);
  185. cl_errChk(status, "Creating context", true);
  186. // Create the command queue
  187. commandQueueProf = clCreateCommandQueue(context, device,
  188. CL_QUEUE_PROFILING_ENABLE, &status);
  189. cl_errChk(status, "creating command queue", true);
  190. commandQueueNoProf = clCreateCommandQueue(context, device, 0, &status);
  191. cl_errChk(status, "creating command queue", true);
  192. if(eventsEnabled) {
  193. printf("Profiling enabled\n");
  194. commandQueue = commandQueueProf;
  195. }
  196. else {
  197. printf("Profiling disabled\n");
  198. commandQueue = commandQueueNoProf;
  199. }
  200. return context;
  201. }
  202. cl_context cl_init_context(int platform_id, int device_id, int quiet, int use_gpu) {
  203. int printInfo=1;
  204. if (platform_id >= 0 && device_id >= 0) printInfo = 0;
  205. cl_int status;
  206. // Used to iterate through the platforms and devices, respectively
  207. cl_uint numPlatforms;
  208. cl_uint numDevices;
  209. // These will hold the platform and device we select (can potentially be
  210. // multiple, but we're just doing one for now)
  211. // cl_platform_id platform = NULL;
  212. status = clGetPlatformIDs(0, NULL, &numPlatforms);
  213. if (printInfo) printf("Number of platforms detected:%d\n", numPlatforms);
  214. // Print some information about the available platforms
  215. cl_platform_id *platforms = NULL;
  216. cl_device_id * devices = NULL;
  217. if (numPlatforms > 0)
  218. {
  219. // get all the platforms
  220. platforms = (cl_platform_id*)malloc(numPlatforms *
  221. sizeof(cl_platform_id));
  222. status = clGetPlatformIDs(numPlatforms, platforms, NULL);
  223. // Traverse the platforms array
  224. if (printInfo) printf("Checking For OpenCl Compatible Devices\n");
  225. for(unsigned int i = 0; i < numPlatforms ; i++)
  226. {
  227. char pbuf[100];
  228. if (printInfo) printf("Platform %d:\t", i);
  229. status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR,
  230. sizeof(pbuf), pbuf, NULL);
  231. if (printInfo) printf("Vendor: %s\n", pbuf);
  232. //unsigned int numDevices;
  233. status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &numDevices);
  234. if(cl_errChk(status, "checking for devices",true))
  235. exit(1);
  236. if(numDevices == 0) {
  237. printf("There are no devices for Platform %d\n",i);
  238. exit(0);
  239. }
  240. else
  241. {
  242. if (printInfo) printf("\tNo of devices for Platform %d is %u\n",i, numDevices);
  243. //! Allocate an array of devices of size "numDevices"
  244. devices = (cl_device_id*)malloc(sizeof(cl_device_id)*numDevices);
  245. //! Populate Arrray with devices
  246. status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, numDevices,
  247. devices, NULL);
  248. if(cl_errChk(status, "getting device IDs",true)) {
  249. exit(1);
  250. }
  251. }
  252. for( unsigned int j = 0; j < numDevices; j++)
  253. {
  254. char dbuf[100];
  255. char deviceStr[100];
  256. if (printInfo) printf("\tDevice: %d\t", j);
  257. status = clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR, sizeof(dbuf),
  258. deviceStr, NULL);
  259. cl_errChk(status, "Getting Device Info\n",true);
  260. if (printInfo) printf("Vendor: %s", deviceStr);
  261. status = clGetDeviceInfo(devices[j], CL_DEVICE_NAME, sizeof(dbuf),
  262. dbuf, NULL);
  263. if (printInfo) printf("\n\t\tName: %s\n", dbuf);
  264. }
  265. }
  266. }
  267. else
  268. {
  269. // If no platforms are available, we're sunk!
  270. printf("No OpenCL platforms found\n");
  271. exit(0);
  272. }
  273. int platform_touse;
  274. unsigned int device_touse;
  275. if (printInfo) printf("Enter Platform and Device No (Seperated by Space) \n");
  276. if (printInfo) scanf("%d %d", &platform_touse, &device_touse);
  277. else {
  278. platform_touse = platform_id;
  279. device_touse = device_id;
  280. }
  281. if (!quiet) printf("Using Platform %d \t Device No %d \n",platform_touse, device_touse);
  282. //! Recheck how many devices does our chosen platform have
  283. status = clGetDeviceIDs(platforms[platform_touse], CL_DEVICE_TYPE_ALL, 0, NULL, &numDevices);
  284. if(device_touse > numDevices)
  285. {
  286. printf("Invalid Device Number\n");
  287. exit(1);
  288. }
  289. //! Populate devices array with all the visible devices of our chosen platform
  290. devices = (cl_device_id *)malloc(sizeof(cl_device_id)*numDevices);
  291. status = clGetDeviceIDs(platforms[platform_touse],
  292. CL_DEVICE_TYPE_ALL, numDevices,
  293. devices, NULL);
  294. if(cl_errChk(status,"Error in Getting Devices\n",true)) exit(1);
  295. // Selector for CPU/GPU on the basis of the use_gpu parameter
  296. cl_device_type device_type= use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
  297. cl_context_properties cps[3] = {CL_CONTEXT_PLATFORM,
  298. (cl_context_properties)(platforms[platform_touse]), 0};
  299. cl_context_properties *cprops = cps;
  300. context = clCreateContextFromType(
  301. cprops, device_type,
  302. NULL, NULL, &status);
  303. if(cl_errChk(status, "creating Context",true)) {
  304. exit(1);
  305. }
  306. #define PROFILING
  307. #ifdef PROFILING
  308. commandQueue = clCreateCommandQueue(context,
  309. devices[device_touse], CL_QUEUE_PROFILING_ENABLE, &status);
  310. #else
  311. clCommandQueue = clCreateCommandQueue(clGPUContext,
  312. devices[device_touse], NULL, &status);
  313. #endif // PROFILING
  314. if(cl_errChk(status, "creating command queue",true)) {
  315. exit(1);
  316. }
  317. return context;
  318. }
  319. /*!
  320. Release all resources that the user doesn't have access to.
  321. */
  322. void cl_cleanup()
  323. {
  324. // Free the command queue
  325. if(commandQueue) {
  326. clReleaseCommandQueue(commandQueue);
  327. }
  328. // Free the context
  329. if(context) {
  330. clReleaseContext(context);
  331. }
  332. free(devices);
  333. free(numDevices);
  334. // Free the platforms
  335. free(platforms);
  336. }
  337. //! Release a kernel object
  338. /*!
  339. \param mem The kernel object to release
  340. */
  341. void cl_freeKernel(cl_kernel kernel)
  342. {
  343. cl_int status;
  344. if(kernel != NULL) {
  345. status = clReleaseKernel(kernel);
  346. cl_errChk(status, "Releasing kernel object", true);
  347. }
  348. }
  349. //! Release memory allocated on the device
  350. /*!
  351. \param mem The device pointer to release
  352. */
  353. void cl_freeMem(cl_mem mem)
  354. {
  355. cl_int status;
  356. if(mem != NULL) {
  357. status = clReleaseMemObject(mem);
  358. cl_errChk(status, "Releasing mem object", true);
  359. }
  360. }
  361. //! Release a program object
  362. /*!
  363. \param mem The program object to release
  364. */
  365. void cl_freeProgram(cl_program program)
  366. {
  367. cl_int status;
  368. if(program != NULL) {
  369. status = clReleaseProgram(program);
  370. cl_errChk(status, "Releasing program object", true);
  371. }
  372. }
  373. //! Returns a reference to the command queue
  374. /*!
  375. Returns a reference to the command queue \n
  376. Used for any OpenCl call that needs the command queue declared in clutils.cpp
  377. */
  378. cl_command_queue cl_getCommandQueue()
  379. {
  380. return commandQueue;
  381. }
  382. //-------------------------------------------------------
  383. // Synchronization functions
  384. //-------------------------------------------------------
  385. /*!
  386. Wait till all pending commands in queue are finished
  387. */
  388. void cl_sync()
  389. {
  390. clFinish(commandQueue);
  391. }
  392. //-------------------------------------------------------
  393. // Memory allocation
  394. //-------------------------------------------------------
  395. //! Allocate a buffer on a device
  396. /*!
  397. \param mem_size Size of memory in bytes
  398. \param flags Optional cl_mem_flags
  399. \return Returns a cl_mem object that points to device memory
  400. */
  401. cl_mem cl_allocBuffer(size_t mem_size, cl_mem_flags flags)
  402. {
  403. cl_mem mem;
  404. cl_int status;
  405. /*!
  406. Logging information for keeping track of device memory
  407. */
  408. static int allocationCount = 1;
  409. static size_t allocationSize = 0;
  410. allocationCount++;
  411. allocationSize += mem_size;
  412. mem = clCreateBuffer(context, flags, mem_size, NULL, &status);
  413. cl_errChk(status, "creating buffer", true);
  414. return mem;
  415. }
  416. //! Allocate constant memory on device
  417. /*!
  418. \param mem_size Size of memory in bytes
  419. \param host_ptr Host pointer that contains the data
  420. \return Returns a cl_mem object that points to device memory
  421. */
  422. cl_mem cl_allocBufferConst(size_t mem_size, void* host_ptr)
  423. {
  424. cl_mem mem;
  425. cl_int status;
  426. mem = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
  427. mem_size, host_ptr, &status);
  428. cl_errChk(status, "Error creating const mem buffer", true);
  429. return mem;
  430. }
  431. //! Allocate a buffer on device pinning the host memory at host_ptr
  432. /*!
  433. \param mem_size Size of memory in bytes
  434. \return Returns a cl_mem object that points to pinned memory on the host
  435. */
  436. cl_mem cl_allocBufferPinned(size_t mem_size)
  437. {
  438. cl_mem mem;
  439. cl_int status;
  440. mem = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
  441. mem_size, NULL, &status);
  442. cl_errChk(status, "Error allocating pinned memory", true);
  443. return mem;
  444. }
  445. //! Allocate an image on a device
  446. /*!
  447. \param height Number of rows in the image
  448. \param width Number of columns in the image
  449. \param elemSize Size of the elements in the image
  450. \param flags Optional cl_mem_flags
  451. \return Returns a cl_mem object that points to device memory
  452. */
  453. cl_mem cl_allocImage(size_t height, size_t width, char type, cl_mem_flags flags)
  454. {
  455. cl_mem mem;
  456. cl_int status;
  457. size_t elemSize = 0;
  458. cl_image_format format;
  459. format.image_channel_order = CL_R;
  460. switch(type) {
  461. case 'f':
  462. elemSize = sizeof(float);
  463. format.image_channel_data_type = CL_FLOAT;
  464. break;
  465. case 'i':
  466. elemSize = sizeof(int);
  467. format.image_channel_data_type = CL_SIGNED_INT32;
  468. break;
  469. default:
  470. printf("Error creating image: Unsupported image type.\n");
  471. exit(-1);
  472. }
  473. /*!
  474. Logging information for keeping track of device memory
  475. */
  476. static int allocationCount = 1;
  477. static size_t allocationSize = 0;
  478. allocationCount++;
  479. allocationSize += height*width*elemSize;
  480. // Create the image
  481. mem = clCreateImage2D(context, flags, &format, width, height, 0, NULL, &status);
  482. //cl_errChk(status, "creating image", true);
  483. if(status != CL_SUCCESS) {
  484. printf("Error creating image: Images may not be supported for this device.\n");
  485. printSupportedImageFormats();
  486. getchar();
  487. exit(-1);
  488. }
  489. return mem;
  490. }
  491. //-------------------------------------------------------
  492. // Data transfers
  493. //-------------------------------------------------------
  494. // Copy and map a buffer
  495. void* cl_copyAndMapBuffer(cl_mem dst, cl_mem src, size_t size) {
  496. void* ptr; // Pointer to the pinned memory that will be returned
  497. cl_copyBufferToBuffer(dst, src, size);
  498. ptr = cl_mapBuffer(dst, size, CL_MAP_READ);
  499. return ptr;
  500. }
  501. // Copy a buffer
  502. void cl_copyBufferToBuffer(cl_mem dst, cl_mem src, size_t size)
  503. {
  504. cl_int status;
  505. status = clEnqueueCopyBuffer(commandQueue, src, dst, 0, 0, size, 0, NULL,
  506. NULL);
  507. cl_errChk(status, "Copying buffer", true);
  508. }
  509. //! Copy a buffer to the device
  510. /*!
  511. \param dst Valid device pointer
  512. \param src Host pointer that contains the data
  513. \param mem_size Size of data to copy
  514. \param blocking Blocking or non-blocking operation
  515. */
  516. void cl_copyBufferToDevice(cl_mem dst, void* src, size_t mem_size, cl_bool blocking)
  517. {
  518. cl_int status;
  519. status = clEnqueueWriteBuffer(commandQueue, dst, blocking, 0,
  520. mem_size, src, 0, NULL, NULL);
  521. cl_errChk(status, "Writing buffer", true);
  522. }
  523. //! Copy a buffer to the host
  524. /*!
  525. \param dst Valid host pointer
  526. \param src Device pointer that contains the data
  527. \param mem_size Size of data to copy
  528. \param blocking Blocking or non-blocking operation
  529. */
  530. void cl_copyBufferToHost(void* dst, cl_mem src, size_t mem_size, cl_bool blocking)
  531. {
  532. cl_int status;
  533. status = clEnqueueReadBuffer(commandQueue, src, blocking, 0,
  534. mem_size, dst, 0, NULL, NULL);
  535. cl_errChk(status, "Reading buffer", true);
  536. }
  537. //! Copy a buffer to a 2D image
  538. /*!
  539. \param src Valid device buffer
  540. \param dst Empty device image
  541. \param mem_size Size of data to copy
  542. */
  543. void cl_copyBufferToImage(cl_mem buffer, cl_mem image, int height, int width)
  544. {
  545. size_t origin[3] = {0, 0, 0};
  546. size_t region[3] = {width, height, 1};
  547. cl_int status;
  548. status = clEnqueueCopyBufferToImage(commandQueue, buffer, image, 0,
  549. origin, region, 0, NULL, NULL);
  550. cl_errChk(status, "Copying buffer to image", true);
  551. }
  552. // Copy data to an image on the device
  553. /*!
  554. \param dst Valid device pointer
  555. \param src Host pointer that contains the data
  556. \param height Height of the image
  557. \param width Width of the image
  558. */
  559. void cl_copyImageToDevice(cl_mem dst, void* src, size_t height, size_t width)
  560. {
  561. cl_int status;
  562. size_t origin[3] = {0, 0, 0};
  563. size_t region[3] = {width, height, 1};
  564. status = clEnqueueWriteImage(commandQueue, dst, CL_TRUE, origin,
  565. region, 0, 0, src, 0, NULL, NULL);
  566. cl_errChk(status, "Writing image", true);
  567. }
  568. //! Copy an image to the host
  569. /*!
  570. \param dst Valid host pointer
  571. \param src Device pointer that contains the data
  572. \param height Height of the image
  573. \param width Width of the image
  574. */
  575. void cl_copyImageToHost(void* dst, cl_mem src, size_t height, size_t width)
  576. {
  577. cl_int status;
  578. size_t origin[3] = {0, 0, 0};
  579. size_t region[3] = {width, height, 1};
  580. status = clEnqueueReadImage(commandQueue, src, CL_TRUE, origin,
  581. region, 0, 0, dst, 0, NULL, NULL);
  582. cl_errChk(status, "Reading image", true);
  583. }
  584. //! Map a buffer into a host address
  585. /*!
  586. \param mem cl_mem object
  587. \param mem_size Size of memory in bytes
  588. \param flags Optional cl_mem_flags
  589. \return Returns a host pointer that points to the mapped region
  590. */
  591. void *cl_mapBuffer(cl_mem mem, size_t mem_size, cl_mem_flags flags)
  592. {
  593. cl_int status;
  594. void *ptr;
  595. ptr = (void *)clEnqueueMapBuffer(commandQueue, mem, CL_TRUE, flags,
  596. 0, mem_size, 0, NULL, NULL, &status);
  597. cl_errChk(status, "Error mapping a buffer", true);
  598. return ptr;
  599. }
  600. //! Unmap a buffer or image
  601. /*!
  602. \param mem cl_mem object
  603. \param ptr A host pointer that points to the mapped region
  604. */
  605. void cl_unmapBuffer(cl_mem mem, void *ptr)
  606. {
  607. // TODO It looks like AMD doesn't support profiling unmapping yet. Leaving the
  608. // commented code here until it's supported
  609. cl_int status;
  610. status = clEnqueueUnmapMemObject(commandQueue, mem, ptr, 0, NULL, NULL);
  611. cl_errChk(status, "Error unmapping a buffer or image", true);
  612. }
  613. void cl_writeToZCBuffer(cl_mem mem, void* data, size_t size)
  614. {
  615. void* ptr;
  616. ptr = cl_mapBuffer(mem, size, CL_MAP_WRITE);
  617. memcpy(ptr, data, size);
  618. cl_unmapBuffer(mem, ptr);
  619. }
  620. //-------------------------------------------------------
  621. // Program and kernels
  622. //-------------------------------------------------------
  623. //! Convert source code file into cl_program
  624. /*!
  625. Compile Opencl source file into a cl_program. The cl_program will be made into a kernel in PrecompileKernels()
  626. \param kernelPath Filename of OpenCl code
  627. \param compileoptions Compilation options
  628. \param verbosebuild Switch to enable verbose Output
  629. */
  630. cl_program cl_compileProgram(char* kernelPath, char* compileoptions, bool verbosebuild )
  631. {
  632. cl_int status;
  633. FILE *fp = NULL;
  634. char *source = NULL;
  635. long int size;
  636. printf("\t%s\n", kernelPath);
  637. // Determine the size of the source file
  638. #ifdef _WIN32
  639. fopen_s(&fp, kernelPath, "rb");
  640. #else
  641. fp = fopen(kernelPath, "rb");
  642. #endif
  643. if(!fp) {
  644. printf("Could not open kernel file\n");
  645. exit(-1);
  646. }
  647. status = fseek(fp, 0, SEEK_END);
  648. if(status != 0) {
  649. printf("Error seeking to end of file\n");
  650. exit(-1);
  651. }
  652. size = ftell(fp);
  653. if(size < 0) {
  654. printf("Error getting file position\n");
  655. exit(-1);
  656. }
  657. rewind(fp);
  658. // Allocate enough space for the source code
  659. source = (char *)alloc(size + 1);
  660. // fill with NULLs (just for fun)
  661. for (int i = 0; i < size+1; i++) {
  662. source[i] = '\0';
  663. }
  664. // Read in the source code
  665. fread(source, 1, size, fp);
  666. source[size] = '\0';
  667. // Create the program object
  668. cl_program clProgramReturn = clCreateProgramWithSource(context, 1,
  669. (const char **)&source, NULL, &status);
  670. cl_errChk(status, "Creating program", true);
  671. free(source);
  672. fclose(fp);
  673. // Try to compile the program
  674. status = clBuildProgram(clProgramReturn, 0, NULL, compileoptions, NULL, NULL);
  675. if(cl_errChk(status, "Building program", false) || verbosebuild == 1)
  676. {
  677. cl_build_status build_status;
  678. clGetProgramBuildInfo(clProgramReturn, device, CL_PROGRAM_BUILD_STATUS,
  679. sizeof(cl_build_status), &build_status, NULL);
  680. if(build_status == CL_SUCCESS && verbosebuild == 0) {
  681. return clProgramReturn;
  682. }
  683. //char *build_log;
  684. size_t ret_val_size;
  685. printf("Device: %p",device);
  686. clGetProgramBuildInfo(clProgramReturn, device, CL_PROGRAM_BUILD_LOG, 0,
  687. NULL, &ret_val_size);
  688. char *build_log = (char*)alloc(ret_val_size+1);
  689. clGetProgramBuildInfo(clProgramReturn, device, CL_PROGRAM_BUILD_LOG,
  690. ret_val_size+1, build_log, NULL);
  691. // to be careful, terminate with \0
  692. // there's no information in the reference whether the string is 0
  693. // terminated or not
  694. build_log[ret_val_size] = '\0';
  695. printf("Build log:\n %s...\n", build_log);
  696. if(build_status != CL_SUCCESS) {
  697. getchar();
  698. exit(-1);
  699. }
  700. else
  701. return clProgramReturn;
  702. }
  703. // print the ptx information
  704. // printBinaries(clProgram);
  705. return clProgramReturn;
  706. }
  707. //! Create a kernel from compiled source
  708. /*!
  709. Create a kernel from compiled source
  710. \param program Compiled OpenCL program
  711. \param kernel_name Name of the kernel in the program
  712. \return Returns a cl_kernel object for the specified kernel
  713. */
  714. cl_kernel cl_createKernel(cl_program program, const char* kernel_name) {
  715. cl_kernel kernel;
  716. cl_int status;
  717. kernel = clCreateKernel(program, kernel_name, &status);
  718. cl_errChk(status, "Creating kernel", true);
  719. return kernel;
  720. }
  721. //! Set an argument for a OpenCL kernel
  722. /*!
  723. Set an argument for a OpenCL kernel
  724. \param kernel The kernel for which the argument is being set
  725. \param index The argument index
  726. \param size The size of the argument
  727. \param data A pointer to the argument
  728. */
  729. void cl_setKernelArg(cl_kernel kernel, unsigned int index, size_t size,
  730. void* data)
  731. {
  732. cl_int status;
  733. status = clSetKernelArg(kernel, index, size, data);
  734. cl_errChk(status, "Setting kernel arg", true);
  735. }
  736. //-------------------------------------------------------
  737. // Profiling/events
  738. //-------------------------------------------------------
  739. //! Time kernel execution using cl_event
  740. /*!
  741. Prints out the time taken between the start and end of an event
  742. \param event_time
  743. */
  744. double cl_computeExecTime(cl_event event_time)
  745. {
  746. cl_int status;
  747. cl_ulong starttime;
  748. cl_ulong endtime;
  749. double elapsed;
  750. status = clGetEventProfilingInfo(event_time, CL_PROFILING_COMMAND_START,
  751. sizeof(cl_ulong), &starttime, NULL);
  752. cl_errChk(status, "profiling start", true);
  753. status = clGetEventProfilingInfo(event_time, CL_PROFILING_COMMAND_END,
  754. sizeof(cl_ulong), &endtime, NULL);
  755. cl_errChk(status, "profiling end", true);
  756. // Convert to ms
  757. elapsed = (double)(endtime-starttime)/1000000.0;
  758. return elapsed;
  759. }
  760. //! Compute the elapsed time between two timer values
  761. double cl_computeTime(cl_time start, cl_time end)
  762. {
  763. #ifdef _WIN32
  764. __int64 freq;
  765. int status;
  766. status = QueryPerformanceFrequency((LARGE_INTEGER*)&freq);
  767. if(status == 0) {
  768. perror("QueryPerformanceFrequency");
  769. exit(-1);
  770. }
  771. // Return time in ms
  772. return double(end-start)/(double(freq)/1000.0);
  773. #else
  774. return end-start;
  775. #endif
  776. }
  777. //! Grab the current time using a system-specific timer
  778. void cl_getTime(cl_time* time)
  779. {
  780. #ifdef _WIN32
  781. int status = QueryPerformanceCounter((LARGE_INTEGER*)time);
  782. if(status == 0) {
  783. perror("QueryPerformanceCounter");
  784. exit(-1);
  785. }
  786. #else
  787. // Use gettimeofday to get the current time
  788. struct timeval curTime;
  789. gettimeofday(&curTime, NULL);
  790. // Convert timeval into double
  791. *time = curTime.tv_sec * 1000 + (double)curTime.tv_usec/1000;
  792. #endif
  793. }
  794. //-------------------------------------------------------
  795. // Error handling
  796. //-------------------------------------------------------
  797. //! OpenCl error code list
  798. /*!
  799. An array of character strings used to give the error corresponding to the error code \n
  800. The error code is the index within this array
  801. */
  802. char *cl_errs[MAX_ERR_VAL] = {
  803. (char *)"CL_SUCCESS", // 0
  804. (char *)"CL_DEVICE_NOT_FOUND", //-1
  805. (char *)"CL_DEVICE_NOT_AVAILABLE", //-2
  806. (char *)"CL_COMPILER_NOT_AVAILABLE", //-3
  807. (char *)"CL_MEM_OBJECT_ALLOCATION_FAILURE", //-4
  808. (char *)"CL_OUT_OF_RESOURCES", //-5
  809. (char *)"CL_OUT_OF_HOST_MEMORY", //-6
  810. (char *)"CL_PROFILING_INFO_NOT_AVAILABLE", //-7
  811. (char *)"CL_MEM_COPY_OVERLAP", //-8
  812. (char *)"CL_IMAGE_FORMAT_MISMATCH", //-9
  813. (char *)"CL_IMAGE_FORMAT_NOT_SUPPORTED", //-10
  814. (char *)"CL_BUILD_PROGRAM_FAILURE", //-11
  815. (char *)"CL_MAP_FAILURE", //-12
  816. (char *)"", //-13
  817. (char *)"", //-14
  818. (char *)"", //-15
  819. (char *)"", //-16
  820. (char *)"", //-17
  821. (char *)"", //-18
  822. (char *)"", //-19
  823. (char *)"", //-20
  824. (char *)"", //-21
  825. (char *)"", //-22
  826. (char *)"", //-23
  827. (char *)"", //-24
  828. (char *)"", //-25
  829. (char *)"", //-26
  830. (char *)"", //-27
  831. (char *)"", //-28
  832. (char *)"", //-29
  833. (char *)"CL_INVALID_VALUE", //-30
  834. (char *)"CL_INVALID_DEVICE_TYPE", //-31
  835. (char *)"CL_INVALID_PLATFORM", //-32
  836. (char *)"CL_INVALID_DEVICE", //-33
  837. (char *)"CL_INVALID_CONTEXT", //-34
  838. (char *)"CL_INVALID_QUEUE_PROPERTIES", //-35
  839. (char *)"CL_INVALID_COMMAND_QUEUE", //-36
  840. (char *)"CL_INVALID_HOST_PTR", //-37
  841. (char *)"CL_INVALID_MEM_OBJECT", //-38
  842. (char *)"CL_INVALID_IMAGE_FORMAT_DESCRIPTOR", //-39
  843. (char *)"CL_INVALID_IMAGE_SIZE", //-40
  844. (char *)"CL_INVALID_SAMPLER", //-41
  845. (char *)"CL_INVALID_BINARY", //-42
  846. (char *)"CL_INVALID_BUILD_OPTIONS", //-43
  847. (char *)"CL_INVALID_PROGRAM", //-44
  848. (char *)"CL_INVALID_PROGRAM_EXECUTABLE", //-45
  849. (char *)"CL_INVALID_KERNEL_NAME", //-46
  850. (char *)"CL_INVALID_KERNEL_DEFINITION", //-47
  851. (char *)"CL_INVALID_KERNEL", //-48
  852. (char *)"CL_INVALID_ARG_INDEX", //-49
  853. (char *)"CL_INVALID_ARG_VALUE", //-50
  854. (char *)"CL_INVALID_ARG_SIZE", //-51
  855. (char *)"CL_INVALID_KERNEL_ARGS", //-52
  856. (char *)"CL_INVALID_WORK_DIMENSION ", //-53
  857. (char *)"CL_INVALID_WORK_GROUP_SIZE", //-54
  858. (char *)"CL_INVALID_WORK_ITEM_SIZE", //-55
  859. (char *)"CL_INVALID_GLOBAL_OFFSET", //-56
  860. (char *)"CL_INVALID_EVENT_WAIT_LIST", //-57
  861. (char *)"CL_INVALID_EVENT", //-58
  862. (char *)"CL_INVALID_OPERATION", //-59
  863. (char *)"CL_INVALID_GL_OBJECT", //-60
  864. (char *)"CL_INVALID_BUFFER_SIZE", //-61
  865. (char *)"CL_INVALID_MIP_LEVEL", //-62
  866. (char *)"CL_INVALID_GLOBAL_WORK_SIZE"}; //-63
  867. //! OpenCl Error checker
  868. /*!
  869. Checks for error code as per cl_int returned by OpenCl
  870. \param status Error value as cl_int
  871. \param msg User provided error message
  872. \return True if Error Seen, False if no error
  873. */
  874. int cl_errChk(const cl_int status, const char * msg, bool exitOnErr)
  875. {
  876. if(status != CL_SUCCESS) {
  877. printf("OpenCL Error: %d %s %s\n", status, cl_errs[-status], msg);
  878. if(exitOnErr) {
  879. exit(-1);
  880. }
  881. return true;
  882. }
  883. return false;
  884. }
  885. // Queries the supported image formats for the device and prints
  886. // them to the screen
  887. void printSupportedImageFormats()
  888. {
  889. cl_uint numFormats;
  890. cl_int status;
  891. status = clGetSupportedImageFormats(context, 0, CL_MEM_OBJECT_IMAGE2D,
  892. 0, NULL, &numFormats);
  893. cl_errChk(status, "getting supported image formats", true);
  894. cl_image_format* imageFormats = NULL;
  895. imageFormats = (cl_image_format*)alloc(sizeof(cl_image_format)*numFormats);
  896. status = clGetSupportedImageFormats(context, 0, CL_MEM_OBJECT_IMAGE2D,
  897. numFormats, imageFormats, NULL);
  898. printf("There are %d supported image formats\n", numFormats);
  899. cl_uint orders[]={CL_R, CL_A, CL_INTENSITY, CL_LUMINANCE, CL_RG,
  900. CL_RA, CL_RGB, CL_RGBA, CL_ARGB, CL_BGRA};
  901. char *orderstr[]={(char *)"CL_R", (char *)"CL_A",(char *)"CL_INTENSITY", (char *)"CL_LUMINANCE", (char *)"CL_RG",
  902. (char *)"CL_RA", (char *)"CL_RGB", (char *)"CL_RGBA", (char *)"CL_ARGB", (char *)"CL_BGRA"};
  903. cl_uint types[]={
  904. CL_SNORM_INT8 , CL_SNORM_INT16, CL_UNORM_INT8, CL_UNORM_INT16,
  905. CL_UNORM_SHORT_565, CL_UNORM_SHORT_555, CL_UNORM_INT_101010,CL_SIGNED_INT8,
  906. CL_SIGNED_INT16, CL_SIGNED_INT32, CL_UNSIGNED_INT8, CL_UNSIGNED_INT16,
  907. CL_UNSIGNED_INT32, CL_HALF_FLOAT, CL_FLOAT};
  908. char * typesstr[]={
  909. (char *)"CL_SNORM_INT8" ,(char *)"CL_SNORM_INT16",(char *)"CL_UNORM_INT8",(char *)"CL_UNORM_INT16",
  910. (char *)"CL_UNORM_SHORT_565",(char *)"CL_UNORM_SHORT_555",(char *)"CL_UNORM_INT_101010",
  911. (char *)"CL_SIGNED_INT8",(char *)"CL_SIGNED_INT16",(char *)"CL_SIGNED_INT32",(char *)"CL_UNSIGNED_INT8",
  912. (char *)"CL_UNSIGNED_INT16",(char *)"CL_UNSIGNED_INT32",(char *)"CL_HALF_FLOAT",(char *)"CL_FLOAT"};
  913. printf("Supported Formats:\n");
  914. for(int i = 0; i < (int)numFormats; i++) {
  915. printf("\tFormat %d: ", i);
  916. for(int j = 0; j < (int)(sizeof(orders)/sizeof(cl_int)); j++) {
  917. if(imageFormats[i].image_channel_order == orders[j]) {
  918. printf("%s, ", orderstr[j]);
  919. }
  920. }
  921. for(int j = 0; j < (int)(sizeof(types)/sizeof(cl_int)); j++) {
  922. if(imageFormats[i].image_channel_data_type == types[j]) {
  923. printf("%s, ", typesstr[j]);
  924. }
  925. }
  926. printf("\n");
  927. }
  928. free(imageFormats);
  929. }
  930. //-------------------------------------------------------
  931. // Platform and device information
  932. //-------------------------------------------------------
  933. //! Returns true if AMD is the device vendor
  934. bool cl_deviceIsAMD(cl_device_id dev) {
  935. bool retval = false;
  936. char* vendor = cl_getDeviceVendor(dev);
  937. if(strncmp(vendor, "Advanced", 8) == 0) {
  938. retval = true;
  939. }
  940. free(vendor);
  941. return retval;
  942. }
  943. //! Returns true if NVIDIA is the device vendor
  944. bool cl_deviceIsNVIDIA(cl_device_id dev) {
  945. bool retval = false;
  946. char* vendor = cl_getDeviceVendor(dev);
  947. if(strncmp(vendor, "NVIDIA", 6) == 0) {
  948. retval = true;
  949. }
  950. free(vendor);
  951. return retval;
  952. }
  953. //! Returns true if NVIDIA is the device vendor
  954. bool cl_platformIsNVIDIA(cl_platform_id plat) {
  955. bool retval = false;
  956. char* vendor = cl_getPlatformVendor(plat);
  957. if(strncmp(vendor, "NVIDIA", 6) == 0) {
  958. retval = true;
  959. }
  960. free(vendor);
  961. return retval;
  962. }
  963. //! Get the name of the vendor for a device
  964. char* cl_getDeviceDriverVersion(cl_device_id dev)
  965. {
  966. cl_int status;
  967. size_t devInfoSize;
  968. char* devInfoStr = NULL;
  969. // If dev is NULL, set it to the default device
  970. if(dev == NULL) {
  971. dev = device;
  972. }
  973. // Print the vendor
  974. status = clGetDeviceInfo(dev, CL_DRIVER_VERSION, 0,
  975. NULL, &devInfoSize);
  976. cl_errChk(status, "Getting vendor name", true);
  977. devInfoStr = (char*)alloc(devInfoSize);
  978. status = clGetDeviceInfo(dev, CL_DRIVER_VERSION, devInfoSize,
  979. devInfoStr, NULL);
  980. cl_errChk(status, "Getting vendor name", true);
  981. return devInfoStr;
  982. }
  983. //! The the name of the device as supplied by the OpenCL implementation
  984. char* cl_getDeviceName(cl_device_id dev)
  985. {
  986. cl_int status;
  987. size_t devInfoSize;
  988. char* devInfoStr = NULL;
  989. // If dev is NULL, set it to the default device
  990. if(dev == NULL) {
  991. dev = device;
  992. }
  993. // Print the name
  994. status = clGetDeviceInfo(dev, CL_DEVICE_NAME, 0,
  995. NULL, &devInfoSize);
  996. cl_errChk(status, "Getting device name", true);
  997. devInfoStr = (char*)alloc(devInfoSize);
  998. status = clGetDeviceInfo(dev, CL_DEVICE_NAME, devInfoSize,
  999. devInfoStr, NULL);
  1000. cl_errChk(status, "Getting device name", true);
  1001. return(devInfoStr);
  1002. }
  1003. //! Get the name of the vendor for a device
  1004. char* cl_getDeviceVendor(cl_device_id dev)
  1005. {
  1006. cl_int status;
  1007. size_t devInfoSize;
  1008. char* devInfoStr = NULL;
  1009. // If dev is NULL, set it to the default device
  1010. if(dev == NULL) {
  1011. dev = device;
  1012. }
  1013. // Print the vendor
  1014. status = clGetDeviceInfo(dev, CL_DEVICE_VENDOR, 0,
  1015. NULL, &devInfoSize);
  1016. cl_errChk(status, "Getting vendor name", true);
  1017. devInfoStr = (char*)alloc(devInfoSize);
  1018. status = clGetDeviceInfo(dev, CL_DEVICE_VENDOR, devInfoSize,
  1019. devInfoStr, NULL);
  1020. cl_errChk(status, "Getting vendor name", true);
  1021. return devInfoStr;
  1022. }
  1023. //! Get the name of the vendor for a device
  1024. char* cl_getDeviceVersion(cl_device_id dev)
  1025. {
  1026. cl_int status;
  1027. size_t devInfoSize;
  1028. char* devInfoStr = NULL;
  1029. // If dev is NULL, set it to the default device
  1030. if(dev == NULL) {
  1031. dev = device;
  1032. }
  1033. // Print the vendor
  1034. status = clGetDeviceInfo(dev, CL_DEVICE_VERSION, 0,
  1035. NULL, &devInfoSize);
  1036. cl_errChk(status, "Getting vendor name", true);
  1037. devInfoStr = (char*)alloc(devInfoSize);
  1038. status = clGetDeviceInfo(dev, CL_DEVICE_VERSION, devInfoSize,
  1039. devInfoStr, NULL);
  1040. cl_errChk(status, "Getting vendor name", true);
  1041. return devInfoStr;
  1042. }
  1043. //! The the name of the device as supplied by the OpenCL implementation
  1044. char* cl_getPlatformName(cl_platform_id platform)
  1045. {
  1046. cl_int status;
  1047. size_t platformInfoSize;
  1048. char* platformInfoStr = NULL;
  1049. // Print the name
  1050. status = clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0,
  1051. NULL, &platformInfoSize);
  1052. cl_errChk(status, "Getting platform name", true);
  1053. platformInfoStr = (char*)alloc(platformInfoSize);
  1054. status = clGetPlatformInfo(platform, CL_PLATFORM_NAME, platformInfoSize,
  1055. platformInfoStr, NULL);
  1056. cl_errChk(status, "Getting platform name", true);
  1057. return(platformInfoStr);
  1058. }
  1059. //! The the name of the device as supplied by the OpenCL implementation
  1060. char* cl_getPlatformVendor(cl_platform_id platform)
  1061. {
  1062. cl_int status;
  1063. size_t platformInfoSize;
  1064. char* platformInfoStr = NULL;
  1065. // Print the name
  1066. status = clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, 0,
  1067. NULL, &platformInfoSize);
  1068. cl_errChk(status, "Getting platform name", true);
  1069. platformInfoStr = (char*)alloc(platformInfoSize);
  1070. status = clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, platformInfoSize,
  1071. platformInfoStr, NULL);
  1072. cl_errChk(status, "Getting platform name", true);
  1073. return(platformInfoStr);
  1074. }