// kernel_gpu_opencl_2.cl
  1. //========================================================================================================================================================================================================200
  2. // DEFINE/INCLUDE
  3. //========================================================================================================================================================================================================200
  4. //======================================================================================================================================================150
  5. // DEFINE
  6. //======================================================================================================================================================150
  // Double-precision support. Exactly one extension is enabled: the Khronos
  // cl_khr_fp64 by default, or the vendor-specific cl_amd_fp64 when the build
  // defines AMDAPP (switch between them as needed for NVIDIA/AMD).
  #ifndef AMDAPP
  #pragma OPENCL EXTENSION cl_khr_fp64 : enable
  #else
  #pragma OPENCL EXTENSION cl_amd_fp64 : enable
  #endif
  13. // clBuildProgram compiler cannot link this file for some reason, so had to redefine constants and structures below
  14. // #include ../common.h // (in directory specified to compiler) main function header
  15. //======================================================================================================================================================150
  16. // DEFINE (had to bring from ../common.h here because feature of including headers in clBuildProgram does not work for some reason)
  17. //======================================================================================================================================================150
  // Floating-point type alias; change to double if double precision is needed.
  #define fp float
  // Tree order used to size the per-node arrays in this file. The value has to
  // agree with the one the host code was built with, otherwise the node layout
  // seen by the device differs from the buffer the host uploads.
  // NOTE(review): presumably the host injects this through the program build
  // options (e.g. -DDEFAULT_ORDER_2=<n>) -- confirm against the host wrapper.
  // The guarded fallback below only applies when nothing was supplied, so an
  // externally provided value always wins.
  #ifndef DEFAULT_ORDER_2
  #define DEFAULT_ORDER_2 256
  #endif
  21. //======================================================================================================================================================150
  22. // STRUCTURES (had to bring from ../common.h here because feature of including headers in clBuildProgram does not work for some reason)
  23. //======================================================================================================================================================150
  24. // ???
  25. typedef struct knode {
  26. int location;
  27. int indices [DEFAULT_ORDER_2 + 1];
  28. int keys [DEFAULT_ORDER_2 + 1];
  29. bool is_leaf;
  30. int num_keys;
  31. } knode;
  32. //========================================================================================================================================================================================================200
  33. // findRangeK function
  34. //========================================================================================================================================================================================================200
  35. __kernel void
  36. findRangeK( long height,
  37. __global knode *knodesD,
  38. long knodes_elem,
  39. __global long *currKnodeD,
  40. __global long *offsetD,
  41. __global long *lastKnodeD,
  42. __global long *offset_2D,
  43. __global int *startD,
  44. __global int *endD,
  45. __global int *RecstartD,
  46. __global int *ReclenD)
  47. {
  48. // private thread IDs
  49. int thid = get_local_id(0);
  50. int bid = get_group_id(0);
  51. // ???
  52. int i;
  53. for(i = 0; i < height; i++){
  54. if((knodesD[currKnodeD[bid]].keys[thid] <= startD[bid]) && (knodesD[currKnodeD[bid]].keys[thid+1] > startD[bid])){
  55. // this conditional statement is inserted to avoid crush due to but in original code
  56. // "offset[bid]" calculated below that later addresses part of knodes goes outside of its bounds cause segmentation fault
  57. // more specifically, values saved into knodes->indices in the main function are out of bounds of knodes that they address
  58. if(knodesD[currKnodeD[bid]].indices[thid] < knodes_elem){
  59. offsetD[bid] = knodesD[currKnodeD[bid]].indices[thid];
  60. }
  61. }
  62. if((knodesD[lastKnodeD[bid]].keys[thid] <= endD[bid]) && (knodesD[lastKnodeD[bid]].keys[thid+1] > endD[bid])){
  63. // this conditional statement is inserted to avoid crush due to but in original code
  64. // "offset_2[bid]" calculated below that later addresses part of knodes goes outside of its bounds cause segmentation fault
  65. // more specifically, values saved into knodes->indices in the main function are out of bounds of knodes that they address
  66. if(knodesD[lastKnodeD[bid]].indices[thid] < knodes_elem){
  67. offset_2D[bid] = knodesD[lastKnodeD[bid]].indices[thid];
  68. }
  69. }
  70. //__syncthreads();
  71. barrier(CLK_LOCAL_MEM_FENCE);
  72. // set for next tree level
  73. if(thid==0){
  74. currKnodeD[bid] = offsetD[bid];
  75. lastKnodeD[bid] = offset_2D[bid];
  76. }
  77. // __syncthreads();
  78. barrier(CLK_LOCAL_MEM_FENCE);
  79. }
  80. // Find the index of the starting record
  81. if(knodesD[currKnodeD[bid]].keys[thid] == startD[bid]){
  82. RecstartD[bid] = knodesD[currKnodeD[bid]].indices[thid];
  83. }
  84. // __syncthreads();
  85. barrier(CLK_LOCAL_MEM_FENCE);
  86. // Find the index of the ending record
  87. if(knodesD[lastKnodeD[bid]].keys[thid] == endD[bid]){
  88. ReclenD[bid] = knodesD[lastKnodeD[bid]].indices[thid] - RecstartD[bid]+1;
  89. }
  90. }
  91. //========================================================================================================================================================================================================200
  92. // End
  93. //========================================================================================================================================================================================================200