nw.cl 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  // Row-major accessors for the two per-work-group scratch tiles.
  //   SCORE(i, j): element (row i, column j) of input_itemsets_l, whose row
  //                stride is BLOCK_SIZE + 1.
  //   REF(i, j):   element (row i, column j) of reference_l, whose row
  //                stride is BLOCK_SIZE.
  // Both expand to an lvalue and require input_itemsets_l / reference_l to be
  // in scope at the point of use. BLOCK_SIZE is not defined in this part of
  // the file; it is expected to be supplied externally (e.g. a build option).
  // Arguments and BLOCK_SIZE are parenthesized so that expression arguments
  // such as SCORE(y - 1, x - 1) index the intended element.
  #define SCORE(i, j) input_itemsets_l[(j) + (i) * ((BLOCK_SIZE) + 1)]
  #define REF(i, j) reference_l[(j) + (i) * (BLOCK_SIZE)]
  // Return the largest of the three integers a, b and c.
  // Ties are harmless: whichever equal operand is selected, the returned
  // value is the same.
  int maximum(int a, int b, int c)
  {
      // Larger of the first two operands.
      int k = (a <= b) ? b : a;

      // Compare that intermediate result against the third operand.
      return (k <= c) ? c : k;
  }
  16. __kernel void
  17. nw_kernel1(__global int * reference_d,
  18. __global int * input_itemsets_d,
  19. __global int * output_itemsets_d,
  20. __local int * input_itemsets_l,
  21. __local int * reference_l,
  22. int cols,
  23. int penalty,
  24. int blk,
  25. int block_width,
  26. int worksize,
  27. int offset_r,
  28. int offset_c
  29. )
  30. {
  31. // Block index
  32. int bx = get_group_id(0);
  33. //int bx = get_global_id(0)/BLOCK_SIZE;
  34. // Thread index
  35. int tx = get_local_id(0);
  36. // Base elements
  37. int base = offset_r * cols + offset_c;
  38. int b_index_x = bx;
  39. int b_index_y = blk - 1 - bx;
  40. int index = base + cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + ( cols + 1 );
  41. int index_n = base + cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + ( 1 );
  42. int index_w = base + cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + ( cols );
  43. int index_nw = base + cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x;
  44. if (tx == 0){
  45. SCORE(tx, 0) = input_itemsets_d[index_nw + tx];
  46. }
  47. barrier(CLK_LOCAL_MEM_FENCE);
  48. for ( int ty = 0 ; ty < BLOCK_SIZE ; ty++)
  49. REF(ty, tx) = reference_d[index + cols * ty];
  50. barrier(CLK_LOCAL_MEM_FENCE);
  51. SCORE((tx + 1), 0) = input_itemsets_d[index_w + cols * tx];
  52. barrier(CLK_LOCAL_MEM_FENCE);
  53. SCORE(0, (tx + 1)) = input_itemsets_d[index_n];
  54. barrier(CLK_LOCAL_MEM_FENCE);
  55. for( int m = 0 ; m < BLOCK_SIZE ; m++){
  56. if ( tx <= m ){
  57. int t_index_x = tx + 1;
  58. int t_index_y = m - tx + 1;
  59. SCORE(t_index_y, t_index_x) = maximum( SCORE((t_index_y-1), (t_index_x-1)) + REF((t_index_y-1), (t_index_x-1)),
  60. SCORE((t_index_y), (t_index_x-1)) - (penalty),
  61. SCORE((t_index_y-1), (t_index_x)) - (penalty));
  62. }
  63. barrier(CLK_LOCAL_MEM_FENCE);
  64. }
  65. barrier(CLK_LOCAL_MEM_FENCE);
  66. for( int m = BLOCK_SIZE - 2 ; m >=0 ; m--){
  67. if ( tx <= m){
  68. int t_index_x = tx + BLOCK_SIZE - m ;
  69. int t_index_y = BLOCK_SIZE - tx;
  70. SCORE(t_index_y, t_index_x) = maximum( SCORE((t_index_y-1), (t_index_x-1)) + REF((t_index_y-1), (t_index_x-1)),
  71. SCORE((t_index_y), (t_index_x-1)) - (penalty),
  72. SCORE((t_index_y-1), (t_index_x)) - (penalty));
  73. }
  74. barrier(CLK_LOCAL_MEM_FENCE);
  75. }
  76. for ( int ty = 0 ; ty < BLOCK_SIZE ; ty++)
  77. input_itemsets_d[index + cols * ty] = SCORE((ty+1), (tx+1));
  78. return;
  79. }
  80. __kernel void
  81. nw_kernel2(__global int * reference_d,
  82. __global int * input_itemsets_d,
  83. __global int * output_itemsets_d,
  84. __local int * input_itemsets_l,
  85. __local int * reference_l,
  86. int cols,
  87. int penalty,
  88. int blk,
  89. int block_width,
  90. int worksize,
  91. int offset_r,
  92. int offset_c
  93. )
  94. {
  95. int bx = get_group_id(0);
  96. //int bx = get_global_id(0)/BLOCK_SIZE;
  97. // Thread index
  98. int tx = get_local_id(0);
  99. // Base elements
  100. int base = offset_r * cols + offset_c;
  101. int b_index_x = bx + block_width - blk ;
  102. int b_index_y = block_width - bx -1;
  103. int index = base + cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + ( cols + 1 );
  104. int index_n = base + cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + ( 1 );
  105. int index_w = base + cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + ( cols );
  106. int index_nw = base + cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x;
  107. if (tx == 0)
  108. SCORE(tx, 0) = input_itemsets_d[index_nw];
  109. for ( int ty = 0 ; ty < BLOCK_SIZE ; ty++)
  110. REF(ty, tx) = reference_d[index + cols * ty];
  111. barrier(CLK_LOCAL_MEM_FENCE);
  112. SCORE((tx + 1), 0) = input_itemsets_d[index_w + cols * tx];
  113. barrier(CLK_LOCAL_MEM_FENCE);
  114. SCORE(0, (tx + 1)) = input_itemsets_d[index_n];
  115. barrier(CLK_LOCAL_MEM_FENCE);
  116. for( int m = 0 ; m < BLOCK_SIZE ; m++){
  117. if ( tx <= m ){
  118. int t_index_x = tx + 1;
  119. int t_index_y = m - tx + 1;
  120. SCORE(t_index_y, t_index_x) = maximum( SCORE((t_index_y-1), (t_index_x-1)) + REF((t_index_y-1), (t_index_x-1)),
  121. SCORE((t_index_y), (t_index_x-1)) - (penalty),
  122. SCORE((t_index_y-1), (t_index_x)) - (penalty));
  123. }
  124. barrier(CLK_LOCAL_MEM_FENCE);
  125. }
  126. for( int m = BLOCK_SIZE - 2 ; m >=0 ; m--){
  127. if ( tx <= m){
  128. int t_index_x = tx + BLOCK_SIZE - m ;
  129. int t_index_y = BLOCK_SIZE - tx;
  130. SCORE(t_index_y, t_index_x) = maximum( SCORE((t_index_y-1), (t_index_x-1)) + REF((t_index_y-1), (t_index_x-1)),
  131. SCORE((t_index_y), (t_index_x-1)) - (penalty),
  132. SCORE((t_index_y-1), (t_index_x)) - (penalty));
  133. }
  134. barrier(CLK_LOCAL_MEM_FENCE);
  135. }
  136. for ( int ty = 0 ; ty < BLOCK_SIZE ; ty++)
  137. input_itemsets_d[index + ty * cols] = SCORE((ty+1), (tx+1));
  138. return;
  139. }