// OpenCL C source: SCORE/REF index macros, the maximum() helper, and the nw_kernel1 / nw_kernel2 kernels.
// Flat-index accessors for the two scratch buffers used by the kernels in
// this file. Both expand to an lvalue, so they can be read or assigned.
//
// They rely on names that must be visible where the macro is used:
//   input_itemsets_l, reference_l - the buffers being indexed
//   BLOCK_SIZE                    - not defined in the visible part of this
//                                   file (presumably supplied by an include or
//                                   a build option - TODO confirm)
//
// SCORE(i, j): row i, column j of a buffer whose rows are BLOCK_SIZE + 1 wide.
// REF(i, j):   row i, column j of a buffer whose rows are BLOCK_SIZE wide.
//
// Every parameter is parenthesized so that an argument such as `tx + 1`
// is multiplied as a whole instead of being split by operator precedence.
#define SCORE(i, j) input_itemsets_l[(j) + (i) * ((BLOCK_SIZE) + 1)]
#define REF(i, j) reference_l[(j) + (i) * (BLOCK_SIZE)]
// Return the largest of three integers.
//
// a, b, c: the candidate values.
// Returns whichever of a, b and c is greatest (ties yield that shared value).
int maximum(int a, int b, int c)
{
    // Larger of the first two candidates.
    int larger = (a <= b) ? b : a;

    return (larger <= c) ? c : larger;
}
- __kernel void
- nw_kernel1(__global int * reference_d,
- __global int * input_itemsets_d,
- __global int * output_itemsets_d,
- __local int * input_itemsets_l,
- __local int * reference_l,
- int cols,
- int penalty,
- int blk,
- int block_width,
- int worksize,
- int offset_r,
- int offset_c
- )
- {
- // Block index
- int bx = get_group_id(0);
- //int bx = get_global_id(0)/BLOCK_SIZE;
-
- // Thread index
- int tx = get_local_id(0);
-
- // Base elements
- int base = offset_r * cols + offset_c;
-
- int b_index_x = bx;
- int b_index_y = blk - 1 - bx;
-
-
- int index = base + cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + ( cols + 1 );
- int index_n = base + cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + ( 1 );
- int index_w = base + cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + ( cols );
- int index_nw = base + cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x;
-
-
- if (tx == 0){
- SCORE(tx, 0) = input_itemsets_d[index_nw + tx];
- }
- barrier(CLK_LOCAL_MEM_FENCE);
- for ( int ty = 0 ; ty < BLOCK_SIZE ; ty++)
- REF(ty, tx) = reference_d[index + cols * ty];
- barrier(CLK_LOCAL_MEM_FENCE);
- SCORE((tx + 1), 0) = input_itemsets_d[index_w + cols * tx];
- barrier(CLK_LOCAL_MEM_FENCE);
- SCORE(0, (tx + 1)) = input_itemsets_d[index_n];
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
-
- for( int m = 0 ; m < BLOCK_SIZE ; m++){
-
- if ( tx <= m ){
-
- int t_index_x = tx + 1;
- int t_index_y = m - tx + 1;
-
- SCORE(t_index_y, t_index_x) = maximum( SCORE((t_index_y-1), (t_index_x-1)) + REF((t_index_y-1), (t_index_x-1)),
- SCORE((t_index_y), (t_index_x-1)) - (penalty),
- SCORE((t_index_y-1), (t_index_x)) - (penalty));
- }
- barrier(CLK_LOCAL_MEM_FENCE);
- }
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- for( int m = BLOCK_SIZE - 2 ; m >=0 ; m--){
-
- if ( tx <= m){
-
- int t_index_x = tx + BLOCK_SIZE - m ;
- int t_index_y = BLOCK_SIZE - tx;
- SCORE(t_index_y, t_index_x) = maximum( SCORE((t_index_y-1), (t_index_x-1)) + REF((t_index_y-1), (t_index_x-1)),
- SCORE((t_index_y), (t_index_x-1)) - (penalty),
- SCORE((t_index_y-1), (t_index_x)) - (penalty));
-
- }
- barrier(CLK_LOCAL_MEM_FENCE);
- }
-
- for ( int ty = 0 ; ty < BLOCK_SIZE ; ty++)
- input_itemsets_d[index + cols * ty] = SCORE((ty+1), (tx+1));
-
- return;
-
- }
- __kernel void
- nw_kernel2(__global int * reference_d,
- __global int * input_itemsets_d,
- __global int * output_itemsets_d,
- __local int * input_itemsets_l,
- __local int * reference_l,
- int cols,
- int penalty,
- int blk,
- int block_width,
- int worksize,
- int offset_r,
- int offset_c
- )
- {
- int bx = get_group_id(0);
- //int bx = get_global_id(0)/BLOCK_SIZE;
-
- // Thread index
- int tx = get_local_id(0);
-
- // Base elements
- int base = offset_r * cols + offset_c;
-
- int b_index_x = bx + block_width - blk ;
- int b_index_y = block_width - bx -1;
-
-
- int index = base + cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + ( cols + 1 );
- int index_n = base + cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + ( 1 );
- int index_w = base + cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + ( cols );
- int index_nw = base + cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x;
-
- if (tx == 0)
- SCORE(tx, 0) = input_itemsets_d[index_nw];
- for ( int ty = 0 ; ty < BLOCK_SIZE ; ty++)
- REF(ty, tx) = reference_d[index + cols * ty];
- barrier(CLK_LOCAL_MEM_FENCE);
- SCORE((tx + 1), 0) = input_itemsets_d[index_w + cols * tx];
- barrier(CLK_LOCAL_MEM_FENCE);
- SCORE(0, (tx + 1)) = input_itemsets_d[index_n];
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- for( int m = 0 ; m < BLOCK_SIZE ; m++){
-
- if ( tx <= m ){
-
- int t_index_x = tx + 1;
- int t_index_y = m - tx + 1;
- SCORE(t_index_y, t_index_x) = maximum( SCORE((t_index_y-1), (t_index_x-1)) + REF((t_index_y-1), (t_index_x-1)),
- SCORE((t_index_y), (t_index_x-1)) - (penalty),
- SCORE((t_index_y-1), (t_index_x)) - (penalty));
- }
- barrier(CLK_LOCAL_MEM_FENCE);
- }
- for( int m = BLOCK_SIZE - 2 ; m >=0 ; m--){
-
- if ( tx <= m){
-
- int t_index_x = tx + BLOCK_SIZE - m ;
- int t_index_y = BLOCK_SIZE - tx;
- SCORE(t_index_y, t_index_x) = maximum( SCORE((t_index_y-1), (t_index_x-1)) + REF((t_index_y-1), (t_index_x-1)),
- SCORE((t_index_y), (t_index_x-1)) - (penalty),
- SCORE((t_index_y-1), (t_index_x)) - (penalty));
-
- }
- barrier(CLK_LOCAL_MEM_FENCE);
- }
- for ( int ty = 0 ; ty < BLOCK_SIZE ; ty++)
- input_itemsets_d[index + ty * cols] = SCORE((ty+1), (tx+1));
-
-
- return;
-
- }
|