// NOTE (carried over from the original source): a memory-corruption bug in the
// SRAD application was suspected to be somewhere in this code.
//
// NOTE(review): the text of this kernel was damaged in the copy under review
// (it was collapsed onto one line and everything between the '<' of the bounds
// check and the '>' of the upper saturation test was lost). The section marked
// "reconstructed" below follows the standard SRAD update and the locals /
// stores that survived; confirm it against the project's upstream copy.

// srad kernel
//
// For every image element, computes the four directional differences, derives
// a diffusion coefficient from them, clamps it to [0, 1], and writes the
// differences and the coefficient to global memory.
//
// Launch layout: 1D grid of 1D blocks; the flat element index is
// blockIdx.x * NUMBER_THREADS + threadIdx.x, so the block size is expected to
// be NUMBER_THREADS (TODO confirm at the launch site). The grid may contain
// more threads than elements; surplus threads exit without touching memory.
//
// Parameters:
//   d_lambda        not referenced by this kernel
//   d_Nr            number of rows; element (row, col) lives at row + d_Nr*col
//   d_Nc            not referenced by this kernel
//   d_Ne            total number of elements to process
//   d_iN, d_iS      indexed by row: row index of the north / south neighbour
//   d_jW, d_jE      indexed by col: column index of the west / east neighbour
//   d_dN..d_dW      outputs, one directional difference per element
//   d_q0sqr         reference q0^2 value used in the diffusion coefficient
//   d_c             output, diffusion coefficient per element, clamped to [0, 1]
//   d_I             input image
__global__ void srad(fp d_lambda,
                     int d_Nr,
                     int d_Nc,
                     long d_Ne,
                     int *d_iN,
                     int *d_iS,
                     int *d_jE,
                     int *d_jW,
                     fp *d_dN,
                     fp *d_dS,
                     fp *d_dE,
                     fp *d_dW,
                     fp d_q0sqr,
                     fp *d_c,
                     fp *d_I)
{
	// indexes
	int bx = blockIdx.x;                    // current horizontal block index
	int tx = threadIdx.x;                   // current horizontal thread index
	int ei = bx * NUMBER_THREADS + tx;      // flat element index; more threads than actual elements !!!

	// only threads that map to an actual element do any work
	if (ei >= d_Ne) {
		return;
	}

	// row/col location of this element (ei == row + d_Nr*col).
	// Equivalent to the original "(ei+1) % d_Nr - 1" form with its special
	// case for the last row, for every ei >= 0.
	int row = ei % d_Nr;                    // (0-n) row
	int col = ei / d_Nr;                    // (0-n) column

	// variables
	fp d_Jc;
	fp d_dN_loc, d_dS_loc, d_dW_loc, d_dE_loc;
	fp d_c_loc;
	fp d_G2, d_L, d_num, d_den, d_qsqr;

	// ---- reconstructed section begins (see NOTE(review) above) ----

	// value of the current element
	d_Jc = d_I[ei];

	// directional differences towards the four neighbours
	d_dN_loc = d_I[d_iN[row] + d_Nr * col] - d_Jc;   // north
	d_dS_loc = d_I[d_iS[row] + d_Nr * col] - d_Jc;   // south
	d_dW_loc = d_I[row + d_Nr * d_jW[col]] - d_Jc;   // west
	d_dE_loc = d_I[row + d_Nr * d_jE[col]] - d_Jc;   // east

	// normalized discrete gradient magnitude squared
	d_G2 = (d_dN_loc * d_dN_loc + d_dS_loc * d_dS_loc
	      + d_dW_loc * d_dW_loc + d_dE_loc * d_dE_loc) / (d_Jc * d_Jc);

	// normalized discrete laplacian
	d_L = (d_dN_loc + d_dS_loc + d_dW_loc + d_dE_loc) / d_Jc;

	// instantaneous coefficient of variation (squared)
	// NOTE(review): if fp is float, the double literals below promote this
	// arithmetic to double; left unchanged so results are not altered.
	d_num  = (0.5 * d_G2) - ((1.0 / 16.0) * (d_L * d_L));
	d_den  = 1 + (0.25 * d_L);
	d_qsqr = d_num / (d_den * d_den);

	// diffusion coefficient
	d_den   = (d_qsqr - d_q0sqr) / (d_q0sqr * (1 + d_q0sqr));
	d_c_loc = 1.0 / (1.0 + d_den);

	// saturate diffusion coefficient to 0-1 range
	if (d_c_loc < 0) {                      // if diffusion coefficient < 0
		d_c_loc = 0;                        // ... set to 0
	}
	// ---- reconstructed section ends ----
	else if (d_c_loc > 1) {                 // if diffusion coefficient > 1
		d_c_loc = 1;                        // ... set to 1
	}

	// save data to global memory
	d_dN[ei] = d_dN_loc;
	d_dS[ei] = d_dS_loc;
	d_dW[ei] = d_dW_loc;
	d_dE[ei] = d_dE_loc;
	d_c[ei]  = d_c_loc;
}