srad2_kernel.c 1.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354
  1. // BUG IN SRAD APPLICATIONS SEEMS TO BE SOMEWHERE IN THIS CODE, MEMORY CORRUPTION
  2. // srad kernel
  3. __global__ void srad2( fp d_lambda,
  4. int d_Nr,
  5. int d_Nc,
  6. long d_Ne,
  7. int *d_iN,
  8. int *d_iS,
  9. int *d_jE,
  10. int *d_jW,
  11. fp *d_dN,
  12. fp *d_dS,
  13. fp *d_dE,
  14. fp *d_dW,
  15. fp *d_c,
  16. fp *d_I){
  17. // indexes
  18. int bx = blockIdx.x; // get current horizontal block index
  19. int tx = threadIdx.x; // get current horizontal thread index
  20. int ei = bx*NUMBER_THREADS+tx; // more threads than actual elements !!!
  21. int row; // column, x position
  22. int col; // row, y position
  23. // variables
  24. fp d_cN,d_cS,d_cW,d_cE;
  25. fp d_D;
  26. // figure out row/col location in new matrix
  27. row = (ei+1) % d_Nr - 1; // (0-n) row
  28. col = (ei+1) / d_Nr + 1 - 1; // (0-n) column
  29. if((ei+1) % d_Nr == 0){
  30. row = d_Nr - 1;
  31. col = col - 1;
  32. }
  33. if(ei<d_Ne){ // make sure that only threads matching jobs run
  34. // diffusion coefficent
  35. d_cN = d_c[ei]; // north diffusion coefficient
  36. d_cS = d_c[d_iS[row] + d_Nr*col]; // south diffusion coefficient
  37. d_cW = d_c[ei]; // west diffusion coefficient
  38. d_cE = d_c[row + d_Nr * d_jE[col]]; // east diffusion coefficient
  39. // divergence (equ 58)
  40. d_D = d_cN*d_dN[ei] + d_cS*d_dS[ei] + d_cW*d_dW[ei] + d_cE*d_dE[ei];// divergence
  41. // image update (equ 61) (every element of IMAGE)
  42. d_I[ei] = d_I[ei] + 0.25*d_lambda*d_D; // updates image (based on input time step and divergence)
  43. }
  44. }