srad_kernel.c 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. // BUG IN SRAD APPLICATIONS SEEMS TO BE SOMEWHERE IN THIS CODE, MEMORY CORRUPTION
  2. // srad kernel
  3. __global__ void srad( fp d_lambda,
  4. int d_Nr,
  5. int d_Nc,
  6. long d_Ne,
  7. int *d_iN,
  8. int *d_iS,
  9. int *d_jE,
  10. int *d_jW,
  11. fp *d_dN,
  12. fp *d_dS,
  13. fp *d_dE,
  14. fp *d_dW,
  15. fp d_q0sqr,
  16. fp *d_c,
  17. fp *d_I){
  18. // indexes
  19. int bx = blockIdx.x; // get current horizontal block index
  20. int tx = threadIdx.x; // get current horizontal thread index
  21. int ei = bx*NUMBER_THREADS+tx; // more threads than actual elements !!!
  22. int row; // column, x position
  23. int col; // row, y position
  24. // variables
  25. fp d_Jc;
  26. fp d_dN_loc, d_dS_loc, d_dW_loc, d_dE_loc;
  27. fp d_c_loc;
  28. fp d_G2,d_L,d_num,d_den,d_qsqr;
  29. // figure out row/col location in new matrix
  30. row = (ei+1) % d_Nr - 1; // (0-n) row
  31. col = (ei+1) / d_Nr + 1 - 1; // (0-n) column
  32. if((ei+1) % d_Nr == 0){
  33. row = d_Nr - 1;
  34. col = col - 1;
  35. }
  36. if(ei<d_Ne){ // make sure that only threads matching jobs run
  37. // directional derivatives, ICOV, diffusion coefficent
  38. d_Jc = d_I[ei]; // get value of the current element
  39. // directional derivates (every element of IMAGE)(try to copy to shared memory or temp files)
  40. d_dN_loc = d_I[d_iN[row] + d_Nr*col] - d_Jc; // north direction derivative
  41. d_dS_loc = d_I[d_iS[row] + d_Nr*col] - d_Jc; // south direction derivative
  42. d_dW_loc = d_I[row + d_Nr*d_jW[col]] - d_Jc; // west direction derivative
  43. d_dE_loc = d_I[row + d_Nr*d_jE[col]] - d_Jc; // east direction derivative
  44. // normalized discrete gradient mag squared (equ 52,53)
  45. d_G2 = (d_dN_loc*d_dN_loc + d_dS_loc*d_dS_loc + d_dW_loc*d_dW_loc + d_dE_loc*d_dE_loc) / (d_Jc*d_Jc); // gradient (based on derivatives)
  46. // normalized discrete laplacian (equ 54)
  47. d_L = (d_dN_loc + d_dS_loc + d_dW_loc + d_dE_loc) / d_Jc; // laplacian (based on derivatives)
  48. // ICOV (equ 31/35)
  49. d_num = (0.5*d_G2) - ((1.0/16.0)*(d_L*d_L)) ; // num (based on gradient and laplacian)
  50. d_den = 1 + (0.25*d_L); // den (based on laplacian)
  51. d_qsqr = d_num/(d_den*d_den); // qsqr (based on num and den)
  52. // diffusion coefficent (equ 33) (every element of IMAGE)
  53. d_den = (d_qsqr-d_q0sqr) / (d_q0sqr * (1+d_q0sqr)) ; // den (based on qsqr and q0sqr)
  54. d_c_loc = 1.0 / (1.0+d_den) ; // diffusion coefficient (based on den)
  55. // saturate diffusion coefficent to 0-1 range
  56. if (d_c_loc < 0){ // if diffusion coefficient < 0
  57. d_c_loc = 0; // ... set to 0
  58. }
  59. else if (d_c_loc > 1){ // if diffusion coefficient > 1
  60. d_c_loc = 1; // ... set to 1
  61. }
  62. // save data to global memory
  63. d_dN[ei] = d_dN_loc;
  64. d_dS[ei] = d_dS_loc;
  65. d_dW[ei] = d_dW_loc;
  66. d_dE[ei] = d_dE_loc;
  67. d_c[ei] = d_c_loc;
  68. }
  69. }