Kernels.cl 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284
  1. /* ============================================================
  2. //--functions: kernel funtion
  3. //--programmer: Jianbin Fang
  4. //--date: 24/03/2011
  5. ============================================================ */
  6. #ifndef _KERNEL_
  7. #define _KERNEL_
  8. #define GAMMA (1.4f)
  9. #define NDIM 3
  10. #define NNB 4
  11. #define RK 3 // 3rd order RK
  12. #define ff_mach 1.2f
  13. #define deg_angle_of_attack 0.0f
  14. #define VAR_DENSITY 0
  15. #define VAR_MOMENTUM 1
  16. #define VAR_DENSITY_ENERGY (VAR_MOMENTUM+NDIM)
  17. #define NVAR (VAR_DENSITY_ENERGY+1)
  18. //#pragma OPENCL EXTENSION CL_MAD : enable
  19. //self-defined user type
  20. typedef struct{
  21. float x;
  22. float y;
  23. float z;
  24. } FLOAT3;
  25. /*------------------------------------------------------------
  26. @function: set memory
  27. @params:
  28. mem_d: target memory to be set;
  29. val: set the target memory to value 'val'
  30. num_bytes: the number of bytes all together
  31. @return: through mem_d
  32. ------------------------------------------------------------*/
  33. __kernel void memset_kernel(__global char * mem_d, short val, int ct){
  34. const int thread_id = get_global_id(0);
  35. if( thread_id >= ct) return;
  36. mem_d[thread_id] = val;
  37. }
  38. //--cambine: omit &
  39. inline void compute_velocity(float density, FLOAT3 momentum, FLOAT3* velocity){
  40. velocity->x = momentum.x / density;
  41. velocity->y = momentum.y / density;
  42. velocity->z = momentum.z / density;
  43. }
  44. inline float compute_speed_sqd(FLOAT3 velocity){
  45. return velocity.x*velocity.x + velocity.y*velocity.y + velocity.z*velocity.z;
  46. }
  47. inline float compute_pressure(float density, float density_energy, float speed_sqd){
  48. return ((float)(GAMMA) - (float)(1.0f))*(density_energy - (float)(0.5f)*density*speed_sqd);
  49. }
  50. inline float compute_speed_of_sound(float density, float pressure){
  51. //return sqrtf(float(GAMMA)*pressure/density);
  52. return sqrt((float)(GAMMA)*pressure/density);
  53. }
  54. inline void compute_flux_contribution(float density, FLOAT3 momentum, float density_energy, float pressure, FLOAT3 velocity, FLOAT3* fc_momentum_x, FLOAT3* fc_momentum_y, FLOAT3* fc_momentum_z, FLOAT3* fc_density_energy)
  55. {
  56. fc_momentum_x->x = velocity.x*momentum.x + pressure;
  57. fc_momentum_x->y = velocity.x*momentum.y;
  58. fc_momentum_x->z = velocity.x*momentum.z;
  59. fc_momentum_y->x = fc_momentum_x->y;
  60. fc_momentum_y->y = velocity.y*momentum.y + pressure;
  61. fc_momentum_y->z = velocity.y*momentum.z;
  62. fc_momentum_z->x = fc_momentum_x->z;
  63. fc_momentum_z->y = fc_momentum_y->z;
  64. fc_momentum_z->z = velocity.z*momentum.z + pressure;
  65. float de_p = density_energy+pressure;
  66. fc_density_energy->x = velocity.x*de_p;
  67. fc_density_energy->y = velocity.y*de_p;
  68. fc_density_energy->z = velocity.z*de_p;
  69. }
  70. __kernel void initialize_variables(__global float* variables, __constant float* ff_variable, int nelr){
  71. //const int i = (blockDim.x*blockIdx.x + threadIdx.x);
  72. const int i = get_global_id(0);
  73. if( i >= nelr) return;
  74. for(int j = 0; j < NVAR; j++)
  75. variables[i + j*nelr] = ff_variable[j];
  76. }
  77. __kernel void compute_step_factor(__global float* variables,
  78. __global float* areas,
  79. __global float* step_factors,
  80. int nelr){
  81. //const int i = (blockDim.x*blockIdx.x + threadIdx.x);
  82. const int i = get_global_id(0);
  83. if( i >= nelr) return;
  84. float density = variables[i + VAR_DENSITY*nelr];
  85. FLOAT3 momentum;
  86. momentum.x = variables[i + (VAR_MOMENTUM+0)*nelr];
  87. momentum.y = variables[i + (VAR_MOMENTUM+1)*nelr];
  88. momentum.z = variables[i + (VAR_MOMENTUM+2)*nelr];
  89. float density_energy = variables[i + VAR_DENSITY_ENERGY*nelr];
  90. FLOAT3 velocity; compute_velocity(density, momentum, &velocity);
  91. float speed_sqd = compute_speed_sqd(velocity);
  92. //float speed_sqd;
  93. //compute_speed_sqd(velocity, speed_sqd);
  94. float pressure = compute_pressure(density, density_energy, speed_sqd);
  95. float speed_of_sound = compute_speed_of_sound(density, pressure);
  96. // dt = float(0.5f) * sqrtf(areas[i]) / (||v|| + c).... but when we do time stepping, this later would need to be divided by the area, so we just do it all at once
  97. //step_factors[i] = (float)(0.5f) / (sqrtf(areas[i]) * (sqrtf(speed_sqd) + speed_of_sound));
  98. step_factors[i] = (float)(0.5f) / (sqrt(areas[i]) * (sqrt(speed_sqd) + speed_of_sound));
  99. }
  100. __kernel void compute_flux(
  101. __global int* elements_surrounding_elements,
  102. __global float* normals,
  103. __global float* variables,
  104. __constant float* ff_variable,
  105. __global float* fluxes,
  106. __constant FLOAT3* ff_flux_contribution_density_energy,
  107. __constant FLOAT3* ff_flux_contribution_momentum_x,
  108. __constant FLOAT3* ff_flux_contribution_momentum_y,
  109. __constant FLOAT3* ff_flux_contribution_momentum_z,
  110. int nelr){
  111. const float smoothing_coefficient = (float)(0.2f);
  112. //const int i = (blockDim.x*blockIdx.x + threadIdx.x);
  113. const int i = get_global_id(0);
  114. if( i >= nelr) return;
  115. int j, nb;
  116. FLOAT3 normal; float normal_len;
  117. float factor;
  118. float density_i = variables[i + VAR_DENSITY*nelr];
  119. FLOAT3 momentum_i;
  120. momentum_i.x = variables[i + (VAR_MOMENTUM+0)*nelr];
  121. momentum_i.y = variables[i + (VAR_MOMENTUM+1)*nelr];
  122. momentum_i.z = variables[i + (VAR_MOMENTUM+2)*nelr];
  123. float density_energy_i = variables[i + VAR_DENSITY_ENERGY*nelr];
  124. FLOAT3 velocity_i; compute_velocity(density_i, momentum_i, &velocity_i);
  125. float speed_sqd_i = compute_speed_sqd(velocity_i);
  126. //float speed_sqd_i;
  127. //compute_speed_sqd(velocity_i, speed_sqd_i);
  128. //float speed_i = sqrtf(speed_sqd_i);
  129. float speed_i = sqrt(speed_sqd_i);
  130. float pressure_i = compute_pressure(density_i, density_energy_i, speed_sqd_i);
  131. float speed_of_sound_i = compute_speed_of_sound(density_i, pressure_i);
  132. FLOAT3 flux_contribution_i_momentum_x, flux_contribution_i_momentum_y, flux_contribution_i_momentum_z;
  133. FLOAT3 flux_contribution_i_density_energy;
  134. compute_flux_contribution(density_i, momentum_i, density_energy_i, pressure_i, velocity_i, &flux_contribution_i_momentum_x, &flux_contribution_i_momentum_y, &flux_contribution_i_momentum_z, &flux_contribution_i_density_energy);
  135. float flux_i_density = (float)(0.0f);
  136. FLOAT3 flux_i_momentum;
  137. flux_i_momentum.x = (float)(0.0f);
  138. flux_i_momentum.y = (float)(0.0f);
  139. flux_i_momentum.z = (float)(0.0f);
  140. float flux_i_density_energy = (float)(0.0f);
  141. FLOAT3 velocity_nb;
  142. float density_nb, density_energy_nb;
  143. FLOAT3 momentum_nb;
  144. FLOAT3 flux_contribution_nb_momentum_x, flux_contribution_nb_momentum_y, flux_contribution_nb_momentum_z;
  145. FLOAT3 flux_contribution_nb_density_energy;
  146. float speed_sqd_nb, speed_of_sound_nb, pressure_nb;
  147. #pragma unroll
  148. for(j = 0; j < NNB; j++)
  149. {
  150. nb = elements_surrounding_elements[i + j*nelr];
  151. normal.x = normals[i + (j + 0*NNB)*nelr];
  152. normal.y = normals[i + (j + 1*NNB)*nelr];
  153. normal.z = normals[i + (j + 2*NNB)*nelr];
  154. //normal_len = sqrtf(normal.x*normal.x + normal.y*normal.y + normal.z*normal.z);
  155. normal_len = sqrt(normal.x*normal.x + normal.y*normal.y + normal.z*normal.z);
  156. if(nb >= 0) // a legitimate neighbor
  157. {
  158. density_nb = variables[nb + VAR_DENSITY*nelr];
  159. momentum_nb.x = variables[nb + (VAR_MOMENTUM+0)*nelr];
  160. momentum_nb.y = variables[nb + (VAR_MOMENTUM+1)*nelr];
  161. momentum_nb.z = variables[nb + (VAR_MOMENTUM+2)*nelr];
  162. density_energy_nb = variables[nb + VAR_DENSITY_ENERGY*nelr];
  163. compute_velocity(density_nb, momentum_nb, &velocity_nb);
  164. speed_sqd_nb = compute_speed_sqd(velocity_nb);
  165. pressure_nb = compute_pressure(density_nb, density_energy_nb, speed_sqd_nb);
  166. speed_of_sound_nb = compute_speed_of_sound(density_nb, pressure_nb);
  167. compute_flux_contribution(density_nb, momentum_nb, density_energy_nb, pressure_nb, velocity_nb, &flux_contribution_nb_momentum_x, &flux_contribution_nb_momentum_y, &flux_contribution_nb_momentum_z, &flux_contribution_nb_density_energy);
  168. // artificial viscosity
  169. factor = -normal_len*smoothing_coefficient*(float)(0.5f)*(speed_i + sqrt(speed_sqd_nb) + speed_of_sound_i + speed_of_sound_nb);
  170. flux_i_density += factor*(density_i-density_nb);
  171. flux_i_density_energy += factor*(density_energy_i-density_energy_nb);
  172. flux_i_momentum.x += factor*(momentum_i.x-momentum_nb.x);
  173. flux_i_momentum.y += factor*(momentum_i.y-momentum_nb.y);
  174. flux_i_momentum.z += factor*(momentum_i.z-momentum_nb.z);
  175. // accumulate cell-centered fluxes
  176. factor = (float)(0.5f)*normal.x;
  177. flux_i_density += factor*(momentum_nb.x+momentum_i.x);
  178. flux_i_density_energy += factor*(flux_contribution_nb_density_energy.x+flux_contribution_i_density_energy.x);
  179. flux_i_momentum.x += factor*(flux_contribution_nb_momentum_x.x+flux_contribution_i_momentum_x.x);
  180. flux_i_momentum.y += factor*(flux_contribution_nb_momentum_y.x+flux_contribution_i_momentum_y.x);
  181. flux_i_momentum.z += factor*(flux_contribution_nb_momentum_z.x+flux_contribution_i_momentum_z.x);
  182. factor = (float)(0.5f)*normal.y;
  183. flux_i_density += factor*(momentum_nb.y+momentum_i.y);
  184. flux_i_density_energy += factor*(flux_contribution_nb_density_energy.y+flux_contribution_i_density_energy.y);
  185. flux_i_momentum.x += factor*(flux_contribution_nb_momentum_x.y+flux_contribution_i_momentum_x.y);
  186. flux_i_momentum.y += factor*(flux_contribution_nb_momentum_y.y+flux_contribution_i_momentum_y.y);
  187. flux_i_momentum.z += factor*(flux_contribution_nb_momentum_z.y+flux_contribution_i_momentum_z.y);
  188. factor = (float)(0.5f)*normal.z;
  189. flux_i_density += factor*(momentum_nb.z+momentum_i.z);
  190. flux_i_density_energy += factor*(flux_contribution_nb_density_energy.z+flux_contribution_i_density_energy.z);
  191. flux_i_momentum.x += factor*(flux_contribution_nb_momentum_x.z+flux_contribution_i_momentum_x.z);
  192. flux_i_momentum.y += factor*(flux_contribution_nb_momentum_y.z+flux_contribution_i_momentum_y.z);
  193. flux_i_momentum.z += factor*(flux_contribution_nb_momentum_z.z+flux_contribution_i_momentum_z.z);
  194. }
  195. else if(nb == -1) // a wing boundary
  196. {
  197. flux_i_momentum.x += normal.x*pressure_i;
  198. flux_i_momentum.y += normal.y*pressure_i;
  199. flux_i_momentum.z += normal.z*pressure_i;
  200. }
  201. else if(nb == -2) // a far field boundary
  202. {
  203. factor = (float)(0.5f)*normal.x;
  204. flux_i_density += factor*(ff_variable[VAR_MOMENTUM+0]+momentum_i.x);
  205. flux_i_density_energy += factor*(ff_flux_contribution_density_energy[0].x+flux_contribution_i_density_energy.x);
  206. flux_i_momentum.x += factor*(ff_flux_contribution_momentum_x[0].x + flux_contribution_i_momentum_x.x);
  207. flux_i_momentum.y += factor*(ff_flux_contribution_momentum_y[0].x + flux_contribution_i_momentum_y.x);
  208. flux_i_momentum.z += factor*(ff_flux_contribution_momentum_z[0].x + flux_contribution_i_momentum_z.x);
  209. factor = (float)(0.5f)*normal.y;
  210. flux_i_density += factor*(ff_variable[VAR_MOMENTUM+1]+momentum_i.y);
  211. flux_i_density_energy += factor*(ff_flux_contribution_density_energy[0].y+flux_contribution_i_density_energy.y);
  212. flux_i_momentum.x += factor*(ff_flux_contribution_momentum_x[0].y + flux_contribution_i_momentum_x.y);
  213. flux_i_momentum.y += factor*(ff_flux_contribution_momentum_y[0].y + flux_contribution_i_momentum_y.y);
  214. flux_i_momentum.z += factor*(ff_flux_contribution_momentum_z[0].y + flux_contribution_i_momentum_z.y);
  215. factor = (float)(0.5f)*normal.z;
  216. flux_i_density += factor*(ff_variable[VAR_MOMENTUM+2]+momentum_i.z);
  217. flux_i_density_energy += factor*(ff_flux_contribution_density_energy[0].z+flux_contribution_i_density_energy.z);
  218. flux_i_momentum.x += factor*(ff_flux_contribution_momentum_x[0].z + flux_contribution_i_momentum_x.z);
  219. flux_i_momentum.y += factor*(ff_flux_contribution_momentum_y[0].z + flux_contribution_i_momentum_y.z);
  220. flux_i_momentum.z += factor*(ff_flux_contribution_momentum_z[0].z + flux_contribution_i_momentum_z.z);
  221. }
  222. }
  223. fluxes[i + VAR_DENSITY*nelr] = flux_i_density;
  224. fluxes[i + (VAR_MOMENTUM+0)*nelr] = flux_i_momentum.x;
  225. fluxes[i + (VAR_MOMENTUM+1)*nelr] = flux_i_momentum.y;
  226. fluxes[i + (VAR_MOMENTUM+2)*nelr] = flux_i_momentum.z;
  227. fluxes[i + VAR_DENSITY_ENERGY*nelr] = flux_i_density_energy;
  228. }
  229. __kernel void time_step(int j, int nelr,
  230. __global float* old_variables,
  231. __global float* variables,
  232. __global float* step_factors,
  233. __global float* fluxes){
  234. //const int i = (blockDim.x*blockIdx.x + threadIdx.x);
  235. const int i = get_global_id(0);
  236. if( i >= nelr) return;
  237. float factor = step_factors[i]/(float)(RK+1-j);
  238. variables[i + VAR_DENSITY*nelr] = old_variables[i + VAR_DENSITY*nelr] + factor*fluxes[i + VAR_DENSITY*nelr];
  239. variables[i + VAR_DENSITY_ENERGY*nelr] = old_variables[i + VAR_DENSITY_ENERGY*nelr] + factor*fluxes[i + VAR_DENSITY_ENERGY*nelr];
  240. variables[i + (VAR_MOMENTUM+0)*nelr] = old_variables[i + (VAR_MOMENTUM+0)*nelr] + factor*fluxes[i + (VAR_MOMENTUM+0)*nelr];
  241. variables[i + (VAR_MOMENTUM+1)*nelr] = old_variables[i + (VAR_MOMENTUM+1)*nelr] + factor*fluxes[i + (VAR_MOMENTUM+1)*nelr];
  242. variables[i + (VAR_MOMENTUM+2)*nelr] = old_variables[i + (VAR_MOMENTUM+2)*nelr] + factor*fluxes[i + (VAR_MOMENTUM+2)*nelr];
  243. }
  244. #endif