123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284 |
- /* ============================================================
- //--functions: kernel funtion
- //--programmer: Jianbin Fang
- //--date: 24/03/2011
- ============================================================ */
- #ifndef _KERNEL_
- #define _KERNEL_
- #define GAMMA (1.4f)
- #define NDIM 3
- #define NNB 4
- #define RK 3 // 3rd order RK
- #define ff_mach 1.2f
- #define deg_angle_of_attack 0.0f
- #define VAR_DENSITY 0
- #define VAR_MOMENTUM 1
- #define VAR_DENSITY_ENERGY (VAR_MOMENTUM+NDIM)
- #define NVAR (VAR_DENSITY_ENERGY+1)
- //#pragma OPENCL EXTENSION CL_MAD : enable
- //self-defined user type
- typedef struct{
- float x;
- float y;
- float z;
- } FLOAT3;
- /*------------------------------------------------------------
- @function: set memory
- @params:
- mem_d: target memory to be set;
- val: set the target memory to value 'val'
- num_bytes: the number of bytes all together
- @return: through mem_d
- ------------------------------------------------------------*/
- __kernel void memset_kernel(__global char * mem_d, short val, int ct){
- const int thread_id = get_global_id(0);
- if( thread_id >= ct) return;
- mem_d[thread_id] = val;
- }
- //--cambine: omit &
- inline void compute_velocity(float density, FLOAT3 momentum, FLOAT3* velocity){
- velocity->x = momentum.x / density;
- velocity->y = momentum.y / density;
- velocity->z = momentum.z / density;
- }
-
- inline float compute_speed_sqd(FLOAT3 velocity){
- return velocity.x*velocity.x + velocity.y*velocity.y + velocity.z*velocity.z;
- }
- inline float compute_pressure(float density, float density_energy, float speed_sqd){
- return ((float)(GAMMA) - (float)(1.0f))*(density_energy - (float)(0.5f)*density*speed_sqd);
- }
- inline float compute_speed_of_sound(float density, float pressure){
- //return sqrtf(float(GAMMA)*pressure/density);
- return sqrt((float)(GAMMA)*pressure/density);
- }
- inline void compute_flux_contribution(float density, FLOAT3 momentum, float density_energy, float pressure, FLOAT3 velocity, FLOAT3* fc_momentum_x, FLOAT3* fc_momentum_y, FLOAT3* fc_momentum_z, FLOAT3* fc_density_energy)
- {
- fc_momentum_x->x = velocity.x*momentum.x + pressure;
- fc_momentum_x->y = velocity.x*momentum.y;
- fc_momentum_x->z = velocity.x*momentum.z;
-
-
- fc_momentum_y->x = fc_momentum_x->y;
- fc_momentum_y->y = velocity.y*momentum.y + pressure;
- fc_momentum_y->z = velocity.y*momentum.z;
- fc_momentum_z->x = fc_momentum_x->z;
- fc_momentum_z->y = fc_momentum_y->z;
- fc_momentum_z->z = velocity.z*momentum.z + pressure;
- float de_p = density_energy+pressure;
- fc_density_energy->x = velocity.x*de_p;
- fc_density_energy->y = velocity.y*de_p;
- fc_density_energy->z = velocity.z*de_p;
- }
- __kernel void initialize_variables(__global float* variables, __constant float* ff_variable, int nelr){
- //const int i = (blockDim.x*blockIdx.x + threadIdx.x);
- const int i = get_global_id(0);
- if( i >= nelr) return;
- for(int j = 0; j < NVAR; j++)
- variables[i + j*nelr] = ff_variable[j];
-
- }
- __kernel void compute_step_factor(__global float* variables,
- __global float* areas,
- __global float* step_factors,
- int nelr){
- //const int i = (blockDim.x*blockIdx.x + threadIdx.x);
- const int i = get_global_id(0);
- if( i >= nelr) return;
- float density = variables[i + VAR_DENSITY*nelr];
- FLOAT3 momentum;
- momentum.x = variables[i + (VAR_MOMENTUM+0)*nelr];
- momentum.y = variables[i + (VAR_MOMENTUM+1)*nelr];
- momentum.z = variables[i + (VAR_MOMENTUM+2)*nelr];
-
- float density_energy = variables[i + VAR_DENSITY_ENERGY*nelr];
-
- FLOAT3 velocity; compute_velocity(density, momentum, &velocity);
- float speed_sqd = compute_speed_sqd(velocity);
- //float speed_sqd;
- //compute_speed_sqd(velocity, speed_sqd);
- float pressure = compute_pressure(density, density_energy, speed_sqd);
- float speed_of_sound = compute_speed_of_sound(density, pressure);
- // dt = float(0.5f) * sqrtf(areas[i]) / (||v|| + c).... but when we do time stepping, this later would need to be divided by the area, so we just do it all at once
- //step_factors[i] = (float)(0.5f) / (sqrtf(areas[i]) * (sqrtf(speed_sqd) + speed_of_sound));
- step_factors[i] = (float)(0.5f) / (sqrt(areas[i]) * (sqrt(speed_sqd) + speed_of_sound));
- }
- __kernel void compute_flux(
- __global int* elements_surrounding_elements,
- __global float* normals,
- __global float* variables,
- __constant float* ff_variable,
- __global float* fluxes,
- __constant FLOAT3* ff_flux_contribution_density_energy,
- __constant FLOAT3* ff_flux_contribution_momentum_x,
- __constant FLOAT3* ff_flux_contribution_momentum_y,
- __constant FLOAT3* ff_flux_contribution_momentum_z,
- int nelr){
- const float smoothing_coefficient = (float)(0.2f);
- //const int i = (blockDim.x*blockIdx.x + threadIdx.x);
- const int i = get_global_id(0);
- if( i >= nelr) return;
- int j, nb;
- FLOAT3 normal; float normal_len;
- float factor;
-
- float density_i = variables[i + VAR_DENSITY*nelr];
- FLOAT3 momentum_i;
- momentum_i.x = variables[i + (VAR_MOMENTUM+0)*nelr];
- momentum_i.y = variables[i + (VAR_MOMENTUM+1)*nelr];
- momentum_i.z = variables[i + (VAR_MOMENTUM+2)*nelr];
- float density_energy_i = variables[i + VAR_DENSITY_ENERGY*nelr];
- FLOAT3 velocity_i; compute_velocity(density_i, momentum_i, &velocity_i);
- float speed_sqd_i = compute_speed_sqd(velocity_i);
- //float speed_sqd_i;
- //compute_speed_sqd(velocity_i, speed_sqd_i);
- //float speed_i = sqrtf(speed_sqd_i);
- float speed_i = sqrt(speed_sqd_i);
- float pressure_i = compute_pressure(density_i, density_energy_i, speed_sqd_i);
- float speed_of_sound_i = compute_speed_of_sound(density_i, pressure_i);
- FLOAT3 flux_contribution_i_momentum_x, flux_contribution_i_momentum_y, flux_contribution_i_momentum_z;
- FLOAT3 flux_contribution_i_density_energy;
- compute_flux_contribution(density_i, momentum_i, density_energy_i, pressure_i, velocity_i, &flux_contribution_i_momentum_x, &flux_contribution_i_momentum_y, &flux_contribution_i_momentum_z, &flux_contribution_i_density_energy);
-
- float flux_i_density = (float)(0.0f);
- FLOAT3 flux_i_momentum;
- flux_i_momentum.x = (float)(0.0f);
- flux_i_momentum.y = (float)(0.0f);
- flux_i_momentum.z = (float)(0.0f);
- float flux_i_density_energy = (float)(0.0f);
-
- FLOAT3 velocity_nb;
- float density_nb, density_energy_nb;
- FLOAT3 momentum_nb;
- FLOAT3 flux_contribution_nb_momentum_x, flux_contribution_nb_momentum_y, flux_contribution_nb_momentum_z;
- FLOAT3 flux_contribution_nb_density_energy;
- float speed_sqd_nb, speed_of_sound_nb, pressure_nb;
-
- #pragma unroll
- for(j = 0; j < NNB; j++)
- {
- nb = elements_surrounding_elements[i + j*nelr];
- normal.x = normals[i + (j + 0*NNB)*nelr];
- normal.y = normals[i + (j + 1*NNB)*nelr];
- normal.z = normals[i + (j + 2*NNB)*nelr];
- //normal_len = sqrtf(normal.x*normal.x + normal.y*normal.y + normal.z*normal.z);
- normal_len = sqrt(normal.x*normal.x + normal.y*normal.y + normal.z*normal.z);
-
- if(nb >= 0) // a legitimate neighbor
- {
- density_nb = variables[nb + VAR_DENSITY*nelr];
- momentum_nb.x = variables[nb + (VAR_MOMENTUM+0)*nelr];
- momentum_nb.y = variables[nb + (VAR_MOMENTUM+1)*nelr];
- momentum_nb.z = variables[nb + (VAR_MOMENTUM+2)*nelr];
- density_energy_nb = variables[nb + VAR_DENSITY_ENERGY*nelr];
- compute_velocity(density_nb, momentum_nb, &velocity_nb);
- speed_sqd_nb = compute_speed_sqd(velocity_nb);
- pressure_nb = compute_pressure(density_nb, density_energy_nb, speed_sqd_nb);
- speed_of_sound_nb = compute_speed_of_sound(density_nb, pressure_nb);
- compute_flux_contribution(density_nb, momentum_nb, density_energy_nb, pressure_nb, velocity_nb, &flux_contribution_nb_momentum_x, &flux_contribution_nb_momentum_y, &flux_contribution_nb_momentum_z, &flux_contribution_nb_density_energy);
-
- // artificial viscosity
- factor = -normal_len*smoothing_coefficient*(float)(0.5f)*(speed_i + sqrt(speed_sqd_nb) + speed_of_sound_i + speed_of_sound_nb);
- flux_i_density += factor*(density_i-density_nb);
- flux_i_density_energy += factor*(density_energy_i-density_energy_nb);
- flux_i_momentum.x += factor*(momentum_i.x-momentum_nb.x);
- flux_i_momentum.y += factor*(momentum_i.y-momentum_nb.y);
- flux_i_momentum.z += factor*(momentum_i.z-momentum_nb.z);
- // accumulate cell-centered fluxes
- factor = (float)(0.5f)*normal.x;
- flux_i_density += factor*(momentum_nb.x+momentum_i.x);
- flux_i_density_energy += factor*(flux_contribution_nb_density_energy.x+flux_contribution_i_density_energy.x);
- flux_i_momentum.x += factor*(flux_contribution_nb_momentum_x.x+flux_contribution_i_momentum_x.x);
- flux_i_momentum.y += factor*(flux_contribution_nb_momentum_y.x+flux_contribution_i_momentum_y.x);
- flux_i_momentum.z += factor*(flux_contribution_nb_momentum_z.x+flux_contribution_i_momentum_z.x);
-
- factor = (float)(0.5f)*normal.y;
- flux_i_density += factor*(momentum_nb.y+momentum_i.y);
- flux_i_density_energy += factor*(flux_contribution_nb_density_energy.y+flux_contribution_i_density_energy.y);
- flux_i_momentum.x += factor*(flux_contribution_nb_momentum_x.y+flux_contribution_i_momentum_x.y);
- flux_i_momentum.y += factor*(flux_contribution_nb_momentum_y.y+flux_contribution_i_momentum_y.y);
- flux_i_momentum.z += factor*(flux_contribution_nb_momentum_z.y+flux_contribution_i_momentum_z.y);
-
- factor = (float)(0.5f)*normal.z;
- flux_i_density += factor*(momentum_nb.z+momentum_i.z);
- flux_i_density_energy += factor*(flux_contribution_nb_density_energy.z+flux_contribution_i_density_energy.z);
- flux_i_momentum.x += factor*(flux_contribution_nb_momentum_x.z+flux_contribution_i_momentum_x.z);
- flux_i_momentum.y += factor*(flux_contribution_nb_momentum_y.z+flux_contribution_i_momentum_y.z);
- flux_i_momentum.z += factor*(flux_contribution_nb_momentum_z.z+flux_contribution_i_momentum_z.z);
- }
- else if(nb == -1) // a wing boundary
- {
- flux_i_momentum.x += normal.x*pressure_i;
- flux_i_momentum.y += normal.y*pressure_i;
- flux_i_momentum.z += normal.z*pressure_i;
- }
- else if(nb == -2) // a far field boundary
- {
- factor = (float)(0.5f)*normal.x;
- flux_i_density += factor*(ff_variable[VAR_MOMENTUM+0]+momentum_i.x);
- flux_i_density_energy += factor*(ff_flux_contribution_density_energy[0].x+flux_contribution_i_density_energy.x);
- flux_i_momentum.x += factor*(ff_flux_contribution_momentum_x[0].x + flux_contribution_i_momentum_x.x);
- flux_i_momentum.y += factor*(ff_flux_contribution_momentum_y[0].x + flux_contribution_i_momentum_y.x);
- flux_i_momentum.z += factor*(ff_flux_contribution_momentum_z[0].x + flux_contribution_i_momentum_z.x);
-
- factor = (float)(0.5f)*normal.y;
- flux_i_density += factor*(ff_variable[VAR_MOMENTUM+1]+momentum_i.y);
- flux_i_density_energy += factor*(ff_flux_contribution_density_energy[0].y+flux_contribution_i_density_energy.y);
- flux_i_momentum.x += factor*(ff_flux_contribution_momentum_x[0].y + flux_contribution_i_momentum_x.y);
- flux_i_momentum.y += factor*(ff_flux_contribution_momentum_y[0].y + flux_contribution_i_momentum_y.y);
- flux_i_momentum.z += factor*(ff_flux_contribution_momentum_z[0].y + flux_contribution_i_momentum_z.y);
- factor = (float)(0.5f)*normal.z;
- flux_i_density += factor*(ff_variable[VAR_MOMENTUM+2]+momentum_i.z);
- flux_i_density_energy += factor*(ff_flux_contribution_density_energy[0].z+flux_contribution_i_density_energy.z);
- flux_i_momentum.x += factor*(ff_flux_contribution_momentum_x[0].z + flux_contribution_i_momentum_x.z);
- flux_i_momentum.y += factor*(ff_flux_contribution_momentum_y[0].z + flux_contribution_i_momentum_y.z);
- flux_i_momentum.z += factor*(ff_flux_contribution_momentum_z[0].z + flux_contribution_i_momentum_z.z);
- }
- }
- fluxes[i + VAR_DENSITY*nelr] = flux_i_density;
- fluxes[i + (VAR_MOMENTUM+0)*nelr] = flux_i_momentum.x;
- fluxes[i + (VAR_MOMENTUM+1)*nelr] = flux_i_momentum.y;
- fluxes[i + (VAR_MOMENTUM+2)*nelr] = flux_i_momentum.z;
- fluxes[i + VAR_DENSITY_ENERGY*nelr] = flux_i_density_energy;
- }
- __kernel void time_step(int j, int nelr,
- __global float* old_variables,
- __global float* variables,
- __global float* step_factors,
- __global float* fluxes){
- //const int i = (blockDim.x*blockIdx.x + threadIdx.x);
- const int i = get_global_id(0);
- if( i >= nelr) return;
- float factor = step_factors[i]/(float)(RK+1-j);
- variables[i + VAR_DENSITY*nelr] = old_variables[i + VAR_DENSITY*nelr] + factor*fluxes[i + VAR_DENSITY*nelr];
- variables[i + VAR_DENSITY_ENERGY*nelr] = old_variables[i + VAR_DENSITY_ENERGY*nelr] + factor*fluxes[i + VAR_DENSITY_ENERGY*nelr];
- variables[i + (VAR_MOMENTUM+0)*nelr] = old_variables[i + (VAR_MOMENTUM+0)*nelr] + factor*fluxes[i + (VAR_MOMENTUM+0)*nelr];
- variables[i + (VAR_MOMENTUM+1)*nelr] = old_variables[i + (VAR_MOMENTUM+1)*nelr] + factor*fluxes[i + (VAR_MOMENTUM+1)*nelr];
- variables[i + (VAR_MOMENTUM+2)*nelr] = old_variables[i + (VAR_MOMENTUM+2)*nelr] + factor*fluxes[i + (VAR_MOMENTUM+2)*nelr];
-
- }
- #endif
|