123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417 |
- /********************************************************************
- euler3d.cpp
- : parallelized code of CFD
- - original code from the AIAA-2009-4001 by Andrew Corrigan, acorriga@gmu.edu
- - parallelization with OpenCL API has been applied by
- Jianbin Fang - j.fang@tudelft.nl
- Delft University of Technology
- Faculty of Electrical Engineering, Mathematics and Computer Science
- Department of Software Technology
- Parallel and Distributed Systems Group
- on 24/03/2011
- ********************************************************************/
- #include <iostream>
- #include <fstream>
- #include <math.h>
- #include "CLHelper.h"
- /*
- * Options
- *
- */
- #define GAMMA 1.4f
- #define iterations 2000
- #ifndef block_length
- #define block_length 192
- #endif
- #define NDIM 3
- #define NNB 4
- #define RK 3 // 3rd order RK
- #define ff_mach 1.2f
- #define deg_angle_of_attack 0.0f
- /*
- * not options
- */
- #if block_length > 128
- #warning "the kernels may fail too launch on some systems if the block length is too large"
- #endif
- #define VAR_DENSITY 0
- #define VAR_MOMENTUM 1
- #define VAR_DENSITY_ENERGY (VAR_MOMENTUM+NDIM)
- #define NVAR (VAR_DENSITY_ENERGY+1)
- //self-defined user type
- typedef struct{
- float x;
- float y;
- float z;
- } float3;
- /*
- * Generic functions
- */
- template <typename T>
- cl_mem alloc(int N){
- cl_mem mem_d = _clMalloc(sizeof(T)*N);
- return mem_d;
- }
- template <typename T>
- void dealloc(cl_mem array){
- _clFree(array);
- }
- template <typename T>
- void copy(cl_mem dst, cl_mem src, int N){
- _clMemcpyD2D(dst, src, N*sizeof(T));
- }
- template <typename T>
- void upload(cl_mem dst, T* src, int N){
- _clMemcpyH2D(dst, src, N*sizeof(T));
- }
- template <typename T>
- void download(T* dst, cl_mem src, int N){
- _clMemcpyD2H(dst, src, N*sizeof(T));
- }
- void dump(cl_mem variables, int nel, int nelr){
- float* h_variables = new float[nelr*NVAR];
- download(h_variables, variables, nelr*NVAR);
- {
- std::ofstream file("density");
- file << nel << " " << nelr << std::endl;
- for(int i = 0; i < nel; i++) file << h_variables[i + VAR_DENSITY*nelr] << std::endl;
- }
- {
- std::ofstream file("momentum");
- file << nel << " " << nelr << std::endl;
- for(int i = 0; i < nel; i++)
- {
- for(int j = 0; j != NDIM; j++)
- file << h_variables[i + (VAR_MOMENTUM+j)*nelr] << " ";
- file << std::endl;
- }
- }
- {
- std::ofstream file("density_energy");
- file << nel << " " << nelr << std::endl;
- for(int i = 0; i < nel; i++) file << h_variables[i + VAR_DENSITY_ENERGY*nelr] << std::endl;
- }
- delete[] h_variables;
- }
- void initialize_variables(int nelr, cl_mem variables, cl_mem ff_variable) throw(string){
- int work_items = nelr;
- int work_group_size = BLOCK_SIZE_1;
- int kernel_id = 1;
- int arg_idx = 0;
- _clSetArgs(kernel_id, arg_idx++, variables);
- _clSetArgs(kernel_id, arg_idx++, ff_variable);
- _clSetArgs(kernel_id, arg_idx++, &nelr, sizeof(int));
- _clInvokeKernel(kernel_id, work_items, work_group_size);
- }
- void compute_step_factor(int nelr, cl_mem variables, cl_mem areas, cl_mem step_factors){
- int work_items = nelr;
- int work_group_size = BLOCK_SIZE_2;
- int kernel_id = 2;
- int arg_idx = 0;
- _clSetArgs(kernel_id, arg_idx++, variables);
- _clSetArgs(kernel_id, arg_idx++, areas);
- _clSetArgs(kernel_id, arg_idx++, step_factors);
- _clSetArgs(kernel_id, arg_idx++, &nelr, sizeof(int));
- _clInvokeKernel(kernel_id, work_items, work_group_size);
- }
- void compute_flux(int nelr, cl_mem elements_surrounding_elements, cl_mem normals, cl_mem variables, cl_mem ff_variable, \
- cl_mem fluxes, cl_mem ff_flux_contribution_density_energy,
- cl_mem ff_flux_contribution_momentum_x,
- cl_mem ff_flux_contribution_momentum_y,
- cl_mem ff_flux_contribution_momentum_z){
- int work_items = nelr;
- int work_group_size = BLOCK_SIZE_3;
- int kernel_id = 3;
- int arg_idx = 0;
- _clSetArgs(kernel_id, arg_idx++, elements_surrounding_elements);
- _clSetArgs(kernel_id, arg_idx++, normals);
- _clSetArgs(kernel_id, arg_idx++, variables);
- _clSetArgs(kernel_id, arg_idx++, ff_variable);
- _clSetArgs(kernel_id, arg_idx++, fluxes);
- _clSetArgs(kernel_id, arg_idx++, ff_flux_contribution_density_energy);
- _clSetArgs(kernel_id, arg_idx++, ff_flux_contribution_momentum_x);
- _clSetArgs(kernel_id, arg_idx++, ff_flux_contribution_momentum_y);
- _clSetArgs(kernel_id, arg_idx++, ff_flux_contribution_momentum_z);
- _clSetArgs(kernel_id, arg_idx++, &nelr, sizeof(int));
- _clInvokeKernel(kernel_id, work_items, work_group_size);
- }
- void time_step(int j, int nelr, cl_mem old_variables, cl_mem variables, cl_mem step_factors, cl_mem fluxes){
- int work_items = nelr;
- int work_group_size = BLOCK_SIZE_4;
- int kernel_id = 4;
- int arg_idx = 0;
- _clSetArgs(kernel_id, arg_idx++, &j, sizeof(int));
- _clSetArgs(kernel_id, arg_idx++, &nelr, sizeof(int));
- _clSetArgs(kernel_id, arg_idx++, old_variables);
- _clSetArgs(kernel_id, arg_idx++, variables);
- _clSetArgs(kernel_id, arg_idx++, step_factors);
- _clSetArgs(kernel_id, arg_idx++, fluxes);
- _clInvokeKernel(kernel_id, work_items, work_group_size);
- }
- inline void compute_flux_contribution(float& density, float3& momentum, float& density_energy, float& pressure, float3& velocity, float3& fc_momentum_x, float3& fc_momentum_y, float3& fc_momentum_z, float3& fc_density_energy)
- {
- fc_momentum_x.x = velocity.x*momentum.x + pressure;
- fc_momentum_x.y = velocity.x*momentum.y;
- fc_momentum_x.z = velocity.x*momentum.z;
- fc_momentum_y.x = fc_momentum_x.y;
- fc_momentum_y.y = velocity.y*momentum.y + pressure;
- fc_momentum_y.z = velocity.y*momentum.z;
- fc_momentum_z.x = fc_momentum_x.z;
- fc_momentum_z.y = fc_momentum_y.z;
- fc_momentum_z.z = velocity.z*momentum.z + pressure;
- float de_p = density_energy+pressure;
- fc_density_energy.x = velocity.x*de_p;
- fc_density_energy.y = velocity.y*de_p;
- fc_density_energy.z = velocity.z*de_p;
- }
- /*
- * Main function
- */
- int main(int argc, char** argv){
- printf("WG size of kernel:initialize = %d, WG size of kernel:compute_step_factor = %d, WG size of kernel:compute_flux = %d, WG size of kernel:time_step = %d\n", BLOCK_SIZE_1, BLOCK_SIZE_2, BLOCK_SIZE_3, BLOCK_SIZE_4);
- if (argc < 2){
- std::cout << "specify data file name and [platform id] [device id] [device type] " << std::endl;
- return 0;
- }
- const char* data_file_name = argv[1];
- _clCmdParams(argc, argv);
- cl_mem ff_variable, ff_flux_contribution_momentum_x, ff_flux_contribution_momentum_y,ff_flux_contribution_momentum_z, ff_flux_contribution_density_energy;
- cl_mem areas, elements_surrounding_elements, normals;
- cl_mem variables, old_variables, fluxes, step_factors;
- float h_ff_variable[NVAR];
- try{
- _clInit(device_type, device_id);
- // set far field conditions and load them into constant memory on the gpu
- {
- //float h_ff_variable[NVAR];
- const float angle_of_attack = float(3.1415926535897931 / 180.0f) * float(deg_angle_of_attack);
- h_ff_variable[VAR_DENSITY] = float(1.4);
- float ff_pressure = float(1.0f);
- float ff_speed_of_sound = sqrt(GAMMA*ff_pressure / h_ff_variable[VAR_DENSITY]);
- float ff_speed = float(ff_mach)*ff_speed_of_sound;
- float3 ff_velocity;
- ff_velocity.x = ff_speed*float(cos((float)angle_of_attack));
- ff_velocity.y = ff_speed*float(sin((float)angle_of_attack));
- ff_velocity.z = 0.0f;
- h_ff_variable[VAR_MOMENTUM+0] = h_ff_variable[VAR_DENSITY] * ff_velocity.x;
- h_ff_variable[VAR_MOMENTUM+1] = h_ff_variable[VAR_DENSITY] * ff_velocity.y;
- h_ff_variable[VAR_MOMENTUM+2] = h_ff_variable[VAR_DENSITY] * ff_velocity.z;
- h_ff_variable[VAR_DENSITY_ENERGY] = h_ff_variable[VAR_DENSITY]*(float(0.5f)*(ff_speed*ff_speed)) + (ff_pressure / float(GAMMA-1.0f));
- float3 h_ff_momentum;
- h_ff_momentum.x = *(h_ff_variable+VAR_MOMENTUM+0);
- h_ff_momentum.y = *(h_ff_variable+VAR_MOMENTUM+1);
- h_ff_momentum.z = *(h_ff_variable+VAR_MOMENTUM+2);
- float3 h_ff_flux_contribution_momentum_x;
- float3 h_ff_flux_contribution_momentum_y;
- float3 h_ff_flux_contribution_momentum_z;
- float3 h_ff_flux_contribution_density_energy;
- compute_flux_contribution(h_ff_variable[VAR_DENSITY], h_ff_momentum, h_ff_variable[VAR_DENSITY_ENERGY], ff_pressure, ff_velocity, h_ff_flux_contribution_momentum_x, h_ff_flux_contribution_momentum_y, h_ff_flux_contribution_momentum_z, h_ff_flux_contribution_density_energy);
- // copy far field conditions to the gpu
- //cl_mem ff_variable, ff_flux_contribution_momentum_x, ff_flux_contribution_momentum_y,ff_flux_contribution_momentum_z, ff_flux_contribution_density_energy;
- ff_variable = _clMalloc(NVAR*sizeof(float));
- ff_flux_contribution_momentum_x = _clMalloc(sizeof(float3));
- ff_flux_contribution_momentum_y = _clMalloc(sizeof(float3));
- ff_flux_contribution_momentum_z = _clMalloc(sizeof(float3));
- ff_flux_contribution_density_energy = _clMalloc(sizeof(float3));
- _clMemcpyH2D(ff_variable, h_ff_variable, NVAR*sizeof(float));
- _clMemcpyH2D(ff_flux_contribution_momentum_x, &h_ff_flux_contribution_momentum_x, sizeof(float3));
- _clMemcpyH2D(ff_flux_contribution_momentum_y, &h_ff_flux_contribution_momentum_y, sizeof(float3));
- _clMemcpyH2D(ff_flux_contribution_momentum_z, &h_ff_flux_contribution_momentum_z, sizeof(float3));
- _clMemcpyH2D(ff_flux_contribution_density_energy, &h_ff_flux_contribution_density_energy, sizeof(float3));
- _clFinish();
- }
- int nel;
- int nelr;
- // read in domain geometry
- //float* areas;
- //int* elements_surrounding_elements;
- //float* normals;
- {
- std::ifstream file(data_file_name);
- if(file==NULL){
- throw(string("can not find/open file!"));
- }
- file >> nel;
- nelr = block_length*((nel / block_length )+ std::min(1, nel % block_length));
- std::cout<<"--cambine: nel="<<nel<<", nelr="<<nelr<<std::endl;
- float* h_areas = new float[nelr];
- int* h_elements_surrounding_elements = new int[nelr*NNB];
- float* h_normals = new float[nelr*NDIM*NNB];
- // read in data
- for(int i = 0; i < nel; i++)
- {
- file >> h_areas[i];
- for(int j = 0; j < NNB; j++)
- {
- file >> h_elements_surrounding_elements[i + j*nelr];
- if(h_elements_surrounding_elements[i+j*nelr] < 0) h_elements_surrounding_elements[i+j*nelr] = -1;
- h_elements_surrounding_elements[i + j*nelr]--; //it's coming in with Fortran numbering
- for(int k = 0; k < NDIM; k++)
- {
- file >> h_normals[i + (j + k*NNB)*nelr];
- h_normals[i + (j + k*NNB)*nelr] = -h_normals[i + (j + k*NNB)*nelr];
- }
- }
- }
- // fill in remaining data
- int last = nel-1;
- for(int i = nel; i < nelr; i++)
- {
- h_areas[i] = h_areas[last];
- for(int j = 0; j < NNB; j++)
- {
- // duplicate the last element
- h_elements_surrounding_elements[i + j*nelr] = h_elements_surrounding_elements[last + j*nelr];
- for(int k = 0; k < NDIM; k++) h_normals[last + (j + k*NNB)*nelr] = h_normals[last + (j + k*NNB)*nelr];
- }
- }
- areas = alloc<float>(nelr);
- upload<float>(areas, h_areas, nelr);
- elements_surrounding_elements = alloc<int>(nelr*NNB);
- upload<int>(elements_surrounding_elements, h_elements_surrounding_elements, nelr*NNB);
- normals = alloc<float>(nelr*NDIM*NNB);
- upload<float>(normals, h_normals, nelr*NDIM*NNB);
- delete[] h_areas;
- delete[] h_elements_surrounding_elements;
- delete[] h_normals;
- }
- // Create arrays and set initial conditions
- variables = alloc<float>(nelr*NVAR);
- int tp = 0;
- initialize_variables(nelr, variables, ff_variable);
- old_variables = alloc<float>(nelr*NVAR);
- fluxes = alloc<float>(nelr*NVAR);
- step_factors = alloc<float>(nelr);
- // make sure all memory is floatly allocated before we start timing
- initialize_variables(nelr, old_variables, ff_variable);
- initialize_variables(nelr, fluxes, ff_variable);
- _clMemset(step_factors, 0, sizeof(float)*nelr);
- // make sure CUDA isn't still doing something before we start timing
- _clFinish();
- // these need to be computed the first time in order to compute time step
- std::cout << "Starting..." << std::endl;
- // Begin iterations
- for(int i = 0; i < iterations; i++){
- copy<float>(old_variables, variables, nelr*NVAR);
- // for the first iteration we compute the time step
- compute_step_factor(nelr, variables, areas, step_factors);
- for(int j = 0; j < RK; j++){
- compute_flux(nelr, elements_surrounding_elements, normals, variables, ff_variable, fluxes, ff_flux_contribution_density_energy, \
- ff_flux_contribution_momentum_x, ff_flux_contribution_momentum_y, ff_flux_contribution_momentum_z);
- time_step(j, nelr, old_variables, variables, step_factors, fluxes);
- }
- }
- _clFinish();
- std::cout << "Saving solution..." << std::endl;
- dump(variables, nel, nelr);
- std::cout << "Saved solution..." << std::endl;
- _clStatistics();
- std::cout << "Cleaning up..." << std::endl;
- //--release resources
- _clFree(ff_variable);
- _clFree(ff_flux_contribution_momentum_x);
- _clFree(ff_flux_contribution_momentum_y);
- _clFree(ff_flux_contribution_momentum_z);
- _clFree(ff_flux_contribution_density_energy);
- _clFree(areas);
- _clFree(elements_surrounding_elements);
- _clFree(normals);
- _clFree(variables);
- _clFree(old_variables);
- _clFree(fluxes);
- _clFree(step_factors);
- _clRelease();
- std::cout << "Done..." << std::endl;
- }
- catch(string msg){
- std::cout<<"--cambine:( an exception catched in main body ->"<<msg<<std::endl;
- _clFree(ff_variable);
- _clFree(ff_flux_contribution_momentum_x);
- _clFree(ff_flux_contribution_momentum_y);
- _clFree(ff_flux_contribution_momentum_z);
- _clFree(ff_flux_contribution_density_energy);
- _clFree(areas);
- _clFree(elements_surrounding_elements);
- _clFree(normals);
- _clFree(variables);
- _clFree(old_variables);
- _clFree(fluxes);
- _clFree(step_factors);
- _clRelease();
- }
- catch(...){
- std::cout<<"--cambine:( unknow exceptions in main body..."<<std::endl;
- _clFree(ff_variable);
- _clFree(ff_flux_contribution_momentum_x);
- _clFree(ff_flux_contribution_momentum_y);
- _clFree(ff_flux_contribution_momentum_z);
- _clFree(ff_flux_contribution_density_energy);
- _clFree(areas);
- _clFree(elements_surrounding_elements);
- _clFree(normals);
- _clFree(variables);
- _clFree(old_variables);
- _clFree(fluxes);
- _clFree(step_factors);
- _clRelease();
- }
- return 0;
- }
|