/******************************************************************** euler3d.cpp : parallelized code of CFD - original code from the AIAA-2009-4001 by Andrew Corrigan, acorriga@gmu.edu - parallelization with OpenCL API has been applied by Jianbin Fang - j.fang@tudelft.nl Delft University of Technology Faculty of Electrical Engineering, Mathematics and Computer Science Department of Software Technology Parallel and Distributed Systems Group on 24/03/2011 ********************************************************************/ #include #include #include #include "CLHelper.h" /* * Options * */ #define GAMMA 1.4f #define iterations 2000 #ifndef block_length #define block_length 192 #endif #define NDIM 3 #define NNB 4 #define RK 3 // 3rd order RK #define ff_mach 1.2f #define deg_angle_of_attack 0.0f /* * not options */ #if block_length > 128 #warning "the kernels may fail too launch on some systems if the block length is too large" #endif #define VAR_DENSITY 0 #define VAR_MOMENTUM 1 #define VAR_DENSITY_ENERGY (VAR_MOMENTUM+NDIM) #define NVAR (VAR_DENSITY_ENERGY+1) //self-defined user type typedef struct{ float x; float y; float z; } float3; /* * Generic functions */ template cl_mem alloc(int N){ cl_mem mem_d = _clMalloc(sizeof(T)*N); return mem_d; } template void dealloc(cl_mem array){ _clFree(array); } template void copy(cl_mem dst, cl_mem src, int N){ _clMemcpyD2D(dst, src, N*sizeof(T)); } template void upload(cl_mem dst, T* src, int N){ _clMemcpyH2D(dst, src, N*sizeof(T)); } template void download(T* dst, cl_mem src, int N){ _clMemcpyD2H(dst, src, N*sizeof(T)); } void dump(cl_mem variables, int nel, int nelr){ float* h_variables = new float[nelr*NVAR]; download(h_variables, variables, nelr*NVAR); { std::ofstream file("density"); file << nel << " " << nelr << std::endl; for(int i = 0; i < nel; i++) file << h_variables[i + VAR_DENSITY*nelr] << std::endl; } { std::ofstream file("momentum"); file << nel << " " << nelr << std::endl; for(int i = 0; i < nel; i++) { for(int j = 0; j != NDIM; j++) file << h_variables[i + (VAR_MOMENTUM+j)*nelr] << " "; file << std::endl; } } { std::ofstream file("density_energy"); file << nel << " " << nelr << std::endl; for(int i = 0; i < nel; i++) file << h_variables[i + VAR_DENSITY_ENERGY*nelr] << std::endl; } delete[] h_variables; } void initialize_variables(int nelr, cl_mem variables, cl_mem ff_variable) throw(string){ int work_items = nelr; int work_group_size = BLOCK_SIZE_1; int kernel_id = 1; int arg_idx = 0; _clSetArgs(kernel_id, arg_idx++, variables); _clSetArgs(kernel_id, arg_idx++, ff_variable); _clSetArgs(kernel_id, arg_idx++, &nelr, sizeof(int)); _clInvokeKernel(kernel_id, work_items, work_group_size); } void compute_step_factor(int nelr, cl_mem variables, cl_mem areas, cl_mem step_factors){ int work_items = nelr; int work_group_size = BLOCK_SIZE_2; int kernel_id = 2; int arg_idx = 0; _clSetArgs(kernel_id, arg_idx++, variables); _clSetArgs(kernel_id, arg_idx++, areas); _clSetArgs(kernel_id, arg_idx++, step_factors); _clSetArgs(kernel_id, arg_idx++, &nelr, sizeof(int)); _clInvokeKernel(kernel_id, work_items, work_group_size); } void compute_flux(int nelr, cl_mem elements_surrounding_elements, cl_mem normals, cl_mem variables, cl_mem ff_variable, \ cl_mem fluxes, cl_mem ff_flux_contribution_density_energy, cl_mem ff_flux_contribution_momentum_x, cl_mem ff_flux_contribution_momentum_y, cl_mem ff_flux_contribution_momentum_z){ int work_items = nelr; int work_group_size = BLOCK_SIZE_3; int kernel_id = 3; int arg_idx = 0; _clSetArgs(kernel_id, arg_idx++, elements_surrounding_elements); _clSetArgs(kernel_id, arg_idx++, normals); _clSetArgs(kernel_id, arg_idx++, variables); _clSetArgs(kernel_id, arg_idx++, ff_variable); _clSetArgs(kernel_id, arg_idx++, fluxes); _clSetArgs(kernel_id, arg_idx++, ff_flux_contribution_density_energy); _clSetArgs(kernel_id, arg_idx++, ff_flux_contribution_momentum_x); _clSetArgs(kernel_id, arg_idx++, ff_flux_contribution_momentum_y); _clSetArgs(kernel_id, arg_idx++, ff_flux_contribution_momentum_z); _clSetArgs(kernel_id, arg_idx++, &nelr, sizeof(int)); _clInvokeKernel(kernel_id, work_items, work_group_size); } void time_step(int j, int nelr, cl_mem old_variables, cl_mem variables, cl_mem step_factors, cl_mem fluxes){ int work_items = nelr; int work_group_size = BLOCK_SIZE_4; int kernel_id = 4; int arg_idx = 0; _clSetArgs(kernel_id, arg_idx++, &j, sizeof(int)); _clSetArgs(kernel_id, arg_idx++, &nelr, sizeof(int)); _clSetArgs(kernel_id, arg_idx++, old_variables); _clSetArgs(kernel_id, arg_idx++, variables); _clSetArgs(kernel_id, arg_idx++, step_factors); _clSetArgs(kernel_id, arg_idx++, fluxes); _clInvokeKernel(kernel_id, work_items, work_group_size); } inline void compute_flux_contribution(float& density, float3& momentum, float& density_energy, float& pressure, float3& velocity, float3& fc_momentum_x, float3& fc_momentum_y, float3& fc_momentum_z, float3& fc_density_energy) { fc_momentum_x.x = velocity.x*momentum.x + pressure; fc_momentum_x.y = velocity.x*momentum.y; fc_momentum_x.z = velocity.x*momentum.z; fc_momentum_y.x = fc_momentum_x.y; fc_momentum_y.y = velocity.y*momentum.y + pressure; fc_momentum_y.z = velocity.y*momentum.z; fc_momentum_z.x = fc_momentum_x.z; fc_momentum_z.y = fc_momentum_y.z; fc_momentum_z.z = velocity.z*momentum.z + pressure; float de_p = density_energy+pressure; fc_density_energy.x = velocity.x*de_p; fc_density_energy.y = velocity.y*de_p; fc_density_energy.z = velocity.z*de_p; } /* * Main function */ int main(int argc, char** argv){ printf("WG size of kernel:initialize = %d, WG size of kernel:compute_step_factor = %d, WG size of kernel:compute_flux = %d, WG size of kernel:time_step = %d\n", BLOCK_SIZE_1, BLOCK_SIZE_2, BLOCK_SIZE_3, BLOCK_SIZE_4); if (argc < 2){ std::cout << "specify data file name and [platform id] [device id] [device type] " << std::endl; return 0; } const char* data_file_name = argv[1]; _clCmdParams(argc, argv); cl_mem ff_variable, ff_flux_contribution_momentum_x, ff_flux_contribution_momentum_y,ff_flux_contribution_momentum_z, ff_flux_contribution_density_energy; cl_mem areas, elements_surrounding_elements, normals; cl_mem variables, old_variables, fluxes, step_factors; float h_ff_variable[NVAR]; try{ _clInit(device_type, device_id); // set far field conditions and load them into constant memory on the gpu { //float h_ff_variable[NVAR]; const float angle_of_attack = float(3.1415926535897931 / 180.0f) * float(deg_angle_of_attack); h_ff_variable[VAR_DENSITY] = float(1.4); float ff_pressure = float(1.0f); float ff_speed_of_sound = sqrt(GAMMA*ff_pressure / h_ff_variable[VAR_DENSITY]); float ff_speed = float(ff_mach)*ff_speed_of_sound; float3 ff_velocity; ff_velocity.x = ff_speed*float(cos((float)angle_of_attack)); ff_velocity.y = ff_speed*float(sin((float)angle_of_attack)); ff_velocity.z = 0.0f; h_ff_variable[VAR_MOMENTUM+0] = h_ff_variable[VAR_DENSITY] * ff_velocity.x; h_ff_variable[VAR_MOMENTUM+1] = h_ff_variable[VAR_DENSITY] * ff_velocity.y; h_ff_variable[VAR_MOMENTUM+2] = h_ff_variable[VAR_DENSITY] * ff_velocity.z; h_ff_variable[VAR_DENSITY_ENERGY] = h_ff_variable[VAR_DENSITY]*(float(0.5f)*(ff_speed*ff_speed)) + (ff_pressure / float(GAMMA-1.0f)); float3 h_ff_momentum; h_ff_momentum.x = *(h_ff_variable+VAR_MOMENTUM+0); h_ff_momentum.y = *(h_ff_variable+VAR_MOMENTUM+1); h_ff_momentum.z = *(h_ff_variable+VAR_MOMENTUM+2); float3 h_ff_flux_contribution_momentum_x; float3 h_ff_flux_contribution_momentum_y; float3 h_ff_flux_contribution_momentum_z; float3 h_ff_flux_contribution_density_energy; compute_flux_contribution(h_ff_variable[VAR_DENSITY], h_ff_momentum, h_ff_variable[VAR_DENSITY_ENERGY], ff_pressure, ff_velocity, h_ff_flux_contribution_momentum_x, h_ff_flux_contribution_momentum_y, h_ff_flux_contribution_momentum_z, h_ff_flux_contribution_density_energy); // copy far field conditions to the gpu //cl_mem ff_variable, ff_flux_contribution_momentum_x, ff_flux_contribution_momentum_y,ff_flux_contribution_momentum_z, ff_flux_contribution_density_energy; ff_variable = _clMalloc(NVAR*sizeof(float)); ff_flux_contribution_momentum_x = _clMalloc(sizeof(float3)); ff_flux_contribution_momentum_y = _clMalloc(sizeof(float3)); ff_flux_contribution_momentum_z = _clMalloc(sizeof(float3)); ff_flux_contribution_density_energy = _clMalloc(sizeof(float3)); _clMemcpyH2D(ff_variable, h_ff_variable, NVAR*sizeof(float)); _clMemcpyH2D(ff_flux_contribution_momentum_x, &h_ff_flux_contribution_momentum_x, sizeof(float3)); _clMemcpyH2D(ff_flux_contribution_momentum_y, &h_ff_flux_contribution_momentum_y, sizeof(float3)); _clMemcpyH2D(ff_flux_contribution_momentum_z, &h_ff_flux_contribution_momentum_z, sizeof(float3)); _clMemcpyH2D(ff_flux_contribution_density_energy, &h_ff_flux_contribution_density_energy, sizeof(float3)); _clFinish(); } int nel; int nelr; // read in domain geometry //float* areas; //int* elements_surrounding_elements; //float* normals; { std::ifstream file(data_file_name); if(file==NULL){ throw(string("can not find/open file!")); } file >> nel; nelr = block_length*((nel / block_length )+ std::min(1, nel % block_length)); std::cout<<"--cambine: nel="<(old_variables, variables, nelr*NVAR); // for the first iteration we compute the time step compute_step_factor(nelr, variables, areas, step_factors); for(int j = 0; j < RK; j++){ compute_flux(nelr, elements_surrounding_elements, normals, variables, ff_variable, fluxes, ff_flux_contribution_density_energy, \ ff_flux_contribution_momentum_x, ff_flux_contribution_momentum_y, ff_flux_contribution_momentum_z); time_step(j, nelr, old_variables, variables, step_factors, fluxes); } } _clFinish(); std::cout << "Saving solution..." << std::endl; dump(variables, nel, nelr); std::cout << "Saved solution..." << std::endl; _clStatistics(); std::cout << "Cleaning up..." << std::endl; //--release resources _clFree(ff_variable); _clFree(ff_flux_contribution_momentum_x); _clFree(ff_flux_contribution_momentum_y); _clFree(ff_flux_contribution_momentum_z); _clFree(ff_flux_contribution_density_energy); _clFree(areas); _clFree(elements_surrounding_elements); _clFree(normals); _clFree(variables); _clFree(old_variables); _clFree(fluxes); _clFree(step_factors); _clRelease(); std::cout << "Done..." << std::endl; } catch(string msg){ std::cout<<"--cambine:( an exception catched in main body ->"<