123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404 |
- #ifdef __cplusplus
- extern "C" {
- #endif
- //========================================================================================================================================================================================================200
- //======================================================================================================================================================150
- //====================================================================================================100
- //==================================================50
- //========================================================================================================================================================================================================200
- // INFORMATION
- //========================================================================================================================================================================================================200
- //======================================================================================================================================================150
- // UPDATE
- //======================================================================================================================================================150
- // 2009.12 Lukasz G. Szafaryn
- // -- entire code written
- //======================================================================================================================================================150
- // DESCRIPTION
- //======================================================================================================================================================150
- // Description
- //======================================================================================================================================================150
- // USE
- //======================================================================================================================================================150
- // How to run
- //========================================================================================================================================================================================================200
- // DEFINE/INCLUDE
- //========================================================================================================================================================================================================200
- //======================================================================================================================================================150
- // LIBRARIES
- //======================================================================================================================================================150
- #include <stdio.h> // (in path known to compiler) needed by printf
- #include <stdlib.h> // (in path known to compiler) needed by malloc
- #include <stdbool.h> // (in path known to compiler) needed by true/false
- //======================================================================================================================================================150
- // UTILITIES
- //======================================================================================================================================================150
- #include "./util/timer/timer.h" // (in path specified here)
- #include "./util/num/num.h" // (in path specified here)
- //======================================================================================================================================================150
- // MAIN FUNCTION HEADER
- //======================================================================================================================================================150
- #include "./main.h" // (in the current directory)
- //======================================================================================================================================================150
- // KERNEL
- //======================================================================================================================================================150
- #include "./kernel/kernel_gpu_opencl_wrapper.h" // (in library path specified here)
- // Helper function to print usage
- void usage(char *argv0) {
- char *help =
- "\nUsage: %s [switches] \n\n"
- " -b :number of boxes [default=0]\n"
- " -p platform_id :OCL platform to use [default=0]\n"
- " -d device_id :OCL device to use [default=0]\n"
- " -g use_gpu :1 for GPU 0 for CPU [default=0]\n";
- fprintf(stderr, help, argv0);
- exit(-1);
- }
- //========================================================================================================================================================================================================200
- // MAIN FUNCTION
- //========================================================================================================================================================================================================200
- int main( int argc,
- char *argv [])
- {
- //======================================================================================================================================================150
- // CPU/MCPU VARIABLES
- //======================================================================================================================================================150
- // timer
- long long time0;
- time0 = get_time();
- // timer
- long long time1;
- long long time2;
- long long time3;
- long long time4;
- long long time5;
- long long time6;
- long long time7;
- // counters
- int i, j, k, l, m, n;
- // system memory
- par_str par_cpu;
- dim_str dim_cpu;
- box_str* box_cpu;
- FOUR_VECTOR* rv_cpu;
- fp* qv_cpu;
- FOUR_VECTOR* fv_cpu;
- int nh;
- printf("WG size of kernel = %d \n", NUMBER_THREADS);
- time1 = get_time();
- //======================================================================================================================================================150
- // CHECK INPUT ARGUMENTS
- //======================================================================================================================================================150
- // assing default values
- dim_cpu.arch_arg = 0;
- dim_cpu.cores_arg = 1;
- dim_cpu.boxes1d_arg = 1;
- /*// go through arguments
- if(argc>=3){
- for(dim_cpu.cur_arg=1; dim_cpu.cur_arg<argc; dim_cpu.cur_arg++){
- // check if -boxes1d
- if(strcmp(argv[dim_cpu.cur_arg], "-boxes1d")==0){
- // check if value provided
- if(argc>=dim_cpu.cur_arg+1){
- // check if value is a number
- if(isInteger(argv[dim_cpu.cur_arg+1])==1){
- dim_cpu.boxes1d_arg = atoi(argv[dim_cpu.cur_arg+1]);
- if(dim_cpu.boxes1d_arg<0){
- printf("ERROR: Wrong value to -boxes1d argument, cannot be <=0\n");
- return 0;
- }
- dim_cpu.cur_arg = dim_cpu.cur_arg+1;
- }
- // value is not a number
- else{
- printf("ERROR: Value to -boxes1d argument in not a number\n");
- return 0;
- }
- }
- // value not provided
- else{
- printf("ERROR: Missing value to -boxes1d argument\n");
- return 0;
- }
- }
- // unknown
- else{
- printf("ERROR: Unknown argument\n");
- return 0;
- }
- }
- // Print configuration
- printf("Configuration used: arch = %d, cores = %d, boxes1d = %d\n", dim_cpu.arch_arg, dim_cpu.cores_arg, dim_cpu.boxes1d_arg);
- }
- else{
- printf("Provide boxes1d argument, example: -boxes1d 16");
- return 0;
- }*/
-
- // Arguments parsing for platform, device and type
-
- // Variables to store information on platform and device to use_gpu
- int platform_id = 0;
- int device_id = 0;
- int use_gpu = 0;
- /* obtain command line arguments and change appropriate options */
- int opt;
- extern char *optarg;
- while ((opt=getopt(argc,argv,"b:p:d:g:"))!= EOF) {
- switch (opt) {
- case 'p': platform_id = atoi(optarg);
- break;
- case 'd': device_id = atoi(optarg);
- break;
- case 'g': use_gpu = atoi(optarg);
- break;
- case 'b': dim_cpu.boxes1d_arg = atoi(optarg);
- break;
- case '?': usage(argv[0]);
- break;
- default: usage(argv[0]);
- break;
- }
- }
- time2 = get_time();
- //======================================================================================================================================================150
- // INPUTS
- //======================================================================================================================================================150
- par_cpu.alpha = 0.5;
- time3 = get_time();
- //======================================================================================================================================================150
- // DIMENSIONS
- //======================================================================================================================================================150
- // total number of boxes
- dim_cpu.number_boxes = dim_cpu.boxes1d_arg * dim_cpu.boxes1d_arg * dim_cpu.boxes1d_arg; // 8*8*8=512
- // how many particles space has in each direction
- dim_cpu.space_elem = dim_cpu.number_boxes * NUMBER_PAR_PER_BOX; //512*100=51,200
- dim_cpu.space_mem = dim_cpu.space_elem * sizeof(FOUR_VECTOR);
- dim_cpu.space_mem2 = dim_cpu.space_elem * sizeof(fp);
- // box array
- dim_cpu.box_mem = dim_cpu.number_boxes * sizeof(box_str);
- time4 = get_time();
- //======================================================================================================================================================150
- // SYSTEM MEMORY
- //======================================================================================================================================================150
- //====================================================================================================100
- // BOX
- //====================================================================================================100
- // allocate boxes
- box_cpu = (box_str*)malloc(dim_cpu.box_mem);
- // initialize number of home boxes
- nh = 0;
- // home boxes in z direction
- for(i=0; i<dim_cpu.boxes1d_arg; i++){
- // home boxes in y direction
- for(j=0; j<dim_cpu.boxes1d_arg; j++){
- // home boxes in x direction
- for(k=0; k<dim_cpu.boxes1d_arg; k++){
- // current home box
- box_cpu[nh].x = k;
- box_cpu[nh].y = j;
- box_cpu[nh].z = i;
- box_cpu[nh].number = nh;
- box_cpu[nh].offset = nh * NUMBER_PAR_PER_BOX;
- // initialize number of neighbor boxes
- box_cpu[nh].nn = 0;
- // neighbor boxes in z direction
- for(l=-1; l<2; l++){
- // neighbor boxes in y direction
- for(m=-1; m<2; m++){
- // neighbor boxes in x direction
- for(n=-1; n<2; n++){
- // check if (this neighbor exists) and (it is not the same as home box)
- if( (((i+l)>=0 && (j+m)>=0 && (k+n)>=0)==true && ((i+l)<dim_cpu.boxes1d_arg && (j+m)<dim_cpu.boxes1d_arg && (k+n)<dim_cpu.boxes1d_arg)==true) &&
- (l==0 && m==0 && n==0)==false ){
- // current neighbor box
- box_cpu[nh].nei[box_cpu[nh].nn].x = (k+n);
- box_cpu[nh].nei[box_cpu[nh].nn].y = (j+m);
- box_cpu[nh].nei[box_cpu[nh].nn].z = (i+l);
- box_cpu[nh].nei[box_cpu[nh].nn].number = (box_cpu[nh].nei[box_cpu[nh].nn].z * dim_cpu.boxes1d_arg * dim_cpu.boxes1d_arg) +
- (box_cpu[nh].nei[box_cpu[nh].nn].y * dim_cpu.boxes1d_arg) +
- box_cpu[nh].nei[box_cpu[nh].nn].x;
- box_cpu[nh].nei[box_cpu[nh].nn].offset = box_cpu[nh].nei[box_cpu[nh].nn].number * NUMBER_PAR_PER_BOX;
- // increment neighbor box
- box_cpu[nh].nn = box_cpu[nh].nn + 1;
- }
- } // neighbor boxes in x direction
- } // neighbor boxes in y direction
- } // neighbor boxes in z direction
- // increment home box
- nh = nh + 1;
- } // home boxes in x direction
- } // home boxes in y direction
- } // home boxes in z direction
- //====================================================================================================100
- // PARAMETERS, DISTANCE, CHARGE AND FORCE
- //====================================================================================================100
- // random generator seed set to random value - time in this case
- srand(time(NULL));
- // input (distances)
- rv_cpu = (FOUR_VECTOR*)malloc(dim_cpu.space_mem);
- for(i=0; i<dim_cpu.space_elem; i=i+1){
- rv_cpu[i].v = (rand()%10 + 1) / 10.0; // get a number in the range 0.1 - 1.0
- // rv_cpu[i].v = 0.1; // get a number in the range 0.1 - 1.0
- rv_cpu[i].x = (rand()%10 + 1) / 10.0; // get a number in the range 0.1 - 1.0
- // rv_cpu[i].x = 0.2; // get a number in the range 0.1 - 1.0
- rv_cpu[i].y = (rand()%10 + 1) / 10.0; // get a number in the range 0.1 - 1.0
- // rv_cpu[i].y = 0.3; // get a number in the range 0.1 - 1.0
- rv_cpu[i].z = (rand()%10 + 1) / 10.0; // get a number in the range 0.1 - 1.0
- // rv_cpu[i].z = 0.4; // get a number in the range 0.1 - 1.0
- }
- // input (charge)
- qv_cpu = (fp*)malloc(dim_cpu.space_mem2);
- for(i=0; i<dim_cpu.space_elem; i=i+1){
- qv_cpu[i] = (rand()%10 + 1) / 10.0; // get a number in the range 0.1 - 1.0
- // qv_cpu[i] = 0.5; // get a number in the range 0.1 - 1.0
- }
- // output (forces)
- fv_cpu = (FOUR_VECTOR*)malloc(dim_cpu.space_mem);
- for(i=0; i<dim_cpu.space_elem; i=i+1){
- fv_cpu[i].v = 0; // set to 0, because kernels keeps adding to initial value
- fv_cpu[i].x = 0; // set to 0, because kernels keeps adding to initial value
- fv_cpu[i].y = 0; // set to 0, because kernels keeps adding to initial value
- fv_cpu[i].z = 0; // set to 0, because kernels keeps adding to initial value
- }
- time5 = get_time();
- //======================================================================================================================================================150
- // KERNEL
- //======================================================================================================================================================150
- //====================================================================================================100
- // GPU_OPENCL
- //====================================================================================================100
- kernel_gpu_opencl_wrapper(par_cpu,
- dim_cpu,
- box_cpu,
- rv_cpu,
- qv_cpu,
- fv_cpu,
- platform_id,
- device_id,
- use_gpu);
- time6 = get_time();
- //======================================================================================================================================================150
- // SYSTEM MEMORY DEALLOCATION
- //======================================================================================================================================================150
- // dump results
- #ifdef OUTPUT
- FILE *fptr;
- fptr = fopen("result.txt", "w");
- for(i=0; i<dim_cpu.space_elem; i=i+1){
- fprintf(fptr, "%f, %f, %f, %f\n", fv_cpu[i].v, fv_cpu[i].x, fv_cpu[i].y, fv_cpu[i].z);
- }
- fclose(fptr);
- #endif
- free(rv_cpu);
- free(qv_cpu);
- free(fv_cpu);
- free(box_cpu);
- time7 = get_time();
- //======================================================================================================================================================150
- // DISPLAY TIMING
- //======================================================================================================================================================150
- // printf("Time spent in different stages of the application:\n");
- // printf("%15.12f s, %15.12f % : VARIABLES\n", (float) (time1-time0) / 1000000, (float) (time1-time0) / (float) (time7-time0) * 100);
- // printf("%15.12f s, %15.12f % : INPUT ARGUMENTS\n", (float) (time2-time1) / 1000000, (float) (time2-time1) / (float) (time7-time0) * 100);
- // printf("%15.12f s, %15.12f % : INPUTS\n", (float) (time3-time2) / 1000000, (float) (time3-time2) / (float) (time7-time0) * 100);
- // printf("%15.12f s, %15.12f % : dim_cpu\n", (float) (time4-time3) / 1000000, (float) (time4-time3) / (float) (time7-time0) * 100);
- // printf("%15.12f s, %15.12f % : SYS MEM: ALO\n", (float) (time5-time4) / 1000000, (float) (time5-time4) / (float) (time7-time0) * 100);
- // printf("%15.12f s, %15.12f % : KERNEL: COMPUTE\n", (float) (time6-time5) / 1000000, (float) (time6-time5) / (float) (time7-time0) * 100);
- // printf("%15.12f s, %15.12f % : SYS MEM: FRE\n", (float) (time7-time6) / 1000000, (float) (time7-time6) / (float) (time7-time0) * 100);
- // printf("Total time:\n");
- // printf("%.12f s\n", (float) (time7-time0) / 1000000);
- //======================================================================================================================================================150
- // RETURN
- //======================================================================================================================================================150
- return 0.0; // always returns 0.0
- }
- //========================================================================================================================================================================================================200
- // END
- //========================================================================================================================================================================================================200
- #ifdef __cplusplus
- }
- #endif
|