//--by Jianbin Fang #define __CL_ENABLE_EXCEPTIONS #include #include #include #include #include #include #include #ifdef PROFILING #include "timer.h" #endif #include "CLHelper.h" #include "util.h" #define MAX_THREADS_PER_BLOCK 256 int platform_id = 0; int device_id = 0; int use_gpu = 0; //Structure to hold a node information struct Node { int starting; int no_of_edges; }; //---------------------------------------------------------- //--bfs on cpu //--programmer: jianbin //--date: 26/01/2011 //--note: width is changed to the new_width //---------------------------------------------------------- void run_bfs_cpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \ int *h_graph_edges, char *h_graph_mask, char *h_updating_graph_mask, \ char *h_graph_visited, int *h_cost_ref){ char stop; int k = 0; do{ //if no thread changes this value then the loop stops stop=false; for(int tid = 0; tid < no_of_nodes; tid++ ) { if (h_graph_mask[tid] == true){ h_graph_mask[tid]=false; for(int i=h_graph_nodes[tid].starting; i<(h_graph_nodes[tid].no_of_edges + h_graph_nodes[tid].starting); i++){ int id = h_graph_edges[i]; //--cambine: node id is connected with node tid if(!h_graph_visited[id]){ //--cambine: if node id has not been visited, enter the body below h_cost_ref[id]=h_cost_ref[tid]+1; h_updating_graph_mask[id]=true; } } } } for(int tid=0; tid< no_of_nodes ; tid++ ) { if (h_updating_graph_mask[tid] == true){ h_graph_mask[tid]=true; h_graph_visited[tid]=true; stop=true; h_updating_graph_mask[tid]=false; } } k++; } while(stop); } //---------------------------------------------------------- //--breadth first search on GPUs //---------------------------------------------------------- void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \ int *h_graph_edges, char *h_graph_mask, char *h_updating_graph_mask, \ char *h_graph_visited, int *h_cost) throw(std::string){ //int number_elements = height*width; char h_over; cl_mem d_graph_nodes, d_graph_edges, d_graph_mask, d_updating_graph_mask, \ d_graph_visited, d_cost, d_over; try{ //--1 transfer data from host to device _clInit(platform_id, device_id, use_gpu); d_graph_nodes = _clMalloc(no_of_nodes*sizeof(Node), h_graph_nodes); d_graph_edges = _clMalloc(edge_list_size*sizeof(int), h_graph_edges); d_graph_mask = _clMallocRW(no_of_nodes*sizeof(char), h_graph_mask); d_updating_graph_mask = _clMallocRW(no_of_nodes*sizeof(char), h_updating_graph_mask); d_graph_visited = _clMallocRW(no_of_nodes*sizeof(char), h_graph_visited); d_cost = _clMallocRW(no_of_nodes*sizeof(int), h_cost); d_over = _clMallocRW(sizeof(char), &h_over); _clMemcpyH2D(d_graph_nodes, no_of_nodes*sizeof(Node), h_graph_nodes); _clMemcpyH2D(d_graph_edges, edge_list_size*sizeof(int), h_graph_edges); _clMemcpyH2D(d_graph_mask, no_of_nodes*sizeof(char), h_graph_mask); _clMemcpyH2D(d_updating_graph_mask, no_of_nodes*sizeof(char), h_updating_graph_mask); _clMemcpyH2D(d_graph_visited, no_of_nodes*sizeof(char), h_graph_visited); _clMemcpyH2D(d_cost, no_of_nodes*sizeof(int), h_cost); //--2 invoke kernel #ifdef PROFILING timer kernel_timer; double kernel_time = 0.0; kernel_timer.reset(); kernel_timer.start(); #endif do{ h_over = false; _clMemcpyH2D(d_over, sizeof(char), &h_over); //--kernel 0 int kernel_id = 0; int kernel_idx = 0; _clSetArgs(kernel_id, kernel_idx++, d_graph_nodes); _clSetArgs(kernel_id, kernel_idx++, d_graph_edges); _clSetArgs(kernel_id, kernel_idx++, d_graph_mask); _clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask); _clSetArgs(kernel_id, kernel_idx++, d_graph_visited); _clSetArgs(kernel_id, kernel_idx++, d_cost); _clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int)); //int work_items = no_of_nodes; _clInvokeKernel(kernel_id, no_of_nodes, work_group_size); //--kernel 1 kernel_id = 1; kernel_idx = 0; _clSetArgs(kernel_id, kernel_idx++, d_graph_mask); _clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask); _clSetArgs(kernel_id, kernel_idx++, d_graph_visited); _clSetArgs(kernel_id, kernel_idx++, d_over); _clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int)); //work_items = no_of_nodes; _clInvokeKernel(kernel_id, no_of_nodes, work_group_size); _clMemcpyD2H(d_over,sizeof(char), &h_over); }while(h_over); _clFinish(); #ifdef PROFILING kernel_timer.stop(); kernel_time = kernel_timer.getTimeInSeconds(); #endif //--3 transfer data from device to host _clMemcpyD2H(d_cost,no_of_nodes*sizeof(int), h_cost); //--statistics #ifdef PROFILING std::cout<<"kernel time(s):"<MAX_THREADS_PER_BLOCK){ num_of_blocks = (int)ceil(no_of_nodes/(double)MAX_THREADS_PER_BLOCK); num_of_threads_per_block = MAX_THREADS_PER_BLOCK; } work_group_size = num_of_threads_per_block; // allocate host memory h_graph_nodes = (Node*) malloc(sizeof(Node)*no_of_nodes); h_graph_mask = (char*) malloc(sizeof(char)*no_of_nodes); h_updating_graph_mask = (char*) malloc(sizeof(char)*no_of_nodes); h_graph_visited = (char*) malloc(sizeof(char)*no_of_nodes); int start, edgeno; // initalize the memory for(int i = 0; i < no_of_nodes; i++){ fscanf(fp,"%d %d",&start,&edgeno); h_graph_nodes[i].starting = start; h_graph_nodes[i].no_of_edges = edgeno; h_graph_mask[i]=false; h_updating_graph_mask[i]=false; h_graph_visited[i]=false; } //read the source node from the file fscanf(fp,"%d",&source); source=0; //set the source node as true in the mask h_graph_mask[source]=true; h_graph_visited[source]=true; fscanf(fp,"%d",&edge_list_size); int id,cost; int* h_graph_edges = (int*) malloc(sizeof(int)*edge_list_size); for(int i=0; i < edge_list_size ; i++){ fscanf(fp,"%d",&id); fscanf(fp,"%d",&cost); h_graph_edges[i] = id; } if(fp) fclose(fp); // allocate mem for the result on host side int *h_cost = (int*) malloc(sizeof(int)*no_of_nodes); int *h_cost_ref = (int*)malloc(sizeof(int)*no_of_nodes); for(int i=0;i(h_cost_ref, h_cost, no_of_nodes); //release host memory free(h_graph_nodes); free(h_graph_mask); free(h_updating_graph_mask); free(h_graph_visited); } catch(std::string msg){ std::cout<<"--cambine: exception in main ->"<