bfs.cpp 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299
  1. //--by Jianbin Fang
  2. #define __CL_ENABLE_EXCEPTIONS
  3. #include <cstdlib>
  4. #include <iostream>
  5. #include <string>
  6. #include <cstring>
  7. #ifdef PROFILING
  8. #include "timer.h"
  9. #endif
  10. #include "CLHelper.h"
  11. #include "util.h"
  12. #define MAX_THREADS_PER_BLOCK 256
  13. //Structure to hold a node information
  14. struct Node
  15. {
  16. int starting;
  17. int no_of_edges;
  18. };
  19. //----------------------------------------------------------
  20. //--bfs on cpu
  21. //--programmer: jianbin
  22. //--date: 26/01/2011
  23. //--note: width is changed to the new_width
  24. //----------------------------------------------------------
  25. void run_bfs_cpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
  26. int *h_graph_edges, char *h_graph_mask, char *h_updating_graph_mask, \
  27. char *h_graph_visited, int *h_cost_ref){
  28. char stop;
  29. int k = 0;
  30. do{
  31. //if no thread changes this value then the loop stops
  32. stop=false;
  33. for(int tid = 0; tid < no_of_nodes; tid++ )
  34. {
  35. if (h_graph_mask[tid] == true){
  36. h_graph_mask[tid]=false;
  37. for(int i=h_graph_nodes[tid].starting; i<(h_graph_nodes[tid].no_of_edges + h_graph_nodes[tid].starting); i++){
  38. int id = h_graph_edges[i]; //--cambine: node id is connected with node tid
  39. if(!h_graph_visited[id]){ //--cambine: if node id has not been visited, enter the body below
  40. h_cost_ref[id]=h_cost_ref[tid]+1;
  41. h_updating_graph_mask[id]=true;
  42. }
  43. }
  44. }
  45. }
  46. for(int tid=0; tid< no_of_nodes ; tid++ )
  47. {
  48. if (h_updating_graph_mask[tid] == true){
  49. h_graph_mask[tid]=true;
  50. h_graph_visited[tid]=true;
  51. stop=true;
  52. h_updating_graph_mask[tid]=false;
  53. }
  54. }
  55. k++;
  56. }
  57. while(stop);
  58. }
  59. //----------------------------------------------------------
  60. //--breadth first search on GPUs
  61. //----------------------------------------------------------
  62. void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
  63. int *h_graph_edges, char *h_graph_mask, char *h_updating_graph_mask, \
  64. char *h_graph_visited, int *h_cost)
  65. throw(std::string){
  66. //int number_elements = height*width;
  67. char h_over;
  68. cl_mem d_graph_nodes, d_graph_edges, d_graph_mask, d_updating_graph_mask, \
  69. d_graph_visited, d_cost, d_over;
  70. try{
  71. //--1 transfer data from host to device
  72. _clInit();
  73. d_graph_nodes = _clMalloc(no_of_nodes*sizeof(Node), h_graph_nodes);
  74. d_graph_edges = _clMalloc(edge_list_size*sizeof(int), h_graph_edges);
  75. d_graph_mask = _clMallocRW(no_of_nodes*sizeof(char), h_graph_mask);
  76. d_updating_graph_mask = _clMallocRW(no_of_nodes*sizeof(char), h_updating_graph_mask);
  77. d_graph_visited = _clMallocRW(no_of_nodes*sizeof(char), h_graph_visited);
  78. d_cost = _clMallocRW(no_of_nodes*sizeof(int), h_cost);
  79. d_over = _clMallocRW(sizeof(char), &h_over);
  80. _clMemcpyH2D(d_graph_nodes, no_of_nodes*sizeof(Node), h_graph_nodes);
  81. _clMemcpyH2D(d_graph_edges, edge_list_size*sizeof(int), h_graph_edges);
  82. _clMemcpyH2D(d_graph_mask, no_of_nodes*sizeof(char), h_graph_mask);
  83. _clMemcpyH2D(d_updating_graph_mask, no_of_nodes*sizeof(char), h_updating_graph_mask);
  84. _clMemcpyH2D(d_graph_visited, no_of_nodes*sizeof(char), h_graph_visited);
  85. _clMemcpyH2D(d_cost, no_of_nodes*sizeof(int), h_cost);
  86. //--2 invoke kernel
  87. #ifdef PROFILING
  88. timer kernel_timer;
  89. double kernel_time = 0.0;
  90. kernel_timer.reset();
  91. kernel_timer.start();
  92. #endif
  93. do{
  94. h_over = false;
  95. _clMemcpyH2D(d_over, sizeof(char), &h_over);
  96. //--kernel 0
  97. int kernel_id = 0;
  98. int kernel_idx = 0;
  99. _clSetArgs(kernel_id, kernel_idx++, d_graph_nodes);
  100. _clSetArgs(kernel_id, kernel_idx++, d_graph_edges);
  101. _clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
  102. _clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
  103. _clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
  104. _clSetArgs(kernel_id, kernel_idx++, d_cost);
  105. _clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
  106. //int work_items = no_of_nodes;
  107. _clInvokeKernel(kernel_id, no_of_nodes, work_group_size);
  108. //--kernel 1
  109. kernel_id = 1;
  110. kernel_idx = 0;
  111. _clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
  112. _clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
  113. _clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
  114. _clSetArgs(kernel_id, kernel_idx++, d_over);
  115. _clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
  116. //work_items = no_of_nodes;
  117. _clInvokeKernel(kernel_id, no_of_nodes, work_group_size);
  118. _clMemcpyD2H(d_over,sizeof(char), &h_over);
  119. }while(h_over);
  120. _clFinish();
  121. #ifdef PROFILING
  122. kernel_timer.stop();
  123. kernel_time = kernel_timer.getTimeInSeconds();
  124. #endif
  125. //--3 transfer data from device to host
  126. _clMemcpyD2H(d_cost,no_of_nodes*sizeof(int), h_cost);
  127. //--statistics
  128. #ifdef PROFILING
  129. std::cout<<"kernel time(s):"<<kernel_time<<std::endl;
  130. #endif
  131. //--4 release cl resources.
  132. _clFree(d_graph_nodes);
  133. _clFree(d_graph_edges);
  134. _clFree(d_graph_mask);
  135. _clFree(d_updating_graph_mask);
  136. _clFree(d_graph_visited);
  137. _clFree(d_cost);
  138. _clFree(d_over);
  139. _clRelease();
  140. }
  141. catch(std::string msg){
  142. _clFree(d_graph_nodes);
  143. _clFree(d_graph_edges);
  144. _clFree(d_graph_mask);
  145. _clFree(d_updating_graph_mask);
  146. _clFree(d_graph_visited);
  147. _clFree(d_cost);
  148. _clFree(d_over);
  149. _clRelease();
  150. std::string e_str = "in run_transpose_gpu -> ";
  151. e_str += msg;
  152. throw(e_str);
  153. }
  154. return ;
  155. }
  156. void Usage(int argc, char**argv){
  157. fprintf(stderr,"Usage: %s <input_file>\n", argv[0]);
  158. }
  159. //----------------------------------------------------------
  160. //--cambine: main function
  161. //--author: created by Jianbin Fang
  162. //--date: 25/01/2011
  163. //----------------------------------------------------------
  164. int main(int argc, char * argv[])
  165. {
  166. int no_of_nodes;
  167. int edge_list_size;
  168. FILE *fp;
  169. Node* h_graph_nodes;
  170. char *h_graph_mask, *h_updating_graph_mask, *h_graph_visited;
  171. try{
  172. char *input_f;
  173. if(argc!=2){
  174. Usage(argc, argv);
  175. exit(0);
  176. }
  177. input_f = argv[1];
  178. printf("Reading File\n");
  179. //Read in Graph from a file
  180. fp = fopen(input_f,"r");
  181. if(!fp){
  182. printf("Error Reading graph file\n");
  183. return 0;
  184. }
  185. int source = 0;
  186. fscanf(fp,"%d",&no_of_nodes);
  187. int num_of_blocks = 1;
  188. int num_of_threads_per_block = no_of_nodes;
  189. //Make execution Parameters according to the number of nodes
  190. //Distribute threads across multiple Blocks if necessary
  191. if(no_of_nodes>MAX_THREADS_PER_BLOCK){
  192. num_of_blocks = (int)ceil(no_of_nodes/(double)MAX_THREADS_PER_BLOCK);
  193. num_of_threads_per_block = MAX_THREADS_PER_BLOCK;
  194. }
  195. work_group_size = num_of_threads_per_block;
  196. // allocate host memory
  197. h_graph_nodes = (Node*) malloc(sizeof(Node)*no_of_nodes);
  198. h_graph_mask = (char*) malloc(sizeof(char)*no_of_nodes);
  199. h_updating_graph_mask = (char*) malloc(sizeof(char)*no_of_nodes);
  200. h_graph_visited = (char*) malloc(sizeof(char)*no_of_nodes);
  201. int start, edgeno;
  202. // initalize the memory
  203. for(int i = 0; i < no_of_nodes; i++){
  204. fscanf(fp,"%d %d",&start,&edgeno);
  205. h_graph_nodes[i].starting = start;
  206. h_graph_nodes[i].no_of_edges = edgeno;
  207. h_graph_mask[i]=false;
  208. h_updating_graph_mask[i]=false;
  209. h_graph_visited[i]=false;
  210. }
  211. //read the source node from the file
  212. fscanf(fp,"%d",&source);
  213. source=0;
  214. //set the source node as true in the mask
  215. h_graph_mask[source]=true;
  216. h_graph_visited[source]=true;
  217. fscanf(fp,"%d",&edge_list_size);
  218. int id,cost;
  219. int* h_graph_edges = (int*) malloc(sizeof(int)*edge_list_size);
  220. for(int i=0; i < edge_list_size ; i++){
  221. fscanf(fp,"%d",&id);
  222. fscanf(fp,"%d",&cost);
  223. h_graph_edges[i] = id;
  224. }
  225. if(fp)
  226. fclose(fp);
  227. // allocate mem for the result on host side
  228. int *h_cost = (int*) malloc(sizeof(int)*no_of_nodes);
  229. int *h_cost_ref = (int*)malloc(sizeof(int)*no_of_nodes);
  230. for(int i=0;i<no_of_nodes;i++){
  231. h_cost[i]=-1;
  232. h_cost_ref[i] = -1;
  233. }
  234. h_cost[source]=0;
  235. h_cost_ref[source]=0;
  236. //---------------------------------------------------------
  237. //--gpu entry
  238. run_bfs_gpu(no_of_nodes,h_graph_nodes,edge_list_size,h_graph_edges, h_graph_mask, h_updating_graph_mask, h_graph_visited, h_cost);
  239. //---------------------------------------------------------
  240. //--cpu entry
  241. // initalize the memory again
  242. for(int i = 0; i < no_of_nodes; i++){
  243. h_graph_mask[i]=false;
  244. h_updating_graph_mask[i]=false;
  245. h_graph_visited[i]=false;
  246. }
  247. //set the source node as true in the mask
  248. source=0;
  249. h_graph_mask[source]=true;
  250. h_graph_visited[source]=true;
  251. run_bfs_cpu(no_of_nodes,h_graph_nodes,edge_list_size,h_graph_edges, h_graph_mask, h_updating_graph_mask, h_graph_visited, h_cost_ref);
  252. //---------------------------------------------------------
  253. //--result varification
  254. compare_results<int>(h_cost_ref, h_cost, no_of_nodes);
  255. //release host memory
  256. free(h_graph_nodes);
  257. free(h_graph_mask);
  258. free(h_updating_graph_mask);
  259. free(h_graph_visited);
  260. }
  261. catch(std::string msg){
  262. std::cout<<"--cambine: exception in main ->"<<msg<<std::endl;
  263. //release host memory
  264. free(h_graph_nodes);
  265. free(h_graph_mask);
  266. free(h_updating_graph_mask);
  267. free(h_graph_visited);
  268. }
  269. return 0;
  270. }