bfs.cpp 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307
  1. //--by Jianbin Fang
  2. #define __CL_ENABLE_EXCEPTIONS
  3. #include <cstdlib>
  4. #include <iostream>
  5. #include <string>
  6. #include <cstring>
  7. #ifdef PROFILING
  8. #include "timer.h"
  9. #endif
  10. #include "CLHelper.h"
  11. #include "util.h"
  12. #define MAX_THREADS_PER_BLOCK 256
  13. int platform_num = 0;
  14. int device_num = 0;
  15. int use_gpu = 0;
  16. //Structure to hold a node information
  17. struct Node
  18. {
  19. int starting;
  20. int no_of_edges;
  21. };
  22. //----------------------------------------------------------
  23. //--bfs on cpu
  24. //--programmer: jianbin
  25. //--date: 26/01/2011
  26. //--note: width is changed to the new_width
  27. //----------------------------------------------------------
  28. void run_bfs_cpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
  29. int *h_graph_edges, char *h_graph_mask, char *h_updating_graph_mask, \
  30. char *h_graph_visited, int *h_cost_ref){
  31. char stop;
  32. int k = 0;
  33. do{
  34. //if no thread changes this value then the loop stops
  35. stop=false;
  36. for(int tid = 0; tid < no_of_nodes; tid++ )
  37. {
  38. if (h_graph_mask[tid] == true){
  39. h_graph_mask[tid]=false;
  40. for(int i=h_graph_nodes[tid].starting; i<(h_graph_nodes[tid].no_of_edges + h_graph_nodes[tid].starting); i++){
  41. int id = h_graph_edges[i]; //--cambine: node id is connected with node tid
  42. if(!h_graph_visited[id]){ //--cambine: if node id has not been visited, enter the body below
  43. h_cost_ref[id]=h_cost_ref[tid]+1;
  44. h_updating_graph_mask[id]=true;
  45. }
  46. }
  47. }
  48. }
  49. for(int tid=0; tid< no_of_nodes ; tid++ )
  50. {
  51. if (h_updating_graph_mask[tid] == true){
  52. h_graph_mask[tid]=true;
  53. h_graph_visited[tid]=true;
  54. stop=true;
  55. h_updating_graph_mask[tid]=false;
  56. }
  57. }
  58. k++;
  59. }
  60. while(stop);
  61. }
  62. //----------------------------------------------------------
  63. //--breadth first search on GPUs
  64. //----------------------------------------------------------
  65. void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
  66. int *h_graph_edges, char *h_graph_mask, char *h_updating_graph_mask, \
  67. char *h_graph_visited, int *h_cost)
  68. throw(std::string){
  69. //int number_elements = height*width;
  70. char h_over;
  71. cl_mem d_graph_nodes, d_graph_edges, d_graph_mask, d_updating_graph_mask, \
  72. d_graph_visited, d_cost, d_over;
  73. try{
  74. //--1 transfer data from host to device
  75. _clInit(platform_num, device_num, use_gpu);
  76. d_graph_nodes = _clMalloc(no_of_nodes*sizeof(Node), h_graph_nodes);
  77. d_graph_edges = _clMalloc(edge_list_size*sizeof(int), h_graph_edges);
  78. d_graph_mask = _clMallocRW(no_of_nodes*sizeof(char), h_graph_mask);
  79. d_updating_graph_mask = _clMallocRW(no_of_nodes*sizeof(char), h_updating_graph_mask);
  80. d_graph_visited = _clMallocRW(no_of_nodes*sizeof(char), h_graph_visited);
  81. d_cost = _clMallocRW(no_of_nodes*sizeof(int), h_cost);
  82. d_over = _clMallocRW(sizeof(char), &h_over);
  83. _clMemcpyH2D(d_graph_nodes, no_of_nodes*sizeof(Node), h_graph_nodes);
  84. _clMemcpyH2D(d_graph_edges, edge_list_size*sizeof(int), h_graph_edges);
  85. _clMemcpyH2D(d_graph_mask, no_of_nodes*sizeof(char), h_graph_mask);
  86. _clMemcpyH2D(d_updating_graph_mask, no_of_nodes*sizeof(char), h_updating_graph_mask);
  87. _clMemcpyH2D(d_graph_visited, no_of_nodes*sizeof(char), h_graph_visited);
  88. _clMemcpyH2D(d_cost, no_of_nodes*sizeof(int), h_cost);
  89. //--2 invoke kernel
  90. #ifdef PROFILING
  91. timer kernel_timer;
  92. double kernel_time = 0.0;
  93. kernel_timer.reset();
  94. kernel_timer.start();
  95. #endif
  96. do{
  97. h_over = false;
  98. _clMemcpyH2D(d_over, sizeof(char), &h_over);
  99. //--kernel 0
  100. int kernel_id = 0;
  101. int kernel_idx = 0;
  102. _clSetArgs(kernel_id, kernel_idx++, d_graph_nodes);
  103. _clSetArgs(kernel_id, kernel_idx++, d_graph_edges);
  104. _clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
  105. _clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
  106. _clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
  107. _clSetArgs(kernel_id, kernel_idx++, d_cost);
  108. _clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
  109. //int work_items = no_of_nodes;
  110. _clInvokeKernel(kernel_id, no_of_nodes, work_group_size);
  111. //--kernel 1
  112. kernel_id = 1;
  113. kernel_idx = 0;
  114. _clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
  115. _clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
  116. _clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
  117. _clSetArgs(kernel_id, kernel_idx++, d_over);
  118. _clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
  119. //work_items = no_of_nodes;
  120. _clInvokeKernel(kernel_id, no_of_nodes, work_group_size);
  121. _clMemcpyD2H(d_over,sizeof(char), &h_over);
  122. }while(h_over);
  123. _clFinish();
  124. #ifdef PROFILING
  125. kernel_timer.stop();
  126. kernel_time = kernel_timer.getTimeInSeconds();
  127. #endif
  128. //--3 transfer data from device to host
  129. _clMemcpyD2H(d_cost,no_of_nodes*sizeof(int), h_cost);
  130. //--statistics
  131. #ifdef PROFILING
  132. std::cout<<"kernel time(s):"<<kernel_time<<std::endl;
  133. #endif
  134. //--4 release cl resources.
  135. _clFree(d_graph_nodes);
  136. _clFree(d_graph_edges);
  137. _clFree(d_graph_mask);
  138. _clFree(d_updating_graph_mask);
  139. _clFree(d_graph_visited);
  140. _clFree(d_cost);
  141. _clFree(d_over);
  142. _clRelease();
  143. }
  144. catch(std::string msg){
  145. _clFree(d_graph_nodes);
  146. _clFree(d_graph_edges);
  147. _clFree(d_graph_mask);
  148. _clFree(d_updating_graph_mask);
  149. _clFree(d_graph_visited);
  150. _clFree(d_cost);
  151. _clFree(d_over);
  152. _clRelease();
  153. std::string e_str = "in run_transpose_gpu -> ";
  154. e_str += msg;
  155. throw(e_str);
  156. }
  157. return ;
  158. }
  159. void Usage(int argc, char**argv){
  160. fprintf(stderr,"Usage: %s <input_file> <platform_number> <device_number> <use_gpu>\n", argv[0]);
  161. }
  162. //----------------------------------------------------------
  163. //--cambine: main function
  164. //--author: created by Jianbin Fang
  165. //--date: 25/01/2011
  166. //----------------------------------------------------------
  167. int main(int argc, char * argv[])
  168. {
  169. int no_of_nodes;
  170. int edge_list_size;
  171. FILE *fp;
  172. Node* h_graph_nodes;
  173. char *h_graph_mask, *h_updating_graph_mask, *h_graph_visited;
  174. try{
  175. char *input_f;
  176. if(argc!=5){
  177. Usage(argc, argv);
  178. exit(0);
  179. }
  180. input_f = argv[1];
  181. printf("Reading File\n");
  182. //Read in Graph from a file
  183. fp = fopen(input_f,"r");
  184. if(!fp){
  185. printf("Error Reading graph file\n");
  186. return 0;
  187. }
  188. platform_num = atoi(argv[2]);
  189. device_num = atoi(argv[3]);
  190. use_gpu = atoi(argv[4]);
  191. int source = 0;
  192. fscanf(fp,"%d",&no_of_nodes);
  193. int num_of_blocks = 1;
  194. int num_of_threads_per_block = no_of_nodes;
  195. //Make execution Parameters according to the number of nodes
  196. //Distribute threads across multiple Blocks if necessary
  197. if(no_of_nodes>MAX_THREADS_PER_BLOCK){
  198. num_of_blocks = (int)ceil(no_of_nodes/(double)MAX_THREADS_PER_BLOCK);
  199. num_of_threads_per_block = MAX_THREADS_PER_BLOCK;
  200. }
  201. work_group_size = num_of_threads_per_block;
  202. // allocate host memory
  203. h_graph_nodes = (Node*) malloc(sizeof(Node)*no_of_nodes);
  204. h_graph_mask = (char*) malloc(sizeof(char)*no_of_nodes);
  205. h_updating_graph_mask = (char*) malloc(sizeof(char)*no_of_nodes);
  206. h_graph_visited = (char*) malloc(sizeof(char)*no_of_nodes);
  207. int start, edgeno;
  208. // initalize the memory
  209. for(int i = 0; i < no_of_nodes; i++){
  210. fscanf(fp,"%d %d",&start,&edgeno);
  211. h_graph_nodes[i].starting = start;
  212. h_graph_nodes[i].no_of_edges = edgeno;
  213. h_graph_mask[i]=false;
  214. h_updating_graph_mask[i]=false;
  215. h_graph_visited[i]=false;
  216. }
  217. //read the source node from the file
  218. fscanf(fp,"%d",&source);
  219. source=0;
  220. //set the source node as true in the mask
  221. h_graph_mask[source]=true;
  222. h_graph_visited[source]=true;
  223. fscanf(fp,"%d",&edge_list_size);
  224. int id,cost;
  225. int* h_graph_edges = (int*) malloc(sizeof(int)*edge_list_size);
  226. for(int i=0; i < edge_list_size ; i++){
  227. fscanf(fp,"%d",&id);
  228. fscanf(fp,"%d",&cost);
  229. h_graph_edges[i] = id;
  230. }
  231. if(fp)
  232. fclose(fp);
  233. // allocate mem for the result on host side
  234. int *h_cost = (int*) malloc(sizeof(int)*no_of_nodes);
  235. int *h_cost_ref = (int*)malloc(sizeof(int)*no_of_nodes);
  236. for(int i=0;i<no_of_nodes;i++){
  237. h_cost[i]=-1;
  238. h_cost_ref[i] = -1;
  239. }
  240. h_cost[source]=0;
  241. h_cost_ref[source]=0;
  242. //---------------------------------------------------------
  243. //--gpu entry
  244. run_bfs_gpu(no_of_nodes,h_graph_nodes,edge_list_size,h_graph_edges, h_graph_mask, h_updating_graph_mask, h_graph_visited, h_cost);
  245. //---------------------------------------------------------
  246. //--cpu entry
  247. // initalize the memory again
  248. for(int i = 0; i < no_of_nodes; i++){
  249. h_graph_mask[i]=false;
  250. h_updating_graph_mask[i]=false;
  251. h_graph_visited[i]=false;
  252. }
  253. //set the source node as true in the mask
  254. source=0;
  255. h_graph_mask[source]=true;
  256. h_graph_visited[source]=true;
  257. run_bfs_cpu(no_of_nodes,h_graph_nodes,edge_list_size,h_graph_edges, h_graph_mask, h_updating_graph_mask, h_graph_visited, h_cost_ref);
  258. //---------------------------------------------------------
  259. //--result varification
  260. compare_results<int>(h_cost_ref, h_cost, no_of_nodes);
  261. //release host memory
  262. free(h_graph_nodes);
  263. free(h_graph_mask);
  264. free(h_updating_graph_mask);
  265. free(h_graph_visited);
  266. }
  267. catch(std::string msg){
  268. std::cout<<"--cambine: exception in main ->"<<msg<<std::endl;
  269. //release host memory
  270. free(h_graph_nodes);
  271. free(h_graph_mask);
  272. free(h_updating_graph_mask);
  273. free(h_graph_visited);
  274. }
  275. return 0;
  276. }