bfs.cpp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331
  1. //--by Jianbin Fang
  2. #define __CL_ENABLE_EXCEPTIONS
  3. #include <cstdlib>
  4. #include <cstring>
  5. #include <iostream>
  6. #include <stdlib.h>
  7. #include <stdio.h>
  8. #include <string>
  9. #include <unistd.h>
  10. #ifdef PROFILING
  11. #include "timer.h"
  12. #endif
  13. #include "CLHelper.h"
  14. #include "util.h"
  15. #define MAX_THREADS_PER_BLOCK 256
  16. int platform_id = 0;
  17. int device_id = 0;
  18. int use_gpu = 0;
  19. //Structure to hold a node information
  20. struct Node
  21. {
  22. int starting;
  23. int no_of_edges;
  24. };
  25. //----------------------------------------------------------
  26. //--bfs on cpu
  27. //--programmer: jianbin
  28. //--date: 26/01/2011
  29. //--note: width is changed to the new_width
  30. //----------------------------------------------------------
  31. void run_bfs_cpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
  32. int *h_graph_edges, char *h_graph_mask, char *h_updating_graph_mask, \
  33. char *h_graph_visited, int *h_cost_ref){
  34. char stop;
  35. int k = 0;
  36. do{
  37. //if no thread changes this value then the loop stops
  38. stop=false;
  39. for(int tid = 0; tid < no_of_nodes; tid++ )
  40. {
  41. if (h_graph_mask[tid] == true){
  42. h_graph_mask[tid]=false;
  43. for(int i=h_graph_nodes[tid].starting; i<(h_graph_nodes[tid].no_of_edges + h_graph_nodes[tid].starting); i++){
  44. int id = h_graph_edges[i]; //--cambine: node id is connected with node tid
  45. if(!h_graph_visited[id]){ //--cambine: if node id has not been visited, enter the body below
  46. h_cost_ref[id]=h_cost_ref[tid]+1;
  47. h_updating_graph_mask[id]=true;
  48. }
  49. }
  50. }
  51. }
  52. for(int tid=0; tid< no_of_nodes ; tid++ )
  53. {
  54. if (h_updating_graph_mask[tid] == true){
  55. h_graph_mask[tid]=true;
  56. h_graph_visited[tid]=true;
  57. stop=true;
  58. h_updating_graph_mask[tid]=false;
  59. }
  60. }
  61. k++;
  62. }
  63. while(stop);
  64. }
  65. //----------------------------------------------------------
  66. //--breadth first search on GPUs
  67. //----------------------------------------------------------
  68. void run_bfs_gpu(int no_of_nodes, Node *h_graph_nodes, int edge_list_size, \
  69. int *h_graph_edges, char *h_graph_mask, char *h_updating_graph_mask, \
  70. char *h_graph_visited, int *h_cost)
  71. throw(std::string){
  72. //int number_elements = height*width;
  73. char h_over;
  74. cl_mem d_graph_nodes, d_graph_edges, d_graph_mask, d_updating_graph_mask, \
  75. d_graph_visited, d_cost, d_over;
  76. try{
  77. //--1 transfer data from host to device
  78. _clInit(platform_id, device_id, use_gpu);
  79. d_graph_nodes = _clMalloc(no_of_nodes*sizeof(Node), h_graph_nodes);
  80. d_graph_edges = _clMalloc(edge_list_size*sizeof(int), h_graph_edges);
  81. d_graph_mask = _clMallocRW(no_of_nodes*sizeof(char), h_graph_mask);
  82. d_updating_graph_mask = _clMallocRW(no_of_nodes*sizeof(char), h_updating_graph_mask);
  83. d_graph_visited = _clMallocRW(no_of_nodes*sizeof(char), h_graph_visited);
  84. d_cost = _clMallocRW(no_of_nodes*sizeof(int), h_cost);
  85. d_over = _clMallocRW(sizeof(char), &h_over);
  86. _clMemcpyH2D(d_graph_nodes, no_of_nodes*sizeof(Node), h_graph_nodes);
  87. _clMemcpyH2D(d_graph_edges, edge_list_size*sizeof(int), h_graph_edges);
  88. _clMemcpyH2D(d_graph_mask, no_of_nodes*sizeof(char), h_graph_mask);
  89. _clMemcpyH2D(d_updating_graph_mask, no_of_nodes*sizeof(char), h_updating_graph_mask);
  90. _clMemcpyH2D(d_graph_visited, no_of_nodes*sizeof(char), h_graph_visited);
  91. _clMemcpyH2D(d_cost, no_of_nodes*sizeof(int), h_cost);
  92. //--2 invoke kernel
  93. #ifdef PROFILING
  94. timer kernel_timer;
  95. double kernel_time = 0.0;
  96. kernel_timer.reset();
  97. kernel_timer.start();
  98. #endif
  99. do{
  100. h_over = false;
  101. _clMemcpyH2D(d_over, sizeof(char), &h_over);
  102. //--kernel 0
  103. int kernel_id = 0;
  104. int kernel_idx = 0;
  105. _clSetArgs(kernel_id, kernel_idx++, d_graph_nodes);
  106. _clSetArgs(kernel_id, kernel_idx++, d_graph_edges);
  107. _clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
  108. _clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
  109. _clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
  110. _clSetArgs(kernel_id, kernel_idx++, d_cost);
  111. _clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
  112. //int work_items = no_of_nodes;
  113. _clInvokeKernel(kernel_id, no_of_nodes, work_group_size);
  114. //--kernel 1
  115. kernel_id = 1;
  116. kernel_idx = 0;
  117. _clSetArgs(kernel_id, kernel_idx++, d_graph_mask);
  118. _clSetArgs(kernel_id, kernel_idx++, d_updating_graph_mask);
  119. _clSetArgs(kernel_id, kernel_idx++, d_graph_visited);
  120. _clSetArgs(kernel_id, kernel_idx++, d_over);
  121. _clSetArgs(kernel_id, kernel_idx++, &no_of_nodes, sizeof(int));
  122. //work_items = no_of_nodes;
  123. _clInvokeKernel(kernel_id, no_of_nodes, work_group_size);
  124. _clMemcpyD2H(d_over,sizeof(char), &h_over);
  125. }while(h_over);
  126. _clFinish();
  127. #ifdef PROFILING
  128. kernel_timer.stop();
  129. kernel_time = kernel_timer.getTimeInSeconds();
  130. #endif
  131. //--3 transfer data from device to host
  132. _clMemcpyD2H(d_cost,no_of_nodes*sizeof(int), h_cost);
  133. //--statistics
  134. #ifdef PROFILING
  135. std::cout<<"kernel time(s):"<<kernel_time<<std::endl;
  136. #endif
  137. //--4 release cl resources.
  138. _clFree(d_graph_nodes);
  139. _clFree(d_graph_edges);
  140. _clFree(d_graph_mask);
  141. _clFree(d_updating_graph_mask);
  142. _clFree(d_graph_visited);
  143. _clFree(d_cost);
  144. _clFree(d_over);
  145. _clRelease();
  146. }
  147. catch(std::string msg){
  148. _clFree(d_graph_nodes);
  149. _clFree(d_graph_edges);
  150. _clFree(d_graph_mask);
  151. _clFree(d_updating_graph_mask);
  152. _clFree(d_graph_visited);
  153. _clFree(d_cost);
  154. _clFree(d_over);
  155. _clRelease();
  156. std::string e_str = "in run_transpose_gpu -> ";
  157. e_str += msg;
  158. throw(e_str);
  159. }
  160. return ;
  161. }
  162. void Usage(char *argv0){
  163. char *help =
  164. "\nUsage: %s [switches] \n\n"
  165. " -f :file to process \n"
  166. " -p platform_id :OCL platform to use [default=0]\n"
  167. " -d device_id :OCL device to use [default=0]\n"
  168. " -g use_gpu :1 for GPU 0 for CPU [default=0]\n";
  169. fprintf(stderr, help, argv0);
  170. exit(-1);
  171. }
  172. //----------------------------------------------------------
  173. //--cambine: main function
  174. //--author: created by Jianbin Fang
  175. //--date: 25/01/2011
  176. //----------------------------------------------------------
  177. int main(int argc, char * argv[])
  178. {
  179. int no_of_nodes;
  180. int edge_list_size;
  181. FILE *fp;
  182. Node* h_graph_nodes;
  183. char *h_graph_mask, *h_updating_graph_mask, *h_graph_visited;
  184. char *input_f;
  185. try{
  186. // Arguments parsing for platform, device and type
  187. // Variables to store information on platform and device to use_gpu
  188. /* obtain command line arguments and change appropriate options */
  189. int opt;
  190. extern char *optarg;
  191. while ((opt=getopt(argc,argv,"f:p:d:g:"))!= EOF) {
  192. switch (opt) {
  193. case 'p': platform_id = atoi(optarg);
  194. break;
  195. case 'd': device_id = atoi(optarg);
  196. break;
  197. case 'g': use_gpu = atoi(optarg);
  198. break;
  199. case 'f': input_f = optarg;
  200. break;
  201. case '?': Usage(argv[0]);
  202. break;
  203. default: Usage(argv[0]);
  204. break;
  205. }
  206. }
  207. printf("Reading File\n");
  208. //Read in Graph from a file
  209. fp = fopen(input_f,"r");
  210. if(!fp){
  211. printf("Error Reading graph file\n");
  212. return 0;
  213. }
  214. int source = 0;
  215. fscanf(fp,"%d",&no_of_nodes);
  216. int num_of_blocks = 1;
  217. int num_of_threads_per_block = no_of_nodes;
  218. //Make execution Parameters according to the number of nodes
  219. //Distribute threads across multiple Blocks if necessary
  220. if(no_of_nodes>MAX_THREADS_PER_BLOCK){
  221. num_of_blocks = (int)ceil(no_of_nodes/(double)MAX_THREADS_PER_BLOCK);
  222. num_of_threads_per_block = MAX_THREADS_PER_BLOCK;
  223. }
  224. work_group_size = num_of_threads_per_block;
  225. // allocate host memory
  226. h_graph_nodes = (Node*) malloc(sizeof(Node)*no_of_nodes);
  227. h_graph_mask = (char*) malloc(sizeof(char)*no_of_nodes);
  228. h_updating_graph_mask = (char*) malloc(sizeof(char)*no_of_nodes);
  229. h_graph_visited = (char*) malloc(sizeof(char)*no_of_nodes);
  230. int start, edgeno;
  231. // initalize the memory
  232. for(int i = 0; i < no_of_nodes; i++){
  233. fscanf(fp,"%d %d",&start,&edgeno);
  234. h_graph_nodes[i].starting = start;
  235. h_graph_nodes[i].no_of_edges = edgeno;
  236. h_graph_mask[i]=false;
  237. h_updating_graph_mask[i]=false;
  238. h_graph_visited[i]=false;
  239. }
  240. //read the source node from the file
  241. fscanf(fp,"%d",&source);
  242. source=0;
  243. //set the source node as true in the mask
  244. h_graph_mask[source]=true;
  245. h_graph_visited[source]=true;
  246. fscanf(fp,"%d",&edge_list_size);
  247. int id,cost;
  248. int* h_graph_edges = (int*) malloc(sizeof(int)*edge_list_size);
  249. for(int i=0; i < edge_list_size ; i++){
  250. fscanf(fp,"%d",&id);
  251. fscanf(fp,"%d",&cost);
  252. h_graph_edges[i] = id;
  253. }
  254. if(fp)
  255. fclose(fp);
  256. // allocate mem for the result on host side
  257. int *h_cost = (int*) malloc(sizeof(int)*no_of_nodes);
  258. int *h_cost_ref = (int*)malloc(sizeof(int)*no_of_nodes);
  259. for(int i=0;i<no_of_nodes;i++){
  260. h_cost[i]=-1;
  261. h_cost_ref[i] = -1;
  262. }
  263. h_cost[source]=0;
  264. h_cost_ref[source]=0;
  265. //---------------------------------------------------------
  266. //--gpu entry
  267. run_bfs_gpu(no_of_nodes,h_graph_nodes,edge_list_size,h_graph_edges, h_graph_mask, h_updating_graph_mask, h_graph_visited, h_cost);
  268. //---------------------------------------------------------
  269. //--cpu entry
  270. // initalize the memory again
  271. for(int i = 0; i < no_of_nodes; i++){
  272. h_graph_mask[i]=false;
  273. h_updating_graph_mask[i]=false;
  274. h_graph_visited[i]=false;
  275. }
  276. //set the source node as true in the mask
  277. source=0;
  278. h_graph_mask[source]=true;
  279. h_graph_visited[source]=true;
  280. run_bfs_cpu(no_of_nodes,h_graph_nodes,edge_list_size,h_graph_edges, h_graph_mask, h_updating_graph_mask, h_graph_visited, h_cost_ref);
  281. //---------------------------------------------------------
  282. //--result varification
  283. compare_results<int>(h_cost_ref, h_cost, no_of_nodes);
  284. //release host memory
  285. free(h_graph_nodes);
  286. free(h_graph_mask);
  287. free(h_updating_graph_mask);
  288. free(h_graph_visited);
  289. }
  290. catch(std::string msg){
  291. std::cout<<"--cambine: exception in main ->"<<msg<<std::endl;
  292. //release host memory
  293. free(h_graph_nodes);
  294. free(h_graph_mask);
  295. free(h_updating_graph_mask);
  296. free(h_graph_visited);
  297. }
  298. return 0;
  299. }