util.h 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131
  1. #ifndef _C_UTIL_
  2. #define _C_UTIL_
  3. #include <math.h>
  4. #include <iostream>
  5. #include <omp.h>
  6. #include <sys/time.h>
  // BLOCK_SIZE_0 resolves to the first of RD_WG_SIZE_0_0, RD_WG_SIZE_0 and
  // RD_WG_SIZE that is defined, and to 192 when none of them is.
  #if defined(RD_WG_SIZE_0_0)
    #define BLOCK_SIZE_0 RD_WG_SIZE_0_0
  #elif defined(RD_WG_SIZE_0)
    #define BLOCK_SIZE_0 RD_WG_SIZE_0
  #elif defined(RD_WG_SIZE)
    #define BLOCK_SIZE_0 RD_WG_SIZE
  #else
    #define BLOCK_SIZE_0 192
  #endif
  // BLOCK_SIZE_1 resolves to the first of RD_WG_SIZE_1_0, RD_WG_SIZE_1 and
  // RD_WG_SIZE that is defined, and to 192 when none of them is.
  #if defined(RD_WG_SIZE_1_0)
    #define BLOCK_SIZE_1 RD_WG_SIZE_1_0
  #elif defined(RD_WG_SIZE_1)
    #define BLOCK_SIZE_1 RD_WG_SIZE_1
  #elif defined(RD_WG_SIZE)
    #define BLOCK_SIZE_1 RD_WG_SIZE
  #else
    #define BLOCK_SIZE_1 192
  #endif
  // BLOCK_SIZE_2 resolves to the first of RD_WG_SIZE_2_0, RD_WG_SIZE_2 and
  // RD_WG_SIZE that is defined, and to 192 when none of them is.
  // The second branch tests RD_WG_SIZE_2. It used to test RD_WG_SIZE_1, which
  // ignored a lone RD_WG_SIZE_2 definition and, when only RD_WG_SIZE_1 was
  // set, expanded BLOCK_SIZE_2 to the undefined name RD_WG_SIZE_2.
  #ifdef RD_WG_SIZE_2_0
    #define BLOCK_SIZE_2 RD_WG_SIZE_2_0
  #elif defined(RD_WG_SIZE_2)
    #define BLOCK_SIZE_2 RD_WG_SIZE_2
  #elif defined(RD_WG_SIZE)
    #define BLOCK_SIZE_2 RD_WG_SIZE
  #else
    #define BLOCK_SIZE_2 192
  #endif
  // BLOCK_SIZE_3 resolves to the first of RD_WG_SIZE_3_0, RD_WG_SIZE_3 and
  // RD_WG_SIZE that is defined, and to 192 when none of them is.
  #if defined(RD_WG_SIZE_3_0)
    #define BLOCK_SIZE_3 RD_WG_SIZE_3_0
  #elif defined(RD_WG_SIZE_3)
    #define BLOCK_SIZE_3 RD_WG_SIZE_3
  #elif defined(RD_WG_SIZE)
    #define BLOCK_SIZE_3 RD_WG_SIZE
  #else
    #define BLOCK_SIZE_3 192
  #endif
  // BLOCK_SIZE_4 resolves to the first of RD_WG_SIZE_4_0, RD_WG_SIZE_4 and
  // RD_WG_SIZE that is defined, and to 192 when none of them is.
  #if defined(RD_WG_SIZE_4_0)
    #define BLOCK_SIZE_4 RD_WG_SIZE_4_0
  #elif defined(RD_WG_SIZE_4)
    #define BLOCK_SIZE_4 RD_WG_SIZE_4
  #elif defined(RD_WG_SIZE)
    #define BLOCK_SIZE_4 RD_WG_SIZE
  #else
    #define BLOCK_SIZE_4 192
  #endif
  52. using std::endl;
  //-------------------------------------------------------------------
  //--current time in seconds
  //-------------------------------------------------------------------
  // Returns the time reported by gettimeofday() as a double: whole seconds
  // plus the microsecond field scaled by 1e-6.
  // Declared inline because the definition lives in a header; a non-inline
  // definition is emitted in every translation unit that includes it and
  // the program then fails to link with duplicate-symbol errors.
  inline double gettime() {
    struct timeval t;
    gettimeofday(&t, NULL);
    return static_cast<double>(t.tv_sec) + static_cast<double>(t.tv_usec) * 1e-6;
  }
  //-------------------------------------------------------------------
  //--initialize array with pseudo-random values scaled by a maximum limit
  //-------------------------------------------------------------------
  // Writes A[0] .. A[n-1]; each element is maxi multiplied by
  // rand() / (RAND_MAX + 1), a ratio in [0, 1), then converted to datatype.
  // Uses the global rand() sequence. Writes nothing when n <= 0.
  //
  // The ratio is computed in double. With the former float divisor
  // (RAND_MAX + 1.0f), a RAND_MAX too large for float (e.g. 2^31 - 1) let
  // rand() values near RAND_MAX round up so the ratio became exactly 1.0 and
  // an integral datatype could receive maxi itself; it also cut the random
  // resolution of double arrays to float precision.
  template<typename datatype>
  void fill(datatype *A, const int n, const datatype maxi){
    for (int j = 0; j < n; j++){
      const double ratio = rand() / (RAND_MAX + 1.0);
      A[j] = static_cast<datatype>(maxi * ratio);
    }
  }
  //--write a matrix stored row by row to std::cout
  // Emits `height` lines. Line r holds elements A[r*width] .. A[r*width +
  // width - 1], each followed by a single space, and is ended with std::endl.
  template<typename datatype>
  void print_matrix(datatype *A, int height, int width){
    for (int row = 0; row < height; ++row){
      // Offset of the first element of this row in the flat array.
      const int base = row*width;
      for (int col = 0; col < width; ++col){
        std::cout << A[base + col] << " ";
      }
      std::cout << std::endl;
    }
  }
  //-------------------------------------------------------------------
  //--verify results
  //-------------------------------------------------------------------
  // Largest deviation, relative to the reference value, still accepted as a match.
  #define MAX_RELATIVE_ERROR .002
  // Compares gpuResults against the reference cpuResults over `size` elements
  // and prints "--cambine:passed:-)" when no element satisfies
  //     |cpu - gpu| > MAX_RELATIVE_ERROR * |cpu|
  // and "--cambine: failed:-(" otherwise. With size <= 0 nothing is compared
  // and the passed message is printed.
  //
  // The tolerance is applied as a product against |cpu| instead of dividing
  // the error by cpu: with the division, a negative reference produced a
  // negative ratio that never exceeded the threshold (every error was
  // accepted), and a zero reference meant dividing by zero.
  template<typename datatype>
  void verify_array(const datatype *cpuResults, const datatype *gpuResults, const int size){
    bool passed = true;
    // The && reduction gives every thread its own flag and combines them after
    // the loop; writing one shared `passed` from all threads was a data race.
  #pragma omp parallel for reduction(&&:passed)
    for (int i=0; i<size; i++){
      const double reference = static_cast<double>(cpuResults[i]);
      const double deviation = fabs(reference - static_cast<double>(gpuResults[i]));
      const double tolerance = MAX_RELATIVE_ERROR * fabs(reference);
      passed = passed && !(deviation > tolerance);
    }
    if (passed){
      std::cout << "--cambine:passed:-)" << std::endl;
    }
    else{
      std::cout << "--cambine: failed:-(" << std::endl;
    }
    return ;
  }
  // Exact comparison of two arrays of `size` elements using operator!=.
  // Prints "--cambine:passed:-)" when no pair of elements differs and
  // "--cambine: failed:-(" otherwise; with size <= 0 the passed message is
  // printed. Every element is visited; the scan does not stop at the first
  // mismatch.
  template<typename datatype>
  void compare_results(const datatype *cpu_results, const datatype *gpu_results, const int size){
    bool mismatch = false;
    int k = 0;
    while (k < size){
      if (cpu_results[k] != gpu_results[k]){
        mismatch = true;
      }
      ++k;
    }
    const char *verdict = mismatch ? "--cambine: failed:-(" : "--cambine:passed:-)";
    std::cout << verdict << std::endl;
  }
  117. #endif