util.h 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
  1. #ifndef _C_UTIL_
  2. #define _C_UTIL_
  3. #include <cstdlib>
  4. #include <math.h>
  5. #include <iostream>
  6. #include <omp.h>
  7. #include <sys/time.h>
  // BLOCK_SIZE_0 takes the first macro that is defined, in this order:
  // RD_WG_SIZE_0_0, RD_WG_SIZE_0, RD_WG_SIZE. With none defined it is 192.
  #if defined(RD_WG_SIZE_0_0)
  #  define BLOCK_SIZE_0 RD_WG_SIZE_0_0
  #elif defined(RD_WG_SIZE_0)
  #  define BLOCK_SIZE_0 RD_WG_SIZE_0
  #elif defined(RD_WG_SIZE)
  #  define BLOCK_SIZE_0 RD_WG_SIZE
  #else
  #  define BLOCK_SIZE_0 192
  #endif
  // BLOCK_SIZE_1 takes the first macro that is defined, in this order:
  // RD_WG_SIZE_1_0, RD_WG_SIZE_1, RD_WG_SIZE. With none defined it is 192.
  #if defined(RD_WG_SIZE_1_0)
  #  define BLOCK_SIZE_1 RD_WG_SIZE_1_0
  #elif defined(RD_WG_SIZE_1)
  #  define BLOCK_SIZE_1 RD_WG_SIZE_1
  #elif defined(RD_WG_SIZE)
  #  define BLOCK_SIZE_1 RD_WG_SIZE
  #else
  #  define BLOCK_SIZE_1 192
  #endif
  // BLOCK_SIZE_2 takes the first macro that is defined, in this order:
  // RD_WG_SIZE_2_0, RD_WG_SIZE_2, RD_WG_SIZE. With none defined it is 192.
  // The second branch must test RD_WG_SIZE_2 (the macro it expands to), not
  // RD_WG_SIZE_1; otherwise defining only RD_WG_SIZE_1 produces an undefined
  // identifier and defining only RD_WG_SIZE_2 is silently ignored.
  #if defined(RD_WG_SIZE_2_0)
  #  define BLOCK_SIZE_2 RD_WG_SIZE_2_0
  #elif defined(RD_WG_SIZE_2)
  #  define BLOCK_SIZE_2 RD_WG_SIZE_2
  #elif defined(RD_WG_SIZE)
  #  define BLOCK_SIZE_2 RD_WG_SIZE
  #else
  #  define BLOCK_SIZE_2 192
  #endif
  // BLOCK_SIZE_3 takes the first macro that is defined, in this order:
  // RD_WG_SIZE_3_0, RD_WG_SIZE_3, RD_WG_SIZE. With none defined it is 192.
  #if defined(RD_WG_SIZE_3_0)
  #  define BLOCK_SIZE_3 RD_WG_SIZE_3_0
  #elif defined(RD_WG_SIZE_3)
  #  define BLOCK_SIZE_3 RD_WG_SIZE_3
  #elif defined(RD_WG_SIZE)
  #  define BLOCK_SIZE_3 RD_WG_SIZE
  #else
  #  define BLOCK_SIZE_3 192
  #endif
  // BLOCK_SIZE_4 takes the first macro that is defined, in this order:
  // RD_WG_SIZE_4_0, RD_WG_SIZE_4, RD_WG_SIZE. With none defined it is 192.
  #if defined(RD_WG_SIZE_4_0)
  #  define BLOCK_SIZE_4 RD_WG_SIZE_4_0
  #elif defined(RD_WG_SIZE_4)
  #  define BLOCK_SIZE_4 RD_WG_SIZE_4
  #elif defined(RD_WG_SIZE)
  #  define BLOCK_SIZE_4 RD_WG_SIZE
  #else
  #  define BLOCK_SIZE_4 192
  #endif
  53. using std::endl;
  //--current time in seconds
  // Reads the clock with gettimeofday() and returns seconds plus the
  // microsecond part scaled by 1e-6, as a single double.
  // Marked inline: the function is defined in a header, and without inline
  // every translation unit that includes it would emit its own external
  // definition, causing multiple-definition errors at link time.
  inline double gettime() {
    struct timeval now;
    gettimeofday(&now, NULL);
    return now.tv_sec + now.tv_usec * 1e-6;
  }
  //-------------------------------------------------------------------
  //--fill an array with pseudo-random values scaled by maxi
  //-------------------------------------------------------------------
  // Writes n elements of A. Each element is maxi multiplied by
  // std::rand() / (RAND_MAX + 1.0f), then converted to datatype on
  // assignment. The sequence depends on the current std::rand() state; this
  // function never seeds the generator. Nothing is written when n <= 0.
  template<typename datatype>
  void fill(datatype *A, const int n, const datatype maxi){
    // Single-precision divisor, kept as float so results match bit for bit.
    const float divisor = RAND_MAX + 1.0f;
    int slot = 0;
    while (slot < n){
      A[slot] = static_cast<datatype>(maxi) * (std::rand() / divisor);
      ++slot;
    }
  }
  //--print matrix
  // Streams height rows of width elements to std::cout, reading A as one
  // contiguous array indexed by row * width + column. Every element is
  // followed by a single space and every row ends with std::endl (newline
  // plus flush). Nothing is printed when height <= 0.
  template<typename datatype>
  void print_matrix(datatype *A, int height, int width){
    // Flat position of the next element; advances once per printed element,
    // so it always equals row * width + col.
    int offset = 0;
    for (int row = 0; row < height; ++row){
      for (int col = 0; col < width; ++col, ++offset){
        std::cout << A[offset] << " ";
      }
      std::cout << std::endl;
    }
  }
  //-------------------------------------------------------------------
  //--verify results
  //-------------------------------------------------------------------
  // Largest tolerated |cpu - gpu| / |cpu| before an element counts as wrong.
  #define MAX_RELATIVE_ERROR .002
  // Compares gpuResults against cpuResults over the first `size` elements
  // using a relative-error tolerance, then prints one pass/fail banner to
  // std::cout.
  //   cpuResults  reference values (the denominator of the relative error)
  //   gpuResults  values under test
  //   size        number of elements to compare; size <= 0 reports "passed"
  // An element is accepted when the two values compare equal, or when
  // |cpu - gpu| / |cpu| <= MAX_RELATIVE_ERROR. Values are compared as double.
  template<typename datatype>
  void verify_array(const datatype *cpuResults, const datatype *gpuResults, const int size){
    bool passed = true;
    // The && reduction gives every thread a private flag that is combined at
    // the end, so no two threads write the shared variable concurrently.
  #pragma omp parallel for reduction(&&:passed)
    for (int i = 0; i < size; i++){
      const double expected = static_cast<double>(cpuResults[i]);
      const double actual = static_cast<double>(gpuResults[i]);
      // Equal values (including 0 vs 0 and matching infinities) pass without
      // forming a 0/0 or inf-inf ratio.
      if (expected != actual){
        // Divide by the magnitude of the reference: dividing by a negative
        // reference would make the ratio negative and hide any error.
        const double relative = fabs(expected - actual) / fabs(expected);
        // Written as !(x <= limit) so that a NaN ratio is reported as a
        // failure instead of slipping through a false `>` comparison.
        if (!(relative <= MAX_RELATIVE_ERROR)){
          passed = false;
        }
      }
    }
    if (passed){
      std::cout << "--cambine:passed:-)" << std::endl;
    }
    else{
      std::cout << "--cambine: failed:-(" << std::endl;
    }
    return ;
  }
  //--compare results exactly
  // Checks the first `size` elements of cpu_results and gpu_results with
  // operator!= (no tolerance) and prints one banner to std::cout: the
  // "passed" banner when no pair differs, the "failed" banner otherwise.
  // Every pair is examined; the scan does not stop at the first difference.
  template<typename datatype>
  void compare_results(const datatype *cpu_results, const datatype *gpu_results, const int size){
    int mismatches = 0;
    //#pragma omp parallel for
    int k = 0;
    while (k < size){
      if (cpu_results[k] != gpu_results[k]){
        ++mismatches;
      }
      ++k;
    }
    std::cout << (mismatches == 0 ? "--cambine:passed:-)" : "--cambine: failed:-(") << std::endl;
  }
  118. #endif