machine.c 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
  1. /**************************************************************************
  2. **
  3. ** Copyright (C) 1993 David E. Stewart & Zbigniew Leyk, all rights reserved.
  4. **
  5. ** Meschach Library
  6. **
  7. ** This Meschach Library is provided "as is" without any express
  8. ** or implied warranty of any kind with respect to this software.
  9. ** In particular the authors shall not be liable for any direct,
  10. ** indirect, special, incidental or consequential damages arising
  11. ** in any way from use of the software.
  12. **
  13. ** Everyone is granted permission to copy, modify and redistribute this
  14. ** Meschach Library, provided:
  15. ** 1. All copies contain this copyright notice.
  16. ** 2. All modified copies shall carry a notice stating who
  17. ** made the last modification and the date of such modification.
  18. ** 3. No charge is made for this software or works derived from it.
  19. ** This clause shall not be construed as constraining other software
  20. ** distributed on the same medium as this software, nor is a
  21. ** distribution fee considered a charge.
  22. **
  23. ***************************************************************************/
  24. /*
  25. This file contains basic routines which are used by the functions
  26. in meschach.a etc.
  27. These are the routines that should be modified in order to take
  28. full advantage of specialised architectures (pipelining, vector
  29. processors etc).
  30. */
  31. static char *rcsid = "$Id: machine.c,v 1.4 1994/01/13 05:28:56 des Exp $";
  32. #include "machine.h"
  33. /* __ip__ -- inner product */
  34. #ifndef ANSI_C
  35. double __ip__(dp1,dp2,len)
  36. register Real *dp1, *dp2;
  37. int len;
  38. #else
  39. double __ip__(const Real *dp1, const Real *dp2, int len)
  40. #endif
  41. {
  42. #ifdef VUNROLL
  43. register int len4;
  44. register Real sum1, sum2, sum3;
  45. #endif
  46. register int i;
  47. register Real sum;
  48. sum = 0.0;
  49. #ifdef VUNROLL
  50. sum1 = sum2 = sum3 = 0.0;
  51. len4 = len / 4;
  52. len = len % 4;
  53. for ( i = 0; i < len4; i++ )
  54. {
  55. sum += dp1[4*i]*dp2[4*i];
  56. sum1 += dp1[4*i+1]*dp2[4*i+1];
  57. sum2 += dp1[4*i+2]*dp2[4*i+2];
  58. sum3 += dp1[4*i+3]*dp2[4*i+3];
  59. }
  60. sum += sum1 + sum2 + sum3;
  61. dp1 += 4*len4; dp2 += 4*len4;
  62. #endif
  63. for ( i = 0; i < len; i++ )
  64. sum += dp1[i]*dp2[i];
  65. return sum;
  66. }
  67. /* __mltadd__ -- scalar multiply and add c.f. v_mltadd() */
  68. #ifndef ANSI_C
  69. void __mltadd__(dp1,dp2,s,len)
  70. register Real *dp1, *dp2;
  71. register double s;
  72. register int len;
  73. #else
  74. void __mltadd__(Real *dp1, const Real *dp2, double s, int len)
  75. #endif
  76. {
  77. register int i;
  78. #ifdef VUNROLL
  79. register int len4;
  80. len4 = len / 4;
  81. len = len % 4;
  82. for ( i = 0; i < len4; i++ )
  83. {
  84. dp1[4*i] += s*dp2[4*i];
  85. dp1[4*i+1] += s*dp2[4*i+1];
  86. dp1[4*i+2] += s*dp2[4*i+2];
  87. dp1[4*i+3] += s*dp2[4*i+3];
  88. }
  89. dp1 += 4*len4; dp2 += 4*len4;
  90. #endif
  91. for ( i = 0; i < len; i++ )
  92. dp1[i] += s*dp2[i];
  93. }
  94. /* __smlt__ scalar multiply array c.f. sv_mlt() */
  95. #ifndef ANSI_C
  96. void __smlt__(dp,s,out,len)
  97. register Real *dp, *out;
  98. register double s;
  99. register int len;
  100. #else
  101. void __smlt__(const Real *dp, double s, Real *out, int len)
  102. #endif
  103. {
  104. register int i;
  105. for ( i = 0; i < len; i++ )
  106. out[i] = s*dp[i];
  107. }
  108. /* __add__ -- add arrays c.f. v_add() */
  109. #ifndef ANSI_C
  110. void __add__(dp1,dp2,out,len)
  111. register Real *dp1, *dp2, *out;
  112. register int len;
  113. #else
  114. void __add__(const Real *dp1, const Real *dp2, Real *out, int len)
  115. #endif
  116. {
  117. register int i;
  118. for ( i = 0; i < len; i++ )
  119. out[i] = dp1[i] + dp2[i];
  120. }
  121. /* __sub__ -- subtract arrays c.f. v_sub() */
  122. #ifndef ANSI_C
  123. void __sub__(dp1,dp2,out,len)
  124. register Real *dp1, *dp2, *out;
  125. register int len;
  126. #else
  127. void __sub__(const Real *dp1, const Real *dp2, Real *out, int len)
  128. #endif
  129. {
  130. register int i;
  131. for ( i = 0; i < len; i++ )
  132. out[i] = dp1[i] - dp2[i];
  133. }
  134. /* __zero__ -- zeros an array of floating point numbers */
  135. #ifndef ANSI_C
  136. void __zero__(dp,len)
  137. register Real *dp;
  138. register int len;
  139. #else
  140. void __zero__(Real *dp, int len)
  141. #endif
  142. {
  143. #ifdef CHAR0ISDBL0
  144. /* if a floating point zero is equivalent to a string of nulls */
  145. MEM_ZERO((char *)dp,len*sizeof(Real));
  146. #else
  147. /* else, need to zero the array entry by entry */
  148. int i;
  149. for ( i = 0; i < len; i++ )
  150. dp[i] = 0.0;
  151. #endif
  152. }