iternsym.c

/**************************************************************************
**
** Copyright (C) 1993 David E. Stewart & Zbigniew Leyk, all rights reserved.
**
**                           Meschach Library
**
** This Meschach Library is provided "as is" without any express
** or implied warranty of any kind with respect to this software.
** In particular the authors shall not be liable for any direct,
** indirect, special, incidental or consequential damages arising
** in any way from use of the software.
**
** Everyone is granted permission to copy, modify and redistribute this
** Meschach Library, provided:
**  1.  All copies contain this copyright notice.
**  2.  All modified copies shall carry a notice stating who
**      made the last modification and the date of such modification.
**  3.  No charge is made for this software or works derived from it.
**      This clause shall not be construed as constraining other software
**      distributed on the same medium as this software, nor is a
**      distribution fee considered a charge.
**
***************************************************************************/

/* iter.c  17/09/93 */

/*
  ITERATIVE METHODS - implementation of several iterative methods;
  see also iter0.c
*/

#include <stdio.h>
#include <math.h>
#include "matrix.h"
#include "matrix2.h"
#include "sparse.h"
#include "iter.h"

static char rcsid[] = "$Header: iternsym.c,v 1.6 1995/01/30 14:53:01 des Exp $";

#ifdef ANSI_C
VEC  *spCHsolve(SPMAT *, VEC *, VEC *);
#else
VEC  *spCHsolve();
#endif
/*
  iter_cgs -- uses CGS to compute a solution x to A.x=b
*/
#ifndef ANSI_C
VEC  *iter_cgs(ip,r0)
ITER *ip;
VEC  *r0;
#else
VEC  *iter_cgs(ITER *ip, VEC *r0)
#endif
{
    STATIC VEC *p = VNULL, *q = VNULL, *r = VNULL, *u = VNULL;
    STATIC VEC *v = VNULL, *z = VNULL;
    VEC  *tmp;
    Real alpha, beta, nres, rho, old_rho, sigma, inner;

    if (ip == INULL)
        error(E_NULL,"iter_cgs");
    if (!ip->Ax || !ip->b || !r0)
        error(E_NULL,"iter_cgs");
    if ( ip->x == ip->b )
        error(E_INSITU,"iter_cgs");
    if (!ip->stop_crit)
        error(E_NULL,"iter_cgs");
    if ( r0->dim != ip->b->dim )
        error(E_SIZES,"iter_cgs");
    if ( ip->eps <= 0.0 ) ip->eps = MACHEPS;

    p = v_resize(p,ip->b->dim);
    q = v_resize(q,ip->b->dim);
    r = v_resize(r,ip->b->dim);
    u = v_resize(u,ip->b->dim);
    v = v_resize(v,ip->b->dim);
    MEM_STAT_REG(p,TYPE_VEC);
    MEM_STAT_REG(q,TYPE_VEC);
    MEM_STAT_REG(r,TYPE_VEC);
    MEM_STAT_REG(u,TYPE_VEC);
    MEM_STAT_REG(v,TYPE_VEC);

    if (ip->Bx) {
        z = v_resize(z,ip->b->dim);
        MEM_STAT_REG(z,TYPE_VEC);
    }

    if (ip->x != VNULL) {
        if (ip->x->dim != ip->b->dim)
            error(E_SIZES,"iter_cgs");
        ip->Ax(ip->A_par,ip->x,v);                  /* v = A*x */
        if (ip->Bx) {
            v_sub(ip->b,v,v);                       /* v = b - A*x */
            (ip->Bx)(ip->B_par,v,r);                /* r = B*(b-A*x) */
        }
        else v_sub(ip->b,v,r);                      /* r = b-A*x */
    }
    else {  /* ip->x == 0 */
        ip->x = v_get(ip->b->dim);                  /* x == 0 */
        ip->shared_x = FALSE;
        if (ip->Bx) (ip->Bx)(ip->B_par,ip->b,r);    /* r = B*b */
        else v_copy(ip->b,r);                       /* r = b */
    }

    v_zero(p);
    v_zero(q);
    old_rho = 1.0;

    for (ip->steps = 0; ip->steps <= ip->limit; ip->steps++) {
        inner = in_prod(r,r);
        nres = sqrt(fabs(inner));
        if (ip->steps == 0) ip->init_res = nres;
        if (ip->info) ip->info(ip,nres,r,VNULL);
        if ( ip->stop_crit(ip,nres,r,VNULL) ) break;

        rho = in_prod(r0,r);
        if ( old_rho == 0.0 )
            error(E_BREAKDOWN,"iter_cgs");
        beta = rho/old_rho;
        v_mltadd(r,q,beta,u);
        v_mltadd(q,p,beta,v);
        v_mltadd(u,v,beta,p);

        (ip->Ax)(ip->A_par,p,q);

        if (ip->Bx) {
            (ip->Bx)(ip->B_par,q,z);
            tmp = z;
        }
        else tmp = q;

        sigma = in_prod(r0,tmp);
        if ( sigma == 0.0 )
            error(E_BREAKDOWN,"iter_cgs");
        alpha = rho/sigma;
        v_mltadd(u,tmp,-alpha,q);
        v_add(u,q,v);

        (ip->Ax)(ip->A_par,v,u);
        if (ip->Bx) {
            (ip->Bx)(ip->B_par,u,z);
            tmp = z;
        }
        else tmp = u;

        v_mltadd(r,tmp,-alpha,r);
        v_mltadd(ip->x,v,alpha,ip->x);

        old_rho = rho;
    }

#ifdef THREADSAFE
    V_FREE(p);   V_FREE(q);   V_FREE(r);   V_FREE(u);
    V_FREE(v);   V_FREE(z);
#endif

    return ip->x;
}
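
/* Example (added illustration, not part of the original source): driving
   iter_cgs() directly through a hand-built ITER record, with a small
   residual-reporting callback.  The callback signature is assumed from the
   call sites above (ip->info(ip,nres,r,VNULL)); my_info, cgs_example and
   the problem setup are hypothetical names. */
#if 0
static void my_info(ITER *ip, double nres, VEC *r, VEC *Br)
{
    printf("step %4d: ||r|| = %g\n", ip->steps, nres);
}

void cgs_example(SPMAT *A, VEC *b)
{
    ITER *ip = iter_get(0,0);         /* allocate an ITER record */
    VEC  *r0 = v_get(b->dim);

    ip->Ax    = (Fun_Ax) sp_mv_mlt;   /* apply A by sparse mat-vec */
    ip->A_par = (void *) A;
    ip->b     = b;
    ip->eps   = 1e-8;
    ip->limit = 200;
    ip->info  = (Fun_info) my_info;

    v_rand(r0);                       /* fixed "shadow" vector r0 of CGS */
    iter_cgs(ip,r0);                  /* creates ip->x (starting from 0) */

    V_FREE(r0);
    ip->shared_b = TRUE;              /* b belongs to the caller */
    iter_free(ip);                    /* frees the ITER record and its x */
}
#endif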
/* iter_spcgs -- simple interface for SPMAT data structures
   always use as follows:
      x = iter_spcgs(A,B,b,r0,tol,x,limit,steps);
   or
      x = iter_spcgs(A,B,b,r0,tol,VNULL,limit,steps);
   In the second case the solution vector is created.
   If B is not NULL then it is used as a preconditioner.
*/
#ifndef ANSI_C
VEC  *iter_spcgs(A,B,b,r0,tol,x,limit,steps)
SPMAT  *A, *B;
VEC    *b, *r0, *x;
double tol;
int    *steps, limit;
#else
VEC  *iter_spcgs(SPMAT *A, SPMAT *B, VEC *b, VEC *r0, double tol,
                 VEC *x, int limit, int *steps)
#endif
{
    ITER *ip;

    ip = iter_get(0,0);
    ip->Ax = (Fun_Ax) sp_mv_mlt;
    ip->A_par = (void *) A;
    if (B) {
        ip->Bx = (Fun_Ax) sp_mv_mlt;
        ip->B_par = (void *) B;
    }
    else {
        ip->Bx = (Fun_Ax) NULL;
        ip->B_par = NULL;
    }
    ip->info = (Fun_info) NULL;
    ip->limit = limit;
    ip->b = b;
    ip->eps = tol;
    ip->x = x;
    iter_cgs(ip,r0);
    x = ip->x;
    if (steps) *steps = ip->steps;
    ip->shared_x = ip->shared_b = TRUE;
    iter_free(ip);   /* release only ITER structure */
    return x;
}
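
/* Usage sketch (added illustration, not part of the original source):
   solving a sparse system with the simple CGS interface above.  The
   problem size and matrix fill are hypothetical. */
#if 0
int   n = 1000, steps;
SPMAT *A = sp_get(n,n,5);    /* sparse n x n matrix, ~5 entries per row */
VEC   *b = v_get(n), *r0 = v_get(n), *x;

/* ... fill A with sp_set_val() and b with the right-hand side ... */
v_rand(r0);                                    /* shadow residual for CGS */
x = iter_spcgs(A,(SPMAT *)NULL,b,r0,1e-8,VNULL,200,&steps);   /* no B */
printf("CGS took %d steps\n",steps);
#endif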
/*
  Routine for performing LSQR -- the least squares QR algorithm
  of Paige and Saunders:
      "LSQR: an algorithm for sparse linear equations and
      sparse least squares", ACM Trans. Math. Soft., v. 8,
      pp. 43--71 (1982)
*/
/* iter_lsqr -- sparse CG-like least squares routine:
   -- finds min_x ||A.x-b||_2 using A defined through A & AT
   -- returns x (if x != NULL) */
#ifndef ANSI_C
VEC  *iter_lsqr(ip)
ITER *ip;
#else
VEC  *iter_lsqr(ITER *ip)
#endif
{
    STATIC VEC *u = VNULL, *v = VNULL, *w = VNULL, *tmp = VNULL;
    Real alpha, beta, phi, phi_bar;
    Real rho, rho_bar, rho_max, theta, nres;
    Real s, c;   /* for Givens' rotations */
    int  m, n;

    if ( ! ip || ! ip->b || !ip->Ax || !ip->ATx )
        error(E_NULL,"iter_lsqr");
    if ( ip->x == ip->b )
        error(E_INSITU,"iter_lsqr");
    if (!ip->stop_crit || !ip->x)
        error(E_NULL,"iter_lsqr");
    if ( ip->eps <= 0.0 ) ip->eps = MACHEPS;

    m = ip->b->dim;
    n = ip->x->dim;
    u = v_resize(u,(unsigned int)m);
    v = v_resize(v,(unsigned int)n);
    w = v_resize(w,(unsigned int)n);
    tmp = v_resize(tmp,(unsigned int)n);
    MEM_STAT_REG(u,TYPE_VEC);
    MEM_STAT_REG(v,TYPE_VEC);
    MEM_STAT_REG(w,TYPE_VEC);
    MEM_STAT_REG(tmp,TYPE_VEC);

    if (ip->x != VNULL) {
        ip->Ax(ip->A_par,ip->x,u);   /* u = A*x */
        v_sub(ip->b,u,u);            /* u = b-A*x */
    }
    else {  /* ip->x == 0 */
        ip->x = v_get(ip->b->dim);
        ip->shared_x = FALSE;
        v_copy(ip->b,u);             /* u = b */
    }

    beta = v_norm2(u);
    if ( beta == 0.0 ) return ip->x;
    sv_mlt(1.0/beta,u,u);
    (ip->ATx)(ip->AT_par,u,v);
    alpha = v_norm2(v);
    if ( alpha == 0.0 ) return ip->x;
    sv_mlt(1.0/alpha,v,v);
    v_copy(v,w);
    phi_bar = beta;
    rho_bar = alpha;

    rho_max = 1.0;
    for (ip->steps = 0; ip->steps <= ip->limit; ip->steps++) {
        tmp = v_resize(tmp,m);
        (ip->Ax)(ip->A_par,v,tmp);
        v_mltadd(tmp,u,-alpha,u);
        beta = v_norm2(u);
        sv_mlt(1.0/beta,u,u);

        tmp = v_resize(tmp,n);
        (ip->ATx)(ip->AT_par,u,tmp);
        v_mltadd(tmp,v,-beta,v);
        alpha = v_norm2(v);
        sv_mlt(1.0/alpha,v,v);

        rho = sqrt(rho_bar*rho_bar+beta*beta);
        if ( rho > rho_max )
            rho_max = rho;
        c = rho_bar/rho;
        s = beta/rho;
        theta = s*alpha;
        rho_bar = -c*alpha;
        phi = c*phi_bar;
        phi_bar = s*phi_bar;

        /* update ip->x & w */
        if ( rho == 0.0 )
            error(E_BREAKDOWN,"iter_lsqr");
        v_mltadd(ip->x,w,phi/rho,ip->x);
        v_mltadd(v,w,-theta/rho,w);

        nres = fabs(phi_bar*alpha*c)*rho_max;
        if (ip->info) ip->info(ip,nres,w,VNULL);
        if (ip->steps == 0) ip->init_res = nres;
        if ( ip->stop_crit(ip,nres,w,VNULL) ) break;
    }

#ifdef THREADSAFE
    V_FREE(u);   V_FREE(v);   V_FREE(w);   V_FREE(tmp);
#endif

    return ip->x;
}
/* iter_splsqr -- simple interface for SPMAT data structures */
#ifndef ANSI_C
VEC  *iter_splsqr(A,b,tol,x,limit,steps)
SPMAT  *A;
VEC    *b, *x;
double tol;
int    *steps, limit;
#else
VEC  *iter_splsqr(SPMAT *A, VEC *b, double tol,
                  VEC *x, int limit, int *steps)
#endif
{
    ITER *ip;

    ip = iter_get(0,0);
    ip->Ax = (Fun_Ax) sp_mv_mlt;
    ip->A_par = (void *) A;
    ip->ATx = (Fun_Ax) sp_vm_mlt;
    ip->AT_par = (void *) A;
    ip->Bx = (Fun_Ax) NULL;
    ip->B_par = NULL;
    ip->info = (Fun_info) NULL;
    ip->limit = limit;
    ip->b = b;
    ip->eps = tol;
    ip->x = x;
    iter_lsqr(ip);
    x = ip->x;
    if (steps) *steps = ip->steps;
    ip->shared_x = ip->shared_b = TRUE;
    iter_free(ip);   /* release only ITER structure */
    return x;
}
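
/* Usage sketch (added illustration, not part of the original source):
   a least squares solve min_x ||A.x - b||_2 for an overdetermined sparse
   A via the simple LSQR interface.  Dimensions and fill are hypothetical;
   note that iter_lsqr() requires a non-NULL starting vector x. */
#if 0
int   m = 2000, n = 100, steps;
SPMAT *A = sp_get(m,n,10);
VEC   *b = v_get(m), *x = v_get(n);

/* ... fill A and b ... */
v_zero(x);                                /* starting guess x = 0 */
x = iter_splsqr(A,b,1e-10,x,1000,&steps);
printf("LSQR: %d steps\n",steps);
#endif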
/* iter_arnoldi_iref -- an implementation of the Arnoldi method;
   iterative refinement is applied to keep the basis nearly orthogonal
*/
#ifndef ANSI_C
MAT  *iter_arnoldi_iref(ip,h_rem,Q,H)
ITER *ip;
Real *h_rem;
MAT  *Q, *H;
#else
MAT  *iter_arnoldi_iref(ITER *ip, Real *h_rem, MAT *Q, MAT *H)
#endif
{
    STATIC VEC *u=VNULL, *r=VNULL, *s=VNULL, *tmp=VNULL;
    VEC v;   /* auxiliary vector */
    int i,j;
    Real h_val, c;

    if (ip == INULL)
        error(E_NULL,"iter_arnoldi_iref");
    if ( ! ip->Ax || ! Q || ! ip->x )
        error(E_NULL,"iter_arnoldi_iref");
    if ( ip->k <= 0 )
        error(E_BOUNDS,"iter_arnoldi_iref");
    if ( Q->n != ip->x->dim || Q->m != ip->k )
        error(E_SIZES,"iter_arnoldi_iref");

    m_zero(Q);
    H = m_resize(H,ip->k,ip->k);
    m_zero(H);

    u = v_resize(u,ip->x->dim);
    r = v_resize(r,ip->k);
    s = v_resize(s,ip->k);
    tmp = v_resize(tmp,ip->x->dim);
    MEM_STAT_REG(u,TYPE_VEC);
    MEM_STAT_REG(r,TYPE_VEC);
    MEM_STAT_REG(s,TYPE_VEC);
    MEM_STAT_REG(tmp,TYPE_VEC);

    v.dim = v.max_dim = ip->x->dim;

    c = v_norm2(ip->x);
    if ( c <= 0.0)
        return H;
    else {
        v.ve = Q->me[0];
        sv_mlt(1.0/c,ip->x,&v);
    }

    v_zero(r);
    v_zero(s);
    for ( i = 0; i < ip->k; i++ )
    {
        v.ve = Q->me[i];
        u = (ip->Ax)(ip->A_par,&v,u);
        for (j = 0; j <= i; j++) {
            v.ve = Q->me[j];
            /* modified Gram-Schmidt */
            r->ve[j] = in_prod(&v,u);
            v_mltadd(u,&v,-r->ve[j],u);
        }
        h_val = v_norm2(u);
        /* if u == 0 then we have an exact subspace */
        if ( h_val <= 0.0 )
        {
            *h_rem = h_val;
            return H;
        }
        /* iterative refinement -- ensures near orthogonality */
        do {
            v_zero(tmp);
            for (j = 0; j <= i; j++) {
                v.ve = Q->me[j];
                s->ve[j] = in_prod(&v,u);
                v_mltadd(tmp,&v,s->ve[j],tmp);
            }
            v_sub(u,tmp,u);
            v_add(r,s,r);
        } while ( v_norm2(s) > 0.1*(h_val = v_norm2(u)) );
        /* now that u is nearly orthogonal to Q, update H */
        set_col(H,i,r);
        /* check once again if h_val is zero */
        if ( h_val <= 0.0 )
        {
            *h_rem = h_val;
            return H;
        }
        if ( i == ip->k-1 )
        {
            *h_rem = h_val;
            continue;
        }
        /* H->me[i+1][i] = h_val; */
        m_set_val(H,i+1,i,h_val);
        v.ve = Q->me[i+1];
        sv_mlt(1.0/h_val,u,&v);
    }

#ifdef THREADSAFE
    V_FREE(u);   V_FREE(r);   V_FREE(s);   V_FREE(tmp);
#endif

    return H;
}
/* iter_arnoldi -- an implementation of the Arnoldi method;
   modified Gram-Schmidt algorithm
*/
#ifndef ANSI_C
MAT  *iter_arnoldi(ip,h_rem,Q,H)
ITER *ip;
Real *h_rem;
MAT  *Q, *H;
#else
MAT  *iter_arnoldi(ITER *ip, Real *h_rem, MAT *Q, MAT *H)
#endif
{
    STATIC VEC *u=VNULL, *r=VNULL;
    VEC v;   /* auxiliary vector */
    int i,j;
    Real h_val, c;

    if (ip == INULL)
        error(E_NULL,"iter_arnoldi");
    if ( ! ip->Ax || ! Q || ! ip->x )
        error(E_NULL,"iter_arnoldi");
    if ( ip->k <= 0 )
        error(E_BOUNDS,"iter_arnoldi");
    if ( Q->n != ip->x->dim || Q->m != ip->k )
        error(E_SIZES,"iter_arnoldi");

    m_zero(Q);
    H = m_resize(H,ip->k,ip->k);
    m_zero(H);

    u = v_resize(u,ip->x->dim);
    r = v_resize(r,ip->k);
    MEM_STAT_REG(u,TYPE_VEC);
    MEM_STAT_REG(r,TYPE_VEC);

    v.dim = v.max_dim = ip->x->dim;

    c = v_norm2(ip->x);
    if ( c <= 0.0)
        return H;
    else {
        v.ve = Q->me[0];
        sv_mlt(1.0/c,ip->x,&v);
    }

    v_zero(r);
    for ( i = 0; i < ip->k; i++ )
    {
        v.ve = Q->me[i];
        u = (ip->Ax)(ip->A_par,&v,u);
        for (j = 0; j <= i; j++) {
            v.ve = Q->me[j];
            /* modified Gram-Schmidt */
            r->ve[j] = in_prod(&v,u);
            v_mltadd(u,&v,-r->ve[j],u);
        }
        h_val = v_norm2(u);
        /* if u == 0 then we have an exact subspace */
        if ( h_val <= 0.0 )
        {
            *h_rem = h_val;
            return H;
        }
        set_col(H,i,r);
        if ( i == ip->k-1 )
        {
            *h_rem = h_val;
            continue;
        }
        /* H->me[i+1][i] = h_val; */
        m_set_val(H,i+1,i,h_val);
        v.ve = Q->me[i+1];
        sv_mlt(1.0/h_val,u,&v);
    }

#ifdef THREADSAFE
    V_FREE(u);   V_FREE(r);
#endif

    return H;
}
/* iter_sparnoldi -- uses iter_arnoldi_iref() with an explicit
   (sparse) representation of A */
#ifndef ANSI_C
MAT  *iter_sparnoldi(A,x0,m,h_rem,Q,H)
SPMAT *A;
VEC   *x0;
int   m;
Real  *h_rem;
MAT   *Q, *H;
#else
MAT  *iter_sparnoldi(SPMAT *A, VEC *x0, int m, Real *h_rem, MAT *Q, MAT *H)
#endif
{
    ITER *ip;

    ip = iter_get(0,0);
    ip->Ax = (Fun_Ax) sp_mv_mlt;
    ip->A_par = (void *) A;
    ip->x = x0;
    ip->k = m;
    iter_arnoldi_iref(ip,h_rem,Q,H);
    ip->shared_x = ip->shared_b = TRUE;
    iter_free(ip);   /* release only ITER structure */
    return H;
}
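
/* Usage sketch (added illustration, not part of the original source):
   building a k-dimensional Krylov basis Q and Hessenberg matrix H for a
   sparse n x n matrix A.  Rows 0..k-1 of Q span the Krylov space and
   H ~ Q*A*Q^T (cf. test_gmres below), so the eigenvalues of H are Ritz
   approximations to eigenvalues of A.  The setup of A is hypothetical. */
#if 0
int  n = 500, k = 20;
Real h_rem;
MAT  *Q = m_get(k,n), *H = m_get(k,k);
VEC  *x0 = v_get(n);

/* ... fill A ... */
v_rand(x0);                            /* random starting vector */
H = iter_sparnoldi(A,x0,k,&h_rem,Q,H);
/* h_rem holds the remaining (k+1,k) Hessenberg entry */
#endif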
/* for testing gmres */
#ifndef ANSI_C
static void test_gmres(ip,i,Q,R,givc,givs,h_val)
ITER *ip;
int  i;
MAT  *Q, *R;
VEC  *givc, *givs;
double h_val;
#else
static void test_gmres(ITER *ip, int i, MAT *Q, MAT *R,
                       VEC *givc, VEC *givs, double h_val)
#endif
{
    VEC vt, vt1;
    STATIC MAT *Q1 = MNULL, *R1 = MNULL;
    int j;

    /* test Q*A*Q^T = R */
    Q = m_resize(Q,i+1,ip->b->dim);
    Q1 = m_resize(Q1,i+1,ip->b->dim);
    R1 = m_resize(R1,i+1,i+1);
    MEM_STAT_REG(Q1,TYPE_MAT);
    MEM_STAT_REG(R1,TYPE_MAT);
    vt.dim = vt.max_dim = ip->b->dim;
    vt1.dim = vt1.max_dim = ip->b->dim;
    for (j=0; j <= i; j++) {
        vt.ve = Q->me[j];
        vt1.ve = Q1->me[j];
        ip->Ax(ip->A_par,&vt,&vt1);
    }

    mmtr_mlt(Q,Q1,R1);
    R1 = m_resize(R1,i+2,i+1);
    for (j=0; j < i; j++)
        R1->me[i+1][j] = 0.0;
    R1->me[i+1][i] = h_val;

    for (j = 0; j <= i; j++) {
        rot_rows(R1,j,j+1,givc->ve[j],givs->ve[j],R1);
    }

    R1 = m_resize(R1,i+1,i+1);
    m_sub(R,R1,R1);
    /* if (m_norm_inf(R1) > MACHEPS*ip->b->dim) */
#ifndef MEX
    printf(" %d. ||Q*A*Q^T - H|| = %g [cf. MACHEPS = %g]\n",
           ip->steps,m_norm_inf(R1),MACHEPS);
#endif

    /* check Q*Q^T = I */
    Q = m_resize(Q,i+1,ip->b->dim);
    mmtr_mlt(Q,Q,R1);
    for (j=0; j <= i; j++)
        R1->me[j][j] -= 1.0;
#ifndef MEX
    if (m_norm_inf(R1) > MACHEPS*ip->b->dim)
        printf(" ! m_norm_inf(Q*Q^T) = %g\n",m_norm_inf(R1));
#endif

#ifdef THREADSAFE
    M_FREE(Q1);   M_FREE(R1);
#endif
}
/* gmres -- generalised minimum residual algorithm of Saad & Schultz
   SIAM J. Sci. Stat. Comp. v.7, pp.856--869 (1986)
*/
#ifndef ANSI_C
VEC  *iter_gmres(ip)
ITER *ip;
#else
VEC  *iter_gmres(ITER *ip)
#endif
{
    STATIC VEC *u=VNULL, *r=VNULL, *rhs = VNULL;
    STATIC VEC *givs=VNULL, *givc=VNULL, *z = VNULL;
    STATIC MAT *Q = MNULL, *R = MNULL;
    VEC *rr, v, v1;   /* additional pointers (not real vectors) */
    int i,j, done;
    Real nres;
    /* Real last_h; */

    if (ip == INULL)
        error(E_NULL,"iter_gmres");
    if ( ! ip->Ax || ! ip->b )
        error(E_NULL,"iter_gmres");
    if ( ! ip->stop_crit )
        error(E_NULL,"iter_gmres");
    if ( ip->k <= 0 )
        error(E_BOUNDS,"iter_gmres");
    if (ip->x != VNULL && ip->x->dim != ip->b->dim)
        error(E_SIZES,"iter_gmres");
    if (ip->eps <= 0.0) ip->eps = MACHEPS;

    r = v_resize(r,ip->k+1);
    u = v_resize(u,ip->b->dim);
    rhs = v_resize(rhs,ip->k+1);
    givs = v_resize(givs,ip->k);   /* Givens rotations */
    givc = v_resize(givc,ip->k);
    MEM_STAT_REG(r,TYPE_VEC);
    MEM_STAT_REG(u,TYPE_VEC);
    MEM_STAT_REG(rhs,TYPE_VEC);
    MEM_STAT_REG(givs,TYPE_VEC);
    MEM_STAT_REG(givc,TYPE_VEC);

    R = m_resize(R,ip->k+1,ip->k);
    Q = m_resize(Q,ip->k,ip->b->dim);
    MEM_STAT_REG(R,TYPE_MAT);
    MEM_STAT_REG(Q,TYPE_MAT);

    if (ip->x == VNULL) {  /* ip->x == 0 */
        ip->x = v_get(ip->b->dim);
        ip->shared_x = FALSE;
    }

    v.dim = v.max_dim = ip->b->dim;     /* v and v1 are pointers to rows */
    v1.dim = v1.max_dim = ip->b->dim;   /* of matrix Q */

    if (ip->Bx != (Fun_Ax)NULL) {   /* if a preconditioner is defined */
        z = v_resize(z,ip->b->dim);
        MEM_STAT_REG(z,TYPE_VEC);
    }

    done = FALSE;
    for (ip->steps = 0; ip->steps < ip->limit; ) {
        /* restart */
        ip->Ax(ip->A_par,ip->x,u);    /* u = A*x */
        v_sub(ip->b,u,u);             /* u = b - A*x */
        rr = u;                       /* rr is a pointer only */
        if (ip->Bx) {
            (ip->Bx)(ip->B_par,u,z);  /* tmp = B*(b-A*x) */
            rr = z;
        }

        nres = v_norm2(rr);
        if (ip->steps == 0) {
            if (ip->info) ip->info(ip,nres,VNULL,VNULL);
            ip->init_res = nres;
        }

        if ( nres == 0.0 ) {
            done = TRUE;
            break;
        }

        v.ve = Q->me[0];
        sv_mlt(1.0/nres,rr,&v);

        v_zero(r);
        v_zero(rhs);
        rhs->ve[0] = nres;

        for ( i = 0; i < ip->k && ip->steps < ip->limit; i++ ) {
            ip->steps++;
            v.ve = Q->me[i];
            (ip->Ax)(ip->A_par,&v,u);
            rr = u;
            if (ip->Bx) {
                (ip->Bx)(ip->B_par,u,z);
                rr = z;
            }

            if (i < ip->k - 1) {
                v1.ve = Q->me[i+1];
                v_copy(rr,&v1);
                for (j = 0; j <= i; j++) {
                    v.ve = Q->me[j];
                    /* r->ve[j] = in_prod(&v,rr); */
                    /* modified Gram-Schmidt algorithm */
                    r->ve[j] = in_prod(&v,&v1);
                    v_mltadd(&v1,&v,-r->ve[j],&v1);
                }

                r->ve[i+1] = nres = v_norm2(&v1);
                if (nres <= MACHEPS*ip->init_res) {
                    for (j = 0; j < i; j++)
                        rot_vec(r,j,j+1,givc->ve[j],givs->ve[j],r);
                    set_col(R,i,r);
                    done = TRUE;
                    break;
                }
                sv_mlt(1.0/nres,&v1,&v1);
            }
            else {   /* i == ip->k - 1 */
                /* Q->me[ip->k] need not be computed */
                for (j = 0; j <= i; j++) {
                    v.ve = Q->me[j];
                    r->ve[j] = in_prod(&v,rr);
                }

                nres = in_prod(rr,rr) - in_prod(r,r);
                if (sqrt(fabs(nres)) <= MACHEPS*ip->init_res) {
                    for (j = 0; j < i; j++)
                        rot_vec(r,j,j+1,givc->ve[j],givs->ve[j],r);
                    set_col(R,i,r);
                    done = TRUE;
                    break;
                }
                if (nres < 0.0) {   /* do restart */
                    i--;
                    ip->steps--;
                    break;
                }
                r->ve[i+1] = sqrt(nres);
            }

            /* QR update */
            /* last_h = r->ve[i+1]; */   /* for test only */
            for (j = 0; j < i; j++)
                rot_vec(r,j,j+1,givc->ve[j],givs->ve[j],r);
            givens(r->ve[i],r->ve[i+1],&givc->ve[i],&givs->ve[i]);
            rot_vec(r,i,i+1,givc->ve[i],givs->ve[i],r);
            rot_vec(rhs,i,i+1,givc->ve[i],givs->ve[i],rhs);
            set_col(R,i,r);

            nres = fabs((double) rhs->ve[i+1]);
            if (ip->info) ip->info(ip,nres,VNULL,VNULL);
            if ( ip->stop_crit(ip,nres,VNULL,VNULL) ) {
                done = TRUE;
                break;
            }
        }

        /* use the (i+1) x (i+1) submatrix of R */
        if (i >= ip->k) i = ip->k - 1;
        R = m_resize(R,i+1,i+1);
        rhs = v_resize(rhs,i+1);

        /* test only */
        /* test_gmres(ip,i,Q,R,givc,givs,last_h); */

        Usolve(R,rhs,rhs,0.0);   /* solve the system R*x = rhs */

        /* new approximation */
        for (j = 0; j <= i; j++) {
            v.ve = Q->me[j];
            v_mltadd(ip->x,&v,rhs->ve[j],ip->x);
        }

        if (done) break;

        /* back to old dimensions */
        rhs = v_resize(rhs,ip->k+1);
        R = m_resize(R,ip->k+1,ip->k);
    }

#ifdef THREADSAFE
    V_FREE(u);     V_FREE(r);     V_FREE(rhs);
    V_FREE(givs);  V_FREE(givc);  V_FREE(z);
    M_FREE(Q);     M_FREE(R);
#endif

    return ip->x;
}
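
/* Example (added illustration, not part of the original source):
   iter_gmres() never needs an assembled matrix -- any routine following
   the Fun_Ax calling convention used above (out = f(par,in,out)) can act
   as A.  The operator below (A = 2I + S, S the cyclic shift, which is
   nonsingular) and the surrounding setup are hypothetical. */
#if 0
static VEC *my_Ax(void *par, VEC *in, VEC *out)
{
    unsigned int i, n = in->dim;

    out = v_resize(out,n);
    for (i = 0; i < n; i++)          /* out = 2*in + (shifted in) */
        out->ve[i] = 2.0*in->ve[i] + in->ve[(i+1) % n];
    return out;
}

ITER *ip = iter_get(0,0);
ip->Ax    = (Fun_Ax) my_Ax;
ip->A_par = NULL;                 /* my_Ax ignores its parameter block */
ip->b     = b;                    /* right-hand side, assumed given */
ip->k     = 30;                   /* restart length: GMRES(30) */
ip->limit = 300;
ip->eps   = 1e-9;
iter_gmres(ip);                   /* solution is left in ip->x */
#endif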
/* iter_spgmres -- a simple interface to iter_gmres */
#ifndef ANSI_C
VEC  *iter_spgmres(A,B,b,tol,x,k,limit,steps)
SPMAT  *A, *B;
VEC    *b, *x;
double tol;
int    *steps, k, limit;
#else
VEC  *iter_spgmres(SPMAT *A, SPMAT *B, VEC *b, double tol,
                   VEC *x, int k, int limit, int *steps)
#endif
{
    ITER *ip;

    ip = iter_get(0,0);
    ip->Ax = (Fun_Ax) sp_mv_mlt;
    ip->A_par = (void *) A;
    if (B) {
        ip->Bx = (Fun_Ax) sp_mv_mlt;
        ip->B_par = (void *) B;
    }
    else {
        ip->Bx = (Fun_Ax) NULL;
        ip->B_par = NULL;
    }
    ip->k = k;
    ip->limit = limit;
    ip->info = (Fun_info) NULL;
    ip->b = b;
    ip->eps = tol;
    ip->x = x;
    iter_gmres(ip);
    x = ip->x;
    if (steps) *steps = ip->steps;
    ip->shared_x = ip->shared_b = TRUE;
    iter_free(ip);   /* release only ITER structure */
    return x;
}
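
/* Usage sketch (added illustration, not part of the original source):
   restarted GMRES(k) on a sparse system, with a second sparse matrix B
   applied as the preconditioner.  A, B and b are hypothetical. */
#if 0
int   n = 1000, steps;
SPMAT *A = sp_get(n,n,5), *B = sp_get(n,n,5);
VEC   *b = v_get(n), *x;

/* ... fill A and b; B should approximate inv(A) ... */
x = iter_spgmres(A,B,b,1e-9,VNULL,30,300,&steps);   /* GMRES(30) */
printf("GMRES: %d steps\n",steps);
#endif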
/* for testing mgcr */
#ifndef ANSI_C
static void test_mgcr(ip,i,Q,R)
ITER *ip;
int  i;
MAT  *Q, *R;
#else
static void test_mgcr(ITER *ip, int i, MAT *Q, MAT *R)
#endif
{
    VEC vt, vt1;
    static MAT *R1 = MNULL;
    static VEC *r = VNULL, *r1 = VNULL;
    VEC *rr;
    int k,j;
    Real sm;

    /* check Q*Q^T = I */
    vt.dim = vt.max_dim = ip->b->dim;
    vt1.dim = vt1.max_dim = ip->b->dim;
    Q = m_resize(Q,i+1,ip->b->dim);
    R1 = m_resize(R1,i+1,i+1);
    r = v_resize(r,ip->b->dim);
    r1 = v_resize(r1,ip->b->dim);
    MEM_STAT_REG(R1,TYPE_MAT);
    MEM_STAT_REG(r,TYPE_VEC);
    MEM_STAT_REG(r1,TYPE_VEC);
    m_zero(R1);
    for (k=1; k <= i; k++)
        for (j=1; j <= i; j++) {
            vt.ve = Q->me[k];
            vt1.ve = Q->me[j];
            R1->me[k][j] = in_prod(&vt,&vt1);
        }
    for (j=1; j <= i; j++)
        R1->me[j][j] -= 1.0;
#ifndef MEX
    if (m_norm_inf(R1) > MACHEPS*ip->b->dim)
        printf(" ! (mgcr:) m_norm_inf(Q*Q^T) = %g\n",m_norm_inf(R1));
#endif

    /* check (r_i,Ap_j) = 0 for j <= i */
    ip->Ax(ip->A_par,ip->x,r);
    v_sub(ip->b,r,r);
    rr = r;
    if (ip->Bx) {
        ip->Bx(ip->B_par,r,r1);
        rr = r1;
    }
#ifndef MEX
    printf(" ||r|| = %g\n",v_norm2(rr));
#endif
    sm = 0.0;
    for (j = 1; j <= i; j++) {
        vt.ve = Q->me[j];
        sm = max(sm,in_prod(&vt,rr));
    }
#ifndef MEX
    if (sm >= MACHEPS*ip->b->dim)
        printf(" ! (mgcr:) max_j (r,Ap_j) = %g\n",sm);
#endif
}
/*
  iter_mgcr -- modified generalized conjugate residual algorithm;
  a fast version of GCR
*/
#ifndef ANSI_C
VEC *iter_mgcr(ip)
ITER *ip;
#else
VEC *iter_mgcr(ITER *ip)
#endif
{
    STATIC VEC *As=VNULL, *beta=VNULL, *alpha=VNULL, *z=VNULL;
    STATIC MAT *N=MNULL, *H=MNULL;

    VEC *rr, v, s;   /* additional pointer and structures */
    Real nres;       /* norm of a residual */
    Real dd;         /* coefficient d_i */
    int i,j;
    int done;        /* if TRUE then stop the iterative process */
    int dim;         /* dimension of the problem */

    /* ip cannot be NULL */
    if (ip == INULL) error(E_NULL,"mgcr");
    /* Ax, b and the stopping criterion must be given */
    if (! ip->Ax || ! ip->b || ! ip->stop_crit)
        error(E_NULL,"mgcr");
    /* at least one direction vector must exist */
    if ( ip->k <= 0) error(E_BOUNDS,"mgcr");
    /* if the vector x is given then b and x must have the same dimension */
    if ( ip->x && ip->x->dim != ip->b->dim)
        error(E_SIZES,"mgcr");
    if (ip->eps <= 0.0) ip->eps = MACHEPS;

    dim = ip->b->dim;
    As = v_resize(As,dim);
    alpha = v_resize(alpha,ip->k);
    beta = v_resize(beta,ip->k);
    MEM_STAT_REG(As,TYPE_VEC);
    MEM_STAT_REG(alpha,TYPE_VEC);
    MEM_STAT_REG(beta,TYPE_VEC);

    H = m_resize(H,ip->k,ip->k);
    N = m_resize(N,ip->k,dim);
    MEM_STAT_REG(H,TYPE_MAT);
    MEM_STAT_REG(N,TYPE_MAT);

    /* if a preconditioner is defined */
    if (ip->Bx) {
        z = v_resize(z,dim);
        MEM_STAT_REG(z,TYPE_VEC);
    }

    /* if x is NULL then it is assumed that x has
       entries with value zero */
    if ( ! ip->x ) {
        ip->x = v_get(ip->b->dim);
        ip->shared_x = FALSE;
    }

    /* v and s are additional pointers to rows of N */
    /* they must have the same dimension as rows of N */
    v.dim = v.max_dim = s.dim = s.max_dim = dim;

    done = FALSE;
    for (ip->steps = 0; ip->steps < ip->limit; ) {
        (*ip->Ax)(ip->A_par,ip->x,As);    /* As = A*x */
        v_sub(ip->b,As,As);               /* As = b - A*x */
        rr = As;                          /* rr is an additional pointer */

        /* if a preconditioner is defined */
        if (ip->Bx) {
            (*ip->Bx)(ip->B_par,As,z);    /* z = B*(b-A*x) */
            rr = z;
        }

        /* norm of the residual */
        nres = v_norm2(rr);
        dd = nres;   /* dd = ||r_i|| */

        /* check if the norm of the residual is zero */
        if (ip->steps == 0) {
            /* information for the user */
            if (ip->info) (*ip->info)(ip,nres,As,rr);
            ip->init_res = fabs(nres);
        }

        if (nres == 0.0) {
            /* the iterative process is finished */
            done = TRUE;
            break;
        }

        /* save this residual in the first row of N */
        v.ve = N->me[0];
        v_copy(rr,&v);

        for (i = 0; i < ip->k && ip->steps < ip->limit; i++) {
            ip->steps++;
            v.ve = N->me[i];   /* pointer to a row of N (=s_i) */
            /* note that we must use here &v, not v */
            (*ip->Ax)(ip->A_par,&v,As);
            rr = As;                        /* As = A*s_i */
            if (ip->Bx) {
                (*ip->Bx)(ip->B_par,As,z);  /* z = B*A*s_i */
                rr = z;
            }

            if (i < ip->k - 1) {
                s.ve = N->me[i+1];   /* pointer to a row of N (=s_{i+1}) */
                v_copy(rr,&s);       /* s_{i+1} = B*A*s_i */
                for (j = 0; j <= i-1; j++) {
                    v.ve = N->me[j+1];   /* pointer to a row of N (=s_{j+1}) */
                    /* beta->ve[j] = in_prod(&v,rr); */   /* beta_{j,i} */
                    /* modified Gram-Schmidt algorithm */
                    beta->ve[j] = in_prod(&v,&s);         /* beta_{j,i} */
                    /* s_{i+1} -= beta_{j,i}*s_{j+1} */
                    v_mltadd(&s,&v,- beta->ve[j],&s);
                }

                /* beta_{i,i} = ||s_{i+1}||_2 */
                beta->ve[i] = nres = v_norm2(&s);
                if ( nres <= MACHEPS*ip->init_res) {
                    /* s_{i+1} == 0 */
                    i--;
                    done = TRUE;
                    break;
                }
                sv_mlt(1.0/nres,&s,&s);   /* normalize s_{i+1} */

                v.ve = N->me[0];
                alpha->ve[i] = in_prod(&v,&s);   /* alpha_i = (s_0 , s_{i+1}) */
            }
            else {
                for (j = 0; j <= i-1; j++) {
                    v.ve = N->me[j+1];   /* pointer to a row of N (=s_{j+1}) */
                    beta->ve[j] = in_prod(&v,rr);   /* beta_{j,i} */
                }

                nres = in_prod(rr,rr);   /* rr = B*A*s_{k-1} */
                for (j = 0; j <= i-1; j++)
                    nres -= beta->ve[j]*beta->ve[j];

                if (sqrt(fabs(nres)) <= MACHEPS*ip->init_res) {
                    /* s_k is zero */
                    i--;
                    done = TRUE;
                    break;
                }
                if (nres < 0.0) {   /* do restart */
                    i--;
                    ip->steps--;
                    break;
                }
                beta->ve[i] = sqrt(nres);   /* beta_{k-1,k-1} */

                v.ve = N->me[0];
                alpha->ve[i] = in_prod(&v,rr);
                for (j = 0; j <= i-1; j++)
                    alpha->ve[i] -= beta->ve[j]*alpha->ve[j];
                alpha->ve[i] /= beta->ve[i];   /* alpha_{k-1} */
            }

            set_col(H,i,beta);

            /* another way of computing dd */
            /* if (fabs((double)alpha->ve[i]) > dd)  {
                 nres = - dd*dd + alpha->ve[i]*alpha->ve[i];
                 nres = sqrt((double) nres);
                 if (ip->info) (*ip->info)(ip,-nres,VNULL,VNULL);
                 break;
               } */
            /* to avoid overflow/underflow in computing dd */
            /* dd *= cos(asin((double)(alpha->ve[i]/dd))); */

            nres = alpha->ve[i]/dd;
            if (fabs(nres-1.0) <= MACHEPS*ip->init_res)
                dd = 0.0;
            else {
                nres = 1.0 - nres*nres;
                if (nres < 0.0) {
                    nres = sqrt((double) -nres);
                    if (ip->info) (*ip->info)(ip,-dd*nres,VNULL,VNULL);
                    break;
                }
                dd *= sqrt((double) nres);
            }

            if (ip->info) (*ip->info)(ip,dd,VNULL,VNULL);
            if ( ip->stop_crit(ip,dd,VNULL,VNULL) ) {
                /* the stopping criterion is satisfied */
                done = TRUE;
                break;
            }
        }   /* end of the inner for loop */

        if (i >= ip->k) i = ip->k - 1;
        /* use the (i+1) x (i+1) submatrix of H */
        H = m_resize(H,i+1,i+1);
        alpha = v_resize(alpha,i+1);
        Usolve(H,alpha,alpha,0.0);   /* c_i is saved in alpha */

        for (j = 0; j <= i; j++) {
            v.ve = N->me[j];
            v_mltadd(ip->x,&v,alpha->ve[j],ip->x);
        }

        if (done) break;   /* stop the iterative process */

        alpha = v_resize(alpha,ip->k);
        H = m_resize(H,ip->k,ip->k);
    }   /* end of the main (restart) loop */

#ifdef THREADSAFE
    V_FREE(As);   V_FREE(beta);   V_FREE(alpha);   V_FREE(z);
    M_FREE(N);    M_FREE(H);
#endif

    return ip->x;   /* return the solution */
}
/* iter_spmgcr -- a simple interface to iter_mgcr;
   B, if non-NULL, is used as a preconditioner */
#ifndef ANSI_C
VEC *iter_spmgcr(A,B,b,tol,x,k,limit,steps)
SPMAT *A, *B;
VEC   *b, *x;
double tol;
int   *steps, k, limit;
#else
VEC *iter_spmgcr(SPMAT *A, SPMAT *B, VEC *b, double tol,
                 VEC *x, int k, int limit, int *steps)
#endif
{
    ITER *ip;

    ip = iter_get(0,0);
    ip->Ax = (Fun_Ax) sp_mv_mlt;
    ip->A_par = (void *) A;
    if (B) {
        ip->Bx = (Fun_Ax) sp_mv_mlt;
        ip->B_par = (void *) B;
    }
    else {
        ip->Bx = (Fun_Ax) NULL;
        ip->B_par = NULL;
    }
    ip->k = k;
    ip->limit = limit;
    ip->info = (Fun_info) NULL;
    ip->b = b;
    ip->eps = tol;
    ip->x = x;
    iter_mgcr(ip);
    x = ip->x;
    if (steps) *steps = ip->steps;
    ip->shared_x = ip->shared_b = TRUE;
    iter_free(ip);   /* release only ITER structure */
    return x;
}
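
/* Usage sketch (added illustration, not part of the original source):
   MGCR with k direction vectors per restart; the call pattern mirrors
   iter_spgmres() above.  A and b are hypothetical. */
#if 0
int   n = 1000, steps;
SPMAT *A = sp_get(n,n,5);
VEC   *b = v_get(n), *x;

/* ... fill A and b ... */
x = iter_spmgcr(A,(SPMAT *)NULL,b,1e-9,VNULL,30,300,&steps);
printf("MGCR: %d steps\n",steps);
#endif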
/*
  Conjugate gradient method for the normal equations;
  the preconditioner B must be symmetric !!
*/
#ifndef ANSI_C
VEC  *iter_cgne(ip)
ITER *ip;
#else
VEC  *iter_cgne(ITER *ip)
#endif
{
    STATIC VEC *r = VNULL, *p = VNULL, *q = VNULL, *z = VNULL;
    Real alpha, beta, inner, old_inner, nres;
    VEC *rr1;   /* pointer only */

    if (ip == INULL)
        error(E_NULL,"iter_cgne");
    if (!ip->Ax || ! ip->ATx || !ip->b)
        error(E_NULL,"iter_cgne");
    if ( ip->x == ip->b )
        error(E_INSITU,"iter_cgne");
    if (!ip->stop_crit)
        error(E_NULL,"iter_cgne");
    if ( ip->eps <= 0.0 ) ip->eps = MACHEPS;

    r = v_resize(r,ip->b->dim);
    p = v_resize(p,ip->b->dim);
    q = v_resize(q,ip->b->dim);
    MEM_STAT_REG(r,TYPE_VEC);
    MEM_STAT_REG(p,TYPE_VEC);
    MEM_STAT_REG(q,TYPE_VEC);

    z = v_resize(z,ip->b->dim);
    MEM_STAT_REG(z,TYPE_VEC);

    if (ip->x) {
        if (ip->x->dim != ip->b->dim)
            error(E_SIZES,"iter_cgne");
        ip->Ax(ip->A_par,ip->x,p);   /* p = A*x */
        v_sub(ip->b,p,z);            /* z = b - A*x */
    }
    else {  /* ip->x == 0 */
        ip->x = v_get(ip->b->dim);
        ip->shared_x = FALSE;
        v_copy(ip->b,z);
    }
    rr1 = z;
    if (ip->Bx) {
        (ip->Bx)(ip->B_par,rr1,p);
        rr1 = p;
    }
    (ip->ATx)(ip->AT_par,rr1,r);   /* r = A^T*B*(b-A*x) */

    old_inner = 0.0;
    for ( ip->steps = 0; ip->steps <= ip->limit; ip->steps++ )
    {
        rr1 = r;
        if ( ip->Bx ) {
            (ip->Bx)(ip->B_par,r,z);   /* rr = B*r */
            rr1 = z;
        }

        inner = in_prod(r,rr1);
        nres = sqrt(fabs(inner));
        if (ip->info) ip->info(ip,nres,r,rr1);
        if (ip->steps == 0) ip->init_res = nres;
        if ( ip->stop_crit(ip,nres,r,rr1) ) break;

        if ( ip->steps )  /* if ( ip->steps > 0 ) ... */
        {
            beta = inner/old_inner;
            p = v_mltadd(rr1,p,beta,p);
        }
        else              /* if ( ip->steps == 0 ) ... */
        {
            beta = 0.0;
            p = v_copy(rr1,p);
            old_inner = 0.0;
        }

        (ip->Ax)(ip->A_par,p,q);   /* q = A*p */
        if (ip->Bx) {
            (ip->Bx)(ip->B_par,q,z);
            (ip->ATx)(ip->AT_par,z,q);
            rr1 = q;   /* q = A^T*B*A*p */
        }
        else {
            (ip->ATx)(ip->AT_par,q,z);   /* z = A^T*A*p */
            rr1 = z;
        }

        alpha = inner/in_prod(rr1,p);
        v_mltadd(ip->x,p,alpha,ip->x);
        v_mltadd(r,rr1,-alpha,r);
        old_inner = inner;
    }

#ifdef THREADSAFE
    V_FREE(r);   V_FREE(p);   V_FREE(q);   V_FREE(z);
#endif

    return ip->x;
}
/* iter_spcgne -- a simple interface to iter_cgne() which
   uses sparse matrix data structures
   -- assumes that B contains an actual preconditioner (or NULL)
   always use as follows:
      x = iter_spcgne(A,B,b,eps,x,limit,steps);
   or
      x = iter_spcgne(A,B,b,eps,VNULL,limit,steps);
   In the second case the solution vector is created.
*/
#ifndef ANSI_C
VEC  *iter_spcgne(A,B,b,eps,x,limit,steps)
SPMAT  *A, *B;
VEC    *b, *x;
double eps;
int    *steps, limit;
#else
VEC  *iter_spcgne(SPMAT *A, SPMAT *B, VEC *b, double eps,
                  VEC *x, int limit, int *steps)
#endif
{
    ITER *ip;

    ip = iter_get(0,0);
    ip->Ax = (Fun_Ax) sp_mv_mlt;
    ip->A_par = (void *)A;
    ip->ATx = (Fun_Ax) sp_vm_mlt;
    ip->AT_par = (void *)A;
    if (B) {
        ip->Bx = (Fun_Ax) sp_mv_mlt;
        ip->B_par = (void *)B;
    }
    else {
        ip->Bx = (Fun_Ax) NULL;
        ip->B_par = NULL;
    }
    ip->info = (Fun_info) NULL;
    ip->b = b;
    ip->eps = eps;
    ip->limit = limit;
    ip->x = x;
    iter_cgne(ip);
    x = ip->x;
    if (steps) *steps = ip->steps;
    ip->shared_x = ip->shared_b = TRUE;
    iter_free(ip);   /* release only ITER structure */
    return x;
}
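
/* Usage sketch (added illustration, not part of the original source):
   iter_cgne() runs CG on the normal equations A^T*B*A*x = A^T*B*b, so it
   handles nonsymmetric A at the price of squaring the condition number.
   A and b are hypothetical. */
#if 0
int   n = 1000, steps;
SPMAT *A = sp_get(n,n,5);
VEC   *b = v_get(n), *x;

/* ... fill A and b ... */
x = iter_spcgne(A,(SPMAT *)NULL,b,1e-8,VNULL,500,&steps);
printf("CGNE: %d steps\n",steps);
#endif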