spbkp.c 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445
  1. /**************************************************************************
  2. **
  3. ** Copyright (C) 1993 David E. Steward & Zbigniew Leyk, all rights reserved.
  4. **
  5. ** Meschach Library
  6. **
  7. ** This Meschach Library is provided "as is" without any express
  8. ** or implied warranty of any kind with respect to this software.
  9. ** In particular the authors shall not be liable for any direct,
  10. ** indirect, special, incidental or consequential damages arising
  11. ** in any way from use of the software.
  12. **
  13. ** Everyone is granted permission to copy, modify and redistribute this
  14. ** Meschach Library, provided:
  15. ** 1. All copies contain this copyright notice.
  16. ** 2. All modified copies shall carry a notice stating who
  17. ** made the last modification and the date of such modification.
  18. ** 3. No charge is made for this software or works derived from it.
  19. ** This clause shall not be construed as constraining other software
  20. ** distributed on the same medium as this software, nor is a
  21. ** distribution fee considered a charge.
  22. **
  23. ***************************************************************************/
  24. /*
  25. Sparse matrix Bunch--Kaufman--Parlett factorisation and solve
  26. Radical revision started Thu 05th Nov 1992, 09:36:12 AM
  27. to use Karen George's suggestion of leaving the the row elements unordered
  28. Radical revision completed Mon 07th Dec 1992, 10:59:57 AM
  29. */
  30. static char rcsid[] = "$Id: spbkp.c,v 1.6 1996/08/20 19:53:10 stewart Exp $";
  31. #include <stdio.h>
  32. #include <math.h>
  33. #include "sparse2.h"
  34. #ifdef MALLOCDECL
  35. #include <malloc.h>
  36. #endif
  37. #define alpha 0.6403882032022076 /* = (1+sqrt(17))/8 */
  38. #define btos(x) ((x) ? "TRUE" : "FALSE")
  39. /* assume no use of sqr() uses side-effects */
  40. #define sqr(x) ((x)*(x))
  41. /* unord_get_idx -- returns index (encoded if entry not allocated)
  42. of the element of row r with column j
  43. -- uses linear search */
  44. #ifndef ANSI_C
  45. int unord_get_idx(r,j)
  46. SPROW *r;
  47. int j;
  48. #else
  49. int unord_get_idx(SPROW *r, int j)
  50. #endif
  51. {
  52. int idx;
  53. row_elt *e;
  54. if ( ! r || ! r->elt )
  55. error(E_NULL,"unord_get_idx");
  56. for ( idx = 0, e = r->elt; idx < r->len; idx++, e++ )
  57. if ( e->col == j )
  58. break;
  59. if ( idx >= r->len )
  60. return -(r->len+2);
  61. else
  62. return idx;
  63. }
  64. /* unord_get_val -- returns value of the (i,j) entry of A
  65. -- same assumptions as unord_get_idx() */
  66. #ifndef ANSI_C
  67. double unord_get_val(A,i,j)
  68. SPMAT *A;
  69. int i, j;
  70. #else
  71. double unord_get_val(SPMAT *A, int i, int j)
  72. #endif
  73. {
  74. SPROW *r;
  75. int idx;
  76. if ( ! A )
  77. error(E_NULL,"unord_get_val");
  78. if ( i < 0 || i >= A->m || j < 0 || j >= A->n )
  79. error(E_BOUNDS,"unord_get_val");
  80. r = &(A->row[i]);
  81. idx = unord_get_idx(r,j);
  82. if ( idx < 0 )
  83. return 0.0;
  84. else
  85. return r->elt[idx].val;
  86. }
  87. /* bkp_swap_elt -- swaps the (i,j) with the (k,l) entry of sparse matrix
  88. -- either or both of the entries may be unallocated */
  89. #ifndef ANSI_C
  90. static SPMAT *bkp_swap_elt(A,i1,j1,idx1,i2,j2,idx2)
  91. SPMAT *A;
  92. int i1, j1, idx1, i2, j2, idx2;
  93. #else
  94. static SPMAT *bkp_swap_elt(SPMAT *A, int i1, int j1,
  95. int idx1, int i2, int j2, int idx2)
  96. #endif
  97. {
  98. int tmp_row, tmp_idx;
  99. SPROW *r1, *r2;
  100. row_elt *e1, *e2;
  101. Real tmp;
  102. if ( ! A )
  103. error(E_NULL,"bkp_swap_elt");
  104. if ( i1 < 0 || j1 < 0 || i2 < 0 || j2 < 0 ||
  105. i1 >= A->m || j1 >= A->n || i2 >= A->m || j2 >= A->n )
  106. {
  107. error(E_BOUNDS,"bkp_swap_elt");
  108. }
  109. if ( i1 == i2 && j1 == j2 )
  110. return A;
  111. if ( idx1 < 0 && idx2 < 0 ) /* neither allocated */
  112. return A;
  113. r1 = &(A->row[i1]); r2 = &(A->row[i2]);
  114. /* if ( idx1 >= r1->len || idx2 >= r2->len )
  115. error(E_BOUNDS,"bkp_swap_elt"); */
  116. if ( idx1 < 0 ) /* assume not allocated */
  117. {
  118. idx1 = r1->len;
  119. if ( idx1 >= r1->maxlen )
  120. { tracecatch(sprow_xpd(r1,2*r1->maxlen+1,TYPE_SPMAT),
  121. "bkp_swap_elt"); }
  122. r1->len = idx1+1;
  123. r1->elt[idx1].col = j1;
  124. r1->elt[idx1].val = 0.0;
  125. /* now patch up column access path */
  126. tmp_row = -1; tmp_idx = j1;
  127. chase_col(A,j1,&tmp_row,&tmp_idx,i1-1);
  128. if ( tmp_row < 0 )
  129. {
  130. r1->elt[idx1].nxt_row = A->start_row[j1];
  131. r1->elt[idx1].nxt_idx = A->start_idx[j1];
  132. A->start_row[j1] = i1;
  133. A->start_idx[j1] = idx1;
  134. }
  135. else
  136. {
  137. row_elt *tmp_e;
  138. tmp_e = &(A->row[tmp_row].elt[tmp_idx]);
  139. r1->elt[idx1].nxt_row = tmp_e->nxt_row;
  140. r1->elt[idx1].nxt_idx = tmp_e->nxt_idx;
  141. tmp_e->nxt_row = i1;
  142. tmp_e->nxt_idx = idx1;
  143. }
  144. }
  145. else if ( r1->elt[idx1].col != j1 )
  146. error(E_INTERN,"bkp_swap_elt");
  147. if ( idx2 < 0 )
  148. {
  149. idx2 = r2->len;
  150. if ( idx2 >= r2->maxlen )
  151. { tracecatch(sprow_xpd(r2,2*r2->maxlen+1,TYPE_SPMAT),
  152. "bkp_swap_elt"); }
  153. r2->len = idx2+1;
  154. r2->elt[idx2].col = j2;
  155. r2->elt[idx2].val = 0.0;
  156. /* now patch up column access path */
  157. tmp_row = -1; tmp_idx = j2;
  158. chase_col(A,j2,&tmp_row,&tmp_idx,i2-1);
  159. if ( tmp_row < 0 )
  160. {
  161. r2->elt[idx2].nxt_row = A->start_row[j2];
  162. r2->elt[idx2].nxt_idx = A->start_idx[j2];
  163. A->start_row[j2] = i2;
  164. A->start_idx[j2] = idx2;
  165. }
  166. else
  167. {
  168. row_elt *tmp_e;
  169. tmp_e = &(A->row[tmp_row].elt[tmp_idx]);
  170. r2->elt[idx2].nxt_row = tmp_e->nxt_row;
  171. r2->elt[idx2].nxt_idx = tmp_e->nxt_idx;
  172. tmp_e->nxt_row = i2;
  173. tmp_e->nxt_idx = idx2;
  174. }
  175. }
  176. else if ( r2->elt[idx2].col != j2 )
  177. error(E_INTERN,"bkp_swap_elt");
  178. e1 = &(r1->elt[idx1]); e2 = &(r2->elt[idx2]);
  179. tmp = e1->val;
  180. e1->val = e2->val;
  181. e2->val = tmp;
  182. return A;
  183. }
  184. /* bkp_bump_col -- bumps row and idx to next entry in column j */
  185. #ifndef ANSI_C
  186. row_elt *bkp_bump_col(A, j, row, idx)
  187. SPMAT *A;
  188. int j, *row, *idx;
  189. #else
  190. row_elt *bkp_bump_col(SPMAT *A, int j, int *row, int *idx)
  191. #endif
  192. {
  193. SPROW *r;
  194. row_elt *e;
  195. if ( *row < 0 )
  196. {
  197. *row = A->start_row[j];
  198. *idx = A->start_idx[j];
  199. }
  200. else
  201. {
  202. r = &(A->row[*row]);
  203. e = &(r->elt[*idx]);
  204. if ( e->col != j )
  205. error(E_INTERN,"bkp_bump_col");
  206. *row = e->nxt_row;
  207. *idx = e->nxt_idx;
  208. }
  209. if ( *row < 0 )
  210. return (row_elt *)NULL;
  211. else
  212. return &(A->row[*row].elt[*idx]);
  213. }
  214. /* bkp_interchange -- swap rows/cols i and j (symmetric pivot)
  215. -- uses just the upper triangular part */
  216. #ifndef ANSI_C
  217. SPMAT *bkp_interchange(A, i1, i2)
  218. SPMAT *A;
  219. int i1, i2;
  220. #else
  221. SPMAT *bkp_interchange(SPMAT *A, int i1, int i2)
  222. #endif
  223. {
  224. int tmp_row, tmp_idx;
  225. int row1, row2, idx1, idx2, tmp_row1, tmp_idx1, tmp_row2, tmp_idx2;
  226. SPROW *r1, *r2;
  227. row_elt *e1, *e2;
  228. IVEC *done_list = IVNULL;
  229. if ( ! A )
  230. error(E_NULL,"bkp_interchange");
  231. if ( i1 < 0 || i1 >= A->n || i2 < 0 || i2 >= A->n )
  232. error(E_BOUNDS,"bkp_interchange");
  233. if ( A->m != A->n )
  234. error(E_SQUARE,"bkp_interchange");
  235. if ( i1 == i2 )
  236. return A;
  237. if ( i1 > i2 )
  238. { tmp_idx = i1; i1 = i2; i2 = tmp_idx; }
  239. done_list = iv_resize(done_list,A->n);
  240. for ( tmp_idx = 0; tmp_idx < A->n; tmp_idx++ )
  241. done_list->ive[tmp_idx] = FALSE;
  242. row1 = -1; idx1 = i1;
  243. row2 = -1; idx2 = i2;
  244. e1 = bkp_bump_col(A,i1,&row1,&idx1);
  245. e2 = bkp_bump_col(A,i2,&row2,&idx2);
  246. while ( (row1 >= 0 && row1 < i1) || (row2 >= 0 && row2 < i1) )
  247. /* Note: "row2 < i1" not "row2 < i2" as we must stop before the
  248. "knee bend" */
  249. {
  250. if ( row1 >= 0 && row1 < i1 && ( row1 < row2 || row2 < 0 ) )
  251. {
  252. tmp_row1 = row1; tmp_idx1 = idx1;
  253. e1 = bkp_bump_col(A,i1,&tmp_row1,&tmp_idx1);
  254. if ( ! done_list->ive[row1] )
  255. {
  256. if ( row1 == row2 )
  257. bkp_swap_elt(A,row1,i1,idx1,row1,i2,idx2);
  258. else
  259. bkp_swap_elt(A,row1,i1,idx1,row1,i2,-1);
  260. done_list->ive[row1] = TRUE;
  261. }
  262. row1 = tmp_row1; idx1 = tmp_idx1;
  263. }
  264. else if ( row2 >= 0 && row2 < i1 && ( row2 < row1 || row1 < 0 ) )
  265. {
  266. tmp_row2 = row2; tmp_idx2 = idx2;
  267. e2 = bkp_bump_col(A,i2,&tmp_row2,&tmp_idx2);
  268. if ( ! done_list->ive[row2] )
  269. {
  270. if ( row1 == row2 )
  271. bkp_swap_elt(A,row2,i1,idx1,row2,i2,idx2);
  272. else
  273. bkp_swap_elt(A,row2,i1,-1,row2,i2,idx2);
  274. done_list->ive[row2] = TRUE;
  275. }
  276. row2 = tmp_row2; idx2 = tmp_idx2;
  277. }
  278. else if ( row1 == row2 )
  279. {
  280. tmp_row1 = row1; tmp_idx1 = idx1;
  281. e1 = bkp_bump_col(A,i1,&tmp_row1,&tmp_idx1);
  282. tmp_row2 = row2; tmp_idx2 = idx2;
  283. e2 = bkp_bump_col(A,i2,&tmp_row2,&tmp_idx2);
  284. if ( ! done_list->ive[row1] )
  285. {
  286. bkp_swap_elt(A,row1,i1,idx1,row2,i2,idx2);
  287. done_list->ive[row1] = TRUE;
  288. }
  289. row1 = tmp_row1; idx1 = tmp_idx1;
  290. row2 = tmp_row2; idx2 = tmp_idx2;
  291. }
  292. }
  293. /* ensure we are **past** the first knee */
  294. while ( row2 >= 0 && row2 <= i1 )
  295. e2 = bkp_bump_col(A,i2,&row2,&idx2);
  296. /* at/after 1st "knee bend" */
  297. r1 = &(A->row[i1]);
  298. idx1 = 0;
  299. e1 = &(r1->elt[idx1]);
  300. while ( row2 >= 0 && row2 < i2 )
  301. {
  302. /* used for update of e2 at end of loop */
  303. tmp_row = row2; tmp_idx = idx2;
  304. if ( ! done_list->ive[row2] )
  305. {
  306. r2 = &(A->row[row2]);
  307. bkp_bump_col(A,i2,&tmp_row,&tmp_idx);
  308. done_list->ive[row2] = TRUE;
  309. tmp_idx1 = unord_get_idx(r1,row2);
  310. tracecatch(bkp_swap_elt(A,row2,i2,idx2,i1,row2,tmp_idx1),
  311. "bkp_interchange");
  312. }
  313. /* update e1 and e2 */
  314. row2 = tmp_row; idx2 = tmp_idx;
  315. e2 = ( row2 >= 0 ) ? &(A->row[row2].elt[idx2]) : (row_elt *)NULL;
  316. }
  317. idx1 = 0;
  318. e1 = r1->elt;
  319. while ( idx1 < r1->len )
  320. {
  321. if ( e1->col >= i2 || e1->col <= i1 )
  322. {
  323. idx1++;
  324. e1++;
  325. continue;
  326. }
  327. if ( ! done_list->ive[e1->col] )
  328. {
  329. tmp_idx2 = unord_get_idx(&(A->row[e1->col]),i2);
  330. tracecatch(bkp_swap_elt(A,i1,e1->col,idx1,e1->col,i2,tmp_idx2),
  331. "bkp_interchange");
  332. done_list->ive[e1->col] = TRUE;
  333. }
  334. idx1++;
  335. e1++;
  336. }
  337. /* at/after 2nd "knee bend" */
  338. idx1 = 0;
  339. e1 = &(r1->elt[idx1]);
  340. r2 = &(A->row[i2]);
  341. idx2 = 0;
  342. e2 = &(r2->elt[idx2]);
  343. while ( idx1 < r1->len )
  344. {
  345. if ( e1->col <= i2 )
  346. {
  347. idx1++; e1++;
  348. continue;
  349. }
  350. if ( ! done_list->ive[e1->col] )
  351. {
  352. tmp_idx2 = unord_get_idx(r2,e1->col);
  353. tracecatch(bkp_swap_elt(A,i1,e1->col,idx1,i2,e1->col,tmp_idx2),
  354. "bkp_interchange");
  355. done_list->ive[e1->col] = TRUE;
  356. }
  357. idx1++; e1++;
  358. }
  359. idx2 = 0; e2 = r2->elt;
  360. while ( idx2 < r2->len )
  361. {
  362. if ( e2->col <= i2 )
  363. {
  364. idx2++; e2++;
  365. continue;
  366. }
  367. if ( ! done_list->ive[e2->col] )
  368. {
  369. tmp_idx1 = unord_get_idx(r1,e2->col);
  370. tracecatch(bkp_swap_elt(A,i2,e2->col,idx2,i1,e2->col,tmp_idx1),
  371. "bkp_interchange");
  372. done_list->ive[e2->col] = TRUE;
  373. }
  374. idx2++; e2++;
  375. }
  376. /* now interchange the digonal entries! */
  377. idx1 = unord_get_idx(&(A->row[i1]),i1);
  378. idx2 = unord_get_idx(&(A->row[i2]),i2);
  379. if ( idx1 >= 0 || idx2 >= 0 )
  380. {
  381. tracecatch(bkp_swap_elt(A,i1,i1,idx1,i2,i2,idx2),
  382. "bkp_interchange");
  383. }
  384. return A;
  385. }
  386. /* iv_min -- returns minimum of an integer vector
  387. -- sets index to the position in iv if index != NULL */
  388. #ifndef ANSI_C
  389. int iv_min(iv,index)
  390. IVEC *iv;
  391. int *index;
  392. #else
  393. int iv_min(IVEC *iv, int *index)
  394. #endif
  395. {
  396. int i, i_min, min_val, tmp;
  397. if ( ! iv )
  398. error(E_NULL,"iv_min");
  399. if ( iv->dim <= 0 )
  400. error(E_SIZES,"iv_min");
  401. i_min = 0;
  402. min_val = iv->ive[0];
  403. for ( i = 1; i < iv->dim; i++ )
  404. {
  405. tmp = iv->ive[i];
  406. if ( tmp < min_val )
  407. {
  408. min_val = tmp;
  409. i_min = i;
  410. }
  411. }
  412. if ( index != (int *)NULL )
  413. *index = i_min;
  414. return min_val;
  415. }
  416. /* max_row_col -- returns max { |A[j][k]| : k >= i, k != j, k != l } given j
  417. using symmetry and only the upper triangular part of A */
  418. #ifndef ANSI_C
  419. static double max_row_col(A,i,j,l)
  420. SPMAT *A;
  421. int i, j, l;
  422. #else
  423. static double max_row_col(SPMAT *A, int i,int j, int l)
  424. #endif
  425. {
  426. int row_num, idx;
  427. SPROW *r;
  428. row_elt *e;
  429. Real max_val, tmp;
  430. if ( ! A )
  431. error(E_NULL,"max_row_col");
  432. if ( i < 0 || i > A->n || j < 0 || j >= A->n )
  433. error(E_BOUNDS,"max_row_col");
  434. max_val = 0.0;
  435. idx = unord_get_idx(&(A->row[i]),j);
  436. if ( idx < 0 )
  437. {
  438. row_num = -1; idx = j;
  439. e = chase_past(A,j,&row_num,&idx,i);
  440. }
  441. else
  442. {
  443. row_num = i;
  444. e = &(A->row[i].elt[idx]);
  445. }
  446. while ( row_num >= 0 && row_num < j )
  447. {
  448. if ( row_num != l )
  449. {
  450. tmp = fabs(e->val);
  451. if ( tmp > max_val )
  452. max_val = tmp;
  453. }
  454. e = bump_col(A,j,&row_num,&idx);
  455. }
  456. r = &(A->row[j]);
  457. for ( idx = 0, e = r->elt; idx < r->len; idx++, e++ )
  458. {
  459. if ( e->col > j && e->col != l )
  460. {
  461. tmp = fabs(e->val);
  462. if ( tmp > max_val )
  463. max_val = tmp;
  464. }
  465. }
  466. return max_val;
  467. }
  468. /* nonzeros -- counts non-zeros in A */
  469. #ifndef ANSI_C
  470. static int nonzeros(A)
  471. SPMAT *A;
  472. #else
  473. static int nonzeros(const SPMAT *A)
  474. #endif
  475. {
  476. int cnt, i;
  477. if ( ! A )
  478. return 0;
  479. cnt = 0;
  480. for ( i = 0; i < A->m; i++ )
  481. cnt += A->row[i].len;
  482. return cnt;
  483. }
  484. /* chk_col_access -- for spBKPfactor()
  485. -- checks that column access path is OK */
  486. #ifndef ANSI_C
  487. int chk_col_access(A)
  488. SPMAT *A;
  489. #else
  490. int chk_col_access(const SPMAT *A)
  491. #endif
  492. {
  493. int cnt_nz, j, row, idx;
  494. SPROW *r;
  495. row_elt *e;
  496. if ( ! A )
  497. error(E_NULL,"chk_col_access");
  498. /* count nonzeros as we go down columns */
  499. cnt_nz = 0;
  500. for ( j = 0; j < A->n; j++ )
  501. {
  502. row = A->start_row[j];
  503. idx = A->start_idx[j];
  504. while ( row >= 0 )
  505. {
  506. if ( row >= A->m || idx < 0 )
  507. return FALSE;
  508. r = &(A->row[row]);
  509. if ( idx >= r->len )
  510. return FALSE;
  511. e = &(r->elt[idx]);
  512. if ( e->nxt_row >= 0 && e->nxt_row <= row )
  513. return FALSE;
  514. row = e->nxt_row;
  515. idx = e->nxt_idx;
  516. cnt_nz++;
  517. }
  518. }
  519. if ( cnt_nz != nonzeros(A) )
  520. return FALSE;
  521. else
  522. return TRUE;
  523. }
  524. /* col_cmp -- compare two columns -- for sorting rows using qsort() */
  525. #ifndef ANSI_C
  526. static int col_cmp(e1,e2)
  527. row_elt *e1, *e2;
  528. #else
  529. static int col_cmp(const row_elt *e1, const row_elt *e2)
  530. #endif
  531. {
  532. return e1->col - e2->col;
  533. }
  534. /* spBKPfactor -- sparse Bunch-Kaufman-Parlett factorisation of A in-situ
  535. -- A is factored into the form P'AP = MDM' where
  536. P is a permutation matrix, M lower triangular and D is block
  537. diagonal with blocks of size 1 or 2
  538. -- P is stored in pivot; blocks[i]==i iff D[i][i] is a block */
  539. #ifndef ANSI_C
  540. SPMAT *spBKPfactor(A,pivot,blocks,tol)
  541. SPMAT *A;
  542. PERM *pivot, *blocks;
  543. double tol;
  544. #else
  545. SPMAT *spBKPfactor(SPMAT *A, PERM *pivot, PERM *blocks, double tol)
  546. #endif
  547. {
  548. int i, j, k, l, n, onebyone, r;
  549. int idx, idx1, idx_piv;
  550. int row_num;
  551. int best_deg, best_j, best_l, best_cost, mark_cost, deg, deg_j,
  552. deg_l, ignore_deg;
  553. int list_idx, list_idx2, old_list_idx;
  554. SPROW *row, *r_piv, *r1_piv;
  555. row_elt *e, *e1;
  556. Real aii, aip1, aip1i;
  557. Real det, max_j, max_l, s, t;
  558. STATIC IVEC *scan_row = IVNULL, *scan_idx = IVNULL, *col_list = IVNULL,
  559. *tmp_iv = IVNULL;
  560. STATIC IVEC *deg_list = IVNULL;
  561. STATIC IVEC *orig_idx = IVNULL, *orig1_idx = IVNULL;
  562. STATIC PERM *order = PNULL;
  563. if ( ! A || ! pivot || ! blocks )
  564. error(E_NULL,"spBKPfactor");
  565. if ( A->m != A->n )
  566. error(E_SQUARE,"spBKPfactor");
  567. if ( A->m != pivot->size || pivot->size != blocks->size )
  568. error(E_SIZES,"spBKPfactor");
  569. if ( tol <= 0.0 || tol > 1.0 )
  570. error(E_RANGE,"spBKPfactor");
  571. n = A->n;
  572. px_ident(pivot); px_ident(blocks);
  573. sp_col_access(A); sp_diag_access(A);
  574. ignore_deg = FALSE;
  575. deg_list = iv_resize(deg_list,n);
  576. if ( order != NULL )
  577. px_ident(order);
  578. order = px_resize(order,n);
  579. MEM_STAT_REG(deg_list,TYPE_IVEC);
  580. MEM_STAT_REG(order,TYPE_PERM);
  581. scan_row = iv_resize(scan_row,5);
  582. scan_idx = iv_resize(scan_idx,5);
  583. col_list = iv_resize(col_list,5);
  584. orig_idx = iv_resize(orig_idx,5);
  585. orig_idx = iv_resize(orig1_idx,5);
  586. orig_idx = iv_resize(tmp_iv,5);
  587. MEM_STAT_REG(scan_row,TYPE_IVEC);
  588. MEM_STAT_REG(scan_idx,TYPE_IVEC);
  589. MEM_STAT_REG(col_list,TYPE_IVEC);
  590. MEM_STAT_REG(orig_idx,TYPE_IVEC);
  591. MEM_STAT_REG(orig1_idx,TYPE_IVEC);
  592. MEM_STAT_REG(tmp_iv,TYPE_IVEC);
  593. for ( i = 0; i < n-1; i = onebyone ? i+1 : i+2 )
  594. {
  595. /* now we want to use a Markowitz-style selection rule for
  596. determining which rows to swap and whether to use
  597. 1x1 or 2x2 pivoting */
  598. /* get list of degrees of nodes */
  599. deg_list = iv_resize(deg_list,n-i);
  600. if ( ! ignore_deg )
  601. for ( j = i; j < n; j++ )
  602. deg_list->ive[j-i] = 0;
  603. else
  604. {
  605. for ( j = i; j < n; j++ )
  606. deg_list->ive[j-i] = 1;
  607. if ( i < n )
  608. deg_list->ive[0] = 0;
  609. }
  610. order = px_resize(order,n-i);
  611. px_ident(order);
  612. if ( ! ignore_deg )
  613. {
  614. for ( j = i; j < n; j++ )
  615. {
  616. /* idx = sprow_idx(&(A->row[j]),j+1); */
  617. /* idx = fixindex(idx); */
  618. idx = 0;
  619. row = &(A->row[j]);
  620. e = &(row->elt[idx]);
  621. /* deg_list->ive[j-i] += row->len - idx; */
  622. for ( ; idx < row->len; idx++, e++ )
  623. if ( e->col >= i )
  624. deg_list->ive[e->col - i]++;
  625. }
  626. /* now deg_list[k] == degree of node k+i */
  627. /* now sort them into increasing order */
  628. iv_sort(deg_list,order);
  629. /* now deg_list[idx] == degree of node i+order[idx] */
  630. }
  631. /* now we can chase through the nodes in order of increasing
  632. degree, picking out the ones that satisfy our stability
  633. criterion */
  634. list_idx = 0; r = -1;
  635. best_j = best_l = -1;
  636. for ( deg = 0; deg <= n; deg++ )
  637. {
  638. Real ajj, all, ajl;
  639. if ( list_idx >= deg_list->dim )
  640. break; /* That's all folks! */
  641. old_list_idx = list_idx;
  642. while ( list_idx < deg_list->dim &&
  643. deg_list->ive[list_idx] <= deg )
  644. {
  645. j = i+order->pe[list_idx];
  646. if ( j < i )
  647. continue;
  648. /* can we use row/col j for a 1 x 1 pivot? */
  649. /* find max_j = max_{k>=i} {|A[k][j]|,|A[j][k]|} */
  650. ajj = fabs(unord_get_val(A,j,j));
  651. if ( ajj == 0.0 )
  652. {
  653. list_idx++;
  654. continue; /* can't use this for 1 x 1 pivot */
  655. }
  656. max_j = max_row_col(A,i,j,-1);
  657. if ( ajj >= tol/* *alpha */ *max_j )
  658. {
  659. onebyone = TRUE;
  660. best_j = j;
  661. best_deg = deg_list->ive[list_idx];
  662. break;
  663. }
  664. list_idx++;
  665. }
  666. if ( best_j >= 0 )
  667. break;
  668. best_cost = 2*n; /* > any possible Markowitz cost (bound) */
  669. best_j = best_l = -1;
  670. list_idx = old_list_idx;
  671. while ( list_idx < deg_list->dim &&
  672. deg_list->ive[list_idx] <= deg )
  673. {
  674. j = i+order->pe[list_idx];
  675. ajj = fabs(unord_get_val(A,j,j));
  676. for ( list_idx2 = 0; list_idx2 < list_idx; list_idx2++ )
  677. {
  678. deg_j = deg;
  679. deg_l = deg_list->ive[list_idx2];
  680. l = i+order->pe[list_idx2];
  681. if ( l < i )
  682. continue;
  683. /* try using rows/cols (j,l) for a 2 x 2 pivot block */
  684. all = fabs(unord_get_val(A,l,l));
  685. ajl = ( j > l ) ? fabs(unord_get_val(A,l,j)) :
  686. fabs(unord_get_val(A,j,l));
  687. det = fabs(ajj*all - ajl*ajl);
  688. if ( det == 0.0 )
  689. continue;
  690. max_j = max_row_col(A,i,j,l);
  691. max_l = max_row_col(A,i,l,j);
  692. if ( tol*(all*max_j+ajl*max_l) < det &&
  693. tol*(ajl*max_j+ajj*max_l) < det )
  694. {
  695. /* acceptably stable 2 x 2 pivot */
  696. /* this is actually an overestimate of the
  697. Markowitz cost for choosing (j,l) */
  698. mark_cost = (ajj == 0.0) ?
  699. ((all == 0.0) ? deg_j+deg_l : deg_j+2*deg_l) :
  700. ((all == 0.0) ? 2*deg_j+deg_l :
  701. 2*(deg_j+deg_l));
  702. if ( mark_cost < best_cost )
  703. {
  704. onebyone = FALSE;
  705. best_cost = mark_cost;
  706. best_j = j;
  707. best_l = l;
  708. best_deg = deg_j;
  709. }
  710. }
  711. }
  712. list_idx++;
  713. }
  714. if ( best_j >= 0 )
  715. break;
  716. }
  717. if ( best_deg > (int)floor(0.8*(n-i)) )
  718. ignore_deg = TRUE;
  719. /* now do actual interchanges */
  720. if ( best_j >= 0 && onebyone )
  721. {
  722. bkp_interchange(A,i,best_j);
  723. px_transp(pivot,i,best_j);
  724. }
  725. else if ( best_j >= 0 && best_l >= 0 && ! onebyone )
  726. {
  727. if ( best_j == i || best_j == i+1 )
  728. {
  729. if ( best_l == i || best_l == i+1 )
  730. {
  731. /* no pivoting, but must update blocks permutation */
  732. px_transp(blocks,i,i+1);
  733. goto dopivot;
  734. }
  735. bkp_interchange(A,(best_j == i) ? i+1 : i,best_l);
  736. px_transp(pivot,(best_j == i) ? i+1 : i,best_l);
  737. }
  738. else if ( best_l == i || best_l == i+1 )
  739. {
  740. bkp_interchange(A,(best_l == i) ? i+1 : i,best_j);
  741. px_transp(pivot,(best_l == i) ? i+1 : i,best_j);
  742. }
  743. else /* best_j & best_l outside i, i+1 */
  744. {
  745. if ( i != best_j )
  746. {
  747. bkp_interchange(A,i,best_j);
  748. px_transp(pivot,i,best_j);
  749. }
  750. if ( i+1 != best_l )
  751. {
  752. bkp_interchange(A,i+1,best_l);
  753. px_transp(pivot,i+1,best_l);
  754. }
  755. }
  756. }
  757. else /* can't pivot &/or nothing to pivot */
  758. continue;
  759. /* update blocks permutation */
  760. if ( ! onebyone )
  761. px_transp(blocks,i,i+1);
  762. dopivot:
  763. if ( onebyone )
  764. {
  765. int idx_j, idx_k, s_idx, s_idx2;
  766. row_elt *e_ij, *e_ik;
  767. r_piv = &(A->row[i]);
  768. idx_piv = unord_get_idx(r_piv,i);
  769. /* if idx_piv < 0 then aii == 0 and no pivoting can be done;
  770. -- this means that we should continue to the next iteration */
  771. if ( idx_piv < 0 )
  772. continue;
  773. aii = r_piv->elt[idx_piv].val;
  774. if ( aii == 0.0 )
  775. continue;
  776. /* for ( j = i+1; j < n; j++ ) { ... pivot step ... } */
  777. /* initialise scan_... etc for the 1 x 1 pivot */
  778. scan_row = iv_resize(scan_row,r_piv->len);
  779. scan_idx = iv_resize(scan_idx,r_piv->len);
  780. col_list = iv_resize(col_list,r_piv->len);
  781. orig_idx = iv_resize(orig_idx,r_piv->len);
  782. row_num = i; s_idx = idx = 0;
  783. e = &(r_piv->elt[idx]);
  784. for ( idx = 0; idx < r_piv->len; idx++, e++ )
  785. {
  786. if ( e->col < i )
  787. continue;
  788. scan_row->ive[s_idx] = i;
  789. scan_idx->ive[s_idx] = idx;
  790. orig_idx->ive[s_idx] = idx;
  791. col_list->ive[s_idx] = e->col;
  792. s_idx++;
  793. }
  794. scan_row = iv_resize(scan_row,s_idx);
  795. scan_idx = iv_resize(scan_idx,s_idx);
  796. col_list = iv_resize(col_list,s_idx);
  797. orig_idx = iv_resize(orig_idx,s_idx);
  798. order = px_resize(order,scan_row->dim);
  799. px_ident(order);
  800. iv_sort(col_list,order);
  801. tmp_iv = iv_resize(tmp_iv,scan_row->dim);
  802. for ( idx = 0; idx < order->size; idx++ )
  803. tmp_iv->ive[idx] = scan_idx->ive[order->pe[idx]];
  804. iv_copy(tmp_iv,scan_idx);
  805. for ( idx = 0; idx < order->size; idx++ )
  806. tmp_iv->ive[idx] = scan_row->ive[order->pe[idx]];
  807. iv_copy(tmp_iv,scan_row);
  808. for ( idx = 0; idx < scan_row->dim; idx++ )
  809. tmp_iv->ive[idx] = orig_idx->ive[order->pe[idx]];
  810. iv_copy(tmp_iv,orig_idx);
  811. /* now do actual pivot */
  812. /* for ( j = i+1; j < n-1; j++ ) .... */
  813. for ( s_idx = 0; s_idx < scan_row->dim; s_idx++ )
  814. {
  815. idx_j = orig_idx->ive[s_idx];
  816. if ( idx_j < 0 )
  817. error(E_INTERN,"spBKPfactor");
  818. e_ij = &(r_piv->elt[idx_j]);
  819. j = e_ij->col;
  820. if ( j < i+1 )
  821. continue;
  822. scan_to(A,scan_row,scan_idx,col_list,j);
  823. /* compute multiplier */
  824. t = e_ij->val / aii;
  825. /* for ( k = j; k < n; k++ ) { .... update A[j][k] .... } */
  826. /* this is the row in which pivoting is done */
  827. row = &(A->row[j]);
  828. for ( s_idx2 = s_idx; s_idx2 < scan_row->dim; s_idx2++ )
  829. {
  830. idx_k = orig_idx->ive[s_idx2];
  831. e_ik = &(r_piv->elt[idx_k]);
  832. k = e_ik->col;
  833. /* k >= j since col_list has been sorted */
  834. if ( scan_row->ive[s_idx2] == j )
  835. { /* no fill-in -- can be done directly */
  836. idx = scan_idx->ive[s_idx2];
  837. /* idx = sprow_idx2(row,k,idx); */
  838. row->elt[idx].val -= t*e_ik->val;
  839. }
  840. else
  841. { /* fill-in -- insert entry & patch column */
  842. int old_row, old_idx;
  843. row_elt *old_e, *new_e;
  844. old_row = scan_row->ive[s_idx2];
  845. old_idx = scan_idx->ive[s_idx2];
  846. /* old_idx = sprow_idx2(&(A->row[old_row]),k,old_idx); */
  847. if ( old_idx < 0 )
  848. error(E_INTERN,"spBKPfactor");
  849. /* idx = sprow_idx(row,k); */
  850. /* idx = fixindex(idx); */
  851. idx = row->len;
  852. /* sprow_set_val(row,k,-t*e_ik->val); */
  853. if ( row->len >= row->maxlen )
  854. { tracecatch(sprow_xpd(row,2*row->maxlen+1,TYPE_SPMAT),
  855. "spBKPfactor"); }
  856. row->len = idx+1;
  857. new_e = &(row->elt[idx]);
  858. new_e->val = -t*e_ik->val;
  859. new_e->col = k;
  860. old_e = &(A->row[old_row].elt[old_idx]);
  861. new_e->nxt_row = old_e->nxt_row;
  862. new_e->nxt_idx = old_e->nxt_idx;
  863. old_e->nxt_row = j;
  864. old_e->nxt_idx = idx;
  865. }
  866. }
  867. e_ij->val = t;
  868. }
  869. }
  870. else /* onebyone == FALSE */
  871. { /* do 2 x 2 pivot */
  872. int idx_k, idx1_k, s_idx, s_idx2;
  873. int old_col;
  874. row_elt *e_tmp;
  875. r_piv = &(A->row[i]);
  876. idx_piv = unord_get_idx(r_piv,i);
  877. aii = aip1i = 0.0;
  878. e_tmp = r_piv->elt;
  879. for ( idx_piv = 0; idx_piv < r_piv->len; idx_piv++, e_tmp++ )
  880. if ( e_tmp->col == i )
  881. aii = e_tmp->val;
  882. else if ( e_tmp->col == i+1 )
  883. aip1i = e_tmp->val;
  884. r1_piv = &(A->row[i+1]);
  885. e_tmp = r1_piv->elt;
  886. aip1 = unord_get_val(A,i+1,i+1);
  887. det = aii*aip1 - aip1i*aip1i; /* Must have det < 0 */
  888. if ( aii == 0.0 && aip1i == 0.0 )
  889. {
  890. /* error(E_RANGE,"spBKPfactor"); */
  891. onebyone = TRUE;
  892. continue; /* cannot pivot */
  893. }
  894. if ( det == 0.0 )
  895. {
  896. if ( aii != 0.0 )
  897. error(E_RANGE,"spBKPfactor");
  898. onebyone = TRUE;
  899. continue; /* cannot pivot */
  900. }
  901. aip1i = aip1i/det;
  902. aii = aii/det;
  903. aip1 = aip1/det;
  904. /* initialise scan_... etc for the 2 x 2 pivot */
  905. s_idx = r_piv->len + r1_piv->len;
  906. scan_row = iv_resize(scan_row,s_idx);
  907. scan_idx = iv_resize(scan_idx,s_idx);
  908. col_list = iv_resize(col_list,s_idx);
  909. orig_idx = iv_resize(orig_idx,s_idx);
  910. orig1_idx = iv_resize(orig1_idx,s_idx);
  911. e = r_piv->elt;
  912. for ( idx = 0; idx < r_piv->len; idx++, e++ )
  913. {
  914. scan_row->ive[idx] = i;
  915. scan_idx->ive[idx] = idx;
  916. col_list->ive[idx] = e->col;
  917. orig_idx->ive[idx] = idx;
  918. orig1_idx->ive[idx] = -1;
  919. }
  920. e = r_piv->elt;
  921. e1 = r1_piv->elt;
  922. for ( idx = 0; idx < r1_piv->len; idx++, e1++ )
  923. {
  924. scan_row->ive[idx+r_piv->len] = i+1;
  925. scan_idx->ive[idx+r_piv->len] = idx;
  926. col_list->ive[idx+r_piv->len] = e1->col;
  927. orig_idx->ive[idx+r_piv->len] = -1;
  928. orig1_idx->ive[idx+r_piv->len] = idx;
  929. }
  930. e1 = r1_piv->elt;
  931. order = px_resize(order,scan_row->dim);
  932. px_ident(order);
  933. iv_sort(col_list,order);
  934. tmp_iv = iv_resize(tmp_iv,scan_row->dim);
  935. for ( idx = 0; idx < order->size; idx++ )
  936. tmp_iv->ive[idx] = scan_idx->ive[order->pe[idx]];
  937. iv_copy(tmp_iv,scan_idx);
  938. for ( idx = 0; idx < order->size; idx++ )
  939. tmp_iv->ive[idx] = scan_row->ive[order->pe[idx]];
  940. iv_copy(tmp_iv,scan_row);
  941. for ( idx = 0; idx < scan_row->dim; idx++ )
  942. tmp_iv->ive[idx] = orig_idx->ive[order->pe[idx]];
  943. iv_copy(tmp_iv,orig_idx);
  944. for ( idx = 0; idx < scan_row->dim; idx++ )
  945. tmp_iv->ive[idx] = orig1_idx->ive[order->pe[idx]];
  946. iv_copy(tmp_iv,orig1_idx);
  947. s_idx = 0;
  948. old_col = -1;
  949. for ( idx = 0; idx < scan_row->dim; idx++ )
  950. {
  951. if ( col_list->ive[idx] == old_col )
  952. {
  953. if ( scan_row->ive[idx] == i )
  954. {
  955. scan_row->ive[s_idx-1] = scan_row->ive[idx];
  956. scan_idx->ive[s_idx-1] = scan_idx->ive[idx];
  957. col_list->ive[s_idx-1] = col_list->ive[idx];
  958. orig_idx->ive[s_idx-1] = orig_idx->ive[idx];
  959. orig1_idx->ive[s_idx-1] = orig1_idx->ive[idx-1];
  960. }
  961. else if ( idx > 0 )
  962. {
  963. scan_row->ive[s_idx-1] = scan_row->ive[idx-1];
  964. scan_idx->ive[s_idx-1] = scan_idx->ive[idx-1];
  965. col_list->ive[s_idx-1] = col_list->ive[idx-1];
  966. orig_idx->ive[s_idx-1] = orig_idx->ive[idx-1];
  967. orig1_idx->ive[s_idx-1] = orig1_idx->ive[idx];
  968. }
  969. }
  970. else
  971. {
  972. scan_row->ive[s_idx] = scan_row->ive[idx];
  973. scan_idx->ive[s_idx] = scan_idx->ive[idx];
  974. col_list->ive[s_idx] = col_list->ive[idx];
  975. orig_idx->ive[s_idx] = orig_idx->ive[idx];
  976. orig1_idx->ive[s_idx] = orig1_idx->ive[idx];
  977. s_idx++;
  978. }
  979. old_col = col_list->ive[idx];
  980. }
  981. scan_row = iv_resize(scan_row,s_idx);
  982. scan_idx = iv_resize(scan_idx,s_idx);
  983. col_list = iv_resize(col_list,s_idx);
  984. orig_idx = iv_resize(orig_idx,s_idx);
  985. orig1_idx = iv_resize(orig1_idx,s_idx);
  986. /* for ( j = i+2; j < n; j++ ) { .... row operation .... } */
  987. for ( s_idx = 0; s_idx < scan_row->dim; s_idx++ )
  988. {
  989. int idx_piv, idx1_piv;
  990. Real aip1j, aij, aik, aip1k;
  991. row_elt *e_ik, *e_ip1k;
  992. j = col_list->ive[s_idx];
  993. if ( j < i+2 )
  994. continue;
  995. tracecatch(scan_to(A,scan_row,scan_idx,col_list,j),
  996. "spBKPfactor");
  997. idx_piv = orig_idx->ive[s_idx];
  998. aij = ( idx_piv < 0 ) ? 0.0 : r_piv->elt[idx_piv].val;
  999. /* aij = ( s_idx < r_piv->len ) ? r_piv->elt[s_idx].val :
  1000. 0.0; */
  1001. /* aij = sp_get_val(A,i,j); */
  1002. idx1_piv = orig1_idx->ive[s_idx];
  1003. aip1j = ( idx1_piv < 0 ) ? 0.0 : r1_piv->elt[idx1_piv].val;
  1004. /* aip1j = ( s_idx < r_piv->len ) ? 0.0 :
  1005. r1_piv->elt[s_idx-r_piv->len].val; */
  1006. /* aip1j = sp_get_val(A,i+1,j); */
  1007. s = - aip1i*aip1j + aip1*aij;
  1008. t = - aip1i*aij + aii*aip1j;
  1009. /* for ( k = j; k < n; k++ ) { .... update entry .... } */
  1010. row = &(A->row[j]);
  1011. /* set idx_k and idx1_k indices */
  1012. s_idx2 = s_idx;
  1013. k = col_list->ive[s_idx2];
  1014. idx_k = orig_idx->ive[s_idx2];
  1015. idx1_k = orig1_idx->ive[s_idx2];
  1016. while ( s_idx2 < scan_row->dim )
  1017. {
  1018. k = col_list->ive[s_idx2];
  1019. idx_k = orig_idx->ive[s_idx2];
  1020. idx1_k = orig1_idx->ive[s_idx2];
  1021. e_ik = ( idx_k < 0 ) ? (row_elt *)NULL :
  1022. &(r_piv->elt[idx_k]);
  1023. e_ip1k = ( idx1_k < 0 ) ? (row_elt *)NULL :
  1024. &(r1_piv->elt[idx1_k]);
  1025. aik = ( idx_k >= 0 ) ? e_ik->val : 0.0;
  1026. aip1k = ( idx1_k >= 0 ) ? e_ip1k->val : 0.0;
  1027. if ( scan_row->ive[s_idx2] == j )
  1028. { /* no fill-in */
  1029. row = &(A->row[j]);
  1030. /* idx = sprow_idx(row,k); */
  1031. idx = scan_idx->ive[s_idx2];
  1032. if ( idx < 0 )
  1033. error(E_INTERN,"spBKPfactor");
  1034. row->elt[idx].val -= s*aik + t*aip1k;
  1035. }
  1036. else
  1037. { /* fill-in -- insert entry & patch column */
  1038. Real tmp;
  1039. int old_row, old_idx;
  1040. row_elt *old_e, *new_e;
  1041. tmp = - s*aik - t*aip1k;
  1042. if ( tmp != 0.0 )
  1043. {
  1044. row = &(A->row[j]);
  1045. old_row = scan_row->ive[s_idx2];
  1046. old_idx = scan_idx->ive[s_idx2];
  1047. idx = row->len;
  1048. if ( row->len >= row->maxlen )
  1049. { tracecatch(sprow_xpd(row,2*row->maxlen+1,
  1050. TYPE_SPMAT),
  1051. "spBKPfactor"); }
  1052. row->len = idx + 1;
  1053. /* idx = sprow_idx(row,k); */
  1054. new_e = &(row->elt[idx]);
  1055. new_e->val = tmp;
  1056. new_e->col = k;
  1057. if ( old_row < 0 )
  1058. error(E_INTERN,"spBKPfactor");
  1059. /* old_idx = sprow_idx2(&(A->row[old_row]),
  1060. k,old_idx); */
  1061. old_e = &(A->row[old_row].elt[old_idx]);
  1062. new_e->nxt_row = old_e->nxt_row;
  1063. new_e->nxt_idx = old_e->nxt_idx;
  1064. old_e->nxt_row = j;
  1065. old_e->nxt_idx = idx;
  1066. }
  1067. }
  1068. /* update idx_k, idx1_k, s_idx2 etc */
  1069. s_idx2++;
  1070. }
  1071. /* store multipliers -- may involve fill-in (!) */
  1072. /* idx = sprow_idx(r_piv,j); */
  1073. idx = orig_idx->ive[s_idx];
  1074. if ( idx >= 0 )
  1075. {
  1076. r_piv->elt[idx].val = s;
  1077. }
  1078. else if ( s != 0.0 )
  1079. {
  1080. int old_row, old_idx;
  1081. row_elt *new_e, *old_e;
  1082. old_row = -1; old_idx = j;
  1083. if ( i > 0 )
  1084. {
  1085. tracecatch(chase_col(A,j,&old_row,&old_idx,i-1),
  1086. "spBKPfactor");
  1087. }
  1088. /* sprow_set_val(r_piv,j,s); */
  1089. idx = r_piv->len;
  1090. if ( r_piv->len >= r_piv->maxlen )
  1091. { tracecatch(sprow_xpd(r_piv,2*r_piv->maxlen+1,
  1092. TYPE_SPMAT),
  1093. "spBKPfactor"); }
  1094. r_piv->len = idx + 1;
  1095. /* idx = sprow_idx(r_piv,j); */
  1096. /* if ( idx < 0 )
  1097. error(E_INTERN,"spBKPfactor"); */
  1098. new_e = &(r_piv->elt[idx]);
  1099. new_e->val = s;
  1100. new_e->col = j;
  1101. if ( old_row < 0 )
  1102. {
  1103. new_e->nxt_row = A->start_row[j];
  1104. new_e->nxt_idx = A->start_idx[j];
  1105. A->start_row[j] = i;
  1106. A->start_idx[j] = idx;
  1107. }
  1108. else
  1109. {
  1110. /* old_idx = sprow_idx2(&(A->row[old_row]),j,old_idx);*/
  1111. if ( old_idx < 0 )
  1112. error(E_INTERN,"spBKPfactor");
  1113. old_e = &(A->row[old_row].elt[old_idx]);
  1114. new_e->nxt_row = old_e->nxt_row;
  1115. new_e->nxt_idx = old_e->nxt_idx;
  1116. old_e->nxt_row = i;
  1117. old_e->nxt_idx = idx;
  1118. }
  1119. }
  1120. /* idx1 = sprow_idx(r1_piv,j); */
  1121. idx1 = orig1_idx->ive[s_idx];
  1122. if ( idx1 >= 0 )
  1123. {
  1124. r1_piv->elt[idx1].val = t;
  1125. }
  1126. else if ( t != 0.0 )
  1127. {
  1128. int old_row, old_idx;
  1129. row_elt *new_e, *old_e;
  1130. old_row = -1; old_idx = j;
  1131. tracecatch(chase_col(A,j,&old_row,&old_idx,i),
  1132. "spBKPfactor");
  1133. /* sprow_set_val(r1_piv,j,t); */
  1134. idx1 = r1_piv->len;
  1135. if ( r1_piv->len >= r1_piv->maxlen )
  1136. { tracecatch(sprow_xpd(r1_piv,2*r1_piv->maxlen+1,
  1137. TYPE_SPMAT),
  1138. "spBKPfactor"); }
  1139. r1_piv->len = idx1 + 1;
  1140. /* idx1 = sprow_idx(r1_piv,j); */
  1141. /* if ( idx < 0 )
  1142. error(E_INTERN,"spBKPfactor"); */
  1143. new_e = &(r1_piv->elt[idx1]);
  1144. new_e->val = t;
  1145. new_e->col = j;
  1146. if ( idx1 < 0 )
  1147. error(E_INTERN,"spBKPfactor");
  1148. new_e = &(r1_piv->elt[idx1]);
  1149. if ( old_row < 0 )
  1150. {
  1151. new_e->nxt_row = A->start_row[j];
  1152. new_e->nxt_idx = A->start_idx[j];
  1153. A->start_row[j] = i+1;
  1154. A->start_idx[j] = idx1;
  1155. }
  1156. else
  1157. {
  1158. old_idx = sprow_idx2(&(A->row[old_row]),j,old_idx);
  1159. if ( old_idx < 0 )
  1160. error(E_INTERN,"spBKPfactor");
  1161. old_e = &(A->row[old_row].elt[old_idx]);
  1162. new_e->nxt_row = old_e->nxt_row;
  1163. new_e->nxt_idx = old_e->nxt_idx;
  1164. old_e->nxt_row = i+1;
  1165. old_e->nxt_idx = idx1;
  1166. }
  1167. }
  1168. }
  1169. }
  1170. }
  1171. /* now sort the rows arrays */
  1172. for ( i = 0; i < A->m; i++ )
  1173. qsort(A->row[i].elt,A->row[i].len,sizeof(row_elt),(int(*)())col_cmp);
  1174. A->flag_col = A->flag_diag = FALSE;
  1175. #ifdef THREADSAFE
  1176. IV_FREE(scan_row); IV_FREE(scan_idx); IV_FREE(col_list);
  1177. IV_FREE(tmp_iv); IV_FREE(deg_list); IV_FREE(orig_idx);
  1178. IV_FREE(orig1_idx); PX_FREE(order);
  1179. #endif
  1180. return A;
  1181. }
  1182. /* spBKPsolve -- solves A.x = b where A has been factored a la BKPfactor()
  1183. -- returns x, which is created if NULL */
  1184. #ifndef ANSI_C
  1185. VEC *spBKPsolve(A,pivot,block,b,x)
  1186. SPMAT *A;
  1187. PERM *pivot, *block;
  1188. VEC *b, *x;
  1189. #else
  1190. VEC *spBKPsolve(SPMAT *A, PERM *pivot, PERM *block,
  1191. const VEC *b, VEC *x)
  1192. #endif
  1193. {
  1194. STATIC VEC *tmp=VNULL; /* dummy storage needed */
  1195. int i /* , j */, n, onebyone;
  1196. int row_num, idx;
  1197. Real a11, a12, a22, b1, b2, det, sum, *tmp_ve, tmp_diag;
  1198. SPROW *r;
  1199. row_elt *e;
  1200. if ( ! A || ! pivot || ! block || ! b )
  1201. error(E_NULL,"spBKPsolve");
  1202. if ( A->m != A->n )
  1203. error(E_SQUARE,"spBKPsolve");
  1204. n = A->n;
  1205. if ( b->dim != n || pivot->size != n || block->size != n )
  1206. error(E_SIZES,"spBKPsolve");
  1207. x = v_resize(x,n);
  1208. tmp = v_resize(tmp,n);
  1209. MEM_STAT_REG(tmp,TYPE_VEC);
  1210. tmp_ve = tmp->ve;
  1211. if ( ! A->flag_col )
  1212. sp_col_access(A);
  1213. px_vec(pivot,b,tmp);
  1214. /* printf("# BKPsolve: effect of pivot: tmp =\n"); v_output(tmp); */
  1215. /* solve for lower triangular part */
  1216. for ( i = 0; i < n; i++ )
  1217. {
  1218. sum = tmp_ve[i];
  1219. if ( block->pe[i] < i )
  1220. {
  1221. /* for ( j = 0; j < i-1; j++ )
  1222. sum -= A_me[j][i]*tmp_ve[j]; */
  1223. row_num = -1; idx = i;
  1224. e = bump_col(A,i,&row_num,&idx);
  1225. while ( row_num >= 0 && row_num < i-1 )
  1226. {
  1227. sum -= e->val*tmp_ve[row_num];
  1228. e = bump_col(A,i,&row_num,&idx);
  1229. }
  1230. }
  1231. else
  1232. {
  1233. /* for ( j = 0; j < i; j++ )
  1234. sum -= A_me[j][i]*tmp_ve[j]; */
  1235. row_num = -1; idx = i;
  1236. e = bump_col(A,i,&row_num,&idx);
  1237. while ( row_num >= 0 && row_num < i )
  1238. {
  1239. sum -= e->val*tmp_ve[row_num];
  1240. e = bump_col(A,i,&row_num,&idx);
  1241. }
  1242. }
  1243. tmp_ve[i] = sum;
  1244. }
  1245. /* printf("# BKPsolve: solving L part: tmp =\n"); v_output(tmp); */
  1246. /* solve for diagonal part */
  1247. for ( i = 0; i < n; i = onebyone ? i+1 : i+2 )
  1248. {
  1249. onebyone = ( block->pe[i] == i );
  1250. if ( onebyone )
  1251. {
  1252. /* tmp_ve[i] /= A_me[i][i]; */
  1253. tmp_diag = sp_get_val(A,i,i);
  1254. if ( tmp_diag == 0.0 )
  1255. error(E_SING,"spBKPsolve");
  1256. tmp_ve[i] /= tmp_diag;
  1257. }
  1258. else
  1259. {
  1260. a11 = sp_get_val(A,i,i);
  1261. a22 = sp_get_val(A,i+1,i+1);
  1262. a12 = sp_get_val(A,i,i+1);
  1263. b1 = tmp_ve[i];
  1264. b2 = tmp_ve[i+1];
  1265. det = a11*a22-a12*a12; /* < 0 : see BKPfactor() */
  1266. if ( det == 0.0 )
  1267. error(E_SING,"BKPsolve");
  1268. det = 1/det;
  1269. tmp_ve[i] = det*(a22*b1-a12*b2);
  1270. tmp_ve[i+1] = det*(a11*b2-a12*b1);
  1271. }
  1272. }
  1273. /* printf("# BKPsolve: solving D part: tmp =\n"); v_output(tmp); */
  1274. /* solve for transpose of lower triangular part */
  1275. for ( i = n-2; i >= 0; i-- )
  1276. {
  1277. sum = tmp_ve[i];
  1278. if ( block->pe[i] > i )
  1279. {
  1280. /* onebyone is false */
  1281. /* for ( j = i+2; j < n; j++ )
  1282. sum -= A_me[i][j]*tmp_ve[j]; */
  1283. if ( i+2 >= n )
  1284. continue;
  1285. r = &(A->row[i]);
  1286. idx = sprow_idx(r,i+2);
  1287. idx = fixindex(idx);
  1288. e = &(r->elt[idx]);
  1289. for ( ; idx < r->len; idx++, e++ )
  1290. sum -= e->val*tmp_ve[e->col];
  1291. }
  1292. else /* onebyone */
  1293. {
  1294. /* for ( j = i+1; j < n; j++ )
  1295. sum -= A_me[i][j]*tmp_ve[j]; */
  1296. r = &(A->row[i]);
  1297. idx = sprow_idx(r,i+1);
  1298. idx = fixindex(idx);
  1299. e = &(r->elt[idx]);
  1300. for ( ; idx < r->len; idx++, e++ )
  1301. sum -= e->val*tmp_ve[e->col];
  1302. }
  1303. tmp_ve[i] = sum;
  1304. }
  1305. /* printf("# BKPsolve: solving L^T part: tmp =\n");v_output(tmp); */
  1306. /* and do final permutation */
  1307. x = pxinv_vec(pivot,tmp,x);
  1308. #ifdef THREADSAFE
  1309. V_FREE(tmp);
  1310. #endif
  1311. return x;
  1312. }