Actual source code: aijfact.c
1: #define PETSCMAT_DLL
3: #include src/mat/impls/aij/seq/aij.h
4: #include src/inline/dot.h
5: #include src/inline/spops.h
6: #include petscbt.h
7: #include src/mat/utils/freespace.h
11: PetscErrorCode MatOrdering_Flow_SeqAIJ(Mat mat,const MatOrderingType type,IS *irow,IS *icol)
12: {
15: SETERRQ(PETSC_ERR_SUP,"Code not written");
16: #if !defined(PETSC_USE_DEBUG)
17: return(0);
18: #endif
19: }
22: EXTERN PetscErrorCode MatMarkDiagonal_SeqAIJ(Mat);
24: #if !defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
25: EXTERN PetscErrorCode SPARSEKIT2dperm(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscInt*,PetscInt*);
26: EXTERN PetscErrorCode SPARSEKIT2ilutp(PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscReal,PetscReal*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscErrorCode*);
27: EXTERN PetscErrorCode SPARSEKIT2msrcsr(PetscInt*,PetscScalar*,PetscInt*,PetscScalar*,PetscInt*,PetscInt*,PetscScalar*,PetscInt*);
28: #endif
32: /* ------------------------------------------------------------
34: This interface was contribed by Tony Caola
36: This routine is an interface to the pivoting drop-tolerance
37: ILU routine written by Yousef Saad (saad@cs.umn.edu) as part of
38: SPARSEKIT2.
40: The SPARSEKIT2 routines used here are covered by the GNU
41: copyright; see the file gnu in this directory.
43: Thanks to Prof. Saad, Dr. Hysom, and Dr. Smith for their
44: help in getting this routine ironed out.
46: The major drawback to this routine is that if info->fill is
47: not large enough it fails rather than allocating more space;
48: this can be fixed by hacking/improving the f2c version of
49: Yousef Saad's code.
51: ------------------------------------------------------------
52: */
53: PetscErrorCode MatILUDTFactor_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
54: {
55: #if defined(PETSC_AVOID_GNUCOPYRIGHT_CODE)
57: SETERRQ(PETSC_ERR_SUP_SYS,"This distribution does not include GNU Copyright code\n\
58: You can obtain the drop tolerance routines by installing PETSc from\n\
59: www.mcs.anl.gov/petsc\n");
60: #else
61: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
62: IS iscolf,isicol,isirow;
63: PetscTruth reorder;
64: PetscErrorCode ierr,sierr;
65: PetscInt *c,*r,*ic,i,n = A->rmap.n;
66: PetscInt *old_i = a->i,*old_j = a->j,*new_i,*old_i2 = 0,*old_j2 = 0,*new_j;
67: PetscInt *ordcol,*iwk,*iperm,*jw;
68: PetscInt jmax,lfill,job,*o_i,*o_j;
69: PetscScalar *old_a = a->a,*w,*new_a,*old_a2 = 0,*wk,*o_a;
70: PetscReal af;
74: if (info->dt == PETSC_DEFAULT) info->dt = .005;
75: if (info->dtcount == PETSC_DEFAULT) info->dtcount = (PetscInt)(1.5*a->rmax);
76: if (info->dtcol == PETSC_DEFAULT) info->dtcol = .01;
77: if (info->fill == PETSC_DEFAULT) info->fill = ((double)(n*(info->dtcount+1)))/a->nz;
78: lfill = (PetscInt)(info->dtcount/2.0);
79: jmax = (PetscInt)(info->fill*a->nz);
82: /* ------------------------------------------------------------
83: If reorder=.TRUE., then the original matrix has to be
84: reordered to reflect the user selected ordering scheme, and
85: then de-reordered so it is in it's original format.
86: Because Saad's dperm() is NOT in place, we have to copy
87: the original matrix and allocate more storage. . .
88: ------------------------------------------------------------
89: */
91: /* set reorder to true if either isrow or iscol is not identity */
92: ISIdentity(isrow,&reorder);
93: if (reorder) {ISIdentity(iscol,&reorder);}
94: reorder = PetscNot(reorder);
96:
97: /* storage for ilu factor */
98: PetscMalloc((n+1)*sizeof(PetscInt),&new_i);
99: PetscMalloc(jmax*sizeof(PetscInt),&new_j);
100: PetscMalloc(jmax*sizeof(PetscScalar),&new_a);
101: PetscMalloc(n*sizeof(PetscInt),&ordcol);
103: /* ------------------------------------------------------------
104: Make sure that everything is Fortran formatted (1-Based)
105: ------------------------------------------------------------
106: */
107: for (i=old_i[0];i<old_i[n];i++) {
108: old_j[i]++;
109: }
110: for(i=0;i<n+1;i++) {
111: old_i[i]++;
112: };
113:
115: if (reorder) {
116: ISGetIndices(iscol,&c);
117: ISGetIndices(isrow,&r);
118: for(i=0;i<n;i++) {
119: r[i] = r[i]+1;
120: c[i] = c[i]+1;
121: }
122: PetscMalloc((n+1)*sizeof(PetscInt),&old_i2);
123: PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscInt),&old_j2);
124: PetscMalloc((old_i[n]-old_i[0]+1)*sizeof(PetscScalar),&old_a2);
125: job = 3; SPARSEKIT2dperm(&n,old_a,old_j,old_i,old_a2,old_j2,old_i2,r,c,&job);
126: for (i=0;i<n;i++) {
127: r[i] = r[i]-1;
128: c[i] = c[i]-1;
129: }
130: ISRestoreIndices(iscol,&c);
131: ISRestoreIndices(isrow,&r);
132: o_a = old_a2;
133: o_j = old_j2;
134: o_i = old_i2;
135: } else {
136: o_a = old_a;
137: o_j = old_j;
138: o_i = old_i;
139: }
141: /* ------------------------------------------------------------
142: Call Saad's ilutp() routine to generate the factorization
143: ------------------------------------------------------------
144: */
146: PetscMalloc(2*n*sizeof(PetscInt),&iperm);
147: PetscMalloc(2*n*sizeof(PetscInt),&jw);
148: PetscMalloc(n*sizeof(PetscScalar),&w);
150: SPARSEKIT2ilutp(&n,o_a,o_j,o_i,&lfill,(PetscReal)info->dt,&info->dtcol,&n,new_a,new_j,new_i,&jmax,w,jw,iperm,&sierr);
151: if (sierr) {
152: switch (sierr) {
153: case -3: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix U overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
154: case -2: SETERRQ2(PETSC_ERR_LIB,"ilutp(), matrix L overflows, need larger info->fill current fill %G space allocated %D",info->fill,jmax);
155: case -5: SETERRQ(PETSC_ERR_LIB,"ilutp(), zero row encountered");
156: case -1: SETERRQ(PETSC_ERR_LIB,"ilutp(), input matrix may be wrong");
157: case -4: SETERRQ1(PETSC_ERR_LIB,"ilutp(), illegal info->fill value %D",jmax);
158: default: SETERRQ1(PETSC_ERR_LIB,"ilutp(), zero pivot detected on row %D",sierr);
159: }
160: }
162: PetscFree(w);
163: PetscFree(jw);
165: /* ------------------------------------------------------------
166: Saad's routine gives the result in Modified Sparse Row (msr)
167: Convert to Compressed Sparse Row format (csr)
168: ------------------------------------------------------------
169: */
171: PetscMalloc(n*sizeof(PetscScalar),&wk);
172: PetscMalloc((n+1)*sizeof(PetscInt),&iwk);
174: SPARSEKIT2msrcsr(&n,new_a,new_j,new_a,new_j,new_i,wk,iwk);
176: PetscFree(iwk);
177: PetscFree(wk);
179: if (reorder) {
180: PetscFree(old_a2);
181: PetscFree(old_j2);
182: PetscFree(old_i2);
183: } else {
184: /* fix permutation of old_j that the factorization introduced */
185: for (i=old_i[0]; i<old_i[n]; i++) {
186: old_j[i-1] = iperm[old_j[i-1]-1];
187: }
188: }
190: /* get rid of the shift to indices starting at 1 */
191: for (i=0; i<n+1; i++) {
192: old_i[i]--;
193: }
194: for (i=old_i[0];i<old_i[n];i++) {
195: old_j[i]--;
196: }
197:
198: /* Make the factored matrix 0-based */
199: for (i=0; i<n+1; i++) {
200: new_i[i]--;
201: }
202: for (i=new_i[0];i<new_i[n];i++) {
203: new_j[i]--;
204: }
206: /*-- due to the pivoting, we need to reorder iscol to correctly --*/
207: /*-- permute the right-hand-side and solution vectors --*/
208: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
209: ISInvertPermutation(isrow,PETSC_DECIDE,&isirow);
210: ISGetIndices(isicol,&ic);
211: for(i=0; i<n; i++) {
212: ordcol[i] = ic[iperm[i]-1];
213: };
214: ISRestoreIndices(isicol,&ic);
215: ISDestroy(isicol);
217: PetscFree(iperm);
219: ISCreateGeneral(PETSC_COMM_SELF,n,ordcol,&iscolf);
220: PetscFree(ordcol);
222: /*----- put together the new matrix -----*/
224: MatCreate(A->comm,fact);
225: MatSetSizes(*fact,n,n,n,n);
226: MatSetType(*fact,A->type_name);
227: MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
228: (*fact)->factor = FACTOR_LU;
229: (*fact)->assembled = PETSC_TRUE;
231: b = (Mat_SeqAIJ*)(*fact)->data;
232: b->freedata = PETSC_TRUE;
233: b->sorted = PETSC_FALSE;
234: b->singlemalloc = PETSC_FALSE;
235: b->a = new_a;
236: b->j = new_j;
237: b->i = new_i;
238: b->ilen = 0;
239: b->imax = 0;
240: /* I am not sure why these are the inverses of the row and column permutations; but the other way is NO GOOD */
241: b->row = isirow;
242: b->col = iscolf;
243: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
244: b->maxnz = b->nz = new_i[n];
245: MatMarkDiagonal_SeqAIJ(*fact);
246: (*fact)->info.factor_mallocs = 0;
248: MatMarkDiagonal_SeqAIJ(A);
250: af = ((double)b->nz)/((double)a->nz) + .001;
251: PetscInfo2(A,"Fill ratio:given %G needed %G\n",info->fill,af);
252: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
253: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
254: PetscInfo(A,"for best performance.\n");
256: MatILUDTFactor_Inode(A,isrow,iscol,info,fact);
258: return(0);
259: #endif
260: }
264: PetscErrorCode MatLUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *B)
265: {
266: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
267: IS isicol;
268: PetscErrorCode ierr;
269: PetscInt *r,*ic,i,n=A->rmap.n,*ai=a->i,*aj=a->j;
270: PetscInt *bi,*bj,*ajtmp;
271: PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
272: PetscReal f;
273: PetscInt nlnk,*lnk,k,**bi_ptr;
274: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
275: PetscBT lnkbt;
278: if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
279: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
280: ISGetIndices(isrow,&r);
281: ISGetIndices(isicol,&ic);
283: /* get new row pointers */
284: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
285: bi[0] = 0;
287: /* bdiag is location of diagonal in factor */
288: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
289: bdiag[0] = 0;
291: /* linked list for storing column indices of the active row */
292: nlnk = n + 1;
293: PetscLLCreate(n,n,nlnk,lnk,lnkbt);
295: PetscMalloc((n+1)*sizeof(PetscInt)+n*sizeof(PetscInt**),&im);
296: bi_ptr = (PetscInt**)(im + n);
298: /* initial FreeSpace size is f*(ai[n]+1) */
299: f = info->fill;
300: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
301: current_space = free_space;
303: for (i=0; i<n; i++) {
304: /* copy previous fill into linked list */
305: nzi = 0;
306: nnz = ai[r[i]+1] - ai[r[i]];
307: if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
308: ajtmp = aj + ai[r[i]];
309: PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);
310: nzi += nlnk;
312: /* add pivot rows into linked list */
313: row = lnk[n];
314: while (row < i) {
315: nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
316: ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
317: PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);
318: nzi += nlnk;
319: row = lnk[row];
320: }
321: bi[i+1] = bi[i] + nzi;
322: im[i] = nzi;
324: /* mark bdiag */
325: nzbd = 0;
326: nnz = nzi;
327: k = lnk[n];
328: while (nnz-- && k < i){
329: nzbd++;
330: k = lnk[k];
331: }
332: bdiag[i] = bi[i] + nzbd;
334: /* if free space is not available, make more free space */
335: if (current_space->local_remaining<nzi) {
336: nnz = (n - i)*nzi; /* estimated and max additional space needed */
337: PetscFreeSpaceGet(nnz,¤t_space);
338: reallocs++;
339: }
341: /* copy data into free space, then initialize lnk */
342: PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);
343: bi_ptr[i] = current_space->array;
344: current_space->array += nzi;
345: current_space->local_used += nzi;
346: current_space->local_remaining -= nzi;
347: }
348: #if defined(PETSC_USE_INFO)
349: if (ai[n] != 0) {
350: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
351: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
352: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
353: PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);
354: PetscInfo(A,"for best performance.\n");
355: } else {
356: PetscInfo(A,"Empty matrix\n");
357: }
358: #endif
360: ISRestoreIndices(isrow,&r);
361: ISRestoreIndices(isicol,&ic);
363: /* destroy list of free space and other temporary array(s) */
364: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
365: PetscFreeSpaceContiguous(&free_space,bj);
366: PetscLLDestroy(lnk,lnkbt);
367: PetscFree(im);
369: /* put together the new matrix */
370: MatCreate(A->comm,B);
371: MatSetSizes(*B,n,n,n,n);
372: MatSetType(*B,A->type_name);
373: MatSeqAIJSetPreallocation_SeqAIJ(*B,MAT_SKIP_ALLOCATION,PETSC_NULL);
374: PetscLogObjectParent(*B,isicol);
375: b = (Mat_SeqAIJ*)(*B)->data;
376: b->freedata = PETSC_TRUE;
377: b->singlemalloc = PETSC_FALSE;
378: PetscMalloc((bi[n]+1)*sizeof(PetscScalar),&b->a);
379: b->j = bj;
380: b->i = bi;
381: b->diag = bdiag;
382: b->ilen = 0;
383: b->imax = 0;
384: b->row = isrow;
385: b->col = iscol;
386: PetscObjectReference((PetscObject)isrow);
387: PetscObjectReference((PetscObject)iscol);
388: b->icol = isicol;
389: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
391: /* In b structure: Free imax, ilen, old a, old j. Allocate solve_work, new a, new j */
392: PetscLogObjectMemory(*B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)));
393: b->maxnz = b->nz = bi[n] ;
395: (*B)->factor = FACTOR_LU;
396: (*B)->info.factor_mallocs = reallocs;
397: (*B)->info.fill_ratio_given = f;
399: if (ai[n] != 0) {
400: (*B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
401: } else {
402: (*B)->info.fill_ratio_needed = 0.0;
403: }
404: MatLUFactorSymbolic_Inode(A,isrow,iscol,info,B);
405: (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
406: return(0);
407: }
409: /* ----------------------------------------------------------- */
412: PetscErrorCode MatLUFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
413: {
414: Mat C=*B;
415: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*b=(Mat_SeqAIJ *)C->data;
416: IS isrow = b->row,isicol = b->icol;
418: PetscInt *r,*ic,i,j,n=A->rmap.n,*bi=b->i,*bj=b->j;
419: PetscInt *ajtmp,*bjtmp,nz,row,*ics;
420: PetscInt *diag_offset = b->diag,diag,*pj;
421: PetscScalar *rtmp,*v,*pc,multiplier,*pv,*rtmps;
422: PetscScalar d;
423: PetscReal rs;
424: LUShift_Ctx sctx;
425: PetscInt newshift;
428: ISGetIndices(isrow,&r);
429: ISGetIndices(isicol,&ic);
430: PetscMalloc((n+1)*sizeof(PetscScalar),&rtmp);
431: PetscMemzero(rtmp,(n+1)*sizeof(PetscScalar));
432: rtmps = rtmp; ics = ic;
434: sctx.shift_top = 0;
435: sctx.nshift_max = 0;
436: sctx.shift_lo = 0;
437: sctx.shift_hi = 0;
439: if (!a->diag) {
440: MatMarkDiagonal_SeqAIJ(A);
441: }
442: /* if both shift schemes are chosen by user, only use info->shiftpd */
443: if (info->shiftpd && info->shiftnz) info->shiftnz = 0.0;
444: if (info->shiftpd) { /* set sctx.shift_top=max{rs} */
445: PetscInt *aai = a->i,*ddiag = a->diag;
446: sctx.shift_top = 0;
447: for (i=0; i<n; i++) {
448: /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
449: d = (a->a)[ddiag[i]];
450: rs = -PetscAbsScalar(d) - PetscRealPart(d);
451: v = a->a+aai[i];
452: nz = aai[i+1] - aai[i];
453: for (j=0; j<nz; j++)
454: rs += PetscAbsScalar(v[j]);
455: if (rs>sctx.shift_top) sctx.shift_top = rs;
456: }
457: if (sctx.shift_top < info->zeropivot) sctx.shift_top = info->zeropivot;
458: sctx.shift_top *= 1.1;
459: sctx.nshift_max = 5;
460: sctx.shift_lo = 0.;
461: sctx.shift_hi = 1.;
462: }
464: sctx.shift_amount = 0;
465: sctx.nshift = 0;
466: do {
467: sctx.lushift = PETSC_FALSE;
468: for (i=0; i<n; i++){
469: nz = bi[i+1] - bi[i];
470: bjtmp = bj + bi[i];
471: for (j=0; j<nz; j++) rtmps[bjtmp[j]] = 0.0;
473: /* load in initial (unfactored row) */
474: nz = a->i[r[i]+1] - a->i[r[i]];
475: ajtmp = a->j + a->i[r[i]];
476: v = a->a + a->i[r[i]];
477: for (j=0; j<nz; j++) {
478: rtmp[ics[ajtmp[j]]] = v[j];
479: }
480: rtmp[ics[r[i]]] += sctx.shift_amount; /* shift the diagonal of the matrix */
482: row = *bjtmp++;
483: while (row < i) {
484: pc = rtmp + row;
485: if (*pc != 0.0) {
486: pv = b->a + diag_offset[row];
487: pj = b->j + diag_offset[row] + 1;
488: multiplier = *pc / *pv++;
489: *pc = multiplier;
490: nz = bi[row+1] - diag_offset[row] - 1;
491: for (j=0; j<nz; j++) rtmps[pj[j]] -= multiplier * pv[j];
492: PetscLogFlops(2*nz);
493: }
494: row = *bjtmp++;
495: }
496: /* finished row so stick it into b->a */
497: pv = b->a + bi[i] ;
498: pj = b->j + bi[i] ;
499: nz = bi[i+1] - bi[i];
500: diag = diag_offset[i] - bi[i];
501: rs = 0.0;
502: for (j=0; j<nz; j++) {
503: pv[j] = rtmps[pj[j]];
504: if (j != diag) rs += PetscAbsScalar(pv[j]);
505: }
507: /* 9/13/02 Victor Eijkhout suggested scaling zeropivot by rs for matrices with funny scalings */
508: sctx.rs = rs;
509: sctx.pv = pv[diag];
510: MatLUCheckShift_inline(info,sctx,newshift);
511: if (newshift == 1){
512: break; /* sctx.shift_amount is updated */
513: } else if (newshift == -1){
514: SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %G tolerance %G * rs %G",i,PetscAbsScalar(sctx.pv),info->zeropivot,rs);
515: }
516: }
518: if (info->shiftpd && !sctx.lushift && info->shift_fraction>0 && sctx.nshift<sctx.nshift_max) {
519: /*
520: * if no shift in this attempt & shifting & started shifting & can refine,
521: * then try lower shift
522: */
523: sctx.shift_hi = info->shift_fraction;
524: info->shift_fraction = (sctx.shift_hi+sctx.shift_lo)/2.;
525: sctx.shift_amount = info->shift_fraction * sctx.shift_top;
526: sctx.lushift = PETSC_TRUE;
527: sctx.nshift++;
528: }
529: } while (sctx.lushift);
531: /* invert diagonal entries for simplier triangular solves */
532: for (i=0; i<n; i++) {
533: b->a[diag_offset[i]] = 1.0/b->a[diag_offset[i]];
534: }
536: PetscFree(rtmp);
537: ISRestoreIndices(isicol,&ic);
538: ISRestoreIndices(isrow,&r);
539: C->factor = FACTOR_LU;
540: (*B)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
541: C->assembled = PETSC_TRUE;
542: PetscLogFlops(C->cmap.n);
543: if (sctx.nshift){
544: if (info->shiftnz) {
545: PetscInfo2(0,"number of shift_nz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
546: } else if (info->shiftpd) {
547: PetscInfo4(0,"number of shift_pd tries %D, shift_amount %G, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,sctx.shift_amount,info->shift_fraction,sctx.shift_top);
548: }
549: }
550: return(0);
551: }
555: PetscErrorCode MatUsePETSc_SeqAIJ(Mat A)
556: {
558: A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ;
559: A->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ;
560: return(0);
561: }
564: /* ----------------------------------------------------------- */
567: PetscErrorCode MatLUFactor_SeqAIJ(Mat A,IS row,IS col,MatFactorInfo *info)
568: {
570: Mat C;
573: MatLUFactorSymbolic(A,row,col,info,&C);
574: MatLUFactorNumeric(A,info,&C);
575: MatHeaderCopy(A,C);
576: PetscLogObjectParent(A,((Mat_SeqAIJ*)(A->data))->icol);
577: return(0);
578: }
579: /* ----------------------------------------------------------- */
582: PetscErrorCode MatSolve_SeqAIJ(Mat A,Vec bb,Vec xx)
583: {
584: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
585: IS iscol = a->col,isrow = a->row;
587: PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
588: PetscInt nz,*rout,*cout;
589: PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
592: if (!n) return(0);
594: VecGetArray(bb,&b);
595: VecGetArray(xx,&x);
596: tmp = a->solve_work;
598: ISGetIndices(isrow,&rout); r = rout;
599: ISGetIndices(iscol,&cout); c = cout + (n-1);
601: /* forward solve the lower triangular */
602: tmp[0] = b[*r++];
603: tmps = tmp;
604: for (i=1; i<n; i++) {
605: v = aa + ai[i] ;
606: vi = aj + ai[i] ;
607: nz = a->diag[i] - ai[i];
608: sum = b[*r++];
609: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
610: tmp[i] = sum;
611: }
613: /* backward solve the upper triangular */
614: for (i=n-1; i>=0; i--){
615: v = aa + a->diag[i] + 1;
616: vi = aj + a->diag[i] + 1;
617: nz = ai[i+1] - a->diag[i] - 1;
618: sum = tmp[i];
619: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
620: x[*c--] = tmp[i] = sum*aa[a->diag[i]];
621: }
623: ISRestoreIndices(isrow,&rout);
624: ISRestoreIndices(iscol,&cout);
625: VecRestoreArray(bb,&b);
626: VecRestoreArray(xx,&x);
627: PetscLogFlops(2*a->nz - A->cmap.n);
628: return(0);
629: }
633: PetscErrorCode MatMatSolve_SeqAIJ(Mat A,Mat B,Mat X)
634: {
635: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
636: IS iscol = a->col,isrow = a->row;
638: PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
639: PetscInt nz,*rout,*cout,neq;
640: PetscScalar *x,*b,*tmp,*tmps,*aa = a->a,sum,*v;
643: if (!n) return(0);
645: MatGetArray(B,&b);
646: MatGetArray(X,&x);
647:
648: tmp = a->solve_work;
649: ISGetIndices(isrow,&rout); r = rout;
650: ISGetIndices(iscol,&cout); c = cout;
652: for (neq=0; neq<n; neq++){
653: /* forward solve the lower triangular */
654: tmp[0] = b[r[0]];
655: tmps = tmp;
656: for (i=1; i<n; i++) {
657: v = aa + ai[i] ;
658: vi = aj + ai[i] ;
659: nz = a->diag[i] - ai[i];
660: sum = b[r[i]];
661: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
662: tmp[i] = sum;
663: }
664: /* backward solve the upper triangular */
665: for (i=n-1; i>=0; i--){
666: v = aa + a->diag[i] + 1;
667: vi = aj + a->diag[i] + 1;
668: nz = ai[i+1] - a->diag[i] - 1;
669: sum = tmp[i];
670: SPARSEDENSEMDOT(sum,tmps,v,vi,nz);
671: x[c[i]] = tmp[i] = sum*aa[a->diag[i]];
672: }
674: b += n;
675: x += n;
676: }
677: ISRestoreIndices(isrow,&rout);
678: ISRestoreIndices(iscol,&cout);
679: MatRestoreArray(B,&b);
680: MatRestoreArray(X,&x);
681: PetscLogFlops(n*(2*a->nz - n));
682: return(0);
683: }
685: /* ----------------------------------------------------------- */
688: PetscErrorCode MatSolve_SeqAIJ_NaturalOrdering(Mat A,Vec bb,Vec xx)
689: {
690: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
692: PetscInt n = A->rmap.n,*ai = a->i,*aj = a->j,*adiag = a->diag;
693: PetscScalar *x,*b,*aa = a->a;
694: #if !defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
695: PetscInt adiag_i,i,*vi,nz,ai_i;
696: PetscScalar *v,sum;
697: #endif
700: if (!n) return(0);
702: VecGetArray(bb,&b);
703: VecGetArray(xx,&x);
705: #if defined(PETSC_USE_FORTRAN_KERNEL_SOLVEAIJ)
706: fortransolveaij_(&n,x,ai,aj,adiag,aa,b);
707: #else
708: /* forward solve the lower triangular */
709: x[0] = b[0];
710: for (i=1; i<n; i++) {
711: ai_i = ai[i];
712: v = aa + ai_i;
713: vi = aj + ai_i;
714: nz = adiag[i] - ai_i;
715: sum = b[i];
716: while (nz--) sum -= *v++ * x[*vi++];
717: x[i] = sum;
718: }
720: /* backward solve the upper triangular */
721: for (i=n-1; i>=0; i--){
722: adiag_i = adiag[i];
723: v = aa + adiag_i + 1;
724: vi = aj + adiag_i + 1;
725: nz = ai[i+1] - adiag_i - 1;
726: sum = x[i];
727: while (nz--) sum -= *v++ * x[*vi++];
728: x[i] = sum*aa[adiag_i];
729: }
730: #endif
731: PetscLogFlops(2*a->nz - A->cmap.n);
732: VecRestoreArray(bb,&b);
733: VecRestoreArray(xx,&x);
734: return(0);
735: }
739: PetscErrorCode MatSolveAdd_SeqAIJ(Mat A,Vec bb,Vec yy,Vec xx)
740: {
741: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
742: IS iscol = a->col,isrow = a->row;
744: PetscInt *r,*c,i, n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
745: PetscInt nz,*rout,*cout;
746: PetscScalar *x,*b,*tmp,*aa = a->a,sum,*v;
749: if (yy != xx) {VecCopy(yy,xx);}
751: VecGetArray(bb,&b);
752: VecGetArray(xx,&x);
753: tmp = a->solve_work;
755: ISGetIndices(isrow,&rout); r = rout;
756: ISGetIndices(iscol,&cout); c = cout + (n-1);
758: /* forward solve the lower triangular */
759: tmp[0] = b[*r++];
760: for (i=1; i<n; i++) {
761: v = aa + ai[i] ;
762: vi = aj + ai[i] ;
763: nz = a->diag[i] - ai[i];
764: sum = b[*r++];
765: while (nz--) sum -= *v++ * tmp[*vi++ ];
766: tmp[i] = sum;
767: }
769: /* backward solve the upper triangular */
770: for (i=n-1; i>=0; i--){
771: v = aa + a->diag[i] + 1;
772: vi = aj + a->diag[i] + 1;
773: nz = ai[i+1] - a->diag[i] - 1;
774: sum = tmp[i];
775: while (nz--) sum -= *v++ * tmp[*vi++ ];
776: tmp[i] = sum*aa[a->diag[i]];
777: x[*c--] += tmp[i];
778: }
780: ISRestoreIndices(isrow,&rout);
781: ISRestoreIndices(iscol,&cout);
782: VecRestoreArray(bb,&b);
783: VecRestoreArray(xx,&x);
784: PetscLogFlops(2*a->nz);
786: return(0);
787: }
788: /* -------------------------------------------------------------------*/
791: PetscErrorCode MatSolveTranspose_SeqAIJ(Mat A,Vec bb,Vec xx)
792: {
793: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
794: IS iscol = a->col,isrow = a->row;
796: PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
797: PetscInt nz,*rout,*cout,*diag = a->diag;
798: PetscScalar *x,*b,*tmp,*aa = a->a,*v,s1;
801: VecGetArray(bb,&b);
802: VecGetArray(xx,&x);
803: tmp = a->solve_work;
805: ISGetIndices(isrow,&rout); r = rout;
806: ISGetIndices(iscol,&cout); c = cout;
808: /* copy the b into temp work space according to permutation */
809: for (i=0; i<n; i++) tmp[i] = b[c[i]];
811: /* forward solve the U^T */
812: for (i=0; i<n; i++) {
813: v = aa + diag[i] ;
814: vi = aj + diag[i] + 1;
815: nz = ai[i+1] - diag[i] - 1;
816: s1 = tmp[i];
817: s1 *= (*v++); /* multiply by inverse of diagonal entry */
818: while (nz--) {
819: tmp[*vi++ ] -= (*v++)*s1;
820: }
821: tmp[i] = s1;
822: }
824: /* backward solve the L^T */
825: for (i=n-1; i>=0; i--){
826: v = aa + diag[i] - 1 ;
827: vi = aj + diag[i] - 1 ;
828: nz = diag[i] - ai[i];
829: s1 = tmp[i];
830: while (nz--) {
831: tmp[*vi-- ] -= (*v--)*s1;
832: }
833: }
835: /* copy tmp into x according to permutation */
836: for (i=0; i<n; i++) x[r[i]] = tmp[i];
838: ISRestoreIndices(isrow,&rout);
839: ISRestoreIndices(iscol,&cout);
840: VecRestoreArray(bb,&b);
841: VecRestoreArray(xx,&x);
843: PetscLogFlops(2*a->nz-A->cmap.n);
844: return(0);
845: }
849: PetscErrorCode MatSolveTransposeAdd_SeqAIJ(Mat A,Vec bb,Vec zz,Vec xx)
850: {
851: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
852: IS iscol = a->col,isrow = a->row;
854: PetscInt *r,*c,i,n = A->rmap.n,*vi,*ai = a->i,*aj = a->j;
855: PetscInt nz,*rout,*cout,*diag = a->diag;
856: PetscScalar *x,*b,*tmp,*aa = a->a,*v;
859: if (zz != xx) {VecCopy(zz,xx);}
861: VecGetArray(bb,&b);
862: VecGetArray(xx,&x);
863: tmp = a->solve_work;
865: ISGetIndices(isrow,&rout); r = rout;
866: ISGetIndices(iscol,&cout); c = cout;
868: /* copy the b into temp work space according to permutation */
869: for (i=0; i<n; i++) tmp[i] = b[c[i]];
871: /* forward solve the U^T */
872: for (i=0; i<n; i++) {
873: v = aa + diag[i] ;
874: vi = aj + diag[i] + 1;
875: nz = ai[i+1] - diag[i] - 1;
876: tmp[i] *= *v++;
877: while (nz--) {
878: tmp[*vi++ ] -= (*v++)*tmp[i];
879: }
880: }
882: /* backward solve the L^T */
883: for (i=n-1; i>=0; i--){
884: v = aa + diag[i] - 1 ;
885: vi = aj + diag[i] - 1 ;
886: nz = diag[i] - ai[i];
887: while (nz--) {
888: tmp[*vi-- ] -= (*v--)*tmp[i];
889: }
890: }
892: /* copy tmp into x according to permutation */
893: for (i=0; i<n; i++) x[r[i]] += tmp[i];
895: ISRestoreIndices(isrow,&rout);
896: ISRestoreIndices(iscol,&cout);
897: VecRestoreArray(bb,&b);
898: VecRestoreArray(xx,&x);
900: PetscLogFlops(2*a->nz);
901: return(0);
902: }
903: /* ----------------------------------------------------------------*/
904: EXTERN PetscErrorCode MatMissingDiagonal_SeqAIJ(Mat);
908: PetscErrorCode MatILUFactorSymbolic_SeqAIJ(Mat A,IS isrow,IS iscol,MatFactorInfo *info,Mat *fact)
909: {
910: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data,*b;
911: IS isicol;
912: PetscErrorCode ierr;
913: PetscInt *r,*ic,n=A->rmap.n,*ai=a->i,*aj=a->j;
914: PetscInt *bi,*cols,nnz,*cols_lvl;
915: PetscInt *bdiag,prow,fm,nzbd,len, reallocs=0,dcount=0;
916: PetscInt i,levels,diagonal_fill;
917: PetscTruth col_identity,row_identity;
918: PetscReal f;
919: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL;
920: PetscBT lnkbt;
921: PetscInt nzi,*bj,**bj_ptr,**bjlvl_ptr;
922: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
923: PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
924:
926: f = info->fill;
927: levels = (PetscInt)info->levels;
928: diagonal_fill = (PetscInt)info->diagonal_fill;
929: ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
931: /* special case that simply copies fill pattern */
932: ISIdentity(isrow,&row_identity);
933: ISIdentity(iscol,&col_identity);
934: if (!levels && row_identity && col_identity) {
935: MatDuplicate_SeqAIJ(A,MAT_DO_NOT_COPY_VALUES,fact);
936: (*fact)->factor = FACTOR_LU;
937: b = (Mat_SeqAIJ*)(*fact)->data;
938: if (!b->diag) {
939: MatMarkDiagonal_SeqAIJ(*fact);
940: }
941: MatMissingDiagonal_SeqAIJ(*fact);
942: b->row = isrow;
943: b->col = iscol;
944: b->icol = isicol;
945: PetscMalloc(((*fact)->rmap.n+1)*sizeof(PetscScalar),&b->solve_work);
946: (*fact)->ops->solve = MatSolve_SeqAIJ_NaturalOrdering;
947: PetscObjectReference((PetscObject)isrow);
948: PetscObjectReference((PetscObject)iscol);
949: return(0);
950: }
952: ISGetIndices(isrow,&r);
953: ISGetIndices(isicol,&ic);
955: /* get new row pointers */
956: PetscMalloc((n+1)*sizeof(PetscInt),&bi);
957: bi[0] = 0;
958: /* bdiag is location of diagonal in factor */
959: PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);
960: bdiag[0] = 0;
962: PetscMalloc((2*n+1)*sizeof(PetscInt**),&bj_ptr);
963: bjlvl_ptr = (PetscInt**)(bj_ptr + n);
965: /* create a linked list for storing column indices of the active row */
966: nlnk = n + 1;
967: PetscIncompleteLLCreate(n,n,nlnk,lnk,lnk_lvl,lnkbt);
969: /* initial FreeSpace size is f*(ai[n]+1) */
970: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);
971: current_space = free_space;
972: PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space_lvl);
973: current_space_lvl = free_space_lvl;
974:
975: for (i=0; i<n; i++) {
976: nzi = 0;
977: /* copy current row into linked list */
978: nnz = ai[r[i]+1] - ai[r[i]];
979: if (!nnz) SETERRQ(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix");
980: cols = aj + ai[r[i]];
981: lnk[i] = -1; /* marker to indicate if diagonal exists */
982: PetscIncompleteLLInit(nnz,cols,n,ic,nlnk,lnk,lnk_lvl,lnkbt);
983: nzi += nlnk;
985: /* make sure diagonal entry is included */
986: if (diagonal_fill && lnk[i] == -1) {
987: fm = n;
988: while (lnk[fm] < i) fm = lnk[fm];
989: lnk[i] = lnk[fm]; /* insert diagonal into linked list */
990: lnk[fm] = i;
991: lnk_lvl[i] = 0;
992: nzi++; dcount++;
993: }
995: /* add pivot rows into the active row */
996: nzbd = 0;
997: prow = lnk[n];
998: while (prow < i) {
999: nnz = bdiag[prow];
1000: cols = bj_ptr[prow] + nnz + 1;
1001: cols_lvl = bjlvl_ptr[prow] + nnz + 1;
1002: nnz = bi[prow+1] - bi[prow] - nnz - 1;
1003: PetscILULLAddSorted(nnz,cols,levels,cols_lvl,prow,nlnk,lnk,lnk_lvl,lnkbt,prow);
1004: nzi += nlnk;
1005: prow = lnk[prow];
1006: nzbd++;
1007: }
1008: bdiag[i] = nzbd;
1009: bi[i+1] = bi[i] + nzi;
1011: /* if free space is not available, make more free space */
1012: if (current_space->local_remaining<nzi) {
1013: nnz = nzi*(n - i); /* estimated and max additional space needed */
1014: PetscFreeSpaceGet(nnz,¤t_space);
1015: PetscFreeSpaceGet(nnz,¤t_space_lvl);
1016: reallocs++;
1017: }
1019: /* copy data into free_space and free_space_lvl, then initialize lnk */
1020: PetscIncompleteLLClean(n,n,nzi,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1021: bj_ptr[i] = current_space->array;
1022: bjlvl_ptr[i] = current_space_lvl->array;
1024: /* make sure the active row i has diagonal entry */
1025: if (*(bj_ptr[i]+bdiag[i]) != i) {
1026: SETERRQ1(PETSC_ERR_MAT_LU_ZRPVT,"Row %D has missing diagonal in factored matrix\n\
1027: try running with -pc_ilu_nonzeros_along_diagonal or -pc_ilu_diagonal_fill",i);
1028: }
1030: current_space->array += nzi;
1031: current_space->local_used += nzi;
1032: current_space->local_remaining -= nzi;
1033: current_space_lvl->array += nzi;
1034: current_space_lvl->local_used += nzi;
1035: current_space_lvl->local_remaining -= nzi;
1036: }
1038: ISRestoreIndices(isrow,&r);
1039: ISRestoreIndices(isicol,&ic);
1041: /* destroy list of free space and other temporary arrays */
1042: PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);
1043: PetscFreeSpaceContiguous(&free_space,bj);
1044: PetscIncompleteLLDestroy(lnk,lnkbt);
1045: PetscFreeSpaceDestroy(free_space_lvl);
1046: PetscFree(bj_ptr);
1048: #if defined(PETSC_USE_INFO)
1049: {
1050: PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1051: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);
1052: PetscInfo1(A,"Run with -[sub_]pc_factor_fill %G or use \n",af);
1053: PetscInfo1(A,"PCFactorSetFill([sub]pc,%G);\n",af);
1054: PetscInfo(A,"for best performance.\n");
1055: if (diagonal_fill) {
1056: PetscInfo1(A,"Detected and replaced %D missing diagonals",dcount);
1057: }
1058: }
1059: #endif
1061: /* put together the new matrix */
1062: MatCreate(A->comm,fact);
1063: MatSetSizes(*fact,n,n,n,n);
1064: MatSetType(*fact,A->type_name);
1065: MatSeqAIJSetPreallocation_SeqAIJ(*fact,MAT_SKIP_ALLOCATION,PETSC_NULL);
1066: PetscLogObjectParent(*fact,isicol);
1067: b = (Mat_SeqAIJ*)(*fact)->data;
1068: b->freedata = PETSC_TRUE;
1069: b->singlemalloc = PETSC_FALSE;
1070: len = (bi[n] )*sizeof(PetscScalar);
1071: PetscMalloc(len+1,&b->a);
1072: b->j = bj;
1073: b->i = bi;
1074: for (i=0; i<n; i++) bdiag[i] += bi[i];
1075: b->diag = bdiag;
1076: b->ilen = 0;
1077: b->imax = 0;
1078: b->row = isrow;
1079: b->col = iscol;
1080: PetscObjectReference((PetscObject)isrow);
1081: PetscObjectReference((PetscObject)iscol);
1082: b->icol = isicol;
1083: PetscMalloc((n+1)*sizeof(PetscScalar),&b->solve_work);
1084: /* In b structure: Free imax, ilen, old a, old j.
1085: Allocate bdiag, solve_work, new a, new j */
1086: PetscLogObjectMemory(*fact,(bi[n]-n) * (sizeof(PetscInt)+sizeof(PetscScalar)));
1087: b->maxnz = b->nz = bi[n] ;
1088: (*fact)->factor = FACTOR_LU;
1089: (*fact)->info.factor_mallocs = reallocs;
1090: (*fact)->info.fill_ratio_given = f;
1091: (*fact)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
1093: MatILUFactorSymbolic_Inode(A,isrow,iscol,info,fact);
1094: (*fact)->ops->lufactornumeric = A->ops->lufactornumeric; /* Use Inode variant ONLY if A has inodes */
1096: return(0);
1097: }
1099: #include src/mat/impls/sbaij/seq/sbaij.h
1102: PetscErrorCode MatCholeskyFactorNumeric_SeqAIJ(Mat A,MatFactorInfo *info,Mat *B)
1103: {
1104: Mat C = *B;
1105: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data;
1106: Mat_SeqSBAIJ *b=(Mat_SeqSBAIJ*)C->data;
1107: IS ip=b->row;
1109: PetscInt *rip,i,j,mbs=A->rmap.n,*bi=b->i,*bj=b->j,*bcol;
1110: PetscInt *ai=a->i,*aj=a->j;
1111: PetscInt k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1112: MatScalar *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1113: PetscReal zeropivot,rs,shiftnz;
1114: PetscReal shiftpd;
1115: ChShift_Ctx sctx;
1116: PetscInt newshift;
1119: shiftnz = info->shiftnz;
1120: shiftpd = info->shiftpd;
1121: zeropivot = info->zeropivot;
1123: ISGetIndices(ip,&rip);
1124:
1125: /* initialization */
1126: nz = (2*mbs+1)*sizeof(PetscInt)+mbs*sizeof(MatScalar);
1127: PetscMalloc(nz,&il);
1128: jl = il + mbs;
1129: rtmp = (MatScalar*)(jl + mbs);
1131: sctx.shift_amount = 0;
1132: sctx.nshift = 0;
1133: do {
1134: sctx.chshift = PETSC_FALSE;
1135: for (i=0; i<mbs; i++) {
1136: rtmp[i] = 0.0; jl[i] = mbs; il[0] = 0;
1137: }
1138:
1139: for (k = 0; k<mbs; k++){
1140: bval = ba + bi[k];
1141: /* initialize k-th row by the perm[k]-th row of A */
1142: jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1143: for (j = jmin; j < jmax; j++){
1144: col = rip[aj[j]];
1145: if (col >= k){ /* only take upper triangular entry */
1146: rtmp[col] = aa[j];
1147: *bval++ = 0.0; /* for in-place factorization */
1148: }
1149: }
1150: /* shift the diagonal of the matrix */
1151: if (sctx.nshift) rtmp[k] += sctx.shift_amount;
1153: /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1154: dk = rtmp[k];
1155: i = jl[k]; /* first row to be added to k_th row */
1157: while (i < k){
1158: nexti = jl[i]; /* next row to be added to k_th row */
1160: /* compute multiplier, update diag(k) and U(i,k) */
1161: ili = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1162: uikdi = - ba[ili]*ba[bi[i]]; /* diagonal(k) */
1163: dk += uikdi*ba[ili];
1164: ba[ili] = uikdi; /* -U(i,k) */
1166: /* add multiple of row i to k-th row */
1167: jmin = ili + 1; jmax = bi[i+1];
1168: if (jmin < jmax){
1169: for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1170: /* update il and jl for row i */
1171: il[i] = jmin;
1172: j = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1173: }
1174: i = nexti;
1175: }
1177: /* shift the diagonals when zero pivot is detected */
1178: /* compute rs=sum of abs(off-diagonal) */
1179: rs = 0.0;
1180: jmin = bi[k]+1;
1181: nz = bi[k+1] - jmin;
1182: if (nz){
1183: bcol = bj + jmin;
1184: while (nz--){
1185: rs += PetscAbsScalar(rtmp[*bcol]);
1186: bcol++;
1187: }
1188: }
1190: sctx.rs = rs;
1191: sctx.pv = dk;
1192: MatCholeskyCheckShift_inline(info,sctx,newshift);
1193: if (newshift == 1){
1194: break; /* sctx.shift_amount is updated */
1195: } else if (newshift == -1){
1196: SETERRQ4(PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot row %D value %G tolerance %G * rs %G",k,PetscAbsScalar(dk),zeropivot,rs);
1197: }
1198:
1199: /* copy data into U(k,:) */
1200: ba[bi[k]] = 1.0/dk; /* U(k,k) */
1201: jmin = bi[k]+1; jmax = bi[k+1];
1202: if (jmin < jmax) {
1203: for (j=jmin; j<jmax; j++){
1204: col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1205: }
1206: /* add the k-th row into il and jl */
1207: il[k] = jmin;
1208: i = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1209: }
1210: }
1211: } while (sctx.chshift);
1212: PetscFree(il);
1214: ISRestoreIndices(ip,&rip);
1215: C->factor = FACTOR_CHOLESKY;
1216: C->assembled = PETSC_TRUE;
1217: C->preallocated = PETSC_TRUE;
1218: PetscLogFlops(C->rmap.n);
1219: if (sctx.nshift){
1220: if (shiftnz) {
1221: PetscInfo2(0,"number of shiftnz tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1222: } else if (shiftpd) {
1223: PetscInfo2(0,"number of shiftpd tries %D, shift_amount %G\n",sctx.nshift,sctx.shift_amount);
1224: }
1225: }
1226: return(0);
1227: }
1231: PetscErrorCode MatICCFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1232: {
1233: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1234: Mat_SeqSBAIJ *b;
1235: Mat B;
1236: PetscErrorCode ierr;
1237: PetscTruth perm_identity;
1238: PetscInt reallocs=0,*rip,i,*ai=a->i,*aj=a->j,am=A->rmap.n,*ui;
1239: PetscInt jmin,jmax,nzk,k,j,*jl,prow,*il,nextprow;
1240: PetscInt nlnk,*lnk,*lnk_lvl=PETSC_NULL;
1241: PetscInt ncols,ncols_upper,*cols,*ajtmp,*uj,**uj_ptr,**uj_lvl_ptr;
1242: PetscReal fill=info->fill,levels=info->levels;
1243: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1244: PetscFreeSpaceList free_space_lvl=PETSC_NULL,current_space_lvl=PETSC_NULL;
1245: PetscBT lnkbt;
1246:
1248: ISIdentity(perm,&perm_identity);
1249: ISGetIndices(perm,&rip);
1251: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
1252: ui[0] = 0;
1254: /* special case that simply copies fill pattern */
1255: if (!levels && perm_identity) {
1256: MatMarkDiagonal_SeqAIJ(A);
1257: for (i=0; i<am; i++) {
1258: ui[i+1] = ui[i] + ai[i+1] - a->diag[i];
1259: }
1260: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1261: cols = uj;
1262: for (i=0; i<am; i++) {
1263: aj = a->j + a->diag[i];
1264: ncols = ui[i+1] - ui[i];
1265: for (j=0; j<ncols; j++) *cols++ = *aj++;
1266: }
1267: } else { /* case: levels>0 || (levels=0 && !perm_identity) */
1268: /* initialization */
1269: PetscMalloc((am+1)*sizeof(PetscInt),&ajtmp);
1271: /* jl: linked list for storing indices of the pivot rows
1272: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1273: PetscMalloc((2*am+1)*sizeof(PetscInt)+2*am*sizeof(PetscInt**),&jl);
1274: il = jl + am;
1275: uj_ptr = (PetscInt**)(il + am);
1276: uj_lvl_ptr = (PetscInt**)(uj_ptr + am);
1277: for (i=0; i<am; i++){
1278: jl[i] = am; il[i] = 0;
1279: }
1281: /* create and initialize a linked list for storing column indices of the active row k */
1282: nlnk = am + 1;
1283: PetscIncompleteLLCreate(am,am,nlnk,lnk,lnk_lvl,lnkbt);
1285: /* initial FreeSpace size is fill*(ai[am]+1) */
1286: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
1287: current_space = free_space;
1288: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space_lvl);
1289: current_space_lvl = free_space_lvl;
1291: for (k=0; k<am; k++){ /* for each active row k */
1292: /* initialize lnk by the column indices of row rip[k] of A */
1293: nzk = 0;
1294: ncols = ai[rip[k]+1] - ai[rip[k]];
1295: ncols_upper = 0;
1296: for (j=0; j<ncols; j++){
1297: i = *(aj + ai[rip[k]] + j);
1298: if (rip[i] >= k){ /* only take upper triangular entry */
1299: ajtmp[ncols_upper] = i;
1300: ncols_upper++;
1301: }
1302: }
1303: PetscIncompleteLLInit(ncols_upper,ajtmp,am,rip,nlnk,lnk,lnk_lvl,lnkbt);
1304: nzk += nlnk;
1306: /* update lnk by computing fill-in for each pivot row to be merged in */
1307: prow = jl[k]; /* 1st pivot row */
1308:
1309: while (prow < k){
1310: nextprow = jl[prow];
1311:
1312: /* merge prow into k-th row */
1313: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
1314: jmax = ui[prow+1];
1315: ncols = jmax-jmin;
1316: i = jmin - ui[prow];
1317: cols = uj_ptr[prow] + i; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1318: uj = uj_lvl_ptr[prow] + i; /* levels of cols */
1319: j = *(uj - 1);
1320: PetscICCLLAddSorted(ncols,cols,levels,uj,am,nlnk,lnk,lnk_lvl,lnkbt,j);
1321: nzk += nlnk;
1323: /* update il and jl for prow */
1324: if (jmin < jmax){
1325: il[prow] = jmin;
1326: j = *cols; jl[prow] = jl[j]; jl[j] = prow;
1327: }
1328: prow = nextprow;
1329: }
1331: /* if free space is not available, make more free space */
1332: if (current_space->local_remaining<nzk) {
1333: i = am - k + 1; /* num of unfactored rows */
1334: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1335: PetscFreeSpaceGet(i,¤t_space);
1336: PetscFreeSpaceGet(i,¤t_space_lvl);
1337: reallocs++;
1338: }
1340: /* copy data into free_space and free_space_lvl, then initialize lnk */
1341: PetscIncompleteLLClean(am,am,nzk,lnk,lnk_lvl,current_space->array,current_space_lvl->array,lnkbt);
1343: /* add the k-th row into il and jl */
1344: if (nzk > 1){
1345: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1346: jl[k] = jl[i]; jl[i] = k;
1347: il[k] = ui[k] + 1;
1348: }
1349: uj_ptr[k] = current_space->array;
1350: uj_lvl_ptr[k] = current_space_lvl->array;
1352: current_space->array += nzk;
1353: current_space->local_used += nzk;
1354: current_space->local_remaining -= nzk;
1356: current_space_lvl->array += nzk;
1357: current_space_lvl->local_used += nzk;
1358: current_space_lvl->local_remaining -= nzk;
1360: ui[k+1] = ui[k] + nzk;
1361: }
1363: #if defined(PETSC_USE_INFO)
1364: if (ai[am] != 0) {
1365: PetscReal af = (PetscReal)ui[am]/((PetscReal)ai[am]);
1366: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
1367: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
1368: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
1369: } else {
1370: PetscInfo(A,"Empty matrix.\n");
1371: }
1372: #endif
1374: ISRestoreIndices(perm,&rip);
1375: PetscFree(jl);
1376: PetscFree(ajtmp);
1378: /* destroy list of free space and other temporary array(s) */
1379: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1380: PetscFreeSpaceContiguous(&free_space,uj);
1381: PetscIncompleteLLDestroy(lnk,lnkbt);
1382: PetscFreeSpaceDestroy(free_space_lvl);
1384: } /* end of case: levels>0 || (levels=0 && !perm_identity) */
1386: /* put together the new matrix in MATSEQSBAIJ format */
1387: MatCreate(PETSC_COMM_SELF,fact);
1388: MatSetSizes(*fact,am,am,am,am);
1389: B = *fact;
1390: MatSetType(B,MATSEQSBAIJ);
1391: MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);
1393: b = (Mat_SeqSBAIJ*)B->data;
1394: b->singlemalloc = PETSC_FALSE;
1395: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
1396: b->j = uj;
1397: b->i = ui;
1398: b->diag = 0;
1399: b->ilen = 0;
1400: b->imax = 0;
1401: b->row = perm;
1402: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1403: PetscObjectReference((PetscObject)perm);
1404: b->icol = perm;
1405: PetscObjectReference((PetscObject)perm);
1406: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
1407: PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1408: b->maxnz = b->nz = ui[am];
1409:
1410: B->factor = FACTOR_CHOLESKY;
1411: B->info.factor_mallocs = reallocs;
1412: B->info.fill_ratio_given = fill;
1413: if (ai[am] != 0) {
1414: B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1415: } else {
1416: B->info.fill_ratio_needed = 0.0;
1417: }
1418: (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1419: if (perm_identity){
1420: B->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1421: B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1422: }
1423: return(0);
1424: }
1428: PetscErrorCode MatCholeskyFactorSymbolic_SeqAIJ(Mat A,IS perm,MatFactorInfo *info,Mat *fact)
1429: {
1430: Mat_SeqAIJ *a = (Mat_SeqAIJ*)A->data;
1431: Mat_SeqSBAIJ *b;
1432: Mat B;
1433: PetscErrorCode ierr;
1434: PetscTruth perm_identity;
1435: PetscReal fill = info->fill;
1436: PetscInt *rip,*riip,i,am=A->rmap.n,*ai=a->i,*aj=a->j,reallocs=0,prow;
1437: PetscInt *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
1438: PetscInt nlnk,*lnk,ncols,ncols_upper,*cols,*uj,**ui_ptr,*uj_ptr;
1439: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
1440: PetscBT lnkbt;
1441: IS iperm;
1444: /* check whether perm is the identity mapping */
1445: ISIdentity(perm,&perm_identity);
1446: ISGetIndices(perm,&rip);
1448: if (!perm_identity){
1449: /* check if perm is symmetric! */
1450: ISInvertPermutation(perm,PETSC_DECIDE,&iperm);
1451: ISGetIndices(iperm,&riip);
1452: for (i=0; i<am; i++) {
1453: if (rip[i] != riip[i]) SETERRQ(PETSC_ERR_ARG_INCOMP,"Non-symmetric permutation, must use symmetric permutation");
1454: }
1455: ISRestoreIndices(iperm,&riip);
1456: ISDestroy(iperm);
1457: }
1459: /* initialization */
1460: PetscMalloc((am+1)*sizeof(PetscInt),&ui);
1461: ui[0] = 0;
1463: /* jl: linked list for storing indices of the pivot rows
1464: il: il[i] points to the 1st nonzero entry of U(i,k:am-1) */
1465: PetscMalloc((3*am+1)*sizeof(PetscInt)+am*sizeof(PetscInt**),&jl);
1466: il = jl + am;
1467: cols = il + am;
1468: ui_ptr = (PetscInt**)(cols + am);
1469: for (i=0; i<am; i++){
1470: jl[i] = am; il[i] = 0;
1471: }
1473: /* create and initialize a linked list for storing column indices of the active row k */
1474: nlnk = am + 1;
1475: PetscLLCreate(am,am,nlnk,lnk,lnkbt);
1477: /* initial FreeSpace size is fill*(ai[am]+1) */
1478: PetscFreeSpaceGet((PetscInt)(fill*(ai[am]+1)),&free_space);
1479: current_space = free_space;
1481: for (k=0; k<am; k++){ /* for each active row k */
1482: /* initialize lnk by the column indices of row rip[k] of A */
1483: nzk = 0;
1484: ncols = ai[rip[k]+1] - ai[rip[k]];
1485: ncols_upper = 0;
1486: for (j=0; j<ncols; j++){
1487: i = rip[*(aj + ai[rip[k]] + j)];
1488: if (i >= k){ /* only take upper triangular entry */
1489: cols[ncols_upper] = i;
1490: ncols_upper++;
1491: }
1492: }
1493: PetscLLAdd(ncols_upper,cols,am,nlnk,lnk,lnkbt);
1494: nzk += nlnk;
1496: /* update lnk by computing fill-in for each pivot row to be merged in */
1497: prow = jl[k]; /* 1st pivot row */
1498:
1499: while (prow < k){
1500: nextprow = jl[prow];
1501: /* merge prow into k-th row */
1502: jmin = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:am-1) */
1503: jmax = ui[prow+1];
1504: ncols = jmax-jmin;
1505: uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:am-1) */
1506: PetscLLAddSorted(ncols,uj_ptr,am,nlnk,lnk,lnkbt);
1507: nzk += nlnk;
1509: /* update il and jl for prow */
1510: if (jmin < jmax){
1511: il[prow] = jmin;
1512: j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
1513: }
1514: prow = nextprow;
1515: }
1517: /* if free space is not available, make more free space */
1518: if (current_space->local_remaining<nzk) {
1519: i = am - k + 1; /* num of unfactored rows */
1520: i = PetscMin(i*nzk, i*(i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
1521: PetscFreeSpaceGet(i,¤t_space);
1522: reallocs++;
1523: }
1525: /* copy data into free space, then initialize lnk */
1526: PetscLLClean(am,am,nzk,lnk,current_space->array,lnkbt);
1528: /* add the k-th row into il and jl */
1529: if (nzk-1 > 0){
1530: i = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:am-1) */
1531: jl[k] = jl[i]; jl[i] = k;
1532: il[k] = ui[k] + 1;
1533: }
1534: ui_ptr[k] = current_space->array;
1535: current_space->array += nzk;
1536: current_space->local_used += nzk;
1537: current_space->local_remaining -= nzk;
1539: ui[k+1] = ui[k] + nzk;
1540: }
1542: #if defined(PETSC_USE_INFO)
1543: if (ai[am] != 0) {
1544: PetscReal af = (PetscReal)(ui[am])/((PetscReal)ai[am]);
1545: PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,fill,af);
1546: PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);
1547: PetscInfo1(A,"PCFactorSetFill(pc,%G) for best performance.\n",af);
1548: } else {
1549: PetscInfo(A,"Empty matrix.\n");
1550: }
1551: #endif
1553: ISRestoreIndices(perm,&rip);
1554: PetscFree(jl);
1556: /* destroy list of free space and other temporary array(s) */
1557: PetscMalloc((ui[am]+1)*sizeof(PetscInt),&uj);
1558: PetscFreeSpaceContiguous(&free_space,uj);
1559: PetscLLDestroy(lnk,lnkbt);
1561: /* put together the new matrix in MATSEQSBAIJ format */
1562: MatCreate(PETSC_COMM_SELF,fact);
1563: MatSetSizes(*fact,am,am,am,am);
1564: B = *fact;
1565: MatSetType(B,MATSEQSBAIJ);
1566: MatSeqSBAIJSetPreallocation(B,1,MAT_SKIP_ALLOCATION,PETSC_NULL);
1568: b = (Mat_SeqSBAIJ*)B->data;
1569: b->singlemalloc = PETSC_FALSE;
1570: PetscMalloc((ui[am]+1)*sizeof(MatScalar),&b->a);
1571: b->j = uj;
1572: b->i = ui;
1573: b->diag = 0;
1574: b->ilen = 0;
1575: b->imax = 0;
1576: b->row = perm;
1577: b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */
1578: PetscObjectReference((PetscObject)perm);
1579: b->icol = perm;
1580: PetscObjectReference((PetscObject)perm);
1581: PetscMalloc((am+1)*sizeof(PetscScalar),&b->solve_work);
1582: PetscLogObjectMemory(B,(ui[am]-am)*(sizeof(PetscInt)+sizeof(MatScalar)));
1583: b->maxnz = b->nz = ui[am];
1584:
1585: B->factor = FACTOR_CHOLESKY;
1586: B->info.factor_mallocs = reallocs;
1587: B->info.fill_ratio_given = fill;
1588: if (ai[am] != 0) {
1589: B->info.fill_ratio_needed = ((PetscReal)ui[am])/((PetscReal)ai[am]);
1590: } else {
1591: B->info.fill_ratio_needed = 0.0;
1592: }
1593: (*fact)->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqAIJ;
1594: if (perm_identity){
1595: (*fact)->ops->solve = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1596: (*fact)->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1597: }
1598: return(0);
1599: }