Actual source code: maij.c
1: #define PETSCMAT_DLL
3: /*
4: Defines the basic matrix operations for the MAIJ matrix storage format.
5: This format is used for restriction and interpolation operations for
6: multicomponent problems. It interpolates each component the same way
7: independently.
9: We provide:
10: MatMult()
11: MatMultTranspose()
12: MatMultTransposeAdd()
13: MatMultAdd()
14: and
15: MatCreateMAIJ(Mat,dof,Mat*)
17: This single directory handles both the sequential and parallel codes
18: */
20: #include src/mat/impls/maij/maij.h
21: #include src/mat/utils/freespace.h
22: #include private/vecimpl.h
26: PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJGetAIJ(Mat A,Mat *B)
27: {
29: PetscTruth ismpimaij,isseqmaij;
32: PetscTypeCompare((PetscObject)A,MATMPIMAIJ,&ismpimaij);
33: PetscTypeCompare((PetscObject)A,MATSEQMAIJ,&isseqmaij);
34: if (ismpimaij) {
35: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
37: *B = b->A;
38: } else if (isseqmaij) {
39: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
41: *B = b->AIJ;
42: } else {
43: *B = A;
44: }
45: return(0);
46: }
50: PetscErrorCode PETSCMAT_DLLEXPORT MatMAIJRedimension(Mat A,PetscInt dof,Mat *B)
51: {
53: Mat Aij;
56: MatMAIJGetAIJ(A,&Aij);
57: MatCreateMAIJ(Aij,dof,B);
58: return(0);
59: }
63: PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
64: {
66: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
69: if (b->AIJ) {
70: MatDestroy(b->AIJ);
71: }
72: PetscFree(b);
73: return(0);
74: }
78: PetscErrorCode MatView_SeqMAIJ(Mat A,PetscViewer viewer)
79: {
81: Mat B;
84: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
85: MatView(B,viewer);
86: MatDestroy(B);
87: return(0);
88: }
92: PetscErrorCode MatView_MPIMAIJ(Mat A,PetscViewer viewer)
93: {
95: Mat B;
98: MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);
99: MatView(B,viewer);
100: MatDestroy(B);
101: return(0);
102: }
106: PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
107: {
109: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
112: if (b->AIJ) {
113: MatDestroy(b->AIJ);
114: }
115: if (b->OAIJ) {
116: MatDestroy(b->OAIJ);
117: }
118: if (b->A) {
119: MatDestroy(b->A);
120: }
121: if (b->ctx) {
122: VecScatterDestroy(b->ctx);
123: }
124: if (b->w) {
125: VecDestroy(b->w);
126: }
127: PetscFree(b);
128: return(0);
129: }
131: /*MC
132: MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
133: multicomponent problems, interpolating or restricting each component the same way independently.
134: The matrix type is based on MATSEQAIJ for sequential matrices, and MATMPIAIJ for distributed matrices.
136: Operations provided:
137: . MatMult
138: . MatMultTranspose
139: . MatMultAdd
140: . MatMultTransposeAdd
142: Level: advanced
144: .seealso: MatCreateSeqDense
145: M*/
150: PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MAIJ(Mat A)
151: {
153: Mat_MPIMAIJ *b;
156: PetscNew(Mat_MPIMAIJ,&b);
157: A->data = (void*)b;
158: PetscMemzero(A->ops,sizeof(struct _MatOps));
159: A->factor = 0;
160: A->mapping = 0;
162: b->AIJ = 0;
163: b->dof = 0;
164: b->OAIJ = 0;
165: b->ctx = 0;
166: b->w = 0;
167: return(0);
168: }
171: /* --------------------------------------------------------------------------------------*/
174: PetscErrorCode MatMult_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
175: {
176: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
177: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
178: PetscScalar *x,*y,*v,sum1, sum2;
180: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
181: PetscInt n,i,jrow,j;
184: VecGetArray(xx,&x);
185: VecGetArray(yy,&y);
186: idx = a->j;
187: v = a->a;
188: ii = a->i;
190: for (i=0; i<m; i++) {
191: jrow = ii[i];
192: n = ii[i+1] - jrow;
193: sum1 = 0.0;
194: sum2 = 0.0;
195: for (j=0; j<n; j++) {
196: sum1 += v[jrow]*x[2*idx[jrow]];
197: sum2 += v[jrow]*x[2*idx[jrow]+1];
198: jrow++;
199: }
200: y[2*i] = sum1;
201: y[2*i+1] = sum2;
202: }
204: PetscLogFlops(4*a->nz - 2*m);
205: VecRestoreArray(xx,&x);
206: VecRestoreArray(yy,&y);
207: return(0);
208: }
212: PetscErrorCode MatMultTranspose_SeqMAIJ_2(Mat A,Vec xx,Vec yy)
213: {
214: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
215: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
216: PetscScalar *x,*y,*v,alpha1,alpha2,zero = 0.0;
218: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
221: VecSet(yy,zero);
222: VecGetArray(xx,&x);
223: VecGetArray(yy,&y);
224:
225: for (i=0; i<m; i++) {
226: idx = a->j + a->i[i] ;
227: v = a->a + a->i[i] ;
228: n = a->i[i+1] - a->i[i];
229: alpha1 = x[2*i];
230: alpha2 = x[2*i+1];
231: while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
232: }
233: PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);
234: VecRestoreArray(xx,&x);
235: VecRestoreArray(yy,&y);
236: return(0);
237: }
241: PetscErrorCode MatMultAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
242: {
243: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
244: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
245: PetscScalar *x,*y,*v,sum1, sum2;
247: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
248: PetscInt n,i,jrow,j;
251: if (yy != zz) {VecCopy(yy,zz);}
252: VecGetArray(xx,&x);
253: VecGetArray(zz,&y);
254: idx = a->j;
255: v = a->a;
256: ii = a->i;
258: for (i=0; i<m; i++) {
259: jrow = ii[i];
260: n = ii[i+1] - jrow;
261: sum1 = 0.0;
262: sum2 = 0.0;
263: for (j=0; j<n; j++) {
264: sum1 += v[jrow]*x[2*idx[jrow]];
265: sum2 += v[jrow]*x[2*idx[jrow]+1];
266: jrow++;
267: }
268: y[2*i] += sum1;
269: y[2*i+1] += sum2;
270: }
272: PetscLogFlops(4*a->nz - 2*m);
273: VecRestoreArray(xx,&x);
274: VecRestoreArray(zz,&y);
275: return(0);
276: }
279: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_2(Mat A,Vec xx,Vec yy,Vec zz)
280: {
281: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
282: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
283: PetscScalar *x,*y,*v,alpha1,alpha2;
285: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
288: if (yy != zz) {VecCopy(yy,zz);}
289: VecGetArray(xx,&x);
290: VecGetArray(zz,&y);
291:
292: for (i=0; i<m; i++) {
293: idx = a->j + a->i[i] ;
294: v = a->a + a->i[i] ;
295: n = a->i[i+1] - a->i[i];
296: alpha1 = x[2*i];
297: alpha2 = x[2*i+1];
298: while (n-->0) {y[2*(*idx)] += alpha1*(*v); y[2*(*idx)+1] += alpha2*(*v); idx++; v++;}
299: }
300: PetscLogFlops(4*a->nz - 2*b->AIJ->cmap.n);
301: VecRestoreArray(xx,&x);
302: VecRestoreArray(zz,&y);
303: return(0);
304: }
305: /* --------------------------------------------------------------------------------------*/
308: PetscErrorCode MatMult_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
309: {
310: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
311: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
312: PetscScalar *x,*y,*v,sum1, sum2, sum3;
314: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
315: PetscInt n,i,jrow,j;
318: VecGetArray(xx,&x);
319: VecGetArray(yy,&y);
320: idx = a->j;
321: v = a->a;
322: ii = a->i;
324: for (i=0; i<m; i++) {
325: jrow = ii[i];
326: n = ii[i+1] - jrow;
327: sum1 = 0.0;
328: sum2 = 0.0;
329: sum3 = 0.0;
330: for (j=0; j<n; j++) {
331: sum1 += v[jrow]*x[3*idx[jrow]];
332: sum2 += v[jrow]*x[3*idx[jrow]+1];
333: sum3 += v[jrow]*x[3*idx[jrow]+2];
334: jrow++;
335: }
336: y[3*i] = sum1;
337: y[3*i+1] = sum2;
338: y[3*i+2] = sum3;
339: }
341: PetscLogFlops(6*a->nz - 3*m);
342: VecRestoreArray(xx,&x);
343: VecRestoreArray(yy,&y);
344: return(0);
345: }
349: PetscErrorCode MatMultTranspose_SeqMAIJ_3(Mat A,Vec xx,Vec yy)
350: {
351: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
352: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
353: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,zero = 0.0;
355: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
358: VecSet(yy,zero);
359: VecGetArray(xx,&x);
360: VecGetArray(yy,&y);
361:
362: for (i=0; i<m; i++) {
363: idx = a->j + a->i[i];
364: v = a->a + a->i[i];
365: n = a->i[i+1] - a->i[i];
366: alpha1 = x[3*i];
367: alpha2 = x[3*i+1];
368: alpha3 = x[3*i+2];
369: while (n-->0) {
370: y[3*(*idx)] += alpha1*(*v);
371: y[3*(*idx)+1] += alpha2*(*v);
372: y[3*(*idx)+2] += alpha3*(*v);
373: idx++; v++;
374: }
375: }
376: PetscLogFlops(6*a->nz - 3*b->AIJ->cmap.n);
377: VecRestoreArray(xx,&x);
378: VecRestoreArray(yy,&y);
379: return(0);
380: }
384: PetscErrorCode MatMultAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
385: {
386: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
387: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
388: PetscScalar *x,*y,*v,sum1, sum2, sum3;
390: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
391: PetscInt n,i,jrow,j;
394: if (yy != zz) {VecCopy(yy,zz);}
395: VecGetArray(xx,&x);
396: VecGetArray(zz,&y);
397: idx = a->j;
398: v = a->a;
399: ii = a->i;
401: for (i=0; i<m; i++) {
402: jrow = ii[i];
403: n = ii[i+1] - jrow;
404: sum1 = 0.0;
405: sum2 = 0.0;
406: sum3 = 0.0;
407: for (j=0; j<n; j++) {
408: sum1 += v[jrow]*x[3*idx[jrow]];
409: sum2 += v[jrow]*x[3*idx[jrow]+1];
410: sum3 += v[jrow]*x[3*idx[jrow]+2];
411: jrow++;
412: }
413: y[3*i] += sum1;
414: y[3*i+1] += sum2;
415: y[3*i+2] += sum3;
416: }
418: PetscLogFlops(6*a->nz);
419: VecRestoreArray(xx,&x);
420: VecRestoreArray(zz,&y);
421: return(0);
422: }
425: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_3(Mat A,Vec xx,Vec yy,Vec zz)
426: {
427: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
428: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
429: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3;
431: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
434: if (yy != zz) {VecCopy(yy,zz);}
435: VecGetArray(xx,&x);
436: VecGetArray(zz,&y);
437: for (i=0; i<m; i++) {
438: idx = a->j + a->i[i] ;
439: v = a->a + a->i[i] ;
440: n = a->i[i+1] - a->i[i];
441: alpha1 = x[3*i];
442: alpha2 = x[3*i+1];
443: alpha3 = x[3*i+2];
444: while (n-->0) {
445: y[3*(*idx)] += alpha1*(*v);
446: y[3*(*idx)+1] += alpha2*(*v);
447: y[3*(*idx)+2] += alpha3*(*v);
448: idx++; v++;
449: }
450: }
451: PetscLogFlops(6*a->nz);
452: VecRestoreArray(xx,&x);
453: VecRestoreArray(zz,&y);
454: return(0);
455: }
457: /* ------------------------------------------------------------------------------*/
460: PetscErrorCode MatMult_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
461: {
462: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
463: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
464: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4;
466: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
467: PetscInt n,i,jrow,j;
470: VecGetArray(xx,&x);
471: VecGetArray(yy,&y);
472: idx = a->j;
473: v = a->a;
474: ii = a->i;
476: for (i=0; i<m; i++) {
477: jrow = ii[i];
478: n = ii[i+1] - jrow;
479: sum1 = 0.0;
480: sum2 = 0.0;
481: sum3 = 0.0;
482: sum4 = 0.0;
483: for (j=0; j<n; j++) {
484: sum1 += v[jrow]*x[4*idx[jrow]];
485: sum2 += v[jrow]*x[4*idx[jrow]+1];
486: sum3 += v[jrow]*x[4*idx[jrow]+2];
487: sum4 += v[jrow]*x[4*idx[jrow]+3];
488: jrow++;
489: }
490: y[4*i] = sum1;
491: y[4*i+1] = sum2;
492: y[4*i+2] = sum3;
493: y[4*i+3] = sum4;
494: }
496: PetscLogFlops(8*a->nz - 4*m);
497: VecRestoreArray(xx,&x);
498: VecRestoreArray(yy,&y);
499: return(0);
500: }
504: PetscErrorCode MatMultTranspose_SeqMAIJ_4(Mat A,Vec xx,Vec yy)
505: {
506: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
507: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
508: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,zero = 0.0;
510: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
513: VecSet(yy,zero);
514: VecGetArray(xx,&x);
515: VecGetArray(yy,&y);
516: for (i=0; i<m; i++) {
517: idx = a->j + a->i[i] ;
518: v = a->a + a->i[i] ;
519: n = a->i[i+1] - a->i[i];
520: alpha1 = x[4*i];
521: alpha2 = x[4*i+1];
522: alpha3 = x[4*i+2];
523: alpha4 = x[4*i+3];
524: while (n-->0) {
525: y[4*(*idx)] += alpha1*(*v);
526: y[4*(*idx)+1] += alpha2*(*v);
527: y[4*(*idx)+2] += alpha3*(*v);
528: y[4*(*idx)+3] += alpha4*(*v);
529: idx++; v++;
530: }
531: }
532: PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);
533: VecRestoreArray(xx,&x);
534: VecRestoreArray(yy,&y);
535: return(0);
536: }
540: PetscErrorCode MatMultAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
541: {
542: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
543: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
544: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4;
546: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
547: PetscInt n,i,jrow,j;
550: if (yy != zz) {VecCopy(yy,zz);}
551: VecGetArray(xx,&x);
552: VecGetArray(zz,&y);
553: idx = a->j;
554: v = a->a;
555: ii = a->i;
557: for (i=0; i<m; i++) {
558: jrow = ii[i];
559: n = ii[i+1] - jrow;
560: sum1 = 0.0;
561: sum2 = 0.0;
562: sum3 = 0.0;
563: sum4 = 0.0;
564: for (j=0; j<n; j++) {
565: sum1 += v[jrow]*x[4*idx[jrow]];
566: sum2 += v[jrow]*x[4*idx[jrow]+1];
567: sum3 += v[jrow]*x[4*idx[jrow]+2];
568: sum4 += v[jrow]*x[4*idx[jrow]+3];
569: jrow++;
570: }
571: y[4*i] += sum1;
572: y[4*i+1] += sum2;
573: y[4*i+2] += sum3;
574: y[4*i+3] += sum4;
575: }
577: PetscLogFlops(8*a->nz - 4*m);
578: VecRestoreArray(xx,&x);
579: VecRestoreArray(zz,&y);
580: return(0);
581: }
584: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_4(Mat A,Vec xx,Vec yy,Vec zz)
585: {
586: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
587: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
588: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4;
590: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
593: if (yy != zz) {VecCopy(yy,zz);}
594: VecGetArray(xx,&x);
595: VecGetArray(zz,&y);
596:
597: for (i=0; i<m; i++) {
598: idx = a->j + a->i[i] ;
599: v = a->a + a->i[i] ;
600: n = a->i[i+1] - a->i[i];
601: alpha1 = x[4*i];
602: alpha2 = x[4*i+1];
603: alpha3 = x[4*i+2];
604: alpha4 = x[4*i+3];
605: while (n-->0) {
606: y[4*(*idx)] += alpha1*(*v);
607: y[4*(*idx)+1] += alpha2*(*v);
608: y[4*(*idx)+2] += alpha3*(*v);
609: y[4*(*idx)+3] += alpha4*(*v);
610: idx++; v++;
611: }
612: }
613: PetscLogFlops(8*a->nz - 4*b->AIJ->cmap.n);
614: VecRestoreArray(xx,&x);
615: VecRestoreArray(zz,&y);
616: return(0);
617: }
618: /* ------------------------------------------------------------------------------*/
622: PetscErrorCode MatMult_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
623: {
624: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
625: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
626: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
628: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
629: PetscInt n,i,jrow,j;
632: VecGetArray(xx,&x);
633: VecGetArray(yy,&y);
634: idx = a->j;
635: v = a->a;
636: ii = a->i;
638: for (i=0; i<m; i++) {
639: jrow = ii[i];
640: n = ii[i+1] - jrow;
641: sum1 = 0.0;
642: sum2 = 0.0;
643: sum3 = 0.0;
644: sum4 = 0.0;
645: sum5 = 0.0;
646: for (j=0; j<n; j++) {
647: sum1 += v[jrow]*x[5*idx[jrow]];
648: sum2 += v[jrow]*x[5*idx[jrow]+1];
649: sum3 += v[jrow]*x[5*idx[jrow]+2];
650: sum4 += v[jrow]*x[5*idx[jrow]+3];
651: sum5 += v[jrow]*x[5*idx[jrow]+4];
652: jrow++;
653: }
654: y[5*i] = sum1;
655: y[5*i+1] = sum2;
656: y[5*i+2] = sum3;
657: y[5*i+3] = sum4;
658: y[5*i+4] = sum5;
659: }
661: PetscLogFlops(10*a->nz - 5*m);
662: VecRestoreArray(xx,&x);
663: VecRestoreArray(yy,&y);
664: return(0);
665: }
669: PetscErrorCode MatMultTranspose_SeqMAIJ_5(Mat A,Vec xx,Vec yy)
670: {
671: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
672: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
673: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,zero = 0.0;
675: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
678: VecSet(yy,zero);
679: VecGetArray(xx,&x);
680: VecGetArray(yy,&y);
681:
682: for (i=0; i<m; i++) {
683: idx = a->j + a->i[i] ;
684: v = a->a + a->i[i] ;
685: n = a->i[i+1] - a->i[i];
686: alpha1 = x[5*i];
687: alpha2 = x[5*i+1];
688: alpha3 = x[5*i+2];
689: alpha4 = x[5*i+3];
690: alpha5 = x[5*i+4];
691: while (n-->0) {
692: y[5*(*idx)] += alpha1*(*v);
693: y[5*(*idx)+1] += alpha2*(*v);
694: y[5*(*idx)+2] += alpha3*(*v);
695: y[5*(*idx)+3] += alpha4*(*v);
696: y[5*(*idx)+4] += alpha5*(*v);
697: idx++; v++;
698: }
699: }
700: PetscLogFlops(10*a->nz - 5*b->AIJ->cmap.n);
701: VecRestoreArray(xx,&x);
702: VecRestoreArray(yy,&y);
703: return(0);
704: }
708: PetscErrorCode MatMultAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
709: {
710: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
711: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
712: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5;
714: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
715: PetscInt n,i,jrow,j;
718: if (yy != zz) {VecCopy(yy,zz);}
719: VecGetArray(xx,&x);
720: VecGetArray(zz,&y);
721: idx = a->j;
722: v = a->a;
723: ii = a->i;
725: for (i=0; i<m; i++) {
726: jrow = ii[i];
727: n = ii[i+1] - jrow;
728: sum1 = 0.0;
729: sum2 = 0.0;
730: sum3 = 0.0;
731: sum4 = 0.0;
732: sum5 = 0.0;
733: for (j=0; j<n; j++) {
734: sum1 += v[jrow]*x[5*idx[jrow]];
735: sum2 += v[jrow]*x[5*idx[jrow]+1];
736: sum3 += v[jrow]*x[5*idx[jrow]+2];
737: sum4 += v[jrow]*x[5*idx[jrow]+3];
738: sum5 += v[jrow]*x[5*idx[jrow]+4];
739: jrow++;
740: }
741: y[5*i] += sum1;
742: y[5*i+1] += sum2;
743: y[5*i+2] += sum3;
744: y[5*i+3] += sum4;
745: y[5*i+4] += sum5;
746: }
748: PetscLogFlops(10*a->nz);
749: VecRestoreArray(xx,&x);
750: VecRestoreArray(zz,&y);
751: return(0);
752: }
756: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_5(Mat A,Vec xx,Vec yy,Vec zz)
757: {
758: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
759: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
760: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5;
762: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
765: if (yy != zz) {VecCopy(yy,zz);}
766: VecGetArray(xx,&x);
767: VecGetArray(zz,&y);
768:
769: for (i=0; i<m; i++) {
770: idx = a->j + a->i[i] ;
771: v = a->a + a->i[i] ;
772: n = a->i[i+1] - a->i[i];
773: alpha1 = x[5*i];
774: alpha2 = x[5*i+1];
775: alpha3 = x[5*i+2];
776: alpha4 = x[5*i+3];
777: alpha5 = x[5*i+4];
778: while (n-->0) {
779: y[5*(*idx)] += alpha1*(*v);
780: y[5*(*idx)+1] += alpha2*(*v);
781: y[5*(*idx)+2] += alpha3*(*v);
782: y[5*(*idx)+3] += alpha4*(*v);
783: y[5*(*idx)+4] += alpha5*(*v);
784: idx++; v++;
785: }
786: }
787: PetscLogFlops(10*a->nz);
788: VecRestoreArray(xx,&x);
789: VecRestoreArray(zz,&y);
790: return(0);
791: }
793: /* ------------------------------------------------------------------------------*/
796: PetscErrorCode MatMult_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
797: {
798: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
799: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
800: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
802: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
803: PetscInt n,i,jrow,j;
806: VecGetArray(xx,&x);
807: VecGetArray(yy,&y);
808: idx = a->j;
809: v = a->a;
810: ii = a->i;
812: for (i=0; i<m; i++) {
813: jrow = ii[i];
814: n = ii[i+1] - jrow;
815: sum1 = 0.0;
816: sum2 = 0.0;
817: sum3 = 0.0;
818: sum4 = 0.0;
819: sum5 = 0.0;
820: sum6 = 0.0;
821: for (j=0; j<n; j++) {
822: sum1 += v[jrow]*x[6*idx[jrow]];
823: sum2 += v[jrow]*x[6*idx[jrow]+1];
824: sum3 += v[jrow]*x[6*idx[jrow]+2];
825: sum4 += v[jrow]*x[6*idx[jrow]+3];
826: sum5 += v[jrow]*x[6*idx[jrow]+4];
827: sum6 += v[jrow]*x[6*idx[jrow]+5];
828: jrow++;
829: }
830: y[6*i] = sum1;
831: y[6*i+1] = sum2;
832: y[6*i+2] = sum3;
833: y[6*i+3] = sum4;
834: y[6*i+4] = sum5;
835: y[6*i+5] = sum6;
836: }
838: PetscLogFlops(12*a->nz - 6*m);
839: VecRestoreArray(xx,&x);
840: VecRestoreArray(yy,&y);
841: return(0);
842: }
846: PetscErrorCode MatMultTranspose_SeqMAIJ_6(Mat A,Vec xx,Vec yy)
847: {
848: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
849: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
850: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,zero = 0.0;
852: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
855: VecSet(yy,zero);
856: VecGetArray(xx,&x);
857: VecGetArray(yy,&y);
859: for (i=0; i<m; i++) {
860: idx = a->j + a->i[i] ;
861: v = a->a + a->i[i] ;
862: n = a->i[i+1] - a->i[i];
863: alpha1 = x[6*i];
864: alpha2 = x[6*i+1];
865: alpha3 = x[6*i+2];
866: alpha4 = x[6*i+3];
867: alpha5 = x[6*i+4];
868: alpha6 = x[6*i+5];
869: while (n-->0) {
870: y[6*(*idx)] += alpha1*(*v);
871: y[6*(*idx)+1] += alpha2*(*v);
872: y[6*(*idx)+2] += alpha3*(*v);
873: y[6*(*idx)+3] += alpha4*(*v);
874: y[6*(*idx)+4] += alpha5*(*v);
875: y[6*(*idx)+5] += alpha6*(*v);
876: idx++; v++;
877: }
878: }
879: PetscLogFlops(12*a->nz - 6*b->AIJ->cmap.n);
880: VecRestoreArray(xx,&x);
881: VecRestoreArray(yy,&y);
882: return(0);
883: }
887: PetscErrorCode MatMultAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
888: {
889: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
890: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
891: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6;
893: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
894: PetscInt n,i,jrow,j;
897: if (yy != zz) {VecCopy(yy,zz);}
898: VecGetArray(xx,&x);
899: VecGetArray(zz,&y);
900: idx = a->j;
901: v = a->a;
902: ii = a->i;
904: for (i=0; i<m; i++) {
905: jrow = ii[i];
906: n = ii[i+1] - jrow;
907: sum1 = 0.0;
908: sum2 = 0.0;
909: sum3 = 0.0;
910: sum4 = 0.0;
911: sum5 = 0.0;
912: sum6 = 0.0;
913: for (j=0; j<n; j++) {
914: sum1 += v[jrow]*x[6*idx[jrow]];
915: sum2 += v[jrow]*x[6*idx[jrow]+1];
916: sum3 += v[jrow]*x[6*idx[jrow]+2];
917: sum4 += v[jrow]*x[6*idx[jrow]+3];
918: sum5 += v[jrow]*x[6*idx[jrow]+4];
919: sum6 += v[jrow]*x[6*idx[jrow]+5];
920: jrow++;
921: }
922: y[6*i] += sum1;
923: y[6*i+1] += sum2;
924: y[6*i+2] += sum3;
925: y[6*i+3] += sum4;
926: y[6*i+4] += sum5;
927: y[6*i+5] += sum6;
928: }
930: PetscLogFlops(12*a->nz);
931: VecRestoreArray(xx,&x);
932: VecRestoreArray(zz,&y);
933: return(0);
934: }
938: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_6(Mat A,Vec xx,Vec yy,Vec zz)
939: {
940: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
941: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
942: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6;
944: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
947: if (yy != zz) {VecCopy(yy,zz);}
948: VecGetArray(xx,&x);
949: VecGetArray(zz,&y);
950:
951: for (i=0; i<m; i++) {
952: idx = a->j + a->i[i] ;
953: v = a->a + a->i[i] ;
954: n = a->i[i+1] - a->i[i];
955: alpha1 = x[6*i];
956: alpha2 = x[6*i+1];
957: alpha3 = x[6*i+2];
958: alpha4 = x[6*i+3];
959: alpha5 = x[6*i+4];
960: alpha6 = x[6*i+5];
961: while (n-->0) {
962: y[6*(*idx)] += alpha1*(*v);
963: y[6*(*idx)+1] += alpha2*(*v);
964: y[6*(*idx)+2] += alpha3*(*v);
965: y[6*(*idx)+3] += alpha4*(*v);
966: y[6*(*idx)+4] += alpha5*(*v);
967: y[6*(*idx)+5] += alpha6*(*v);
968: idx++; v++;
969: }
970: }
971: PetscLogFlops(12*a->nz);
972: VecRestoreArray(xx,&x);
973: VecRestoreArray(zz,&y);
974: return(0);
975: }
977: /* ------------------------------------------------------------------------------*/
980: PetscErrorCode MatMult_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
981: {
982: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
983: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
984: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
986: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
987: PetscInt n,i,jrow,j;
990: VecGetArray(xx,&x);
991: VecGetArray(yy,&y);
992: idx = a->j;
993: v = a->a;
994: ii = a->i;
996: for (i=0; i<m; i++) {
997: jrow = ii[i];
998: n = ii[i+1] - jrow;
999: sum1 = 0.0;
1000: sum2 = 0.0;
1001: sum3 = 0.0;
1002: sum4 = 0.0;
1003: sum5 = 0.0;
1004: sum6 = 0.0;
1005: sum7 = 0.0;
1006: for (j=0; j<n; j++) {
1007: sum1 += v[jrow]*x[7*idx[jrow]];
1008: sum2 += v[jrow]*x[7*idx[jrow]+1];
1009: sum3 += v[jrow]*x[7*idx[jrow]+2];
1010: sum4 += v[jrow]*x[7*idx[jrow]+3];
1011: sum5 += v[jrow]*x[7*idx[jrow]+4];
1012: sum6 += v[jrow]*x[7*idx[jrow]+5];
1013: sum7 += v[jrow]*x[7*idx[jrow]+6];
1014: jrow++;
1015: }
1016: y[7*i] = sum1;
1017: y[7*i+1] = sum2;
1018: y[7*i+2] = sum3;
1019: y[7*i+3] = sum4;
1020: y[7*i+4] = sum5;
1021: y[7*i+5] = sum6;
1022: y[7*i+6] = sum7;
1023: }
1025: PetscLogFlops(14*a->nz - 7*m);
1026: VecRestoreArray(xx,&x);
1027: VecRestoreArray(yy,&y);
1028: return(0);
1029: }
1033: PetscErrorCode MatMultTranspose_SeqMAIJ_7(Mat A,Vec xx,Vec yy)
1034: {
1035: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1036: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1037: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,zero = 0.0;
1039: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1042: VecSet(yy,zero);
1043: VecGetArray(xx,&x);
1044: VecGetArray(yy,&y);
1046: for (i=0; i<m; i++) {
1047: idx = a->j + a->i[i] ;
1048: v = a->a + a->i[i] ;
1049: n = a->i[i+1] - a->i[i];
1050: alpha1 = x[7*i];
1051: alpha2 = x[7*i+1];
1052: alpha3 = x[7*i+2];
1053: alpha4 = x[7*i+3];
1054: alpha5 = x[7*i+4];
1055: alpha6 = x[7*i+5];
1056: alpha7 = x[7*i+6];
1057: while (n-->0) {
1058: y[7*(*idx)] += alpha1*(*v);
1059: y[7*(*idx)+1] += alpha2*(*v);
1060: y[7*(*idx)+2] += alpha3*(*v);
1061: y[7*(*idx)+3] += alpha4*(*v);
1062: y[7*(*idx)+4] += alpha5*(*v);
1063: y[7*(*idx)+5] += alpha6*(*v);
1064: y[7*(*idx)+6] += alpha7*(*v);
1065: idx++; v++;
1066: }
1067: }
1068: PetscLogFlops(14*a->nz - 7*b->AIJ->cmap.n);
1069: VecRestoreArray(xx,&x);
1070: VecRestoreArray(yy,&y);
1071: return(0);
1072: }
1076: PetscErrorCode MatMultAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1077: {
1078: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1079: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1080: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7;
1082: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1083: PetscInt n,i,jrow,j;
1086: if (yy != zz) {VecCopy(yy,zz);}
1087: VecGetArray(xx,&x);
1088: VecGetArray(zz,&y);
1089: idx = a->j;
1090: v = a->a;
1091: ii = a->i;
1093: for (i=0; i<m; i++) {
1094: jrow = ii[i];
1095: n = ii[i+1] - jrow;
1096: sum1 = 0.0;
1097: sum2 = 0.0;
1098: sum3 = 0.0;
1099: sum4 = 0.0;
1100: sum5 = 0.0;
1101: sum6 = 0.0;
1102: sum7 = 0.0;
1103: for (j=0; j<n; j++) {
1104: sum1 += v[jrow]*x[7*idx[jrow]];
1105: sum2 += v[jrow]*x[7*idx[jrow]+1];
1106: sum3 += v[jrow]*x[7*idx[jrow]+2];
1107: sum4 += v[jrow]*x[7*idx[jrow]+3];
1108: sum5 += v[jrow]*x[7*idx[jrow]+4];
1109: sum6 += v[jrow]*x[7*idx[jrow]+5];
1110: sum7 += v[jrow]*x[7*idx[jrow]+6];
1111: jrow++;
1112: }
1113: y[7*i] += sum1;
1114: y[7*i+1] += sum2;
1115: y[7*i+2] += sum3;
1116: y[7*i+3] += sum4;
1117: y[7*i+4] += sum5;
1118: y[7*i+5] += sum6;
1119: y[7*i+6] += sum7;
1120: }
1122: PetscLogFlops(14*a->nz);
1123: VecRestoreArray(xx,&x);
1124: VecRestoreArray(zz,&y);
1125: return(0);
1126: }
1130: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_7(Mat A,Vec xx,Vec yy,Vec zz)
1131: {
1132: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1133: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1134: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7;
1136: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1139: if (yy != zz) {VecCopy(yy,zz);}
1140: VecGetArray(xx,&x);
1141: VecGetArray(zz,&y);
1142: for (i=0; i<m; i++) {
1143: idx = a->j + a->i[i] ;
1144: v = a->a + a->i[i] ;
1145: n = a->i[i+1] - a->i[i];
1146: alpha1 = x[7*i];
1147: alpha2 = x[7*i+1];
1148: alpha3 = x[7*i+2];
1149: alpha4 = x[7*i+3];
1150: alpha5 = x[7*i+4];
1151: alpha6 = x[7*i+5];
1152: alpha7 = x[7*i+6];
1153: while (n-->0) {
1154: y[7*(*idx)] += alpha1*(*v);
1155: y[7*(*idx)+1] += alpha2*(*v);
1156: y[7*(*idx)+2] += alpha3*(*v);
1157: y[7*(*idx)+3] += alpha4*(*v);
1158: y[7*(*idx)+4] += alpha5*(*v);
1159: y[7*(*idx)+5] += alpha6*(*v);
1160: y[7*(*idx)+6] += alpha7*(*v);
1161: idx++; v++;
1162: }
1163: }
1164: PetscLogFlops(14*a->nz);
1165: VecRestoreArray(xx,&x);
1166: VecRestoreArray(zz,&y);
1167: return(0);
1168: }
1172: PetscErrorCode MatMult_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1173: {
1174: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1175: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1176: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1178: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1179: PetscInt n,i,jrow,j;
1182: VecGetArray(xx,&x);
1183: VecGetArray(yy,&y);
1184: idx = a->j;
1185: v = a->a;
1186: ii = a->i;
1188: for (i=0; i<m; i++) {
1189: jrow = ii[i];
1190: n = ii[i+1] - jrow;
1191: sum1 = 0.0;
1192: sum2 = 0.0;
1193: sum3 = 0.0;
1194: sum4 = 0.0;
1195: sum5 = 0.0;
1196: sum6 = 0.0;
1197: sum7 = 0.0;
1198: sum8 = 0.0;
1199: for (j=0; j<n; j++) {
1200: sum1 += v[jrow]*x[8*idx[jrow]];
1201: sum2 += v[jrow]*x[8*idx[jrow]+1];
1202: sum3 += v[jrow]*x[8*idx[jrow]+2];
1203: sum4 += v[jrow]*x[8*idx[jrow]+3];
1204: sum5 += v[jrow]*x[8*idx[jrow]+4];
1205: sum6 += v[jrow]*x[8*idx[jrow]+5];
1206: sum7 += v[jrow]*x[8*idx[jrow]+6];
1207: sum8 += v[jrow]*x[8*idx[jrow]+7];
1208: jrow++;
1209: }
1210: y[8*i] = sum1;
1211: y[8*i+1] = sum2;
1212: y[8*i+2] = sum3;
1213: y[8*i+3] = sum4;
1214: y[8*i+4] = sum5;
1215: y[8*i+5] = sum6;
1216: y[8*i+6] = sum7;
1217: y[8*i+7] = sum8;
1218: }
1220: PetscLogFlops(16*a->nz - 8*m);
1221: VecRestoreArray(xx,&x);
1222: VecRestoreArray(yy,&y);
1223: return(0);
1224: }
1228: PetscErrorCode MatMultTranspose_SeqMAIJ_8(Mat A,Vec xx,Vec yy)
1229: {
1230: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1231: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1232: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1234: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1237: VecSet(yy,zero);
1238: VecGetArray(xx,&x);
1239: VecGetArray(yy,&y);
1241: for (i=0; i<m; i++) {
1242: idx = a->j + a->i[i] ;
1243: v = a->a + a->i[i] ;
1244: n = a->i[i+1] - a->i[i];
1245: alpha1 = x[8*i];
1246: alpha2 = x[8*i+1];
1247: alpha3 = x[8*i+2];
1248: alpha4 = x[8*i+3];
1249: alpha5 = x[8*i+4];
1250: alpha6 = x[8*i+5];
1251: alpha7 = x[8*i+6];
1252: alpha8 = x[8*i+7];
1253: while (n-->0) {
1254: y[8*(*idx)] += alpha1*(*v);
1255: y[8*(*idx)+1] += alpha2*(*v);
1256: y[8*(*idx)+2] += alpha3*(*v);
1257: y[8*(*idx)+3] += alpha4*(*v);
1258: y[8*(*idx)+4] += alpha5*(*v);
1259: y[8*(*idx)+5] += alpha6*(*v);
1260: y[8*(*idx)+6] += alpha7*(*v);
1261: y[8*(*idx)+7] += alpha8*(*v);
1262: idx++; v++;
1263: }
1264: }
1265: PetscLogFlops(16*a->nz - 8*b->AIJ->cmap.n);
1266: VecRestoreArray(xx,&x);
1267: VecRestoreArray(yy,&y);
1268: return(0);
1269: }
1273: PetscErrorCode MatMultAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1274: {
1275: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1276: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1277: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1279: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1280: PetscInt n,i,jrow,j;
1283: if (yy != zz) {VecCopy(yy,zz);}
1284: VecGetArray(xx,&x);
1285: VecGetArray(zz,&y);
1286: idx = a->j;
1287: v = a->a;
1288: ii = a->i;
1290: for (i=0; i<m; i++) {
1291: jrow = ii[i];
1292: n = ii[i+1] - jrow;
1293: sum1 = 0.0;
1294: sum2 = 0.0;
1295: sum3 = 0.0;
1296: sum4 = 0.0;
1297: sum5 = 0.0;
1298: sum6 = 0.0;
1299: sum7 = 0.0;
1300: sum8 = 0.0;
1301: for (j=0; j<n; j++) {
1302: sum1 += v[jrow]*x[8*idx[jrow]];
1303: sum2 += v[jrow]*x[8*idx[jrow]+1];
1304: sum3 += v[jrow]*x[8*idx[jrow]+2];
1305: sum4 += v[jrow]*x[8*idx[jrow]+3];
1306: sum5 += v[jrow]*x[8*idx[jrow]+4];
1307: sum6 += v[jrow]*x[8*idx[jrow]+5];
1308: sum7 += v[jrow]*x[8*idx[jrow]+6];
1309: sum8 += v[jrow]*x[8*idx[jrow]+7];
1310: jrow++;
1311: }
1312: y[8*i] += sum1;
1313: y[8*i+1] += sum2;
1314: y[8*i+2] += sum3;
1315: y[8*i+3] += sum4;
1316: y[8*i+4] += sum5;
1317: y[8*i+5] += sum6;
1318: y[8*i+6] += sum7;
1319: y[8*i+7] += sum8;
1320: }
1322: PetscLogFlops(16*a->nz);
1323: VecRestoreArray(xx,&x);
1324: VecRestoreArray(zz,&y);
1325: return(0);
1326: }
1330: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_8(Mat A,Vec xx,Vec yy,Vec zz)
1331: {
1332: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1333: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1334: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
1336: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1339: if (yy != zz) {VecCopy(yy,zz);}
1340: VecGetArray(xx,&x);
1341: VecGetArray(zz,&y);
1342: for (i=0; i<m; i++) {
1343: idx = a->j + a->i[i] ;
1344: v = a->a + a->i[i] ;
1345: n = a->i[i+1] - a->i[i];
1346: alpha1 = x[8*i];
1347: alpha2 = x[8*i+1];
1348: alpha3 = x[8*i+2];
1349: alpha4 = x[8*i+3];
1350: alpha5 = x[8*i+4];
1351: alpha6 = x[8*i+5];
1352: alpha7 = x[8*i+6];
1353: alpha8 = x[8*i+7];
1354: while (n-->0) {
1355: y[8*(*idx)] += alpha1*(*v);
1356: y[8*(*idx)+1] += alpha2*(*v);
1357: y[8*(*idx)+2] += alpha3*(*v);
1358: y[8*(*idx)+3] += alpha4*(*v);
1359: y[8*(*idx)+4] += alpha5*(*v);
1360: y[8*(*idx)+5] += alpha6*(*v);
1361: y[8*(*idx)+6] += alpha7*(*v);
1362: y[8*(*idx)+7] += alpha8*(*v);
1363: idx++; v++;
1364: }
1365: }
1366: PetscLogFlops(16*a->nz);
1367: VecRestoreArray(xx,&x);
1368: VecRestoreArray(zz,&y);
1369: return(0);
1370: }
1372: /* ------------------------------------------------------------------------------*/
1375: PetscErrorCode MatMult_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1376: {
1377: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1378: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1379: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1381: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1382: PetscInt n,i,jrow,j;
1385: VecGetArray(xx,&x);
1386: VecGetArray(yy,&y);
1387: idx = a->j;
1388: v = a->a;
1389: ii = a->i;
1391: for (i=0; i<m; i++) {
1392: jrow = ii[i];
1393: n = ii[i+1] - jrow;
1394: sum1 = 0.0;
1395: sum2 = 0.0;
1396: sum3 = 0.0;
1397: sum4 = 0.0;
1398: sum5 = 0.0;
1399: sum6 = 0.0;
1400: sum7 = 0.0;
1401: sum8 = 0.0;
1402: sum9 = 0.0;
1403: for (j=0; j<n; j++) {
1404: sum1 += v[jrow]*x[9*idx[jrow]];
1405: sum2 += v[jrow]*x[9*idx[jrow]+1];
1406: sum3 += v[jrow]*x[9*idx[jrow]+2];
1407: sum4 += v[jrow]*x[9*idx[jrow]+3];
1408: sum5 += v[jrow]*x[9*idx[jrow]+4];
1409: sum6 += v[jrow]*x[9*idx[jrow]+5];
1410: sum7 += v[jrow]*x[9*idx[jrow]+6];
1411: sum8 += v[jrow]*x[9*idx[jrow]+7];
1412: sum9 += v[jrow]*x[9*idx[jrow]+8];
1413: jrow++;
1414: }
1415: y[9*i] = sum1;
1416: y[9*i+1] = sum2;
1417: y[9*i+2] = sum3;
1418: y[9*i+3] = sum4;
1419: y[9*i+4] = sum5;
1420: y[9*i+5] = sum6;
1421: y[9*i+6] = sum7;
1422: y[9*i+7] = sum8;
1423: y[9*i+8] = sum9;
1424: }
1426: PetscLogFlops(18*a->nz - 9*m);
1427: VecRestoreArray(xx,&x);
1428: VecRestoreArray(yy,&y);
1429: return(0);
1430: }
1432: /* ------------------------------------------------------------------------------*/
1436: PetscErrorCode MatMultTranspose_SeqMAIJ_9(Mat A,Vec xx,Vec yy)
1437: {
1438: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1439: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1440: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,zero = 0.0;
1442: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1445: VecSet(yy,zero);
1446: VecGetArray(xx,&x);
1447: VecGetArray(yy,&y);
1449: for (i=0; i<m; i++) {
1450: idx = a->j + a->i[i] ;
1451: v = a->a + a->i[i] ;
1452: n = a->i[i+1] - a->i[i];
1453: alpha1 = x[9*i];
1454: alpha2 = x[9*i+1];
1455: alpha3 = x[9*i+2];
1456: alpha4 = x[9*i+3];
1457: alpha5 = x[9*i+4];
1458: alpha6 = x[9*i+5];
1459: alpha7 = x[9*i+6];
1460: alpha8 = x[9*i+7];
1461: alpha9 = x[9*i+8];
1462: while (n-->0) {
1463: y[9*(*idx)] += alpha1*(*v);
1464: y[9*(*idx)+1] += alpha2*(*v);
1465: y[9*(*idx)+2] += alpha3*(*v);
1466: y[9*(*idx)+3] += alpha4*(*v);
1467: y[9*(*idx)+4] += alpha5*(*v);
1468: y[9*(*idx)+5] += alpha6*(*v);
1469: y[9*(*idx)+6] += alpha7*(*v);
1470: y[9*(*idx)+7] += alpha8*(*v);
1471: y[9*(*idx)+8] += alpha9*(*v);
1472: idx++; v++;
1473: }
1474: }
1475: PetscLogFlops(18*a->nz - 9*b->AIJ->cmap.n);
1476: VecRestoreArray(xx,&x);
1477: VecRestoreArray(yy,&y);
1478: return(0);
1479: }
1483: PetscErrorCode MatMultAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1484: {
1485: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1486: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1487: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9;
1489: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1490: PetscInt n,i,jrow,j;
1493: if (yy != zz) {VecCopy(yy,zz);}
1494: VecGetArray(xx,&x);
1495: VecGetArray(zz,&y);
1496: idx = a->j;
1497: v = a->a;
1498: ii = a->i;
1500: for (i=0; i<m; i++) {
1501: jrow = ii[i];
1502: n = ii[i+1] - jrow;
1503: sum1 = 0.0;
1504: sum2 = 0.0;
1505: sum3 = 0.0;
1506: sum4 = 0.0;
1507: sum5 = 0.0;
1508: sum6 = 0.0;
1509: sum7 = 0.0;
1510: sum8 = 0.0;
1511: sum9 = 0.0;
1512: for (j=0; j<n; j++) {
1513: sum1 += v[jrow]*x[9*idx[jrow]];
1514: sum2 += v[jrow]*x[9*idx[jrow]+1];
1515: sum3 += v[jrow]*x[9*idx[jrow]+2];
1516: sum4 += v[jrow]*x[9*idx[jrow]+3];
1517: sum5 += v[jrow]*x[9*idx[jrow]+4];
1518: sum6 += v[jrow]*x[9*idx[jrow]+5];
1519: sum7 += v[jrow]*x[9*idx[jrow]+6];
1520: sum8 += v[jrow]*x[9*idx[jrow]+7];
1521: sum9 += v[jrow]*x[9*idx[jrow]+8];
1522: jrow++;
1523: }
1524: y[9*i] += sum1;
1525: y[9*i+1] += sum2;
1526: y[9*i+2] += sum3;
1527: y[9*i+3] += sum4;
1528: y[9*i+4] += sum5;
1529: y[9*i+5] += sum6;
1530: y[9*i+6] += sum7;
1531: y[9*i+7] += sum8;
1532: y[9*i+8] += sum9;
1533: }
1535: PetscLogFlops(18*a->nz);
1536: VecRestoreArray(xx,&x);
1537: VecRestoreArray(zz,&y);
1538: return(0);
1539: }
1543: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_9(Mat A,Vec xx,Vec yy,Vec zz)
1544: {
1545: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1546: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1547: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9;
1549: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1552: if (yy != zz) {VecCopy(yy,zz);}
1553: VecGetArray(xx,&x);
1554: VecGetArray(zz,&y);
1555: for (i=0; i<m; i++) {
1556: idx = a->j + a->i[i] ;
1557: v = a->a + a->i[i] ;
1558: n = a->i[i+1] - a->i[i];
1559: alpha1 = x[9*i];
1560: alpha2 = x[9*i+1];
1561: alpha3 = x[9*i+2];
1562: alpha4 = x[9*i+3];
1563: alpha5 = x[9*i+4];
1564: alpha6 = x[9*i+5];
1565: alpha7 = x[9*i+6];
1566: alpha8 = x[9*i+7];
1567: alpha9 = x[9*i+8];
1568: while (n-->0) {
1569: y[9*(*idx)] += alpha1*(*v);
1570: y[9*(*idx)+1] += alpha2*(*v);
1571: y[9*(*idx)+2] += alpha3*(*v);
1572: y[9*(*idx)+3] += alpha4*(*v);
1573: y[9*(*idx)+4] += alpha5*(*v);
1574: y[9*(*idx)+5] += alpha6*(*v);
1575: y[9*(*idx)+6] += alpha7*(*v);
1576: y[9*(*idx)+7] += alpha8*(*v);
1577: y[9*(*idx)+8] += alpha9*(*v);
1578: idx++; v++;
1579: }
1580: }
1581: PetscLogFlops(18*a->nz);
1582: VecRestoreArray(xx,&x);
1583: VecRestoreArray(zz,&y);
1584: return(0);
1585: }
1586: /*--------------------------------------------------------------------------------------------*/
1589: PetscErrorCode MatMult_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1590: {
1591: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1592: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1593: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1595: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1596: PetscInt n,i,jrow,j;
1599: VecGetArray(xx,&x);
1600: VecGetArray(yy,&y);
1601: idx = a->j;
1602: v = a->a;
1603: ii = a->i;
1605: for (i=0; i<m; i++) {
1606: jrow = ii[i];
1607: n = ii[i+1] - jrow;
1608: sum1 = 0.0;
1609: sum2 = 0.0;
1610: sum3 = 0.0;
1611: sum4 = 0.0;
1612: sum5 = 0.0;
1613: sum6 = 0.0;
1614: sum7 = 0.0;
1615: sum8 = 0.0;
1616: sum9 = 0.0;
1617: sum10 = 0.0;
1618: for (j=0; j<n; j++) {
1619: sum1 += v[jrow]*x[10*idx[jrow]];
1620: sum2 += v[jrow]*x[10*idx[jrow]+1];
1621: sum3 += v[jrow]*x[10*idx[jrow]+2];
1622: sum4 += v[jrow]*x[10*idx[jrow]+3];
1623: sum5 += v[jrow]*x[10*idx[jrow]+4];
1624: sum6 += v[jrow]*x[10*idx[jrow]+5];
1625: sum7 += v[jrow]*x[10*idx[jrow]+6];
1626: sum8 += v[jrow]*x[10*idx[jrow]+7];
1627: sum9 += v[jrow]*x[10*idx[jrow]+8];
1628: sum10 += v[jrow]*x[10*idx[jrow]+9];
1629: jrow++;
1630: }
1631: y[10*i] = sum1;
1632: y[10*i+1] = sum2;
1633: y[10*i+2] = sum3;
1634: y[10*i+3] = sum4;
1635: y[10*i+4] = sum5;
1636: y[10*i+5] = sum6;
1637: y[10*i+6] = sum7;
1638: y[10*i+7] = sum8;
1639: y[10*i+8] = sum9;
1640: y[10*i+9] = sum10;
1641: }
1643: PetscLogFlops(20*a->nz - 10*m);
1644: VecRestoreArray(xx,&x);
1645: VecRestoreArray(yy,&y);
1646: return(0);
1647: }
1651: PetscErrorCode MatMultAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1652: {
1653: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1654: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1655: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8, sum9, sum10;
1657: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1658: PetscInt n,i,jrow,j;
1661: if (yy != zz) {VecCopy(yy,zz);}
1662: VecGetArray(xx,&x);
1663: VecGetArray(zz,&y);
1664: idx = a->j;
1665: v = a->a;
1666: ii = a->i;
1668: for (i=0; i<m; i++) {
1669: jrow = ii[i];
1670: n = ii[i+1] - jrow;
1671: sum1 = 0.0;
1672: sum2 = 0.0;
1673: sum3 = 0.0;
1674: sum4 = 0.0;
1675: sum5 = 0.0;
1676: sum6 = 0.0;
1677: sum7 = 0.0;
1678: sum8 = 0.0;
1679: sum9 = 0.0;
1680: sum10 = 0.0;
1681: for (j=0; j<n; j++) {
1682: sum1 += v[jrow]*x[10*idx[jrow]];
1683: sum2 += v[jrow]*x[10*idx[jrow]+1];
1684: sum3 += v[jrow]*x[10*idx[jrow]+2];
1685: sum4 += v[jrow]*x[10*idx[jrow]+3];
1686: sum5 += v[jrow]*x[10*idx[jrow]+4];
1687: sum6 += v[jrow]*x[10*idx[jrow]+5];
1688: sum7 += v[jrow]*x[10*idx[jrow]+6];
1689: sum8 += v[jrow]*x[10*idx[jrow]+7];
1690: sum9 += v[jrow]*x[10*idx[jrow]+8];
1691: sum10 += v[jrow]*x[10*idx[jrow]+9];
1692: jrow++;
1693: }
1694: y[10*i] += sum1;
1695: y[10*i+1] += sum2;
1696: y[10*i+2] += sum3;
1697: y[10*i+3] += sum4;
1698: y[10*i+4] += sum5;
1699: y[10*i+5] += sum6;
1700: y[10*i+6] += sum7;
1701: y[10*i+7] += sum8;
1702: y[10*i+8] += sum9;
1703: y[10*i+9] += sum10;
1704: }
1706: PetscLogFlops(20*a->nz - 10*m);
1707: VecRestoreArray(xx,&x);
1708: VecRestoreArray(yy,&y);
1709: return(0);
1710: }
1714: PetscErrorCode MatMultTranspose_SeqMAIJ_10(Mat A,Vec xx,Vec yy)
1715: {
1716: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1717: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1718: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10,zero = 0.0;
1720: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1723: VecSet(yy,zero);
1724: VecGetArray(xx,&x);
1725: VecGetArray(yy,&y);
1727: for (i=0; i<m; i++) {
1728: idx = a->j + a->i[i] ;
1729: v = a->a + a->i[i] ;
1730: n = a->i[i+1] - a->i[i];
1731: alpha1 = x[10*i];
1732: alpha2 = x[10*i+1];
1733: alpha3 = x[10*i+2];
1734: alpha4 = x[10*i+3];
1735: alpha5 = x[10*i+4];
1736: alpha6 = x[10*i+5];
1737: alpha7 = x[10*i+6];
1738: alpha8 = x[10*i+7];
1739: alpha9 = x[10*i+8];
1740: alpha10 = x[10*i+9];
1741: while (n-->0) {
1742: y[10*(*idx)] += alpha1*(*v);
1743: y[10*(*idx)+1] += alpha2*(*v);
1744: y[10*(*idx)+2] += alpha3*(*v);
1745: y[10*(*idx)+3] += alpha4*(*v);
1746: y[10*(*idx)+4] += alpha5*(*v);
1747: y[10*(*idx)+5] += alpha6*(*v);
1748: y[10*(*idx)+6] += alpha7*(*v);
1749: y[10*(*idx)+7] += alpha8*(*v);
1750: y[10*(*idx)+8] += alpha9*(*v);
1751: y[10*(*idx)+9] += alpha10*(*v);
1752: idx++; v++;
1753: }
1754: }
1755: PetscLogFlops(20*a->nz - 10*b->AIJ->cmap.n);
1756: VecRestoreArray(xx,&x);
1757: VecRestoreArray(yy,&y);
1758: return(0);
1759: }
1763: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_10(Mat A,Vec xx,Vec yy,Vec zz)
1764: {
1765: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1766: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1767: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,alpha9,alpha10;
1769: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1772: if (yy != zz) {VecCopy(yy,zz);}
1773: VecGetArray(xx,&x);
1774: VecGetArray(zz,&y);
1775: for (i=0; i<m; i++) {
1776: idx = a->j + a->i[i] ;
1777: v = a->a + a->i[i] ;
1778: n = a->i[i+1] - a->i[i];
1779: alpha1 = x[10*i];
1780: alpha2 = x[10*i+1];
1781: alpha3 = x[10*i+2];
1782: alpha4 = x[10*i+3];
1783: alpha5 = x[10*i+4];
1784: alpha6 = x[10*i+5];
1785: alpha7 = x[10*i+6];
1786: alpha8 = x[10*i+7];
1787: alpha9 = x[10*i+8];
1788: alpha10 = x[10*i+9];
1789: while (n-->0) {
1790: y[10*(*idx)] += alpha1*(*v);
1791: y[10*(*idx)+1] += alpha2*(*v);
1792: y[10*(*idx)+2] += alpha3*(*v);
1793: y[10*(*idx)+3] += alpha4*(*v);
1794: y[10*(*idx)+4] += alpha5*(*v);
1795: y[10*(*idx)+5] += alpha6*(*v);
1796: y[10*(*idx)+6] += alpha7*(*v);
1797: y[10*(*idx)+7] += alpha8*(*v);
1798: y[10*(*idx)+8] += alpha9*(*v);
1799: y[10*(*idx)+9] += alpha10*(*v);
1800: idx++; v++;
1801: }
1802: }
1803: PetscLogFlops(20*a->nz);
1804: VecRestoreArray(xx,&x);
1805: VecRestoreArray(zz,&y);
1806: return(0);
1807: }
1810: /*--------------------------------------------------------------------------------------------*/
1813: PetscErrorCode MatMult_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1814: {
1815: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1816: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1817: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1818: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1820: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1821: PetscInt n,i,jrow,j;
1824: VecGetArray(xx,&x);
1825: VecGetArray(yy,&y);
1826: idx = a->j;
1827: v = a->a;
1828: ii = a->i;
1830: for (i=0; i<m; i++) {
1831: jrow = ii[i];
1832: n = ii[i+1] - jrow;
1833: sum1 = 0.0;
1834: sum2 = 0.0;
1835: sum3 = 0.0;
1836: sum4 = 0.0;
1837: sum5 = 0.0;
1838: sum6 = 0.0;
1839: sum7 = 0.0;
1840: sum8 = 0.0;
1841: sum9 = 0.0;
1842: sum10 = 0.0;
1843: sum11 = 0.0;
1844: sum12 = 0.0;
1845: sum13 = 0.0;
1846: sum14 = 0.0;
1847: sum15 = 0.0;
1848: sum16 = 0.0;
1849: for (j=0; j<n; j++) {
1850: sum1 += v[jrow]*x[16*idx[jrow]];
1851: sum2 += v[jrow]*x[16*idx[jrow]+1];
1852: sum3 += v[jrow]*x[16*idx[jrow]+2];
1853: sum4 += v[jrow]*x[16*idx[jrow]+3];
1854: sum5 += v[jrow]*x[16*idx[jrow]+4];
1855: sum6 += v[jrow]*x[16*idx[jrow]+5];
1856: sum7 += v[jrow]*x[16*idx[jrow]+6];
1857: sum8 += v[jrow]*x[16*idx[jrow]+7];
1858: sum9 += v[jrow]*x[16*idx[jrow]+8];
1859: sum10 += v[jrow]*x[16*idx[jrow]+9];
1860: sum11 += v[jrow]*x[16*idx[jrow]+10];
1861: sum12 += v[jrow]*x[16*idx[jrow]+11];
1862: sum13 += v[jrow]*x[16*idx[jrow]+12];
1863: sum14 += v[jrow]*x[16*idx[jrow]+13];
1864: sum15 += v[jrow]*x[16*idx[jrow]+14];
1865: sum16 += v[jrow]*x[16*idx[jrow]+15];
1866: jrow++;
1867: }
1868: y[16*i] = sum1;
1869: y[16*i+1] = sum2;
1870: y[16*i+2] = sum3;
1871: y[16*i+3] = sum4;
1872: y[16*i+4] = sum5;
1873: y[16*i+5] = sum6;
1874: y[16*i+6] = sum7;
1875: y[16*i+7] = sum8;
1876: y[16*i+8] = sum9;
1877: y[16*i+9] = sum10;
1878: y[16*i+10] = sum11;
1879: y[16*i+11] = sum12;
1880: y[16*i+12] = sum13;
1881: y[16*i+13] = sum14;
1882: y[16*i+14] = sum15;
1883: y[16*i+15] = sum16;
1884: }
1886: PetscLogFlops(32*a->nz - 16*m);
1887: VecRestoreArray(xx,&x);
1888: VecRestoreArray(yy,&y);
1889: return(0);
1890: }
1894: PetscErrorCode MatMultTranspose_SeqMAIJ_16(Mat A,Vec xx,Vec yy)
1895: {
1896: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1897: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1898: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8,zero = 0.0;
1899: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
1901: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
1904: VecSet(yy,zero);
1905: VecGetArray(xx,&x);
1906: VecGetArray(yy,&y);
1908: for (i=0; i<m; i++) {
1909: idx = a->j + a->i[i] ;
1910: v = a->a + a->i[i] ;
1911: n = a->i[i+1] - a->i[i];
1912: alpha1 = x[16*i];
1913: alpha2 = x[16*i+1];
1914: alpha3 = x[16*i+2];
1915: alpha4 = x[16*i+3];
1916: alpha5 = x[16*i+4];
1917: alpha6 = x[16*i+5];
1918: alpha7 = x[16*i+6];
1919: alpha8 = x[16*i+7];
1920: alpha9 = x[16*i+8];
1921: alpha10 = x[16*i+9];
1922: alpha11 = x[16*i+10];
1923: alpha12 = x[16*i+11];
1924: alpha13 = x[16*i+12];
1925: alpha14 = x[16*i+13];
1926: alpha15 = x[16*i+14];
1927: alpha16 = x[16*i+15];
1928: while (n-->0) {
1929: y[16*(*idx)] += alpha1*(*v);
1930: y[16*(*idx)+1] += alpha2*(*v);
1931: y[16*(*idx)+2] += alpha3*(*v);
1932: y[16*(*idx)+3] += alpha4*(*v);
1933: y[16*(*idx)+4] += alpha5*(*v);
1934: y[16*(*idx)+5] += alpha6*(*v);
1935: y[16*(*idx)+6] += alpha7*(*v);
1936: y[16*(*idx)+7] += alpha8*(*v);
1937: y[16*(*idx)+8] += alpha9*(*v);
1938: y[16*(*idx)+9] += alpha10*(*v);
1939: y[16*(*idx)+10] += alpha11*(*v);
1940: y[16*(*idx)+11] += alpha12*(*v);
1941: y[16*(*idx)+12] += alpha13*(*v);
1942: y[16*(*idx)+13] += alpha14*(*v);
1943: y[16*(*idx)+14] += alpha15*(*v);
1944: y[16*(*idx)+15] += alpha16*(*v);
1945: idx++; v++;
1946: }
1947: }
1948: PetscLogFlops(32*a->nz - 16*b->AIJ->cmap.n);
1949: VecRestoreArray(xx,&x);
1950: VecRestoreArray(yy,&y);
1951: return(0);
1952: }
1956: PetscErrorCode MatMultAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
1957: {
1958: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
1959: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
1960: PetscScalar *x,*y,*v,sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8;
1961: PetscScalar sum9, sum10, sum11, sum12, sum13, sum14, sum15, sum16;
1963: PetscInt m = b->AIJ->rmap.n,*idx,*ii;
1964: PetscInt n,i,jrow,j;
1967: if (yy != zz) {VecCopy(yy,zz);}
1968: VecGetArray(xx,&x);
1969: VecGetArray(zz,&y);
1970: idx = a->j;
1971: v = a->a;
1972: ii = a->i;
1974: for (i=0; i<m; i++) {
1975: jrow = ii[i];
1976: n = ii[i+1] - jrow;
1977: sum1 = 0.0;
1978: sum2 = 0.0;
1979: sum3 = 0.0;
1980: sum4 = 0.0;
1981: sum5 = 0.0;
1982: sum6 = 0.0;
1983: sum7 = 0.0;
1984: sum8 = 0.0;
1985: sum9 = 0.0;
1986: sum10 = 0.0;
1987: sum11 = 0.0;
1988: sum12 = 0.0;
1989: sum13 = 0.0;
1990: sum14 = 0.0;
1991: sum15 = 0.0;
1992: sum16 = 0.0;
1993: for (j=0; j<n; j++) {
1994: sum1 += v[jrow]*x[16*idx[jrow]];
1995: sum2 += v[jrow]*x[16*idx[jrow]+1];
1996: sum3 += v[jrow]*x[16*idx[jrow]+2];
1997: sum4 += v[jrow]*x[16*idx[jrow]+3];
1998: sum5 += v[jrow]*x[16*idx[jrow]+4];
1999: sum6 += v[jrow]*x[16*idx[jrow]+5];
2000: sum7 += v[jrow]*x[16*idx[jrow]+6];
2001: sum8 += v[jrow]*x[16*idx[jrow]+7];
2002: sum9 += v[jrow]*x[16*idx[jrow]+8];
2003: sum10 += v[jrow]*x[16*idx[jrow]+9];
2004: sum11 += v[jrow]*x[16*idx[jrow]+10];
2005: sum12 += v[jrow]*x[16*idx[jrow]+11];
2006: sum13 += v[jrow]*x[16*idx[jrow]+12];
2007: sum14 += v[jrow]*x[16*idx[jrow]+13];
2008: sum15 += v[jrow]*x[16*idx[jrow]+14];
2009: sum16 += v[jrow]*x[16*idx[jrow]+15];
2010: jrow++;
2011: }
2012: y[16*i] += sum1;
2013: y[16*i+1] += sum2;
2014: y[16*i+2] += sum3;
2015: y[16*i+3] += sum4;
2016: y[16*i+4] += sum5;
2017: y[16*i+5] += sum6;
2018: y[16*i+6] += sum7;
2019: y[16*i+7] += sum8;
2020: y[16*i+8] += sum9;
2021: y[16*i+9] += sum10;
2022: y[16*i+10] += sum11;
2023: y[16*i+11] += sum12;
2024: y[16*i+12] += sum13;
2025: y[16*i+13] += sum14;
2026: y[16*i+14] += sum15;
2027: y[16*i+15] += sum16;
2028: }
2030: PetscLogFlops(32*a->nz);
2031: VecRestoreArray(xx,&x);
2032: VecRestoreArray(zz,&y);
2033: return(0);
2034: }
2038: PetscErrorCode MatMultTransposeAdd_SeqMAIJ_16(Mat A,Vec xx,Vec yy,Vec zz)
2039: {
2040: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2041: Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data;
2042: PetscScalar *x,*y,*v,alpha1,alpha2,alpha3,alpha4,alpha5,alpha6,alpha7,alpha8;
2043: PetscScalar alpha9,alpha10,alpha11,alpha12,alpha13,alpha14,alpha15,alpha16;
2045: PetscInt m = b->AIJ->rmap.n,n,i,*idx;
2048: if (yy != zz) {VecCopy(yy,zz);}
2049: VecGetArray(xx,&x);
2050: VecGetArray(zz,&y);
2051: for (i=0; i<m; i++) {
2052: idx = a->j + a->i[i] ;
2053: v = a->a + a->i[i] ;
2054: n = a->i[i+1] - a->i[i];
2055: alpha1 = x[16*i];
2056: alpha2 = x[16*i+1];
2057: alpha3 = x[16*i+2];
2058: alpha4 = x[16*i+3];
2059: alpha5 = x[16*i+4];
2060: alpha6 = x[16*i+5];
2061: alpha7 = x[16*i+6];
2062: alpha8 = x[16*i+7];
2063: alpha9 = x[16*i+8];
2064: alpha10 = x[16*i+9];
2065: alpha11 = x[16*i+10];
2066: alpha12 = x[16*i+11];
2067: alpha13 = x[16*i+12];
2068: alpha14 = x[16*i+13];
2069: alpha15 = x[16*i+14];
2070: alpha16 = x[16*i+15];
2071: while (n-->0) {
2072: y[16*(*idx)] += alpha1*(*v);
2073: y[16*(*idx)+1] += alpha2*(*v);
2074: y[16*(*idx)+2] += alpha3*(*v);
2075: y[16*(*idx)+3] += alpha4*(*v);
2076: y[16*(*idx)+4] += alpha5*(*v);
2077: y[16*(*idx)+5] += alpha6*(*v);
2078: y[16*(*idx)+6] += alpha7*(*v);
2079: y[16*(*idx)+7] += alpha8*(*v);
2080: y[16*(*idx)+8] += alpha9*(*v);
2081: y[16*(*idx)+9] += alpha10*(*v);
2082: y[16*(*idx)+10] += alpha11*(*v);
2083: y[16*(*idx)+11] += alpha12*(*v);
2084: y[16*(*idx)+12] += alpha13*(*v);
2085: y[16*(*idx)+13] += alpha14*(*v);
2086: y[16*(*idx)+14] += alpha15*(*v);
2087: y[16*(*idx)+15] += alpha16*(*v);
2088: idx++; v++;
2089: }
2090: }
2091: PetscLogFlops(32*a->nz);
2092: VecRestoreArray(xx,&x);
2093: VecRestoreArray(zz,&y);
2094: return(0);
2095: }
2097: /*===================================================================================*/
2100: PetscErrorCode MatMult_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2101: {
2102: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2106: /* start the scatter */
2107: VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
2108: (*b->AIJ->ops->mult)(b->AIJ,xx,yy);
2109: VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
2110: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,yy,yy);
2111: return(0);
2112: }
2116: PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A,Vec xx,Vec yy)
2117: {
2118: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2122: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2123: (*b->AIJ->ops->multtranspose)(b->AIJ,xx,yy);
2124: VecScatterBegin(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
2125: VecScatterEnd(b->w,yy,ADD_VALUES,SCATTER_REVERSE,b->ctx);
2126: return(0);
2127: }
2131: PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2132: {
2133: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2137: /* start the scatter */
2138: VecScatterBegin(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
2139: (*b->AIJ->ops->multadd)(b->AIJ,xx,yy,zz);
2140: VecScatterEnd(xx,b->w,INSERT_VALUES,SCATTER_FORWARD,b->ctx);
2141: (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);
2142: return(0);
2143: }
2147: PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A,Vec xx,Vec yy,Vec zz)
2148: {
2149: Mat_MPIMAIJ *b = (Mat_MPIMAIJ*)A->data;
2153: (*b->OAIJ->ops->multtranspose)(b->OAIJ,xx,b->w);
2154: VecScatterBegin(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
2155: (*b->AIJ->ops->multtransposeadd)(b->AIJ,xx,yy,zz);
2156: VecScatterEnd(b->w,zz,ADD_VALUES,SCATTER_REVERSE,b->ctx);
2157: return(0);
2158: }
2162: PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A,Mat PP,PetscReal fill,Mat *C)
2163: {
2164: /* This routine requires testing -- but it's getting better. */
2165: PetscErrorCode ierr;
2166: PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
2167: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
2168: Mat P=pp->AIJ;
2169: Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data,*p=(Mat_SeqAIJ*)P->data,*c;
2170: PetscInt *pti,*ptj,*ptJ,*ai=a->i,*aj=a->j,*ajj,*pi=p->i,*pj=p->j,*pjj;
2171: PetscInt *ci,*cj,*ptadenserow,*ptasparserow,*denserow,*sparserow,*ptaj;
2172: PetscInt an=A->cmap.N,am=A->rmap.N,pn=P->cmap.N,pm=P->rmap.N,ppdof=pp->dof,cn;
2173: PetscInt i,j,k,dof,pshift,ptnzi,arow,anzj,ptanzi,prow,pnzj,cnzi;
2174: MatScalar *ca;
2177: /* Start timer */
2178: PetscLogEventBegin(MAT_PtAPSymbolic,A,PP,0,0);
2180: /* Get ij structure of P^T */
2181: MatGetSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
2183: cn = pn*ppdof;
2184: /* Allocate ci array, arrays for fill computation and */
2185: /* free space for accumulating nonzero column info */
2186: PetscMalloc((cn+1)*sizeof(PetscInt),&ci);
2187: ci[0] = 0;
2189: /* Work arrays for rows of P^T*A */
2190: PetscMalloc((2*cn+2*an+1)*sizeof(PetscInt),&ptadenserow);
2191: PetscMemzero(ptadenserow,(2*cn+2*an+1)*sizeof(PetscInt));
2192: ptasparserow = ptadenserow + an;
2193: denserow = ptasparserow + an;
2194: sparserow = denserow + cn;
2196: /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
2197: /* This should be reasonable if sparsity of PtAP is similar to that of A. */
2198: /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
2199: PetscFreeSpaceGet((ai[am]/pm)*pn,&free_space);
2200: current_space = free_space;
2202: /* Determine symbolic info for each row of C: */
2203: for (i=0;i<pn;i++) {
2204: ptnzi = pti[i+1] - pti[i];
2205: ptJ = ptj + pti[i];
2206: for (dof=0;dof<ppdof;dof++) {
2207: ptanzi = 0;
2208: /* Determine symbolic row of PtA: */
2209: for (j=0;j<ptnzi;j++) {
2210: /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
2211: arow = ptJ[j]*ppdof + dof;
2212: /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
2213: anzj = ai[arow+1] - ai[arow];
2214: ajj = aj + ai[arow];
2215: for (k=0;k<anzj;k++) {
2216: if (!ptadenserow[ajj[k]]) {
2217: ptadenserow[ajj[k]] = -1;
2218: ptasparserow[ptanzi++] = ajj[k];
2219: }
2220: }
2221: }
2222: /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
2223: ptaj = ptasparserow;
2224: cnzi = 0;
2225: for (j=0;j<ptanzi;j++) {
2226: /* Get offset within block of P */
2227: pshift = *ptaj%ppdof;
2228: /* Get block row of P */
2229: prow = (*ptaj++)/ppdof; /* integer division */
2230: /* P has same number of nonzeros per row as the compressed form */
2231: pnzj = pi[prow+1] - pi[prow];
2232: pjj = pj + pi[prow];
2233: for (k=0;k<pnzj;k++) {
2234: /* Locations in C are shifted by the offset within the block */
2235: /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
2236: if (!denserow[pjj[k]*ppdof+pshift]) {
2237: denserow[pjj[k]*ppdof+pshift] = -1;
2238: sparserow[cnzi++] = pjj[k]*ppdof+pshift;
2239: }
2240: }
2241: }
2243: /* sort sparserow */
2244: PetscSortInt(cnzi,sparserow);
2245:
2246: /* If free space is not available, make more free space */
2247: /* Double the amount of total space in the list */
2248: if (current_space->local_remaining<cnzi) {
2249: PetscFreeSpaceGet(current_space->total_array_size,¤t_space);
2250: }
2252: /* Copy data into free space, and zero out denserows */
2253: PetscMemcpy(current_space->array,sparserow,cnzi*sizeof(PetscInt));
2254: current_space->array += cnzi;
2255: current_space->local_used += cnzi;
2256: current_space->local_remaining -= cnzi;
2258: for (j=0;j<ptanzi;j++) {
2259: ptadenserow[ptasparserow[j]] = 0;
2260: }
2261: for (j=0;j<cnzi;j++) {
2262: denserow[sparserow[j]] = 0;
2263: }
2264: /* Aside: Perhaps we should save the pta info for the numerical factorization. */
2265: /* For now, we will recompute what is needed. */
2266: ci[i*ppdof+1+dof] = ci[i*ppdof+dof] + cnzi;
2267: }
2268: }
2269: /* nnz is now stored in ci[ptm], column indices are in the list of free space */
2270: /* Allocate space for cj, initialize cj, and */
2271: /* destroy list of free space and other temporary array(s) */
2272: PetscMalloc((ci[cn]+1)*sizeof(PetscInt),&cj);
2273: PetscFreeSpaceContiguous(&free_space,cj);
2274: PetscFree(ptadenserow);
2275:
2276: /* Allocate space for ca */
2277: PetscMalloc((ci[cn]+1)*sizeof(MatScalar),&ca);
2278: PetscMemzero(ca,(ci[cn]+1)*sizeof(MatScalar));
2279:
2280: /* put together the new matrix */
2281: MatCreateSeqAIJWithArrays(A->comm,cn,cn,ci,cj,ca,C);
2283: /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
2284: /* Since these are PETSc arrays, change flags to free them as necessary. */
2285: c = (Mat_SeqAIJ *)((*C)->data);
2286: c->freedata = PETSC_TRUE;
2287: c->nonew = 0;
2289: /* Clean up. */
2290: MatRestoreSymbolicTranspose_SeqAIJ(P,&pti,&ptj);
2292: PetscLogEventEnd(MAT_PtAPSymbolic,A,PP,0,0);
2293: return(0);
2294: }
2298: PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A,Mat PP,Mat C)
2299: {
2300: /* This routine requires testing -- first draft only */
2302: PetscInt flops=0;
2303: Mat_SeqMAIJ *pp=(Mat_SeqMAIJ*)PP->data;
2304: Mat P=pp->AIJ;
2305: Mat_SeqAIJ *a = (Mat_SeqAIJ *) A->data;
2306: Mat_SeqAIJ *p = (Mat_SeqAIJ *) P->data;
2307: Mat_SeqAIJ *c = (Mat_SeqAIJ *) C->data;
2308: PetscInt *ai=a->i,*aj=a->j,*apj,*apjdense,*pi=p->i,*pj=p->j,*pJ=p->j,*pjj;
2309: PetscInt *ci=c->i,*cj=c->j,*cjj;
2310: PetscInt am=A->rmap.N,cn=C->cmap.N,cm=C->rmap.N,ppdof=pp->dof;
2311: PetscInt i,j,k,pshift,poffset,anzi,pnzi,apnzj,nextap,pnzj,prow,crow;
2312: MatScalar *aa=a->a,*apa,*pa=p->a,*pA=p->a,*paj,*ca=c->a,*caj;
2315: /* Allocate temporary array for storage of one row of A*P */
2316: PetscMalloc(cn*(sizeof(MatScalar)+2*sizeof(PetscInt)),&apa);
2317: PetscMemzero(apa,cn*(sizeof(MatScalar)+2*sizeof(PetscInt)));
2319: apj = (PetscInt *)(apa + cn);
2320: apjdense = apj + cn;
2322: /* Clear old values in C */
2323: PetscMemzero(ca,ci[cm]*sizeof(MatScalar));
2325: for (i=0;i<am;i++) {
2326: /* Form sparse row of A*P */
2327: anzi = ai[i+1] - ai[i];
2328: apnzj = 0;
2329: for (j=0;j<anzi;j++) {
2330: /* Get offset within block of P */
2331: pshift = *aj%ppdof;
2332: /* Get block row of P */
2333: prow = *aj++/ppdof; /* integer division */
2334: pnzj = pi[prow+1] - pi[prow];
2335: pjj = pj + pi[prow];
2336: paj = pa + pi[prow];
2337: for (k=0;k<pnzj;k++) {
2338: poffset = pjj[k]*ppdof+pshift;
2339: if (!apjdense[poffset]) {
2340: apjdense[poffset] = -1;
2341: apj[apnzj++] = poffset;
2342: }
2343: apa[poffset] += (*aa)*paj[k];
2344: }
2345: flops += 2*pnzj;
2346: aa++;
2347: }
2349: /* Sort the j index array for quick sparse axpy. */
2350: /* Note: a array does not need sorting as it is in dense storage locations. */
2351: PetscSortInt(apnzj,apj);
2353: /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
2354: prow = i/ppdof; /* integer division */
2355: pshift = i%ppdof;
2356: poffset = pi[prow];
2357: pnzi = pi[prow+1] - poffset;
2358: /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
2359: pJ = pj+poffset;
2360: pA = pa+poffset;
2361: for (j=0;j<pnzi;j++) {
2362: crow = (*pJ)*ppdof+pshift;
2363: cjj = cj + ci[crow];
2364: caj = ca + ci[crow];
2365: pJ++;
2366: /* Perform sparse axpy operation. Note cjj includes apj. */
2367: for (k=0,nextap=0;nextap<apnzj;k++) {
2368: if (cjj[k]==apj[nextap]) {
2369: caj[k] += (*pA)*apa[apj[nextap++]];
2370: }
2371: }
2372: flops += 2*apnzj;
2373: pA++;
2374: }
2376: /* Zero the current row info for A*P */
2377: for (j=0;j<apnzj;j++) {
2378: apa[apj[j]] = 0.;
2379: apjdense[apj[j]] = 0;
2380: }
2381: }
2383: /* Assemble the final matrix and clean up */
2384: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2385: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2386: PetscFree(apa);
2387: PetscLogFlops(flops);
2389: return(0);
2390: }
2395: PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2396: {
2397: Mat_SeqMAIJ *b = (Mat_SeqMAIJ*)A->data;
2398: Mat a = b->AIJ,B;
2399: Mat_SeqAIJ *aij = (Mat_SeqAIJ*)a->data;
2400: PetscErrorCode ierr;
2401: PetscInt m,n,i,ncols,*ilen,nmax = 0,*icols,j,k,ii,dof = b->dof;
2402: PetscInt *cols;
2403: PetscScalar *vals;
2406: MatGetSize(a,&m,&n);
2407: PetscMalloc(dof*m*sizeof(PetscInt),&ilen);
2408: for (i=0; i<m; i++) {
2409: nmax = PetscMax(nmax,aij->ilen[i]);
2410: for (j=0; j<dof; j++) {
2411: ilen[dof*i+j] = aij->ilen[i];
2412: }
2413: }
2414: MatCreateSeqAIJ(PETSC_COMM_SELF,dof*m,dof*n,0,ilen,&B);
2415: MatSetOption(B,MAT_COLUMNS_SORTED);
2416: PetscFree(ilen);
2417: PetscMalloc(nmax*sizeof(PetscInt),&icols);
2418: ii = 0;
2419: for (i=0; i<m; i++) {
2420: MatGetRow_SeqAIJ(a,i,&ncols,&cols,&vals);
2421: for (j=0; j<dof; j++) {
2422: for (k=0; k<ncols; k++) {
2423: icols[k] = dof*cols[k]+j;
2424: }
2425: MatSetValues_SeqAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
2426: ii++;
2427: }
2428: MatRestoreRow_SeqAIJ(a,i,&ncols,&cols,&vals);
2429: }
2430: PetscFree(icols);
2431: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2432: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2434: if (reuse == MAT_REUSE_MATRIX) {
2435: MatHeaderReplace(A,B);
2436: } else {
2437: *newmat = B;
2438: }
2439: return(0);
2440: }
2443: #include src/mat/impls/aij/mpi/mpiaij.h
2448: PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
2449: {
2450: Mat_MPIMAIJ *maij = (Mat_MPIMAIJ*)A->data;
2451: Mat MatAIJ = ((Mat_SeqMAIJ*)maij->AIJ->data)->AIJ,B;
2452: Mat MatOAIJ = ((Mat_SeqMAIJ*)maij->OAIJ->data)->AIJ;
2453: Mat_SeqAIJ *AIJ = (Mat_SeqAIJ*) MatAIJ->data;
2454: Mat_SeqAIJ *OAIJ =(Mat_SeqAIJ*) MatOAIJ->data;
2455: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ*) maij->A->data;
2456: PetscInt dof = maij->dof,i,j,*dnz,*onz,nmax = 0,onmax = 0,*oicols,*icols,ncols,*cols,oncols,*ocols;
2457: PetscInt rstart,cstart,*garray,ii,k;
2458: PetscErrorCode ierr;
2459: PetscScalar *vals,*ovals;
2462: PetscMalloc2(A->rmap.n,PetscInt,&dnz,A->rmap.n,PetscInt,&onz);
2463: for (i=0; i<A->rmap.n/dof; i++) {
2464: nmax = PetscMax(nmax,AIJ->ilen[i]);
2465: onmax = PetscMax(onmax,OAIJ->ilen[i]);
2466: for (j=0; j<dof; j++) {
2467: dnz[dof*i+j] = AIJ->ilen[i];
2468: onz[dof*i+j] = OAIJ->ilen[i];
2469: }
2470: }
2471: MatCreateMPIAIJ(A->comm,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N,0,dnz,0,onz,&B);
2472: MatSetOption(B,MAT_COLUMNS_SORTED);
2473: PetscFree2(dnz,onz);
2475: PetscMalloc2(nmax,PetscInt,&icols,onmax,PetscInt,&oicols);
2476: rstart = dof*maij->A->rmap.rstart;
2477: cstart = dof*maij->A->cmap.rstart;
2478: garray = mpiaij->garray;
2480: ii = rstart;
2481: for (i=0; i<A->rmap.n/dof; i++) {
2482: MatGetRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
2483: MatGetRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
2484: for (j=0; j<dof; j++) {
2485: for (k=0; k<ncols; k++) {
2486: icols[k] = cstart + dof*cols[k]+j;
2487: }
2488: for (k=0; k<oncols; k++) {
2489: oicols[k] = dof*garray[ocols[k]]+j;
2490: }
2491: MatSetValues_MPIAIJ(B,1,&ii,ncols,icols,vals,INSERT_VALUES);
2492: MatSetValues_MPIAIJ(B,1,&ii,oncols,oicols,ovals,INSERT_VALUES);
2493: ii++;
2494: }
2495: MatRestoreRow_SeqAIJ(MatAIJ,i,&ncols,&cols,&vals);
2496: MatRestoreRow_SeqAIJ(MatOAIJ,i,&oncols,&ocols,&ovals);
2497: }
2498: PetscFree2(icols,oicols);
2500: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
2501: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
2503: if (reuse == MAT_REUSE_MATRIX) {
2504: MatHeaderReplace(A,B);
2505: } else {
2506: *newmat = B;
2507: }
2508: return(0);
2509: }
2513: /* ---------------------------------------------------------------------------------- */
2514: /*MC
2515: MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
2516: operations for multicomponent problems. It interpolates each component the same
2517: way independently. The matrix type is based on MATSEQAIJ for sequential matrices,
2518: and MATMPIAIJ for distributed matrices.
2520: Operations provided:
2521: + MatMult
2522: . MatMultTranspose
2523: . MatMultAdd
2524: . MatMultTransposeAdd
2525: - MatView
2527: Level: advanced
2529: M*/
2532: PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMAIJ(Mat A,PetscInt dof,Mat *maij)
2533: {
2535: PetscMPIInt size;
2536: PetscInt n;
2537: Mat_MPIMAIJ *b;
2538: Mat B;
2541: PetscObjectReference((PetscObject)A);
2543: if (dof == 1) {
2544: *maij = A;
2545: } else {
2546: MatCreate(A->comm,&B);
2547: MatSetSizes(B,dof*A->rmap.n,dof*A->cmap.n,dof*A->rmap.N,dof*A->cmap.N);
2548: B->assembled = PETSC_TRUE;
2550: MPI_Comm_size(A->comm,&size);
2551: if (size == 1) {
2552: MatSetType(B,MATSEQMAIJ);
2553: B->ops->destroy = MatDestroy_SeqMAIJ;
2554: B->ops->view = MatView_SeqMAIJ;
2555: b = (Mat_MPIMAIJ*)B->data;
2556: b->dof = dof;
2557: b->AIJ = A;
2558: if (dof == 2) {
2559: B->ops->mult = MatMult_SeqMAIJ_2;
2560: B->ops->multadd = MatMultAdd_SeqMAIJ_2;
2561: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2;
2562: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
2563: } else if (dof == 3) {
2564: B->ops->mult = MatMult_SeqMAIJ_3;
2565: B->ops->multadd = MatMultAdd_SeqMAIJ_3;
2566: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3;
2567: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
2568: } else if (dof == 4) {
2569: B->ops->mult = MatMult_SeqMAIJ_4;
2570: B->ops->multadd = MatMultAdd_SeqMAIJ_4;
2571: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4;
2572: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
2573: } else if (dof == 5) {
2574: B->ops->mult = MatMult_SeqMAIJ_5;
2575: B->ops->multadd = MatMultAdd_SeqMAIJ_5;
2576: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5;
2577: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
2578: } else if (dof == 6) {
2579: B->ops->mult = MatMult_SeqMAIJ_6;
2580: B->ops->multadd = MatMultAdd_SeqMAIJ_6;
2581: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6;
2582: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
2583: } else if (dof == 7) {
2584: B->ops->mult = MatMult_SeqMAIJ_7;
2585: B->ops->multadd = MatMultAdd_SeqMAIJ_7;
2586: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7;
2587: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
2588: } else if (dof == 8) {
2589: B->ops->mult = MatMult_SeqMAIJ_8;
2590: B->ops->multadd = MatMultAdd_SeqMAIJ_8;
2591: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8;
2592: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
2593: } else if (dof == 9) {
2594: B->ops->mult = MatMult_SeqMAIJ_9;
2595: B->ops->multadd = MatMultAdd_SeqMAIJ_9;
2596: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9;
2597: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
2598: } else if (dof == 10) {
2599: B->ops->mult = MatMult_SeqMAIJ_10;
2600: B->ops->multadd = MatMultAdd_SeqMAIJ_10;
2601: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10;
2602: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
2603: } else if (dof == 16) {
2604: B->ops->mult = MatMult_SeqMAIJ_16;
2605: B->ops->multadd = MatMultAdd_SeqMAIJ_16;
2606: B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16;
2607: B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
2608: } else {
2609: SETERRQ1(PETSC_ERR_SUP,"Cannot handle a dof of %D. Send request for code to petsc-maint@mcs.anl.gov\n",dof);
2610: }
2611: B->ops->ptapsymbolic_seqaij = MatPtAPSymbolic_SeqAIJ_SeqMAIJ;
2612: B->ops->ptapnumeric_seqaij = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
2613: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqmaij_seqaij_C","MatConvert_SeqMAIJ_SeqAIJ",MatConvert_SeqMAIJ_SeqAIJ);
2614: } else {
2615: Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data;
2616: IS from,to;
2617: Vec gvec;
2618: PetscInt *garray,i;
2620: MatSetType(B,MATMPIMAIJ);
2621: B->ops->destroy = MatDestroy_MPIMAIJ;
2622: B->ops->view = MatView_MPIMAIJ;
2623: b = (Mat_MPIMAIJ*)B->data;
2624: b->dof = dof;
2625: b->A = A;
2626: MatCreateMAIJ(mpiaij->A,dof,&b->AIJ);
2627: MatCreateMAIJ(mpiaij->B,dof,&b->OAIJ);
2629: VecGetSize(mpiaij->lvec,&n);
2630: VecCreateSeq(PETSC_COMM_SELF,n*dof,&b->w);
2631: VecSetBlockSize(b->w,dof);
2633: /* create two temporary Index sets for build scatter gather */
2634: PetscMalloc((n+1)*sizeof(PetscInt),&garray);
2635: for (i=0; i<n; i++) garray[i] = dof*mpiaij->garray[i];
2636: ISCreateBlock(A->comm,dof,n,garray,&from);
2637: PetscFree(garray);
2638: ISCreateStride(PETSC_COMM_SELF,n*dof,0,1,&to);
2640: /* create temporary global vector to generate scatter context */
2641: VecCreateMPI(A->comm,dof*A->cmap.n,dof*A->cmap.N,&gvec);
2642: VecSetBlockSize(gvec,dof);
2644: /* generate the scatter context */
2645: VecScatterCreate(gvec,from,b->w,to,&b->ctx);
2647: ISDestroy(from);
2648: ISDestroy(to);
2649: VecDestroy(gvec);
2651: B->ops->mult = MatMult_MPIMAIJ_dof;
2652: B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof;
2653: B->ops->multadd = MatMultAdd_MPIMAIJ_dof;
2654: B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;
2655: PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpimaij_mpiaij_C","MatConvert_MPIMAIJ_MPIAIJ",MatConvert_MPIMAIJ_MPIAIJ);
2656: }
2657: *maij = B;
2658: MatView_Private(B);
2659: }
2660: return(0);
2661: }