Actual source code: ex1.c


  2: static char help[] = "Tests LU, Cholesky, and QR factorization and MatMatSolve() for a sequential dense matrix. \n\
  3:                       For MATSEQDENSE matrix, the factorization is just a thin wrapper to LAPACK.       \n\
  4:                       For MATSEQDENSECUDA, it uses cusolverDn routines \n\n";

  6: #include <petscmat.h>

  8: static PetscErrorCode createMatsAndVecs(PetscInt m, PetscInt n, PetscInt nrhs, PetscBool full, Mat *_mat, Mat *_RHS, Mat *_SOLU, Vec *_x, Vec *_y, Vec *_b)
  9: {
 10:   PetscRandom    rand;
 11:   Mat            mat,RHS,SOLU;
 12:   PetscInt       rstart, rend;
 13:   PetscInt       cstart, cend;
 14:   PetscScalar    value = 1.0;
 15:   Vec            x, y, b;

 19:   /* create multiple vectors RHS and SOLU */
 20:   MatCreate(PETSC_COMM_WORLD,&RHS);
 21:   MatSetSizes(RHS,PETSC_DECIDE,PETSC_DECIDE,m,nrhs);
 22:   MatSetType(RHS,MATDENSE);
 23:   MatSetOptionsPrefix(RHS,"rhs_");
 24:   MatSetFromOptions(RHS);
 25:   MatSeqDenseSetPreallocation(RHS,NULL);

 27:   PetscRandomCreate(PETSC_COMM_WORLD,&rand);
 28:   PetscRandomSetFromOptions(rand);
 29:   MatSetRandom(RHS,rand);

 31:   if (m == n) {
 32:     MatDuplicate(RHS,MAT_DO_NOT_COPY_VALUES,&SOLU);
 33:   } else {
 34:     MatCreate(PETSC_COMM_WORLD,&SOLU);
 35:     MatSetSizes(SOLU,PETSC_DECIDE,PETSC_DECIDE,n,nrhs);
 36:     MatSetType(SOLU,MATDENSE);
 37:     MatSeqDenseSetPreallocation(SOLU,NULL);
 38:   }
 39:   MatSetRandom(SOLU,rand);

 41:   /* create matrix */
 42:   MatCreate(PETSC_COMM_WORLD,&mat);
 43:   MatSetSizes(mat,PETSC_DECIDE,PETSC_DECIDE,m,n);
 44:   MatSetType(mat,MATDENSE);
 45:   MatSetFromOptions(mat);
 46:   MatSetUp(mat);
 47:   MatGetOwnershipRange(mat,&rstart,&rend);
 48:   MatGetOwnershipRangeColumn(mat,&cstart,&cend);
 49:   if (!full) {
 50:     for (PetscInt i=rstart; i<rend; i++) {
 51:       if (m == n) {
 52:         value = (PetscReal)i+1;
 53:         MatSetValues(mat,1,&i,1,&i,&value,INSERT_VALUES);
 54:       } else {
 55:         for (PetscInt j = cstart; j < cend; j++) {
 56:           value = ((PetscScalar)i+1.)/(PetscSqr(i - j) + 1.);
 57:           MatSetValues(mat,1,&i,1,&j,&value,INSERT_VALUES);
 58:         }
 59:       }
 60:     }
 61:     MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
 62:     MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
 63:   } else {
 64:     MatSetRandom(mat,rand);
 65:     if (m == n) {
 66:       Mat T;

 68:       MatMatTransposeMult(mat,mat,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&T);
 69:       MatDestroy(&mat);
 70:       mat  = T;
 71:     }
 72:   }

 74:   /* create single vectors */
 75:   MatCreateVecs(mat,&x,&b);
 76:   VecDuplicate(x,&y);
 77:   VecSet(x,value);
 78:   PetscRandomDestroy(&rand);
 79:   *_mat  = mat;
 80:   *_RHS  = RHS;
 81:   *_SOLU = SOLU;
 82:   *_x    = x;
 83:   *_y    = y;
 84:   *_b    = b;
 85:   return(0);
 86: }

 88: int main(int argc,char **argv)
 89: {
 90:   Mat            mat,F,RHS,SOLU;
 91:   MatInfo        info;
 93:   PetscInt       m = 15, n = 10,i,j,nrhs=2;
 94:   Vec            x,y,b,ytmp;
 95:   IS             perm;
 96:   PetscReal      norm,tol=PETSC_SMALL;
 97:   PetscMPIInt    size;
 98:   char           solver[64];
 99:   PetscBool      inplace,full = PETSC_FALSE, ldl = PETSC_TRUE, qr = PETSC_TRUE;

101:   PetscInitialize(&argc,&argv,(char*) 0,help);if (ierr) return ierr;
102:   MPI_Comm_size(PETSC_COMM_WORLD,&size);
103:   if (size != 1) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_SUP,"This is a uniprocessor example only!");
104:   PetscStrcpy(solver,"petsc");
105:   PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);
106:   PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);
107:   PetscOptionsGetInt(NULL,NULL,"-nrhs",&nrhs,NULL);
108:   PetscOptionsGetBool(NULL,NULL,"-ldl",&ldl,NULL);
109:   PetscOptionsGetBool(NULL,NULL,"-qr",&qr,NULL);
110:   PetscOptionsGetBool(NULL,NULL,"-full",&full,NULL);
111:   PetscOptionsGetString(NULL,NULL,"-solver_type",solver,sizeof(solver),NULL);

113:   createMatsAndVecs(n, n, nrhs, full, &mat, &RHS, &SOLU, &x, &y, &b);
114:   VecDuplicate(y,&ytmp);

116:   /* Only SeqDense* support in-place factorizations and NULL permutations */
117:   PetscObjectBaseTypeCompare((PetscObject)mat,MATSEQDENSE,&inplace);
118:   MatGetLocalSize(mat,&i,NULL);
119:   MatGetOwnershipRange(mat,&j,NULL);
120:   ISCreateStride(PETSC_COMM_WORLD,i,j,1,&perm);

122:   MatGetInfo(mat,MAT_LOCAL,&info);
123:   PetscPrintf(PETSC_COMM_WORLD,"matrix nonzeros = %D, allocated nonzeros = %D\n",
124:                      (PetscInt)info.nz_used,(PetscInt)info.nz_allocated);
125:   MatMult(mat,x,b);

127:   /* Cholesky factorization - perm and factinfo are ignored by LAPACK */
128:   /* in-place Cholesky */
129:   if (inplace) {
130:     Mat RHS2;

132:     MatDuplicate(mat,MAT_COPY_VALUES,&F);
133:     if (!ldl) { MatSetOption(F,MAT_SPD,PETSC_TRUE); }
134:     MatCholeskyFactor(F,perm,0);
135:     MatSolve(F,b,y);
136:     VecAXPY(y,-1.0,x);
137:     VecNorm(y,NORM_2,&norm);
138:     if (norm > tol) {
139:       PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for in-place Cholesky %g\n",(double)norm);
140:     }

142:     MatMatSolve(F,RHS,SOLU);
143:     MatMatMult(mat,SOLU,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&RHS2);
144:     MatAXPY(RHS,-1.0,RHS2,SAME_NONZERO_PATTERN);
145:     MatNorm(RHS,NORM_FROBENIUS,&norm);
146:     if (norm > tol) {
147:       PetscPrintf(PETSC_COMM_WORLD,"Error: Norm of residual for in-place Cholesky (MatMatSolve) %g\n",(double)norm);
148:     }
149:     MatDestroy(&F);
150:     MatDestroy(&RHS2);
151:   }

153:   /* out-of-place Cholesky */
154:   MatGetFactor(mat,solver,MAT_FACTOR_CHOLESKY,&F);
155:   if (!ldl) { MatSetOption(F,MAT_SPD,PETSC_TRUE); }
156:   MatCholeskyFactorSymbolic(F,mat,perm,0);
157:   MatCholeskyFactorNumeric(F,mat,0);
158:   MatSolve(F,b,y);
159:   VecAXPY(y,-1.0,x);
160:   VecNorm(y,NORM_2,&norm);
161:   if (norm > tol) {
162:     PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place Cholesky %g\n",(double)norm);
163:   }
164:   MatDestroy(&F);

166:   /* LU factorization - perms and factinfo are ignored by LAPACK */
167:   i    = n-1;
168:   MatZeroRows(mat,1,&i,-1.0,NULL,NULL);
169:   MatMult(mat,x,b);

171:   /* in-place LU */
172:   if (inplace) {
173:     Mat RHS2;

175:     MatDuplicate(mat,MAT_COPY_VALUES,&F);
176:     MatLUFactor(F,perm,perm,0);
177:     MatSolve(F,b,y);
178:     VecAXPY(y,-1.0,x);
179:     VecNorm(y,NORM_2,&norm);
180:     if (norm > tol) {
181:       PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for in-place LU %g\n",(double)norm);
182:     }
183:     MatMatSolve(F,RHS,SOLU);
184:     MatMatMult(mat,SOLU,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&RHS2);
185:     MatAXPY(RHS,-1.0,RHS2,SAME_NONZERO_PATTERN);
186:     MatNorm(RHS,NORM_FROBENIUS,&norm);
187:     if (norm > tol) {
188:       PetscPrintf(PETSC_COMM_WORLD,"Error: Norm of residual for in-place LU (MatMatSolve) %g\n",(double)norm);
189:     }
190:     MatDestroy(&F);
191:     MatDestroy(&RHS2);
192:   }

194:   /* out-of-place LU */
195:   MatGetFactor(mat,solver,MAT_FACTOR_LU,&F);
196:   MatLUFactorSymbolic(F,mat,perm,perm,0);
197:   MatLUFactorNumeric(F,mat,0);
198:   MatSolve(F,b,y);
199:   VecAXPY(y,-1.0,x);
200:   VecNorm(y,NORM_2,&norm);
201:   if (norm > tol) {
202:     PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place LU %g\n",(double)norm);
203:   }

205:   /* free space */
206:   ISDestroy(&perm);
207:   MatDestroy(&F);
208:   MatDestroy(&mat);
209:   MatDestroy(&RHS);
210:   MatDestroy(&SOLU);
211:   VecDestroy(&x);
212:   VecDestroy(&b);
213:   VecDestroy(&y);
214:   VecDestroy(&ytmp);

216:   if (qr) {
217:     /* setup rectanglar */
218:     createMatsAndVecs(m, n, nrhs, full, &mat, &RHS, &SOLU, &x, &y, &b);
219:     VecDuplicate(y,&ytmp);

221:     /* QR factorization - perms and factinfo are ignored by LAPACK */
222:     MatMult(mat,x,b);

224:     /* in-place QR */
225:     if (inplace) {
226:       Mat SOLU2;

228:       MatDuplicate(mat,MAT_COPY_VALUES,&F);
229:       MatQRFactor(F,NULL,0);
230:       MatSolve(F,b,y);
231:       VecAXPY(y,-1.0,x);
232:       VecNorm(y,NORM_2,&norm);
233:       if (norm > tol) {
234:         PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for in-place QR %g\n",(double)norm);
235:       }
236:       MatMatMult(mat,SOLU,MAT_REUSE_MATRIX,PETSC_DEFAULT,&RHS);
237:       MatDuplicate(SOLU, MAT_DO_NOT_COPY_VALUES, &SOLU2);
238:       MatMatSolve(F,RHS,SOLU2);
239:       MatAXPY(SOLU2,-1.0,SOLU,SAME_NONZERO_PATTERN);
240:       MatNorm(SOLU2,NORM_FROBENIUS,&norm);
241:       if (norm > tol) {
242:         PetscPrintf(PETSC_COMM_WORLD,"Error: Norm of error for in-place QR (MatMatSolve) %g\n",(double)norm);
243:       }
244:       MatDestroy(&F);
245:       MatDestroy(&SOLU2);
246:     }

248:     /* out-of-place QR */
249:     MatGetFactor(mat,solver,MAT_FACTOR_QR,&F);
250:     MatQRFactorSymbolic(F,mat,NULL,NULL);
251:     MatQRFactorNumeric(F,mat,NULL);
252:     MatSolve(F,b,y);
253:     VecAXPY(y,-1.0,x);
254:     VecNorm(y,NORM_2,&norm);
255:     if (norm > tol) {
256:       PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place QR %g\n",(double)norm);
257:     }

259:     if (m == n) {
260:       /* out-of-place MatSolveTranspose */
261:       MatMultTranspose(mat,x,b);
262:       MatSolveTranspose(F,b,y);
263:       VecAXPY(y,-1.0,x);
264:       VecNorm(y,NORM_2,&norm);
265:       if (norm > tol) {
266:         PetscPrintf(PETSC_COMM_WORLD,"Warning: Norm of error for out-of-place QR %g\n",(double)norm);
267:       }
268:     }

270:     /* free space */
271:     MatDestroy(&F);
272:     MatDestroy(&mat);
273:     MatDestroy(&RHS);
274:     MatDestroy(&SOLU);
275:     VecDestroy(&x);
276:     VecDestroy(&b);
277:     VecDestroy(&y);
278:     VecDestroy(&ytmp);
279:   }
280:   PetscFinalize();
281:   return ierr;
282: }



286: /*TEST

288:    test:

290:    test:
291:      requires: cuda
292:      suffix: seqdensecuda
293:      args: -mat_type seqdensecuda -rhs_mat_type seqdensecuda -ldl 0 -solver_type {{petsc cuda}} -qr 0
294:      output_file: output/ex1_1.out

296:    test:
297:      requires: cuda
298:      suffix: seqdensecuda_2
299:      args: -ldl 0 -solver_type cuda -qr 0
300:      output_file: output/ex1_1.out

302:    test:
303:      requires: cuda
304:      suffix: seqdensecuda_seqaijcusparse
305:      args: -mat_type seqaijcusparse -rhs_mat_type seqdensecuda -qr 0
306:      output_file: output/ex1_2.out

308:    test:
309:      requires: cuda viennacl
310:      suffix: seqdensecuda_seqaijviennacl
311:      args: -mat_type seqaijviennacl -rhs_mat_type seqdensecuda -qr 0
312:      output_file: output/ex1_2.out

314:    test:
315:      suffix: 4
316:      args: -m 10 -n 10
317:      output_file: output/ex1_1.out

319: TEST*/