Actual source code: nladic.c

  1: #define PETSCMAT_DLL
  2: /*
  3:     ADIC based nonlinear operator object that can be used with FAS

  5:     This does not really belong in the matrix directories but since it 
  6:     was cloned off of Mat_DAAD I'm leaving it here until I have a better place

  8: */
 9:  #include petsc.h
 10:  #include petscda.h
 11:  #include petscsys.h

 14: #include "adic/ad_utils.h"

 17:  #include src/dm/da/daimpl.h
 18:  #include src/inline/ilu.h

 20: struct NLF_DAAD {
 21:   DA         da;
 22:   void       *ctx;
 23:   Vec        residual;
 24:   int        newton_its;
 25: };

 27: /*
 28:       Solves the one dimensional equation using Newton's method 
 29: */
 32: PetscErrorCode NLFNewton_DAAD(NLF A,DALocalInfo *info,MatStencil *stencil,void *ad_vu,PetscScalar *ad_vustart,int nI,int gI,PetscScalar residual)
 33: {
 35:   PetscInt       cnt = A->newton_its;
 36:   PetscScalar    ad_f[2],J,f;

 39:   ad_vustart[1+2*gI] = 1.0;
 40:   do {
 41:     /* compute the function and Jacobian */
 42:     (*A->da->adicmf_lfi)(info,stencil,ad_vu,ad_f,A->ctx);
 43:     J    = -ad_f[1];
 44:     f    = -ad_f[0] + residual;
 45:     if (f != f) SETERRQ(1,"nan");
 46:     ad_vustart[2*gI] =  ad_vustart[2*gI] - f/J;
 47:   } while (--cnt > 0 && PetscAbsScalar(f) > 1.e-14);

 49:   ad_vustart[1+2*gI] = 0.0;
 50:   return(0);
 51: }

 53: /*
 54:       Solves the four dimensional equation using Newton's method 
 55: */
 58: PetscErrorCode NLFNewton_DAAD4(NLF A,DALocalInfo *info,MatStencil *stencil,void *ad_vu,PetscScalar *ad_vustart,int nI,int gI,PetscScalar *residual)
 59: {
 61:   PetscInt       cnt = A->newton_its;
 62:   PetscScalar    ad_f[20], J[16],f[4], res, dd[5];


 66:   /* This sets the identity as the seed matrix for ADIC */
 67:   ad_vustart[1+5*gI   ] = 1.0;
 68:   ad_vustart[2+5*gI+5 ] = 1.0;
 69:   ad_vustart[3+5*gI+10] = 1.0;
 70:   ad_vustart[4+5*gI+15] = 1.0;

 72:   do {
 73:     /* compute the function and Jacobian */
 74:     (*A->da->adicmf_lfib)(info,stencil,ad_vu,ad_f,A->ctx);
 75: 
 76:     /* copy ADIC formated Jacobian into regular C array */
 77:     J[0] = ad_f[1] ; J[1] = ad_f[2] ; J[2] = ad_f[3] ; J[3] = ad_f[4] ;
 78:     J[4] = ad_f[6] ; J[5] = ad_f[7] ; J[6] = ad_f[8] ; J[7] = ad_f[9] ;
 79:     J[8] = ad_f[11]; J[9] = ad_f[12]; J[10]= ad_f[13]; J[11]= ad_f[14];
 80:     J[12]= ad_f[16]; J[13]= ad_f[17]; J[14]= ad_f[18]; J[15]= ad_f[19];

 82:     f[0]    = -ad_f[0]   + residual[0];
 83:     f[1]    = -ad_f[5]   + residual[1];
 84:     f[2]    = -ad_f[10]  + residual[2];
 85:     f[3]    = -ad_f[15]  + residual[3];

 87:     /* solve Jacobian * dd = ff */

 89:     /* could use PETSc kernel code to solve system with pivoting */

 91:     /* could put code in here to compute the solution directly using ADIC data structures instead of copying first */
 92:     dd[0]=J[0]*(J[5]*(J[10]*J[15]-J[11]*J[14])-J[6]*(J[9]*J[15]-J[11]*J[13])+J[7]*(J[9]*J[14]-J[10]*J[13]))-
 93:           J[1]*(J[4]*(J[10]*J[15]-J[11]*J[14])-J[6]*(J[8]*J[15]-J[11]*J[12])+J[7]*(J[8]*J[14]-J[10]*J[12]))+
 94:           J[2]*(J[4]*(J[ 9]*J[15]-J[11]*J[13])-J[5]*(J[8]*J[15]-J[11]*J[12])+J[7]*(J[8]*J[13]-J[ 9]*J[12]))-
 95:           J[3]*(J[4]*(J[ 9]*J[14]-J[10]*J[13])-J[5]*(J[8]*J[14]-J[10]*J[12])+J[6]*(J[8]*J[13]-J[ 9]*J[12]));

 97:     dd[1]=(f[0]*(J[5]*(J[10]*J[15]-J[11]*J[14])-J[6]*(J[9]*J[15]-J[11]*J[13])+J[7]*(J[9]*J[14]-J[10]*J[13]))-
 98:           J[1]*(f[1]*(J[10]*J[15]-J[11]*J[14])-J[6]*(f[2]*J[15]-J[11]*f[ 3])+J[7]*(f[2]*J[14]-J[10]*f[ 3]))+
 99:           J[2]*(f[1]*(J[ 9]*J[15]-J[11]*J[13])-J[5]*(f[2]*J[15]-J[11]*f[ 3])+J[7]*(f[2]*J[13]-J[ 9]*f[ 3]))-
100:            J[3]*(f[1]*(J[ 9]*J[14]-J[10]*J[13])-J[5]*(f[2]*J[14]-J[10]*f[ 3])+J[6]*(f[2]*J[13]-J[ 9]*f[ 3])))/dd[0];

102:     dd[2]=(J[0]*(f[1]*(J[10]*J[15]-J[11]*J[14])-J[6]*(f[2]*J[15]-J[11]*f[ 3])+J[7]*(f[2]*J[14]-J[10]*f[ 3]))-
103:           f[0]*(J[4]*(J[10]*J[15]-J[11]*J[14])-J[6]*(J[8]*J[15]-J[11]*J[12])+J[7]*(J[8]*J[14]-J[10]*J[12]))+
104:           J[2]*(J[4]*(f[ 2]*J[15]-J[11]*f[ 3])-f[1]*(J[8]*J[15]-J[11]*J[12])+J[7]*(J[8]*f[ 3]-f[ 2]*J[12]))-
105:           J[3]*(J[4]*(f[ 2]*J[14]-J[10]*f[ 3])-f[2]*(J[8]*J[14]-J[10]*J[12])+J[6]*(J[8]*f[ 3]-f[ 2]*J[12])))/dd[0];

107:     dd[3]=(J[0]*(J[5]*(f[ 2]*J[15]-J[11]*f[ 3])-f[1]*(J[9]*J[15]-J[11]*J[13])+J[7]*(J[9]*f[ 3]-f[ 2]*J[13]))-
108:           J[1]*(J[4]*(f[ 2]*J[15]-J[11]*f[ 3])-f[1]*(J[8]*J[15]-J[11]*J[12])+J[7]*(J[8]*f[ 3]-f[ 2]*J[12]))+
109:           f[0]*(J[4]*(J[ 9]*J[15]-J[11]*J[13])-J[5]*(J[8]*J[15]-J[11]*J[12])+J[7]*(J[8]*J[13]-J[ 9]*J[12]))-
110:            J[3]*(J[4]*(J[ 9]*f[ 3]-f[ 2]*J[13])-J[5]*(J[8]*f[ 3]-f[ 2]*J[12])+f[1]*(J[8]*J[13]-J[ 9]*J[12])))/dd[0];

112:     dd[4]=(J[0]*(J[5]*(J[10]*f[ 3]-f[ 2]*J[14])-J[6]*(J[9]*f[ 3]-f[ 2]*J[13])+f[1]*(J[9]*J[14]-J[10]*J[13]))-
113:           J[1]*(J[4]*(J[10]*f[ 3]-f[ 2]*J[14])-J[6]*(J[8]*f[ 3]-f[ 2]*J[12])+f[1]*(J[8]*J[14]-J[10]*J[12]))+
114:           J[2]*(J[4]*(J[ 9]*f[ 3]-f[ 2]*J[13])-J[5]*(J[8]*f[ 3]-f[ 2]*J[12])+f[1]*(J[8]*J[13]-J[ 9]*J[12]))-
115:           f[0]*(J[4]*(J[ 9]*J[14]-J[10]*J[13])-J[5]*(J[8]*J[14]-J[10]*J[12])+J[6]*(J[8]*J[13]-J[ 9]*J[12])))/dd[0];

117:     /* copy solution back into ADIC data structure */
118:     ad_vustart[5*(gI+0)] += dd[1];
119:     ad_vustart[5*(gI+1)] += dd[2];
120:     ad_vustart[5*(gI+2)] += dd[3];
121:     ad_vustart[5*(gI+3)] += dd[4];

123:     res =  f[0]*f[0];
124:     res += f[1]*f[1];
125:     res += f[2]*f[2];
126:     res += f[3]*f[3];
127:     res =  sqrt(res);
128: 
129:   } while (--cnt > 0 && res > 1.e-14);

131:   /* zero out this part of the seed matrix that was set initially */
132:   ad_vustart[1+5*gI   ] = 0.0;
133:   ad_vustart[2+5*gI+5 ] = 0.0;
134:   ad_vustart[3+5*gI+10] = 0.0;
135:   ad_vustart[4+5*gI+15] = 0.0;
136:   return(0);
137: }

141: PetscErrorCode NLFNewton_DAAD9(NLF A,DALocalInfo *info,MatStencil *stencil,void *ad_vu,PetscScalar *ad_vustart,int nI,int gI,PetscScalar *residual)
142: {
144:   PetscInt       cnt = A->newton_its;
145:   PetscScalar    ad_f[100], J[81],f[9], res;
146:   PetscInt       i,j,ngI[9];
148: 
149:   // the order of the nodes
150:    /*
151:          (6)      (7)         (8)
152:       i-1,j+1 --- i,j+1 --- i+1,j+1
153:         |         |           |
154:         |         |           |
155:       i-1,j   --- i,j  --- i+1,j
156:         |(3)      |(4)        |(5)
157:         |         |           |
158:       i-1,j-1 --- i,j-1--- i+1,j-1
159:        (0)       (1)         (2)
160:   */
161: 
162:   // the order of the derivative for the center nodes
163:    /*
164:          (7)      (8)         (9)
165:       i-1,j+1 --- i,j+1 --- i+1,j+1
166:         |         |           |
167:         |         |           |
168:       i-1,j   --- i,j  --- i+1,j
169:         |(4)      |(5)        |(6)
170:         |         |           |
171:       i-1,j-1 --- i,j-1--- i+1,j-1
172:        (1)       (2)         (3)
173:   */
174:   if( (*stencil).i==0 || (*stencil).i==1||(*stencil).i==(*info).gxs+(*info).gxm-1 || (*stencil).i==(*info).gxs+(*info).gxm-2  || (*stencil).j==0 ||  (*stencil).j==1 ||(*stencil).j==(*info).gys+(*info).gym-1 || (*stencil).j==(*info).gys+(*info).gym -2) {

176:   ad_vustart[1+10*gI] = 1.0;
177: 
178:   do {
179:     /* compute the function and Jacobian */
180:     (*A->da->adicmf_lfi)(info,stencil,ad_vu,ad_f,A->ctx);
181:     J[0]    = -ad_f[1];
182:     f[0]    = -ad_f[0] + residual[gI];
183:     ad_vustart[10*gI] =  ad_vustart[10*gI] - f[0]/J[0];
184:   } while (--cnt > 0 && PetscAbsScalar(f[0]) > 1.e-14);

186:   ad_vustart[1+10*gI] = 0.0;
187:   return(0);


190:   }
191: 
192:   ngI[0]  =  ((*stencil).i -1 - (*info).gxs)*(*info).dof + ((*stencil).j -1 - (*info).gys)*(*info).dof*(*info).gxm + ((*stencil).k - (*info).gzs)*(*info).dof*(*info).gxm*(*info).gym;
193:   ngI[1]  =  ngI[0] + 1;
194:   ngI[2]  =  ngI[1] + 1;
195:   ngI[3]  =  gI     - 1;
196:   ngI[4]  =  gI        ;
197:   ngI[5]  =  gI     + 1;
198:   ngI[6]  =  ((*stencil).i -1 - (*info).gxs)*(*info).dof + ((*stencil).j +1 - (*info).gys)*(*info).dof*(*info).gxm + ((*stencil).k - (*info).gzs)*(*info).dof*(*info).gxm*(*info).gym;
199:   ngI[7]  =  ngI[6] + 1;
200:   ngI[8]  =  ngI[7] + 1;
201: 

203:   for(j=0 ; j<9; j++){
204:     ad_vustart[ngI[j]*10+j+1] = 1.0;
205:   }
206: 
207:   do{
208:     /* compute the function and the Jacobian */
209: 
210:     (*A->da->adicmf_lfi)(info,stencil,ad_vu,ad_f,A->ctx);
211: 
212:     for(i=0; i<9; i++){
213:       for(j=0; j<9; j++){
214:         J[i*9+j] = -ad_f[i*10+j+1];
215:       }
216:       f[i]= -ad_f[i*10] + residual[ngI[i]];
217:     }

219:     Kernel_A_gets_inverse_A_9(J);
220: 
221:     res =0 ;
222:     for(i=0; i<9; i++){
223:       for(j=0;j<9;j++){
224:         ad_vustart[10*ngI[i]]= ad_vustart[10*ngI[i]] - J[i*9 +j]*f[j];
225:       }
226:       res = res + f[i]*f[i];
227:     }
228:     res = sqrt(res);
229:   } while(--cnt>0 && res>1.e-14);

231:   for(j=0; j<9; j++){
232:     ad_vustart[10*ngI[j]+j+1]=0.0;
233:   }

235:   return(0);
236: }

238: /*
239:         Nonlinear relax on all the equations with an initial guess in x
240: */
244: PetscErrorCode PETSCMAT_DLLEXPORT NLFRelax_DAAD(NLF A,MatSORType flag,int its,Vec xx)
245: {
247:   PetscInt       j,gtdof,nI,gI;
248:   PetscScalar    *avu,*av,*ad_vustart,*residual;
249:   Vec            localxx;
250:   DALocalInfo    info;
251:   MatStencil     stencil;
252:   void*          *ad_vu;

255:   if (its <= 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D positive",its);

257:   DAGetLocalVector(A->da,&localxx);
258:   /* get space for derivative object.  */
259:   DAGetAdicMFArray(A->da,PETSC_TRUE,(void **)&ad_vu,(void**)&ad_vustart,&gtdof);
260:   VecGetArray(A->residual,&residual);


263:   /* tell ADIC we will be computing one dimensional Jacobians */
264:   PetscADResetIndep();
265:   PetscADIncrementTotalGradSize(1);
266:   PetscADSetIndepDone();

268:   DAGetLocalInfo(A->da,&info);
269:   while (its--) {

271:     /* get initial solution properly ghosted */
272:     DAGlobalToLocalBegin(A->da,xx,INSERT_VALUES,localxx);
273:     DAGlobalToLocalEnd(A->da,xx,INSERT_VALUES,localxx);

275:     /* copy input vector into derivative object */
276:     VecGetArray(localxx,&avu);
277:     for (j=0; j<gtdof; j++) {
278:       ad_vustart[2*j]   = avu[j];
279:       ad_vustart[2*j+1] = 0.0;
280:     }
281: 
282:     VecRestoreArray(localxx,&avu);

284:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
285:       nI = 0;
286:       for (stencil.k = info.zs; stencil.k<info.zs+info.zm; stencil.k++) {
287:         for (stencil.j = info.ys; stencil.j<info.ys+info.ym; stencil.j++) {
288:           for (stencil.i = info.xs; stencil.i<info.xs+info.xm; stencil.i++) {
289:             for (stencil.c = 0; stencil.c<info.dof; stencil.c++) {
290:               gI   = stencil.c + (stencil.i - info.gxs)*info.dof + (stencil.j - info.gys)*info.dof*info.gxm + (stencil.k - info.gzs)*info.dof*info.gxm*info.gym;
291:               NLFNewton_DAAD(A,&info,&stencil,ad_vu,ad_vustart,nI,gI,residual[nI]);
292:               nI++;
293:             }
294:           }
295:         }
296:       }
297:     }
298:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
299:       nI = info.dof*info.xm*info.ym*info.zm - 1;
300:       for (stencil.k = info.zs+info.zm-1; stencil.k>=info.zs; stencil.k--) {
301:         for (stencil.j = info.ys+info.ym-1; stencil.j>=info.ys; stencil.j--) {
302:           for (stencil.i = info.xs+info.xm-1; stencil.i>=info.xs; stencil.i--) {
303:             for (stencil.c = info.dof-1; stencil.c>=0; stencil.c--) {
304:               gI   = stencil.c + (stencil.i - info.gxs)*info.dof + (stencil.j - info.gys)*info.dof*info.gxm + (stencil.k - info.gzs)*info.dof*info.gxm*info.gym;
305:               NLFNewton_DAAD(A,&info,&stencil,ad_vu,ad_vustart,nI,gI,residual[nI]);
306:               nI--;
307:             }
308:           }
309:         }
310:       }
311:     }

313:     /* copy solution back into ghosted vector from derivative object */
314:     VecGetArray(localxx,&av);
315:     for (j=0; j<gtdof; j++) {
316:       av[j] = ad_vustart[2*j];
317:     }
318:     VecRestoreArray(localxx,&av);
319:     /* stick relaxed solution back into global solution */
320:     DALocalToGlobal(A->da,localxx,INSERT_VALUES,xx);
321:   }


324:   VecRestoreArray(A->residual,&residual);
325:   DARestoreLocalVector(A->da,&localxx);
326:   DARestoreAdicMFArray(A->da,PETSC_TRUE,(void **)&ad_vu,(void**)&ad_vustart,&gtdof);
327:   return(0);
328: }

334: PetscErrorCode PETSCMAT_DLLEXPORT NLFRelax_DAAD4(NLF A,MatSORType flag,int its,Vec xx)
335: {
337:   PetscInt       j,gtdof,nI,gI;
338:   PetscScalar    *avu,*av,*ad_vustart,*residual;
339:   Vec            localxx;
340:   DALocalInfo    info;
341:   MatStencil     stencil;
342:   void*          *ad_vu;

345:   if (its <= 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D positive",its);
346: 
347:   DAGetLocalVector(A->da,&localxx);
348:   /* get space for derivative object.  */
349:   DAGetAdicMFArray4(A->da,PETSC_TRUE,(void **)&ad_vu,(void**)&ad_vustart,&gtdof);
350:   VecGetArray(A->residual,&residual);


353:   /* tell ADIC we will be computing four dimensional Jacobians */
354:   PetscADResetIndep();
355:   PetscADIncrementTotalGradSize(4);
356:   PetscADSetIndepDone();

358:   DAGetLocalInfo(A->da,&info);
359:   while (its--) {

361:     /* get initial solution properly ghosted */
362:     DAGlobalToLocalBegin(A->da,xx,INSERT_VALUES,localxx);
363:     DAGlobalToLocalEnd(A->da,xx,INSERT_VALUES,localxx);

365:     /* copy input vector into derivative object */
366:     VecGetArray(localxx,&avu);
367:     for (j=0; j<gtdof; j++) {
368:       ad_vustart[5*j  ]   = avu[j];
369:       ad_vustart[5*j+1]   = 0.0;
370:       ad_vustart[5*j+2]   = 0.0;
371:       ad_vustart[5*j+3]   = 0.0;
372:       ad_vustart[5*j+4]   = 0.0;
373:     }
374: 
375:     VecRestoreArray(localxx,&avu);

377:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
378:       nI = 0;
379:       for (stencil.k = info.zs; stencil.k<info.zs+info.zm; stencil.k++) {
380:         for (stencil.j = info.ys; stencil.j<info.ys+info.ym; stencil.j++) {
381:           for (stencil.i = info.xs; stencil.i<info.xs+info.xm; stencil.i++) {
382:             CHKMEMQ;
383:             gI   = (stencil.i - info.gxs)*info.dof + (stencil.j - info.gys)*info.dof*info.gxm + (stencil.k - info.gzs)*info.dof*info.gxm*info.gym;

385:             NLFNewton_DAAD4(A,&info,&stencil,ad_vu,ad_vustart,nI,gI,residual+nI);
386:             nI=nI+4;
387:             CHKMEMQ;
388:           }
389:         }
390:       }
391:     }
392:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
393:       nI = info.dof*info.xm*info.ym*info.zm - 4;
394: 
395:       for (stencil.k = info.zs+info.zm-1; stencil.k>=info.zs; stencil.k--) {
396:         for (stencil.j = info.ys+info.ym-1; stencil.j>=info.ys; stencil.j--) {
397:           for (stencil.i = info.xs+info.xm-1; stencil.i>=info.xs; stencil.i--) {
398:             gI   = (stencil.i - info.gxs)*info.dof + (stencil.j - info.gys)*info.dof*info.gxm + (stencil.k - info.gzs)*info.dof*info.gxm*info.gym;
399:             NLFNewton_DAAD4(A,&info,&stencil,ad_vu,ad_vustart,nI,gI,residual+nI);
400:             nI=nI-4;
401:           }
402:         }
403:       }
404:     }
405: 
406:     /* copy solution back into ghosted vector from derivative object */
407:     VecGetArray(localxx,&av);
408:     for (j=0; j<gtdof; j++) {
409:       av[j] = ad_vustart[5*j];
410:     }
411:     VecRestoreArray(localxx,&av);
412:     /* stick relaxed solution back into global solution */
413:     DALocalToGlobal(A->da,localxx,INSERT_VALUES,xx);
414:   }


417:   VecRestoreArray(A->residual,&residual);
418:   DARestoreLocalVector(A->da,&localxx);
419:   DARestoreAdicMFArray(A->da,PETSC_TRUE,(void **)&ad_vu,(void**)&ad_vustart,&gtdof);
420:   return(0);
421: }

426: PetscErrorCode PETSCMAT_DLLEXPORT NLFRelax_DAAD9(NLF A,MatSORType flag,int its,Vec xx)
427: {
429:   PetscInt       j,gtdof,nI,gI;
430:   PetscScalar    *avu,*av,*ad_vustart,*residual;
431:   Vec            localxx;
432:   DALocalInfo    info;
433:   MatStencil     stencil;
434:   void*          *ad_vu;

437:   if (its <= 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D positive",its);
438: 
439:   DAGetLocalVector(A->da,&localxx);
440:   /* get space for derivative object.  */
441:   DAGetAdicMFArray9(A->da,PETSC_TRUE,(void **)&ad_vu,(void**)&ad_vustart,&gtdof);
442:   VecGetArray(A->residual,&residual);


445:   /* tell ADIC we will be computing nine dimensional Jacobians */
446:   PetscADResetIndep();
447:   PetscADIncrementTotalGradSize(9);
448:   PetscADSetIndepDone();

450:   DAGetLocalInfo(A->da,&info);
451:   while (its--) {

453:     /* get initial solution properly ghosted */
454:     DAGlobalToLocalBegin(A->da,xx,INSERT_VALUES,localxx);
455:     DAGlobalToLocalEnd(A->da,xx,INSERT_VALUES,localxx);

457:     /* copy input vector into derivative object */
458:     VecGetArray(localxx,&avu);
459:      for (j=0; j<gtdof; j++) {
460:       ad_vustart[10*j  ]   = avu[j];
461:       ad_vustart[10*j+1]   = 0.0;
462:       ad_vustart[10*j+2]   = 0.0;
463:       ad_vustart[10*j+3]   = 0.0;
464:       ad_vustart[10*j+4]   = 0.0;
465:       ad_vustart[10*j+5]   = 0.0;
466:       ad_vustart[10*j+6]   = 0.0;
467:       ad_vustart[10*j+7]   = 0.0;
468:       ad_vustart[10*j+8]   = 0.0;
469:       ad_vustart[10*j+9]   = 0.0;
470:     }
471: 
472:     VecRestoreArray(localxx,&avu);

474:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
475:       nI = 0;
476:       for (stencil.k = info.zs; stencil.k<info.zs+info.zm; stencil.k++) {
477:         for (stencil.j = info.ys; stencil.j<info.ys+info.ym; stencil.j++) {
478:           for (stencil.i = info.xs; stencil.i<info.xs+info.xm; stencil.i++) {
479:             CHKMEMQ;
480:               gI   = (stencil.i - info.gxs)*info.dof + (stencil.j - info.gys)*info.dof*info.gxm + (stencil.k - info.gzs)*info.dof*info.gxm*info.gym;
481:  NLFNewton_DAAD9(A,&info,&stencil,ad_vu,ad_vustart,nI,gI,residual);
482:  nI=nI+1;
483:             CHKMEMQ;
484:           }
485:         }
486:       }
487:     }
488:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
489:       nI = info.dof*info.xm*info.ym*info.zm - 4;
490: 
491:       for (stencil.k = info.zs+info.zm-1; stencil.k>=info.zs; stencil.k--) {
492:         for (stencil.j = info.ys+info.ym-1; stencil.j>=info.ys; stencil.j--) {
493:           for (stencil.i = info.xs+info.xm-1; stencil.i>=info.xs; stencil.i--) {
494:               gI   = (stencil.i - info.gxs)*info.dof + (stencil.j - info.gys)*info.dof*info.gxm + (stencil.k - info.gzs)*info.dof*info.gxm*info.gym;
495:               NLFNewton_DAAD9(A,&info,&stencil,ad_vu,ad_vustart,nI,gI,residual);
496:     nI=nI-1;
497:           }
498:         }
499:       }
500:     }
501: 
502:     /* copy solution back into ghosted vector from derivative object */
503:     VecGetArray(localxx,&av);
504:     for (j=0; j<gtdof; j++) {
505:       av[j] = ad_vustart[10*j];
506:     }
507:     VecRestoreArray(localxx,&av);
508:     /* stick relaxed solution back into global solution */
509:     DALocalToGlobal(A->da,localxx,INSERT_VALUES,xx);
510:   }


513:   VecRestoreArray(A->residual,&residual);
514:   DARestoreLocalVector(A->da,&localxx);
515:   DARestoreAdicMFArray(A->da,PETSC_TRUE,(void **)&ad_vu,(void**)&ad_vustart,&gtdof);
516:   return(0);
517: }

519: /*
520:         Point-block nonlinear relax on all the equations with an initial guess in xx using 
521: */
525: PetscErrorCode PETSCMAT_DLLEXPORT NLFRelax_DAADb(NLF A,MatSORType flag,int its,Vec xx)
526: {
528:   PetscInt       i,j,gtdof,nI,gI, bs = A->da->w, bs1 = bs + 1;
529:   PetscScalar    *avu,*av,*ad_vustart,*residual;
530:   Vec            localxx;
531:   DALocalInfo    info;
532:   MatStencil     stencil;
533:   void*          *ad_vu;
534:   PetscErrorCode (*NLFNewton_DAADb)(NLF,DALocalInfo*,MatStencil*,void*,PetscScalar*,int,int,PetscScalar*);

537:   if (its <= 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D positive",its);
538:   if (bs == 4) {
539:     NLFNewton_DAADb       = NLFNewton_DAAD4;
540:   } else {
541:     SETERRQ1(PETSC_ERR_SUP,"Point block nonlinear relaxation currently not for this block size",bs);
542:   }

544:   DAGetLocalVector(A->da,&localxx);
545:   /* get space for derivative object.  */
546:   DAGetAdicMFArrayb(A->da,PETSC_TRUE,(void **)&ad_vu,(void**)&ad_vustart,&gtdof);
547:   VecGetArray(A->residual,&residual);


550:   /* tell ADIC we will be computing bs dimensional Jacobians */
551:   PetscADResetIndep();
552:   PetscADIncrementTotalGradSize(bs);
553:   PetscADSetIndepDone();

555:   DAGetLocalInfo(A->da,&info);
556:   while (its--) {

558:     /* get initial solution properly ghosted */
559:     DAGlobalToLocalBegin(A->da,xx,INSERT_VALUES,localxx);
560:     DAGlobalToLocalEnd(A->da,xx,INSERT_VALUES,localxx);

562:     /* copy input vector into derivative object */
563:     VecGetArray(localxx,&avu);
564:     for (j=0; j<gtdof; j++) {
565:       ad_vustart[bs1*j]   = avu[j];
566:       for (i=0; i<bs; i++) {
567:          ad_vustart[bs1*j+1+i] = 0.0;
568:       }
569:     }
570:     VecRestoreArray(localxx,&avu);

572:     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
573:       nI = 0;
574:       for (stencil.k = info.zs; stencil.k<info.zs+info.zm; stencil.k++) {
575:         for (stencil.j = info.ys; stencil.j<info.ys+info.ym; stencil.j++) {
576:           for (stencil.i = info.xs; stencil.i<info.xs+info.xm; stencil.i++) {
577:             gI   = (stencil.i - info.gxs)*info.dof + (stencil.j - info.gys)*info.dof*info.gxm + (stencil.k - info.gzs)*info.dof*info.gxm*info.gym;
578:             (*NLFNewton_DAADb)(A,&info,&stencil,ad_vu,ad_vustart,nI,gI,residual+nI);
579:             nI += bs;
580:           }
581:         }
582:       }
583:     }
584:     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
585:       nI = info.dof*info.xm*info.ym*info.zm - bs;
586:       for (stencil.k = info.zs+info.zm-1; stencil.k>=info.zs; stencil.k--) {
587:         for (stencil.j = info.ys+info.ym-1; stencil.j>=info.ys; stencil.j--) {
588:           for (stencil.i = info.xs+info.xm-1; stencil.i>=info.xs; stencil.i--) {
589:             gI   = (stencil.i - info.gxs)*info.dof + (stencil.j - info.gys)*info.dof*info.gxm + (stencil.k - info.gzs)*info.dof*info.gxm*info.gym;
590:             (*NLFNewton_DAADb)(A,&info,&stencil,ad_vu,ad_vustart,nI,gI,residual+nI);
591:             nI -= bs;
592:           }
593:         }
594:       }
595:     }

597:     /* copy solution back into ghosted vector from derivative object */
598:     VecGetArray(localxx,&av);
599:     for (j=0; j<gtdof; j++) {
600:       av[j] = ad_vustart[bs1*j];
601:     }
602:     VecRestoreArray(localxx,&av);
603:     /* stick relaxed solution back into global solution */
604:     DALocalToGlobal(A->da,localxx,INSERT_VALUES,xx);
605:   }

607:   VecRestoreArray(A->residual,&residual);
608:   DARestoreLocalVector(A->da,&localxx);
609:   DARestoreAdicMFArray(A->da,PETSC_TRUE,(void **)&ad_vu,(void**)&ad_vustart,&gtdof);
610:   return(0);
611: }


617: PetscErrorCode NLFDestroy_DAAD(NLF A)
618: {

622:   DADestroy(A->da);
623:   PetscFree(A);
624:   return(0);
625: }

630: PetscErrorCode PETSCMAT_DLLEXPORT NLFDAADSetDA_DAAD(NLF A,DA da)
631: {

635:   A->da = da;
636:   PetscObjectReference((PetscObject)da);
637:   return(0);
638: }

644: PetscErrorCode PETSCMAT_DLLEXPORT NLFDAADSetNewtonIterations_DAAD(NLF A,int its)
645: {
647:   A->newton_its = its;
648:   return(0);
649: }

655: PetscErrorCode PETSCMAT_DLLEXPORT NLFDAADSetResidual_DAAD(NLF A,Vec residual)
656: {
658:   A->residual = residual;
659:   return(0);
660: }


667: PetscErrorCode PETSCMAT_DLLEXPORT NLFDAADSetCtx_DAAD(NLF A,void *ctx)
668: {
670:   A->ctx = ctx;
671:   return(0);
672: }

678: PetscErrorCode PETSCMAT_DLLEXPORT NLFCreate_DAAD(NLF *A)
679: {

683:   PetscNew(struct NLF_DAAD,A);
684:   (*A)->da         = 0;
685:   (*A)->ctx        = 0;
686:   (*A)->newton_its = 2;
687:   return(0);
688: }