Actual source code: flow.c


  2: static char help[] = "FUN3D - 3-D, Unstructured Incompressible Euler Solver.\n\
  3: originally written by W. K. Anderson of NASA Langley, \n\
  4: and ported into PETSc by D. K. Kaushik, ODU and ICASE.\n\n";

  6: #include <petscsnes.h>
  7: #include <petsctime.h>
  8: #include <petscao.h>
  9: #include "user.h"
 10: #if defined(_OPENMP)
 11: #include "omp.h"
 12: #if !defined(HAVE_REDUNDANT_WORK)
 13: #include "metis.h"
 14: #endif
 15: #endif

 17: #define ICALLOC(size,y) PetscMalloc1(PetscMax(size,1),y);
 18: #define FCALLOC(size,y) PetscMalloc1(PetscMax(size,1),y);
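   /*
      ICALLOC and FCALLOC are thin convenience wrappers around PetscMalloc1 for the
      integer and floating point work arrays used throughout this file; they allocate
      at least one entry so that zero-sized requests are safe. A rough sketch of the
      expansion (note the macro body already carries the trailing semicolon):

        ICALLOC(nnodes,&l2a);   ==>   PetscMalloc1(PetscMax(nnodes,1),&l2a);;
   */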

 20: typedef struct {
 21:   Vec    qnew,qold,func;
 22:   double fnorm_ini,dt_ini,cfl_ini;
 23:   double ptime;
 24:   double cfl_max,max_time;
 25:   double fnorm,dt,cfl;
 26:   double fnorm_ratio;
 27:   int    ires,iramp,itstep;
 28:   int    max_steps,print_freq;
 29:   int    LocalTimeStepping;
 30: } TstepCtx;
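   /*
      TstepCtx is the pseudo-transient continuation context used by Update() and
      ComputeTimeStep(): fnorm_ini and cfl_ini hold the initial residual norm and CFL
      number, cfl_max caps the CFL growth, fnorm_ratio is the residual reduction
      (fnorm_ini/fnorm) at which the time-step loop stops, max_steps and print_freq
      control the loop length and output frequency, and ires tells FormFunction()
      whether to add the pseudo-time term.
   */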

 32: typedef struct {                               /*============================*/
 33:   GRID      *grid;                                 /* Pointer to Grid info       */
 34:   TstepCtx  *tsCtx;                                /* Pointer to Time Stepping Context */
 35:   PetscBool PreLoading;
 36: } AppCtx;                                      /*============================*/

 38: extern int  FormJacobian(SNES,Vec,Mat,Mat,void*),
 39:             FormFunction(SNES,Vec,Vec,void*),
 40:             FormInitialGuess(SNES,GRID*),
 41:             Update(SNES,void*),
 42:             ComputeTimeStep(SNES,int,void*),
 43:             GetLocalOrdering(GRID*),
 44:             SetPetscDS(GRID *,TstepCtx*);
 45: static PetscErrorCode WritePVTU(AppCtx*,const char*,PetscBool);
 46: #if defined(_OPENMP) && defined(HAVE_EDGE_COLORING)
 47: int EdgeColoring(int nnodes,int nedge,int *e2n,int *eperm,int *ncolor,int *ncount);
 48: #endif
 49: /* Global Variables */

 51:                                                /*============================*/
 52: CINFO  *c_info;                                /* Pointer to COMMON INFO     */
 53: CRUNGE *c_runge;                               /* Pointer to COMMON RUNGE    */
 54: CGMCOM *c_gmcom;                               /* Pointer to COMMON GMCOM    */
 55:                                                /*============================*/
 56: int  rank,size,rstart;
 57: REAL memSize = 0.0,grad_time = 0.0;
 58: #if defined(_OPENMP)
 59: int max_threads = 2,tot_threads,my_thread_id;
 60: #endif

 62: #if defined(PARCH_IRIX64) && defined(USE_HW_COUNTERS)
 63: int       event0,event1;
 64: Scalar    time_counters;
 65: long long counter0,counter1;
 66: #endif
 67: int  ntran[max_nbtran];        /* transition data kept here so it is global */
 68: REAL dxtran[max_nbtran];

 70: /* ======================== MAIN ROUTINE =================================== */
 71: /*                                                                           */
 72: /* Finite volume flux split solver for general polygons                      */
 73: /*                                                                           */
 74: /*===========================================================================*/

 76: int main(int argc,char **args)
 77: {
 78:   AppCtx      user;
 79:   GRID        f_pntr;
 80:   TstepCtx    tsCtx;
 81:   SNES        snes;                    /* nonlinear solver context */
 82:   Mat         Jpc;                     /* Jacobian and Preconditioner matrices */
 83:   PetscScalar *qnode;
 84:   int         ierr;
 85:   PetscBool   flg,write_pvtu,pvtu_base64;
 86:   MPI_Comm    comm;
 87:   PetscInt    maxfails                       = 10000;
 88:   char        pvtu_fname[PETSC_MAX_PATH_LEN] = "incomp";

 90:   ierr = PetscInitialize(&argc,&args,NULL,help);if (ierr) return ierr;
 91:   PetscInitializeFortran();
 92:   PetscOptionsInsertFile(PETSC_COMM_WORLD,NULL,"petsc.opt",PETSC_FALSE);

 94:   comm = PETSC_COMM_WORLD;
 95:   f77FORLINK();                               /* Link FORTRAN and C COMMONS */

 97:   MPI_Comm_rank(comm,&rank);
 98:   MPI_Comm_size(comm,&size);

100:   flg  = PETSC_FALSE;
101:   PetscOptionsGetBool(NULL,NULL,"-mem_use",&flg,NULL);
102:   if (flg) {PetscMemorySetGetMaximumUsage();}

104:   /*======================================================================*/
105:   /* Initialize data related to time stepping */
106:   /*======================================================================*/
107:   tsCtx.fnorm_ini         = 0.0;  tsCtx.cfl_ini     = 50.0;    tsCtx.cfl_max = 1.0e+05;
108:   tsCtx.max_steps         = 50;   tsCtx.max_time    = 1.0e+12; tsCtx.iramp   = -50;
109:   tsCtx.dt                = -5.0; tsCtx.fnorm_ratio = 1.0e+10;
110:   tsCtx.LocalTimeStepping = 1;
111:   PetscOptionsGetInt(NULL,NULL,"-max_st",&tsCtx.max_steps,NULL);
112:   PetscOptionsGetReal(NULL,NULL,"-ts_rtol",&tsCtx.fnorm_ratio,NULL);
113:   PetscOptionsGetReal(NULL,NULL,"-cfl_ini",&tsCtx.cfl_ini,NULL);
114:   PetscOptionsGetReal(NULL,NULL,"-cfl_max",&tsCtx.cfl_max,NULL);
115:   tsCtx.print_freq        = tsCtx.max_steps;
116:   PetscOptionsGetInt(NULL,NULL,"-print_freq",&tsCtx.print_freq,&flg);
117:   PetscOptionsGetString(NULL,NULL,"-pvtu",pvtu_fname,sizeof(pvtu_fname),&write_pvtu);
118:   pvtu_base64             = PETSC_FALSE;
119:   PetscOptionsGetBool(NULL,NULL,"-pvtu_base64",&pvtu_base64,NULL);
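   /*
      Together with -mesh and -partition (read later in GetLocalOrdering) the options
      above give a run line of roughly the following form; the executable and file
      names here are only illustrative:

        mpiexec -n 2 ./flow -mesh uns3d.msh -partition part_vec.part.2 \
                -max_st 100 -cfl_ini 50 -cfl_max 1e5 -ts_rtol 1e10 \
                -print_freq 10 -pvtu sol -matrix_free
   */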

121:   c_info->alpha = 3.0;
122:   c_info->beta  = 15.0;
123:   c_info->ivisc = 0;

125:   c_gmcom->ilu0  = 1;
126:   c_gmcom->nsrch = 10;

128:   c_runge->nitfo = 0;

130:   PetscMemzero(&f_pntr,sizeof(f_pntr));
131:   f_pntr.jvisc  = c_info->ivisc;
132:   f_pntr.ileast = 4;
133:   PetscOptionsGetReal(NULL,NULL,"-alpha",&c_info->alpha,NULL);
134:   PetscOptionsGetReal(NULL,NULL,"-beta",&c_info->beta,NULL);

136:   /*======================================================================*/

138:   /* Set the maximum number of threads for OpenMP */
139: #if defined(_OPENMP)
140:   PetscOptionsGetInt(NULL,NULL,"-max_threads",&max_threads,&flg);
141:   omp_set_num_threads(max_threads);
142:   PetscPrintf(comm,"Using %d threads for each MPI process\n",max_threads);
143: #endif

145:   /* Get the grid information into local ordering */
146:   GetLocalOrdering(&f_pntr);

148:   /* Allocate Memory for Some Other Grid Arrays */
149:   set_up_grid(&f_pntr);

151:   /* If using least squares for the gradients, calculate the r's */
152:   if (f_pntr.ileast == 4) f77SUMGS(&f_pntr.nnodesLoc,&f_pntr.nedgeLoc,f_pntr.eptr,f_pntr.xyz,f_pntr.rxy,&rank,&f_pntr.nvertices);

154:   user.grid  = &f_pntr;
155:   user.tsCtx = &tsCtx;

157:   /* SAWs Stuff */

159:   /*
160:     Preload the executable to get accurate timings. This runs the following chunk of
161:     code twice, first to get the executable pages into memory and the second time for
162:     accurate timings.
163:   */
164:   PetscPreLoadBegin(PETSC_TRUE,"Time integration");
165:   user.PreLoading = PetscPreLoading;

167:   /* Create nonlinear solver */
168:   SetPetscDS(&f_pntr,&tsCtx);
169:   SNESCreate(comm,&snes);
170:   SNESSetType(snes,"newtonls");


173:   /* Set various routines and options */
174:   SNESSetFunction(snes,user.grid->res,FormFunction,&user);
175:   flg  = PETSC_FALSE;
176:   PetscOptionsGetBool(NULL,NULL,"-matrix_free",&flg,NULL);
177:   if (flg) {
178:     /* Use matrix-free to define Newton system; use explicit (approx) Jacobian for preconditioner */
179:     MatCreateSNESMF(snes,&Jpc);
180:     SNESSetJacobian(snes,Jpc,user.grid->A,FormJacobian,&user);
181:   } else {
182:     /* Use explicit (approx) Jacobian to define Newton system and preconditioner */
183:     SNESSetJacobian(snes,user.grid->A,user.grid->A,FormJacobian,&user);
184:   }
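  /*
     With -matrix_free the Newton system is applied matrix-free: MatCreateSNESMF()
     builds an operator whose matrix-vector product approximates the Jacobian action
     by finite differencing FormFunction, roughly J(q)a ~ (F(q+h*a) - F(q))/h for a
     small parameter h, while the assembled matrix user.grid->A is still filled by
     FormJacobian and used only to build the preconditioner.
  */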

186:   SNESSetMaxLinearSolveFailures(snes,maxfails);
187:   SNESSetFromOptions(snes);

189:   /* Initialize the flowfield */
190:   FormInitialGuess(snes,user.grid);

192:   /* Solve nonlinear system */
193:   Update(snes,&user);

195:   /* Write restart file */
196:   VecGetArray(user.grid->qnode,&qnode);
197:   /*f77WREST(&user.grid->nnodes,qnode,user.grid->turbre,user.grid->amut);*/

199:   /* Write Tecplot solution file */
200: #if 0
201:   if (!rank)
202:     f77TECFLO(&user.grid->nnodes,
203:               &user.grid->nnbound,&user.grid->nvbound,&user.grid->nfbound,
204:               &user.grid->nnfacet,&user.grid->nvfacet,&user.grid->nffacet,
205:               &user.grid->nsnode, &user.grid->nvnode, &user.grid->nfnode,
206:               c_info->title,
207:               user.grid->x,       user.grid->y,       user.grid->z,
208:               qnode,
209:               user.grid->nnpts,   user.grid->nntet,   user.grid->nvpts,
210:               user.grid->nvtet,   user.grid->nfpts,   user.grid->nftet,
211:               user.grid->f2ntn,   user.grid->f2ntv,   user.grid->f2ntf,
212:               user.grid->isnode,  user.grid->ivnode,  user.grid->ifnode,
213:               &rank);
214: #endif
215:   if (write_pvtu) {WritePVTU(&user,pvtu_fname,pvtu_base64);}

217:   /* Write residual, lift, drag, and moment history file */
218:   /*
219:     if (!rank) f77PLLAN(&user.grid->nnodes,&rank);
220:   */

222:   VecRestoreArray(user.grid->qnode,&qnode);
223:   flg  = PETSC_FALSE;
224:   PetscOptionsGetBool(NULL,NULL,"-mem_use",&flg,NULL);
225:   if (flg) {
226:     PetscMemoryView(PETSC_VIEWER_STDOUT_WORLD,"Memory usage before destroying\n");
227:   }

229:   VecDestroy(&user.grid->qnode);
230:   VecDestroy(&user.grid->qnodeLoc);
231:   VecDestroy(&user.tsCtx->qold);
232:   VecDestroy(&user.tsCtx->func);
233:   VecDestroy(&user.grid->res);
234:   VecDestroy(&user.grid->grad);
235:   VecDestroy(&user.grid->gradLoc);
236:   MatDestroy(&user.grid->A);
237:   flg  = PETSC_FALSE;
238:   PetscOptionsGetBool(NULL,NULL,"-matrix_free",&flg,NULL);
239:   if (flg) { MatDestroy(&Jpc);}
240:   SNESDestroy(&snes);
241:   VecScatterDestroy(&user.grid->scatter);
242:   VecScatterDestroy(&user.grid->gradScatter);
243:   flg  = PETSC_FALSE;
244:   PetscOptionsGetBool(NULL,NULL,"-mem_use",&flg,NULL);
245:   if (flg) {
246:     PetscMemoryView(PETSC_VIEWER_STDOUT_WORLD,"Memory usage after destroying\n");
247:   }
248:   PetscPreLoadEnd();

250:   /* allocated in set_up_grid() */
251:   PetscFree(user.grid->isface);
252:   PetscFree(user.grid->ivface);
253:   PetscFree(user.grid->ifface);
254:   PetscFree(user.grid->us);
255:   PetscFree(user.grid->vs);
256:   PetscFree(user.grid->as);

258:   /* Allocated in GetLocalOrdering() */
259:   PetscFree(user.grid->eptr);
260:   PetscFree(user.grid->ia);
261:   PetscFree(user.grid->ja);
262:   PetscFree(user.grid->loc2glo);
263:   PetscFree(user.grid->loc2pet);
264:   PetscFree(user.grid->xyzn);
265: #if defined(_OPENMP)
266: #  if defined(HAVE_REDUNDANT_WORK)
267:   PetscFree(user.grid->resd);
268: #  else
269:   PetscFree(user.grid->part_thr);
270:   PetscFree(user.grid->nedge_thr);
271:   PetscFree(user.grid->edge_thr);
272:   PetscFree(user.grid->xyzn_thr);
273: #  endif
274: #endif
275:   PetscFree(user.grid->xyz);
276:   PetscFree(user.grid->area);

278:   PetscFree(user.grid->nntet);
279:   PetscFree(user.grid->nnpts);
280:   PetscFree(user.grid->f2ntn);
281:   PetscFree(user.grid->isnode);
282:   PetscFree(user.grid->sxn);
283:   PetscFree(user.grid->syn);
284:   PetscFree(user.grid->szn);
285:   PetscFree(user.grid->sa);
286:   PetscFree(user.grid->sface_bit);

288:   PetscFree(user.grid->nvtet);
289:   PetscFree(user.grid->nvpts);
290:   PetscFree(user.grid->f2ntv);
291:   PetscFree(user.grid->ivnode);
292:   PetscFree(user.grid->vxn);
293:   PetscFree(user.grid->vyn);
294:   PetscFree(user.grid->vzn);
295:   PetscFree(user.grid->va);
296:   PetscFree(user.grid->vface_bit);

298:   PetscFree(user.grid->nftet);
299:   PetscFree(user.grid->nfpts);
300:   PetscFree(user.grid->f2ntf);
301:   PetscFree(user.grid->ifnode);
302:   PetscFree(user.grid->fxn);
303:   PetscFree(user.grid->fyn);
304:   PetscFree(user.grid->fzn);
305:   PetscFree(user.grid->fa);
306:   PetscFree(user.grid->cdt);
307:   PetscFree(user.grid->phi);
308:   PetscFree(user.grid->rxy);

310:   PetscPrintf(comm,"Time taken in gradient calculation %g sec.\n",grad_time);

312:   PetscFinalize();
313:   return ierr;
314: }

316: /*---------------------------------------------------------------------*/
317: /* ---------------------  Form initial approximation ----------------- */
318: int FormInitialGuess(SNES snes,GRID *grid)
319: /*---------------------------------------------------------------------*/
320: {
321:   int         ierr;
322:   PetscScalar *qnode;

325:   VecGetArray(grid->qnode,&qnode);
326:   f77INIT(&grid->nnodesLoc,qnode,grid->turbre,grid->amut,&grid->nvnodeLoc,grid->ivnode,&rank);
327:   VecRestoreArray(grid->qnode,&qnode);
328:   return(0);
329: }

331: /*---------------------------------------------------------------------*/
332: /* ---------------------  Evaluate Function F(x) --------------------- */
333: int FormFunction(SNES snes,Vec x,Vec f,void *dummy)
334: /*---------------------------------------------------------------------*/
335: {
336:   AppCtx      *user  = (AppCtx*) dummy;
337:   GRID        *grid  = user->grid;
338:   TstepCtx    *tsCtx = user->tsCtx;
339:   PetscScalar *qnode,*res,*qold;
340:   PetscScalar *grad;
341:   PetscScalar temp;
342:   VecScatter  scatter     = grid->scatter;
343:   VecScatter  gradScatter = grid->gradScatter;
344:   Vec         localX      = grid->qnodeLoc;
345:   Vec         localGrad   = grid->gradLoc;
346:   int         i,j,in,ierr;
347:   int         nbface,ires;
348:   PetscScalar time_ini,time_fin;

351:   /* Get X into the local work vector */
352:   VecScatterBegin(scatter,x,localX,INSERT_VALUES,SCATTER_FORWARD);
353:   VecScatterEnd(scatter,x,localX,INSERT_VALUES,SCATTER_FORWARD);
354:   /* VecCopy(x,localX); */
355:   /* Access the arrays of f, grad, and the local input vector */
356:   VecGetArray(f,&res);
357:   VecGetArray(grid->grad,&grad);
358:   VecGetArray(localX,&qnode);
359:   ires = tsCtx->ires;

361:   PetscTime(&time_ini);
362:   f77LSTGS(&grid->nnodesLoc,&grid->nedgeLoc,grid->eptr,qnode,grad,grid->xyz,grid->rxy,
363:            &rank,&grid->nvertices);
364:   PetscTime(&time_fin);
365:   grad_time += time_fin - time_ini;
366:   VecRestoreArray(grid->grad,&grad);

368:   VecScatterBegin(gradScatter,grid->grad,localGrad,INSERT_VALUES,SCATTER_FORWARD);
369:   VecScatterEnd(gradScatter,grid->grad,localGrad,INSERT_VALUES,SCATTER_FORWARD);
370:   /*VecCopy(grid->grad,localGrad);*/

372:   VecGetArray(localGrad,&grad);
373:   nbface = grid->nsface + grid->nvface + grid->nfface;
374:   f77GETRES(&grid->nnodesLoc,&grid->ncell,  &grid->nedgeLoc,  &grid->nsface,
375:             &grid->nvface,&grid->nfface, &nbface,
376:             &grid->nsnodeLoc,&grid->nvnodeLoc, &grid->nfnodeLoc,
377:             grid->isface, grid->ivface,  grid->ifface, &grid->ileast,
378:             grid->isnode, grid->ivnode,  grid->ifnode,
379:             &grid->nnfacetLoc,grid->f2ntn,  &grid->nnbound,
380:             &grid->nvfacetLoc,grid->f2ntv,  &grid->nvbound,
381:             &grid->nffacetLoc,grid->f2ntf,  &grid->nfbound,
382:             grid->eptr,
383:             grid->sxn,    grid->syn,     grid->szn,
384:             grid->vxn,    grid->vyn,     grid->vzn,
385:             grid->fxn,    grid->fyn,     grid->fzn,
386:             grid->xyzn,
387:             qnode,        grid->cdt,
388:             grid->xyz,    grid->area,
389:             grad, res,
390:             grid->turbre,
391:             grid->slen,   grid->c2n,
392:             grid->c2e,
393:             grid->us,     grid->vs,      grid->as,
394:             grid->phi,
395:             grid->amut,   &ires,
396: #if defined(_OPENMP)
397:             &max_threads,
398: #if defined(HAVE_EDGE_COLORING)
399:             &grid->ncolor, grid->ncount,
400: #elif defined(HAVE_REDUNDANT_WORK)
401:             grid->resd,
402: #else
403:             &grid->nedgeAllThr,
404:             grid->part_thr,grid->nedge_thr,grid->edge_thr,grid->xyzn_thr,
405: #endif
406: #endif
407:             &tsCtx->LocalTimeStepping,&rank,&grid->nvertices);

409: /* Add the contribution due to time stepping */
410:   if (ires == 1) {
411:     VecGetArray(tsCtx->qold,&qold);
412: #if defined(INTERLACING)
413:     for (i = 0; i < grid->nnodesLoc; i++) {
414:       temp = grid->area[i]/(tsCtx->cfl*grid->cdt[i]);
415:       for (j = 0; j < 4; j++) {
416:         in       = 4*i + j;
417:         res[in] += temp*(qnode[in] - qold[in]);
418:       }
419:     }
420: #else
421:     for (j = 0; j < 4; j++) {
422:       for (i = 0; i < grid->nnodesLoc; i++) {
423:         temp     = grid->area[i]/(tsCtx->cfl*grid->cdt[i]);
424:         in       = grid->nnodesLoc*j + i;
425:         res[in] += temp*(qnode[in] - qold[in]);
426:       }
427:     }
428: #endif
429:     VecRestoreArray(tsCtx->qold,&qold);
430:   }
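  /*
     The loop above adds the pseudo-time (backward Euler) contribution: for every
     locally owned node i and each of its 4 unknowns,
        res += area[i]/(cfl*cdt[i]) * (qnode - qold),
     i.e. the dual-cell "volume" over the CFL-scaled local time step times the change
     from the previous pseudo-time level.
  */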
431:   VecRestoreArray(localX,&qnode);
432:   VecRestoreArray(f,&res);
433:   VecRestoreArray(localGrad,&grad);
434:   return(0);
435: }

437: /*---------------------------------------------------------------------*/
438: /* --------------------  Evaluate Jacobian F'(x) -------------------- */

440: int FormJacobian(SNES snes,Vec x,Mat Jac,Mat pc_mat,void *dummy)
441: /*---------------------------------------------------------------------*/
442: {
443:   AppCtx      *user  = (AppCtx*) dummy;
444:   GRID        *grid  = user->grid;
445:   TstepCtx    *tsCtx = user->tsCtx;
446:   Vec         localX = grid->qnodeLoc;
447:   PetscScalar *qnode;
448:   int         ierr;

451:   /*  VecScatterBegin(scatter,x,localX,INSERT_VALUES,SCATTER_FORWARD);
452:       VecScatterEnd(scatter,x,localX,INSERT_VALUES,SCATTER_FORWARD); */
453:   /* VecCopy(x,localX); */
454:   MatSetUnfactored(pc_mat);

456:   VecGetArray(localX,&qnode);
457:   f77FILLA(&grid->nnodesLoc,&grid->nedgeLoc,grid->eptr,
458:            &grid->nsface,
459:             grid->isface,grid->fxn,grid->fyn,grid->fzn,
460:             grid->sxn,grid->syn,grid->szn,
461:            &grid->nsnodeLoc,&grid->nvnodeLoc,&grid->nfnodeLoc,grid->isnode,
462:             grid->ivnode,grid->ifnode,qnode,&pc_mat,grid->cdt,
463:             grid->area,grid->xyzn,&tsCtx->cfl,
464:            &rank,&grid->nvertices);
465:   VecRestoreArray(localX,&qnode);
466:   MatAssemblyBegin(Jac,MAT_FINAL_ASSEMBLY);
467:   MatAssemblyEnd(Jac,MAT_FINAL_ASSEMBLY);
468: #if defined(MATRIX_VIEW)
469:   if ((tsCtx->itstep != 0) && (tsCtx->itstep % tsCtx->print_freq) == 0) {
470:     PetscViewer viewer;
471:     char mat_file[PETSC_MAX_PATH_LEN];
472:     sprintf(mat_file,"mat_bin.%d",tsCtx->itstep);
473:     PetscViewerBinaryOpen(MPI_COMM_WORLD,mat_file,FILE_MODE_WRITE,&viewer);
474:     MatView(pc_mat,viewer);
475:     PetscViewerDestroy(&viewer);
476:   }
477: #endif
478:   return(0);
479: }
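/*
   FormJacobian assembles only the preconditioning matrix pc_mat through f77FILLA
   (the "explicit (approx) Jacobian" mentioned in main); with -matrix_free the
   operator Jac is the matrix-free shell created there. MatSetUnfactored() is called
   first so that a matrix that may have been factored in place by the preconditioner
   during the previous solve can be refilled with new values.
*/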

481: /*---------------------------------------------------------------------*/
482: int Update(SNES snes,void *ctx)
483: /*---------------------------------------------------------------------*/
484: {
485:   AppCtx      *user   = (AppCtx*) ctx;
486:   GRID        *grid   = user->grid;
487:   TstepCtx    *tsCtx  = user->tsCtx;
488:   VecScatter  scatter = grid->scatter;
489:   Vec         localX  = grid->qnodeLoc;
490:   PetscScalar *qnode,*res;
491:   PetscScalar clift,cdrag,cmom;
492:   int         ierr,its;
493:   PetscScalar fratio;
494:   PetscScalar time1,time2,cpuloc,cpuglo;
495:   int         max_steps;
496:   PetscBool   print_flag = PETSC_FALSE;
497:   FILE        *fptr      = 0;
498:   int         nfailsCum  = 0,nfails = 0;
499:   /*Scalar         cpu_ini,cpu_fin,cpu_time;*/
500:   /*int            event0 = 14,event1 = 25,gen_start,gen_read;
501:   PetscScalar    time_start_counters,time_read_counters;
502:   long long      counter0,counter1;*/

505:   PetscOptionsGetBool(NULL,NULL,"-print",&print_flag,NULL);
506:   if (print_flag) {
507:     PetscFOpen(PETSC_COMM_WORLD,"history.out","w",&fptr);
508:     PetscFPrintf(PETSC_COMM_WORLD,fptr,"VARIABLES = iter,cfl,fnorm,clift,cdrag,cmom,cpu\n");
509:   }
510:   if (user->PreLoading) max_steps = 1;
511:   else max_steps = tsCtx->max_steps;
512:   fratio = 1.0;
513:   /*tsCtx->ptime = 0.0;*/
514:   VecCopy(grid->qnode,tsCtx->qold);
515:   PetscTime(&time1);
516: #if defined(PARCH_IRIX64) && defined(USE_HW_COUNTERS)
517:   /* if (!user->PreLoading) {
518:     PetscBool  flg = PETSC_FALSE;
519:     PetscOptionsGetInt(NULL,NULL,"-e0",&event0,&flg);
520:     PetscOptionsGetInt(NULL,NULL,"-e1",&event1,&flg);
521:     PetscTime(&time_start_counters);
522:     if ((gen_start = start_counters(event0,event1)) < 0)
523:     SETERRQ(PETSC_COMM_SELF,1,"Error in start_counters\n");
524:   }*/
525: #endif
526:   /*cpu_ini = PetscGetCPUTime();*/
527:   for (tsCtx->itstep = 0; (tsCtx->itstep < max_steps) &&
528:         (fratio <= tsCtx->fnorm_ratio); tsCtx->itstep++) {
529:     ComputeTimeStep(snes,tsCtx->itstep,user);
530:     /*tsCtx->ptime +=  tsCtx->dt;*/

532:     SNESSolve(snes,NULL,grid->qnode);
533:     SNESGetIterationNumber(snes,&its);

535:     SNESGetNonlinearStepFailures(snes,&nfails);
536:     nfailsCum += nfails; nfails = 0;
537:     if (nfailsCum >= 2) SETERRQ(PETSC_COMM_SELF,1,"Unable to find a Newton Step");
538:     if (print_flag) {
539:       PetscPrintf(PETSC_COMM_WORLD,"At Time Step %d cfl = %g and fnorm = %g\n",
540:                          tsCtx->itstep,tsCtx->cfl,tsCtx->fnorm);
541:     }
542:     VecCopy(grid->qnode,tsCtx->qold);

544:     c_info->ntt = tsCtx->itstep+1;
545:     PetscTime(&time2);
546:     cpuloc      = time2-time1;
547:     cpuglo      = 0.0;
548:     MPI_Allreduce(&cpuloc,&cpuglo,1,MPIU_REAL,MPIU_MAX,PETSC_COMM_WORLD);
549:     c_info->tot = cpuglo;    /* Total CPU time used up to this time step */

551:     VecScatterBegin(scatter,grid->qnode,localX,INSERT_VALUES,SCATTER_FORWARD);
552:     VecScatterEnd(scatter,grid->qnode,localX,INSERT_VALUES,SCATTER_FORWARD);
553:     /* VecCopy(grid->qnode,localX); */

555:     VecGetArray(grid->res,&res);
556:     VecGetArray(localX,&qnode);

558:     f77FORCE(&grid->nnodesLoc,&grid->nedgeLoc,
559:               grid->isnode, grid->ivnode,
560:              &grid->nnfacetLoc,grid->f2ntn,&grid->nnbound,
561:              &grid->nvfacetLoc,grid->f2ntv,&grid->nvbound,
562:               grid->eptr,   qnode,
563:               grid->xyz,
564:               grid->sface_bit,grid->vface_bit,
565:               &clift,&cdrag,&cmom,&rank,&grid->nvertices);
566:     if (print_flag) {
567:       PetscPrintf(PETSC_COMM_WORLD,"%d\t%g\t%g\t%g\t%g\t%g\n",tsCtx->itstep,
568:                         tsCtx->cfl,tsCtx->fnorm,clift,cdrag,cmom);
569:       PetscPrintf(PETSC_COMM_WORLD,"Wall clock time needed %g seconds for %d time steps\n",
570:                         cpuglo,tsCtx->itstep);
571:       PetscFPrintf(PETSC_COMM_WORLD,fptr,"%d\t%g\t%g\t%g\t%g\t%g\t%g\n",
572:                           tsCtx->itstep,tsCtx->cfl,tsCtx->fnorm,clift,cdrag,cmom,cpuglo);
573:     }
574:     VecRestoreArray(localX,&qnode);
575:     VecRestoreArray(grid->res,&res);
576:     fratio = tsCtx->fnorm_ini/tsCtx->fnorm;
577:     MPI_Barrier(PETSC_COMM_WORLD);

579:   } /* End of time step loop */

581: #if defined(PARCH_IRIX64) && defined(USE_HW_COUNTERS)
582:   if (!user->PreLoading) {
583:     int  eve0,eve1;
584:     FILE *cfp0,*cfp1;
585:     char str[256];
586:     /* if ((gen_read = read_counters(event0,&counter0,event1,&counter1)) < 0)
587:     SETERRQ(PETSC_COMM_SELF,1,"Error in read_counter\n");
588:     PetscTime(&time_read_counters);
589:     if (gen_read != gen_start) {
590:     SETERRQ(PETSC_COMM_SELF,1,"Lost Counters!! Aborting ...\n");
591:     }*/
592:     /*sprintf(str,"counters%d_and_%d",event0,event1);
593:     cfp0 = fopen(str,"a");*/
594:     /*print_counters(event0,counter0,event1,counter1);*/
595:     /*fprintf(cfp0,"%lld %lld %g\n",counter0,counter1,
596:                   time_counters);
597:     fclose(cfp0);*/
598:   }
599: #endif
600:   PetscPrintf(PETSC_COMM_WORLD,"Total wall clock time needed %g seconds for %d time steps\n",
601:                      cpuglo,tsCtx->itstep);
602:   PetscPrintf(PETSC_COMM_WORLD,"cfl = %g fnorm = %g\n",tsCtx->cfl,tsCtx->fnorm);
603:   PetscPrintf(PETSC_COMM_WORLD,"clift = %g cdrag = %g cmom = %g\n",clift,cdrag,cmom);

605:   if (!rank && print_flag) fclose(fptr);
606:   if (user->PreLoading) {
607:     tsCtx->fnorm_ini = 0.0;
608:     PetscPrintf(PETSC_COMM_WORLD,"Preloading done ...\n");
609:   }
610:   return(0);
611: }

613: /*---------------------------------------------------------------------*/
614: int ComputeTimeStep(SNES snes,int iter,void *ctx)
615: /*---------------------------------------------------------------------*/
616: {
617:   AppCtx      *user  = (AppCtx*) ctx;
618:   TstepCtx    *tsCtx = user->tsCtx;
619:   Vec         func   = tsCtx->func;
620:   PetscScalar inc    = 1.1;
621:   PetscScalar newcfl;
622:   int         ierr;
623:   /*int       iramp = tsCtx->iramp;*/

626:   tsCtx->ires = 0;
627:   FormFunction(snes,tsCtx->qold,func,user);
628:   tsCtx->ires = 1;
629:   VecNorm(func,NORM_2,&tsCtx->fnorm);
630:   /* first time through so compute initial function norm */
631:   if (tsCtx->fnorm_ini == 0.0) {
632:     tsCtx->fnorm_ini = tsCtx->fnorm;
633:     tsCtx->cfl       = tsCtx->cfl_ini;
634:   } else {
635:     newcfl     = inc*tsCtx->cfl_ini*tsCtx->fnorm_ini/tsCtx->fnorm;
636:     tsCtx->cfl = PetscMin(newcfl,tsCtx->cfl_max);
637:   }
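  /*
     This is a switched-evolution-relaxation (SER) style CFL update: after the first
     step the CFL number grows in proportion to the residual reduction,
        cfl = min(inc * cfl_ini * fnorm_ini / fnorm, cfl_max).
  */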

639:   /* if (iramp < 0) {
640:    newcfl = inc*tsCtx->cfl_ini*tsCtx->fnorm_ini/tsCtx->fnorm;
641:   } else {
642:    if (tsCtx->dt < 0 && iramp > 0)
643:     if (iter > iramp) newcfl = tsCtx->cfl_max;
644:     else newcfl = tsCtx->cfl_ini + (tsCtx->cfl_max - tsCtx->cfl_ini)*
645:                                 (double) iter/(double) iramp;
646:   }
647:   tsCtx->cfl = MIN(newcfl,tsCtx->cfl_max);*/
648:   /*printf("In ComputeTime Step - fnorm is %f\n",tsCtx->fnorm);*/
649:   /*VecDestroy(&func);*/
650:   return(0);
651: }

653: /*---------------------------------------------------------------------*/
654: int GetLocalOrdering(GRID *grid)
655: /*---------------------------------------------------------------------*/
656: {
657:   int         ierr,i,j,k,inode,isurf,nte,nb,node1,node2,node3;
658:   int         nnodes,nedge,nnz,jstart,jend;
659:   int         nnodesLoc,nvertices,nedgeLoc,nnodesLocEst;
660:   int         nedgeLocEst,remEdges,readEdges,remNodes,readNodes;
661:   int         nnfacet,nvfacet,nffacet;
662:   int         nnfacetLoc,nvfacetLoc,nffacetLoc;
663:   int         nsnode,nvnode,nfnode;
664:   int         nsnodeLoc,nvnodeLoc,nfnodeLoc;
665:   int         nnbound,nvbound,nfbound;
666:   int         bs = 4;
667:   int         fdes = 0;
668:   off_t       currentPos  = 0,newPos = 0;
669:   int         grid_param  = 13;
670:   int         cross_edges = 0;
671:   int         *edge_bit,*pordering;
672:   int         *l2p,*l2a,*p2l,*a2l,*v2p,*eperm;
673:   int         *tmp,*tmp1,*tmp2;
674:   PetscScalar time_ini,time_fin;
675:   PetscScalar *ftmp,*ftmp1;
676:   char        mesh_file[PETSC_MAX_PATH_LEN] = "";
677:   AO          ao;
678:   FILE        *fptr,*fptr1;
679:   PetscBool   flg;
680:   MPI_Comm    comm = PETSC_COMM_WORLD;

683:   /* Read the integer grid parameters */
684:   ICALLOC(grid_param,&tmp);
685:   if (!rank) {
686:     PetscBool exists;
687:     PetscOptionsGetString(NULL,NULL,"-mesh",mesh_file,sizeof(mesh_file),&flg);
688:     PetscTestFile(mesh_file,'r',&exists);
689:     if (!exists) { /* try uns3d.msh as the file name */
690:       PetscStrcpy(mesh_file,"uns3d.msh");
691:     }
692:     PetscBinaryOpen(mesh_file,FILE_MODE_READ,&fdes);
693:   }
694:   PetscBinarySynchronizedRead(comm,fdes,tmp,grid_param,PETSC_INT);
695:   grid->ncell   = tmp[0];
696:   grid->nnodes  = tmp[1];
697:   grid->nedge   = tmp[2];
698:   grid->nnbound = tmp[3];
699:   grid->nvbound = tmp[4];
700:   grid->nfbound = tmp[5];
701:   grid->nnfacet = tmp[6];
702:   grid->nvfacet = tmp[7];
703:   grid->nffacet = tmp[8];
704:   grid->nsnode  = tmp[9];
705:   grid->nvnode  = tmp[10];
706:   grid->nfnode  = tmp[11];
707:   grid->ntte    = tmp[12];
708:   grid->nsface  = 0;
709:   grid->nvface  = 0;
710:   grid->nfface  = 0;
711:   PetscFree(tmp);
712:   PetscPrintf(comm,"nnodes = %d,nedge = %d,nnfacet = %d,nsnode = %d,nfnode = %d\n",
713:                               grid->nnodes,grid->nedge,grid->nnfacet,grid->nsnode,grid->nfnode);

715:   nnodes  = grid->nnodes;
716:   nedge   = grid->nedge;
717:   nnfacet = grid->nnfacet;
718:   nvfacet = grid->nvfacet;
719:   nffacet = grid->nffacet;
720:   nnbound = grid->nnbound;
721:   nvbound = grid->nvbound;
722:   nfbound = grid->nfbound;
723:   nsnode  = grid->nsnode;
724:   nvnode  = grid->nvnode;
725:   nfnode  = grid->nfnode;

727:   /* Read the partitioning vector generated by MeTiS */
728:   ICALLOC(nnodes,&l2a);
729:   ICALLOC(nnodes,&v2p);
730:   ICALLOC(nnodes,&a2l);
731:   nnodesLoc = 0;

733:   for (i = 0; i < nnodes; i++) a2l[i] = -1;
734:   PetscTime(&time_ini);

736:   if (!rank) {
737:     if (size == 1) {
738:       PetscMemzero(v2p,nnodes*sizeof(int));
739:     } else {
740:       char      spart_file[PETSC_MAX_PATH_LEN],part_file[PETSC_MAX_PATH_LEN];
741:       PetscBool exists;

743:       PetscOptionsGetString(NULL,NULL,"-partition",spart_file,sizeof(spart_file),&flg);
744:       PetscTestFile(spart_file,'r',&exists);
745:       if (!exists) { /* try appending the number of processors */
746:         sprintf(part_file,"part_vec.part.%d",size);
747:         PetscStrcpy(spart_file,part_file);
748:       }
749:       fptr = fopen(spart_file,"r");
750:       if (!fptr) SETERRQ1(PETSC_COMM_SELF,1,"Cannot open file %s\n",spart_file);
751:       for (inode = 0; inode < nnodes; inode++) {
752:         fscanf(fptr,"%d\n",&node1);
753:         v2p[inode] = node1;
754:       }
755:       fclose(fptr);
756:     }
757:   }
758:   MPI_Bcast(v2p,nnodes,MPI_INT,0,comm);
759:   for (inode = 0; inode < nnodes; inode++) {
760:     if (v2p[inode] == rank) {
761:       l2a[nnodesLoc] = inode;
762:       a2l[inode]     = nnodesLoc;
763:       nnodesLoc++;
764:     }
765:   }

767:   PetscTime(&time_fin);
768:   time_fin -= time_ini;
769:   PetscPrintf(comm,"Partition Vector read successfully\n");
770:   PetscPrintf(comm,"Time taken in this phase was %g\n",time_fin);

772:   MPI_Scan(&nnodesLoc,&rstart,1,MPI_INT,MPI_SUM,comm);
773:   rstart -= nnodesLoc;
774:   ICALLOC(nnodesLoc,&pordering);
775:   for (i=0; i < nnodesLoc; i++) pordering[i] = rstart + i;
776:   AOCreateBasic(comm,nnodesLoc,l2a,pordering,&ao);
777:   PetscFree(pordering);
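  /*
     The AO (application ordering) created here maps the application node numbers in
     l2a[] (the ordering of the mesh file) to the PETSc ordering, in which this rank
     owns the contiguous range rstart .. rstart+nnodesLoc-1 computed by the MPI_Scan
     above; it is applied to loc2pet further below via AOApplicationToPetsc().
  */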

 779:   /* Now count the local number of edges, including edges with
 780:    ghost nodes; edges between two ghost nodes are NOT counted */
781:   nedgeLoc  = 0;
782:   nvertices = nnodesLoc;
783:   /* Choose an estimated number of local edges. The choice
784:    nedgeLocEst = 1000000 looks reasonable as it will read
785:    the edge and edge normal arrays in 8 MB chunks */
786:   /*nedgeLocEst = nedge/size;*/
787:   nedgeLocEst = PetscMin(nedge,1000000);
788:   remEdges    = nedge;
789:   ICALLOC(2*nedgeLocEst,&tmp);
790:   PetscBinarySynchronizedSeek(comm,fdes,0,PETSC_BINARY_SEEK_CUR,&currentPos);
791:   PetscTime(&time_ini);
792:   while (remEdges > 0) {
793:     readEdges = PetscMin(remEdges,nedgeLocEst);
794:     /*time_ini = PetscTime();*/
795:     PetscBinarySynchronizedRead(comm,fdes,tmp,readEdges,PETSC_INT);
796:     PetscBinarySynchronizedSeek(comm,fdes,(nedge-readEdges)*PETSC_BINARY_INT_SIZE,PETSC_BINARY_SEEK_CUR,&newPos);
797:     PetscBinarySynchronizedRead(comm,fdes,tmp+readEdges,readEdges,PETSC_INT);
798:     PetscBinarySynchronizedSeek(comm,fdes,-nedge*PETSC_BINARY_INT_SIZE,PETSC_BINARY_SEEK_CUR,&newPos);
799:     /*time_fin += PetscTime()-time_ini;*/
800:     for (j = 0; j < readEdges; j++) {
801:       node1 = tmp[j]-1;
802:       node2 = tmp[j+readEdges]-1;
803:       if ((v2p[node1] == rank) || (v2p[node2] == rank)) {
804:         nedgeLoc++;
805:         if (a2l[node1] == -1) {
806:           l2a[nvertices] = node1;
807:           a2l[node1]     = nvertices;
808:           nvertices++;
809:         }
810:         if (a2l[node2] == -1) {
811:           l2a[nvertices] = node2;
812:           a2l[node2]     = nvertices;
813:           nvertices++;
814:         }
815:       }
816:     }
817:     remEdges = remEdges - readEdges;
818:     MPI_Barrier(comm);
819:   }
820:   PetscTime(&time_fin);
821:   time_fin -= time_ini;
822:   PetscPrintf(comm,"Local edges counted with MPI_Bcast %d\n",nedgeLoc);
823:   PetscPrintf(comm,"Local vertices counted %d\n",nvertices);
824:   PetscPrintf(comm,"Time taken in this phase was %g\n",time_fin);

826:   /* Now store the local edges */
827:   ICALLOC(2*nedgeLoc,&grid->eptr);
828:   ICALLOC(nedgeLoc,&edge_bit);
829:   ICALLOC(nedgeLoc,&eperm);
830:   i = 0; j = 0; k = 0;
831:   remEdges   = nedge;
832:   PetscBinarySynchronizedSeek(comm,fdes,currentPos,PETSC_BINARY_SEEK_SET,&newPos);
833:   currentPos = newPos;

835:   PetscTime(&time_ini);
836:   while (remEdges > 0) {
837:     readEdges = PetscMin(remEdges,nedgeLocEst);
838:     PetscBinarySynchronizedRead(comm,fdes,tmp,readEdges,PETSC_INT);
839:     PetscBinarySynchronizedSeek(comm,fdes,(nedge-readEdges)*PETSC_BINARY_INT_SIZE,PETSC_BINARY_SEEK_CUR,&newPos);
840:     PetscBinarySynchronizedRead(comm,fdes,tmp+readEdges,readEdges,PETSC_INT);
841:     PetscBinarySynchronizedSeek(comm,fdes,-nedge*PETSC_BINARY_INT_SIZE,PETSC_BINARY_SEEK_CUR,&newPos);
842:     for (j = 0; j < readEdges; j++) {
843:       node1 = tmp[j]-1;
844:       node2 = tmp[j+readEdges]-1;
845:       if ((v2p[node1] == rank) || (v2p[node2] == rank)) {
846:         grid->eptr[k]          = a2l[node1];
847:         grid->eptr[k+nedgeLoc] = a2l[node2];
848:         edge_bit[k]            = i; /* Record global file index of the edge */
849:         eperm[k]               = k;
850:         k++;
851:       }
852:       i++;
853:     }
854:     remEdges = remEdges - readEdges;
855:     MPI_Barrier(comm);
856:   }
857:   PetscBinarySynchronizedSeek(comm,fdes,currentPos+2*nedge*PETSC_BINARY_INT_SIZE,PETSC_BINARY_SEEK_SET,&newPos);
858:   PetscTime(&time_fin);
859:   time_fin -= time_ini;
860:   PetscPrintf(comm,"Local edges stored\n");
861:   PetscPrintf(comm,"Time taken in this phase was %g\n",time_fin);

863:   PetscFree(tmp);
864:   ICALLOC(2*nedgeLoc,&tmp);
865:   PetscMemcpy(tmp,grid->eptr,2*nedgeLoc*sizeof(int));
866: #if defined(_OPENMP) && defined(HAVE_EDGE_COLORING)
867:   EdgeColoring(nvertices,nedgeLoc,grid->eptr,eperm,&grid->ncolor,grid->ncount);
868: #else
869:   /* Now reorder the edges for better cache locality */
870:   /*
871:   tmp[0]=7;tmp[1]=6;tmp[2]=3;tmp[3]=9;tmp[4]=2;tmp[5]=0;
872:   PetscSortIntWithPermutation(6,tmp,eperm);
873:   for (i=0; i<6; i++)
874:    printf("%d %d %d\n",i,tmp[i],eperm[i]);
875:   */
876:   flg  = PETSC_FALSE;
 877:   PetscOptionsGetBool(NULL,NULL,"-no_edge_reordering",&flg,NULL);
878:   if (!flg) {
879:     PetscSortIntWithPermutation(nedgeLoc,tmp,eperm);
880:   }
881: #endif
882:   PetscMallocValidate(__LINE__,PETSC_FUNCTION_NAME,__FILE__);
883:   k    = 0;
884:   for (i = 0; i < nedgeLoc; i++) {
885:     int cross_node=nnodesLoc/2;
886:     node1 = tmp[eperm[i]] + 1;
887:     node2 = tmp[nedgeLoc+eperm[i]] + 1;
888: #if defined(INTERLACING)
889:     grid->eptr[k++] = node1;
890:     grid->eptr[k++] = node2;
891: #else
892:     grid->eptr[i]          = node1;
893:     grid->eptr[nedgeLoc+i] = node2;
894: #endif
895:     /* if (node1 > node2)
896:      printf("On processor %d, for edge %d node1 = %d, node2 = %d\n",
897:             rank,i,node1,node2);*/
898:     if ((node1 <= cross_node) && (node2 > cross_node)) cross_edges++;
899:   }
900:   PetscPrintf(comm,"Number of cross edges %d\n", cross_edges);
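  /*
     The sort above orders the local edges by their first node (unless
     -no_edge_reordering is given) so that the qnode accesses in the flux loops are
     more nearly sequential; cross_edges counts edges whose endpoints straddle the
     node nnodesLoc/2, apparently as a rough indicator of how scattered the edge list
     remains.
  */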
901:   PetscFree(tmp);
902: #if defined(_OPENMP) && !defined(HAVE_REDUNDANT_WORK) && !defined(HAVE_EDGE_COLORING)
903:   /* Now make the local 'ia' and 'ja' arrays */
904:   ICALLOC(nvertices+1,&grid->ia);
905:   /* Use tmp for a work array */
906:   ICALLOC(nvertices,&tmp);
907:   f77GETIA(&nvertices,&nedgeLoc,grid->eptr,grid->ia,tmp,&rank);
908:   nnz = grid->ia[nvertices] - 1;
909:   ICALLOC(nnz,&grid->ja);
910:   f77GETJA(&nvertices,&nedgeLoc,grid->eptr,grid->ia,grid->ja,tmp,&rank);
911:   PetscFree(tmp);
912: #else
913:   /* Now make the local 'ia' and 'ja' arrays */
914:   ICALLOC(nnodesLoc+1,&grid->ia);
915:   /* Use tmp for a work array */
916:   ICALLOC(nnodesLoc,&tmp);
917:   f77GETIA(&nnodesLoc,&nedgeLoc,grid->eptr,grid->ia,tmp,&rank);
918:   nnz = grid->ia[nnodesLoc] - 1;
919: #if defined(BLOCKING)
920:   PetscPrintf(comm,"The Jacobian has %d non-zero blocks with block size = %d\n",nnz,bs);
921: #else
922:   PetscPrintf(comm,"The Jacobian has %d non-zeros\n",nnz);
923: #endif
924:   ICALLOC(nnz,&grid->ja);
925:   f77GETJA(&nnodesLoc,&nedgeLoc,grid->eptr,grid->ia,grid->ja,tmp,&rank);
926:   PetscFree(tmp);
927: #endif
928:   ICALLOC(nvertices,&grid->loc2glo);
929:   PetscMemcpy(grid->loc2glo,l2a,nvertices*sizeof(int));
930:   PetscFree(l2a);
931:   l2a  = grid->loc2glo;
932:   ICALLOC(nvertices,&grid->loc2pet);
933:   l2p  = grid->loc2pet;
934:   PetscMemcpy(l2p,l2a,nvertices*sizeof(int));
935:   AOApplicationToPetsc(ao,nvertices,l2p);
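  /*
     At this point loc2glo[] maps every local vertex (owned nodes first, then ghost
     nodes) to its global application index, and loc2pet[] holds the same vertices in
     the PETSc ordering; the intent, presumably realized in SetPetscDS(), is that
     these lists drive the vector scatters and matrix preallocation.
  */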


 938:   /* Renumber the unit normals of the dual faces (from node1 to node2)
 939:      and the areas of the dual mesh faces */
940:   FCALLOC(nedgeLocEst,&ftmp);
941:   FCALLOC(nedgeLoc,&ftmp1);
942:   FCALLOC(4*nedgeLoc,&grid->xyzn);
943:   /* Do the x-component */
944:   i = 0; k = 0;
945:   remEdges = nedge;
946:   PetscTime(&time_ini);
947:   while (remEdges > 0) {
948:     readEdges = PetscMin(remEdges,nedgeLocEst);
949:     PetscBinarySynchronizedRead(comm,fdes,ftmp,readEdges,PETSC_SCALAR);
950:     for (j = 0; j < readEdges; j++)
951:       if (edge_bit[k] == (i+j)) {
952:         ftmp1[k] = ftmp[j];
953:         k++;
954:       }
955:     i       += readEdges;
956:     remEdges = remEdges - readEdges;
957:     MPI_Barrier(comm);
958:   }
959:   for (i = 0; i < nedgeLoc; i++)
960: #if defined(INTERLACING)
961:     grid->xyzn[4*i] = ftmp1[eperm[i]];
962: #else
963:     grid->xyzn[i] = ftmp1[eperm[i]];
964: #endif
965:   /* Do the y-component */
966:   i = 0; k = 0;
967:   remEdges = nedge;
968:   while (remEdges > 0) {
969:     readEdges = PetscMin(remEdges,nedgeLocEst);
970:     PetscBinarySynchronizedRead(comm,fdes,ftmp,readEdges,PETSC_SCALAR);
971:     for (j = 0; j < readEdges; j++)
972:       if (edge_bit[k] == (i+j)) {
973:         ftmp1[k] = ftmp[j];
974:         k++;
975:       }
976:     i       += readEdges;
977:     remEdges = remEdges - readEdges;
978:     MPI_Barrier(comm);
979:   }
980:   for (i = 0; i < nedgeLoc; i++)
981: #if defined(INTERLACING)
982:     grid->xyzn[4*i+1] = ftmp1[eperm[i]];
983: #else
984:     grid->xyzn[nedgeLoc+i] = ftmp1[eperm[i]];
985: #endif
986:   /* Do the z-component */
987:   i = 0; k = 0;
988:   remEdges = nedge;
989:   while (remEdges > 0) {
990:     readEdges = PetscMin(remEdges,nedgeLocEst);
991:     PetscBinarySynchronizedRead(comm,fdes,ftmp,readEdges,PETSC_SCALAR);
992:     for (j = 0; j < readEdges; j++)
993:       if (edge_bit[k] == (i+j)) {
994:         ftmp1[k] = ftmp[j];
995:         k++;
996:       }
997:     i       += readEdges;
998:     remEdges = remEdges - readEdges;
999:     MPI_Barrier(comm);
1000:   }
1001:   for (i = 0; i < nedgeLoc; i++)
1002: #if defined(INTERLACING)
1003:     grid->xyzn[4*i+2] = ftmp1[eperm[i]];
1004: #else
1005:     grid->xyzn[2*nedgeLoc+i] = ftmp1[eperm[i]];
1006: #endif
1007:   /* Do the area */
1008:   i = 0; k = 0;
1009:   remEdges = nedge;
1010:   while (remEdges > 0) {
1011:     readEdges = PetscMin(remEdges,nedgeLocEst);
1012:     PetscBinarySynchronizedRead(comm,fdes,ftmp,readEdges,PETSC_SCALAR);
1013:     for (j = 0; j < readEdges; j++)
1014:       if (edge_bit[k] == (i+j)) {
1015:         ftmp1[k] = ftmp[j];
1016:         k++;
1017:       }
1018:     i       += readEdges;
1019:     remEdges = remEdges - readEdges;
1020:     MPI_Barrier(comm);
1021:   }
1022:   for (i = 0; i < nedgeLoc; i++)
1023: #if defined(INTERLACING)
1024:     grid->xyzn[4*i+3] = ftmp1[eperm[i]];
1025: #else
1026:     grid->xyzn[3*nedgeLoc+i] = ftmp1[eperm[i]];
1027: #endif
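  /*
     Layout of the edge geometry just read: with INTERLACING defined, xyzn[4*i+0..3]
     hold the x, y, z components of the dual-face normal and the dual-face area of
     local edge i (after the eperm[] reordering); otherwise the four quantities are
     stored in separate blocks of length nedgeLoc.
  */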

1029:   PetscFree(edge_bit);
1030:   PetscFree(eperm);
1031:   PetscFree(ftmp);
1032:   PetscFree(ftmp1);
1033:   PetscTime(&time_fin);
1034:   time_fin -= time_ini;
1035:   PetscPrintf(comm,"Edge normals partitioned\n");
1036:   PetscPrintf(comm,"Time taken in this phase was %g\n",time_fin);
1037: #if defined(_OPENMP)
1038:   /* Arrange for the division of work among threads */
1039: #if defined(HAVE_EDGE_COLORING)
1040: #elif defined(HAVE_REDUNDANT_WORK)
1041:   FCALLOC(4*nnodesLoc,   &grid->resd);
1042: #else
1043:   {
1044:     /* Get the local adjacency structure of the graph for partitioning the local
1045:       graph into max_threads pieces */
1046:     int *ia,*ja,*vwtg=0,*adjwgt=0,options[5];
1047:     int numflag = 0, wgtflag = 0, edgecut = 0;
1048:     int thr1,thr2,nedgeAllThreads,ned1,ned2;
1049:     ICALLOC((nvertices+1),&ia);
1050:     ICALLOC((2*nedgeLoc),&ja);
1051:     ia[0] = 0;
1052:     for (i = 1; i <= nvertices; i++) ia[i] = grid->ia[i]-i-1;
1053:     for (i = 0; i < nvertices; i++) {
1054:       int jstart,jend;
1055:       jstart = grid->ia[i]-1;
1056:       jend   = grid->ia[i+1]-1;
1057:       k      = ia[i];
1058:       for (j=jstart; j < jend; j++) {
1059:         inode = grid->ja[j]-1;
1060:         if (inode != i) ja[k++] = inode;
1061:       }
1062:     }
1063:     ICALLOC(nvertices,&grid->part_thr);
1064:     PetscMemzero(grid->part_thr,nvertices*sizeof(int));
1065:     options[0] = 0;
1066:     /* Call the METIS partitioning routine */
1067:     if (max_threads > 1)
1068:       METIS_PartGraphRecursive(&nvertices,ia,ja,vwtg,adjwgt,
1069:                                &wgtflag,&numflag,&max_threads,options,&edgecut,grid->part_thr);
1070:     PetscPrintf(MPI_COMM_WORLD,"The number of cut edges is %d\n", edgecut);
1071:     /* Write the partition vector to disk */
1072:     flg  = PETSC_FALSE;
1073:     PetscOptionsGetBool(NULL,NULL,"-omp_partitioning",&flg,NULL);
1074:     if (flg) {
1075:       int  *partv_loc, *partv_glo;
1076:       int  *disp,*counts,*loc2glo_glo;
1077:       char part_file[PETSC_MAX_PATH_LEN];
1078:       FILE *fp;

1080:       ICALLOC(nnodes, &partv_glo);
1081:       ICALLOC(nnodesLoc, &partv_loc);
1082:       for (i = 0; i < nnodesLoc; i++)
1083:         /*partv_loc[i] = grid->part_thr[i]*size + rank;*/
1084:         partv_loc[i] = grid->part_thr[i] + max_threads*rank;
1085:       ICALLOC(size,&disp);
1086:       ICALLOC(size,&counts);
1087:       MPI_Allgather(&nnodesLoc,1,MPI_INT,counts,1,MPI_INT,MPI_COMM_WORLD);
1088:       disp[0] = 0;
1089:       for (i = 1; i < size; i++) disp[i] = counts[i-1] + disp[i-1];
1090:       ICALLOC(nnodes, &loc2glo_glo);
1091:       MPI_Gatherv(grid->loc2glo,nnodesLoc,MPI_INT,loc2glo_glo,counts,disp,MPI_INT,0,MPI_COMM_WORLD);
1092:       MPI_Gatherv(partv_loc,nnodesLoc,MPI_INT,partv_glo,counts,disp,MPI_INT,0,MPI_COMM_WORLD);
1093:       if (!rank) {
1094:         PetscSortIntWithArray(nnodes,loc2glo_glo,partv_glo);
1095:         sprintf(part_file,"hyb_part_vec.%d",2*size);
1096:         fp = fopen(part_file,"w");
1097:         for (i = 0; i < nnodes; i++) fprintf(fp,"%d\n",partv_glo[i]);
1098:         fclose(fp);
1099:       }
1100:       PetscFree(partv_loc);
1101:       PetscFree(partv_glo);
1102:       PetscFree(disp);
1103:       PetscFree(counts);
1104:       PetscFree(loc2glo_glo);
1105:     }

1107:     /* Divide the work among threads */
1108:     k = 0;
1109:     ICALLOC((max_threads+1),&grid->nedge_thr);
1110:     PetscMemzero(grid->nedge_thr,(max_threads+1)*sizeof(int));
1111:     cross_edges = 0;
1112:     for (i = 0; i < nedgeLoc; i++) {
1113:       node1 = grid->eptr[k++]-1;
1114:       node2 = grid->eptr[k++]-1;
1115:       thr1  = grid->part_thr[node1];
1116:       thr2  = grid->part_thr[node2];
1117:       grid->nedge_thr[thr1]+=1;
1118:       if (thr1 != thr2) {
1119:         grid->nedge_thr[thr2]+=1;
1120:         cross_edges++;
1121:       }
1122:     }
1123:     PetscPrintf(MPI_COMM_WORLD,"The number of cross edges after Metis partitioning is %d\n",cross_edges);
1124:     ned1 = grid->nedge_thr[0];
1125:     grid->nedge_thr[0] = 1;
1126:     for (i = 1; i <= max_threads; i++) {
1127:       ned2 = grid->nedge_thr[i];
1128:       grid->nedge_thr[i] = grid->nedge_thr[i-1]+ned1;
1129:       ned1 = ned2;
1130:     }
1131:     /* Allocate a shared edge array. Note that a cut edge is evaluated
1132:         by both the threads but updates are done only for the locally
1133:         owned node */
1134:     grid->nedgeAllThr = nedgeAllThreads = grid->nedge_thr[max_threads]-1;
1135:     ICALLOC(2*nedgeAllThreads, &grid->edge_thr);
1136:     ICALLOC(max_threads,&tmp);
1137:     FCALLOC(4*nedgeAllThreads,&grid->xyzn_thr);
1138:     for (i = 0; i < max_threads; i++) tmp[i] = grid->nedge_thr[i]-1;
1139:     k = 0;
1140:     for (i = 0; i < nedgeLoc; i++) {
1141:       int ie1,ie2,ie3;
1142:       node1 = grid->eptr[k++];
1143:       node2 = grid->eptr[k++];
1144:       thr1  = grid->part_thr[node1-1];
1145:       thr2  = grid->part_thr[node2-1];
1146:       ie1   = 2*tmp[thr1];
1147:       ie2   = 4*tmp[thr1];
1148:       ie3   = 4*i;

1150:       grid->edge_thr[ie1]   = node1;
1151:       grid->edge_thr[ie1+1] = node2;
1152:       grid->xyzn_thr[ie2]   = grid->xyzn[ie3];
1153:       grid->xyzn_thr[ie2+1] = grid->xyzn[ie3+1];
1154:       grid->xyzn_thr[ie2+2] = grid->xyzn[ie3+2];
1155:       grid->xyzn_thr[ie2+3] = grid->xyzn[ie3+3];

1157:       tmp[thr1]+=1;
1158:       if (thr1 != thr2) {
1159:         ie1 = 2*tmp[thr2];
1160:         ie2 = 4*tmp[thr2];

1162:         grid->edge_thr[ie1]   = node1;
1163:         grid->edge_thr[ie1+1] = node2;
1164:         grid->xyzn_thr[ie2]   = grid->xyzn[ie3];
1165:         grid->xyzn_thr[ie2+1] = grid->xyzn[ie3+1];
1166:         grid->xyzn_thr[ie2+2] = grid->xyzn[ie3+2];
1167:         grid->xyzn_thr[ie2+3] = grid->xyzn[ie3+3];

1169:         tmp[thr2]+=1;
1170:       }
1171:     }
1172:   }
1173: #endif
1174: #endif
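  /*
     Summary of the OpenMP work division above: METIS_PartGraphRecursive assigns each
     local vertex to one of max_threads parts (part_thr), nedge_thr[] is turned into a
     1-based running sum of per-thread edge counts, and every edge is copied into the
     shared edge_thr/xyzn_thr arrays once for each thread owning one of its endpoints,
     so a cut edge is evaluated by both threads while each thread updates only its own
     nodes.
  */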

1176:   /* Remap coordinates */
1177:   /*nnodesLocEst = nnodes/size;*/
1178:   nnodesLocEst = PetscMin(nnodes,500000);
1179:   FCALLOC(nnodesLocEst,&ftmp);
1180:   FCALLOC(3*nvertices,&grid->xyz);
1181:   remNodes = nnodes;
1182:   i        = 0;
1183:   PetscTime(&time_ini);
1184:   while (remNodes > 0) {
1185:     readNodes = PetscMin(remNodes,nnodesLocEst);
1186:     PetscBinarySynchronizedRead(comm,fdes,ftmp,readNodes,PETSC_SCALAR);
1187:     for (j = 0; j < readNodes; j++) {
1188:       if (a2l[i+j] >= 0) {
1189: #if defined(INTERLACING)
1190:         grid->xyz[3*a2l[i+j]] = ftmp[j];
1191: #else
1192:         grid->xyz[a2l[i+j]] = ftmp[j];
1193: #endif
1194:       }
1195:     }
1196:     i        += nnodesLocEst;
1197:     remNodes -= nnodesLocEst;
1198:     MPI_Barrier(comm);
1199:   }

1201:   remNodes = nnodes;
1202:   i = 0;
1203:   while (remNodes > 0) {
1204:     readNodes = PetscMin(remNodes,nnodesLocEst);
1205:     PetscBinarySynchronizedRead(comm,fdes,ftmp,readNodes,PETSC_SCALAR);
1206:     for (j = 0; j < readNodes; j++) {
1207:       if (a2l[i+j] >= 0) {
1208: #if defined(INTERLACING)
1209:         grid->xyz[3*a2l[i+j]+1] = ftmp[j];
1210: #else
1211:         grid->xyz[nnodesLoc+a2l[i+j]] = ftmp[j];
1212: #endif
1213:       }
1214:     }
1215:     i        += nnodesLocEst;
1216:     remNodes -= nnodesLocEst;
1217:     MPI_Barrier(comm);
1218:   }

1220:   remNodes = nnodes;
1221:   i        = 0;
1222:   while (remNodes > 0) {
1223:     readNodes = PetscMin(remNodes,nnodesLocEst);
1224:     PetscBinarySynchronizedRead(comm,fdes,ftmp,readNodes,PETSC_SCALAR);
1225:     for (j = 0; j < readNodes; j++) {
1226:       if (a2l[i+j] >= 0) {
1227: #if defined(INTERLACING)
1228:         grid->xyz[3*a2l[i+j]+2] = ftmp[j];
1229: #else
1230:         grid->xyz[2*nnodesLoc+a2l[i+j]] = ftmp[j];
1231: #endif
1232:       }
1233:     }
1234:     i        += nnodesLocEst;
1235:     remNodes -= nnodesLocEst;
1236:     MPI_Barrier(comm);
1237:   }


1240:   /* Renumber dual volume "area" */
1241:   FCALLOC(nvertices,&grid->area);
1242:   remNodes = nnodes;
1243:   i        = 0;
1244:   while (remNodes > 0) {
1245:     readNodes = PetscMin(remNodes,nnodesLocEst);
1246:     PetscBinarySynchronizedRead(comm,fdes,ftmp,readNodes,PETSC_SCALAR);
1247:     for (j = 0; j < readNodes; j++)
1248:       if (a2l[i+j] >= 0)
1249:         grid->area[a2l[i+j]] = ftmp[j];
1250:     i        += nnodesLocEst;
1251:     remNodes -= nnodesLocEst;
1252:     MPI_Barrier(comm);
1253:   }

1255:   PetscFree(ftmp);
1256:   PetscTime(&time_fin);
1257:   time_fin -= time_ini;
1258:   PetscPrintf(comm,"Coordinates remapped\n");
1259:   PetscPrintf(comm,"Time taken in this phase was %g\n",time_fin);

1261: /* Now, handle all the solid boundaries - things to be done:
1262:  * 1. Identify the nodes belonging to the solid
1263:  *    boundaries and count them.
1264:  * 2. Put proper indices into the f2ntn array, after making it
1265:  *    of suitable size.
1266:  * 3. Remap the normals and areas of solid faces (the sxn, syn, szn,
1267:  *    and sa arrays).
1268:  */
1269:   ICALLOC(nnbound,  &grid->nntet);
1270:   ICALLOC(nnbound,  &grid->nnpts);
1271:   ICALLOC(4*nnfacet,&grid->f2ntn);
1272:   ICALLOC(nsnode,&grid->isnode);
1273:   FCALLOC(nsnode,&grid->sxn);
1274:   FCALLOC(nsnode,&grid->syn);
1275:   FCALLOC(nsnode,&grid->szn);
1276:   FCALLOC(nsnode,&grid->sa);
1277:   PetscBinarySynchronizedRead(comm,fdes,grid->nntet,nnbound,PETSC_INT);
1278:   PetscBinarySynchronizedRead(comm,fdes,grid->nnpts,nnbound,PETSC_INT);
1279:   PetscBinarySynchronizedRead(comm,fdes,grid->f2ntn,4*nnfacet,PETSC_INT);
1280:   PetscBinarySynchronizedRead(comm,fdes,grid->isnode,nsnode,PETSC_INT);
1281:   PetscBinarySynchronizedRead(comm,fdes,grid->sxn,nsnode,PETSC_SCALAR);
1282:   PetscBinarySynchronizedRead(comm,fdes,grid->syn,nsnode,PETSC_SCALAR);
1283:   PetscBinarySynchronizedRead(comm,fdes,grid->szn,nsnode,PETSC_SCALAR);

1285:   isurf      = 0;
1286:   nsnodeLoc  = 0;
1287:   nnfacetLoc = 0;
1288:   nb         = 0;
1289:   nte        = 0;
1290:   ICALLOC(3*nnfacet,&tmp);
1291:   ICALLOC(nsnode,&tmp1);
1292:   ICALLOC(nnodes,&tmp2);
1293:   FCALLOC(4*nsnode,&ftmp);
1294:   PetscMemzero(tmp,3*nnfacet*sizeof(int));
1295:   PetscMemzero(tmp1,nsnode*sizeof(int));
1296:   PetscMemzero(tmp2,nnodes*sizeof(int));

1298:   j = 0;
1299:   for (i = 0; i < nsnode; i++) {
1300:     node1 = a2l[grid->isnode[i] - 1];
1301:     if (node1 >= 0) {
1302:       tmp1[nsnodeLoc] = node1;
1303:       tmp2[node1]     = nsnodeLoc;
1304:       ftmp[j++]       = grid->sxn[i];
1305:       ftmp[j++]       = grid->syn[i];
1306:       ftmp[j++]       = grid->szn[i];
1307:       ftmp[j++]       = grid->sa[i];
1308:       nsnodeLoc++;
1309:     }
1310:   }
1311:   for (i = 0; i < nnbound; i++) {
1312:     for (j = isurf; j < isurf + grid->nntet[i]; j++) {
1313:       node1 = a2l[grid->isnode[grid->f2ntn[j] - 1] - 1];
1314:       node2 = a2l[grid->isnode[grid->f2ntn[nnfacet + j] - 1] - 1];
1315:       node3 = a2l[grid->isnode[grid->f2ntn[2*nnfacet + j] - 1] - 1];

1317:       if ((node1 >= 0) && (node2 >= 0) && (node3 >= 0)) {
1318:         nnfacetLoc++;
1319:         nte++;
1320:         tmp[nb++] = tmp2[node1];
1321:         tmp[nb++] = tmp2[node2];
1322:         tmp[nb++] = tmp2[node3];
1323:       }
1324:     }
1325:     isurf += grid->nntet[i];
1326:     /*printf("grid->nntet[%d] before reordering is %d\n",i,grid->nntet[i]);*/
1327:     grid->nntet[i] = nte;
1328:     /*printf("grid->nntet[%d] after reordering is %d\n",i,grid->nntet[i]);*/
1329:     nte = 0;
1330:   }
1331:   PetscFree(grid->f2ntn);
1332:   PetscFree(grid->isnode);
1333:   PetscFree(grid->sxn);
1334:   PetscFree(grid->syn);
1335:   PetscFree(grid->szn);
1336:   PetscFree(grid->sa);
1337:   ICALLOC(4*nnfacetLoc,&grid->f2ntn);
1338:   ICALLOC(nsnodeLoc,&grid->isnode);
1339:   FCALLOC(nsnodeLoc,&grid->sxn);
1340:   FCALLOC(nsnodeLoc,&grid->syn);
1341:   FCALLOC(nsnodeLoc,&grid->szn);
1342:   FCALLOC(nsnodeLoc,&grid->sa);
1343:   j = 0;
1344:   for (i = 0; i < nsnodeLoc; i++) {
1345:     grid->isnode[i] = tmp1[i] + 1;
1346:     grid->sxn[i]    = ftmp[j++];
1347:     grid->syn[i]    = ftmp[j++];
1348:     grid->szn[i]    = ftmp[j++];
1349:     grid->sa[i]     = ftmp[j++];
1350:   }
1351:   j = 0;
1352:   for (i = 0; i < nnfacetLoc; i++) {
1353:     grid->f2ntn[i]              = tmp[j++] + 1;
1354:     grid->f2ntn[nnfacetLoc+i]   = tmp[j++] + 1;
1355:     grid->f2ntn[2*nnfacetLoc+i] = tmp[j++] + 1;
1356:   }
1357:   PetscFree(tmp);
1358:   PetscFree(tmp1);
1359:   PetscFree(tmp2);
1360:   PetscFree(ftmp);

1362: /* Now identify the triangles on which the current processor
1363:    will perform the force calculation */
1364:   ICALLOC(nnfacetLoc,&grid->sface_bit);
1365:   PetscMemzero(grid->sface_bit,nnfacetLoc*sizeof(int));
1366:   for (i = 0; i < nnfacetLoc; i++) {
1367:     node1 = l2a[grid->isnode[grid->f2ntn[i] - 1] - 1];
1368:     node2 = l2a[grid->isnode[grid->f2ntn[nnfacetLoc + i] - 1] - 1];
1369:     node3 = l2a[grid->isnode[grid->f2ntn[2*nnfacetLoc + i] - 1] - 1];
1370:     if (((v2p[node1] >= rank) && (v2p[node2] >= rank)
1371:          && (v2p[node3] >= rank)) &&
1372:         ((v2p[node1] == rank) || (v2p[node2] == rank)
1373:          || (v2p[node3] == rank)))
1374:       grid->sface_bit[i] = 1;
1375:   }
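  /*
     Ownership rule used above: a boundary triangle gets sface_bit = 1 only on the
     rank equal to the smallest of the three owning ranks v2p[node1], v2p[node2],
     v2p[node3], so each shared triangle enters the force calculation in f77FORCE
     exactly once.
  */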
1376:   /*printf("On processor %d total solid triangles = %d,locally owned = %d alpha = %d\n",rank,totTr,myTr,alpha);*/
1377:   PetscPrintf(comm,"Solid boundaries partitioned\n");

1379: /* Now, handle all the viscous boundaries - things to be done:
1380:  * 1. Identify the nodes belonging to the viscous
1381:  *    boundaries and count them.
1382:  * 2. Put proper indices into the f2ntv array, after making it
1383:  *    of suitable size.
1384:  * 3. Remap the normals and areas of viscous faces (the vxn, vyn, vzn,
1385:  *    and va arrays).
1386:  */
1387:   ICALLOC(nvbound,  &grid->nvtet);
1388:   ICALLOC(nvbound,  &grid->nvpts);
1389:   ICALLOC(4*nvfacet,&grid->f2ntv);
1390:   ICALLOC(nvnode,&grid->ivnode);
1391:   FCALLOC(nvnode,&grid->vxn);
1392:   FCALLOC(nvnode,&grid->vyn);
1393:   FCALLOC(nvnode,&grid->vzn);
1394:   FCALLOC(nvnode,&grid->va);
1395:   PetscBinarySynchronizedRead(comm,fdes,grid->nvtet,nvbound,PETSC_INT);
1396:   PetscBinarySynchronizedRead(comm,fdes,grid->nvpts,nvbound,PETSC_INT);
1397:   PetscBinarySynchronizedRead(comm,fdes,grid->f2ntv,4*nvfacet,PETSC_INT);
1398:   PetscBinarySynchronizedRead(comm,fdes,grid->ivnode,nvnode,PETSC_INT);
1399:   PetscBinarySynchronizedRead(comm,fdes,grid->vxn,nvnode,PETSC_SCALAR);
1400:   PetscBinarySynchronizedRead(comm,fdes,grid->vyn,nvnode,PETSC_SCALAR);
1401:   PetscBinarySynchronizedRead(comm,fdes,grid->vzn,nvnode,PETSC_SCALAR);

1403:   isurf      = 0;
1404:   nvnodeLoc  = 0;
1405:   nvfacetLoc = 0;
1406:   nb         = 0;
1407:   nte        = 0;
1408:   ICALLOC(3*nvfacet,&tmp);
1409:   ICALLOC(nvnode,&tmp1);
1410:   ICALLOC(nnodes,&tmp2);
1411:   FCALLOC(4*nvnode,&ftmp);
1412:   PetscMemzero(tmp,3*nvfacet*sizeof(int));
1413:   PetscMemzero(tmp1,nvnode*sizeof(int));
1414:   PetscMemzero(tmp2,nnodes*sizeof(int));

1416:   j = 0;
1417:   for (i = 0; i < nvnode; i++) {
1418:     node1 = a2l[grid->ivnode[i] - 1];
1419:     if (node1 >= 0) {
1420:       tmp1[nvnodeLoc] = node1;
1421:       tmp2[node1]     = nvnodeLoc;
1422:       ftmp[j++]       = grid->vxn[i];
1423:       ftmp[j++]       = grid->vyn[i];
1424:       ftmp[j++]       = grid->vzn[i];
1425:       ftmp[j++]       = grid->va[i];
1426:       nvnodeLoc++;
1427:     }
1428:   }
1429:   for (i = 0; i < nvbound; i++) {
1430:     for (j = isurf; j < isurf + grid->nvtet[i]; j++) {
1431:       node1 = a2l[grid->ivnode[grid->f2ntv[j] - 1] - 1];
1432:       node2 = a2l[grid->ivnode[grid->f2ntv[nvfacet + j] - 1] - 1];
1433:       node3 = a2l[grid->ivnode[grid->f2ntv[2*nvfacet + j] - 1] - 1];
1434:       if ((node1 >= 0) && (node2 >= 0) && (node3 >= 0)) {
1435:         nvfacetLoc++;
1436:         nte++;
1437:         tmp[nb++] = tmp2[node1];
1438:         tmp[nb++] = tmp2[node2];
1439:         tmp[nb++] = tmp2[node3];
1440:       }
1441:     }
1442:     isurf         += grid->nvtet[i];
1443:     grid->nvtet[i] = nte;
1444:     nte            = 0;
1445:   }
1446:   PetscFree(grid->f2ntv);
1447:   PetscFree(grid->ivnode);
1448:   PetscFree(grid->vxn);
1449:   PetscFree(grid->vyn);
1450:   PetscFree(grid->vzn);
1451:   PetscFree(grid->va);
1452:   ICALLOC(4*nvfacetLoc,&grid->f2ntv);
1453:   ICALLOC(nvnodeLoc,&grid->ivnode);
1454:   FCALLOC(nvnodeLoc,&grid->vxn);
1455:   FCALLOC(nvnodeLoc,&grid->vyn);
1456:   FCALLOC(nvnodeLoc,&grid->vzn);
1457:   FCALLOC(nvnodeLoc,&grid->va);
1458:   j = 0;
1459:   for (i = 0; i < nvnodeLoc; i++) {
1460:     grid->ivnode[i] = tmp1[i] + 1;
1461:     grid->vxn[i]    = ftmp[j++];
1462:     grid->vyn[i]    = ftmp[j++];
1463:     grid->vzn[i]    = ftmp[j++];
1464:     grid->va[i]     = ftmp[j++];
1465:   }
1466:   j = 0;
1467:   for (i = 0; i < nvfacetLoc; i++) {
1468:     grid->f2ntv[i]              = tmp[j++] + 1;
1469:     grid->f2ntv[nvfacetLoc+i]   = tmp[j++] + 1;
1470:     grid->f2ntv[2*nvfacetLoc+i] = tmp[j++] + 1;
1471:   }
1472:   PetscFree(tmp);
1473:   PetscFree(tmp1);
1474:   PetscFree(tmp2);
1475:   PetscFree(ftmp);

1477: /* Now identify the triangles on which the current processor
1478:    would perform force calculation */
1479:   ICALLOC(nvfacetLoc,&grid->vface_bit);
1480:   PetscMemzero(grid->vface_bit,nvfacetLoc*sizeof(int));
1481:   for (i = 0; i < nvfacetLoc; i++) {
1482:     node1 = l2a[grid->ivnode[grid->f2ntv[i] - 1] - 1];
1483:     node2 = l2a[grid->ivnode[grid->f2ntv[nvfacetLoc + i] - 1] - 1];
1484:     node3 = l2a[grid->ivnode[grid->f2ntv[2*nvfacetLoc + i] - 1] - 1];
1485:     if (((v2p[node1] >= rank) && (v2p[node2] >= rank)
1486:          && (v2p[node3] >= rank)) &&
1487:         ((v2p[node1] == rank) || (v2p[node2] == rank)
1488:         || (v2p[node3] == rank))) {
1489:          grid->vface_bit[i] = 1;
1490:     }
1491:   }
1492:   PetscFree(v2p);
1493:   PetscPrintf(comm,"Viscous boundaries partitioned\n");

1495: /* Now,handle all the free boundaries - things to be done :
1496:  * 1. Identify the nodes belonging to the free
1497:  *    boundaries and count them.
1498:  * 2. Put proper indices into f2ntf array,after making it
1499:  *    of suitable size
1500:  * 3. Remap the normals and areas of free bound. faces (fxn,fyn,fzn,
1501:  *    and fa arrays).
1502:  */

1504:   ICALLOC(nfbound,  &grid->nftet);
1505:   ICALLOC(nfbound,  &grid->nfpts);
1506:   ICALLOC(4*nffacet,&grid->f2ntf);
1507:   ICALLOC(nfnode,&grid->ifnode);
1508:   FCALLOC(nfnode,&grid->fxn);
1509:   FCALLOC(nfnode,&grid->fyn);
1510:   FCALLOC(nfnode,&grid->fzn);
1511:   FCALLOC(nfnode,&grid->fa);
1512:   PetscBinarySynchronizedRead(comm,fdes,grid->nftet,nfbound,PETSC_INT);
1513:   PetscBinarySynchronizedRead(comm,fdes,grid->nfpts,nfbound,PETSC_INT);
1514:   PetscBinarySynchronizedRead(comm,fdes,grid->f2ntf,4*nffacet,PETSC_INT);
1515:   PetscBinarySynchronizedRead(comm,fdes,grid->ifnode,nfnode,PETSC_INT);
1516:   PetscBinarySynchronizedRead(comm,fdes,grid->fxn,nfnode,PETSC_SCALAR);
1517:   PetscBinarySynchronizedRead(comm,fdes,grid->fyn,nfnode,PETSC_SCALAR);
1518:   PetscBinarySynchronizedRead(comm,fdes,grid->fzn,nfnode,PETSC_SCALAR);

1520:   isurf      = 0;
1521:   nfnodeLoc  = 0;
1522:   nffacetLoc = 0;
1523:   nb         = 0;
1524:   nte        = 0;
1525:   ICALLOC(3*nffacet,&tmp);
1526:   ICALLOC(nfnode,&tmp1);
1527:   ICALLOC(nnodes,&tmp2);
1528:   FCALLOC(4*nfnode,&ftmp);
1529:   PetscMemzero(tmp,3*nffacet*sizeof(int));
1530:   PetscMemzero(tmp1,nfnode*sizeof(int));
1531:   PetscMemzero(tmp2,nnodes*sizeof(int));

1533:   j = 0;
1534:   for (i = 0; i < nfnode; i++) {
1535:     node1 = a2l[grid->ifnode[i] - 1];
1536:     if (node1 >= 0) {
1537:       tmp1[nfnodeLoc] = node1;
1538:       tmp2[node1]     = nfnodeLoc;
1539:       ftmp[j++]       = grid->fxn[i];
1540:       ftmp[j++]       = grid->fyn[i];
1541:       ftmp[j++]       = grid->fzn[i];
1542:       ftmp[j++]       = grid->fa[i];
1543:       nfnodeLoc++;
1544:     }
1545:   }
1546:   for (i = 0; i < nfbound; i++) {
1547:     for (j = isurf; j < isurf + grid->nftet[i]; j++) {
1548:       node1 = a2l[grid->ifnode[grid->f2ntf[j] - 1] - 1];
1549:       node2 = a2l[grid->ifnode[grid->f2ntf[nffacet + j] - 1] - 1];
1550:       node3 = a2l[grid->ifnode[grid->f2ntf[2*nffacet + j] - 1] - 1];
1551:       if ((node1 >= 0) && (node2 >= 0) && (node3 >= 0)) {
1552:         nffacetLoc++;
1553:         nte++;
1554:         tmp[nb++] = tmp2[node1];
1555:         tmp[nb++] = tmp2[node2];
1556:         tmp[nb++] = tmp2[node3];
1557:       }
1558:     }
1559:     isurf         += grid->nftet[i];
1560:     grid->nftet[i] = nte;
1561:     nte            = 0;
1562:   }
1563:   PetscFree(grid->f2ntf);
1564:   PetscFree(grid->ifnode);
1565:   PetscFree(grid->fxn);
1566:   PetscFree(grid->fyn);
1567:   PetscFree(grid->fzn);
1568:   PetscFree(grid->fa);
1569:   ICALLOC(4*nffacetLoc,&grid->f2ntf);
1570:   ICALLOC(nfnodeLoc,&grid->ifnode);
1571:   FCALLOC(nfnodeLoc,&grid->fxn);
1572:   FCALLOC(nfnodeLoc,&grid->fyn);
1573:   FCALLOC(nfnodeLoc,&grid->fzn);
1574:   FCALLOC(nfnodeLoc,&grid->fa);
1575:   j = 0;
1576:   for (i = 0; i < nfnodeLoc; i++) {
1577:     grid->ifnode[i] = tmp1[i] + 1;
1578:     grid->fxn[i]    = ftmp[j++];
1579:     grid->fyn[i]    = ftmp[j++];
1580:     grid->fzn[i]    = ftmp[j++];
1581:     grid->fa[i]     = ftmp[j++];
1582:   }
1583:   j = 0;
1584:   for (i = 0; i < nffacetLoc; i++) {
1585:     grid->f2ntf[i]              = tmp[j++] + 1;
1586:     grid->f2ntf[nffacetLoc+i]   = tmp[j++] + 1;
1587:     grid->f2ntf[2*nffacetLoc+i] = tmp[j++] + 1;
1588:   }


1591:   PetscFree(tmp);
1592:   PetscFree(tmp1);
1593:   PetscFree(tmp2);
1594:   PetscFree(ftmp);
1595:   PetscPrintf(comm,"Free boundaries partitioned\n");

1597:   flg  = PETSC_FALSE;
1598:   PetscOptionsGetBool(0,"-mem_use",&flg,NULL);
1599:   if (flg) {
1600:     PetscMemoryView(PETSC_VIEWER_STDOUT_WORLD,"Memory usage after partitioning\n");
1601:   }

1603:   /* Put different mappings and other info into grid */
1604:   /* ICALLOC(nvertices,&grid->loc2pet);
1605:    ICALLOC(nvertices,&grid->loc2glo);
1606:    PetscMemcpy(grid->loc2pet,l2p,nvertices*sizeof(int));
1607:    PetscMemcpy(grid->loc2glo,l2a,nvertices*sizeof(int));
1608:    PetscFree(l2a);
1609:    PetscFree(l2p);*/

1611:   grid->nnodesLoc  = nnodesLoc;
1612:   grid->nedgeLoc   = nedgeLoc;
1613:   grid->nvertices  = nvertices;
1614:   grid->nsnodeLoc  = nsnodeLoc;
1615:   grid->nvnodeLoc  = nvnodeLoc;
1616:   grid->nfnodeLoc  = nfnodeLoc;
1617:   grid->nnfacetLoc = nnfacetLoc;
1618:   grid->nvfacetLoc = nvfacetLoc;
1619:   grid->nffacetLoc = nffacetLoc;
1620: /*
1621:  * FCALLOC(nvertices*4, &grid->gradx);
1622:  * FCALLOC(nvertices*4, &grid->grady);
1623:  * FCALLOC(nvertices*4, &grid->gradz);
1624:  */
1625:   FCALLOC(nvertices,   &grid->cdt);
1626:   FCALLOC(nvertices*4, &grid->phi);
1627: /*
1628:    FCALLOC(nvertices,   &grid->r11);
1629:    FCALLOC(nvertices,   &grid->r12);
1630:    FCALLOC(nvertices,   &grid->r13);
1631:    FCALLOC(nvertices,   &grid->r22);
1632:    FCALLOC(nvertices,   &grid->r23);
1633:    FCALLOC(nvertices,   &grid->r33);
1634: */
1635:   FCALLOC(7*nnodesLoc,   &grid->rxy);

1637: /* Map the 'ja' array in petsc ordering */
1638:   for (i = 0; i < nnz; i++) grid->ja[i] = l2a[grid->ja[i] - 1];
1639:   AOApplicationToPetsc(ao,nnz,grid->ja);
1640:   AODestroy(&ao);

1642: /* Print the different mappings
1643:  *
1644:  */
1645:   {
1646:     int partLoc[7],partMax[7],partMin[7],partSum[7];
1647:     partLoc[0] = nnodesLoc;
1648:     partLoc[1] = nvertices;
1649:     partLoc[2] = nedgeLoc;
1650:     partLoc[3] = nnfacetLoc;
1651:     partLoc[4] = nffacetLoc;
1652:     partLoc[5] = nsnodeLoc;
1653:     partLoc[6] = nfnodeLoc;
1654:     for (i = 0; i < 7; i++) {
1655:       partMin[i] = 0;
1656:       partMax[i] = 0;
1657:       partSum[i] = 0;
1658:     }

1660:     MPI_Allreduce(partLoc,partMax,7,MPI_INT,MPI_MAX,comm);
1661:     MPI_Allreduce(partLoc,partMin,7,MPI_INT,MPI_MIN,comm);
1662:     MPI_Allreduce(partLoc,partSum,7,MPI_INT,MPI_SUM,comm);
1663:     PetscPrintf(comm,"==============================\n");
1664:     PetscPrintf(comm,"Partitioning quality info ....\n");
1665:     PetscPrintf(comm,"==============================\n");
1666:     PetscPrintf(comm,"------------------------------------------------------------\n");
1667:     PetscPrintf(comm,"Item                    Min        Max    Average      Total\n");
1668:     PetscPrintf(comm,"------------------------------------------------------------\n");
1669:     PetscPrintf(comm,"Local Nodes       %9d  %9d  %9d  %9d\n",
1670:                        partMin[0],partMax[0],partSum[0]/size,partSum[0]);
1671:     PetscPrintf(comm,"Local+Ghost Nodes %9d  %9d  %9d  %9d\n",
1672:                        partMin[1],partMax[1],partSum[1]/size,partSum[1]);
1673:     PetscPrintf(comm,"Local Edges       %9d  %9d  %9d  %9d\n",
1674:                        partMin[2],partMax[2],partSum[2]/size,partSum[2]);
1675:     PetscPrintf(comm,"Local solid faces %9d  %9d  %9d  %9d\n",
1676:                        partMin[3],partMax[3],partSum[3]/size,partSum[3]);
1677:     PetscPrintf(comm,"Local free faces  %9d  %9d  %9d  %9d\n",
1678:                        partMin[4],partMax[4],partSum[4]/size,partSum[4]);
1679:     PetscPrintf(comm,"Local solid nodes %9d  %9d  %9d  %9d\n",
1680:                        partMin[5],partMax[5],partSum[5]/size,partSum[5]);
1681:     PetscPrintf(comm,"Local free nodes  %9d  %9d  %9d  %9d\n",
1682:                        partMin[6],partMax[6],partSum[6]/size,partSum[6]);
1683:     PetscPrintf(comm,"------------------------------------------------------------\n");
1684:   }
1685:   flg  = PETSC_FALSE;
1686:   PetscOptionsGetBool(0,"-partition_info",&flg,NULL);
1687:   if (flg) {
1688:     char part_file[PETSC_MAX_PATH_LEN];
1689:     sprintf(part_file,"output.%d",rank);
1690:     fptr1 = fopen(part_file,"w");

1692:     fprintf(fptr1,"---------------------------------------------\n");
1693:     fprintf(fptr1,"Local and Global Grid Parameters are :\n");
1694:     fprintf(fptr1,"---------------------------------------------\n");
1695:     fprintf(fptr1,"Local\t\t\t\tGlobal\n");
1696:     fprintf(fptr1,"nnodesLoc = %d\t\tnnodes = %d\n",nnodesLoc,nnodes);
1697:     fprintf(fptr1,"nedgeLoc = %d\t\t\tnedge = %d\n",nedgeLoc,nedge);
1698:     fprintf(fptr1,"nnfacetLoc = %d\t\tnnfacet = %d\n",nnfacetLoc,nnfacet);
1699:     fprintf(fptr1,"nvfacetLoc = %d\t\t\tnvfacet = %d\n",nvfacetLoc,nvfacet);
1700:     fprintf(fptr1,"nffacetLoc = %d\t\t\tnffacet = %d\n",nffacetLoc,nffacet);
1701:     fprintf(fptr1,"nsnodeLoc = %d\t\t\tnsnode = %d\n",nsnodeLoc,nsnode);
1702:     fprintf(fptr1,"nvnodeLoc = %d\t\t\tnvnode = %d\n",nvnodeLoc,nvnode);
1703:     fprintf(fptr1,"nfnodeLoc = %d\t\t\tnfnode = %d\n",nfnodeLoc,nfnode);
1704:     fprintf(fptr1,"\n");
1705:     fprintf(fptr1,"nvertices = %d\n",nvertices);
1706:     fprintf(fptr1,"nnbound = %d\n",nnbound);
1707:     fprintf(fptr1,"nvbound = %d\n",nvbound);
1708:     fprintf(fptr1,"nfbound = %d\n",nfbound);
1709:     fprintf(fptr1,"\n");

1711:     fprintf(fptr1,"---------------------------------------------\n");
1712:     fprintf(fptr1,"Different Orderings\n");
1713:     fprintf(fptr1,"---------------------------------------------\n");
1714:     fprintf(fptr1,"Local\t\tPETSc\t\tGlobal\n");
1715:     fprintf(fptr1,"---------------------------------------------\n");
1716:     for (i = 0; i < nvertices; i++) fprintf(fptr1,"%d\t\t%d\t\t%d\n",i,grid->loc2pet[i],grid->loc2glo[i]);
1717:     fprintf(fptr1,"\n");

1719:     fprintf(fptr1,"---------------------------------------------\n");
1720:     fprintf(fptr1,"Solid Boundary Nodes\n");
1721:     fprintf(fptr1,"---------------------------------------------\n");
1722:     fprintf(fptr1,"Local\t\tPETSc\t\tGlobal\n");
1723:     fprintf(fptr1,"---------------------------------------------\n");
1724:     for (i = 0; i < nsnodeLoc; i++) {
1725:       j = grid->isnode[i]-1;
1726:       fprintf(fptr1,"%d\t\t%d\t\t%d\n",j,grid->loc2pet[j],grid->loc2glo[j]);
1727:     }
1728:     fprintf(fptr1,"\n");
1729:     fprintf(fptr1,"---------------------------------------------\n");
1730:     fprintf(fptr1,"f2ntn array\n");
1731:     fprintf(fptr1,"---------------------------------------------\n");
1732:     for (i = 0; i < nnfacetLoc; i++) {
1733:       fprintf(fptr1,"%d\t\t%d\t\t%d\t\t%d\n",i,grid->f2ntn[i],
1734:               grid->f2ntn[nnfacetLoc+i],grid->f2ntn[2*nnfacetLoc+i]);
1735:     }
1736:     fprintf(fptr1,"\n");


1739:     fprintf(fptr1,"---------------------------------------------\n");
1740:     fprintf(fptr1,"Viscous Boundary Nodes\n");
1741:     fprintf(fptr1,"---------------------------------------------\n");
1742:     fprintf(fptr1,"Local\t\tPETSc\t\tGlobal\n");
1743:     fprintf(fptr1,"---------------------------------------------\n");
1744:     for (i = 0; i < nvnodeLoc; i++) {
1745:       j = grid->ivnode[i]-1;
1746:       fprintf(fptr1,"%d\t\t%d\t\t%d\n",j,grid->loc2pet[j],grid->loc2glo[j]);
1747:     }
1748:     fprintf(fptr1,"\n");
1749:     fprintf(fptr1,"---------------------------------------------\n");
1750:     fprintf(fptr1,"f2ntv array\n");
1751:     fprintf(fptr1,"---------------------------------------------\n");
1752:     for (i = 0; i < nvfacetLoc; i++) {
1753:       fprintf(fptr1,"%d\t\t%d\t\t%d\t\t%d\n",i,grid->f2ntv[i],
1754:               grid->f2ntv[nvfacetLoc+i],grid->f2ntv[2*nvfacetLoc+i]);
1755:     }
1756:     fprintf(fptr1,"\n");

1758:     fprintf(fptr1,"---------------------------------------------\n");
1759:     fprintf(fptr1,"Free Boundary Nodes\n");
1760:     fprintf(fptr1,"---------------------------------------------\n");
1761:     fprintf(fptr1,"Local\t\tPETSc\t\tGlobal\n");
1762:     fprintf(fptr1,"---------------------------------------------\n");
1763:     for (i = 0; i < nfnodeLoc; i++) {
1764:       j = grid->ifnode[i]-1;
1765:       fprintf(fptr1,"%d\t\t%d\t\t%d\n",j,grid->loc2pet[j],grid->loc2glo[j]);
1766:     }
1767:     fprintf(fptr1,"\n");
1768:     fprintf(fptr1,"---------------------------------------------\n");
1769:     fprintf(fptr1,"f2ntf array\n");
1770:     fprintf(fptr1,"---------------------------------------------\n");
1771:     for (i = 0; i < nffacetLoc; i++) {
1772:       fprintf(fptr1,"%d\t\t%d\t\t%d\t\t%d\n",i,grid->f2ntf[i],
1773:               grid->f2ntf[nffacetLoc+i],grid->f2ntf[2*nffacetLoc+i]);
1774:     }
1775:     fprintf(fptr1,"\n");

1777:     fprintf(fptr1,"---------------------------------------------\n");
1778:     fprintf(fptr1,"Neighborhood Info In Various Ordering\n");
1779:     fprintf(fptr1,"---------------------------------------------\n");
1780:     ICALLOC(nnodes,&p2l);
1781:     for (i = 0; i < nvertices; i++) p2l[grid->loc2pet[i]] = i;
1782:     for (i = 0; i < nnodesLoc; i++) {
1783:       jstart = grid->ia[grid->loc2glo[i]] - 1;
1784:       jend   = grid->ia[grid->loc2glo[i]+1] - 1;
1785:       fprintf(fptr1,"Neighbors of Node %d in Local Ordering are :",i);
1786:       for (j = jstart; j < jend; j++) fprintf(fptr1,"%d ",p2l[grid->ja[j]]);
1787:       fprintf(fptr1,"\n");

1789:       fprintf(fptr1,"Neighbors of Node %d in PETSc ordering are :",grid->loc2pet[i]);
1790:       for (j = jstart; j < jend; j++) fprintf(fptr1,"%d ",grid->ja[j]);
1791:       fprintf(fptr1,"\n");

1793:       fprintf(fptr1,"Neighbors of Node %d in Global Ordering are :",grid->loc2glo[i]);
1794:       for (j = jstart; j < jend; j++) fprintf(fptr1,"%d ",grid->loc2glo[p2l[grid->ja[j]]]);
1795:       fprintf(fptr1,"\n");

1797:     }
1798:     fprintf(fptr1,"\n");
1799:     PetscFree(p2l);
1800:     fclose(fptr1);
1801:   }

1803: /* Free the temporary arrays */
1804:   PetscFree(a2l);
1805:   MPI_Barrier(comm);
1806:   return(0);
1807: }
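/* Editor's note: a minimal sketch of the AO conversion used above on the 'ja'
   array.  The helper name, guard, and tiny index sets are hypothetical; only
   AOCreateBasic/AOApplicationToPetsc/AODestroy are the PETSc calls in use. */
#if defined(FUN3D_AO_EXAMPLE)            /* hypothetical guard, never defined by the build */
static PetscErrorCode AOExample(void)
{
  AO       ao;
  PetscInt app[3]   = {2,0,1};           /* application numbering of three nodes */
  PetscInt petsc[3] = {0,1,2};           /* the matching PETSc numbering         */
  PetscInt idx[2]   = {2,1};             /* indices given in application order   */

  AOCreateBasic(PETSC_COMM_SELF,3,app,petsc,&ao);
  AOApplicationToPetsc(ao,2,idx);        /* idx becomes {0,2}, renumbered in place */
  AODestroy(&ao);
  return 0;
}
#endif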

1809: /*
1810:   encode 3 8-bit binary bytes as 4 '6-bit' characters, len is the number of bytes remaining, at most 3 are used
1811: */
1812: void *base64_encodeblock(void *vout,const void *vin,int len)
1813: {
1814:   unsigned char *out = (unsigned char*)vout,in[3] = {0,0,0};
1815:   /* Translation Table as described in RFC1113 */
1816:   static const char cb64[]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1817:   memcpy(in,vin,PetscMin(len,3));
1818:   out[0] = cb64[in[0] >> 2];
1819:   out[1] = cb64[((in[0] & 0x03) << 4) | ((in[1] & 0xf0) >> 4)];
1820:   out[2] = (unsigned char) (len > 1 ? cb64[((in[1] & 0x0f) << 2) | ((in[2] & 0xc0) >> 6)] : '=');
1821:   out[3] = (unsigned char) (len > 2 ? cb64[in[2] & 0x3f] : '=');
1822:   return (void*)(out+4);
1823: }
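/* Editor's note: a minimal usage sketch of the encoder above (not called by the
   solver; the wrapper name and guard are hypothetical).  Three input bytes map
   to four output characters, and a short final block is padded with '='. */
#if defined(FUN3D_BASE64_EXAMPLE)
static void Base64Example(void)
{
  char out[5] = {0,0,0,0,0};
  (void)base64_encodeblock(out,"Man",3);  /* out now holds "TWFu" */
  (void)base64_encodeblock(out,"M",1);    /* out now holds "TQ==" */
}
#endif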

1825: /* Write binary data, does not do byte swapping. */
1826: static PetscErrorCode PetscFWrite_FUN3D(MPI_Comm comm,FILE *fp,void *data,PetscInt n,PetscDataType dtype,PetscBool base64)
1827: {
1829:   PetscMPIInt    rank;

1832:   if (n < 0) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"Trying to write a negative amount of data %D",n);
1833:   if (!n) return(0);
1834:   MPI_Comm_rank(comm,&rank);
1835:   if (!rank) {
1836:     size_t count;
1837:     int    bytes,size;      /* use a local byte size so the global MPI 'size' is not clobbered */
1838:     switch (dtype) {
1839:     case PETSC_DOUBLE:
1840:       size = sizeof(double);
1841:       break;
1842:     case PETSC_FLOAT:
1843:       size = sizeof(float);
1844:       break;
1845:     case PETSC_INT:
1846:       size = sizeof(PetscInt);
1847:       break;
1848:     case PETSC_CHAR:
1849:       size = sizeof(char);
1850:       break;
1851:     default: SETERRQ(comm,PETSC_ERR_SUP,"Data type not supported");
1852:     }
1853:     bytes = size*n;
1854:     if (base64) {
1855:       unsigned char *buf,*ptr;
1856:       int           i;
1857:       size_t        b64alloc = 9 + (n*size*4) / 3 + (n*size*4) % 3;
1858:       PetscMalloc(b64alloc,&buf);
1859:       ptr  = buf;
1860:       ptr  = (unsigned char*)base64_encodeblock(ptr,&bytes,3);
1861:       ptr  = (unsigned char*)base64_encodeblock(ptr,((char*)&bytes)+3,1);
1862:       for (i=0; i<bytes; i+=3) {
1863:         int left = bytes - i;
1864:         ptr = (unsigned char*)base64_encodeblock(ptr,((char*)data)+i,left);
1865:       }
1866:       *ptr++ = '\n';
1867:       /* printf("encoded 4+%d raw bytes in %zd base64 chars, allocated for %zd\n",bytes,ptr-buf,b64alloc); */
1868:       count = fwrite(buf,1,ptr-buf,fp);
1869:       if (count < (size_t)(ptr-buf)) {
1870:         perror("");
1871:         SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_WRITE,"Wrote %D of %D bytes",(PetscInt)count,(PetscInt)(ptr-buf));
1872:       }
1873:       PetscFree(buf);
1874:     } else {
1875:       count = fwrite(&bytes,sizeof(int),1,fp);
1876:       if (count != 1) {
1877:         perror("");
1878:         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_WRITE,"Error writing byte count");
1879:       }
1880:       count = fwrite(data,size,(size_t)n,fp);
1881:       if ((int)count != n) {
1882:         perror("");
1883:         SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_FILE_WRITE,"Wrote %D/%D array members of size %D",(PetscInt)count,n,(PetscInt)size);
1884:       }
1885:     }
1886:   }
1887:   return(0);
1888: }
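/* Editor's note: a sketch of the raw (non-base64) layout the routine above
   produces for VTK appended data: a 4-byte length header followed by the
   payload.  The helper name, guard, and FILE argument are hypothetical. */
#if defined(FUN3D_FWRITE_EXAMPLE)
static PetscErrorCode FWriteExample(FILE *fp)
{
  double vals[3] = {1.0,2.0,3.0};
  /* Writes the int 24 (= 3*sizeof(double)) and then the 24 bytes of data */
  PetscFWrite_FUN3D(PETSC_COMM_SELF,fp,vals,3,PETSC_DOUBLE,PETSC_FALSE);
  return 0;
}
#endif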

1890: static void SortInt2(PetscInt *a,PetscInt *b)
1891: {
1892:   if (*b < *a) {
1893:     PetscInt c = *b;
1894:     *b = *a;
1895:     *a = c;
1896:   }
1897: }

1899: /* b = intersection(a,b) */
1900: static PetscErrorCode IntersectInt(PetscInt na,const PetscInt *a,PetscInt *nb,PetscInt *b)
1901: {
1902:   PetscInt i,n,j;

1905:   j = 0;
1906:   n = 0;
1907:   for (i=0; i<*nb; i++) {
1908:     while (j<na && a[j]<b[i]) j++;
1909:     if (j<na && a[j]==b[i]) {
1910:       b[n++] = b[i];
1911:       j++;
1912:     }
1913:   }
1914:   *nb = n;
1915:   return(0);
1916: }
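/* Editor's note: both input lists must be sorted in increasing order and b is
   compacted in place.  A quick check (hypothetical helper, not used above): */
#if defined(FUN3D_INTERSECT_EXAMPLE)
static PetscErrorCode IntersectExample(void)
{
  PetscInt a[4] = {1,3,5,7},b[4] = {2,3,5,8},nb = 4;
  IntersectInt(4,a,&nb,b);   /* leaves nb == 2 with b[0] == 3, b[1] == 5 */
  return 0;
}
#endif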

1918: /*
1919:   This function currently has a semantic bug: it only produces cells whose edges are all stored locally.  Since the local mesh
1920:   does not even store edges between unowned nodes, primal cells that are effectively shared between processes will not
1921:   be constructed. This causes visualization artifacts.

1923:   This issue could be resolved by either (a) storing more edges from the original mesh or (b) communicating an extra
1924:   layer of edges in this function.
1925: */
1926: static PetscErrorCode InferLocalCellConnectivity(PetscInt nnodes,PetscInt nedge,const PetscInt *eptr,PetscInt *incell,PetscInt **iconn)
1927: {
1929:   PetscInt       ncell,acell,(*conn)[4],node0,node1,node2,node3,i,j,k,l,rowmax;
1930:   PetscInt       *ui,*uj,*utmp,*tmp1,*tmp2,*tmp3,ntmp1,ntmp2,ntmp3;
1931: #if defined(INTERLACING)
1932: #  define GetEdge(eptr,i,n1,n2) do { n1 = eptr[i*2+0]-1; n2 = eptr[i*2+1]-1; } while (0)
1933: #else
1934: #  define GetEdge(eptr,i,n1,n2) do { n1 = eptr[i+0*nedge]-1; n2 = eptr[i+1*nedge]-1; } while (0)
1935: #endif

1938:   *incell = -1;
1939:   *iconn  = NULL;
1940:   acell   = 100000;              /* allocate for this many cells */
1941:   PetscMalloc1(acell,&conn);
1942:   PetscMalloc2(nnodes+1,&ui,nedge,&uj);
1943:   PetscCalloc1(nnodes,&utmp);
1944:   /* count the number of edges in the upper-triangular matrix u */
1945:   for (i=0; i<nedge; i++) {     /* count number of nonzeros in upper triangular matrix */
1946:     GetEdge(eptr,i,node0,node1);
1947:     utmp[PetscMin(node0,node1)]++;
1948:   }
1949:   rowmax = 0;
1950:   ui[0]  = 0;
1951:   for (i=0; i<nnodes; i++) {
1952:     rowmax  = PetscMax(rowmax,utmp[i]);
1953:     ui[i+1] = ui[i] + utmp[i]; /* convert from count to row offsets */
1954:     utmp[i] = 0;
1955:   }
1956:   for (i=0; i<nedge; i++) {     /* assemble upper triangular matrix U */
1957:     GetEdge(eptr,i,node0,node1);
1958:     SortInt2(&node0,&node1);
1959:     uj[ui[node0] + utmp[node0]++] = node1;
1960:   }
1961:   PetscFree(utmp);
1962:   for (i=0; i<nnodes; i++) {    /* sort every row */
1963:     PetscInt n = ui[i+1] - ui[i];
1964:     PetscSortInt(n,&uj[ui[i]]);
1965:   }

1967:   /* Infer cells */
1968:   ncell = 0;
1969:   PetscMalloc3(rowmax,&tmp1,rowmax,&tmp2,rowmax,&tmp3);
1970:   for (i=0; i<nnodes; i++) {
1971:     node0 = i;
1972:     ntmp1 = ui[node0+1] - ui[node0]; /* Number of candidates for node1 */
1973:     PetscMemcpy(tmp1,&uj[ui[node0]],ntmp1*sizeof(PetscInt));
1974:     for (j=0; j<ntmp1; j++) {
1975:       node1 = tmp1[j];
1976:       if (node1 < 0 || nnodes <= node1) SETERRQ2(PETSC_COMM_SELF,1,"node index %D out of range [0,%D)",node1,nnodes);
1977:       if (node1 <= node0) SETERRQ2(PETSC_COMM_SELF,1,"forward neighbor of %D is %D, should be larger",node0,node1);
1978:       ntmp2 = ui[node1+1] - ui[node1];
1979:       PetscMemcpy(tmp2,&uj[ui[node1]],ntmp2*sizeof(PetscInt));
1980:       IntersectInt(ntmp1,tmp1,&ntmp2,tmp2);
1981:       for (k=0; k<ntmp2; k++) {
1982:         node2 = tmp2[k];
1983:         if (node2 < 0 || nnodes <= node2) SETERRQ2(PETSC_COMM_SELF,1,"node index %D out of range [0,%D)",node2,nnodes);
1984:         if (node2 <= node1) SETERRQ2(PETSC_COMM_SELF,1,"forward neighbor of %D is %D, should be larger",node1,node2);
1985:         ntmp3 = ui[node2+1] - ui[node2];
1986:         PetscMemcpy(tmp3,&uj[ui[node2]],ntmp3*sizeof(PetscInt));
1987:         IntersectInt(ntmp2,tmp2,&ntmp3,tmp3);
1988:         for (l=0; l<ntmp3; l++) {
1989:           node3 = tmp3[l];
1990:           if (node3 < 0 || nnodes <= node3) SETERRQ2(PETSC_COMM_SELF,1,"node index %D out of range [0,%D)",node3,nnodes);
1991:           if (node3 <= node2) SETERRQ2(PETSC_COMM_SELF,1,"forward neighbor of %D is %D, should be larger",node2,node3);
1992:           if (ncell >= acell) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"buffer exceeded");
1993:           if (ntmp3 < 3) continue;
1994:           conn[ncell][0] = node0;
1995:           conn[ncell][1] = node1;
1996:           conn[ncell][2] = node2;
1997:           conn[ncell][3] = node3;
1998:           if (0) {
1999:             PetscViewer viewer = PETSC_VIEWER_STDOUT_WORLD;
2000:             PetscViewerASCIIPrintf(viewer,"created cell %d: %d %d %d %d\n",ncell,node0,node1,node2,node3);
2001:             PetscIntView(ntmp1,tmp1,viewer);
2002:             PetscIntView(ntmp2,tmp2,viewer);
2003:             PetscIntView(ntmp3,tmp3,viewer);
2004:             /* uns3d.msh has a handful of "tetrahedra" that overlap by violating the following condition. As far as I
2005:              * can tell, that means it is an invalid mesh. I don't know what the intent was. */
2006:             if (ntmp3 > 2) SETERRQ(PETSC_COMM_SELF,1,"More than two ways to complete a tetrahedron using a common triangle");
2007:           }
2008:           ncell++;
2009:         }
2010:       }
2011:     }
2012:   }
2013:   PetscFree3(tmp1,tmp2,tmp3);
2014:   PetscFree2(ui,uj);

2016:   PetscSynchronizedPrintf(PETSC_COMM_WORLD,"Inferred %D cells with nnodes=%D nedge=%D\n",ncell,nnodes,nedge);
2017:   PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);
2018:   *incell = ncell;
2019:   *iconn  = (PetscInt*)conn;
2020:   return(0);
2021: }
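/* Editor's note: a small worked example of the upper-triangular adjacency
   built above (row counts -> offsets -> fill), independent of the cell search.
   For the three edges (0,1),(0,2),(1,2) the row offsets become ui = {0,2,3,3}
   and the column list uj = {1,2,2}.  The helper name and guard are hypothetical. */
#if defined(FUN3D_CSR_EXAMPLE)
static PetscErrorCode UpperTriangularExample(void)
{
  const PetscInt nnodes = 3,nedge = 3,edges[3][2] = {{0,1},{0,2},{1,2}};
  PetscInt       ui[4] = {0,0,0,0},uj[3],cnt[3] = {0,0,0},i;

  for (i=0; i<nedge; i++) ui[PetscMin(edges[i][0],edges[i][1])+1]++;   /* count per upper-triangular row */
  for (i=0; i<nnodes; i++) ui[i+1] += ui[i];                           /* convert counts to offsets: {0,2,3,3} */
  for (i=0; i<nedge; i++) {
    PetscInt lo = PetscMin(edges[i][0],edges[i][1]),hi = PetscMax(edges[i][0],edges[i][1]);
    uj[ui[lo] + cnt[lo]++] = hi;                                       /* fill columns: uj = {1,2,2} */
  }
  return 0;
}
#endif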

2023: static PetscErrorCode GridCompleteOverlap(GRID *grid,PetscInt *invertices,PetscInt *inedgeOv,PetscInt **ieptrOv)
2024: {
2026:   PetscInt       nedgeLoc,nedgeOv,i,j,cnt,node0,node1,node0p,node1p,nnodes,nnodesLoc,nvertices,rstart,nodeEdgeCountAll,nodeEdgeRstart;
2027:   PetscInt       *nodeEdgeCount,*nodeEdgeOffset,*eIdxOv,*p2l,*eptrOv;
2028:   Vec            VNodeEdge,VNodeEdgeInfo,VNodeEdgeInfoOv,VNodeEdgeOv;
2029:   PetscScalar    *vne,*vnei;
2030:   IS             isglobal,isedgeOv;
2031:   VecScatter     nescat,neiscat;
2032:   PetscBool      flg;

2035:   nnodes    = grid->nnodes;     /* Total number of global nodes */
2036:   nnodesLoc = grid->nnodesLoc;  /* Number of owned nodes */
2037:   nvertices = grid->nvertices;  /* Number of owned+ghosted nodes */
2038:   nedgeLoc  = grid->nedgeLoc;   /* Number of edges connected to owned nodes */

2040:   /* Count the number of neighbors of each owned node */
2041:   MPI_Scan(&nnodesLoc,&rstart,1,MPIU_INT,MPI_SUM,PETSC_COMM_WORLD);
2042:   rstart -= nnodesLoc;
2043:   PetscMalloc2(nnodesLoc,&nodeEdgeCount,nnodesLoc,&nodeEdgeOffset);
2044:   PetscMemzero(nodeEdgeCount,nnodesLoc*sizeof(*nodeEdgeCount));
2045:   for (i=0; i<nedgeLoc; i++) {
2046:     GetEdge(grid->eptr,i,node0,node1);
2047:     node0p = grid->loc2pet[node0];
2048:     node1p = grid->loc2pet[node1];
2049:     if (rstart <= node0p && node0p < rstart+nnodesLoc) nodeEdgeCount[node0p-rstart]++;
2050:     if (rstart <= node1p && node1p < rstart+nnodesLoc) nodeEdgeCount[node1p-rstart]++;
2051:   }
2052:   /* Get the offset in the node-based edge array */
2053:   nodeEdgeOffset[0] = 0;
2054:   for (i=0; i<nnodesLoc-1; i++) nodeEdgeOffset[i+1] = nodeEdgeOffset[i] + nodeEdgeCount[i];
2055:   nodeEdgeCountAll = nodeEdgeCount[nnodesLoc-1] + nodeEdgeOffset[nnodesLoc-1];

2057:   /* Pack a Vec by node of all the edges for that node. The nodes are stored by global index */
2058:   VecCreateMPI(PETSC_COMM_WORLD,nodeEdgeCountAll,PETSC_DETERMINE,&VNodeEdge);
2059:   PetscMemzero(nodeEdgeCount,nnodesLoc*sizeof(*nodeEdgeCount));
2060:   VecGetArray(VNodeEdge,&vne);
2061:   for (i=0; i<nedgeLoc; i++) {
2062:     GetEdge(grid->eptr,i,node0,node1);
2063:     node0p = grid->loc2pet[node0];
2064:     node1p = grid->loc2pet[node1];
2065:     if (rstart <= node0p && node0p < rstart+nnodesLoc) vne[nodeEdgeOffset[node0p-rstart] + nodeEdgeCount[node0p-rstart]++] = node1p;
2066:     if (rstart <= node1p && node1p < rstart+nnodesLoc) vne[nodeEdgeOffset[node1p-rstart] + nodeEdgeCount[node1p-rstart]++] = node0p;
2067:   }
2068:   VecRestoreArray(VNodeEdge,&vne);
2069:   VecGetOwnershipRange(VNodeEdge,&nodeEdgeRstart,NULL);

2071:   /* Move the count and offset into a Vec so that we can use VecScatter, translating offset from local to global */
2072:   VecCreate(PETSC_COMM_WORLD,&VNodeEdgeInfo);
2073:   VecSetSizes(VNodeEdgeInfo,2*nnodesLoc,2*nnodes);
2074:   VecSetBlockSize(VNodeEdgeInfo,2);
2075:   VecSetType(VNodeEdgeInfo,VECMPI);

2077:   VecGetArray(VNodeEdgeInfo,&vnei);
2078:   for (i=0; i<nnodesLoc; i++) {
2079:     vnei[i*2+0] = nodeEdgeCount[i];                   /* Total number of edges from this vertex */
2080:     vnei[i*2+1] = nodeEdgeOffset[i] + nodeEdgeRstart; /* Now the global index in the next comm round */
2081:   }
2082:   VecRestoreArray(VNodeEdgeInfo,&vnei);
2083:   PetscFree2(nodeEdgeCount,nodeEdgeOffset);

2085:   /* Create a Vec to receive the edge count and global offset for each node in owned+ghosted, get them, and clean up */
2086:   VecCreate(PETSC_COMM_SELF,&VNodeEdgeInfoOv);
2087:   VecSetSizes(VNodeEdgeInfoOv,2*nvertices,2*nvertices);
2088:   VecSetBlockSize(VNodeEdgeInfoOv,2);
2089:   VecSetType(VNodeEdgeInfoOv,VECSEQ);

2091:   ISCreateBlock(PETSC_COMM_WORLD,2,nvertices,grid->loc2pet,PETSC_COPY_VALUES,&isglobal); /* Address the nodes in overlap to get info from */
2092:   VecScatterCreate(VNodeEdgeInfo,isglobal,VNodeEdgeInfoOv,NULL,&neiscat);
2093:   VecScatterBegin(neiscat,VNodeEdgeInfo,VNodeEdgeInfoOv,INSERT_VALUES,SCATTER_FORWARD);
2094:   VecScatterEnd(neiscat,VNodeEdgeInfo,VNodeEdgeInfoOv,INSERT_VALUES,SCATTER_FORWARD);
2095:   VecScatterDestroy(&neiscat);
2096:   VecDestroy(&VNodeEdgeInfo);
2097:   ISDestroy(&isglobal);

2099:   /* Create a Vec to receive the actual edges for all nodes (owned and ghosted), execute the scatter */
2100:   nedgeOv = 0;                  /* First count the number of edges in the complete overlap */
2101:   VecGetArray(VNodeEdgeInfoOv,&vnei);
2102:   for (i=0; i<nvertices; i++) nedgeOv += (PetscInt)vnei[2*i+0];
2103:   /* Allocate for the global indices in VNodeEdge of the edges to receive */
2104:   PetscMalloc1(nedgeOv,&eIdxOv);
2105:   for (i=0,cnt=0; i<nvertices; i++) {
2106:     for (j=0; j<(PetscInt)vnei[2*i+0]; j++) eIdxOv[cnt++] = (PetscInt)vnei[2*i+1] + j;
2107:   }
2108:   VecRestoreArray(VNodeEdgeInfoOv,&vnei);
2109:   ISCreateGeneral(PETSC_COMM_WORLD,nedgeOv,eIdxOv,PETSC_USE_POINTER,&isedgeOv);
2110:   VecCreateSeq(PETSC_COMM_SELF,nedgeOv,&VNodeEdgeOv);
2111:   VecScatterCreate(VNodeEdge,isedgeOv,VNodeEdgeOv,NULL,&nescat);
2112:   VecScatterBegin(nescat,VNodeEdge,VNodeEdgeOv,INSERT_VALUES,SCATTER_FORWARD);
2113:   VecScatterEnd(nescat,VNodeEdge,VNodeEdgeOv,INSERT_VALUES,SCATTER_FORWARD);
2114:   VecScatterDestroy(&nescat);
2115:   VecDestroy(&VNodeEdge);
2116:   ISDestroy(&isedgeOv);
2117:   PetscFree(eIdxOv);

2119:   PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] %s: number of edges before pruning: %D, half=%D\n",rank,PETSC_FUNCTION_NAME,nedgeOv,nedgeOv/2);

2121:   /* Create the non-scalable global-to-local index map. Yuck, but it has already been used elsewhere. */
2122:   PetscMalloc1(nnodes,&p2l);
2123:   for (i=0; i<nnodes; i++) p2l[i] = -1;
2124:   for (i=0; i<nvertices; i++) p2l[grid->loc2pet[i]] = i;
2125:   if (1) {
2126:     PetscInt m = 0;
2127:     for (i=0; i<nnodes; i++) m += (p2l[i] >= 0);
2128:     PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] %s: number of global indices that map to local indices: %D; nvertices=%D nnodesLoc=%D nnodes=%D\n",rank,PETSC_FUNCTION_NAME,m,nvertices,nnodesLoc,nnodes);
2129:   }

2131:   /* Log each edge connecting nodes in owned+ghosted exactly once */
2132:   VecGetArray(VNodeEdgeInfoOv,&vnei);
2133:   VecGetArray(VNodeEdgeOv,&vne);
2134:   /* First count the number of edges to keep */
2135:   nedgeOv = 0;
2136:   for (i=0,cnt=0; i<nvertices; i++) {
2137:     PetscInt n = (PetscInt)vnei[2*i+0]; /* number of nodes connected to i */
2138:     node0 = i;
2139:     for (j=0; j<n; j++) {
2140:       node1p = vne[cnt++];
2141:       node1  = p2l[node1p];
2142:       if (node0 < node1) nedgeOv++;
2143:     }
2144:   }
2145:   /* Array of edges to keep */
2146:   PetscMalloc1(2*nedgeOv,&eptrOv);
2147:   nedgeOv = 0;
2148:   for (i=0,cnt=0; i<nvertices; i++) {
2149:     PetscInt n = (PetscInt)vnei[2*i+0]; /* number of nodes connected to i */
2150:     node0 = i;
2151:     for (j=0; j<n; j++) {
2152:       node1p = vne[cnt++];
2153:       node1  = p2l[node1p];
2154:       if (node0 < node1) {
2155:         eptrOv[2*nedgeOv+0] = node0;
2156:         eptrOv[2*nedgeOv+1] = node1;
2157:         nedgeOv++;
2158:       }
2159:     }
2160:   }
2161:   VecRestoreArray(VNodeEdgeInfoOv,&vnei);
2162:   VecRestoreArray(VNodeEdgeOv,&vne);
2163:   VecDestroy(&VNodeEdgeInfoOv);
2164:   VecDestroy(&VNodeEdgeOv);
2165:   PetscFree(p2l);

2167:   PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] %s: nedgeLoc=%D nedgeOv=%D\n",rank,PETSC_FUNCTION_NAME,nedgeLoc,nedgeOv);
2168:   PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);

2170:   flg  = PETSC_TRUE;
2171:   PetscOptionsGetBool(NULL,NULL,"-complete_overlap",&flg,NULL);
2172:   if (flg) {
2173:     *invertices = grid->nvertices; /* We did not change the number of vertices */
2174:     *inedgeOv   = nedgeOv;
2175:     *ieptrOv    = eptrOv;
2176:   } else {
2177:     *invertices = grid->nvertices;
2178:     *inedgeOv   = nedgeLoc;
2179:     PetscFree(eptrOv);
2180:     PetscMalloc1(2*nedgeLoc,&eptrOv);
2181:     PetscMemcpy(eptrOv,grid->eptr,2*nedgeLoc*sizeof(PetscInt));
2182:     *ieptrOv    = eptrOv;
2183:   }
2184:   return(0);
2185: }
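/* Editor's note: a sketch of the MPI_Scan idiom used above to find each rank's
   first global node number: the inclusive prefix sum minus the local count.
   For local counts {3,5,2} on ranks 0..2 this yields rstart = {0,3,8}.  The
   helper name and guard are hypothetical. */
#if defined(FUN3D_SCAN_EXAMPLE)
static PetscErrorCode OwnershipStartExample(PetscInt nlocal,PetscInt *rstart)
{
  MPI_Scan(&nlocal,rstart,1,MPIU_INT,MPI_SUM,PETSC_COMM_WORLD);
  *rstart -= nlocal;   /* convert the inclusive scan into this rank's starting offset */
  return 0;
}
#endif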

2187: static PetscErrorCode WritePVTU(AppCtx *user,const char *fname,PetscBool base64)
2188: {
2189:   GRID              *grid  = user->grid;
2190:   TstepCtx          *tsCtx = user->tsCtx;
2191:   FILE              *vtu,*pvtu;
2192:   char              pvtu_fname[PETSC_MAX_PATH_LEN],vtu_fname[PETSC_MAX_PATH_LEN];
2193:   MPI_Comm          comm;
2194:   PetscMPIInt       rank,size;
2195:   PetscInt          i,nvertices = 0,nedgeLoc = 0,ncells,bs,nloc,boffset = 0,*eptr = NULL;
2196:   PetscErrorCode    ierr;
2197:   Vec               Xloc,Xploc,Xuloc;
2198:   unsigned char     *celltype;
2199:   int               *celloffset,*conn,*cellrank;
2200:   const PetscScalar *x;
2201:   PetscScalar       *xu,*xp;
2202:   const char        *byte_order = PetscBinaryBigEndian() ? "BigEndian" : "LittleEndian";

2205:   GridCompleteOverlap(user->grid,&nvertices,&nedgeLoc,&eptr);
2206:   comm = PETSC_COMM_WORLD;
2207:   MPI_Comm_rank(comm,&rank);
2208:   MPI_Comm_size(comm,&size);
2209: #if defined(PETSC_USE_COMPLEX) || !defined(PETSC_USE_REAL_DOUBLE) || defined(PETSC_USE_64BIT_INDICES)
2210:   SETERRQ(comm,PETSC_ERR_SUP,"This function is only implemented for scalar-type=real precision=double, 32-bit indices");
2211: #endif
2212:   PetscSNPrintf(pvtu_fname,sizeof(pvtu_fname),"%s-%D.pvtu",fname,tsCtx->itstep);
2213:   PetscSNPrintf(vtu_fname,sizeof(vtu_fname),"%s-%D-%D.vtu",fname,tsCtx->itstep,rank);
2214:   PetscFOpen(comm,pvtu_fname,"w",&pvtu);
2215:   PetscFPrintf(comm,pvtu,"<?xml version=\"1.0\"?>\n");
2216:   PetscFPrintf(comm,pvtu,"<VTKFile type=\"PUnstructuredGrid\" version=\"0.1\" byte_order=\"%s\">\n",byte_order);
2217:   PetscFPrintf(comm,pvtu," <PUnstructuredGrid GhostLevel=\"0\">\n");
2218:   PetscFPrintf(comm,pvtu,"  <PPointData Scalars=\"Pressure\" Vectors=\"Velocity\">\n");
2219:   PetscFPrintf(comm,pvtu,"   <PDataArray type=\"Float64\" Name=\"Pressure\" NumberOfComponents=\"1\" />\n");
2220:   PetscFPrintf(comm,pvtu,"   <PDataArray type=\"Float64\" Name=\"Velocity\" NumberOfComponents=\"3\" />\n");
2221:   PetscFPrintf(comm,pvtu,"  </PPointData>\n");
2222:   PetscFPrintf(comm,pvtu,"  <PCellData>\n");
2223:   PetscFPrintf(comm,pvtu,"   <PDataArray type=\"Int32\" Name=\"Rank\" NumberOfComponents=\"1\" />\n");
2224:   PetscFPrintf(comm,pvtu,"  </PCellData>\n");
2225:   PetscFPrintf(comm,pvtu,"  <PPoints>\n");
2226:   PetscFPrintf(comm,pvtu,"   <PDataArray type=\"Float64\" Name=\"Position\" NumberOfComponents=\"3\" />\n");
2227:   PetscFPrintf(comm,pvtu,"  </PPoints>\n");
2228:   PetscFPrintf(comm,pvtu,"  <PCells>\n");
2229:   PetscFPrintf(comm,pvtu,"   <PDataArray type=\"Int32\" Name=\"connectivity\" NumberOfComponents=\"1\" />\n");
2230:   PetscFPrintf(comm,pvtu,"   <PDataArray type=\"Int32\" Name=\"offsets\"      NumberOfComponents=\"1\" />\n");
2231:   PetscFPrintf(comm,pvtu,"   <PDataArray type=\"UInt8\" Name=\"types\"        NumberOfComponents=\"1\" />\n");
2232:   PetscFPrintf(comm,pvtu,"  </PCells>\n");
2233:   for (i=0; i<size; i++) {
2234:     PetscFPrintf(comm,pvtu,"  <Piece Source=\"%s-%D-%D.vtu\" />\n",fname,tsCtx->itstep,i);
2235:   }
2236:   PetscFPrintf(comm,pvtu," </PUnstructuredGrid>\n");
2237:   PetscFPrintf(comm,pvtu,"</VTKFile>\n");
2238:   PetscFClose(comm,pvtu);

2240:   Xloc = grid->qnodeLoc;
2241:   VecScatterBegin(grid->scatter,grid->qnode,Xloc,INSERT_VALUES,SCATTER_FORWARD);
2242:   VecScatterEnd(grid->scatter,grid->qnode,Xloc,INSERT_VALUES,SCATTER_FORWARD);
2243:   VecGetBlockSize(Xloc,&bs);
2244:   if (bs != 4) SETERRQ1(PETSC_COMM_WORLD,PETSC_ERR_ARG_INCOMP,"expected block size 4, got %D",bs);
2245:   VecGetSize(Xloc,&nloc);
2246:   if (nloc/bs != nvertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"expected nloc/bs=%D to match nvertices=%D",nloc/bs,nvertices);
2247:   if (nvertices != grid->nvertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"expected nvertices=%D to match grid->nvertices=%D",nvertices,grid->nvertices);
2248:   VecCreateSeq(PETSC_COMM_SELF,nvertices,&Xploc);

2250:   VecCreate(PETSC_COMM_SELF,&Xuloc);
2251:   VecSetSizes(Xuloc,3*nvertices,3*nvertices);
2252:   VecSetBlockSize(Xuloc,3);
2253:   VecSetType(Xuloc,VECSEQ);

2255:   VecGetArrayRead(Xloc,&x);
2256:   VecGetArray(Xploc,&xp);
2257:   VecGetArray(Xuloc,&xu);
2258:   for (i=0; i<nvertices; i++) {
2259:     xp[i]     = x[i*4+0];
2260:     xu[i*3+0] = x[i*4+1];
2261:     xu[i*3+1] = x[i*4+2];
2262:     xu[i*3+2] = x[i*4+3];
2263:   }
2264:   VecRestoreArrayRead(Xloc,&x);

2266:   InferLocalCellConnectivity(nvertices,nedgeLoc,eptr,&ncells,&conn);

2268:   PetscFOpen(PETSC_COMM_SELF,vtu_fname,"w",&vtu);
2269:   PetscFPrintf(PETSC_COMM_SELF,vtu,"<?xml version=\"1.0\"?>\n");
2270:   PetscFPrintf(PETSC_COMM_SELF,vtu,"<VTKFile type=\"UnstructuredGrid\" version=\"0.1\" byte_order=\"%s\">\n",byte_order);
2271:   PetscFPrintf(PETSC_COMM_SELF,vtu," <UnstructuredGrid>\n");
2272:   PetscFPrintf(PETSC_COMM_SELF,vtu,"  <Piece NumberOfPoints=\"%D\" NumberOfCells=\"%D\">\n",nvertices,ncells);

2274:   /* Solution fields */
2275:   PetscFPrintf(PETSC_COMM_SELF,vtu,"   <PointData Scalars=\"Pressure\" Vectors=\"Velocity\">\n");
2276:   PetscFPrintf(PETSC_COMM_SELF,vtu,"    <DataArray type=\"Float64\" Name=\"Pressure\" NumberOfComponents=\"1\" format=\"appended\" offset=\"%D\" />\n",boffset);
2277:   boffset += nvertices*sizeof(PetscScalar) + sizeof(int);
2278:   PetscFPrintf(PETSC_COMM_SELF,vtu,"    <DataArray type=\"Float64\" Name=\"Velocity\" NumberOfComponents=\"3\" format=\"appended\" offset=\"%D\" />\n",boffset);
2279:   boffset += nvertices*3*sizeof(PetscScalar) + sizeof(int);
2280:   PetscFPrintf(PETSC_COMM_SELF,vtu,"   </PointData>\n");
2281:   PetscFPrintf(PETSC_COMM_SELF,vtu,"   <CellData>\n");
2282:   PetscFPrintf(PETSC_COMM_SELF,vtu,"    <DataArray type=\"Int32\" Name=\"Rank\" NumberOfComponents=\"1\" format=\"appended\" offset=\"%D\" />\n",boffset);
2283:   boffset += ncells*sizeof(int) + sizeof(int);
2284:   PetscFPrintf(PETSC_COMM_SELF,vtu,"   </CellData>\n");
2285:   /* Coordinate positions */
2286:   PetscFPrintf(PETSC_COMM_SELF,vtu,"   <Points>\n");
2287:   PetscFPrintf(PETSC_COMM_SELF,vtu,"    <DataArray type=\"Float64\" Name=\"Position\" NumberOfComponents=\"3\" format=\"appended\" offset=\"%D\" />\n",boffset);
2288:   boffset += nvertices*3*sizeof(double) + sizeof(int);
2289:   PetscFPrintf(PETSC_COMM_SELF,vtu,"   </Points>\n");
2290:   /* Cell connectivity */
2291:   PetscFPrintf(PETSC_COMM_SELF,vtu,"   <Cells>\n");
2292:   PetscFPrintf(PETSC_COMM_SELF,vtu,"    <DataArray type=\"Int32\" Name=\"connectivity\" NumberOfComponents=\"1\" format=\"appended\" offset=\"%D\" />\n",boffset);
2293:   boffset += ncells*4*sizeof(int) + sizeof(int);
2294:   PetscFPrintf(PETSC_COMM_SELF,vtu,"    <DataArray type=\"Int32\" Name=\"offsets\"      NumberOfComponents=\"1\" format=\"appended\" offset=\"%D\" />\n",boffset);
2295:   boffset += ncells*sizeof(int) + sizeof(int);
2296:   PetscFPrintf(PETSC_COMM_SELF,vtu,"    <DataArray type=\"UInt8\" Name=\"types\"        NumberOfComponents=\"1\" format=\"appended\" offset=\"%D\" />\n",boffset);
2297:   boffset += ncells*sizeof(unsigned char) + sizeof(int);
2298:   PetscFPrintf(PETSC_COMM_SELF,vtu,"   </Cells>\n");
2299:   PetscFPrintf(PETSC_COMM_SELF,vtu,"  </Piece>\n");
2300:   PetscFPrintf(PETSC_COMM_SELF,vtu," </UnstructuredGrid>\n");
2301:   PetscFPrintf(PETSC_COMM_SELF,vtu," <AppendedData encoding=\"%s\">\n",base64 ? "base64" : "raw");
2302:   PetscFPrintf(PETSC_COMM_SELF,vtu,"_");

2304:   /* Write pressure */
2305:   PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,xp,nvertices,PETSC_SCALAR,base64);

2307:   /* Write velocity */
2308:   PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,xu,nvertices*3,PETSC_SCALAR,base64);

2310:   /* Label cell rank, not a measure of computation because nothing is actually computed at cells.  This is written
2311:    * primarily to aid in debugging. The partition for computation should label vertices. */
2312:   PetscMalloc1(ncells,&cellrank);
2313:   for (i=0; i<ncells; i++) cellrank[i] = rank;
2314:   PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,cellrank,ncells,PETSC_INT,base64);
2315:   PetscFree(cellrank);

2317:   PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,grid->xyz,nvertices*3,PETSC_DOUBLE,base64);
2318:   PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,conn,ncells*4,PETSC_INT,base64);
2319:   PetscFree(conn);

2321:   PetscMalloc1(ncells,&celloffset);
2322:   for (i=0; i<ncells; i++) celloffset[i] = 4*(i+1);
2323:   PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,celloffset,ncells,PETSC_INT,base64);
2324:   PetscFree(celloffset);

2326:   PetscMalloc1(ncells,&celltype);
2327:   for (i=0; i<ncells; i++) celltype[i] = 10; /* VTK_TETRA */
2328:   PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,celltype,ncells,PETSC_CHAR,base64);
2329:   PetscFree(celltype);

2331:   PetscFPrintf(PETSC_COMM_SELF,vtu,"\n </AppendedData>\n");
2332:   PetscFPrintf(PETSC_COMM_SELF,vtu,"</VTKFile>\n");
2333:   PetscFClose(PETSC_COMM_SELF,vtu);

2335:   VecRestoreArray(Xploc,&xp);
2336:   VecRestoreArray(Xuloc,&xu);
2337:   VecDestroy(&Xploc);
2338:   VecDestroy(&Xuloc);
2339:   PetscFree(eptr);
2340:   return(0);
2341: }
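/* Editor's note: a sketch of how the writer above might be invoked (the call
   site, guard, and file prefix are hypothetical).  For fname "flow" at time
   step 7 on two ranks it produces flow-7.pvtu plus flow-7-0.vtu and
   flow-7-1.vtu; a viewer such as ParaView is pointed at the .pvtu file. */
#if defined(FUN3D_PVTU_EXAMPLE)
static PetscErrorCode WritePVTUExample(AppCtx *user)
{
  return WritePVTU(user,"flow",PETSC_TRUE);   /* PETSC_TRUE selects base64 appended data */
}
#endif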

2343: /*---------------------------------------------------------------------*/
2344: int SetPetscDS(GRID *grid,TstepCtx *tsCtx)
2345: /*---------------------------------------------------------------------*/
2346: {
2347:   int                    ierr,i,j,bs;
2348:   int                    nnodes,jstart,jend,nbrs_diag,nbrs_offd;
2349:   int                    nnodesLoc,nvertices;
2350:   int                    *val_diag,*val_offd,*svertices,*loc2pet;
2351:   IS                     isglobal,islocal;
2352:   ISLocalToGlobalMapping isl2g;
2353:   PetscBool              flg;
2354:   MPI_Comm               comm = PETSC_COMM_WORLD;

2357:   nnodes    = grid->nnodes;
2358:   nnodesLoc = grid->nnodesLoc;
2359:   nvertices = grid->nvertices;
2360:   loc2pet   = grid->loc2pet;
2361:   bs        = 4;

2363:   /* Set up the PETSc datastructures */

2365:   VecCreate(comm,&grid->qnode);
2366:   VecSetSizes(grid->qnode,bs*nnodesLoc,bs*nnodes);
2367:   VecSetBlockSize(grid->qnode,bs);
2368:   VecSetType(grid->qnode,VECMPI);

2370:   VecDuplicate(grid->qnode,&grid->res);
2371:   VecDuplicate(grid->qnode,&tsCtx->qold);
2372:   VecDuplicate(grid->qnode,&tsCtx->func);

2374:   VecCreate(MPI_COMM_SELF,&grid->qnodeLoc);
2375:   VecSetSizes(grid->qnodeLoc,bs*nvertices,bs*nvertices);
2376:   VecSetBlockSize(grid->qnodeLoc,bs);
2377:   VecSetType(grid->qnodeLoc,VECSEQ);

2379:   VecCreate(comm,&grid->grad);
2380:   VecSetSizes(grid->grad,3*bs*nnodesLoc,3*bs*nnodes);
2381:   VecSetBlockSize(grid->grad,3*bs);
2382:   VecSetType(grid->grad,VECMPI);

2384:   VecCreate(MPI_COMM_SELF,&grid->gradLoc);
2385:   VecSetSizes(grid->gradLoc,3*bs*nvertices,3*bs*nvertices);
2386:   VecSetBlockSize(grid->gradLoc,3*bs);
2387:   VecSetType(grid->gradLoc,VECSEQ);


2390: /* Create Scatter between the local and global vectors */
2391: /* First create scatter for qnode */
2392:   ISCreateStride(MPI_COMM_SELF,bs*nvertices,0,1,&islocal);
2393: #if defined(INTERLACING)
2394: #if defined(BLOCKING)
2395:   ICALLOC(nvertices,&svertices);
2396:   for (i=0; i < nvertices; i++) svertices[i] = loc2pet[i];
2397:   ISCreateBlock(MPI_COMM_SELF,bs,nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2398: #else
2399:   ICALLOC(bs*nvertices,&svertices);
2400:   for (i = 0; i < nvertices; i++)
2401:     for (j = 0; j < bs; j++) svertices[j+bs*i] = j + bs*loc2pet[i];
2402:   ISCreateGeneral(MPI_COMM_SELF,bs*nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2403: #endif
2404: #else
2405:   ICALLOC(bs*nvertices,&svertices);
2406:   for (j = 0; j < bs; j++)
2407:     for (i = 0; i < nvertices; i++) svertices[j*nvertices+i] = j*nvertices + loc2pet[i];
2408:   ISCreateGeneral(MPI_COMM_SELF,bs*nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2409: #endif
2410:   PetscFree(svertices);
2411:   VecScatterCreate(grid->qnode,isglobal,grid->qnodeLoc,islocal,&grid->scatter);
2412:   ISDestroy(&isglobal);
2413:   ISDestroy(&islocal);

2415: /* Now create scatter for gradient vector of qnode */
2416:   ISCreateStride(MPI_COMM_SELF,3*bs*nvertices,0,1,&islocal);
2417: #if defined(INTERLACING)
2418: #if defined(BLOCKING)
2419:   ICALLOC(nvertices,&svertices);
2420:   for (i=0; i < nvertices; i++) svertices[i] = loc2pet[i];
2421:   ISCreateBlock(MPI_COMM_SELF,3*bs,nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2422: #else
2423:   ICALLOC(3*bs*nvertices,&svertices);
2424:   for (i = 0; i < nvertices; i++)
2425:     for (j = 0; j < 3*bs; j++) svertices[j+3*bs*i] = j + 3*bs*loc2pet[i];
2426:   ISCreateGeneral(MPI_COMM_SELF,3*bs*nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2427: #endif
2428: #else
2429:   ICALLOC(3*bs*nvertices,&svertices);
2430:   for (j = 0; j < 3*bs; j++)
2431:     for (i = 0; i < nvertices; i++) svertices[j*nvertices+i] = j*nvertices + loc2pet[i];
2432:   ISCreateGeneral(MPI_COMM_SELF,3*bs*nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2433: #endif
2434:   PetscFree(svertices);
2435:   VecScatterCreate(grid->grad,isglobal,grid->gradLoc,islocal,&grid->gradScatter);
2436:   ISDestroy(&isglobal);
2437:   ISDestroy(&islocal);

2439: /* Store the number of non-zeroes per row */
2440: #if defined(INTERLACING)
2441: #if defined(BLOCKING)
2442:   ICALLOC(nnodesLoc,&val_diag);
2443:   ICALLOC(nnodesLoc,&val_offd);
2444:   for (i = 0; i < nnodesLoc; i++) {
2445:     jstart    = grid->ia[i] - 1;
2446:     jend      = grid->ia[i+1] - 1;
2447:     nbrs_diag = 0;
2448:     nbrs_offd = 0;
2449:     for (j = jstart; j < jend; j++) {
2450:       if ((grid->ja[j] >= rstart) && (grid->ja[j] < (rstart+nnodesLoc))) nbrs_diag++;
2451:       else nbrs_offd++;
2452:     }
2453:     val_diag[i] = nbrs_diag;
2454:     val_offd[i] = nbrs_offd;
2455:   }
2456:   MatCreateBAIJ(comm,bs,bs*nnodesLoc,bs*nnodesLoc,
2457:                        bs*nnodes,bs*nnodes,PETSC_DEFAULT,val_diag,
2458:                        PETSC_DEFAULT,val_offd,&grid->A);
2459: #else
2460:   ICALLOC(nnodesLoc*4,&val_diag);
2461:   ICALLOC(nnodesLoc*4,&val_offd);
2462:   for (i = 0; i < nnodesLoc; i++) {
2463:     jstart    = grid->ia[i] - 1;
2464:     jend      = grid->ia[i+1] - 1;
2465:     nbrs_diag = 0;
2466:     nbrs_offd = 0;
2467:     for (j = jstart; j < jend; j++) {
2468:       if ((grid->ja[j] >= rstart) && (grid->ja[j] < (rstart+nnodesLoc))) nbrs_diag++;
2469:       else nbrs_offd++;
2470:     }
2471:     for (j = 0; j < 4; j++) {
2472:       int row       = 4*i + j;
2473:       val_diag[row] = nbrs_diag*4;
2474:       val_offd[row] = nbrs_offd*4;
2475:     }
2476:   }
2477:   MatCreateAIJ(comm,bs*nnodesLoc,bs*nnodesLoc,
2478:                       bs*nnodes,bs*nnodes,NULL,val_diag,
2479:                       NULL,val_offd,&grid->A);
2480: #endif
2481:   PetscFree(val_diag);
2482:   PetscFree(val_offd);

2484: #else
2485:   if (size > 1) SETERRQ(PETSC_COMM_SELF,1,"Parallel case not supported in non-interlaced case\n");
2486:   ICALLOC(nnodes*4,&val_diag);
2487:   ICALLOC(nnodes*4,&val_offd);
2488:   for (j = 0; j < 4; j++)
2489:     for (i = 0; i < nnodes; i++) {
2490:       int row;
2491:       row           = i + j*nnodes;
2492:       jstart        = grid->ia[i] - 1;
2493:       jend          = grid->ia[i+1] - 1;
2494:       nbrs_diag     = jend - jstart;
2495:       val_diag[row] = nbrs_diag*4;
2496:       val_offd[row] = 0;
2497:     }
2498:   /* MatCreateSeqAIJ(MPI_COMM_SELF,nnodes*4,nnodes*4,NULL,
2499:                         val,&grid->A);*/
2500:   MatCreateAIJ(comm,bs*nnodesLoc,bs*nnodesLoc,
2501:                       bs*nnodes,bs*nnodes,NULL,val_diag,
2502:                       NULL,val_offd,&grid->A);
2503:   MatSetBlockSize(grid->A,bs);
2504:   PetscFree(val_diag);
2505:   PetscFree(val_offd);
2506: #endif

2508:   flg  = PETSC_FALSE;
2509:   PetscOptionsGetBool(0,"-mem_use",&flg,NULL);
2510:   if (flg) {
2511:     PetscMemoryView(PETSC_VIEWER_STDOUT_WORLD,"Memory usage after allocating PETSc data structures\n");
2512:   }

2514: /* Set local to global mapping for setting the matrix elements in
2515: * local ordering : first set row by row mapping
2516: */
2517:   ISLocalToGlobalMappingCreate(MPI_COMM_SELF,bs,nvertices,loc2pet,PETSC_COPY_VALUES,&isl2g);
2518:   MatSetLocalToGlobalMapping(grid->A,isl2g,isl2g);
2519:   ISLocalToGlobalMappingDestroy(&isl2g);
2520:   return(0);
2521: }
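/* Editor's note: a minimal sketch of the blocked-index convention used by the
   scatters above.  With bs = 4, each entry of loc2pet[] is a node number, and a
   block index p in the IS addresses the four scalar entries 4*p .. 4*p+3 of the
   global vector.  The helper name, guard, and indices are hypothetical. */
#if defined(FUN3D_IS_BLOCK_EXAMPLE)
static PetscErrorCode BlockISExample(void)
{
  IS       is;
  PetscInt nodes[2] = {5,9};   /* PETSc node numbers of two ghosted vertices */

  ISCreateBlock(MPI_COMM_SELF,4,2,nodes,PETSC_COPY_VALUES,&is);
  /* The IS now selects scalar entries 20..23 and 36..39 of the global vector */
  ISDestroy(&is);
  return 0;
}
#endif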

2523: /*================================= CLINK ===================================*/
2524: /*                                                                           */
2525: /* Used in establishing the links between FORTRAN common blocks and C        */
2526: /*                                                                           */
2527: /*===========================================================================*/
2528: EXTERN_C_BEGIN
2529: void f77CLINK(CINFO *p1,CRUNGE *p2,CGMCOM *p3)
2530: {
2531:   c_info  = p1;
2532:   c_runge = p2;
2533:   c_gmcom = p3;
2534: }
2535: EXTERN_C_END

2537: /*========================== SET_UP_GRID====================================*/
2538: /*                                                                          */
2539: /* Allocates the memory for the fine grid                                   */
2540: /*                                                                          */
2541: /*==========================================================================*/
2542: int set_up_grid(GRID *grid)
2543: {
2544:   int nnodes,nedge;
2545:   int nsface,nvface,nfface,nbface;
2546:   int tnode,ierr;
2547:   /*int vface,lnodes,nnz,ncell,kvisc,ilu0,nsrch,ileast,ifcn,valloc;*/
2548:   /*int nsnode,nvnode,nfnode; */
/*int mgzero=0;*/ /* Variable so we don't allocate memory for multigrid */
2550:   /*int jalloc;*/  /* If jalloc=1 allocate space for dfp and dfm */
2551:   /*
2552:   * stuff to read in dave's grids
2553:   */
2554:   /*int nnbound,nvbound,nfbound,nnfacet,nvfacet,nffacet,ntte;*/
2555:   /* end of stuff */

2558:   nnodes = grid->nnodes;
2559:   tnode  = grid->nnodes;
2560:   nedge  = grid->nedge;
2561:   nsface = grid->nsface;
2562:   nvface = grid->nvface;
2563:   nfface = grid->nfface;
2564:   nbface = nsface + nvface + nfface;

2566:   /*ncell  = grid->ncell;
2567:   vface  = grid->nedge;
2568:   lnodes = grid->nnodes;
2569:   nsnode = grid->nsnode;
2570:   nvnode = grid->nvnode;
2571:   nfnode = grid->nfnode;
2572:   nsrch  = c_gmcom->nsrch;
2573:   ilu0   = c_gmcom->ilu0;
2574:   ileast = grid->ileast;
2575:   ifcn   = c_gmcom->ifcn;
2576:   jalloc = 0;
2577:   kvisc  = grid->jvisc;*/

2579:   /* if (ilu0 >=1 && ifcn == 1) jalloc=0;*/

2581:   /*
2582:   * stuff to read in dave's grids
2583:   */
2584:   /*nnbound = grid->nnbound;
2585:   nvbound = grid->nvbound;
2586:   nfbound = grid->nfbound;
2587:   nnfacet = grid->nnfacet;
2588:   nvfacet = grid->nvfacet;
2589:   nffacet = grid->nffacet;
2590:   ntte    = grid->ntte;*/
2591:   /* end of stuff */


2594:   /* if (!ileast) lnodes = 1;
2595:     printf("In set_up_grid->jvisc = %d\n",grid->jvisc);

2597:   if (grid->jvisc != 2 && grid->jvisc != 4 && grid->jvisc != 6)vface = 1;
2598:   printf(" vface = %d \n",vface);
2599:   if (grid->jvisc < 3) tnode = 1;
2600:   valloc = 1;
2601:   if (grid->jvisc ==  0)valloc = 0;*/

2603:   /*PetscPrintf(PETSC_COMM_WORLD," nsnode= %d nvnode= %d nfnode= %d\n",nsnode,nvnode,nfnode);*/
2604:   /*PetscPrintf(PETSC_COMM_WORLD," nsface= %d nvface= %d nfface= %d\n",nsface,nvface,nfface);
2605:   PetscPrintf(PETSC_COMM_WORLD," nbface= %d\n",nbface);*/
2606:   /* Now allocate memory for the other grid arrays */
2607:   /* ICALLOC(nedge*2,  &grid->eptr); */
2608:   ICALLOC(nsface,   &grid->isface);
2609:   ICALLOC(nvface,   &grid->ivface);
2610:   ICALLOC(nfface,   &grid->ifface);
2611:   /* ICALLOC(nsnode,   &grid->isnode);
2612:     ICALLOC(nvnode,   &grid->ivnode);
2613:     ICALLOC(nfnode,   &grid->ifnode);*/
2614:   /*ICALLOC(nnodes,   &grid->clist);
2615:   ICALLOC(nnodes,   &grid->iupdate);
2616:   ICALLOC(nsface*2, &grid->sface);
2617:   ICALLOC(nvface*2, &grid->vface);
2618:   ICALLOC(nfface*2, &grid->fface);
2619:   ICALLOC(lnodes,   &grid->icount);*/
/*FCALLOC(nnodes, &grid->y);
FCALLOC(nnodes, &grid->z);
FCALLOC(nnodes, &grid->area);*/
/*
* FCALLOC(nnodes*4, &grid->gradx);
* FCALLOC(nnodes*4, &grid->grady);
* FCALLOC(nnodes*4, &grid->gradz);
* FCALLOC(nnodes, &grid->cdt);
*/
/*
* FCALLOC(nnodes*4, &grid->qnode);
* FCALLOC(nnodes*4, &grid->dq);
* FCALLOC(nnodes*4, &grid->res);
* FCALLOC(jalloc*nnodes*4*4,&grid->A);
* FCALLOC(nnodes*4, &grid->B);
* FCALLOC(jalloc*nedge*4*4,&grid->dfp);
* FCALLOC(jalloc*nedge*4*4,&grid->dfm);
*/
/*FCALLOC(nsnode, &grid->sxn);
FCALLOC(nsnode, &grid->syn);
FCALLOC(nsnode, &grid->szn);
FCALLOC(nsnode, &grid->sa);
FCALLOC(nvnode, &grid->vxn);
FCALLOC(nvnode, &grid->vyn);
FCALLOC(nvnode, &grid->vzn);
FCALLOC(nvnode, &grid->va);
FCALLOC(nfnode, &grid->fxn);
FCALLOC(nfnode, &grid->fyn);
FCALLOC(nfnode, &grid->fzn);
FCALLOC(nfnode, &grid->fa);
FCALLOC(nedge, &grid->xn);
FCALLOC(nedge, &grid->yn);
FCALLOC(nedge, &grid->zn);
FCALLOC(nedge, &grid->rl);*/

line2656">2656: FCALLOC(nbface*15,&grid-&
line2657">2657: FCALLOC(nbface*15,&grid-&
line2658">2658: FCALLOC(nbface*15,&grid-&
/*
* FCALLOC(nnodes*4, &grid->phi);
* FCALLOC(nnodes, &grid->r11);
* FCALLOC(nnodes, &grid->r12);
* FCALLOC(nnodes, &grid->r13);
* FCALLOC(nnodes, &grid->r22);
* FCALLOC(nnodes, &grid->r23);
* FCALLOC(nnodes, &grid->r33);
*/
/*
* Allocate memory for viscous length scale if turbulent
*/
line2671">2671: if (grid->jvisc >
line2672">2672: FCALLOC(tnode, &grid->
line2673">2673: FCALLOC(nnodes, &grid->t
line2674">2674: FCALLOC(nnodes, &grid->
line2675">2675: FCALLOC(tnode, &grid->tu
line2676">2676: FCALLOC(nedge, &grid->
line2677">2677: FCALLOC(nedge, &grid->
line2678">2678:
/*
* Allocate memory for MG transfer
*/
/*
ICALLOC(mgzero*nsface, &grid->isford);
ICALLOC(mgzero*nvface, &grid->ivford);
ICALLOC(mgzero*nfface, &grid->ifford);
ICALLOC(mgzero*nnodes, &grid->nflag);
ICALLOC(mgzero*nnodes, &grid->nnext);
ICALLOC(mgzero*nnodes, &grid->nneigh);
ICALLOC(mgzero*ncell, &grid->ctag);
ICALLOC(mgzero*ncell, &grid->csearch);
ICALLOC(valloc*ncell*4, &grid->c2n);
ICALLOC(valloc*ncell*6, &grid->c2e);
grid->c2c = (int*)grid->dfp;
ICALLOC(ncell*4, &grid->c2c);
ICALLOC(nnodes, &grid->cenc);
if (grid->iup == 1) {
ICALLOC(mgzero*nnodes*3, &grid->icoefup);
FCALLOC(mgzero*nnodes*3, &grid->rcoefup);
}
if (grid->idown == 1) {
ICALLOC(mgzero*nnodes*3, &grid->icoefdn);
FCALLOC(mgzero*nnodes*3, &grid->rcoefdn);
}
FCALLOC(nnodes*4, &grid->ff);
FCALLOC(tnode, &grid->turbff);
*/
/*
* If using GMRES (nsrch>0) allocate memory
*/
/* NoEq = 0;
if (nsrch > 0)NoEq = 4*nnodes;
if (nsrch < 0)NoEq = nnodes;
FCALLOC(NoEq, &grid->AP);
FCALLOC(NoEq, &grid->Xgm);
FCALLOC(NoEq, &grid->temr);
FCALLOC((abs(nsrch)+1)*NoEq,&grid->Fgm);
*/
/*
* stuff to read in dave's grids
*/
/*
ICALLOC(nnbound, &grid->ncolorn);
ICALLOC(nnbound*100,&grid->countn);
ICALLOC(nvbound, &grid->ncolorv);
ICALLOC(nvbound*100,&grid->countv);
ICALLOC(nfbound, &grid->ncolorf);
ICALLOC(nfbound*100,&grid->countf);
*/
/*ICALLOC(nnbound, &grid->nntet);
ICALLOC(nnbound, &grid->nnpts);
ICALLOC(nvbound, &grid->nvtet);
ICALLOC(nvbound, &grid->nvpts);
ICALLOC(nfbound, &grid->nftet);
ICALLOC(nfbound, &grid->nfpts);
ICALLOC(nnfacet*4,&grid->f2ntn);
ICALLOC(nvfacet*4,&grid->f2ntv);
ICALLOC(nffacet*4,&grid->f2ntf);*/
line2738">2738: return line2739">2739


/*========================== WRITE_FINE_GRID ================================*/
/*                                                                           */
/* Write memory locations and other information for the fine grid           */
/*                                                                           */
/*===========================================================================*/
line2747">2747: int write_fine_grid(GRID *grid) line2748">2748
line2749">2749: FILE *

/* open file for output */
/* call the output frame.out */

line2755">2755: if (!(output = fopen("frame.out","a"))) SETERRQ(PETSC_COMM_SELF,1,"can't open frame.out" line2756">2756: fprintf(output,"information for fine grid\n" line2757">2757: fprintf(output,"\n" line2758">2758: fprintf(output," address of fine grid = %p\n",(void*

line2760">2760: fprintf(output,"grid.nnodes = %d\n",grid->n
line2761">2761: fprintf(output,"grid.ncell = %d\n",grid->
line2762">2762: fprintf(output,"grid.nedge = %d\n",grid->
line2763">2763: fprintf(output,"grid.nsface = %d\n",grid->n
line2764">2764: fprintf(output,"grid.nvface = %d\n",grid->n
line2765">2765: fprintf(output,"grid.nfface = %d\n",grid->n
line2766">2766: fprintf(output,"grid.nsnode = %d\n",grid->n
line2767">2767: fprintf(output,"grid.nvnode = %d\n",grid->n
line2768">2768: fprintf(output,"grid.nfnode = %d\n",grid->n
/*
fprintf(output,"grid.eptr = %p\n",grid->eptr);
fprintf(output,"grid.isface = %p\n",grid->isface);
fprintf(output,"grid.ivface = %p\n",grid->ivface);
fprintf(output,"grid.ifface = %p\n",grid->ifface);
fprintf(output,"grid.isnode = %p\n",grid->isnode);
fprintf(output,"grid.ivnode = %p\n",grid->ivnode);
fprintf(output,"grid.ifnode = %p\n",grid->ifnode);
fprintf(output,"grid.c2n = %p\n",grid->c2n);
fprintf(output,"grid.c2e = %p\n",grid->c2e);
fprintf(output,"grid.xyz = %p\n",grid->xyz);
*/
/*fprintf(output,"grid.y = %p\n",grid->xyz);
fprintf(output,"grid.z = %p\n",grid->z);*/
/*
fprintf(output,"grid.area = %p\n",grid->area);
fprintf(output,"grid.qnode = %p\n",grid->qnode);
*/
/*
fprintf(output,"grid.gradx = %p\n",grid->gradx);
fprintf(output,"grid.grady = %p\n",grid->grady);
fprintf(output,"grid.gradz = %p\n",grid->gradz);
*/
/*
fprintf(output,"grid.cdt = %p\n",grid->cdt);
fprintf(output,"grid.sxn = %p\n",grid->sxn);
fprintf(output,"grid.syn = %p\n",grid->syn);
fprintf(output,"grid.szn = %p\n",grid->szn);
fprintf(output,"grid.vxn = %p\n",grid->vxn);
fprintf(output,"grid.vyn = %p\n",grid->vyn);
fprintf(output,"grid.vzn = %p\n",grid->vzn);
fprintf(output,"grid.fxn = %p\n",grid->fxn);
fprintf(output,"grid.fyn = %p\n",grid->fyn);
fprintf(output,"grid.fzn = %p\n",grid->fzn);
fprintf(output,"grid.xyzn = %p\n",grid->xyzn);
*/
/*fprintf(output,"grid.yn = %p\n",grid->yn);
fprintf(output,"grid.zn = %p\n",grid->zn);
fprintf(output,"grid.rl = %p\n",grid->rl);*/
/*
* close output file
*/
2811:   fclose(output);
2812:   return 0;
2813: }
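/*
 * Hypothetical call site (not present in this file): append a snapshot of the
 * grid sizes to frame.out right after the grid data structure has been filled
 * in, for example from main():
 *
 *   ierr = write_fine_grid(user.grid);CHKERRQ(ierr);
 *
 * Because frame.out is opened with mode "a", repeated calls accumulate one
 * snapshot per call.
 */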

2815: #if defined(_OPENMP) && defined(HAVE_EDGE_COLORING)
2816: int EdgeColoring(int nnodes,int nedge,int *e2n,int *eperm,int *ncle,int *counte)
2817: {
2818:   int ncolore = *ncle = 0;
2819:   int iedg = 0,ib = 0,ie = nedge,tagcount;
2820:   int i,n1,n2;
2821:   int *tag;
2822:   ICALLOC(nnodes,&tag);
2823:   while (ib < ie) {
2824:     for (i = 0; i < nnodes; i++) tag[i] = 0;
2825:     counte[ncolore] = 0;
2826:     for (i = ib; i < ie; i++) {
2827:       n1       = e2n[i];
2828:       n2       = e2n[i+nedge];
2829:       tagcount = tag[n1]+tag[n2];
/* If tagcount = 0 then this edge belongs in this color */
2831:       if (!tagcount) {
2832:         tag[n1]         = 1;
2833:         tag[n2]         = 1;
2834:         e2n[i]          = e2n[iedg];
2835:         e2n[i+nedge]    = e2n[iedg+nedge];
2836:         e2n[iedg]       = n1;
2837:         e2n[iedg+nedge] = n2;
2838:         n1              = eperm[i];
2839:         eperm[i]        = eperm[iedg];
2840:         eperm[iedg]     = n1;
2841:         iedg++;
2842:         counte[ncolore]++;
2843:       }
2844:     }
2845:     ib = iedg;
2846:     ncolore++;
2847:   }
2848:   *ncle = ncolore;
2849:   return 0;
2850: }
2851: #endif
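/*
 * Assumed usage sketch (not part of the original file): after EdgeColoring()
 * the edge list e2n is reordered so that color c occupies a contiguous block
 * of counte[c] edges and no two edges of one color share a node, so a
 * node-based scatter over one color can run under OpenMP without atomics.
 * Here ncolor and counte are the outputs of EdgeColoring(), while res[] and
 * edge_flux() are hypothetical placeholders for a nodal residual array and a
 * per-edge flux computation:
 *
 *   int c,i,start = 0;
 *   for (c = 0; c < ncolor; c++) {
 *     #pragma omp parallel for
 *     for (i = start; i < start+counte[c]; i++) {
 *       int    n1   = e2n[i],n2 = e2n[i+nedge];
 *       double flux = edge_flux(i);
 *       res[n1] += flux;
 *       res[n2] -= flux;
 *     }
 *     start += counte[c];
 *   }
 */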
2852: #if defined(PARCH_IRIX64) && defined(USE_HW_COUNTERS)
2853: int EventCountersBegin(int *gen_start,PetscScalar *time_start_counters)
2854: {

2856:   if ((*gen_start = start_counters(event0,event1)) < 0) SETERRQ(PETSC_COMM_SELF,1,"Error in start_counters\n");
2857:   PetscTime(time_start_counters);
2858:   return 0;
2859: }

2861: int EventCountersEnd(int gen_start,PetscScalar time_start_counters)
2862: {
2863:   int         gen_read;
2864:   PetscScalar time_read_counters;
2865:   long long   _counter0,_counter1;

2867:   if ((gen_read = read_counters(event0,&_counter0,event1,&_counter1)) < 0) SETERRQ(PETSC_COMM_SELF,1,"Error in read_counter\n");
2868:   PetscTime(&time_read_counters);
2869:   if (gen_read != gen_start) SETERRQ(PETSC_COMM_SELF,1,"Lost Counters!! Aborting ...\n");
2870:   counter0      += _counter0;
2871:   counter1      += _counter1;
2872:   time_counters += time_read_counters-time_start_counters;
2873:   return 0;
2874: }
2875: #endif
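/*
 * Assumed usage pattern for the IRIX hardware-counter helpers above: bracket a
 * region of interest so that the global counter0, counter1 and time_counters
 * accumulators pick up the event counts and wall time of that region.  The
 * bracketed work shown here is only illustrative:
 *
 *   int         gen_start;
 *   PetscScalar time_start;
 *
 *   EventCountersBegin(&gen_start,&time_start);
 *   ... timed region (e.g. one flux-gradient evaluation) ...
 *   EventCountersEnd(gen_start,time_start);
 */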