Actual source code: fdda.c

petsc-3.14.0 2020-09-29
Report Typos and Errors

  2: #include <petsc/private/dmdaimpl.h>
  3: #include <petscmat.h>

  /* Prototypes for the dimension-specific coloring routines defined further down in
     this file; DMCreateColoring_DA() and DMCreateColoring_DA_2d_MPIAIJ() call them. */
  5: extern PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM,ISColoringType,ISColoring*);
  6: extern PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM,ISColoringType,ISColoring*);
  7: extern PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM,ISColoringType,ISColoring*);
  8: extern PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM,ISColoringType,ISColoring*);

 /*
    SetInRange - maps a ghost index i that may be negative or not less than the
    upper bound m into the 0:m-1 range using periodicity.

    Only a single wrap is applied: the result lies in 0:m-1 when -m <= i < 2*m.

    Every parameter is parenthesized in the expansion so that expression arguments
    (for example SetInRange(i,a+b)) expand correctly. Both arguments may still be
    evaluated more than once, so do not pass expressions with side effects.
 */
 #define SetInRange(i,m) (((i) < 0) ? (m)+(i) : (((i) >= (m)) ? (i)-(m) : (i)))

 /*
    DMDASetBlockFills_Private - converts a dense w-by-w 0/1 fill pattern into the
    compact row-start/column-index array described in the comments below.

    Input Parameters:
    dfill - dense pattern stored row by row (entry w*i+j is row i, column j); may be NULL
    w     - number of rows and of columns of the pattern

    Output Parameter:
    rfill - receives a newly allocated array of nz + w + 1 entries, where nz is the
            number of nonzero entries of dfill; left untouched when dfill is NULL

    NOTE(review): the listing numbers skip lines inside this function, so some
    statements (presumably error-checking boilerplate) are not visible here - confirm
    against the real file.
 */
 16: static PetscErrorCode DMDASetBlockFills_Private(const PetscInt *dfill,PetscInt w,PetscInt **rfill)
 17: {
 19:   PetscInt       i,j,nz,*fill;

 22:   if (!dfill) return(0); /* nothing to convert; *rfill keeps whatever value it had */

 24:   /* first pass: count the nonzero entries so the output can be sized */
 25:   nz = 0;
 26:   for (i=0; i<w; i++) {
 27:     for (j=0; j<w; j++) {
 28:       if (dfill[w*i+j]) nz++;
 29:     }
 30:   }
 31:   PetscMalloc1(nz + w + 1,&fill); /* w+1 row-start slots followed by nz column indices */
 32:   /* construct modified CSR storage of nonzero structure */
 33:   /*  fill[0 -- w] marks starts of each row of column indices (and end of last row)
 34:    so fill[1] - fill[0] gives number of nonzeros in first row etc */
 35:   nz = w + 1; /* nz is reused as the write position; column indices begin after the row-start slots */
 36:   for (i=0; i<w; i++) {
 37:     fill[i] = nz;
 38:     for (j=0; j<w; j++) {
 39:       if (dfill[w*i+j]) {
 40:         fill[nz] = j;
 41:         nz++;
 42:       }
 43:     }
 44:   }
 45:   fill[w] = nz; /* end marker of the last row, equal to the total array length */

 47:   *rfill = fill;
 48:   return(0);
 49: }


 /*
    DMDASetBlockFillsSparse_Private - makes a private copy of a fill pattern that is
    already in the compact row-start/column-index form.

    Input Parameters:
    dfillsparse - the compact pattern; entry w is read as the total array length
                  (nz + w + 1); may be NULL
    w           - number of rows of the pattern

    Output Parameter:
    rfill - receives a newly allocated copy of dfillsparse; left untouched when
            dfillsparse is NULL

    NOTE(review): the listing numbers skip lines inside this function, so some
    statements (presumably error-checking boilerplate) are not visible here - confirm
    against the real file.
 */
 52: static PetscErrorCode DMDASetBlockFillsSparse_Private(const PetscInt *dfillsparse,PetscInt w,PetscInt **rfill)
 53: {
 55:   PetscInt       nz;

 58:   if (!dfillsparse) return(0); /* nothing to copy; *rfill keeps whatever value it had */

 60:   /* Determine number of non-zeros: entry w holds nz + w + 1 */
 61:   nz = (dfillsparse[w] - w - 1);

 63:   /* Allocate space for our copy of the given sparse matrix representation. */
 64:   PetscMalloc1(nz + w + 1,rfill);
 65:   PetscArraycpy(*rfill,dfillsparse,nz+w+1);
 66:   return(0);
 67: }


 /*
    DMDASetBlockFills_Private2 - allocates dd->ofillcols (dd->w entries) and numbers the
    columns of the compact dd->ofill pattern that contain at least one nonzero.

    On return dd->ofillcols[c] is 0 when column c never appears in dd->ofill, otherwise
    it is the 1-based position of c among the columns that do appear.

    NOTE(review): dd->ofill is read unconditionally, so it must have been set before
    this is called. A previous dd->ofillcols is overwritten without being freed in the
    code visible here - confirm whether callers guarantee it is NULL.
 */
 70: static PetscErrorCode DMDASetBlockFills_Private2(DM_DA *dd)
 71: {
 73:   PetscInt       i,k,cnt = 1;


 77:   /* ofillcols tracks the columns of ofill that have any nonzero in them; the value in each location is the number of
 78:    columns to the left with any nonzeros in them plus 1 */
 79:   PetscCalloc1(dd->w,&dd->ofillcols);
 80:   for (i=0; i<dd->w; i++) {
 81:     for (k=dd->ofill[i]; k<dd->ofill[i+1]; k++) dd->ofillcols[dd->ofill[k]] = 1; /* mark each column index stored for row i */
 82:   }
 83:   for (i=0; i<dd->w; i++) {
 84:     if (dd->ofillcols[i]) {
 85:       dd->ofillcols[i] = cnt++; /* replace the mark by a running 1-based count */
 86:     }
 87:   }
 88:   return(0);
 89: }



 93: /*@
 94:     DMDASetBlockFills - Sets the fill pattern in each block for a multi-component problem
 95:     of the matrix returned by DMCreateMatrix().

 97:     Logically Collective on da

 99:     Input Parameters:
100: +   da - the distributed array
101: .   dfill - the fill pattern in the diagonal block (may be NULL, means use dense block)
102: -   ofill - the fill pattern in the off-diagonal blocks


105:     Level: developer

107:     Notes:
108:     This only makes sense when you are doing multicomponent problems but using the
109:        MPIAIJ matrix format

111:            The format for dfill and ofill is a 2 dimensional dof by dof matrix with 1 entries
112:        representing coupling and 0 entries for missing coupling. For example
113: $             dfill[9] = {1, 0, 0,
114: $                         1, 1, 0,
115: $                         0, 1, 1}
116:        means that row 0 is coupled with only itself in the diagonal block, row 1 is coupled with
117:        itself and row 0 (in the diagonal block) and row 2 is coupled with itself and row 1 (in the
118:        diagonal block).

120:      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
121:      can be represented in the dfill, ofill format

123:    Contributed by Glenn Hammond

125: .seealso DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()

127: @*/
128: PetscErrorCode  DMDASetBlockFills(DM da,const PetscInt *dfill,const PetscInt *ofill)
129: {
130:   DM_DA          *dd = (DM_DA*)da->data;

134:   /* convert the dense dfill and ofill patterns to compact form and store them on the DMDA;
        a NULL pattern leaves the corresponding dd field unchanged */
135:   DMDASetBlockFills_Private(dfill,dd->w,&dd->dfill);
136:   DMDASetBlockFills_Private(ofill,dd->w,&dd->ofill);

138:   /* number the columns of ofill that contain a nonzero (stored in dd->ofillcols) */
139:   DMDASetBlockFills_Private2(dd); /* NOTE(review): reads dd->ofill unconditionally, so a NULL ofill is only safe if dd->ofill was set earlier - confirm */

141:   return(0);
142: }


145: /*@
146:     DMDASetBlockFillsSparse - Sets the fill pattern in each block for a multi-component problem
147:     of the matrix returned by DMCreateMatrix(), using sparse representations
148:     of fill patterns.

150:     Logically Collective on da

152:     Input Parameters:
153: +   da - the distributed array
154: .   dfillsparse - the sparse fill pattern in the diagonal block (may be NULL, means use dense block)
155: -   ofillsparse - the sparse fill pattern in the off-diagonal blocks


158:     Level: developer

160:     Notes: This only makes sense when you are doing multicomponent problems but using the
161:        MPIAIJ matrix format

163:            The format for dfillsparse and ofillsparse is a sparse representation of a
164:            dof-by-dof matrix with 1 entries representing coupling and 0 entries
165:            for missing coupling.  The sparse representation is a 1 dimensional
166:            array of length nz + dof + 1, where nz is the number of non-zeros in
167:            the matrix.  The first dof entries in the array give the
168:            starting array indices of each row's items in the rest of the array,
169:            the dof+1st item contains the value nz + dof + 1 (i.e. the entire length of the array)
170:            and the remaining nz items give the column indices of each of
171:            the 1s within the logical 2D matrix.  Each row's items within
172:            the array are the column indices of the 1s within that row
173:            of the 2D matrix.  PETSc developers may recognize that this is the
174:            same format as that computed by the DMDASetBlockFills_Private()
175:            function from a dense 2D matrix representation.

177:      DMDASetGetMatrix() allows you to provide general code for those more complicated nonzero patterns than
178:      can be represented in the dfillsparse, ofillsparse format

180:    Contributed by Philip C. Roth

182: .seealso DMDASetBlockFills(), DMCreateMatrix(), DMDASetGetMatrix(), DMSetMatrixPreallocateOnly()

184: @*/
185: PetscErrorCode  DMDASetBlockFillsSparse(DM da,const PetscInt *dfillsparse,const PetscInt *ofillsparse)
186: {
187:   DM_DA          *dd = (DM_DA*)da->data;

191:   /* store private copies of the given sparse patterns on the DMDA;
        a NULL pattern leaves the corresponding dd field unchanged */
192:   DMDASetBlockFillsSparse_Private(dfillsparse,dd->w,&dd->dfill);
193:   DMDASetBlockFillsSparse_Private(ofillsparse,dd->w,&dd->ofill);

195:   /* number the columns of ofill that contain a nonzero (stored in dd->ofillcols) */
196:   DMDASetBlockFills_Private2(dd); /* NOTE(review): reads dd->ofill unconditionally, so a NULL ofillsparse is only safe if dd->ofill was set earlier - confirm */

198:   return(0);
199: }


/*
   DMCreateColoring_DA - produces the ISColoring for a DMDA by dispatching on the DMDA
   dimension to the 1d, 2d or 3d routine in this file.

   Input Parameters:
   da    - the DMDA
   ctype - IS_COLORING_GLOBAL or IS_COLORING_LOCAL; IS_COLORING_LOCAL is switched to
           IS_COLORING_GLOBAL when the communicator has a single process

   Output Parameter:
   coloring - the coloring set by the dimension-specific routine

   When da->mattype begins with MATBAIJ, MATMPIBAIJ or MATSEQBAIJ the DMDA is temporarily
   treated as having one component per grid point: dd->w is set to 1 and dd->xs, dd->xe,
   dd->Xs, dd->Xe are divided by nc before the dispatch and restored afterwards.

   NOTE(review): the listing numbers skip lines and ierr is declared but unused in the
   visible text, so error-checking statements were presumably dropped by the listing -
   confirm against the real file.
*/
202: PetscErrorCode  DMCreateColoring_DA(DM da,ISColoringType ctype,ISColoring *coloring)
203: {
204:   PetscErrorCode   ierr;
205:   PetscInt         dim,m,n,p,nc;
206:   DMBoundaryType   bx,by,bz;
207:   MPI_Comm         comm;
208:   PetscMPIInt      size;
209:   PetscBool        isBAIJ;
210:   DM_DA            *dd = (DM_DA*)da->data;

213:   /*
214:                                   m
215:           ------------------------------------------------------
216:          |                                                     |
217:          |                                                     |
218:          |               ----------------------                |
219:          |               |                    |                |
220:       n  |           yn  |                    |                |
221:          |               |                    |                |
222:          |               .---------------------                |
223:          |             (xs,ys)     xn                          |
224:          |            .                                        |
225:          |         (gxs,gys)                                   |
226:          |                                                     |
227:           -----------------------------------------------------
228:   */

230:   /*
231:          nc - number of components per grid point
232:          col - number of colors needed in one direction for single component problem

234:   */
235:   DMDAGetInfo(da,&dim,NULL,NULL,NULL,&m,&n,&p,&nc,NULL,&bx,&by,&bz,NULL); /* m,n,p come from argument slots 5-7; presumably the process counts per direction - confirm */

237:   PetscObjectGetComm((PetscObject)da,&comm);
238:   MPI_Comm_size(comm,&size);
239:   if (ctype == IS_COLORING_LOCAL) {
240:     if (size == 1) {
241:       ctype = IS_COLORING_GLOBAL; /* a single process is handled with the global coloring */
242:     } else if (dim > 1) {
243:       if ((m==1 && bx == DM_BOUNDARY_PERIODIC) || (n==1 && by == DM_BOUNDARY_PERIODIC) || (p==1 && bz == DM_BOUNDARY_PERIODIC)) {
244:         SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"IS_COLORING_LOCAL cannot be used for periodic boundary condition having both ends of the domain  on the same process");
245:       }
246:     }
247:   }

249:   /* Tell the DMDA it has 1 degree of freedom per grid point so that the coloring for BAIJ
250:      matrices is for the blocks, not the individual matrix elements  */
251:   PetscStrbeginswith(da->mattype,MATBAIJ,&isBAIJ);
252:   if (!isBAIJ) {PetscStrbeginswith(da->mattype,MATMPIBAIJ,&isBAIJ);}
253:   if (!isBAIJ) {PetscStrbeginswith(da->mattype,MATSEQBAIJ,&isBAIJ);}
254:   if (isBAIJ) {
255:     dd->w  = 1;
256:     dd->xs = dd->xs/nc;
257:     dd->xe = dd->xe/nc;
258:     dd->Xs = dd->Xs/nc;
259:     dd->Xe = dd->Xe/nc;
260:   }

262:   /*
263:      We do not provide a getcoloring function in the DMDA operations because
264:    the basic DMDA does not know about matrices. We think of DMDA as being
265:    more low-level than matrices.
266:   */
267:   if (dim == 1) {
268:     DMCreateColoring_DA_1d_MPIAIJ(da,ctype,coloring);
269:   } else if (dim == 2) {
270:     DMCreateColoring_DA_2d_MPIAIJ(da,ctype,coloring);
271:   } else if (dim == 3) {
272:     DMCreateColoring_DA_3d_MPIAIJ(da,ctype,coloring);
273:   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not done for %D dimension, send us mail petsc-maint@mcs.anl.gov for code",dim); /* NOTE(review): if this macro returns from the function, the BAIJ restore below is skipped - confirm */
274:   if (isBAIJ) { /* undo the temporary one-component view set up above */
275:     dd->w  = nc;
276:     dd->xs = dd->xs*nc;
277:     dd->xe = dd->xe*nc;
278:     dd->Xs = dd->Xs*nc;
279:     dd->Xe = dd->Xe*nc;
280:   }
281:   return(0);
282: }

284: /* ---------------------------------------------------------------------------------*/

/*
   DMCreateColoring_DA_2d_MPIAIJ - coloring for a 2d DMDA.

   For a star stencil of width 1 the work is delegated to
   DMCreateColoring_DA_2d_5pt_MPIAIJ(). Otherwise each grid point (i,j) and component k
   gets color k + nc*((i % col) + col*(j % col)) with col = 2*s + 1, giving nc*col*col
   colors. IS_COLORING_GLOBAL loops over the corners returned by DMDAGetCorners() and
   caches the result in dd->localcoloring; IS_COLORING_LOCAL loops over the ghost corners
   (indices wrapped with SetInRange()) and caches the result in dd->ghostedcoloring.
   A cached coloring is reused on later calls, and ISColoringReference() is called on
   the returned coloring in every case.

   NOTE(review): the listing numbers skip lines and ierr is declared but unused in the
   visible text, so error-checking statements were presumably dropped by the listing -
   confirm against the real file. M, N, dim, bx and by are fetched but not used in the
   visible code.
*/
286: PetscErrorCode DMCreateColoring_DA_2d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
287: {
288:   PetscErrorCode  ierr;
289:   PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,M,N,dim,s,k,nc,col;
290:   PetscInt        ncolors;
291:   MPI_Comm        comm;
292:   DMBoundaryType  bx,by;
293:   DMDAStencilType st;
294:   ISColoringValue *colors;
295:   DM_DA           *dd = (DM_DA*)da->data;

298:   /*
299:          nc - number of components per grid point
300:          col - number of colors needed in one direction for single component problem

302:   */
303:   DMDAGetInfo(da,&dim,&m,&n,NULL,&M,&N,NULL,&nc,&s,&bx,&by,NULL,&st);
304:   col  = 2*s + 1;
305:   DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);
306:   DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);
307:   PetscObjectGetComm((PetscObject)da,&comm);

309:   /* special case as taught to us by Paul Hovland */
310:   if (st == DMDA_STENCIL_STAR && s == 1) {
311:     DMCreateColoring_DA_2d_5pt_MPIAIJ(da,ctype,coloring);
312:   } else {
313:     if (ctype == IS_COLORING_GLOBAL) {
314:       if (!dd->localcoloring) { /* build once, then reuse the cached coloring */
315:         PetscMalloc1(nc*nx*ny,&colors);
316:         ii   = 0;
317:         for (j=ys; j<ys+ny; j++) {
318:           for (i=xs; i<xs+nx; i++) {
319:             for (k=0; k<nc; k++) {
320:               colors[ii++] = k + nc*((i % col) + col*(j % col));
321:             }
322:           }
323:         }
324:         ncolors = nc + nc*(col-1 + col*(col-1)); /* largest color value plus one, i.e. nc*col*col */
325:         ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);
326:       }
327:       *coloring = dd->localcoloring;
328:     } else if (ctype == IS_COLORING_LOCAL) {
329:       if (!dd->ghostedcoloring) { /* build once, then reuse the cached coloring */
330:         PetscMalloc1(nc*gnx*gny,&colors);
331:         ii   = 0;
332:         for (j=gys; j<gys+gny; j++) {
333:           for (i=gxs; i<gxs+gnx; i++) {
334:             for (k=0; k<nc; k++) {
335:               /* the complicated stuff is to handle periodic boundaries */
336:               colors[ii++] = k + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col));
337:             }
338:           }
339:         }
340:         ncolors = nc + nc*(col - 1 + col*(col-1));
341:         ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);
342:         /* PetscIntView(ncolors,(PetscInt*)colors,0); */

344:         ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);
345:       }
346:       *coloring = dd->ghostedcoloring;
347:     } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
348:   }
349:   ISColoringReference(*coloring); /* also covers the coloring set by the 5pt routine above */
350:   return(0);
351: }

353: /* ---------------------------------------------------------------------------------*/

/*
   DMCreateColoring_DA_3d_MPIAIJ - coloring for a 3d DMDA.

   Each grid point (i,j,k) and component l gets color
   l + nc*((i % col) + col*(j % col) + col*col*(k % col)) with col = 2*s + 1, giving
   nc*col*col*col colors. IS_COLORING_GLOBAL loops over the corners returned by
   DMDAGetCorners() and caches the result in dd->localcoloring; IS_COLORING_LOCAL loops
   over the ghost corners (indices wrapped with SetInRange()) and caches the result in
   dd->ghostedcoloring. A cached coloring is reused on later calls, and
   ISColoringReference() is called on the returned coloring.

   NOTE(review): the listing numbers skip lines and ierr is declared but unused in the
   visible text, so error-checking statements were presumably dropped by the listing -
   confirm against the real file. M, N, P, dim, bx, by, bz and st are fetched but not
   used in the visible code.
*/
355: PetscErrorCode DMCreateColoring_DA_3d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
356: {
357:   PetscErrorCode  ierr;
358:   PetscInt        xs,ys,nx,ny,i,j,gxs,gys,gnx,gny,m,n,p,dim,s,k,nc,col,zs,gzs,ii,l,nz,gnz,M,N,P;
359:   PetscInt        ncolors;
360:   MPI_Comm        comm;
361:   DMBoundaryType  bx,by,bz;
362:   DMDAStencilType st;
363:   ISColoringValue *colors;
364:   DM_DA           *dd = (DM_DA*)da->data;

367:   /*
368:          nc - number of components per grid point
369:          col - number of colors needed in one direction for single component problem

371:   */
372:   DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);
373:   col  = 2*s + 1;
374:   DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
375:   DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
376:   PetscObjectGetComm((PetscObject)da,&comm);

378:   /* create the coloring */
379:   if (ctype == IS_COLORING_GLOBAL) {
380:     if (!dd->localcoloring) { /* build once, then reuse the cached coloring */
381:       PetscMalloc1(nc*nx*ny*nz,&colors);
382:       ii   = 0;
383:       for (k=zs; k<zs+nz; k++) {
384:         for (j=ys; j<ys+ny; j++) {
385:           for (i=xs; i<xs+nx; i++) {
386:             for (l=0; l<nc; l++) {
387:               colors[ii++] = l + nc*((i % col) + col*(j % col) + col*col*(k % col));
388:             }
389:           }
390:         }
391:       }
392:       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1)); /* largest color value plus one, i.e. nc*col*col*col */
393:       ISColoringCreate(comm,ncolors,nc*nx*ny*nz,colors,PETSC_OWN_POINTER,&dd->localcoloring);
394:     }
395:     *coloring = dd->localcoloring;
396:   } else if (ctype == IS_COLORING_LOCAL) {
397:     if (!dd->ghostedcoloring) { /* build once, then reuse the cached coloring */
398:       PetscMalloc1(nc*gnx*gny*gnz,&colors);
399:       ii   = 0;
400:       for (k=gzs; k<gzs+gnz; k++) {
401:         for (j=gys; j<gys+gny; j++) {
402:           for (i=gxs; i<gxs+gnx; i++) {
403:             for (l=0; l<nc; l++) {
404:               /* the complicated stuff is to handle periodic boundaries */
405:               colors[ii++] = l + nc*((SetInRange(i,m) % col) + col*(SetInRange(j,n) % col) + col*col*(SetInRange(k,p) % col));
406:             }
407:           }
408:         }
409:       }
410:       ncolors = nc + nc*(col-1 + col*(col-1)+ col*col*(col-1));
411:       ISColoringCreate(comm,ncolors,nc*gnx*gny*gnz,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);
412:       ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);
413:     }
414:     *coloring = dd->ghostedcoloring;
415:   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
416:   ISColoringReference(*coloring);
417:   return(0);
418: }

420: /* ---------------------------------------------------------------------------------*/

/*
   DMCreateColoring_DA_1d_MPIAIJ - coloring for a 1d DMDA.

   IS_COLORING_GLOBAL without dd->ofillcols: grid point i, component l gets color
   l + nc*(i % col) with col = 2*s + 1, giving nc*col colors.

   IS_COLORING_GLOBAL with dd->ofillcols: tc counts the components whose ofillcols entry
   is positive. A component l with a positive entry at a grid point with i % col != 0
   gets color nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l]; every other unknown gets
   color l. The number of colors is nc + 2*s*tc.

   IS_COLORING_LOCAL: loops over the ghost corners with indices wrapped by SetInRange()
   and uses l + nc*(index % col); dd->ofillcols is not consulted on this path.

   The result is cached in dd->localcoloring or dd->ghostedcoloring and reused on later
   calls; ISColoringReference() is called on the returned coloring.

   NOTE(review): the listing numbers skip lines and ierr is declared but unused in the
   visible text, so error-checking statements were presumably dropped by the listing -
   confirm against the real file. M, dim and bx are fetched but not used in the visible
   code.
*/
422: PetscErrorCode DMCreateColoring_DA_1d_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
423: {
424:   PetscErrorCode  ierr;
425:   PetscInt        xs,nx,i,i1,gxs,gnx,l,m,M,dim,s,nc,col;
426:   PetscInt        ncolors;
427:   MPI_Comm        comm;
428:   DMBoundaryType  bx;
429:   ISColoringValue *colors;
430:   DM_DA           *dd = (DM_DA*)da->data;

433:   /*
434:          nc - number of components per grid point
435:          col - number of colors needed in one direction for single component problem

437:   */
438:   DMDAGetInfo(da,&dim,&m,NULL,NULL,&M,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);
439:   col  = 2*s + 1;
440:   DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);
441:   DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);
442:   PetscObjectGetComm((PetscObject)da,&comm);

444:   /* create the coloring */
445:   if (ctype == IS_COLORING_GLOBAL) {
446:     if (!dd->localcoloring) { /* build once, then reuse the cached coloring */
447:       PetscMalloc1(nc*nx,&colors);
448:       if (dd->ofillcols) {
449:         PetscInt tc = 0;
450:         for (i=0; i<nc; i++) tc += (PetscInt) (dd->ofillcols[i] > 0); /* tc = number of components flagged in ofillcols */
451:         i1 = 0;
452:         for (i=xs; i<xs+nx; i++) {
453:           for (l=0; l<nc; l++) {
454:             if (dd->ofillcols[l] && (i % col)) {
455:               colors[i1++] =  nc - 1 + tc*((i % col) - 1) + dd->ofillcols[l]; /* colors nc and above, one group of tc per nonzero i % col */
456:             } else {
457:               colors[i1++] = l; /* unflagged components, and all components where i % col == 0, use colors 0..nc-1 */
458:             }
459:           }
460:         }
461:         ncolors = nc + 2*s*tc;
462:       } else {
463:         i1 = 0;
464:         for (i=xs; i<xs+nx; i++) {
465:           for (l=0; l<nc; l++) {
466:             colors[i1++] = l + nc*(i % col);
467:           }
468:         }
469:         ncolors = nc + nc*(col-1);
470:       }
471:       ISColoringCreate(comm,ncolors,nc*nx,colors,PETSC_OWN_POINTER,&dd->localcoloring);
472:     }
473:     *coloring = dd->localcoloring;
474:   } else if (ctype == IS_COLORING_LOCAL) {
475:     if (!dd->ghostedcoloring) { /* build once, then reuse the cached coloring */
476:       PetscMalloc1(nc*gnx,&colors);
477:       i1   = 0;
478:       for (i=gxs; i<gxs+gnx; i++) {
479:         for (l=0; l<nc; l++) {
480:           /* the complicated stuff is to handle periodic boundaries */
481:           colors[i1++] = l + nc*(SetInRange(i,m) % col);
482:         }
483:       }
484:       ncolors = nc + nc*(col-1);
485:       ISColoringCreate(comm,ncolors,nc*gnx,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);
486:       ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);
487:     }
488:     *coloring = dd->ghostedcoloring;
489:   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
490:   ISColoringReference(*coloring);
491:   return(0);
492: }

/*
   DMCreateColoring_DA_2d_5pt_MPIAIJ - coloring for a 2d DMDA in the special case that
   DMCreateColoring_DA_2d_MPIAIJ() selects for a star stencil of width 1.

   Grid point (i,j), component k gets color k + nc*((3*j+i) % 5), giving 5*nc colors.
   IS_COLORING_GLOBAL loops over the corners returned by DMDAGetCorners() and caches the
   result in dd->localcoloring; IS_COLORING_LOCAL loops over the ghost corners (indices
   wrapped with SetInRange()) and caches the result in dd->ghostedcoloring.

   Unlike the other coloring routines in this file, this one does not call
   ISColoringReference(); DMCreateColoring_DA_2d_MPIAIJ() does so after this returns.

   NOTE(review): the listing numbers skip lines and ierr is declared but unused in the
   visible text, so error-checking statements were presumably dropped by the listing -
   confirm against the real file. dim, s, bx and by are fetched but not used in the
   visible code.
*/
494: PetscErrorCode DMCreateColoring_DA_2d_5pt_MPIAIJ(DM da,ISColoringType ctype,ISColoring *coloring)
495: {
496:   PetscErrorCode  ierr;
497:   PetscInt        xs,ys,nx,ny,i,j,ii,gxs,gys,gnx,gny,m,n,dim,s,k,nc;
498:   PetscInt        ncolors;
499:   MPI_Comm        comm;
500:   DMBoundaryType  bx,by;
501:   ISColoringValue *colors;
502:   DM_DA           *dd = (DM_DA*)da->data;

505:   /*
506:          nc - number of components per grid point
507:          (this routine uses a fixed 5 colors per component instead of a col value)

509:   */
510:   DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,NULL);
511:   DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);
512:   DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);
513:   PetscObjectGetComm((PetscObject)da,&comm);
514:   /* create the coloring */
515:   if (ctype == IS_COLORING_GLOBAL) {
516:     if (!dd->localcoloring) { /* build once, then reuse the cached coloring */
517:       PetscMalloc1(nc*nx*ny,&colors);
518:       ii   = 0;
519:       for (j=ys; j<ys+ny; j++) {
520:         for (i=xs; i<xs+nx; i++) {
521:           for (k=0; k<nc; k++) {
522:             colors[ii++] = k + nc*((3*j+i) % 5);
523:           }
524:         }
525:       }
526:       ncolors = 5*nc;
527:       ISColoringCreate(comm,ncolors,nc*nx*ny,colors,PETSC_OWN_POINTER,&dd->localcoloring);
528:     }
529:     *coloring = dd->localcoloring;
530:   } else if (ctype == IS_COLORING_LOCAL) {
531:     if (!dd->ghostedcoloring) { /* build once, then reuse the cached coloring */
532:       PetscMalloc1(nc*gnx*gny,&colors);
533:       ii = 0;
534:       for (j=gys; j<gys+gny; j++) {
535:         for (i=gxs; i<gxs+gnx; i++) {
536:           for (k=0; k<nc; k++) {
537:             colors[ii++] = k + nc*((3*SetInRange(j,n) + SetInRange(i,m)) % 5); /* wrap ghost indices before applying the same formula */
538:           }
539:         }
540:       }
541:       ncolors = 5*nc;
542:       ISColoringCreate(comm,ncolors,nc*gnx*gny,colors,PETSC_OWN_POINTER,&dd->ghostedcoloring);
543:       ISColoringSetType(dd->ghostedcoloring,IS_COLORING_LOCAL);
544:     }
545:     *coloring = dd->ghostedcoloring;
546:   } else SETERRQ1(PetscObjectComm((PetscObject)da),PETSC_ERR_ARG_WRONG,"Unknown ISColoringType %d",(int)ctype);
547:   return(0);
548: }

550: /* =========================================================================== */
/* Prototypes for the matrix-format and dimension specific routines that
   DMCreateMatrix_DA() dispatches to. */
551: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM,Mat,PetscBool);
552: extern PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM,Mat);
553: extern PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM,Mat,PetscBool);
554: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM,Mat,PetscBool);
555: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM,Mat);
556: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM,Mat,PetscBool);
557: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM,Mat);
558: extern PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM,Mat);
559: extern PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM,Mat);
560: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM,Mat);
561: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM,Mat);
562: extern PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM,Mat);
563: extern PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM,Mat);
564: extern PetscErrorCode DMCreateMatrix_DA_IS(DM,Mat);

566: /*@C
567:    MatSetupDM - Sets the DMDA that is to be used by the HYPRE_StructMatrix PETSc matrix

569:    Logically Collective on mat

571:    Input Parameters:
572: +  mat - the matrix
573: -  da - the da

575:    Level: intermediate

577:    Notes:
       The call is forwarded to the method registered on mat under the name
       "MatSetupDM_C".

577: @*/
578: PetscErrorCode MatSetupDM(Mat mat,DM da)
579: {

585:   PetscTryMethod(mat,"MatSetupDM_C",(Mat,DM),(mat,da)); /* presumably a no-op when mat has no such method - confirm against PetscTryMethod */
586:   return(0);
587: }

/*
   MatView_MPI_DA - views a matrix obtained from a DMDA after permuting its rows and
   columns with the AO of the DMDA (the "natural ordering" mentioned below).

   Input Parameters:
   A      - the matrix; MatGetDM() must return a DM for it
   viewer - the viewer to print to

   For the PETSC_VIEWER_ASCII_INFO and PETSC_VIEWER_ASCII_INFO_DETAIL formats the
   routine returns immediately. Otherwise it builds an index set from the locally owned
   row range mapped through AOApplicationToPetsc(), extracts the corresponding
   submatrix, gives it the options prefix and name of A, views it and destroys it.

   NOTE(review): the listing numbers skip lines and ierr is declared but unused in the
   visible text, so error-checking statements were presumably dropped by the listing -
   confirm against the real file.
*/
589: PetscErrorCode  MatView_MPI_DA(Mat A,PetscViewer viewer)
590: {
591:   DM                da;
592:   PetscErrorCode    ierr;
593:   const char        *prefix;
594:   Mat               Anatural;
595:   AO                ao;
596:   PetscInt          rstart,rend,*petsc,i;
597:   IS                is;
598:   MPI_Comm          comm;
599:   PetscViewerFormat format;

602:   /* Check whether we are just printing info, in which case MatView() already viewed everything we wanted to view */
603:   PetscViewerGetFormat(viewer,&format);
604:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) return(0);

606:   PetscObjectGetComm((PetscObject)A,&comm);
607:   MatGetDM(A, &da);
608:   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");

610:   DMDAGetAO(da,&ao);
611:   MatGetOwnershipRange(A,&rstart,&rend);
612:   PetscMalloc1(rend-rstart,&petsc);
613:   for (i=rstart; i<rend; i++) petsc[i-rstart] = i; /* start from the locally owned row numbers */
614:   AOApplicationToPetsc(ao,rend-rstart,petsc); /* presumably converts the array in place - confirm */
615:   ISCreateGeneral(comm,rend-rstart,petsc,PETSC_OWN_POINTER,&is); /* petsc is handed over with PETSC_OWN_POINTER and not freed here */

617:   /* call viewer on natural ordering */
618:   MatCreateSubMatrix(A,is,is,MAT_INITIAL_MATRIX,&Anatural);
619:   ISDestroy(&is);
620:   PetscObjectGetOptionsPrefix((PetscObject)A,&prefix);
621:   PetscObjectSetOptionsPrefix((PetscObject)Anatural,prefix);
622:   PetscObjectSetName((PetscObject)Anatural,((PetscObject)A)->name);
623:   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_TRUE; /* flag is set only for the duration of the MatView() call */
624:   MatView(Anatural,viewer);
625:   ((PetscObject)Anatural)->donotPetscObjectPrintClassNamePrefixType = PETSC_FALSE;
626:   MatDestroy(&Anatural);
627:   return(0);
628: }

/*
   MatLoad_MPI_DA - loads a matrix from a viewer and permutes it with the AO of the
   DMDA attached to A before replacing the contents of A.

   Input Parameters:
   A      - the matrix to fill; MatGetDM() must return a DM for it
   viewer - the viewer to load from

   A temporary matrix with the type and sizes of A is loaded from the viewer. An index
   set is built from its locally owned row range mapped through AOPetscToApplication(),
   the corresponding submatrix is extracted, and MatHeaderReplace() puts it into A.

   NOTE(review): the listing numbers skip lines inside this function, so some
   statements (presumably error-checking boilerplate) are not visible here - confirm
   against the real file.
*/
630: PetscErrorCode  MatLoad_MPI_DA(Mat A,PetscViewer viewer)
631: {
632:   DM             da;
634:   Mat            Anatural,Aapp;
635:   AO             ao;
636:   PetscInt       rstart,rend,*app,i,m,n,M,N;
637:   IS             is;
638:   MPI_Comm       comm;

641:   PetscObjectGetComm((PetscObject)A,&comm);
642:   MatGetDM(A, &da);
643:   if (!da) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix not generated from a DMDA");

645:   /* Load the matrix in natural ordering */
646:   MatCreate(PetscObjectComm((PetscObject)A),&Anatural);
647:   MatSetType(Anatural,((PetscObject)A)->type_name);
648:   MatGetSize(A,&M,&N);
649:   MatGetLocalSize(A,&m,&n);
650:   MatSetSizes(Anatural,m,n,M,N); /* same local and global sizes as A */
651:   MatLoad(Anatural,viewer);

653:   /* Map natural ordering to application ordering and create IS */
654:   DMDAGetAO(da,&ao);
655:   MatGetOwnershipRange(Anatural,&rstart,&rend);
656:   PetscMalloc1(rend-rstart,&app);
657:   for (i=rstart; i<rend; i++) app[i-rstart] = i; /* start from the locally owned row numbers */
658:   AOPetscToApplication(ao,rend-rstart,app); /* presumably converts the array in place - confirm */
659:   ISCreateGeneral(comm,rend-rstart,app,PETSC_OWN_POINTER,&is); /* app is handed over with PETSC_OWN_POINTER and not freed here */

661:   /* Do permutation and replace header */
662:   MatCreateSubMatrix(Anatural,is,is,MAT_INITIAL_MATRIX,&Aapp);
663:   MatHeaderReplace(A,&Aapp); /* Aapp is not destroyed here; presumably MatHeaderReplace() takes it over - confirm */
664:   ISDestroy(&is);
665:   MatDestroy(&Anatural);
666:   return(0);
667: }

/*
   DMCreateMatrix_DA - creates a matrix sized for a DMDA and dispatches to the
   format-specific routine declared above to set it up.

   Input Parameter:
   da - the DMDA; da->mattype selects the matrix type

   Output Parameter:
   J - the new matrix

   Steps visible here:
   1. Create the matrix with local size dof*nx*ny*nz and global size dof*M*N*P, set its
      type from da->mattype, apply options, and attach the DM.
   2. Probe the matrix for a preallocation method, in the order AIJ, BAIJ, SBAIJ, SELL,
      IS, by querying the "...SetPreallocation_C" function names.
   3. Call the routine matching the first family found and the dimension. For AIJ the
      "_Fill" variant is used when dd->ofill is set. If no family is found, the block
      size, MatSetUp() and the local-to-global mapping are applied directly.
   4. Set the stencil from the ghost corners and, when the communicator has more than
      one process, install MatView_MPI_DA() and MatLoad_MPI_DA() as the view and load
      operations.

   NOTE(review): the listing numbers skip lines inside this function, so some
   statements (presumably error-checking boilerplate) are not visible here - confirm
   against the real file.
*/
669: PetscErrorCode DMCreateMatrix_DA(DM da, Mat *J)
670: {
672:   PetscInt       dim,dof,nx,ny,nz,dims[3],starts[3],M,N,P;
673:   Mat            A;
674:   MPI_Comm       comm;
675:   MatType        Atype;
676:   void           (*aij)(void)=NULL,(*baij)(void)=NULL,(*sbaij)(void)=NULL,(*sell)(void)=NULL,(*is)(void)=NULL;
677:   MatType        mtype;
678:   PetscMPIInt    size;
679:   DM_DA          *dd = (DM_DA*)da->data;

682:   MatInitializePackage();
683:   mtype = da->mattype;

685:   /*
686:                                   m
687:           ------------------------------------------------------
688:          |                                                     |
689:          |                                                     |
690:          |               ----------------------                |
691:          |               |                    |                |
692:       n  |           ny  |                    |                |
693:          |               |                    |                |
694:          |               .---------------------                |
695:          |             (xs,ys)     nx                          |
696:          |            .                                        |
697:          |         (gxs,gys)                                   |
698:          |                                                     |
699:           -----------------------------------------------------
700:   */

702:   /*
703:          dof - number of components per grid point (read from dd->w)
704:          M,N,P - read from dd->M, dd->N, dd->P and used for the global matrix size

706:   */
707:   M   = dd->M;
708:   N   = dd->N;
709:   P   = dd->P;
710:   dim = da->dim;
711:   dof = dd->w;
712:   /* DMDAGetInfo(da,&dim,&M,&N,&P,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL); */
713:   DMDAGetCorners(da,NULL,NULL,NULL,&nx,&ny,&nz);
714:   PetscObjectGetComm((PetscObject)da,&comm);
715:   MatCreate(comm,&A);
716:   MatSetSizes(A,dof*nx*ny*nz,dof*nx*ny*nz,dof*M*N*P,dof*M*N*P);
717:   MatSetType(A,mtype);
718:   MatSetFromOptions(A);
719:   MatSetDM(A,da);
720:   if (da->structure_only) {
721:     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE);
722:   }
723:   MatGetType(A,&Atype); /* Atype is only used in the error messages below */
724:   /*
725:      We do not provide a getmatrix function in the DMDA operations because
726:    the basic DMDA does not know about matrices. We think of DMDA as being
727:    more low-level than matrices. This is kind of cheating, because sometimes
728:    we think of DMDA as higher level than matrices.

730:      We could switch based on Atype (or mtype), but we do not since the
731:    specialized setting routines depend only on the particular preallocation
732:    details of the matrix, not the type itself.
733:   */
734:   PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJSetPreallocation_C",&aij);
735:   if (!aij) {
736:     PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJSetPreallocation_C",&aij);
737:   }
738:   if (!aij) {
739:     PetscObjectQueryFunction((PetscObject)A,"MatMPIBAIJSetPreallocation_C",&baij);
740:     if (!baij) {
741:       PetscObjectQueryFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",&baij);
742:     }
743:     if (!baij) {
744:       PetscObjectQueryFunction((PetscObject)A,"MatMPISBAIJSetPreallocation_C",&sbaij);
745:       if (!sbaij) {
746:         PetscObjectQueryFunction((PetscObject)A,"MatSeqSBAIJSetPreallocation_C",&sbaij);
747:       }
748:       if (!sbaij) {
749:         PetscObjectQueryFunction((PetscObject)A,"MatMPISELLSetPreallocation_C",&sell);
750:         if (!sell) {
751:           PetscObjectQueryFunction((PetscObject)A,"MatSeqSELLSetPreallocation_C",&sell);
752:         }
753:       }
754:       if (!sell) { /* also reached when sbaij was found; harmless because the sbaij branch below is tested first */
755:         PetscObjectQueryFunction((PetscObject)A,"MatISSetPreallocation_C",&is);
756:       }
757:     }
758:   }
759:   if (aij) { /* NOTE(review): no error is raised in this branch when dim is not 1, 2 or 3 */
760:     if (dim == 1) {
761:       if (dd->ofill) {
762:         DMCreateMatrix_DA_1d_MPIAIJ_Fill(da,A);
763:       } else {
764:         DMBoundaryType bx;
765:         PetscMPIInt  size; /* shadows the function-level size declared above */
766:         DMDAGetInfo(da,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&bx,NULL,NULL,NULL);
767:         MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);
768:         if (size == 1 && bx == DM_BOUNDARY_NONE) {
769:           DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(da,A,PETSC_FALSE);
770:         } else {
771:           DMCreateMatrix_DA_1d_MPIAIJ(da,A,PETSC_FALSE);
772:         }
773:       }
774:     } else if (dim == 2) {
775:       if (dd->ofill) {
776:         DMCreateMatrix_DA_2d_MPIAIJ_Fill(da,A);
777:       } else {
778:         DMCreateMatrix_DA_2d_MPIAIJ(da,A,PETSC_FALSE);
779:       }
780:     } else if (dim == 3) {
781:       if (dd->ofill) {
782:         DMCreateMatrix_DA_3d_MPIAIJ_Fill(da,A);
783:       } else {
784:         DMCreateMatrix_DA_3d_MPIAIJ(da,A,PETSC_FALSE);
785:       }
786:     }
787:   } else if (baij) {
788:     if (dim == 2) {
789:       DMCreateMatrix_DA_2d_MPIBAIJ(da,A);
790:     } else if (dim == 3) {
791:       DMCreateMatrix_DA_3d_MPIBAIJ(da,A);
792:     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
793:   } else if (sbaij) {
794:     if (dim == 2) {
795:       DMCreateMatrix_DA_2d_MPISBAIJ(da,A);
796:     } else if (dim == 3) {
797:       DMCreateMatrix_DA_3d_MPISBAIJ(da,A);
798:     } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
799:   } else if (sell) {
800:      if (dim == 2) {
801:        DMCreateMatrix_DA_2d_MPISELL(da,A);
802:      } else if (dim == 3) {
803:        DMCreateMatrix_DA_3d_MPISELL(da,A);
804:      } else SETERRQ3(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Not implemented for %D dimension and Matrix Type: %s in %D dimension! Send mail to petsc-maint@mcs.anl.gov for code",dim,Atype,dim);
805:   } else if (is) {
806:     DMCreateMatrix_DA_IS(da,A);
807:   } else { /* no known preallocation method: generic setup only */
808:     ISLocalToGlobalMapping ltog;

810:     MatSetBlockSize(A,dof);
811:     MatSetUp(A);
812:     DMGetLocalToGlobalMapping(da,&ltog);
813:     MatSetLocalToGlobalMapping(A,ltog,ltog);
814:   }
815:   DMDAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);
816:   MatSetStencil(A,dim,dims,starts,dof);
817:   MatSetDM(A,da); /* second MatSetDM() call on A in this function; the first follows MatSetFromOptions() */
818:   MPI_Comm_size(comm,&size);
819:   if (size > 1) {
820:     /* change viewer to display matrix in natural ordering */
821:     MatSetOperation(A, MATOP_VIEW, (void (*)(void))MatView_MPI_DA);
822:     MatSetOperation(A, MATOP_LOAD, (void (*)(void))MatLoad_MPI_DA);
823:   }
824:   *J = A;
825:   return(0);
826: }

828: /* ---------------------------------------------------------------------------------*/
829: PETSC_EXTERN PetscErrorCode MatISSetPreallocation_IS(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);

831: PetscErrorCode DMCreateMatrix_DA_IS(DM dm,Mat J)
832: {
833:   DM_DA                  *da = (DM_DA*)dm->data;
834:   Mat                    lJ;
835:   ISLocalToGlobalMapping ltog;
836:   IS                     is_loc_filt, is_glob;
837:   const PetscInt         *e_loc,*idx;
838:   PetscInt               nel,nen,nv,dof,dim,*gidx,nb;
839:   PetscBool              flg;
840:   PetscErrorCode         ierr;

842:   /* The l2g map of DMDA has all ghosted nodes, and e_loc is a subset of all the local nodes (including the ghosted)
843:      We need to filter the local indices that are represented through the DMDAGetElements decomposition
844:      This is because the size of the local matrices in MATIS is the local size of the l2g map */
846:   dof  = da->w;
847:   dim  = dm->dim;

849:   MatSetBlockSize(J,dof);

851:   /* get local elements indices in local DMDA numbering */
852:   DMDAGetElements(dm,&nel,&nen,&e_loc); /* this will throw an error if the stencil type is not DMDA_STENCIL_BOX */
853:   ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nel*nen,e_loc,PETSC_COPY_VALUES,&is_loc_filt);
854:   DMDARestoreElements(dm,&nel,&nen,&e_loc);

856:   /* obtain a consistent local ordering for MATIS */
857:   ISSortRemoveDups(is_loc_filt);
858:   ISBlockGetLocalSize(is_loc_filt,&nb);
859:   DMGetLocalToGlobalMapping(dm,&ltog);
860:   ISLocalToGlobalMappingGetSize(ltog,&nv);
861:   PetscMalloc1(PetscMax(nb,nv/dof),&gidx);
862:   ISBlockGetIndices(is_loc_filt,&idx);
863:   ISLocalToGlobalMappingApplyBlock(ltog,nb,idx,gidx);
864:   ISBlockRestoreIndices(is_loc_filt,&idx);
865:   ISCreateBlock(PetscObjectComm((PetscObject)dm),dof,nb,gidx,PETSC_USE_POINTER,&is_glob);
866:   ISLocalToGlobalMappingCreateIS(is_glob,&ltog);
867:   ISDestroy(&is_glob);
868:   MatSetLocalToGlobalMapping(J,ltog,ltog);
869:   ISLocalToGlobalMappingDestroy(&ltog);

871:   /* We also attach a l2g map to the local matrices to have MatSetValueLocal to work */
872:   MatISGetLocalMat(J,&lJ);
873:   ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);
874:   ISDestroy(&is_loc_filt);
875:   ISCreateStride(PetscObjectComm((PetscObject)lJ),nv/dof,0,1,&is_glob);
876:   ISGetIndices(is_glob,&idx);
877:   ISGlobalToLocalMappingApplyBlock(ltog,IS_GTOLM_MASK,nv/dof,idx,&nb,gidx);
878:   ISRestoreIndices(is_glob,&idx);
879:   ISDestroy(&is_glob);
880:   ISLocalToGlobalMappingDestroy(&ltog);
881:   ISCreateBlock(PETSC_COMM_SELF,dof,nb,gidx,PETSC_USE_POINTER,&is_loc_filt);
882:   ISLocalToGlobalMappingCreateIS(is_loc_filt,&ltog);
883:   ISDestroy(&is_loc_filt);
884:   MatSetLocalToGlobalMapping(lJ,ltog,ltog);
885:   ISLocalToGlobalMappingDestroy(&ltog);
886:   PetscFree(gidx);

888:   /* Preallocation (not exact): we reuse the preallocation routines of the assembled version  */
889:   flg = dm->prealloc_only;
890:   dm->prealloc_only = PETSC_TRUE;
891:   switch (dim) {
892:   case 1:
893:     PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);
894:     DMCreateMatrix_DA_1d_MPIAIJ(dm,J,PETSC_TRUE);
895:     PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);
896:     break;
897:   case 2:
898:     PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);
899:     DMCreateMatrix_DA_2d_MPIAIJ(dm,J,PETSC_TRUE);
900:     PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);
901:     break;
902:   case 3:
903:     PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",MatISSetPreallocation_IS);
904:     DMCreateMatrix_DA_3d_MPIAIJ(dm,J,PETSC_TRUE);
905:     PetscObjectComposeFunction((PetscObject)J,"MatMPIAIJSetPreallocation_C",NULL);
906:     break;
907:   default:
908:     SETERRQ1(PetscObjectComm((PetscObject)dm),PETSC_ERR_SUP,"Unhandled dimension %d",dim);
909:     break;
910:   }
911:   dm->prealloc_only = flg;
912:   return(0);
913: }

915: PetscErrorCode DMCreateMatrix_DA_2d_MPISELL(DM da,Mat J)
916: {
917:   PetscErrorCode         ierr;
918:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p;
919:   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
920:   MPI_Comm               comm;
921:   PetscScalar            *values;
922:   DMBoundaryType         bx,by;
923:   ISLocalToGlobalMapping ltog;
924:   DMDAStencilType        st;

927:   /*
928:          nc - number of components per grid point
929:          col - number of colors needed in one direction for single component problem

931:   */
932:   DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);
933:   col  = 2*s + 1;
934:   DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);
935:   DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);
936:   PetscObjectGetComm((PetscObject)da,&comm);

938:   PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);
939:   DMGetLocalToGlobalMapping(da,&ltog);

941:   MatSetBlockSize(J,nc);
942:   /* determine the matrix preallocation information */
943:   MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
944:   for (i=xs; i<xs+nx; i++) {

946:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
947:     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

949:     for (j=ys; j<ys+ny; j++) {
950:       slot = i - gxs + gnx*(j - gys);

952:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
953:       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

955:       cnt = 0;
956:       for (k=0; k<nc; k++) {
957:         for (l=lstart; l<lend+1; l++) {
958:           for (p=pstart; p<pend+1; p++) {
959:             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
960:               cols[cnt++] = k + nc*(slot + gnx*l + p);
961:             }
962:           }
963:         }
964:         rows[k] = k + nc*(slot);
965:       }
966:       MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
967:     }
968:   }
969:   MatSetBlockSize(J,nc);
970:   MatSeqSELLSetPreallocation(J,0,dnz);
971:   MatMPISELLSetPreallocation(J,0,dnz,0,onz);
972:   MatPreallocateFinalize(dnz,onz);

974:   MatSetLocalToGlobalMapping(J,ltog,ltog);

976:   /*
977:     For each node in the grid: we get the neighbors in the local (on processor ordering
978:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
979:     PETSc ordering.
980:   */
981:   if (!da->prealloc_only) {
982:     PetscCalloc1(col*col*nc*nc,&values);
983:     for (i=xs; i<xs+nx; i++) {

985:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
986:       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

988:       for (j=ys; j<ys+ny; j++) {
989:         slot = i - gxs + gnx*(j - gys);

991:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
992:         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

994:         cnt = 0;
995:         for (k=0; k<nc; k++) {
996:           for (l=lstart; l<lend+1; l++) {
997:             for (p=pstart; p<pend+1; p++) {
998:               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
999:                 cols[cnt++] = k + nc*(slot + gnx*l + p);
1000:               }
1001:             }
1002:           }
1003:           rows[k] = k + nc*(slot);
1004:         }
1005:         MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);
1006:       }
1007:     }
1008:     PetscFree(values);
1009:     /* do not copy values to GPU since they are all zero and not yet needed there */
1010:     MatBindToCPU(J,PETSC_TRUE);
1011:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1012:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1013:     MatBindToCPU(J,PETSC_FALSE);
1014:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1015:   }
1016:   PetscFree2(rows,cols);
1017:   return(0);
1018: }

1020: PetscErrorCode DMCreateMatrix_DA_3d_MPISELL(DM da,Mat J)
1021: {
1022:   PetscErrorCode         ierr;
1023:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1024:   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1025:   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1026:   MPI_Comm               comm;
1027:   PetscScalar            *values;
1028:   DMBoundaryType         bx,by,bz;
1029:   ISLocalToGlobalMapping ltog;
1030:   DMDAStencilType        st;

1033:   /*
1034:          nc - number of components per grid point
1035:          col - number of colors needed in one direction for single component problem

1037:   */
1038:   DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);
1039:   col  = 2*s + 1;
1040:   DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
1041:   DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
1042:   PetscObjectGetComm((PetscObject)da,&comm);

1044:   PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);
1045:   DMGetLocalToGlobalMapping(da,&ltog);

1047:   MatSetBlockSize(J,nc);
1048:   /* determine the matrix preallocation information */
1049:   MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);
1050:   for (i=xs; i<xs+nx; i++) {
1051:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1052:     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1053:     for (j=ys; j<ys+ny; j++) {
1054:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1055:       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1056:       for (k=zs; k<zs+nz; k++) {
1057:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1058:         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

1060:         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

1062:         cnt = 0;
1063:         for (l=0; l<nc; l++) {
1064:           for (ii=istart; ii<iend+1; ii++) {
1065:             for (jj=jstart; jj<jend+1; jj++) {
1066:               for (kk=kstart; kk<kend+1; kk++) {
1067:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1068:                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1069:                 }
1070:               }
1071:             }
1072:           }
1073:           rows[l] = l + nc*(slot);
1074:         }
1075:         MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
1076:       }
1077:     }
1078:   }
1079:   MatSetBlockSize(J,nc);
1080:   MatSeqSELLSetPreallocation(J,0,dnz);
1081:   MatMPISELLSetPreallocation(J,0,dnz,0,onz);
1082:   MatPreallocateFinalize(dnz,onz);
1083:   MatSetLocalToGlobalMapping(J,ltog,ltog);

1085:   /*
1086:     For each node in the grid: we get the neighbors in the local (on processor ordering
1087:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1088:     PETSc ordering.
1089:   */
1090:   if (!da->prealloc_only) {
1091:     PetscCalloc1(col*col*col*nc*nc*nc,&values);
1092:     for (i=xs; i<xs+nx; i++) {
1093:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1094:       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1095:       for (j=ys; j<ys+ny; j++) {
1096:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1097:         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1098:         for (k=zs; k<zs+nz; k++) {
1099:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1100:           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

1102:           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

1104:           cnt = 0;
1105:           for (l=0; l<nc; l++) {
1106:             for (ii=istart; ii<iend+1; ii++) {
1107:               for (jj=jstart; jj<jend+1; jj++) {
1108:                 for (kk=kstart; kk<kend+1; kk++) {
1109:                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1110:                     cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1111:                   }
1112:                 }
1113:               }
1114:             }
1115:             rows[l] = l + nc*(slot);
1116:           }
1117:           MatSetValuesLocal(J,nc,rows,cnt,cols,values,INSERT_VALUES);
1118:         }
1119:       }
1120:     }
1121:     PetscFree(values);
1122:     /* do not copy values to GPU since they are all zero and not yet needed there */
1123:     MatBindToCPU(J,PETSC_TRUE);
1124:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1125:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1126:     MatBindToCPU(J,PETSC_FALSE);
1127:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1128:   }
1129:   PetscFree2(rows,cols);
1130:   return(0);
1131: }

1133: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1134: {
1135:   PetscErrorCode         ierr;
1136:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny,m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,M,N;
1137:   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1138:   MPI_Comm               comm;
1139:   DMBoundaryType         bx,by;
1140:   ISLocalToGlobalMapping ltog,mltog;
1141:   DMDAStencilType        st;
1142:   PetscBool              removedups = PETSC_FALSE;

1145:   /*
1146:          nc - number of components per grid point
1147:          col - number of colors needed in one direction for single component problem

1149:   */
1150:   DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);
1151:   if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1152:     MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);
1153:   }
1154:   col  = 2*s + 1;
1155:   /*
1156:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1157:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1158:   */
1159:   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1160:   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1161:   DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);
1162:   DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);
1163:   PetscObjectGetComm((PetscObject)da,&comm);

1165:   PetscMalloc2(nc,&rows,col*col*nc*nc,&cols);
1166:   DMGetLocalToGlobalMapping(da,&ltog);

1168:   MatSetBlockSize(J,nc);
1169:   /* determine the matrix preallocation information */
1170:   MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
1171:   for (i=xs; i<xs+nx; i++) {

1173:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1174:     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

1176:     for (j=ys; j<ys+ny; j++) {
1177:       slot = i - gxs + gnx*(j - gys);

1179:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1180:       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

1182:       cnt = 0;
1183:       for (k=0; k<nc; k++) {
1184:         for (l=lstart; l<lend+1; l++) {
1185:           for (p=pstart; p<pend+1; p++) {
1186:             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1187:               cols[cnt++] = k + nc*(slot + gnx*l + p);
1188:             }
1189:           }
1190:         }
1191:         rows[k] = k + nc*(slot);
1192:       }
1193:       if (removedups) {
1194:         MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
1195:       } else {
1196:         MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
1197:       }
1198:     }
1199:   }
1200:   MatSetBlockSize(J,nc);
1201:   MatSeqAIJSetPreallocation(J,0,dnz);
1202:   MatMPIAIJSetPreallocation(J,0,dnz,0,onz);
1203:   MatPreallocateFinalize(dnz,onz);
1204:   MatGetLocalToGlobalMapping(J,&mltog,NULL);
1205:   if (!mltog) {
1206:     MatSetLocalToGlobalMapping(J,ltog,ltog);
1207:   }

1209:   /*
1210:     For each node in the grid: we get the neighbors in the local (on processor ordering
1211:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1212:     PETSc ordering.
1213:   */
1214:   if (!da->prealloc_only) {
1215:     for (i=xs; i<xs+nx; i++) {

1217:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1218:       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

1220:       for (j=ys; j<ys+ny; j++) {
1221:         slot = i - gxs + gnx*(j - gys);

1223:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1224:         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

1226:         cnt = 0;
1227:         for (l=lstart; l<lend+1; l++) {
1228:           for (p=pstart; p<pend+1; p++) {
1229:             if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star have either l = 0 or p = 0 */
1230:               cols[cnt++] = nc*(slot + gnx*l + p);
1231:               for (k=1; k<nc; k++) {
1232:                 cols[cnt] = 1 + cols[cnt-1];cnt++;
1233:               }
1234:             }
1235:           }
1236:         }
1237:         for (k=0; k<nc; k++) rows[k] = k + nc*(slot);
1238:         MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);
1239:       }
1240:     }
1241:     /* do not copy values to GPU since they are all zero and not yet needed there */
1242:     MatBindToCPU(J,PETSC_TRUE);
1243:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1244:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1245:     MatBindToCPU(J,PETSC_FALSE);
1246:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1247:     if (bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE) {
1248:       MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);
1249:     }
1250:   }
1251:   PetscFree2(rows,cols);
1252:   return(0);
1253: }

1255: PetscErrorCode DMCreateMatrix_DA_2d_MPIAIJ_Fill(DM da,Mat J)
1256: {
1257:   PetscErrorCode         ierr;
1258:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1259:   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt,maxcnt = 0,l,p,M,N;
1260:   PetscInt               lstart,lend,pstart,pend,*dnz,*onz;
1261:   DM_DA                  *dd = (DM_DA*)da->data;
1262:   PetscInt               ifill_col,*ofill = dd->ofill, *dfill = dd->dfill;
1263:   MPI_Comm               comm;
1264:   DMBoundaryType         bx,by;
1265:   ISLocalToGlobalMapping ltog;
1266:   DMDAStencilType        st;
1267:   PetscBool              removedups = PETSC_FALSE;

1270:   /*
1271:          nc - number of components per grid point
1272:          col - number of colors needed in one direction for single component problem

1274:   */
1275:   DMDAGetInfo(da,&dim,&m,&n,&M,&N,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);
1276:   col  = 2*s + 1;
1277:   /*
1278:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1279:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1280:   */
1281:   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1282:   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1283:   DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);
1284:   DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);
1285:   PetscObjectGetComm((PetscObject)da,&comm);

1287:   PetscMalloc1(col*col*nc,&cols);
1288:   DMGetLocalToGlobalMapping(da,&ltog);

1290:   MatSetBlockSize(J,nc);
1291:   /* determine the matrix preallocation information */
1292:   MatPreallocateInitialize(comm,nc*nx*ny,nc*nx*ny,dnz,onz);
1293:   for (i=xs; i<xs+nx; i++) {

1295:     pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1296:     pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

1298:     for (j=ys; j<ys+ny; j++) {
1299:       slot = i - gxs + gnx*(j - gys);

1301:       lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1302:       lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

1304:       for (k=0; k<nc; k++) {
1305:         cnt = 0;
1306:         for (l=lstart; l<lend+1; l++) {
1307:           for (p=pstart; p<pend+1; p++) {
1308:             if (l || p) {
1309:               if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1310:                 for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1311:               }
1312:             } else {
1313:               if (dfill) {
1314:                 for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1315:               } else {
1316:                 for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1317:               }
1318:             }
1319:           }
1320:         }
1321:         row    = k + nc*(slot);
1322:         maxcnt = PetscMax(maxcnt,cnt);
1323:         if (removedups) {
1324:           MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);
1325:         } else {
1326:           MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);
1327:         }
1328:       }
1329:     }
1330:   }
1331:   MatSeqAIJSetPreallocation(J,0,dnz);
1332:   MatMPIAIJSetPreallocation(J,0,dnz,0,onz);
1333:   MatPreallocateFinalize(dnz,onz);
1334:   MatSetLocalToGlobalMapping(J,ltog,ltog);

1336:   /*
1337:     For each node in the grid: we get the neighbors in the local (on processor ordering
1338:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1339:     PETSc ordering.
1340:   */
1341:   if (!da->prealloc_only) {
1342:     for (i=xs; i<xs+nx; i++) {

1344:       pstart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1345:       pend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));

1347:       for (j=ys; j<ys+ny; j++) {
1348:         slot = i - gxs + gnx*(j - gys);

1350:         lstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1351:         lend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));

1353:         for (k=0; k<nc; k++) {
1354:           cnt = 0;
1355:           for (l=lstart; l<lend+1; l++) {
1356:             for (p=pstart; p<pend+1; p++) {
1357:               if (l || p) {
1358:                 if ((st == DMDA_STENCIL_BOX) || (!l || !p)) {  /* entries on star */
1359:                   for (ifill_col=ofill[k]; ifill_col<ofill[k+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + gnx*l + p);
1360:                 }
1361:               } else {
1362:                 if (dfill) {
1363:                   for (ifill_col=dfill[k]; ifill_col<dfill[k+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + gnx*l + p);
1364:                 } else {
1365:                   for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + gnx*l + p);
1366:                 }
1367:               }
1368:             }
1369:           }
1370:           row  = k + nc*(slot);
1371:           MatSetValuesLocal(J,1,&row,cnt,cols,NULL,INSERT_VALUES);
1372:         }
1373:       }
1374:     }
1375:     /* do not copy values to GPU since they are all zero and not yet needed there */
1376:     MatBindToCPU(J,PETSC_TRUE);
1377:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1378:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1379:     MatBindToCPU(J,PETSC_FALSE);
1380:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1381:   }
1382:   PetscFree(cols);
1383:   return(0);
1384: }

1386: /* ---------------------------------------------------------------------------------*/

1388: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1389: {
1390:   PetscErrorCode         ierr;
1391:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1392:   PetscInt               m,n,dim,s,*cols = NULL,k,nc,*rows = NULL,col,cnt,l,p,*dnz = NULL,*onz = NULL;
1393:   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
1394:   MPI_Comm               comm;
1395:   DMBoundaryType         bx,by,bz;
1396:   ISLocalToGlobalMapping ltog,mltog;
1397:   DMDAStencilType        st;
1398:   PetscBool              removedups = PETSC_FALSE;

1401:   /*
1402:          nc - number of components per grid point
1403:          col - number of colors needed in one direction for single component problem

1405:   */
1406:   DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);
1407:   if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1408:     MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);
1409:   }
1410:   col  = 2*s + 1;

1412:   /*
1413:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
1414:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
1415:   */
1416:   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
1417:   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
1418:   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;

1420:   DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
1421:   DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
1422:   PetscObjectGetComm((PetscObject)da,&comm);

1424:   PetscMalloc2(nc,&rows,col*col*col*nc*nc,&cols);
1425:   DMGetLocalToGlobalMapping(da,&ltog);

1427:   MatSetBlockSize(J,nc);
1428:   /* determine the matrix preallocation information */
1429:   MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);
1430:   for (i=xs; i<xs+nx; i++) {
1431:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1432:     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1433:     for (j=ys; j<ys+ny; j++) {
1434:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1435:       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1436:       for (k=zs; k<zs+nz; k++) {
1437:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1438:         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

1440:         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

1442:         cnt = 0;
1443:         for (l=0; l<nc; l++) {
1444:           for (ii=istart; ii<iend+1; ii++) {
1445:             for (jj=jstart; jj<jend+1; jj++) {
1446:               for (kk=kstart; kk<kend+1; kk++) {
1447:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1448:                   cols[cnt++] = l + nc*(slot + ii + gnx*jj + gnx*gny*kk);
1449:                 }
1450:               }
1451:             }
1452:           }
1453:           rows[l] = l + nc*(slot);
1454:         }
1455:         if (removedups) {
1456:           MatPreallocateSetLocalRemoveDups(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
1457:         } else {
1458:           MatPreallocateSetLocal(ltog,nc,rows,ltog,cnt,cols,dnz,onz);
1459:         }
1460:       }
1461:     }
1462:   }
1463:   MatSetBlockSize(J,nc);
1464:   MatSeqAIJSetPreallocation(J,0,dnz);
1465:   MatMPIAIJSetPreallocation(J,0,dnz,0,onz);
1466:   MatPreallocateFinalize(dnz,onz);
1467:   MatGetLocalToGlobalMapping(J,&mltog,NULL);
1468:   if (!mltog) {
1469:     MatSetLocalToGlobalMapping(J,ltog,ltog);
1470:   }

1472:   /*
1473:     For each node in the grid: we get the neighbors in the local (on processor ordering
1474:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1475:     PETSc ordering.
1476:   */
1477:   if (!da->prealloc_only) {
1478:     for (i=xs; i<xs+nx; i++) {
1479:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1480:       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1481:       for (j=ys; j<ys+ny; j++) {
1482:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1483:         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1484:         for (k=zs; k<zs+nz; k++) {
1485:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1486:           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

1488:           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

1490:           cnt = 0;
1491:           for (kk=kstart; kk<kend+1; kk++) {
1492:             for (jj=jstart; jj<jend+1; jj++) {
1493:               for (ii=istart; ii<iend+1; ii++) {
1494:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1495:                   cols[cnt++] = nc*(slot + ii + gnx*jj + gnx*gny*kk);
1496:                     for (l=1; l<nc; l++) {
1497:                       cols[cnt] = 1 + cols[cnt-1];cnt++;
1498:                   }
1499:                 }
1500:               }
1501:             }
1502:           }
1503:           rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1504:           MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);
1505:         }
1506:       }
1507:     }
1508:     /* do not copy values to GPU since they are all zero and not yet needed there */
1509:     MatBindToCPU(J,PETSC_TRUE);
1510:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1511:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1512:     if (!isIS && bx == DM_BOUNDARY_NONE && by == DM_BOUNDARY_NONE && bz == DM_BOUNDARY_NONE) {
1513:       MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);
1514:     }
1515:     MatBindToCPU(J,PETSC_FALSE);
1516:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1517:   }
1518:   PetscFree2(rows,cols);
1519:   return(0);
1520: }

1522: /* ---------------------------------------------------------------------------------*/

1524: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ_Fill(DM da,Mat J)
1525: {
1526:   PetscErrorCode         ierr;
1527:   DM_DA                  *dd = (DM_DA*)da->data;
1528:   PetscInt               xs,nx,i,j,gxs,gnx,row,k,l;
1529:   PetscInt               m,dim,s,*cols = NULL,nc,cnt,maxcnt = 0,*ocols;
1530:   PetscInt               *ofill = dd->ofill,*dfill = dd->dfill;
1531:   DMBoundaryType         bx;
1532:   ISLocalToGlobalMapping ltog;
1533:   PetscMPIInt            rank,size;

1536:   MPI_Comm_rank(PetscObjectComm((PetscObject)da),&rank);
1537:   MPI_Comm_size(PetscObjectComm((PetscObject)da),&size);

1539:   /*
1540:          nc - number of components per grid point

1542:   */
1543:   DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);
1544:   if (s > 1) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"Matrix creation for 1d not implemented correctly for stencil width larger than 1");
1545:   DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);
1546:   DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);

1548:   MatSetBlockSize(J,nc);
1549:   PetscCalloc2(nx*nc,&cols,nx*nc,&ocols);

1551:   /*
1552:         note should be smaller for first and last process with no periodic
1553:         does not handle dfill
1554:   */
1555:   cnt = 0;
1556:   /* coupling with process to the left */
1557:   for (i=0; i<s; i++) {
1558:     for (j=0; j<nc; j++) {
1559:       ocols[cnt] = ((!rank) ? 0 : (s - i)*(ofill[j+1] - ofill[j]));
1560:       cols[cnt]  = dfill[j+1] - dfill[j] + (s + i)*(ofill[j+1] - ofill[j]);
1561:       if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1562:         if (size > 1) ocols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1563:         else cols[cnt] += (s - i)*(ofill[j+1] - ofill[j]);
1564:       }
1565:       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1566:       cnt++;
1567:     }
1568:   }
1569:   for (i=s; i<nx-s; i++) {
1570:     for (j=0; j<nc; j++) {
1571:       cols[cnt] = dfill[j+1] - dfill[j] + 2*s*(ofill[j+1] - ofill[j]);
1572:       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1573:       cnt++;
1574:     }
1575:   }
1576:   /* coupling with process to the right */
1577:   for (i=nx-s; i<nx; i++) {
1578:     for (j=0; j<nc; j++) {
1579:       ocols[cnt] = ((rank == (size-1)) ? 0 : (i - nx + s + 1)*(ofill[j+1] - ofill[j]));
1580:       cols[cnt]  = dfill[j+1] - dfill[j] + (s + nx - i - 1)*(ofill[j+1] - ofill[j]);
1581:       if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1582:         if (size > 1) ocols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1583:         else cols[cnt] += (i - nx + s + 1)*(ofill[j+1] - ofill[j]);
1584:       }
1585:       maxcnt = PetscMax(maxcnt,ocols[cnt]+cols[cnt]);
1586:       cnt++;
1587:     }
1588:   }

1590:   MatSeqAIJSetPreallocation(J,0,cols);
1591:   MatMPIAIJSetPreallocation(J,0,cols,0,ocols);
1592:   PetscFree2(cols,ocols);

1594:   DMGetLocalToGlobalMapping(da,&ltog);
1595:   MatSetLocalToGlobalMapping(J,ltog,ltog);

1597:   /*
1598:     For each node in the grid: we get the neighbors in the local (on processor ordering
1599:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1600:     PETSc ordering.
1601:   */
1602:   if (!da->prealloc_only) {
1603:     PetscMalloc1(maxcnt,&cols);
1604:     row = xs*nc;
1605:     /* coupling with process to the left */
1606:     for (i=xs; i<xs+s; i++) {
1607:       for (j=0; j<nc; j++) {
1608:         cnt = 0;
1609:         if (rank) {
1610:           for (l=0; l<s; l++) {
1611:             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1612:           }
1613:         }
1614:         if (!rank && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1615:           for (l=0; l<s; l++) {
1616:             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (m + i - s - l)*nc + ofill[k];
1617:           }
1618:         }
1619:         if (dfill) {
1620:           for (k=dfill[j]; k<dfill[j+1]; k++) {
1621:             cols[cnt++] = i*nc + dfill[k];
1622:           }
1623:         } else {
1624:           for (k=0; k<nc; k++) {
1625:             cols[cnt++] = i*nc + k;
1626:           }
1627:         }
1628:         for (l=0; l<s; l++) {
1629:           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1630:         }
1631:         MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);
1632:         row++;
1633:       }
1634:     }
1635:     for (i=xs+s; i<xs+nx-s; i++) {
1636:       for (j=0; j<nc; j++) {
1637:         cnt = 0;
1638:         for (l=0; l<s; l++) {
1639:           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1640:         }
1641:         if (dfill) {
1642:           for (k=dfill[j]; k<dfill[j+1]; k++) {
1643:             cols[cnt++] = i*nc + dfill[k];
1644:           }
1645:         } else {
1646:           for (k=0; k<nc; k++) {
1647:             cols[cnt++] = i*nc + k;
1648:           }
1649:         }
1650:         for (l=0; l<s; l++) {
1651:           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1652:         }
1653:         MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);
1654:         row++;
1655:       }
1656:     }
1657:     /* coupling with process to the right */
1658:     for (i=xs+nx-s; i<xs+nx; i++) {
1659:       for (j=0; j<nc; j++) {
1660:         cnt = 0;
1661:         for (l=0; l<s; l++) {
1662:           for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s + l)*nc + ofill[k];
1663:         }
1664:         if (dfill) {
1665:           for (k=dfill[j]; k<dfill[j+1]; k++) {
1666:             cols[cnt++] = i*nc + dfill[k];
1667:           }
1668:         } else {
1669:           for (k=0; k<nc; k++) {
1670:             cols[cnt++] = i*nc + k;
1671:           }
1672:         }
1673:         if (rank < size-1) {
1674:           for (l=0; l<s; l++) {
1675:             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i + s - l)*nc + ofill[k];
1676:           }
1677:         }
1678:         if ((rank == size-1) && (dd->bx == DM_BOUNDARY_PERIODIC)) {
1679:           for (l=0; l<s; l++) {
1680:             for (k=ofill[j]; k<ofill[j+1]; k++) cols[cnt++] = (i - s - l - m + 2)*nc + ofill[k];
1681:           }
1682:         }
1683:         MatSetValues(J,1,&row,cnt,cols,NULL,INSERT_VALUES);
1684:         row++;
1685:       }
1686:     }
1687:     PetscFree(cols);
1688:     /* do not copy values to GPU since they are all zero and not yet needed there */
1689:     MatBindToCPU(J,PETSC_TRUE);
1690:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1691:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1692:     MatBindToCPU(J,PETSC_FALSE);
1693:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1694:   }
1695:   return(0);
1696: }

1698: /* ---------------------------------------------------------------------------------*/

1700: PetscErrorCode DMCreateMatrix_DA_1d_MPIAIJ(DM da,Mat J,PetscBool isIS)
1701: {
1702:   PetscErrorCode         ierr;
1703:   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1704:   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1705:   PetscInt               istart,iend;
1706:   DMBoundaryType         bx;
1707:   ISLocalToGlobalMapping ltog,mltog;

1710:   /*
1711:          nc - number of components per grid point
1712:          col - number of colors needed in one direction for single component problem

1714:   */
1715:   DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);
1716:   if (!isIS && bx == DM_BOUNDARY_NONE) {
1717:     MatSetOption(J,MAT_SORTED_FULL,PETSC_TRUE);
1718:   }
1719:   col  = 2*s + 1;

1721:   DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);
1722:   DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);

1724:   MatSetBlockSize(J,nc);
1725:   MatSeqAIJSetPreallocation(J,col*nc,NULL);
1726:   MatMPIAIJSetPreallocation(J,col*nc,NULL,col*nc,NULL);

1728:   DMGetLocalToGlobalMapping(da,&ltog);
1729:   MatGetLocalToGlobalMapping(J,&mltog,NULL);
1730:   if (!mltog) {
1731:     MatSetLocalToGlobalMapping(J,ltog,ltog);
1732:   }

1734:   /*
1735:     For each node in the grid: we get the neighbors in the local (on processor ordering
1736:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1737:     PETSc ordering.
1738:   */
1739:   if (!da->prealloc_only) {
1740:     PetscMalloc2(nc,&rows,col*nc*nc,&cols);
1741:     for (i=xs; i<xs+nx; i++) {
1742:       istart = PetscMax(-s,gxs - i);
1743:       iend   = PetscMin(s,gxs + gnx - i - 1);
1744:       slot   = i - gxs;

1746:       cnt = 0;
1747:       for (i1=istart; i1<iend+1; i1++) {
1748:         cols[cnt++] = nc*(slot + i1);
1749:         for (l=1; l<nc; l++) {
1750:           cols[cnt] = 1 + cols[cnt-1];cnt++;
1751:         }
1752:       }
1753:       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1754:       MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);
1755:     }
1756:     /* do not copy values to GPU since they are all zero and not yet needed there */
1757:     MatBindToCPU(J,PETSC_TRUE);
1758:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1759:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1760:     if (!isIS && bx == DM_BOUNDARY_NONE) {
1761:       MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);
1762:     }
1763:     MatBindToCPU(J,PETSC_FALSE);
1764:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1765:     PetscFree2(rows,cols);
1766:   }
1767:   return(0);
1768: }

1770: /* ---------------------------------------------------------------------------------*/

1772: PetscErrorCode DMCreateMatrix_DA_1d_SeqAIJ_NoPreallocation(DM da,Mat J,PetscBool isIS)
1773: {
1774:   PetscErrorCode         ierr;
1775:   PetscInt               xs,nx,i,i1,slot,gxs,gnx;
1776:   PetscInt               m,dim,s,*cols = NULL,nc,*rows = NULL,col,cnt,l;
1777:   PetscInt               istart,iend;
1778:   DMBoundaryType         bx;
1779:   ISLocalToGlobalMapping ltog,mltog;

1782:   /*
1783:          nc - number of components per grid point
1784:          col - number of colors needed in one direction for single component problem
1785:   */
1786:   DMDAGetInfo(da,&dim,&m,NULL,NULL,NULL,NULL,NULL,&nc,&s,&bx,NULL,NULL,NULL);
1787:   col  = 2*s + 1;

1789:   DMDAGetCorners(da,&xs,NULL,NULL,&nx,NULL,NULL);
1790:   DMDAGetGhostCorners(da,&gxs,NULL,NULL,&gnx,NULL,NULL);

1792:   MatSetBlockSize(J,nc);
1793:   MatSeqAIJSetTotalPreallocation(J,nx*nc*col*nc);

1795:   DMGetLocalToGlobalMapping(da,&ltog);
1796:   MatGetLocalToGlobalMapping(J,&mltog,NULL);
1797:   if (!mltog) {
1798:     MatSetLocalToGlobalMapping(J,ltog,ltog);
1799:   }

1801:   /*
1802:     For each node in the grid: we get the neighbors in the local (on processor ordering
1803:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1804:     PETSc ordering.
1805:   */
1806:   if (!da->prealloc_only) {
1807:     PetscMalloc2(nc,&rows,col*nc*nc,&cols);
1808:     for (i=xs; i<xs+nx; i++) {
1809:       istart = PetscMax(-s,gxs - i);
1810:       iend   = PetscMin(s,gxs + gnx - i - 1);
1811:       slot   = i - gxs;

1813:       cnt = 0;
1814:       for (i1=istart; i1<iend+1; i1++) {
1815:         cols[cnt++] = nc*(slot + i1);
1816:         for (l=1; l<nc; l++) {
1817:           cols[cnt] = 1 + cols[cnt-1];cnt++;
1818:         }
1819:       }
1820:       rows[0] = nc*(slot); for (l=1; l<nc; l++) rows[l] = 1 + rows[l-1];
1821:       MatSetValuesLocal(J,nc,rows,cnt,cols,NULL,INSERT_VALUES);
1822:     }
1823:     /* do not copy values to GPU since they are all zero and not yet needed there */
1824:     MatBindToCPU(J,PETSC_TRUE);
1825:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1826:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1827:     if (!isIS && bx == DM_BOUNDARY_NONE) {
1828:       MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);
1829:     }
1830:     MatBindToCPU(J,PETSC_FALSE);
1831:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1832:     PetscFree2(rows,cols);
1833:   }
1834:   MatSetOption(J,MAT_SORTED_FULL,PETSC_FALSE);
1835:   return(0);
1836: }

1838: PetscErrorCode DMCreateMatrix_DA_2d_MPIBAIJ(DM da,Mat J)
1839: {
1840:   PetscErrorCode         ierr;
1841:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1842:   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
1843:   PetscInt               istart,iend,jstart,jend,ii,jj;
1844:   MPI_Comm               comm;
1845:   PetscScalar            *values;
1846:   DMBoundaryType         bx,by;
1847:   DMDAStencilType        st;
1848:   ISLocalToGlobalMapping ltog;

1851:   /*
1852:      nc - number of components per grid point
1853:      col - number of colors needed in one direction for single component problem
1854:   */
1855:   DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);
1856:   col  = 2*s + 1;

1858:   DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);
1859:   DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);
1860:   PetscObjectGetComm((PetscObject)da,&comm);

1862:   PetscMalloc1(col*col*nc*nc,&cols);

1864:   DMGetLocalToGlobalMapping(da,&ltog);

1866:   /* determine the matrix preallocation information */
1867:   MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);
1868:   for (i=xs; i<xs+nx; i++) {
1869:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1870:     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1871:     for (j=ys; j<ys+ny; j++) {
1872:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1873:       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1874:       slot   = i - gxs + gnx*(j - gys);

1876:       /* Find block columns in block row */
1877:       cnt = 0;
1878:       for (ii=istart; ii<iend+1; ii++) {
1879:         for (jj=jstart; jj<jend+1; jj++) {
1880:           if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1881:             cols[cnt++] = slot + ii + gnx*jj;
1882:           }
1883:         }
1884:       }
1885:       MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);
1886:     }
1887:   }
1888:   MatSeqBAIJSetPreallocation(J,nc,0,dnz);
1889:   MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);
1890:   MatPreallocateFinalize(dnz,onz);

1892:   MatSetLocalToGlobalMapping(J,ltog,ltog);

1894:   /*
1895:     For each node in the grid: we get the neighbors in the local (on processor ordering
1896:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1897:     PETSc ordering.
1898:   */
1899:   if (!da->prealloc_only) {
1900:     PetscCalloc1(col*col*nc*nc,&values);
1901:     for (i=xs; i<xs+nx; i++) {
1902:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1903:       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1904:       for (j=ys; j<ys+ny; j++) {
1905:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1906:         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1907:         slot = i - gxs + gnx*(j - gys);
1908:         cnt  = 0;
1909:         for (ii=istart; ii<iend+1; ii++) {
1910:           for (jj=jstart; jj<jend+1; jj++) {
1911:             if (st == DMDA_STENCIL_BOX || !ii || !jj) { /* BOX or on the STAR */
1912:               cols[cnt++] = slot + ii + gnx*jj;
1913:             }
1914:           }
1915:         }
1916:         MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);
1917:       }
1918:     }
1919:     PetscFree(values);
1920:     /* do not copy values to GPU since they are all zero and not yet needed there */
1921:     MatBindToCPU(J,PETSC_TRUE);
1922:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
1923:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
1924:     MatBindToCPU(J,PETSC_FALSE);
1925:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
1926:   }
1927:   PetscFree(cols);
1928:   return(0);
1929: }

1931: PetscErrorCode DMCreateMatrix_DA_3d_MPIBAIJ(DM da,Mat J)
1932: {
1933:   PetscErrorCode         ierr;
1934:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
1935:   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
1936:   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
1937:   MPI_Comm               comm;
1938:   PetscScalar            *values;
1939:   DMBoundaryType         bx,by,bz;
1940:   DMDAStencilType        st;
1941:   ISLocalToGlobalMapping ltog;

1944:   /*
1945:          nc - number of components per grid point
1946:          col - number of colors needed in one direction for single component problem

1948:   */
1949:   DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);
1950:   col  = 2*s + 1;

1952:   DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
1953:   DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
1954:   PetscObjectGetComm((PetscObject)da,&comm);

1956:   PetscMalloc1(col*col*col,&cols);

1958:   DMGetLocalToGlobalMapping(da,&ltog);

1960:   /* determine the matrix preallocation information */
1961:   MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);
1962:   for (i=xs; i<xs+nx; i++) {
1963:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
1964:     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
1965:     for (j=ys; j<ys+ny; j++) {
1966:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
1967:       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
1968:       for (k=zs; k<zs+nz; k++) {
1969:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
1970:         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

1972:         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

1974:         /* Find block columns in block row */
1975:         cnt = 0;
1976:         for (ii=istart; ii<iend+1; ii++) {
1977:           for (jj=jstart; jj<jend+1; jj++) {
1978:             for (kk=kstart; kk<kend+1; kk++) {
1979:               if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
1980:                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
1981:               }
1982:             }
1983:           }
1984:         }
1985:         MatPreallocateSetLocalBlock(ltog,1,&slot,ltog,cnt,cols,dnz,onz);
1986:       }
1987:     }
1988:   }
1989:   MatSeqBAIJSetPreallocation(J,nc,0,dnz);
1990:   MatMPIBAIJSetPreallocation(J,nc,0,dnz,0,onz);
1991:   MatPreallocateFinalize(dnz,onz);

1993:   MatSetLocalToGlobalMapping(J,ltog,ltog);

1995:   /*
1996:     For each node in the grid: we get the neighbors in the local (on processor ordering
1997:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
1998:     PETSc ordering.
1999:   */
2000:   if (!da->prealloc_only) {
2001:     PetscCalloc1(col*col*col*nc*nc,&values);
2002:     for (i=xs; i<xs+nx; i++) {
2003:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2004:       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2005:       for (j=ys; j<ys+ny; j++) {
2006:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2007:         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2008:         for (k=zs; k<zs+nz; k++) {
2009:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2010:           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

2012:           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

2014:           cnt = 0;
2015:           for (ii=istart; ii<iend+1; ii++) {
2016:             for (jj=jstart; jj<jend+1; jj++) {
2017:               for (kk=kstart; kk<kend+1; kk++) {
2018:                 if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2019:                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2020:                 }
2021:               }
2022:             }
2023:           }
2024:           MatSetValuesBlockedLocal(J,1,&slot,cnt,cols,values,INSERT_VALUES);
2025:         }
2026:       }
2027:     }
2028:     PetscFree(values);
2029:     /* do not copy values to GPU since they are all zero and not yet needed there */
2030:     MatBindToCPU(J,PETSC_TRUE);
2031:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
2032:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
2033:     MatBindToCPU(J,PETSC_FALSE);
2034:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
2035:   }
2036:   PetscFree(cols);
2037:   return(0);
2038: }

2040: /*
2041:   This helper is for of SBAIJ preallocation, to discard the lower-triangular values which are difficult to
2042:   identify in the local ordering with periodic domain.
2043: */
2044: static PetscErrorCode L2GFilterUpperTriangular(ISLocalToGlobalMapping ltog,PetscInt *row,PetscInt *cnt,PetscInt col[])
2045: {
2047:   PetscInt       i,n;

2050:   ISLocalToGlobalMappingApplyBlock(ltog,1,row,row);
2051:   ISLocalToGlobalMappingApplyBlock(ltog,*cnt,col,col);
2052:   for (i=0,n=0; i<*cnt; i++) {
2053:     if (col[i] >= *row) col[n++] = col[i];
2054:   }
2055:   *cnt = n;
2056:   return(0);
2057: }

2059: PetscErrorCode DMCreateMatrix_DA_2d_MPISBAIJ(DM da,Mat J)
2060: {
2061:   PetscErrorCode         ierr;
2062:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2063:   PetscInt               m,n,dim,s,*cols,nc,col,cnt,*dnz,*onz;
2064:   PetscInt               istart,iend,jstart,jend,ii,jj;
2065:   MPI_Comm               comm;
2066:   PetscScalar            *values;
2067:   DMBoundaryType         bx,by;
2068:   DMDAStencilType        st;
2069:   ISLocalToGlobalMapping ltog;

2072:   /*
2073:      nc - number of components per grid point
2074:      col - number of colors needed in one direction for single component problem
2075:   */
2076:   DMDAGetInfo(da,&dim,&m,&n,NULL,NULL,NULL,NULL,&nc,&s,&bx,&by,NULL,&st);
2077:   col  = 2*s + 1;

2079:   DMDAGetCorners(da,&xs,&ys,NULL,&nx,&ny,NULL);
2080:   DMDAGetGhostCorners(da,&gxs,&gys,NULL,&gnx,&gny,NULL);
2081:   PetscObjectGetComm((PetscObject)da,&comm);

2083:   PetscMalloc1(col*col*nc*nc,&cols);

2085:   DMGetLocalToGlobalMapping(da,&ltog);

2087:   /* determine the matrix preallocation information */
2088:   MatPreallocateInitialize(comm,nx*ny,nx*ny,dnz,onz);
2089:   for (i=xs; i<xs+nx; i++) {
2090:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2091:     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2092:     for (j=ys; j<ys+ny; j++) {
2093:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2094:       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2095:       slot   = i - gxs + gnx*(j - gys);

2097:       /* Find block columns in block row */
2098:       cnt = 0;
2099:       for (ii=istart; ii<iend+1; ii++) {
2100:         for (jj=jstart; jj<jend+1; jj++) {
2101:           if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2102:             cols[cnt++] = slot + ii + gnx*jj;
2103:           }
2104:         }
2105:       }
2106:       L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);
2107:       MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);
2108:     }
2109:   }
2110:   MatSeqSBAIJSetPreallocation(J,nc,0,dnz);
2111:   MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);
2112:   MatPreallocateFinalize(dnz,onz);

2114:   MatSetLocalToGlobalMapping(J,ltog,ltog);

2116:   /*
2117:     For each node in the grid: we get the neighbors in the local (on processor ordering
2118:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2119:     PETSc ordering.
2120:   */
2121:   if (!da->prealloc_only) {
2122:     PetscCalloc1(col*col*nc*nc,&values);
2123:     for (i=xs; i<xs+nx; i++) {
2124:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2125:       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2126:       for (j=ys; j<ys+ny; j++) {
2127:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2128:         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2129:         slot   = i - gxs + gnx*(j - gys);

2131:         /* Find block columns in block row */
2132:         cnt = 0;
2133:         for (ii=istart; ii<iend+1; ii++) {
2134:           for (jj=jstart; jj<jend+1; jj++) {
2135:             if (st == DMDA_STENCIL_BOX || !ii || !jj) {
2136:               cols[cnt++] = slot + ii + gnx*jj;
2137:             }
2138:           }
2139:         }
2140:         L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);
2141:         MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);
2142:       }
2143:     }
2144:     PetscFree(values);
2145:     /* do not copy values to GPU since they are all zero and not yet needed there */
2146:     MatBindToCPU(J,PETSC_TRUE);
2147:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
2148:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
2149:     MatBindToCPU(J,PETSC_FALSE);
2150:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
2151:   }
2152:   PetscFree(cols);
2153:   return(0);
2154: }

2156: PetscErrorCode DMCreateMatrix_DA_3d_MPISBAIJ(DM da,Mat J)
2157: {
2158:   PetscErrorCode         ierr;
2159:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2160:   PetscInt               m,n,dim,s,*cols,k,nc,col,cnt,p,*dnz,*onz;
2161:   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk;
2162:   MPI_Comm               comm;
2163:   PetscScalar            *values;
2164:   DMBoundaryType         bx,by,bz;
2165:   DMDAStencilType        st;
2166:   ISLocalToGlobalMapping ltog;

2169:   /*
2170:      nc - number of components per grid point
2171:      col - number of colors needed in one direction for single component problem
2172:   */
2173:   DMDAGetInfo(da,&dim,&m,&n,&p,NULL,NULL,NULL,&nc,&s,&bx,&by,&bz,&st);
2174:   col  = 2*s + 1;

2176:   DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
2177:   DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
2178:   PetscObjectGetComm((PetscObject)da,&comm);

2180:   /* create the matrix */
2181:   PetscMalloc1(col*col*col,&cols);

2183:   DMGetLocalToGlobalMapping(da,&ltog);

2185:   /* determine the matrix preallocation information */
2186:   MatPreallocateInitialize(comm,nx*ny*nz,nx*ny*nz,dnz,onz);
2187:   for (i=xs; i<xs+nx; i++) {
2188:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2189:     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2190:     for (j=ys; j<ys+ny; j++) {
2191:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2192:       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2193:       for (k=zs; k<zs+nz; k++) {
2194:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2195:         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

2197:         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

2199:         /* Find block columns in block row */
2200:         cnt = 0;
2201:         for (ii=istart; ii<iend+1; ii++) {
2202:           for (jj=jstart; jj<jend+1; jj++) {
2203:             for (kk=kstart; kk<kend+1; kk++) {
2204:               if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2205:                 cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2206:               }
2207:             }
2208:           }
2209:         }
2210:         L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);
2211:         MatPreallocateSymmetricSetBlock(slot,cnt,cols,dnz,onz);
2212:       }
2213:     }
2214:   }
2215:   MatSeqSBAIJSetPreallocation(J,nc,0,dnz);
2216:   MatMPISBAIJSetPreallocation(J,nc,0,dnz,0,onz);
2217:   MatPreallocateFinalize(dnz,onz);

2219:   MatSetLocalToGlobalMapping(J,ltog,ltog);

2221:   /*
2222:     For each node in the grid: we get the neighbors in the local (on processor ordering
2223:     that includes the ghost points) then MatSetValuesLocal() maps those indices to the global
2224:     PETSc ordering.
2225:   */
2226:   if (!da->prealloc_only) {
2227:     PetscCalloc1(col*col*col*nc*nc,&values);
2228:     for (i=xs; i<xs+nx; i++) {
2229:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2230:       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2231:       for (j=ys; j<ys+ny; j++) {
2232:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2233:         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2234:         for (k=zs; k<zs+nz; k++) {
2235:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2236:           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

2238:           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

2240:           cnt = 0;
2241:           for (ii=istart; ii<iend+1; ii++) {
2242:             for (jj=jstart; jj<jend+1; jj++) {
2243:               for (kk=kstart; kk<kend+1; kk++) {
2244:                 if ((st == DMDA_STENCIL_BOX) || (!ii && !jj) || (!jj && !kk) || (!ii && !kk)) {
2245:                   cols[cnt++] = slot + ii + gnx*jj + gnx*gny*kk;
2246:                 }
2247:               }
2248:             }
2249:           }
2250:           L2GFilterUpperTriangular(ltog,&slot,&cnt,cols);
2251:           MatSetValuesBlocked(J,1,&slot,cnt,cols,values,INSERT_VALUES);
2252:         }
2253:       }
2254:     }
2255:     PetscFree(values);
2256:     /* do not copy values to GPU since they are all zero and not yet needed there */
2257:     MatBindToCPU(J,PETSC_TRUE);
2258:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
2259:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
2260:     MatBindToCPU(J,PETSC_FALSE);
2261:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
2262:   }
2263:   PetscFree(cols);
2264:   return(0);
2265: }

2267: /* ---------------------------------------------------------------------------------*/

2269: PetscErrorCode DMCreateMatrix_DA_3d_MPIAIJ_Fill(DM da,Mat J)
2270: {
2271:   PetscErrorCode         ierr;
2272:   PetscInt               xs,ys,nx,ny,i,j,slot,gxs,gys,gnx,gny;
2273:   PetscInt               m,n,dim,s,*cols,k,nc,row,col,cnt, maxcnt = 0,l,p,*dnz,*onz;
2274:   PetscInt               istart,iend,jstart,jend,kstart,kend,zs,nz,gzs,gnz,ii,jj,kk,M,N,P;
2275:   DM_DA                  *dd = (DM_DA*)da->data;
2276:   PetscInt               ifill_col,*dfill = dd->dfill,*ofill = dd->ofill;
2277:   MPI_Comm               comm;
2278:   PetscScalar            *values;
2279:   DMBoundaryType         bx,by,bz;
2280:   ISLocalToGlobalMapping ltog;
2281:   DMDAStencilType        st;
2282:   PetscBool              removedups = PETSC_FALSE;

2285:   /*
2286:          nc - number of components per grid point
2287:          col - number of colors needed in one direction for single component problem

2289:   */
2290:   DMDAGetInfo(da,&dim,&m,&n,&p,&M,&N,&P,&nc,&s,&bx,&by,&bz,&st);
2291:   col  = 2*s + 1;
2292:   if (bx == DM_BOUNDARY_PERIODIC && (m % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in X is divisible\n\
2293:                  by 2*stencil_width + 1\n");
2294:   if (by == DM_BOUNDARY_PERIODIC && (n % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Y is divisible\n\
2295:                  by 2*stencil_width + 1\n");
2296:   if (bz == DM_BOUNDARY_PERIODIC && (p % col)) SETERRQ(PetscObjectComm((PetscObject)da),PETSC_ERR_SUP,"For coloring efficiency ensure number of grid points in Z is divisible\n\
2297:                  by 2*stencil_width + 1\n");

2299:   /*
2300:        With one processor in periodic domains in a skinny dimension the code will label nonzero columns multiple times
2301:        because of "wrapping" around the end of the domain hitting an entry already counted in the other direction.
2302:   */
2303:   if (M == 1 && 2*s >= m) removedups = PETSC_TRUE;
2304:   if (N == 1 && 2*s >= n) removedups = PETSC_TRUE;
2305:   if (P == 1 && 2*s >= p) removedups = PETSC_TRUE;

2307:   DMDAGetCorners(da,&xs,&ys,&zs,&nx,&ny,&nz);
2308:   DMDAGetGhostCorners(da,&gxs,&gys,&gzs,&gnx,&gny,&gnz);
2309:   PetscObjectGetComm((PetscObject)da,&comm);

2311:   PetscMalloc1(col*col*col*nc,&cols);
2312:   DMGetLocalToGlobalMapping(da,&ltog);

2314:   /* determine the matrix preallocation information */
2315:   MatPreallocateInitialize(comm,nc*nx*ny*nz,nc*nx*ny*nz,dnz,onz);

2317:   MatSetBlockSize(J,nc);
2318:   for (i=xs; i<xs+nx; i++) {
2319:     istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2320:     iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2321:     for (j=ys; j<ys+ny; j++) {
2322:       jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2323:       jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2324:       for (k=zs; k<zs+nz; k++) {
2325:         kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2326:         kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

2328:         slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

2330:         for (l=0; l<nc; l++) {
2331:           cnt = 0;
2332:           for (ii=istart; ii<iend+1; ii++) {
2333:             for (jj=jstart; jj<jend+1; jj++) {
2334:               for (kk=kstart; kk<kend+1; kk++) {
2335:                 if (ii || jj || kk) {
2336:                   if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2337:                     for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2338:                   }
2339:                 } else {
2340:                   if (dfill) {
2341:                     for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2342:                   } else {
2343:                     for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2344:                   }
2345:                 }
2346:               }
2347:             }
2348:           }
2349:           row  = l + nc*(slot);
2350:           maxcnt = PetscMax(maxcnt,cnt);
2351:           if (removedups) {
2352:             MatPreallocateSetLocalRemoveDups(ltog,1,&row,ltog,cnt,cols,dnz,onz);
2353:           } else {
2354:             MatPreallocateSetLocal(ltog,1,&row,ltog,cnt,cols,dnz,onz);
2355:           }
2356:         }
2357:       }
2358:     }
2359:   }
2360:   MatSeqAIJSetPreallocation(J,0,dnz);
2361:   MatMPIAIJSetPreallocation(J,0,dnz,0,onz);
2362:   MatPreallocateFinalize(dnz,onz);
2363:   MatSetLocalToGlobalMapping(J,ltog,ltog);

2365:   /*
2366:     For each node in the grid: we get the neighbors in the local ordering (the on-processor
2367:     ordering that includes the ghost points); MatSetValuesLocal() then maps those indices to
2368:     the global PETSc ordering.
2369:   */
2370:   if (!da->prealloc_only) {
2371:     PetscCalloc1(maxcnt,&values);
2372:     for (i=xs; i<xs+nx; i++) {
2373:       istart = (bx == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-i));
2374:       iend   = (bx == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,m-i-1));
2375:       for (j=ys; j<ys+ny; j++) {
2376:         jstart = (by == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-j));
2377:         jend   = (by == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,n-j-1));
2378:         for (k=zs; k<zs+nz; k++) {
2379:           kstart = (bz == DM_BOUNDARY_PERIODIC) ? -s : (PetscMax(-s,-k));
2380:           kend   = (bz == DM_BOUNDARY_PERIODIC) ?  s : (PetscMin(s,p-k-1));

2382:           slot = i - gxs + gnx*(j - gys) + gnx*gny*(k - gzs);

2384:           for (l=0; l<nc; l++) {
2385:             cnt = 0;
2386:             for (ii=istart; ii<iend+1; ii++) {
2387:               for (jj=jstart; jj<jend+1; jj++) {
2388:                 for (kk=kstart; kk<kend+1; kk++) {
2389:                   if (ii || jj || kk) {
2390:                     if ((st == DMDA_STENCIL_BOX) || ((!ii && !jj) || (!jj && !kk) || (!ii && !kk))) {/* entries on star*/
2391:                       for (ifill_col=ofill[l]; ifill_col<ofill[l+1]; ifill_col++) cols[cnt++] = ofill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2392:                     }
2393:                   } else {
2394:                     if (dfill) {
2395:                       for (ifill_col=dfill[l]; ifill_col<dfill[l+1]; ifill_col++) cols[cnt++] = dfill[ifill_col] + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2396:                     } else {
2397:                       for (ifill_col=0; ifill_col<nc; ifill_col++) cols[cnt++] = ifill_col + nc*(slot + ii + gnx*jj + gnx*gny*kk);
2398:                     }
2399:                   }
2400:                 }
2401:               }
2402:             }
2403:             row  = l + nc*(slot);
2404:             MatSetValuesLocal(J,1,&row,cnt,cols,values,INSERT_VALUES);
2405:           }
2406:         }
2407:       }
2408:     }
2409:     PetscFree(values);
2410:     /* do not copy values to GPU since they are all zero and not yet needed there */
2411:     MatBindToCPU(J,PETSC_TRUE);
2412:     MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);
2413:     MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);
2414:     MatBindToCPU(J,PETSC_FALSE);
2415:     MatSetOption(J,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);
2416:   }
2417:   PetscFree(cols);
2418:   return(0);
2419: }