Actual source code: bjacobi.c
petsc-3.14.0 2020-09-29
2: /*
3: Defines a block Jacobi preconditioner.
4: */
6: #include <../src/ksp/pc/impls/bjacobi/bjacobi.h>
8: static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC,Mat,Mat);
9: static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC,Mat,Mat);
10: static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC);
12: static PetscErrorCode PCSetUp_BJacobi(PC pc)
13: {
14: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
15: Mat mat = pc->mat,pmat = pc->pmat;
17: PetscBool hasop;
18: PetscInt N,M,start,i,sum,end;
19: PetscInt bs,i_start=-1,i_end=-1;
20: PetscMPIInt rank,size;
21: const char *pprefix,*mprefix;
24: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
25: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
26: MatGetLocalSize(pc->pmat,&M,&N);
27: MatGetBlockSize(pc->pmat,&bs);
29: if (jac->n > 0 && jac->n < size) {
30: PCSetUp_BJacobi_Multiproc(pc);
31: return(0);
32: }
34: /* --------------------------------------------------------------------------
35: Determines the number of blocks assigned to each processor
36: -----------------------------------------------------------------------------*/
38: /* local block count given */
39: if (jac->n_local > 0 && jac->n < 0) {
40: MPIU_Allreduce(&jac->n_local,&jac->n,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
41: if (jac->l_lens) { /* check that user set these correctly */
42: sum = 0;
43: for (i=0; i<jac->n_local; i++) {
44: if (jac->l_lens[i]/bs*bs !=jac->l_lens[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Mat blocksize doesn't match block Jacobi layout");
45: sum += jac->l_lens[i];
46: }
47: if (sum != M) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local lens set incorrectly");
48: } else {
49: PetscMalloc1(jac->n_local,&jac->l_lens);
50: for (i=0; i<jac->n_local; i++) jac->l_lens[i] = bs*((M/bs)/jac->n_local + (((M/bs) % jac->n_local) > i));
51: }
52: } else if (jac->n > 0 && jac->n_local < 0) { /* global block count given */
53: /* global blocks given: determine which ones are local */
54: if (jac->g_lens) {
55: /* check if the g_lens is has valid entries */
56: for (i=0; i<jac->n; i++) {
57: if (!jac->g_lens[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Zero block not allowed");
58: if (jac->g_lens[i]/bs*bs != jac->g_lens[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Mat blocksize doesn't match block Jacobi layout");
59: }
60: if (size == 1) {
61: jac->n_local = jac->n;
62: PetscMalloc1(jac->n_local,&jac->l_lens);
63: PetscArraycpy(jac->l_lens,jac->g_lens,jac->n_local);
64: /* check that user set these correctly */
65: sum = 0;
66: for (i=0; i<jac->n_local; i++) sum += jac->l_lens[i];
67: if (sum != M) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Global lens set incorrectly");
68: } else {
69: MatGetOwnershipRange(pc->pmat,&start,&end);
70: /* loop over blocks determing first one owned by me */
71: sum = 0;
72: for (i=0; i<jac->n+1; i++) {
73: if (sum == start) { i_start = i; goto start_1;}
74: if (i < jac->n) sum += jac->g_lens[i];
75: }
76: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
77: start_1:
78: for (i=i_start; i<jac->n+1; i++) {
79: if (sum == end) { i_end = i; goto end_1; }
80: if (i < jac->n) sum += jac->g_lens[i];
81: }
82: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Block sizes used in PCBJacobiSetTotalBlocks()\nare not compatible with parallel matrix layout");
83: end_1:
84: jac->n_local = i_end - i_start;
85: PetscMalloc1(jac->n_local,&jac->l_lens);
86: PetscArraycpy(jac->l_lens,jac->g_lens+i_start,jac->n_local);
87: }
88: } else { /* no global blocks given, determine then using default layout */
89: jac->n_local = jac->n/size + ((jac->n % size) > rank);
90: PetscMalloc1(jac->n_local,&jac->l_lens);
91: for (i=0; i<jac->n_local; i++) {
92: jac->l_lens[i] = ((M/bs)/jac->n_local + (((M/bs) % jac->n_local) > i))*bs;
93: if (!jac->l_lens[i]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Too many blocks given");
94: }
95: }
96: } else if (jac->n < 0 && jac->n_local < 0) { /* no blocks given */
97: jac->n = size;
98: jac->n_local = 1;
99: PetscMalloc1(1,&jac->l_lens);
100: jac->l_lens[0] = M;
101: } else { /* jac->n > 0 && jac->n_local > 0 */
102: if (!jac->l_lens) {
103: PetscMalloc1(jac->n_local,&jac->l_lens);
104: for (i=0; i<jac->n_local; i++) jac->l_lens[i] = bs*((M/bs)/jac->n_local + (((M/bs) % jac->n_local) > i));
105: }
106: }
107: if (jac->n_local < 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of blocks is less than number of processors");
109: /* -------------------------
110: Determines mat and pmat
111: ---------------------------*/
112: MatHasOperation(pc->mat,MATOP_GET_DIAGONAL_BLOCK,&hasop);
113: if (!hasop && size == 1) {
114: mat = pc->mat;
115: pmat = pc->pmat;
116: } else {
117: if (pc->useAmat) {
118: /* use block from Amat matrix, not Pmat for local MatMult() */
119: MatGetDiagonalBlock(pc->mat,&mat);
120: /* make submatrix have same prefix as entire matrix */
121: PetscObjectGetOptionsPrefix((PetscObject)pc->mat,&mprefix);
122: PetscObjectSetOptionsPrefix((PetscObject)mat,mprefix);
123: }
124: if (pc->pmat != pc->mat || !pc->useAmat) {
125: MatGetDiagonalBlock(pc->pmat,&pmat);
126: /* make submatrix have same prefix as entire matrix */
127: PetscObjectGetOptionsPrefix((PetscObject)pc->pmat,&pprefix);
128: PetscObjectSetOptionsPrefix((PetscObject)pmat,pprefix);
129: } else pmat = mat;
130: }
132: /* ------
133: Setup code depends on the number of blocks
134: */
135: if (jac->n_local == 1) {
136: PCSetUp_BJacobi_Singleblock(pc,mat,pmat);
137: } else {
138: PCSetUp_BJacobi_Multiblock(pc,mat,pmat);
139: }
140: return(0);
141: }
143: /* Default destroy, if it has never been setup */
144: static PetscErrorCode PCDestroy_BJacobi(PC pc)
145: {
146: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
150: PetscFree(jac->g_lens);
151: PetscFree(jac->l_lens);
152: PetscFree(pc->data);
153: return(0);
154: }
157: static PetscErrorCode PCSetFromOptions_BJacobi(PetscOptionItems *PetscOptionsObject,PC pc)
158: {
159: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
161: PetscInt blocks,i;
162: PetscBool flg;
165: PetscOptionsHead(PetscOptionsObject,"Block Jacobi options");
166: PetscOptionsInt("-pc_bjacobi_blocks","Total number of blocks","PCBJacobiSetTotalBlocks",jac->n,&blocks,&flg);
167: if (flg) {PCBJacobiSetTotalBlocks(pc,blocks,NULL);}
168: PetscOptionsInt("-pc_bjacobi_local_blocks","Local number of blocks","PCBJacobiSetLocalBlocks",jac->n_local,&blocks,&flg);
169: if (flg) {PCBJacobiSetLocalBlocks(pc,blocks,NULL);}
170: if (jac->ksp) {
171: /* The sub-KSP has already been set up (e.g., PCSetUp_BJacobi_Singleblock), but KSPSetFromOptions was not called
172: * unless we had already been called. */
173: for (i=0; i<jac->n_local; i++) {
174: KSPSetFromOptions(jac->ksp[i]);
175: }
176: }
177: PetscOptionsTail();
178: return(0);
179: }
181: #include <petscdraw.h>
182: static PetscErrorCode PCView_BJacobi(PC pc,PetscViewer viewer)
183: {
184: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
185: PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
186: PetscErrorCode ierr;
187: PetscMPIInt rank;
188: PetscInt i;
189: PetscBool iascii,isstring,isdraw;
190: PetscViewer sviewer;
193: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
194: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);
195: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
196: if (iascii) {
197: if (pc->useAmat) {
198: PetscViewerASCIIPrintf(viewer," using Amat local matrix, number of blocks = %D\n",jac->n);
199: }
200: PetscViewerASCIIPrintf(viewer," number of blocks = %D\n",jac->n);
201: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
202: if (jac->same_local_solves) {
203: PetscViewerASCIIPrintf(viewer," Local solver is the same for all blocks, as in the following KSP and PC objects on rank 0:\n");
204: if (jac->ksp && !jac->psubcomm) {
205: PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
206: if (!rank) {
207: PetscViewerASCIIPushTab(viewer);
208: KSPView(jac->ksp[0],sviewer);
209: PetscViewerASCIIPopTab(viewer);
210: }
211: PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
212: } else if (mpjac && jac->ksp && mpjac->psubcomm) {
213: PetscViewerGetSubViewer(viewer,mpjac->psubcomm->child,&sviewer);
214: if (!mpjac->psubcomm->color) {
215: PetscViewerASCIIPushTab(viewer);
216: KSPView(*(jac->ksp),sviewer);
217: PetscViewerASCIIPopTab(viewer);
218: }
219: PetscViewerRestoreSubViewer(viewer,mpjac->psubcomm->child,&sviewer);
220: }
221: } else {
222: PetscInt n_global;
223: MPIU_Allreduce(&jac->n_local,&n_global,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
224: PetscViewerASCIIPushSynchronized(viewer);
225: PetscViewerASCIIPrintf(viewer," Local solve info for each block is in the following KSP and PC objects:\n");
226: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] number of local blocks = %D, first local block number = %D\n",
227: rank,jac->n_local,jac->first_local);
228: PetscViewerASCIIPushTab(viewer);
229: PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
230: for (i=0; i<jac->n_local; i++) {
231: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] local block number %D\n",rank,i);
232: KSPView(jac->ksp[i],sviewer);
233: PetscViewerASCIISynchronizedPrintf(viewer,"- - - - - - - - - - - - - - - - - -\n");
234: }
235: PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
236: PetscViewerASCIIPopTab(viewer);
237: PetscViewerFlush(viewer);
238: PetscViewerASCIIPopSynchronized(viewer);
239: }
240: } else if (isstring) {
241: PetscViewerStringSPrintf(viewer," blks=%D",jac->n);
242: PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
243: if (jac->ksp) {KSPView(jac->ksp[0],sviewer);}
244: PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
245: } else if (isdraw) {
246: PetscDraw draw;
247: char str[25];
248: PetscReal x,y,bottom,h;
250: PetscViewerDrawGetDraw(viewer,0,&draw);
251: PetscDrawGetCurrentPoint(draw,&x,&y);
252: PetscSNPrintf(str,25,"Number blocks %D",jac->n);
253: PetscDrawStringBoxed(draw,x,y,PETSC_DRAW_RED,PETSC_DRAW_BLACK,str,NULL,&h);
254: bottom = y - h;
255: PetscDrawPushCurrentPoint(draw,x,bottom);
256: /* warning the communicator on viewer is different then on ksp in parallel */
257: if (jac->ksp) {KSPView(jac->ksp[0],viewer);}
258: PetscDrawPopCurrentPoint(draw);
259: }
260: return(0);
261: }
263: /* -------------------------------------------------------------------------------------*/
265: static PetscErrorCode PCBJacobiGetSubKSP_BJacobi(PC pc,PetscInt *n_local,PetscInt *first_local,KSP **ksp)
266: {
267: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
270: if (!pc->setupcalled) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_WRONGSTATE,"Must call KSPSetUp() or PCSetUp() first");
272: if (n_local) *n_local = jac->n_local;
273: if (first_local) *first_local = jac->first_local;
274: *ksp = jac->ksp;
275: jac->same_local_solves = PETSC_FALSE; /* Assume that local solves are now different;
276: not necessarily true though! This flag is
277: used only for PCView_BJacobi() */
278: return(0);
279: }
281: static PetscErrorCode PCBJacobiSetTotalBlocks_BJacobi(PC pc,PetscInt blocks,PetscInt *lens)
282: {
283: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
287: if (pc->setupcalled > 0 && jac->n!=blocks) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ORDER,"Cannot alter number of blocks after PCSetUp()/KSPSetUp() has been called");
288: jac->n = blocks;
289: if (!lens) jac->g_lens = NULL;
290: else {
291: PetscMalloc1(blocks,&jac->g_lens);
292: PetscLogObjectMemory((PetscObject)pc,blocks*sizeof(PetscInt));
293: PetscArraycpy(jac->g_lens,lens,blocks);
294: }
295: return(0);
296: }
298: static PetscErrorCode PCBJacobiGetTotalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
299: {
300: PC_BJacobi *jac = (PC_BJacobi*) pc->data;
303: *blocks = jac->n;
304: if (lens) *lens = jac->g_lens;
305: return(0);
306: }
308: static PetscErrorCode PCBJacobiSetLocalBlocks_BJacobi(PC pc,PetscInt blocks,const PetscInt lens[])
309: {
310: PC_BJacobi *jac;
314: jac = (PC_BJacobi*)pc->data;
316: jac->n_local = blocks;
317: if (!lens) jac->l_lens = NULL;
318: else {
319: PetscMalloc1(blocks,&jac->l_lens);
320: PetscLogObjectMemory((PetscObject)pc,blocks*sizeof(PetscInt));
321: PetscArraycpy(jac->l_lens,lens,blocks);
322: }
323: return(0);
324: }
326: static PetscErrorCode PCBJacobiGetLocalBlocks_BJacobi(PC pc, PetscInt *blocks, const PetscInt *lens[])
327: {
328: PC_BJacobi *jac = (PC_BJacobi*) pc->data;
331: *blocks = jac->n_local;
332: if (lens) *lens = jac->l_lens;
333: return(0);
334: }
336: /* -------------------------------------------------------------------------------------*/
338: /*@C
339: PCBJacobiGetSubKSP - Gets the local KSP contexts for all blocks on
340: this processor.
342: Not Collective
344: Input Parameter:
345: . pc - the preconditioner context
347: Output Parameters:
348: + n_local - the number of blocks on this processor, or NULL
349: . first_local - the global number of the first block on this processor, or NULL
350: - ksp - the array of KSP contexts
352: Notes:
353: After PCBJacobiGetSubKSP() the array of KSP contexts is not to be freed.
355: Currently for some matrix implementations only 1 block per processor
356: is supported.
358: You must call KSPSetUp() or PCSetUp() before calling PCBJacobiGetSubKSP().
360: Fortran Usage: You must pass in a KSP array that is large enough to contain all the local KSPs.
361: You can call PCBJacobiGetSubKSP(pc,nlocal,firstlocal,PETSC_NULL_KSP,ierr) to determine how large the
362: KSP array must be.
364: Level: advanced
366: .seealso: PCBJacobiGetSubKSP()
367: @*/
368: PetscErrorCode PCBJacobiGetSubKSP(PC pc,PetscInt *n_local,PetscInt *first_local,KSP *ksp[])
369: {
374: PetscUseMethod(pc,"PCBJacobiGetSubKSP_C",(PC,PetscInt*,PetscInt*,KSP **),(pc,n_local,first_local,ksp));
375: return(0);
376: }
378: /*@
379: PCBJacobiSetTotalBlocks - Sets the global number of blocks for the block
380: Jacobi preconditioner.
382: Collective on PC
384: Input Parameters:
385: + pc - the preconditioner context
386: . blocks - the number of blocks
387: - lens - [optional] integer array containing the size of each block
389: Options Database Key:
390: . -pc_bjacobi_blocks <blocks> - Sets the number of global blocks
392: Notes:
393: Currently only a limited number of blocking configurations are supported.
394: All processors sharing the PC must call this routine with the same data.
396: Level: intermediate
398: .seealso: PCSetUseAmat(), PCBJacobiSetLocalBlocks()
399: @*/
400: PetscErrorCode PCBJacobiSetTotalBlocks(PC pc,PetscInt blocks,const PetscInt lens[])
401: {
406: if (blocks <= 0) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_OUTOFRANGE,"Must have positive blocks");
407: PetscTryMethod(pc,"PCBJacobiSetTotalBlocks_C",(PC,PetscInt,const PetscInt[]),(pc,blocks,lens));
408: return(0);
409: }
411: /*@C
412: PCBJacobiGetTotalBlocks - Gets the global number of blocks for the block
413: Jacobi preconditioner.
415: Not Collective
417: Input Parameter:
418: . pc - the preconditioner context
420: Output parameters:
421: + blocks - the number of blocks
422: - lens - integer array containing the size of each block
424: Level: intermediate
426: .seealso: PCSetUseAmat(), PCBJacobiGetLocalBlocks()
427: @*/
428: PetscErrorCode PCBJacobiGetTotalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
429: {
435: PetscUseMethod(pc,"PCBJacobiGetTotalBlocks_C",(PC,PetscInt*, const PetscInt *[]),(pc,blocks,lens));
436: return(0);
437: }
439: /*@
440: PCBJacobiSetLocalBlocks - Sets the local number of blocks for the block
441: Jacobi preconditioner.
443: Not Collective
445: Input Parameters:
446: + pc - the preconditioner context
447: . blocks - the number of blocks
448: - lens - [optional] integer array containing size of each block
450: Options Database Key:
451: . -pc_bjacobi_local_blocks <blocks> - Sets the number of local blocks
453: Note:
454: Currently only a limited number of blocking configurations are supported.
456: Level: intermediate
458: .seealso: PCSetUseAmat(), PCBJacobiSetTotalBlocks()
459: @*/
460: PetscErrorCode PCBJacobiSetLocalBlocks(PC pc,PetscInt blocks,const PetscInt lens[])
461: {
466: if (blocks < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Must have nonegative blocks");
467: PetscTryMethod(pc,"PCBJacobiSetLocalBlocks_C",(PC,PetscInt,const PetscInt []),(pc,blocks,lens));
468: return(0);
469: }
471: /*@C
472: PCBJacobiGetLocalBlocks - Gets the local number of blocks for the block
473: Jacobi preconditioner.
475: Not Collective
477: Input Parameters:
478: + pc - the preconditioner context
479: . blocks - the number of blocks
480: - lens - [optional] integer array containing size of each block
482: Note:
483: Currently only a limited number of blocking configurations are supported.
485: Level: intermediate
487: .seealso: PCSetUseAmat(), PCBJacobiGetTotalBlocks()
488: @*/
489: PetscErrorCode PCBJacobiGetLocalBlocks(PC pc, PetscInt *blocks, const PetscInt *lens[])
490: {
496: PetscUseMethod(pc,"PCBJacobiGetLocalBlocks_C",(PC,PetscInt*, const PetscInt *[]),(pc,blocks,lens));
497: return(0);
498: }
500: /* -----------------------------------------------------------------------------------*/
502: /*MC
503: PCBJACOBI - Use block Jacobi preconditioning, each block is (approximately) solved with
504: its own KSP object.
506: Options Database Keys:
507: + -pc_use_amat - use Amat to apply block of operator in inner Krylov method
508: - -pc_bjacobi_blocks <n> - use n total blocks
510: Notes:
511: Each processor can have one or more blocks, or a single block can be shared by several processes. Defaults to one block per processor.
513: To set options on the solvers for each block append -sub_ to all the KSP, KSP, and PC
514: options database keys. For example, -sub_pc_type ilu -sub_pc_factor_levels 1 -sub_ksp_type preonly
516: To set the options on the solvers separate for each block call PCBJacobiGetSubKSP()
517: and set the options directly on the resulting KSP object (you can access its PC
518: KSPGetPC())
520: For GPU-based vectors (CUDA, ViennaCL) it is recommended to use exactly one block per MPI process for best
521: performance. Different block partitioning may lead to additional data transfers
522: between host and GPU that lead to degraded performance.
524: The options prefix for each block is sub_, for example -sub_pc_type lu.
526: When multiple processes share a single block, each block encompasses exactly all the unknowns owned its set of processes.
528: Level: beginner
530: .seealso: PCCreate(), PCSetType(), PCType (for list of available types), PC,
531: PCASM, PCSetUseAmat(), PCGetUseAmat(), PCBJacobiGetSubKSP(), PCBJacobiSetTotalBlocks(),
532: PCBJacobiSetLocalBlocks(), PCSetModifySubMatrices()
533: M*/
535: PETSC_EXTERN PetscErrorCode PCCreate_BJacobi(PC pc)
536: {
538: PetscMPIInt rank;
539: PC_BJacobi *jac;
542: PetscNewLog(pc,&jac);
543: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
545: pc->ops->apply = NULL;
546: pc->ops->matapply = NULL;
547: pc->ops->applytranspose = NULL;
548: pc->ops->setup = PCSetUp_BJacobi;
549: pc->ops->destroy = PCDestroy_BJacobi;
550: pc->ops->setfromoptions = PCSetFromOptions_BJacobi;
551: pc->ops->view = PCView_BJacobi;
552: pc->ops->applyrichardson = NULL;
554: pc->data = (void*)jac;
555: jac->n = -1;
556: jac->n_local = -1;
557: jac->first_local = rank;
558: jac->ksp = NULL;
559: jac->same_local_solves = PETSC_TRUE;
560: jac->g_lens = NULL;
561: jac->l_lens = NULL;
562: jac->psubcomm = NULL;
564: PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiGetSubKSP_C",PCBJacobiGetSubKSP_BJacobi);
565: PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiSetTotalBlocks_C",PCBJacobiSetTotalBlocks_BJacobi);
566: PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiGetTotalBlocks_C",PCBJacobiGetTotalBlocks_BJacobi);
567: PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiSetLocalBlocks_C",PCBJacobiSetLocalBlocks_BJacobi);
568: PetscObjectComposeFunction((PetscObject)pc,"PCBJacobiGetLocalBlocks_C",PCBJacobiGetLocalBlocks_BJacobi);
569: return(0);
570: }
572: /* --------------------------------------------------------------------------------------------*/
573: /*
574: These are for a single block per processor; works for AIJ, BAIJ; Seq and MPI
575: */
576: static PetscErrorCode PCReset_BJacobi_Singleblock(PC pc)
577: {
578: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
579: PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
580: PetscErrorCode ierr;
583: KSPReset(jac->ksp[0]);
584: VecDestroy(&bjac->x);
585: VecDestroy(&bjac->y);
586: return(0);
587: }
589: static PetscErrorCode PCDestroy_BJacobi_Singleblock(PC pc)
590: {
591: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
592: PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
593: PetscErrorCode ierr;
596: PCReset_BJacobi_Singleblock(pc);
597: KSPDestroy(&jac->ksp[0]);
598: PetscFree(jac->ksp);
599: PetscFree(jac->l_lens);
600: PetscFree(jac->g_lens);
601: PetscFree(bjac);
602: PetscFree(pc->data);
603: return(0);
604: }
606: static PetscErrorCode PCSetUpOnBlocks_BJacobi_Singleblock(PC pc)
607: {
608: PetscErrorCode ierr;
609: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
610: KSP subksp = jac->ksp[0];
611: KSPConvergedReason reason;
614: KSPSetUp(subksp);
615: KSPGetConvergedReason(subksp,&reason);
616: if (reason == KSP_DIVERGED_PC_FAILED) {
617: pc->failedreason = PC_SUBPC_ERROR;
618: }
619: return(0);
620: }
622: static PetscErrorCode PCApply_BJacobi_Singleblock(PC pc,Vec x,Vec y)
623: {
624: PetscErrorCode ierr;
625: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
626: PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
629: VecGetLocalVectorRead(x, bjac->x);
630: VecGetLocalVector(y, bjac->y);
631: /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner
632: matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild
633: of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/
634: KSPSetReusePreconditioner(jac->ksp[0],pc->reusepreconditioner);
635: KSPSolve(jac->ksp[0],bjac->x,bjac->y);
636: KSPCheckSolve(jac->ksp[0],pc,bjac->y);
637: VecRestoreLocalVectorRead(x, bjac->x);
638: VecRestoreLocalVector(y, bjac->y);
639: return(0);
640: }
642: static PetscErrorCode PCMatApply_BJacobi_Singleblock(PC pc,Mat X,Mat Y)
643: {
644: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
645: Mat sX,sY;
649: /* Since the inner KSP matrix may point directly to the diagonal block of an MPI matrix the inner
650: matrix may change even if the outer KSP/PC has not updated the preconditioner, this will trigger a rebuild
651: of the inner preconditioner automatically unless we pass down the outer preconditioners reuse flag.*/
652: KSPSetReusePreconditioner(jac->ksp[0],pc->reusepreconditioner);
653: MatDenseGetLocalMatrix(X,&sX);
654: MatDenseGetLocalMatrix(Y,&sY);
655: KSPMatSolve(jac->ksp[0],sX,sY);
656: return(0);
657: }
659: static PetscErrorCode PCApplySymmetricLeft_BJacobi_Singleblock(PC pc,Vec x,Vec y)
660: {
661: PetscErrorCode ierr;
662: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
663: PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
664: PetscScalar *y_array;
665: const PetscScalar *x_array;
666: PC subpc;
669: /*
670: The VecPlaceArray() is to avoid having to copy the
671: y vector into the bjac->x vector. The reason for
672: the bjac->x vector is that we need a sequential vector
673: for the sequential solve.
674: */
675: VecGetArrayRead(x,&x_array);
676: VecGetArray(y,&y_array);
677: VecPlaceArray(bjac->x,x_array);
678: VecPlaceArray(bjac->y,y_array);
679: /* apply the symmetric left portion of the inner PC operator */
680: /* note this by-passes the inner KSP and its options completely */
681: KSPGetPC(jac->ksp[0],&subpc);
682: PCApplySymmetricLeft(subpc,bjac->x,bjac->y);
683: VecResetArray(bjac->x);
684: VecResetArray(bjac->y);
685: VecRestoreArrayRead(x,&x_array);
686: VecRestoreArray(y,&y_array);
687: return(0);
688: }
690: static PetscErrorCode PCApplySymmetricRight_BJacobi_Singleblock(PC pc,Vec x,Vec y)
691: {
692: PetscErrorCode ierr;
693: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
694: PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
695: PetscScalar *y_array;
696: const PetscScalar *x_array;
697: PC subpc;
700: /*
701: The VecPlaceArray() is to avoid having to copy the
702: y vector into the bjac->x vector. The reason for
703: the bjac->x vector is that we need a sequential vector
704: for the sequential solve.
705: */
706: VecGetArrayRead(x,&x_array);
707: VecGetArray(y,&y_array);
708: VecPlaceArray(bjac->x,x_array);
709: VecPlaceArray(bjac->y,y_array);
711: /* apply the symmetric right portion of the inner PC operator */
712: /* note this by-passes the inner KSP and its options completely */
714: KSPGetPC(jac->ksp[0],&subpc);
715: PCApplySymmetricRight(subpc,bjac->x,bjac->y);
717: VecRestoreArrayRead(x,&x_array);
718: VecRestoreArray(y,&y_array);
719: return(0);
720: }
722: static PetscErrorCode PCApplyTranspose_BJacobi_Singleblock(PC pc,Vec x,Vec y)
723: {
724: PetscErrorCode ierr;
725: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
726: PC_BJacobi_Singleblock *bjac = (PC_BJacobi_Singleblock*)jac->data;
727: PetscScalar *y_array;
728: const PetscScalar *x_array;
731: /*
732: The VecPlaceArray() is to avoid having to copy the
733: y vector into the bjac->x vector. The reason for
734: the bjac->x vector is that we need a sequential vector
735: for the sequential solve.
736: */
737: VecGetArrayRead(x,&x_array);
738: VecGetArray(y,&y_array);
739: VecPlaceArray(bjac->x,x_array);
740: VecPlaceArray(bjac->y,y_array);
741: KSPSolveTranspose(jac->ksp[0],bjac->x,bjac->y);
742: KSPCheckSolve(jac->ksp[0],pc,bjac->y);
743: VecResetArray(bjac->x);
744: VecResetArray(bjac->y);
745: VecRestoreArrayRead(x,&x_array);
746: VecRestoreArray(y,&y_array);
747: return(0);
748: }
750: static PetscErrorCode PCSetUp_BJacobi_Singleblock(PC pc,Mat mat,Mat pmat)
751: {
752: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
753: PetscErrorCode ierr;
754: PetscInt m;
755: KSP ksp;
756: PC_BJacobi_Singleblock *bjac;
757: PetscBool wasSetup = PETSC_TRUE;
758: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
759: PetscBool is_gpumatrix = PETSC_FALSE;
760: #endif
763: if (!pc->setupcalled) {
764: const char *prefix;
766: if (!jac->ksp) {
767: wasSetup = PETSC_FALSE;
769: KSPCreate(PETSC_COMM_SELF,&ksp);
770: KSPSetErrorIfNotConverged(ksp,pc->erroriffailure);
771: PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);
772: PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);
773: KSPSetType(ksp,KSPPREONLY);
774: PCGetOptionsPrefix(pc,&prefix);
775: KSPSetOptionsPrefix(ksp,prefix);
776: KSPAppendOptionsPrefix(ksp,"sub_");
778: pc->ops->reset = PCReset_BJacobi_Singleblock;
779: pc->ops->destroy = PCDestroy_BJacobi_Singleblock;
780: pc->ops->apply = PCApply_BJacobi_Singleblock;
781: pc->ops->matapply = PCMatApply_BJacobi_Singleblock;
782: pc->ops->applysymmetricleft = PCApplySymmetricLeft_BJacobi_Singleblock;
783: pc->ops->applysymmetricright = PCApplySymmetricRight_BJacobi_Singleblock;
784: pc->ops->applytranspose = PCApplyTranspose_BJacobi_Singleblock;
785: pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Singleblock;
787: PetscMalloc1(1,&jac->ksp);
788: jac->ksp[0] = ksp;
790: PetscNewLog(pc,&bjac);
791: jac->data = (void*)bjac;
792: } else {
793: ksp = jac->ksp[0];
794: bjac = (PC_BJacobi_Singleblock*)jac->data;
795: }
797: /*
798: The reason we need to generate these vectors is to serve
799: as the right-hand side and solution vector for the solve on the
800: block. We do not need to allocate space for the vectors since
801: that is provided via VecPlaceArray() just before the call to
802: KSPSolve() on the block.
803: */
804: MatGetSize(pmat,&m,&m);
805: VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,NULL,&bjac->x);
806: VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,NULL,&bjac->y);
807: #if defined(PETSC_HAVE_CUDA)
808: PetscObjectTypeCompareAny((PetscObject)pmat,&is_gpumatrix,MATAIJCUSPARSE,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");
809: if (is_gpumatrix) {
810: VecSetType(bjac->x,VECCUDA);
811: VecSetType(bjac->y,VECCUDA);
812: }
813: #endif
814: #if defined(PETSC_HAVE_VIENNACL)
815: PetscObjectTypeCompareAny((PetscObject)pmat,&is_gpumatrix,MATAIJVIENNACL,MATSEQAIJVIENNACL,MATMPIAIJVIENNACL,"");
816: if (is_gpumatrix) {
817: VecSetType(bjac->x,VECVIENNACL);
818: VecSetType(bjac->y,VECVIENNACL);
819: }
820: #endif
821: PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->x);
822: PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->y);
823: } else {
824: ksp = jac->ksp[0];
825: bjac = (PC_BJacobi_Singleblock*)jac->data;
826: }
827: if (pc->useAmat) {
828: KSPSetOperators(ksp,mat,pmat);
829: } else {
830: KSPSetOperators(ksp,pmat,pmat);
831: }
832: if (!wasSetup && pc->setfromoptionscalled) {
833: /* If PCSetFromOptions_BJacobi is called later, KSPSetFromOptions will be called at that time. */
834: KSPSetFromOptions(ksp);
835: }
836: return(0);
837: }
839: /* ---------------------------------------------------------------------------------------------*/
840: static PetscErrorCode PCReset_BJacobi_Multiblock(PC pc)
841: {
842: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
843: PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
844: PetscErrorCode ierr;
845: PetscInt i;
848: if (bjac && bjac->pmat) {
849: MatDestroyMatrices(jac->n_local,&bjac->pmat);
850: if (pc->useAmat) {
851: MatDestroyMatrices(jac->n_local,&bjac->mat);
852: }
853: }
855: for (i=0; i<jac->n_local; i++) {
856: KSPReset(jac->ksp[i]);
857: if (bjac && bjac->x) {
858: VecDestroy(&bjac->x[i]);
859: VecDestroy(&bjac->y[i]);
860: ISDestroy(&bjac->is[i]);
861: }
862: }
863: PetscFree(jac->l_lens);
864: PetscFree(jac->g_lens);
865: return(0);
866: }
868: static PetscErrorCode PCDestroy_BJacobi_Multiblock(PC pc)
869: {
870: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
871: PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
872: PetscErrorCode ierr;
873: PetscInt i;
876: PCReset_BJacobi_Multiblock(pc);
877: if (bjac) {
878: PetscFree2(bjac->x,bjac->y);
879: PetscFree(bjac->starts);
880: PetscFree(bjac->is);
881: }
882: PetscFree(jac->data);
883: for (i=0; i<jac->n_local; i++) {
884: KSPDestroy(&jac->ksp[i]);
885: }
886: PetscFree(jac->ksp);
887: PetscFree(pc->data);
888: return(0);
889: }
891: static PetscErrorCode PCSetUpOnBlocks_BJacobi_Multiblock(PC pc)
892: {
893: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
894: PetscErrorCode ierr;
895: PetscInt i,n_local = jac->n_local;
896: KSPConvergedReason reason;
899: for (i=0; i<n_local; i++) {
900: KSPSetUp(jac->ksp[i]);
901: KSPGetConvergedReason(jac->ksp[i],&reason);
902: if (reason == KSP_DIVERGED_PC_FAILED) {
903: pc->failedreason = PC_SUBPC_ERROR;
904: }
905: }
906: return(0);
907: }
909: /*
910: Preconditioner for block Jacobi
911: */
912: static PetscErrorCode PCApply_BJacobi_Multiblock(PC pc,Vec x,Vec y)
913: {
914: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
915: PetscErrorCode ierr;
916: PetscInt i,n_local = jac->n_local;
917: PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
918: PetscScalar *yin;
919: const PetscScalar *xin;
922: VecGetArrayRead(x,&xin);
923: VecGetArray(y,&yin);
924: for (i=0; i<n_local; i++) {
925: /*
926: To avoid copying the subvector from x into a workspace we instead
927: make the workspace vector array point to the subpart of the array of
928: the global vector.
929: */
930: VecPlaceArray(bjac->x[i],xin+bjac->starts[i]);
931: VecPlaceArray(bjac->y[i],yin+bjac->starts[i]);
933: PetscLogEventBegin(PC_ApplyOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0);
934: KSPSolve(jac->ksp[i],bjac->x[i],bjac->y[i]);
935: KSPCheckSolve(jac->ksp[i],pc,bjac->y[i]);
936: PetscLogEventEnd(PC_ApplyOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0);
938: VecResetArray(bjac->x[i]);
939: VecResetArray(bjac->y[i]);
940: }
941: VecRestoreArrayRead(x,&xin);
942: VecRestoreArray(y,&yin);
943: return(0);
944: }
946: /*
947: Preconditioner for block Jacobi
948: */
949: static PetscErrorCode PCApplyTranspose_BJacobi_Multiblock(PC pc,Vec x,Vec y)
950: {
951: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
952: PetscErrorCode ierr;
953: PetscInt i,n_local = jac->n_local;
954: PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
955: PetscScalar *yin;
956: const PetscScalar *xin;
959: VecGetArrayRead(x,&xin);
960: VecGetArray(y,&yin);
961: for (i=0; i<n_local; i++) {
962: /*
963: To avoid copying the subvector from x into a workspace we instead
964: make the workspace vector array point to the subpart of the array of
965: the global vector.
966: */
967: VecPlaceArray(bjac->x[i],xin+bjac->starts[i]);
968: VecPlaceArray(bjac->y[i],yin+bjac->starts[i]);
970: PetscLogEventBegin(PC_ApplyTransposeOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0);
971: KSPSolveTranspose(jac->ksp[i],bjac->x[i],bjac->y[i]);
972: KSPCheckSolve(jac->ksp[i],pc,bjac->y[i]);
973: PetscLogEventEnd(PC_ApplyTransposeOnBlocks,jac->ksp[i],bjac->x[i],bjac->y[i],0);
975: VecResetArray(bjac->x[i]);
976: VecResetArray(bjac->y[i]);
977: }
978: VecRestoreArrayRead(x,&xin);
979: VecRestoreArray(y,&yin);
980: return(0);
981: }
983: static PetscErrorCode PCSetUp_BJacobi_Multiblock(PC pc,Mat mat,Mat pmat)
984: {
985: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
986: PetscErrorCode ierr;
987: PetscInt m,n_local,N,M,start,i;
988: const char *prefix,*pprefix,*mprefix;
989: KSP ksp;
990: Vec x,y;
991: PC_BJacobi_Multiblock *bjac = (PC_BJacobi_Multiblock*)jac->data;
992: PC subpc;
993: IS is;
994: MatReuse scall;
995: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
996: PetscBool is_gpumatrix = PETSC_FALSE;
997: #endif
1000: MatGetLocalSize(pc->pmat,&M,&N);
1002: n_local = jac->n_local;
1004: if (pc->useAmat) {
1005: PetscBool same;
1006: PetscObjectTypeCompare((PetscObject)mat,((PetscObject)pmat)->type_name,&same);
1007: if (!same) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_ARG_INCOMP,"Matrices not of same type");
1008: }
1010: if (!pc->setupcalled) {
1011: scall = MAT_INITIAL_MATRIX;
1013: if (!jac->ksp) {
1014: pc->ops->reset = PCReset_BJacobi_Multiblock;
1015: pc->ops->destroy = PCDestroy_BJacobi_Multiblock;
1016: pc->ops->apply = PCApply_BJacobi_Multiblock;
1017: pc->ops->matapply = NULL;
1018: pc->ops->applytranspose= PCApplyTranspose_BJacobi_Multiblock;
1019: pc->ops->setuponblocks = PCSetUpOnBlocks_BJacobi_Multiblock;
1021: PetscNewLog(pc,&bjac);
1022: PetscMalloc1(n_local,&jac->ksp);
1023: PetscLogObjectMemory((PetscObject)pc,sizeof(n_local*sizeof(KSP)));
1024: PetscMalloc2(n_local,&bjac->x,n_local,&bjac->y);
1025: PetscMalloc1(n_local,&bjac->starts);
1026: PetscLogObjectMemory((PetscObject)pc,sizeof(n_local*sizeof(PetscScalar)));
1028: jac->data = (void*)bjac;
1029: PetscMalloc1(n_local,&bjac->is);
1030: PetscLogObjectMemory((PetscObject)pc,sizeof(n_local*sizeof(IS)));
1032: for (i=0; i<n_local; i++) {
1033: KSPCreate(PETSC_COMM_SELF,&ksp);
1034: KSPSetErrorIfNotConverged(ksp,pc->erroriffailure);
1035: PetscObjectIncrementTabLevel((PetscObject)ksp,(PetscObject)pc,1);
1036: PetscLogObjectParent((PetscObject)pc,(PetscObject)ksp);
1037: KSPSetType(ksp,KSPPREONLY);
1038: KSPGetPC(ksp,&subpc);
1039: PCGetOptionsPrefix(pc,&prefix);
1040: KSPSetOptionsPrefix(ksp,prefix);
1041: KSPAppendOptionsPrefix(ksp,"sub_");
1043: jac->ksp[i] = ksp;
1044: }
1045: } else {
1046: bjac = (PC_BJacobi_Multiblock*)jac->data;
1047: }
1049: start = 0;
1050: for (i=0; i<n_local; i++) {
1051: m = jac->l_lens[i];
1052: /*
1053: The reason we need to generate these vectors is to serve
1054: as the right-hand side and solution vector for the solve on the
1055: block. We do not need to allocate space for the vectors since
1056: that is provided via VecPlaceArray() just before the call to
1057: KSPSolve() on the block.
1059: */
1060: VecCreateSeq(PETSC_COMM_SELF,m,&x);
1061: VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,NULL,&y);
1062: #if defined(PETSC_HAVE_CUDA)
1063: PetscObjectTypeCompareAny((PetscObject)pmat,&is_gpumatrix,MATAIJCUSPARSE,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");
1064: if (is_gpumatrix) {
1065: VecSetType(x,VECCUDA);
1066: VecSetType(y,VECCUDA);
1067: }
1068: #endif
1069: #if defined(PETSC_HAVE_VIENNACL)
1070: PetscObjectTypeCompareAny((PetscObject)pmat,&is_gpumatrix,MATAIJVIENNACL,MATSEQAIJVIENNACL,MATMPIAIJVIENNACL,"");
1071: if (is_gpumatrix) {
1072: VecSetType(x,VECVIENNACL);
1073: VecSetType(y,VECVIENNACL);
1074: }
1075: #endif
1076: PetscLogObjectParent((PetscObject)pc,(PetscObject)x);
1077: PetscLogObjectParent((PetscObject)pc,(PetscObject)y);
1079: bjac->x[i] = x;
1080: bjac->y[i] = y;
1081: bjac->starts[i] = start;
1083: ISCreateStride(PETSC_COMM_SELF,m,start,1,&is);
1084: bjac->is[i] = is;
1085: PetscLogObjectParent((PetscObject)pc,(PetscObject)is);
1087: start += m;
1088: }
1089: } else {
1090: bjac = (PC_BJacobi_Multiblock*)jac->data;
1091: /*
1092: Destroy the blocks from the previous iteration
1093: */
1094: if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1095: MatDestroyMatrices(n_local,&bjac->pmat);
1096: if (pc->useAmat) {
1097: MatDestroyMatrices(n_local,&bjac->mat);
1098: }
1099: scall = MAT_INITIAL_MATRIX;
1100: } else scall = MAT_REUSE_MATRIX;
1101: }
1103: MatCreateSubMatrices(pmat,n_local,bjac->is,bjac->is,scall,&bjac->pmat);
1104: if (pc->useAmat) {
1105: MatCreateSubMatrices(mat,n_local,bjac->is,bjac->is,scall,&bjac->mat);
1106: }
1107: /* Return control to the user so that the submatrices can be modified (e.g., to apply
1108: different boundary conditions for the submatrices than for the global problem) */
1109: PCModifySubMatrices(pc,n_local,bjac->is,bjac->is,bjac->pmat,pc->modifysubmatricesP);
1111: PetscObjectGetOptionsPrefix((PetscObject)pmat,&pprefix);
1112: for (i=0; i<n_local; i++) {
1113: PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->pmat[i]);
1114: PetscObjectSetOptionsPrefix((PetscObject)bjac->pmat[i],pprefix);
1115: if (pc->useAmat) {
1116: PetscLogObjectParent((PetscObject)pc,(PetscObject)bjac->mat[i]);
1117: PetscObjectGetOptionsPrefix((PetscObject)mat,&mprefix);
1118: PetscObjectSetOptionsPrefix((PetscObject)bjac->mat[i],mprefix);
1119: KSPSetOperators(jac->ksp[i],bjac->mat[i],bjac->pmat[i]);
1120: } else {
1121: KSPSetOperators(jac->ksp[i],bjac->pmat[i],bjac->pmat[i]);
1122: }
1123: if (pc->setfromoptionscalled) {
1124: KSPSetFromOptions(jac->ksp[i]);
1125: }
1126: }
1127: return(0);
1128: }
1130: /* ---------------------------------------------------------------------------------------------*/
1131: /*
1132: These are for a single block with multiple processes;
1133: */
1134: static PetscErrorCode PCReset_BJacobi_Multiproc(PC pc)
1135: {
1136: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
1137: PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
1138: PetscErrorCode ierr;
1141: VecDestroy(&mpjac->ysub);
1142: VecDestroy(&mpjac->xsub);
1143: MatDestroy(&mpjac->submats);
1144: if (jac->ksp) {KSPReset(jac->ksp[0]);}
1145: return(0);
1146: }
1148: static PetscErrorCode PCDestroy_BJacobi_Multiproc(PC pc)
1149: {
1150: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
1151: PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
1152: PetscErrorCode ierr;
1155: PCReset_BJacobi_Multiproc(pc);
1156: KSPDestroy(&jac->ksp[0]);
1157: PetscFree(jac->ksp);
1158: PetscSubcommDestroy(&mpjac->psubcomm);
1160: PetscFree(mpjac);
1161: PetscFree(pc->data);
1162: return(0);
1163: }
1165: static PetscErrorCode PCApply_BJacobi_Multiproc(PC pc,Vec x,Vec y)
1166: {
1167: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
1168: PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
1169: PetscErrorCode ierr;
1170: PetscScalar *yarray;
1171: const PetscScalar *xarray;
1172: KSPConvergedReason reason;
1175: /* place x's and y's local arrays into xsub and ysub */
1176: VecGetArrayRead(x,&xarray);
1177: VecGetArray(y,&yarray);
1178: VecPlaceArray(mpjac->xsub,xarray);
1179: VecPlaceArray(mpjac->ysub,yarray);
1181: /* apply preconditioner on each matrix block */
1182: PetscLogEventBegin(PC_ApplyOnBlocks,jac->ksp[0],mpjac->xsub,mpjac->ysub,0);
1183: KSPSolve(jac->ksp[0],mpjac->xsub,mpjac->ysub);
1184: KSPCheckSolve(jac->ksp[0],pc,mpjac->ysub);
1185: PetscLogEventEnd(PC_ApplyOnBlocks,jac->ksp[0],mpjac->xsub,mpjac->ysub,0);
1186: KSPGetConvergedReason(jac->ksp[0],&reason);
1187: if (reason == KSP_DIVERGED_PC_FAILED) {
1188: pc->failedreason = PC_SUBPC_ERROR;
1189: }
1191: VecResetArray(mpjac->xsub);
1192: VecResetArray(mpjac->ysub);
1193: VecRestoreArrayRead(x,&xarray);
1194: VecRestoreArray(y,&yarray);
1195: return(0);
1196: }
1198: static PetscErrorCode PCMatApply_BJacobi_Multiproc(PC pc,Mat X,Mat Y)
1199: {
1200: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
1201: KSPConvergedReason reason;
1202: Mat sX,sY;
1203: const PetscScalar *x;
1204: PetscScalar *y;
1205: PetscInt m,N,lda,ldb;
1206: PetscErrorCode ierr;
1209: /* apply preconditioner on each matrix block */
1210: MatGetLocalSize(X,&m,NULL);
1211: MatGetSize(X,NULL,&N);
1212: MatDenseGetLDA(X,&lda);
1213: MatDenseGetLDA(Y,&ldb);
1214: MatDenseGetArrayRead(X,&x);
1215: MatDenseGetArrayWrite(Y,&y);
1216: MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]),m,PETSC_DECIDE,PETSC_DECIDE,N,(PetscScalar*)x,&sX);
1217: MatCreateDense(PetscObjectComm((PetscObject)jac->ksp[0]),m,PETSC_DECIDE,PETSC_DECIDE,N,y,&sY);
1218: MatDenseSetLDA(sX,lda);
1219: MatDenseSetLDA(sY,ldb);
1220: PetscLogEventBegin(PC_ApplyOnBlocks,jac->ksp[0],X,Y,0);
1221: KSPMatSolve(jac->ksp[0],sX,sY);
1222: KSPCheckSolve(jac->ksp[0],pc,NULL);
1223: PetscLogEventEnd(PC_ApplyOnBlocks,jac->ksp[0],X,Y,0);
1224: MatDestroy(&sY);
1225: MatDestroy(&sX);
1226: MatDenseRestoreArrayWrite(Y,&y);
1227: MatDenseRestoreArrayRead(X,&x);
1228: KSPGetConvergedReason(jac->ksp[0],&reason);
1229: if (reason == KSP_DIVERGED_PC_FAILED) {
1230: pc->failedreason = PC_SUBPC_ERROR;
1231: }
1232: return(0);
1233: }
1235: static PetscErrorCode PCSetUp_BJacobi_Multiproc(PC pc)
1236: {
1237: PC_BJacobi *jac = (PC_BJacobi*)pc->data;
1238: PC_BJacobi_Multiproc *mpjac = (PC_BJacobi_Multiproc*)jac->data;
1239: PetscErrorCode ierr;
1240: PetscInt m,n;
1241: MPI_Comm comm,subcomm=0;
1242: const char *prefix;
1243: PetscBool wasSetup = PETSC_TRUE;
1244: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
1245: PetscBool is_gpumatrix = PETSC_FALSE;
1246: #endif
1250: PetscObjectGetComm((PetscObject)pc,&comm);
1251: if (jac->n_local > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only a single block in a subcommunicator is supported");
1252: jac->n_local = 1; /* currently only a single block is supported for a subcommunicator */
1253: if (!pc->setupcalled) {
1254: wasSetup = PETSC_FALSE;
1255: PetscNewLog(pc,&mpjac);
1256: jac->data = (void*)mpjac;
1258: /* initialize datastructure mpjac */
1259: if (!jac->psubcomm) {
1260: /* Create default contiguous subcommunicatiors if user does not provide them */
1261: PetscSubcommCreate(comm,&jac->psubcomm);
1262: PetscSubcommSetNumber(jac->psubcomm,jac->n);
1263: PetscSubcommSetType(jac->psubcomm,PETSC_SUBCOMM_CONTIGUOUS);
1264: PetscLogObjectMemory((PetscObject)pc,sizeof(PetscSubcomm));
1265: }
1266: mpjac->psubcomm = jac->psubcomm;
1267: subcomm = PetscSubcommChild(mpjac->psubcomm);
1269: /* Get matrix blocks of pmat */
1270: MatGetMultiProcBlock(pc->pmat,subcomm,MAT_INITIAL_MATRIX,&mpjac->submats);
1272: /* create a new PC that processors in each subcomm have copy of */
1273: PetscMalloc1(1,&jac->ksp);
1274: KSPCreate(subcomm,&jac->ksp[0]);
1275: KSPSetErrorIfNotConverged(jac->ksp[0],pc->erroriffailure);
1276: PetscObjectIncrementTabLevel((PetscObject)jac->ksp[0],(PetscObject)pc,1);
1277: PetscLogObjectParent((PetscObject)pc,(PetscObject)jac->ksp[0]);
1278: KSPSetOperators(jac->ksp[0],mpjac->submats,mpjac->submats);
1279: KSPGetPC(jac->ksp[0],&mpjac->pc);
1281: PCGetOptionsPrefix(pc,&prefix);
1282: KSPSetOptionsPrefix(jac->ksp[0],prefix);
1283: KSPAppendOptionsPrefix(jac->ksp[0],"sub_");
1285: /* create dummy vectors xsub and ysub */
1286: MatGetLocalSize(mpjac->submats,&m,&n);
1287: VecCreateMPIWithArray(subcomm,1,n,PETSC_DECIDE,NULL,&mpjac->xsub);
1288: VecCreateMPIWithArray(subcomm,1,m,PETSC_DECIDE,NULL,&mpjac->ysub);
1289: #if defined(PETSC_HAVE_CUDA)
1290: PetscObjectTypeCompareAny((PetscObject)mpjac->submats,&is_gpumatrix,MATAIJCUSPARSE,MATMPIAIJCUSPARSE,"");
1291: if (is_gpumatrix) {
1292: VecSetType(mpjac->xsub,VECMPICUDA);
1293: VecSetType(mpjac->ysub,VECMPICUDA);
1294: }
1295: #endif
1296: #if defined(PETSC_HAVE_VIENNACL)
1297: PetscObjectTypeCompareAny((PetscObject)mpjac->submats,&is_gpumatrix,MATAIJVIENNACL,MATMPIAIJVIENNACL,"");
1298: if (is_gpumatrix) {
1299: VecSetType(mpjac->xsub,VECMPIVIENNACL);
1300: VecSetType(mpjac->ysub,VECMPIVIENNACL);
1301: }
1302: #endif
1303: PetscLogObjectParent((PetscObject)pc,(PetscObject)mpjac->xsub);
1304: PetscLogObjectParent((PetscObject)pc,(PetscObject)mpjac->ysub);
1306: pc->ops->reset = PCReset_BJacobi_Multiproc;
1307: pc->ops->destroy = PCDestroy_BJacobi_Multiproc;
1308: pc->ops->apply = PCApply_BJacobi_Multiproc;
1309: pc->ops->matapply= PCMatApply_BJacobi_Multiproc;
1310: } else { /* pc->setupcalled */
1311: subcomm = PetscSubcommChild(mpjac->psubcomm);
1312: if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
1313: /* destroy old matrix blocks, then get new matrix blocks */
1314: if (mpjac->submats) {MatDestroy(&mpjac->submats);}
1315: MatGetMultiProcBlock(pc->pmat,subcomm,MAT_INITIAL_MATRIX,&mpjac->submats);
1316: } else {
1317: MatGetMultiProcBlock(pc->pmat,subcomm,MAT_REUSE_MATRIX,&mpjac->submats);
1318: }
1319: KSPSetOperators(jac->ksp[0],mpjac->submats,mpjac->submats);
1320: }
1322: if (!wasSetup && pc->setfromoptionscalled) {
1323: KSPSetFromOptions(jac->ksp[0]);
1324: }
1325: return(0);
1326: }