Actual source code: bddcprivate.c
petsc-3.14.0 2020-09-29
1: #include <../src/mat/impls/aij/seq/aij.h>
2: #include <../src/ksp/pc/impls/bddc/bddc.h>
3: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
4: #include <../src/mat/impls/dense/seq/dense.h>
5: #include <petscdmplex.h>
6: #include <petscblaslapack.h>
7: #include <petsc/private/sfimpl.h>
8: #include <petsc/private/dmpleximpl.h>
9: #include <petscdmda.h>
/* Forward declaration of a file-local (static) helper, so that it can be
   referenced before its definition appears in this translation unit. */
11: static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*);
13: /* if range is true, it returns B s.t. span{B} = range(A)
14: if range is false, it returns B s.t. range(B) _|_ range(A) */
15: PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
16: {
17: #if !defined(PETSC_USE_COMPLEX)
18: PetscScalar *uwork,*data,*U, ds = 0.;
19: PetscReal *sing;
20: PetscBLASInt bM,bN,lwork,lierr,di = 1;
21: PetscInt ulw,i,nr,nc,n;
25: MatGetSize(A,&nr,&nc);
26: if (!nr || !nc) return(0);
28: /* workspace */
29: if (!work) {
30: ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc));
31: PetscMalloc1(ulw,&uwork);
32: } else {
33: ulw = lw;
34: uwork = work;
35: }
36: n = PetscMin(nr,nc);
37: if (!rwork) {
38: PetscMalloc1(n,&sing);
39: } else {
40: sing = rwork;
41: }
43: /* SVD */
44: PetscMalloc1(nr*nr,&U);
45: PetscBLASIntCast(nr,&bM);
46: PetscBLASIntCast(nc,&bN);
47: PetscBLASIntCast(ulw,&lwork);
48: MatDenseGetArray(A,&data);
49: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
50: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr));
51: PetscFPTrapPop();
52: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
53: MatDenseRestoreArray(A,&data);
54: for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break;
55: if (!rwork) {
56: PetscFree(sing);
57: }
58: if (!work) {
59: PetscFree(uwork);
60: }
61: /* create B */
62: if (!range) {
63: MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);
64: MatDenseGetArray(*B,&data);
65: PetscArraycpy(data,U+nr*i,(nr-i)*nr);
66: } else {
67: MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);
68: MatDenseGetArray(*B,&data);
69: PetscArraycpy(data,U,i*nr);
70: }
71: MatDenseRestoreArray(*B,&data);
72: PetscFree(U);
73: #else /* PETSC_USE_COMPLEX */
75: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes");
76: #endif
77: return(0);
78: }
80: /* TODO REMOVE: debug-only state, compiled in only when PRINT_GDET is defined */
81: #if defined(PRINT_GDET)
82: static int inc = 0; /* running counter: printed in the dump file name and incremented at every dump */
83: static int lev = 0; /* value printed after "l" in the dump file name (presumably the level - TODO confirm) */
84: #endif
86: PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
87: {
89: Mat GE,GEd;
90: PetscInt rsize,csize,esize;
91: PetscScalar *ptr;
94: ISGetSize(edge,&esize);
95: if (!esize) return(0);
96: ISGetSize(extrow,&rsize);
97: ISGetSize(extcol,&csize);
99: /* gradients */
100: ptr = work + 5*esize;
101: MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);
102: MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);
103: MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);
104: MatDestroy(&GE);
106: /* constants */
107: ptr += rsize*csize;
108: MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);
109: MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);
110: MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);
111: MatDestroy(&GE);
112: MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);
113: MatDestroy(&GEd);
115: if (corners) {
116: Mat GEc;
117: const PetscScalar *vals;
118: PetscScalar v;
120: MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);
121: MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);
122: MatDenseGetArrayRead(GEd,&vals);
123: /* v = PetscAbsScalar(vals[0]) */;
124: v = 1.;
125: cvals[0] = vals[0]/v;
126: cvals[1] = vals[1]/v;
127: MatDenseRestoreArrayRead(GEd,&vals);
128: MatScale(*GKins,1./v);
129: #if defined(PRINT_GDET)
130: {
131: PetscViewer viewer;
132: char filename[256];
133: sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++);
134: PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
135: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
136: PetscObjectSetName((PetscObject)GEc,"GEc");
137: MatView(GEc,viewer);
138: PetscObjectSetName((PetscObject)(*GKins),"GK");
139: MatView(*GKins,viewer);
140: PetscObjectSetName((PetscObject)GEd,"Gproj");
141: MatView(GEd,viewer);
142: PetscViewerDestroy(&viewer);
143: }
144: #endif
145: MatDestroy(&GEd);
146: MatDestroy(&GEc);
147: }
149: return(0);
150: }
152: PetscErrorCode PCBDDCNedelecSupport(PC pc)
153: {
154: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
155: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
156: Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit;
157: Vec tvec;
158: PetscSF sfv;
159: ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g;
160: MPI_Comm comm;
161: IS lned,primals,allprimals,nedfieldlocal;
162: IS *eedges,*extrows,*extcols,*alleedges;
163: PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter;
164: PetscScalar *vals,*work;
165: PetscReal *rwork;
166: const PetscInt *idxs,*ii,*jj,*iit,*jjt;
167: PetscInt ne,nv,Lv,order,n,field;
168: PetscInt n_neigh,*neigh,*n_shared,**shared;
169: PetscInt i,j,extmem,cum,maxsize,nee;
170: PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs;
171: PetscInt *sfvleaves,*sfvroots;
172: PetscInt *corners,*cedges;
173: PetscInt *ecount,**eneighs,*vcount,**vneighs;
174: PetscInt *emarks;
175: PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal;
176: PetscErrorCode ierr;
179: /* If the discrete gradient is defined for a subset of dofs and global is true,
180: it assumes G is given in global ordering for all the dofs.
181: Otherwise, the ordering is global for the Nedelec field */
182: order = pcbddc->nedorder;
183: conforming = pcbddc->conforming;
184: field = pcbddc->nedfield;
185: global = pcbddc->nedglobal;
186: setprimal = PETSC_FALSE;
187: print = PETSC_FALSE;
188: singular = PETSC_FALSE;
190: /* Command line customization */
191: PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");
192: PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);
193: PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);
194: PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);
195: /* print debug info TODO: to be removed */
196: PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);
197: PetscOptionsEnd();
199: /* Return if there are no edges in the decomposition and the problem is not singular */
200: MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);
201: ISLocalToGlobalMappingGetSize(al2g,&n);
202: PetscObjectGetComm((PetscObject)pc,&comm);
203: if (!singular) {
204: VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
205: lrc[0] = PETSC_FALSE;
206: for (i=0;i<n;i++) {
207: if (PetscRealPart(vals[i]) > 2.) {
208: lrc[0] = PETSC_TRUE;
209: break;
210: }
211: }
212: VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
213: MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);
214: if (!lrc[1]) return(0);
215: }
217: /* Get Nedelec field */
218: if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %D: number of fields is %D",field,pcbddc->n_ISForDofsLocal);
219: if (pcbddc->n_ISForDofsLocal && field >= 0) {
220: PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);
221: nedfieldlocal = pcbddc->ISForDofsLocal[field];
222: ISGetLocalSize(nedfieldlocal,&ne);
223: } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
224: ne = n;
225: nedfieldlocal = NULL;
226: global = PETSC_TRUE;
227: } else if (field == PETSC_DECIDE) {
228: PetscInt rst,ren,*idx;
230: PetscArrayzero(matis->sf_leafdata,n);
231: PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);
232: MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);
233: for (i=rst;i<ren;i++) {
234: PetscInt nc;
236: MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
237: if (nc > 1) matis->sf_rootdata[i-rst] = 1;
238: MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);
239: }
240: PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
241: PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
242: PetscMalloc1(n,&idx);
243: for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i;
244: ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);
245: } else {
246: SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified");
247: }
249: /* Sanity checks */
250: if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time");
251: if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis");
252: if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %D it's not a multiple of the order %D",ne,order);
254: /* Just set primal dofs and return */
255: if (setprimal) {
256: IS enedfieldlocal;
257: PetscInt *eidxs;
259: PetscMalloc1(ne,&eidxs);
260: VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);
261: if (nedfieldlocal) {
262: ISGetIndices(nedfieldlocal,&idxs);
263: for (i=0,cum=0;i<ne;i++) {
264: if (PetscRealPart(vals[idxs[i]]) > 2.) {
265: eidxs[cum++] = idxs[i];
266: }
267: }
268: ISRestoreIndices(nedfieldlocal,&idxs);
269: } else {
270: for (i=0,cum=0;i<ne;i++) {
271: if (PetscRealPart(vals[i]) > 2.) {
272: eidxs[cum++] = i;
273: }
274: }
275: }
276: VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);
277: ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);
278: PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);
279: PetscFree(eidxs);
280: ISDestroy(&nedfieldlocal);
281: ISDestroy(&enedfieldlocal);
282: return(0);
283: }
285: /* Compute some l2g maps */
286: if (nedfieldlocal) {
287: IS is;
289: /* need to map from the local Nedelec field to local numbering */
290: ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);
291: /* need to map from the local Nedelec field to global numbering for the whole dofs*/
292: ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);
293: ISLocalToGlobalMappingCreateIS(is,&al2g);
294: /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
295: if (global) {
296: PetscObjectReference((PetscObject)al2g);
297: el2g = al2g;
298: } else {
299: IS gis;
301: ISRenumber(is,NULL,NULL,&gis);
302: ISLocalToGlobalMappingCreateIS(gis,&el2g);
303: ISDestroy(&gis);
304: }
305: ISDestroy(&is);
306: } else {
307: /* restore default */
308: pcbddc->nedfield = -1;
309: /* one ref for the destruction of al2g, one for el2g */
310: PetscObjectReference((PetscObject)al2g);
311: PetscObjectReference((PetscObject)al2g);
312: el2g = al2g;
313: fl2g = NULL;
314: }
316: /* Start communication to drop connections for interior edges (for cc analysis only) */
317: PetscArrayzero(matis->sf_leafdata,n);
318: PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);
319: if (nedfieldlocal) {
320: ISGetIndices(nedfieldlocal,&idxs);
321: for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1;
322: ISRestoreIndices(nedfieldlocal,&idxs);
323: } else {
324: for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1;
325: }
326: PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);
327: PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);
329: if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */
330: MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);
331: MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
332: if (global) {
333: PetscInt rst;
335: MatGetOwnershipRange(G,&rst,NULL);
336: for (i=0,cum=0;i<pc->pmat->rmap->n;i++) {
337: if (matis->sf_rootdata[i] < 2) {
338: matis->sf_rootdata[cum++] = i + rst;
339: }
340: }
341: MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);
342: MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);
343: } else {
344: PetscInt *tbz;
346: PetscMalloc1(ne,&tbz);
347: PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
348: PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);
349: ISGetIndices(nedfieldlocal,&idxs);
350: for (i=0,cum=0;i<ne;i++)
351: if (matis->sf_leafdata[idxs[i]] == 1)
352: tbz[cum++] = i;
353: ISRestoreIndices(nedfieldlocal,&idxs);
354: ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);
355: MatZeroRows(G,cum,tbz,0.,NULL,NULL);
356: PetscFree(tbz);
357: }
358: } else { /* we need the entire G to infer the nullspace */
359: PetscObjectReference((PetscObject)pcbddc->discretegradient);
360: G = pcbddc->discretegradient;
361: }
363: /* Extract subdomain relevant rows of G */
364: ISLocalToGlobalMappingGetIndices(el2g,&idxs);
365: ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);
366: MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);
367: ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);
368: ISDestroy(&lned);
369: MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);
370: MatDestroy(&lGall);
371: MatISGetLocalMat(lGis,&lG);
373: /* SF for nodal dofs communications */
374: MatGetLocalSize(G,NULL,&Lv);
375: MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);
376: PetscObjectReference((PetscObject)vl2g);
377: ISLocalToGlobalMappingGetSize(vl2g,&nv);
378: PetscSFCreate(comm,&sfv);
379: ISLocalToGlobalMappingGetIndices(vl2g,&idxs);
380: PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);
381: ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);
382: i = singular ? 2 : 1;
383: PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);
385: /* Destroy temporary G created in MATIS format and modified G */
386: PetscObjectReference((PetscObject)lG);
387: MatDestroy(&lGis);
388: MatDestroy(&G);
390: if (print) {
391: PetscObjectSetName((PetscObject)lG,"initial_lG");
392: MatView(lG,NULL);
393: }
395: /* Save lG for values insertion in change of basis */
396: MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);
398: /* Analyze the edge-nodes connections (duplicate lG) */
399: MatDuplicate(lG,MAT_COPY_VALUES,&lGe);
400: MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
401: PetscBTCreate(nv,&btv);
402: PetscBTCreate(ne,&bte);
403: PetscBTCreate(ne,&btb);
404: PetscBTCreate(ne,&btbd);
405: PetscBTCreate(nv,&btvcand);
406: /* need to import the boundary specification to ensure the
407: proper detection of coarse edges' endpoints */
408: if (pcbddc->DirichletBoundariesLocal) {
409: IS is;
411: if (fl2g) {
412: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);
413: } else {
414: is = pcbddc->DirichletBoundariesLocal;
415: }
416: ISGetLocalSize(is,&cum);
417: ISGetIndices(is,&idxs);
418: for (i=0;i<cum;i++) {
419: if (idxs[i] >= 0) {
420: PetscBTSet(btb,idxs[i]);
421: PetscBTSet(btbd,idxs[i]);
422: }
423: }
424: ISRestoreIndices(is,&idxs);
425: if (fl2g) {
426: ISDestroy(&is);
427: }
428: }
429: if (pcbddc->NeumannBoundariesLocal) {
430: IS is;
432: if (fl2g) {
433: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);
434: } else {
435: is = pcbddc->NeumannBoundariesLocal;
436: }
437: ISGetLocalSize(is,&cum);
438: ISGetIndices(is,&idxs);
439: for (i=0;i<cum;i++) {
440: if (idxs[i] >= 0) {
441: PetscBTSet(btb,idxs[i]);
442: }
443: }
444: ISRestoreIndices(is,&idxs);
445: if (fl2g) {
446: ISDestroy(&is);
447: }
448: }
450: /* Count neighs per dof */
451: ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs);
452: ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs);
454: /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
455: for proper detection of coarse edges' endpoints */
456: PetscBTCreate(ne,&btee);
457: for (i=0;i<ne;i++) {
458: if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) {
459: PetscBTSet(btee,i);
460: }
461: }
462: PetscMalloc1(ne,&marks);
463: if (!conforming) {
464: MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
465: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
466: }
467: MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
468: MatSeqAIJGetArray(lGe,&vals);
469: cum = 0;
470: for (i=0;i<ne;i++) {
471: /* eliminate rows corresponding to edge dofs belonging to coarse faces */
472: if (!PetscBTLookup(btee,i)) {
473: marks[cum++] = i;
474: continue;
475: }
476: /* set badly connected edge dofs as primal */
477: if (!conforming) {
478: if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
479: marks[cum++] = i;
480: PetscBTSet(bte,i);
481: for (j=ii[i];j<ii[i+1];j++) {
482: PetscBTSet(btv,jj[j]);
483: }
484: } else {
485: /* every edge dofs should be connected trough a certain number of nodal dofs
486: to other edge dofs belonging to coarse edges
487: - at most 2 endpoints
488: - order-1 interior nodal dofs
489: - no undefined nodal dofs (nconn < order)
490: */
491: PetscInt ends = 0,ints = 0, undef = 0;
492: for (j=ii[i];j<ii[i+1];j++) {
493: PetscInt v = jj[j],k;
494: PetscInt nconn = iit[v+1]-iit[v];
495: for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--;
496: if (nconn > order) ends++;
497: else if (nconn == order) ints++;
498: else undef++;
499: }
500: if (undef || ends > 2 || ints != order -1) {
501: marks[cum++] = i;
502: PetscBTSet(bte,i);
503: for (j=ii[i];j<ii[i+1];j++) {
504: PetscBTSet(btv,jj[j]);
505: }
506: }
507: }
508: }
509: /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
510: if (!order && ii[i+1] != ii[i]) {
511: PetscScalar val = 1./(ii[i+1]-ii[i]-1);
512: for (j=ii[i];j<ii[i+1];j++) vals[j] = val;
513: }
514: }
515: PetscBTDestroy(&btee);
516: MatSeqAIJRestoreArray(lGe,&vals);
517: MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
518: if (!conforming) {
519: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
520: MatDestroy(&lGt);
521: }
522: MatZeroRows(lGe,cum,marks,0.,NULL,NULL);
524: /* identify splitpoints and corner candidates */
525: MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);
526: if (print) {
527: PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");
528: MatView(lGe,NULL);
529: PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");
530: MatView(lGt,NULL);
531: }
532: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
533: MatSeqAIJGetArray(lGt,&vals);
534: for (i=0;i<nv;i++) {
535: PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i];
536: PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
537: if (!order) { /* variable order */
538: PetscReal vorder = 0.;
540: for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]);
541: test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON);
542: if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%D)",vorder,test);
543: ord = 1;
544: }
545: if (PetscUnlikelyDebug(test%ord)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %D connected with nodal dof %D with order %D",test,i,ord);
546: for (j=ii[i];j<ii[i+1] && sneighs;j++) {
547: if (PetscBTLookup(btbd,jj[j])) {
548: bdir = PETSC_TRUE;
549: break;
550: }
551: if (vc != ecount[jj[j]]) {
552: sneighs = PETSC_FALSE;
553: } else {
554: PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]];
555: for (k=0;k<vc;k++) {
556: if (vn[k] != en[k]) {
557: sneighs = PETSC_FALSE;
558: break;
559: }
560: }
561: }
562: }
563: if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */
564: if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %D (%D %D %D)\n",i,!sneighs,test >= 3*ord,bdir);
565: PetscBTSet(btv,i);
566: } else if (test == ord) {
567: if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) {
568: if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %D\n",i);
569: PetscBTSet(btv,i);
570: } else {
571: if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %D\n",i);
572: PetscBTSet(btvcand,i);
573: }
574: }
575: }
576: ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs);
577: ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs);
578: PetscBTDestroy(&btbd);
580: /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
581: if (order != 1) {
582: if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n");
583: MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
584: for (i=0;i<nv;i++) {
585: if (PetscBTLookup(btvcand,i)) {
586: PetscBool found = PETSC_FALSE;
587: for (j=ii[i];j<ii[i+1] && !found;j++) {
588: PetscInt k,e = jj[j];
589: if (PetscBTLookup(bte,e)) continue;
590: for (k=iit[e];k<iit[e+1];k++) {
591: PetscInt v = jjt[k];
592: if (v != i && PetscBTLookup(btvcand,v)) {
593: found = PETSC_TRUE;
594: break;
595: }
596: }
597: }
598: if (!found) {
599: if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D CLEARED\n",i);
600: PetscBTClear(btvcand,i);
601: } else {
602: if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D ACCEPTED\n",i);
603: }
604: }
605: }
606: MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
607: }
608: MatSeqAIJRestoreArray(lGt,&vals);
609: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
610: MatDestroy(&lGe);
612: /* Get the local G^T explicitly */
613: MatDestroy(&lGt);
614: MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);
615: MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
617: /* Mark interior nodal dofs */
618: ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
619: PetscBTCreate(nv,&btvi);
620: for (i=1;i<n_neigh;i++) {
621: for (j=0;j<n_shared[i];j++) {
622: PetscBTSet(btvi,shared[i][j]);
623: }
624: }
625: ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);
627: /* communicate corners and splitpoints */
628: PetscMalloc1(nv,&vmarks);
629: PetscArrayzero(sfvleaves,nv);
630: PetscArrayzero(sfvroots,Lv);
631: for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1;
633: if (print) {
634: IS tbz;
636: cum = 0;
637: for (i=0;i<nv;i++)
638: if (sfvleaves[i])
639: vmarks[cum++] = i;
641: ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
642: PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");
643: ISView(tbz,NULL);
644: ISDestroy(&tbz);
645: }
647: PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
648: PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);
649: PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);
650: PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);
652: /* Zero rows of lGt corresponding to identified corners
653: and interior nodal dofs */
654: cum = 0;
655: for (i=0;i<nv;i++) {
656: if (sfvleaves[i]) {
657: vmarks[cum++] = i;
658: PetscBTSet(btv,i);
659: }
660: if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i;
661: }
662: PetscBTDestroy(&btvi);
663: if (print) {
664: IS tbz;
666: ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);
667: PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");
668: ISView(tbz,NULL);
669: ISDestroy(&tbz);
670: }
671: MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);
672: PetscFree(vmarks);
673: PetscSFDestroy(&sfv);
674: PetscFree2(sfvleaves,sfvroots);
676: /* Recompute G */
677: MatDestroy(&lG);
678: MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);
679: if (print) {
680: PetscObjectSetName((PetscObject)lG,"used_lG");
681: MatView(lG,NULL);
682: PetscObjectSetName((PetscObject)lGt,"used_lGt");
683: MatView(lGt,NULL);
684: }
686: /* Get primal dofs (if any) */
687: cum = 0;
688: for (i=0;i<ne;i++) {
689: if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i;
690: }
691: if (fl2g) {
692: ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);
693: }
694: ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);
695: if (print) {
696: PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");
697: ISView(primals,NULL);
698: }
699: PetscBTDestroy(&bte);
700: /* TODO: what if the user passed in some of them ? */
701: PCBDDCSetPrimalVerticesLocalIS(pc,primals);
702: ISDestroy(&primals);
704: /* Compute edge connectivity */
705: PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");
707: /* Symbolic conn = lG*lGt */
708: MatProductCreate(lG,lGt,NULL,&conn);
709: MatProductSetType(conn,MATPRODUCT_AB);
710: MatProductSetAlgorithm(conn,"default");
711: MatProductSetFill(conn,PETSC_DEFAULT);
712: PetscObjectSetOptionsPrefix((PetscObject)conn,"econn_");
713: MatProductSetFromOptions(conn);
714: MatProductSymbolic(conn);
716: MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
717: if (fl2g) {
718: PetscBT btf;
719: PetscInt *iia,*jja,*iiu,*jju;
720: PetscBool rest = PETSC_FALSE,free = PETSC_FALSE;
722: /* create CSR for all local dofs */
723: PetscMalloc1(n+1,&iia);
724: if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
725: if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %D. Should be %D",pcbddc->mat_graph->nvtxs_csr,n);
726: iiu = pcbddc->mat_graph->xadj;
727: jju = pcbddc->mat_graph->adjncy;
728: } else if (pcbddc->use_local_adj) {
729: rest = PETSC_TRUE;
730: MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
731: } else {
732: free = PETSC_TRUE;
733: PetscMalloc2(n+1,&iiu,n,&jju);
734: iiu[0] = 0;
735: for (i=0;i<n;i++) {
736: iiu[i+1] = i+1;
737: jju[i] = -1;
738: }
739: }
741: /* import sizes of CSR */
742: iia[0] = 0;
743: for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i];
745: /* overwrite entries corresponding to the Nedelec field */
746: PetscBTCreate(n,&btf);
747: ISGetIndices(nedfieldlocal,&idxs);
748: for (i=0;i<ne;i++) {
749: PetscBTSet(btf,idxs[i]);
750: iia[idxs[i]+1] = ii[i+1]-ii[i];
751: }
753: /* iia in CSR */
754: for (i=0;i<n;i++) iia[i+1] += iia[i];
756: /* jja in CSR */
757: PetscMalloc1(iia[n],&jja);
758: for (i=0;i<n;i++)
759: if (!PetscBTLookup(btf,i))
760: for (j=0;j<iiu[i+1]-iiu[i];j++)
761: jja[iia[i]+j] = jju[iiu[i]+j];
763: /* map edge dofs connectivity */
764: if (jj) {
765: ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);
766: for (i=0;i<ne;i++) {
767: PetscInt e = idxs[i];
768: for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j];
769: }
770: }
771: ISRestoreIndices(nedfieldlocal,&idxs);
772: PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);
773: if (rest) {
774: MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);
775: }
776: if (free) {
777: PetscFree2(iiu,jju);
778: }
779: PetscBTDestroy(&btf);
780: } else {
781: PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);
782: }
784: /* Analyze interface for edge dofs */
785: PCBDDCAnalyzeInterface(pc);
786: pcbddc->mat_graph->twodim = PETSC_FALSE;
788: /* Get coarse edges in the edge space */
789: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
790: MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
792: if (fl2g) {
793: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
794: PetscMalloc1(nee,&eedges);
795: for (i=0;i<nee;i++) {
796: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
797: }
798: } else {
799: eedges = alleedges;
800: primals = allprimals;
801: }
803: /* Mark fine edge dofs with their coarse edge id */
804: PetscArrayzero(marks,ne);
805: ISGetLocalSize(primals,&cum);
806: ISGetIndices(primals,&idxs);
807: for (i=0;i<cum;i++) marks[idxs[i]] = nee+1;
808: ISRestoreIndices(primals,&idxs);
809: if (print) {
810: PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");
811: ISView(primals,NULL);
812: }
814: maxsize = 0;
815: for (i=0;i<nee;i++) {
816: PetscInt size,mark = i+1;
818: ISGetLocalSize(eedges[i],&size);
819: ISGetIndices(eedges[i],&idxs);
820: for (j=0;j<size;j++) marks[idxs[j]] = mark;
821: ISRestoreIndices(eedges[i],&idxs);
822: maxsize = PetscMax(maxsize,size);
823: }
825: /* Find coarse edge endpoints */
826: MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
827: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
828: for (i=0;i<nee;i++) {
829: PetscInt mark = i+1,size;
831: ISGetLocalSize(eedges[i],&size);
832: if (!size && nedfieldlocal) continue;
833: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
834: ISGetIndices(eedges[i],&idxs);
835: if (print) {
836: PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %D\n",i);
837: ISView(eedges[i],NULL);
838: }
839: for (j=0;j<size;j++) {
840: PetscInt k, ee = idxs[j];
841: if (print) PetscPrintf(PETSC_COMM_SELF," idx %D\n",ee);
842: for (k=ii[ee];k<ii[ee+1];k++) {
843: if (print) PetscPrintf(PETSC_COMM_SELF," inspect %D\n",jj[k]);
844: if (PetscBTLookup(btv,jj[k])) {
845: if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %D\n",jj[k]);
846: } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */
847: PetscInt k2;
848: PetscBool corner = PETSC_FALSE;
849: for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) {
850: if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %D: mark %D (ref mark %D), boundary %D\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2]));
851: /* it's a corner if either is connected with an edge dof belonging to a different cc or
852: if the edge dof lie on the natural part of the boundary */
853: if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) {
854: corner = PETSC_TRUE;
855: break;
856: }
857: }
858: if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
859: if (print) PetscPrintf(PETSC_COMM_SELF," corner found %D\n",jj[k]);
860: PetscBTSet(btv,jj[k]);
861: } else {
862: if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n");
863: }
864: }
865: }
866: }
867: ISRestoreIndices(eedges[i],&idxs);
868: }
869: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
870: MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
871: PetscBTDestroy(&btb);
873: /* Reset marked primal dofs */
874: ISGetLocalSize(primals,&cum);
875: ISGetIndices(primals,&idxs);
876: for (i=0;i<cum;i++) marks[idxs[i]] = 0;
877: ISRestoreIndices(primals,&idxs);
879: /* Now use the initial lG */
880: MatDestroy(&lG);
881: MatDestroy(&lGt);
882: lG = lGinit;
883: MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);
885: /* Compute extended cols indices */
886: PetscBTCreate(nv,&btvc);
887: PetscBTCreate(nee,&bter);
888: MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
889: MatSeqAIJGetMaxRowNonzeros(lG,&i);
890: i *= maxsize;
891: PetscCalloc1(nee,&extcols);
892: PetscMalloc2(i,&extrow,i,&gidxs);
893: eerr = PETSC_FALSE;
894: for (i=0;i<nee;i++) {
895: PetscInt size,found = 0;
897: cum = 0;
898: ISGetLocalSize(eedges[i],&size);
899: if (!size && nedfieldlocal) continue;
900: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
901: ISGetIndices(eedges[i],&idxs);
902: PetscBTMemzero(nv,btvc);
903: for (j=0;j<size;j++) {
904: PetscInt k,ee = idxs[j];
905: for (k=ii[ee];k<ii[ee+1];k++) {
906: PetscInt vv = jj[k];
907: if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv;
908: else if (!PetscBTLookupSet(btvc,vv)) found++;
909: }
910: }
911: ISRestoreIndices(eedges[i],&idxs);
912: PetscSortRemoveDupsInt(&cum,extrow);
913: ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
914: PetscSortIntWithArray(cum,gidxs,extrow);
915: ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
916: /* it may happen that endpoints are not defined at this point
917: if it is the case, mark this edge for a second pass */
918: if (cum != size -1 || found != 2) {
919: PetscBTSet(bter,i);
920: if (print) {
921: PetscObjectSetName((PetscObject)eedges[i],"error_edge");
922: ISView(eedges[i],NULL);
923: PetscObjectSetName((PetscObject)extcols[i],"error_extcol");
924: ISView(extcols[i],NULL);
925: }
926: eerr = PETSC_TRUE;
927: }
928: }
929: /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
930: MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);
931: if (done) {
932: PetscInt *newprimals;
934: PetscMalloc1(ne,&newprimals);
935: ISGetLocalSize(primals,&cum);
936: ISGetIndices(primals,&idxs);
937: PetscArraycpy(newprimals,idxs,cum);
938: ISRestoreIndices(primals,&idxs);
939: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
940: if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %D)\n",eerr);
941: for (i=0;i<nee;i++) {
942: PetscBool has_candidates = PETSC_FALSE;
943: if (PetscBTLookup(bter,i)) {
944: PetscInt size,mark = i+1;
946: ISGetLocalSize(eedges[i],&size);
947: ISGetIndices(eedges[i],&idxs);
948: /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
949: for (j=0;j<size;j++) {
950: PetscInt k,ee = idxs[j];
951: if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %D [%D %D)\n",ee,ii[ee],ii[ee+1]);
952: for (k=ii[ee];k<ii[ee+1];k++) {
953: /* set all candidates located on the edge as corners */
954: if (PetscBTLookup(btvcand,jj[k])) {
955: PetscInt k2,vv = jj[k];
956: has_candidates = PETSC_TRUE;
957: if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %D\n",vv);
958: PetscBTSet(btv,vv);
959: /* set all edge dofs connected to candidate as primals */
960: for (k2=iit[vv];k2<iit[vv+1];k2++) {
961: if (marks[jjt[k2]] == mark) {
962: PetscInt k3,ee2 = jjt[k2];
963: if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %D\n",ee2);
964: newprimals[cum++] = ee2;
965: /* finally set the new corners */
966: for (k3=ii[ee2];k3<ii[ee2+1];k3++) {
967: if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %D\n",jj[k3]);
968: PetscBTSet(btv,jj[k3]);
969: }
970: }
971: }
972: } else {
973: if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %D\n",jj[k]);
974: }
975: }
976: }
977: if (!has_candidates) { /* circular edge */
978: PetscInt k, ee = idxs[0],*tmarks;
980: PetscCalloc1(ne,&tmarks);
981: if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %D\n",i);
982: for (k=ii[ee];k<ii[ee+1];k++) {
983: PetscInt k2;
984: if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %D\n",jj[k]);
985: PetscBTSet(btv,jj[k]);
986: for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++;
987: }
988: for (j=0;j<size;j++) {
989: if (tmarks[idxs[j]] > 1) {
990: if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %D\n",idxs[j]);
991: newprimals[cum++] = idxs[j];
992: }
993: }
994: PetscFree(tmarks);
995: }
996: ISRestoreIndices(eedges[i],&idxs);
997: }
998: ISDestroy(&extcols[i]);
999: }
1000: PetscFree(extcols);
1001: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);
1002: PetscSortRemoveDupsInt(&cum,newprimals);
1003: if (fl2g) {
1004: ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);
1005: ISDestroy(&primals);
1006: for (i=0;i<nee;i++) {
1007: ISDestroy(&eedges[i]);
1008: }
1009: PetscFree(eedges);
1010: }
1011: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1012: ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);
1013: PetscFree(newprimals);
1014: PCBDDCSetPrimalVerticesLocalIS(pc,primals);
1015: ISDestroy(&primals);
1016: PCBDDCAnalyzeInterface(pc);
1017: pcbddc->mat_graph->twodim = PETSC_FALSE;
1018: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1019: if (fl2g) {
1020: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);
1021: PetscMalloc1(nee,&eedges);
1022: for (i=0;i<nee;i++) {
1023: ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);
1024: }
1025: } else {
1026: eedges = alleedges;
1027: primals = allprimals;
1028: }
1029: PetscCalloc1(nee,&extcols);
1031: /* Mark again */
1032: PetscArrayzero(marks,ne);
1033: for (i=0;i<nee;i++) {
1034: PetscInt size,mark = i+1;
1036: ISGetLocalSize(eedges[i],&size);
1037: ISGetIndices(eedges[i],&idxs);
1038: for (j=0;j<size;j++) marks[idxs[j]] = mark;
1039: ISRestoreIndices(eedges[i],&idxs);
1040: }
1041: if (print) {
1042: PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");
1043: ISView(primals,NULL);
1044: }
1046: /* Recompute extended cols */
1047: eerr = PETSC_FALSE;
1048: for (i=0;i<nee;i++) {
1049: PetscInt size;
1051: cum = 0;
1052: ISGetLocalSize(eedges[i],&size);
1053: if (!size && nedfieldlocal) continue;
1054: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
1055: ISGetIndices(eedges[i],&idxs);
1056: for (j=0;j<size;j++) {
1057: PetscInt k,ee = idxs[j];
1058: for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k];
1059: }
1060: ISRestoreIndices(eedges[i],&idxs);
1061: PetscSortRemoveDupsInt(&cum,extrow);
1062: ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);
1063: PetscSortIntWithArray(cum,gidxs,extrow);
1064: ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);
1065: if (cum != size -1) {
1066: if (print) {
1067: PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");
1068: ISView(eedges[i],NULL);
1069: PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");
1070: ISView(extcols[i],NULL);
1071: }
1072: eerr = PETSC_TRUE;
1073: }
1074: }
1075: }
1076: MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1077: PetscFree2(extrow,gidxs);
1078: PetscBTDestroy(&bter);
1079: if (print) { PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF); }
1080: /* an error should not occur at this point */
1081: if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");
1083: /* Check the number of endpoints */
1084: MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1085: PetscMalloc1(2*nee,&corners);
1086: PetscMalloc1(nee,&cedges);
1087: for (i=0;i<nee;i++) {
1088: PetscInt size, found = 0, gc[2];
1090: /* init with defaults */
1091: cedges[i] = corners[i*2] = corners[i*2+1] = -1;
1092: ISGetLocalSize(eedges[i],&size);
1093: if (!size && nedfieldlocal) continue;
1094: if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i);
1095: ISGetIndices(eedges[i],&idxs);
1096: PetscBTMemzero(nv,btvc);
1097: for (j=0;j<size;j++) {
1098: PetscInt k,ee = idxs[j];
1099: for (k=ii[ee];k<ii[ee+1];k++) {
1100: PetscInt vv = jj[k];
1101: if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) {
1102: if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %D",i);
1103: corners[i*2+found++] = vv;
1104: }
1105: }
1106: }
1107: if (found != 2) {
1108: PetscInt e;
1109: if (fl2g) {
1110: ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);
1111: } else {
1112: e = idxs[0];
1113: }
1114: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %D corners for edge %D (astart %D, estart %D)",found,i,e,idxs[0]);
1115: }
1117: /* get primal dof index on this coarse edge */
1118: ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);
1119: if (gc[0] > gc[1]) {
1120: PetscInt swap = corners[2*i];
1121: corners[2*i] = corners[2*i+1];
1122: corners[2*i+1] = swap;
1123: }
1124: cedges[i] = idxs[size-1];
1125: ISRestoreIndices(eedges[i],&idxs);
1126: if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %D: ce %D, corners (%D,%D)\n",i,cedges[i],corners[2*i],corners[2*i+1]);
1127: }
1128: MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1129: PetscBTDestroy(&btvc);
1131: if (PetscDefined(USE_DEBUG)) {
1132: /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1133: not interfere with neighbouring coarse edges */
1134: PetscMalloc1(nee+1,&emarks);
1135: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1136: for (i=0;i<nv;i++) {
1137: PetscInt emax = 0,eemax = 0;
1139: if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1140: PetscArrayzero(emarks,nee+1);
1141: for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++;
1142: for (j=1;j<nee+1;j++) {
1143: if (emax < emarks[j]) {
1144: emax = emarks[j];
1145: eemax = j;
1146: }
1147: }
1148: /* not relevant for edges */
1149: if (!eemax) continue;
1151: for (j=ii[i];j<ii[i+1];j++) {
1152: if (marks[jj[j]] && marks[jj[j]] != eemax) {
1153: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %D and %D) connected through the %D nodal dof at edge dof %D",marks[jj[j]]-1,eemax,i,jj[j]);
1154: }
1155: }
1156: }
1157: PetscFree(emarks);
1158: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1159: }
1161: /* Compute extended rows indices for edge blocks of the change of basis */
1162: MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1163: MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);
1164: extmem *= maxsize;
1165: PetscMalloc1(extmem*nee,&extrow);
1166: PetscMalloc1(nee,&extrows);
1167: PetscCalloc1(nee,&extrowcum);
1168: for (i=0;i<nv;i++) {
1169: PetscInt mark = 0,size,start;
1171: if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue;
1172: for (j=ii[i];j<ii[i+1];j++)
1173: if (marks[jj[j]] && !mark)
1174: mark = marks[jj[j]];
1176: /* not relevant */
1177: if (!mark) continue;
1179: /* import extended row */
1180: mark--;
1181: start = mark*extmem+extrowcum[mark];
1182: size = ii[i+1]-ii[i];
1183: if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %D > %D",extrowcum[mark] + size,extmem);
1184: PetscArraycpy(extrow+start,jj+ii[i],size);
1185: extrowcum[mark] += size;
1186: }
1187: MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);
1188: MatDestroy(&lGt);
1189: PetscFree(marks);
1191: /* Compress extrows */
1192: cum = 0;
1193: for (i=0;i<nee;i++) {
1194: PetscInt size = extrowcum[i],*start = extrow + i*extmem;
1195: PetscSortRemoveDupsInt(&size,start);
1196: ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);
1197: cum = PetscMax(cum,size);
1198: }
1199: PetscFree(extrowcum);
1200: PetscBTDestroy(&btv);
1201: PetscBTDestroy(&btvcand);
1203: /* Workspace for lapack inner calls and VecSetValues */
1204: PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);
1206: /* Create change of basis matrix (preallocation can be improved) */
1207: MatCreate(comm,&T);
1208: MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n,
1209: pc->pmat->rmap->N,pc->pmat->rmap->N);
1210: MatSetType(T,MATAIJ);
1211: MatSeqAIJSetPreallocation(T,10,NULL);
1212: MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);
1213: MatSetLocalToGlobalMapping(T,al2g,al2g);
1214: MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
1215: MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);
1216: ISLocalToGlobalMappingDestroy(&al2g);
1218: /* Defaults to identity */
1219: MatCreateVecs(pc->pmat,&tvec,NULL);
1220: VecSet(tvec,1.0);
1221: MatDiagonalSet(T,tvec,INSERT_VALUES);
1222: VecDestroy(&tvec);
1224: /* Create discrete gradient for the coarser level if needed */
1225: MatDestroy(&pcbddc->nedcG);
1226: ISDestroy(&pcbddc->nedclocal);
1227: if (pcbddc->current_level < pcbddc->max_levels) {
1228: ISLocalToGlobalMapping cel2g,cvl2g;
1229: IS wis,gwis;
1230: PetscInt cnv,cne;
1232: ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);
1233: if (fl2g) {
1234: ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);
1235: } else {
1236: PetscObjectReference((PetscObject)wis);
1237: pcbddc->nedclocal = wis;
1238: }
1239: ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);
1240: ISDestroy(&wis);
1241: ISRenumber(gwis,NULL,&cne,&wis);
1242: ISLocalToGlobalMappingCreateIS(wis,&cel2g);
1243: ISDestroy(&wis);
1244: ISDestroy(&gwis);
1246: ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);
1247: ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);
1248: ISDestroy(&wis);
1249: ISRenumber(gwis,NULL,&cnv,&wis);
1250: ISLocalToGlobalMappingCreateIS(wis,&cvl2g);
1251: ISDestroy(&wis);
1252: ISDestroy(&gwis);
1254: MatCreate(comm,&pcbddc->nedcG);
1255: MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);
1256: MatSetType(pcbddc->nedcG,MATAIJ);
1257: MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);
1258: MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);
1259: MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);
1260: ISLocalToGlobalMappingDestroy(&cel2g);
1261: ISLocalToGlobalMappingDestroy(&cvl2g);
1262: }
1263: ISLocalToGlobalMappingDestroy(&vl2g);
1265: #if defined(PRINT_GDET)
1266: inc = 0;
1267: lev = pcbddc->current_level;
1268: #endif
1270: /* Insert values in the change of basis matrix */
1271: for (i=0;i<nee;i++) {
1272: Mat Gins = NULL, GKins = NULL;
1273: IS cornersis = NULL;
1274: PetscScalar cvals[2];
1276: if (pcbddc->nedcG) {
1277: ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);
1278: }
1279: PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);
1280: if (Gins && GKins) {
1281: const PetscScalar *data;
1282: const PetscInt *rows,*cols;
1283: PetscInt nrh,nch,nrc,ncc;
1285: ISGetIndices(eedges[i],&cols);
1286: /* H1 */
1287: ISGetIndices(extrows[i],&rows);
1288: MatGetSize(Gins,&nrh,&nch);
1289: MatDenseGetArrayRead(Gins,&data);
1290: MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);
1291: MatDenseRestoreArrayRead(Gins,&data);
1292: ISRestoreIndices(extrows[i],&rows);
1293: /* complement */
1294: MatGetSize(GKins,&nrc,&ncc);
1295: if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %D",i);
1296: if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %D and Gins %D does not match %D for coarse edge %D",ncc,nch,nrc,i);
1297: if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %D with ncc %D",i,ncc);
1298: MatDenseGetArrayRead(GKins,&data);
1299: MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);
1300: MatDenseRestoreArrayRead(GKins,&data);
1302: /* coarse discrete gradient */
1303: if (pcbddc->nedcG) {
1304: PetscInt cols[2];
1306: cols[0] = 2*i;
1307: cols[1] = 2*i+1;
1308: MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);
1309: }
1310: ISRestoreIndices(eedges[i],&cols);
1311: }
1312: ISDestroy(&extrows[i]);
1313: ISDestroy(&extcols[i]);
1314: ISDestroy(&cornersis);
1315: MatDestroy(&Gins);
1316: MatDestroy(&GKins);
1317: }
1318: ISLocalToGlobalMappingDestroy(&el2g);
1320: /* Start assembling */
1321: MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);
1322: if (pcbddc->nedcG) {
1323: MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1324: }
1326: /* Free */
1327: if (fl2g) {
1328: ISDestroy(&primals);
1329: for (i=0;i<nee;i++) {
1330: ISDestroy(&eedges[i]);
1331: }
1332: PetscFree(eedges);
1333: }
1335: /* hack mat_graph with primal dofs on the coarse edges */
1336: {
1337: PCBDDCGraph graph = pcbddc->mat_graph;
1338: PetscInt *oqueue = graph->queue;
1339: PetscInt *ocptr = graph->cptr;
1340: PetscInt ncc,*idxs;
1342: /* find first primal edge */
1343: if (pcbddc->nedclocal) {
1344: ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1345: } else {
1346: if (fl2g) {
1347: ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);
1348: }
1349: idxs = cedges;
1350: }
1351: cum = 0;
1352: while (cum < nee && cedges[cum] < 0) cum++;
1354: /* adapt connected components */
1355: PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);
1356: graph->cptr[0] = 0;
1357: for (i=0,ncc=0;i<graph->ncc;i++) {
1358: PetscInt lc = ocptr[i+1]-ocptr[i];
1359: if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */
1360: graph->cptr[ncc+1] = graph->cptr[ncc]+1;
1361: graph->queue[graph->cptr[ncc]] = cedges[cum];
1362: ncc++;
1363: lc--;
1364: cum++;
1365: while (cum < nee && cedges[cum] < 0) cum++;
1366: }
1367: graph->cptr[ncc+1] = graph->cptr[ncc] + lc;
1368: for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j];
1369: ncc++;
1370: }
1371: graph->ncc = ncc;
1372: if (pcbddc->nedclocal) {
1373: ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);
1374: }
1375: PetscFree2(ocptr,oqueue);
1376: }
1377: ISLocalToGlobalMappingDestroy(&fl2g);
1378: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);
1379: PCBDDCGraphResetCSR(pcbddc->mat_graph);
1380: MatDestroy(&conn);
1382: ISDestroy(&nedfieldlocal);
1383: PetscFree(extrow);
1384: PetscFree2(work,rwork);
1385: PetscFree(corners);
1386: PetscFree(cedges);
1387: PetscFree(extrows);
1388: PetscFree(extcols);
1389: MatDestroy(&lG);
1391: /* Complete assembling */
1392: MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);
1393: if (pcbddc->nedcG) {
1394: MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);
1395: #if 0
1396: PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");
1397: MatView(pcbddc->nedcG,NULL);
1398: #endif
1399: }
1401: /* set change of basis */
1402: PCBDDCSetChangeOfBasisMat(pc,T,singular);
1403: MatDestroy(&T);
1405: return(0);
1406: }
1408: /* the near-null space of BDDC carries information on quadrature weights,
1409: and these can be collinear -> so cheat with MatNullSpaceCreate
1410: and create a suitable set of basis vectors first */
1411: PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1412: {
1414: PetscInt i;
1417: for (i=0;i<nvecs;i++) {
1418: PetscInt first,last;
1420: VecGetOwnershipRange(quad_vecs[i],&first,&last);
1421: if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented");
1422: if (i>=first && i < last) {
1423: PetscScalar *data;
1424: VecGetArray(quad_vecs[i],&data);
1425: if (!has_const) {
1426: data[i-first] = 1.;
1427: } else {
1428: data[2*i-first] = 1./PetscSqrtReal(2.);
1429: data[2*i-first+1] = -1./PetscSqrtReal(2.);
1430: }
1431: VecRestoreArray(quad_vecs[i],&data);
1432: }
1433: PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1434: }
1435: MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);
1436: for (i=0;i<nvecs;i++) { /* reset vectors */
1437: PetscInt first,last;
1438: VecLockReadPop(quad_vecs[i]);
1439: VecGetOwnershipRange(quad_vecs[i],&first,&last);
1440: if (i>=first && i < last) {
1441: PetscScalar *data;
1442: VecGetArray(quad_vecs[i],&data);
1443: if (!has_const) {
1444: data[i-first] = 0.;
1445: } else {
1446: data[2*i-first] = 0.;
1447: data[2*i-first+1] = 0.;
1448: }
1449: VecRestoreArray(quad_vecs[i],&data);
1450: }
1451: PetscObjectStateIncrease((PetscObject)quad_vecs[i]);
1452: VecLockReadPush(quad_vecs[i]);
1453: }
1454: return(0);
1455: }
1457: PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1458: {
1459: Mat loc_divudotp;
1460: Vec p,v,vins,quad_vec,*quad_vecs;
1461: ISLocalToGlobalMapping map;
1462: PetscScalar *vals;
1463: const PetscScalar *array;
1464: PetscInt i,maxneighs = 0,maxsize,*gidxs;
1465: PetscInt n_neigh,*neigh,*n_shared,**shared;
1466: PetscMPIInt rank;
1467: PetscErrorCode ierr;
1470: ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1471: for (i=0;i<n_neigh;i++) maxneighs = PetscMax(graph->count[shared[i][0]]+1,maxneighs);
1472: MPIU_Allreduce(MPI_IN_PLACE,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));
1473: if (!maxneighs) {
1474: ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1475: *nnsp = NULL;
1476: return(0);
1477: }
1478: maxsize = 0;
1479: for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize);
1480: PetscMalloc2(maxsize,&gidxs,maxsize,&vals);
1481: /* create vectors to hold quadrature weights */
1482: MatCreateVecs(A,&quad_vec,NULL);
1483: if (!transpose) {
1484: MatGetLocalToGlobalMapping(A,&map,NULL);
1485: } else {
1486: MatGetLocalToGlobalMapping(A,NULL,&map);
1487: }
1488: VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);
1489: VecDestroy(&quad_vec);
1490: PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);
1491: for (i=0;i<maxneighs;i++) {
1492: VecLockReadPop(quad_vecs[i]);
1493: }
1495: /* compute local quad vec */
1496: MatISGetLocalMat(divudotp,&loc_divudotp);
1497: if (!transpose) {
1498: MatCreateVecs(loc_divudotp,&v,&p);
1499: } else {
1500: MatCreateVecs(loc_divudotp,&p,&v);
1501: }
1502: VecSet(p,1.);
1503: if (!transpose) {
1504: MatMultTranspose(loc_divudotp,p,v);
1505: } else {
1506: MatMult(loc_divudotp,p,v);
1507: }
1508: if (vl2l) {
1509: Mat lA;
1510: VecScatter sc;
1512: MatISGetLocalMat(A,&lA);
1513: MatCreateVecs(lA,&vins,NULL);
1514: VecScatterCreate(v,NULL,vins,vl2l,&sc);
1515: VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1516: VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);
1517: VecScatterDestroy(&sc);
1518: } else {
1519: vins = v;
1520: }
1521: VecGetArrayRead(vins,&array);
1522: VecDestroy(&p);
1524: /* insert in global quadrature vecs */
1525: MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);
1526: for (i=1;i<n_neigh;i++) {
1527: const PetscInt *idxs;
1528: PetscInt idx,nn,j;
1530: idxs = shared[i];
1531: nn = n_shared[i];
1532: for (j=0;j<nn;j++) vals[j] = array[idxs[j]];
1533: PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);
1534: idx = -(idx+1);
1535: if (idx < 0 || idx >= maxneighs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid index %D not in [0,%D)",idx,maxneighs);
1536: ISLocalToGlobalMappingApply(map,nn,idxs,gidxs);
1537: VecSetValues(quad_vecs[idx],nn,gidxs,vals,INSERT_VALUES);
1538: }
1539: ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);
1540: VecRestoreArrayRead(vins,&array);
1541: if (vl2l) {
1542: VecDestroy(&vins);
1543: }
1544: VecDestroy(&v);
1545: PetscFree2(gidxs,vals);
1547: /* assemble near null space */
1548: for (i=0;i<maxneighs;i++) {
1549: VecAssemblyBegin(quad_vecs[i]);
1550: }
1551: for (i=0;i<maxneighs;i++) {
1552: VecAssemblyEnd(quad_vecs[i]);
1553: VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");
1554: VecLockReadPush(quad_vecs[i]);
1555: }
1556: VecDestroyVecs(maxneighs,&quad_vecs);
1557: return(0);
1558: }
1560: PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1561: {
1562: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
1566: if (primalv) {
1567: if (pcbddc->user_primal_vertices_local) {
1568: IS list[2], newp;
1570: list[0] = primalv;
1571: list[1] = pcbddc->user_primal_vertices_local;
1572: ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);
1573: ISSortRemoveDups(newp);
1574: ISDestroy(&list[1]);
1575: pcbddc->user_primal_vertices_local = newp;
1576: } else {
1577: PCBDDCSetPrimalVerticesLocalIS(pc,primalv);
1578: }
1579: }
1580: return(0);
1581: }
1583: static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx)
1584: {
1585: PetscInt f, *comp = (PetscInt *)ctx;
1588: for (f=0;f<Nf;f++) out[f] = X[*comp];
1589: return(0);
1590: }
1592: PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1593: {
1595: Vec local,global;
1596: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
1597: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
1598: PetscBool monolithic = PETSC_FALSE;
1601: PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");
1602: PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);
1603: PetscOptionsEnd();
1604: /* need to convert from global to local topology information and remove references to information in global ordering */
1605: MatCreateVecs(pc->pmat,&global,NULL);
1606: MatCreateVecs(matis->A,&local,NULL);
1607: VecBindToCPU(global,PETSC_TRUE);
1608: VecBindToCPU(local,PETSC_TRUE);
1609: if (monolithic) { /* just get block size to properly compute vertices */
1610: if (pcbddc->vertex_size == 1) {
1611: MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);
1612: }
1613: goto boundary;
1614: }
1616: if (pcbddc->user_provided_isfordofs) {
1617: if (pcbddc->n_ISForDofs) {
1618: PetscInt i;
1620: PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);
1621: for (i=0;i<pcbddc->n_ISForDofs;i++) {
1622: PetscInt bs;
1624: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);
1625: ISGetBlockSize(pcbddc->ISForDofs[i],&bs);
1626: ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);
1627: ISDestroy(&pcbddc->ISForDofs[i]);
1628: }
1629: pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1630: pcbddc->n_ISForDofs = 0;
1631: PetscFree(pcbddc->ISForDofs);
1632: }
1633: } else {
1634: if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1635: DM dm;
1637: MatGetDM(pc->pmat, &dm);
1638: if (!dm) {
1639: PCGetDM(pc, &dm);
1640: }
1641: if (dm) {
1642: IS *fields;
1643: PetscInt nf,i;
1645: DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);
1646: PetscMalloc1(nf,&pcbddc->ISForDofsLocal);
1647: for (i=0;i<nf;i++) {
1648: PetscInt bs;
1650: PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);
1651: ISGetBlockSize(fields[i],&bs);
1652: ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);
1653: ISDestroy(&fields[i]);
1654: }
1655: PetscFree(fields);
1656: pcbddc->n_ISForDofsLocal = nf;
1657: } else { /* See if MATIS has fields attached by the conversion from MatNest */
1658: PetscContainer c;
1660: PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);
1661: if (c) {
1662: MatISLocalFields lf;
1663: PetscContainerGetPointer(c,(void**)&lf);
1664: PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);
1665: } else { /* fallback, create the default fields if bs > 1 */
1666: PetscInt i, n = matis->A->rmap->n;
1667: MatGetBlockSize(pc->pmat,&i);
1668: if (i > 1) {
1669: pcbddc->n_ISForDofsLocal = i;
1670: PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);
1671: for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1672: ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);
1673: }
1674: }
1675: }
1676: }
1677: } else {
1678: PetscInt i;
1679: for (i=0;i<pcbddc->n_ISForDofsLocal;i++) {
1680: PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);
1681: }
1682: }
1683: }
1685: boundary:
1686: if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1687: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);
1688: } else if (pcbddc->DirichletBoundariesLocal) {
1689: PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);
1690: }
1691: if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1692: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);
1693: } else if (pcbddc->NeumannBoundariesLocal) {
1694: PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);
1695: }
1696: if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) {
1697: PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);
1698: }
1699: VecDestroy(&global);
1700: VecDestroy(&local);
1701: /* detect local disconnected subdomains if requested (use matis->A) */
1702: if (pcbddc->detect_disconnected) {
1703: IS primalv = NULL;
1704: PetscInt i;
1705: PetscBool filter = pcbddc->detect_disconnected_filter;
1707: for (i=0;i<pcbddc->n_local_subs;i++) {
1708: ISDestroy(&pcbddc->local_subs[i]);
1709: }
1710: PetscFree(pcbddc->local_subs);
1711: PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);
1712: PCBDDCAddPrimalVerticesLocalIS(pc,primalv);
1713: ISDestroy(&primalv);
1714: }
1715: /* early stage corner detection */
1716: {
1717: DM dm;
1719: MatGetDM(pc->pmat,&dm);
1720: if (!dm) {
1721: PCGetDM(pc,&dm);
1722: }
1723: if (dm) {
1724: PetscBool isda;
1726: PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);
1727: if (isda) {
1728: ISLocalToGlobalMapping l2l;
1729: IS corners;
1730: Mat lA;
1731: PetscBool gl,lo;
1733: {
1734: Vec cvec;
1735: const PetscScalar *coords;
1736: PetscInt dof,n,cdim;
1737: PetscBool memc = PETSC_TRUE;
1739: DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);
1740: DMGetCoordinates(dm,&cvec);
1741: VecGetLocalSize(cvec,&n);
1742: VecGetBlockSize(cvec,&cdim);
1743: n /= cdim;
1744: PetscFree(pcbddc->mat_graph->coords);
1745: PetscMalloc1(dof*n*cdim,&pcbddc->mat_graph->coords);
1746: VecGetArrayRead(cvec,&coords);
1747: #if defined(PETSC_USE_COMPLEX)
1748: memc = PETSC_FALSE;
1749: #endif
1750: if (dof != 1) memc = PETSC_FALSE;
1751: if (memc) {
1752: PetscArraycpy(pcbddc->mat_graph->coords,coords,cdim*n*dof);
1753: } else { /* BDDC graph does not use any blocked information, we need to replicate the data */
1754: PetscReal *bcoords = pcbddc->mat_graph->coords;
1755: PetscInt i, b, d;
1757: for (i=0;i<n;i++) {
1758: for (b=0;b<dof;b++) {
1759: for (d=0;d<cdim;d++) {
1760: bcoords[i*dof*cdim + b*cdim + d] = PetscRealPart(coords[i*cdim+d]);
1761: }
1762: }
1763: }
1764: }
1765: VecRestoreArrayRead(cvec,&coords);
1766: pcbddc->mat_graph->cdim = cdim;
1767: pcbddc->mat_graph->cnloc = dof*n;
1768: pcbddc->mat_graph->cloc = PETSC_FALSE;
1769: }
1770: DMDAGetSubdomainCornersIS(dm,&corners);
1771: MatISGetLocalMat(pc->pmat,&lA);
1772: MatGetLocalToGlobalMapping(lA,&l2l,NULL);
1773: MatISRestoreLocalMat(pc->pmat,&lA);
1774: lo = (PetscBool)(l2l && corners);
1775: MPIU_Allreduce(&lo,&gl,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
1776: if (gl) { /* From PETSc's DMDA */
1777: const PetscInt *idx;
1778: PetscInt dof,bs,*idxout,n;
1780: DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);
1781: ISLocalToGlobalMappingGetBlockSize(l2l,&bs);
1782: ISGetLocalSize(corners,&n);
1783: ISGetIndices(corners,&idx);
1784: if (bs == dof) {
1785: PetscMalloc1(n,&idxout);
1786: ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);
1787: } else { /* the original DMDA local-to-local map have been modified */
1788: PetscInt i,d;
1790: PetscMalloc1(dof*n,&idxout);
1791: for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d;
1792: ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout);
1794: bs = 1;
1795: n *= dof;
1796: }
1797: ISRestoreIndices(corners,&idx);
1798: DMDARestoreSubdomainCornersIS(dm,&corners);
1799: ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);
1800: PCBDDCAddPrimalVerticesLocalIS(pc,corners);
1801: ISDestroy(&corners);
1802: pcbddc->corner_selected = PETSC_TRUE;
1803: pcbddc->corner_selection = PETSC_TRUE;
1804: }
1805: if (corners) {
1806: DMDARestoreSubdomainCornersIS(dm,&corners);
1807: }
1808: }
1809: }
1810: }
1811: if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
1812: DM dm;
1814: MatGetDM(pc->pmat,&dm);
1815: if (!dm) {
1816: PCGetDM(pc,&dm);
1817: }
1818: if (dm) { /* this can get very expensive, I need to find a faster alternative */
1819: Vec vcoords;
1820: PetscSection section;
1821: PetscReal *coords;
1822: PetscInt d,cdim,nl,nf,**ctxs;
1823: PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);
1825: DMGetCoordinateDim(dm,&cdim);
1826: DMGetLocalSection(dm,§ion);
1827: PetscSectionGetNumFields(section,&nf);
1828: DMCreateGlobalVector(dm,&vcoords);
1829: VecGetLocalSize(vcoords,&nl);
1830: PetscMalloc1(nl*cdim,&coords);
1831: PetscMalloc2(nf,&funcs,nf,&ctxs);
1832: PetscMalloc1(nf,&ctxs[0]);
1833: for (d=0;d<nf;d++) funcs[d] = func_coords_private;
1834: for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1;
1835: for (d=0;d<cdim;d++) {
1836: PetscInt i;
1837: const PetscScalar *v;
1839: for (i=0;i<nf;i++) ctxs[i][0] = d;
1840: DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);
1841: VecGetArrayRead(vcoords,&v);
1842: for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]);
1843: VecRestoreArrayRead(vcoords,&v);
1844: }
1845: VecDestroy(&vcoords);
1846: PCSetCoordinates(pc,cdim,nl,coords);
1847: PetscFree(coords);
1848: PetscFree(ctxs[0]);
1849: PetscFree2(funcs,ctxs);
1850: }
1851: }
1852: return(0);
1853: }
1855: PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
1856: {
1857: Mat_IS *matis = (Mat_IS*)(pc->pmat->data);
1858: PetscErrorCode ierr;
1859: IS nis;
1860: const PetscInt *idxs;
1861: PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd;
1862: PetscBool *ld;
1865: if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR");
1866: if (mop == MPI_LAND) {
1867: /* init rootdata with true */
1868: ld = (PetscBool*) matis->sf_rootdata;
1869: for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE;
1870: } else {
1871: PetscArrayzero(matis->sf_rootdata,pc->pmat->rmap->n);
1872: }
1873: PetscArrayzero(matis->sf_leafdata,n);
1874: ISGetLocalSize(*is,&nd);
1875: ISGetIndices(*is,&idxs);
1876: ld = (PetscBool*) matis->sf_leafdata;
1877: for (i=0;i<nd;i++)
1878: if (-1 < idxs[i] && idxs[i] < n)
1879: ld[idxs[i]] = PETSC_TRUE;
1880: ISRestoreIndices(*is,&idxs);
1881: PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1882: PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);
1883: PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);
1884: PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);
1885: if (mop == MPI_LAND) {
1886: PetscMalloc1(nd,&nidxs);
1887: } else {
1888: PetscMalloc1(n,&nidxs);
1889: }
1890: for (i=0,nnd=0;i<n;i++)
1891: if (ld[i])
1892: nidxs[nnd++] = i;
1893: ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);
1894: ISDestroy(is);
1895: *is = nis;
1896: return(0);
1897: }
1899: PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z)
1900: {
1901: PC_IS *pcis = (PC_IS*)(pc->data);
1902: PC_BDDC *pcbddc = (PC_BDDC*)(pc->data);
1903: PetscErrorCode ierr;
1906: if (!pcbddc->benign_have_null) {
1907: return(0);
1908: }
1909: if (pcbddc->ChangeOfBasisMatrix) {
1910: Vec swap;
1912: MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);
1913: swap = pcbddc->work_change;
1914: pcbddc->work_change = r;
1915: r = swap;
1916: }
1917: VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1918: VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
1919: KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);
1920: KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);
1921: VecSet(z,0.);
1922: VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1923: VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);
1924: if (pcbddc->ChangeOfBasisMatrix) {
1925: pcbddc->work_change = r;
1926: VecCopy(z,pcbddc->work_change);
1927: MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);
1928: }
1929: return(0);
1930: }
1932: PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
1933: {
1934: PCBDDCBenignMatMult_ctx ctx;
1935: PetscErrorCode ierr;
1936: PetscBool apply_right,apply_left,reset_x;
1939: MatShellGetContext(A,&ctx);
1940: if (transpose) {
1941: apply_right = ctx->apply_left;
1942: apply_left = ctx->apply_right;
1943: } else {
1944: apply_right = ctx->apply_right;
1945: apply_left = ctx->apply_left;
1946: }
1947: reset_x = PETSC_FALSE;
1948: if (apply_right) {
1949: const PetscScalar *ax;
1950: PetscInt nl,i;
1952: VecGetLocalSize(x,&nl);
1953: VecGetArrayRead(x,&ax);
1954: PetscArraycpy(ctx->work,ax,nl);
1955: VecRestoreArrayRead(x,&ax);
1956: for (i=0;i<ctx->benign_n;i++) {
1957: PetscScalar sum,val;
1958: const PetscInt *idxs;
1959: PetscInt nz,j;
1960: ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1961: ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
1962: sum = 0.;
1963: if (ctx->apply_p0) {
1964: val = ctx->work[idxs[nz-1]];
1965: for (j=0;j<nz-1;j++) {
1966: sum += ctx->work[idxs[j]];
1967: ctx->work[idxs[j]] += val;
1968: }
1969: } else {
1970: for (j=0;j<nz-1;j++) {
1971: sum += ctx->work[idxs[j]];
1972: }
1973: }
1974: ctx->work[idxs[nz-1]] -= sum;
1975: ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
1976: }
1977: VecPlaceArray(x,ctx->work);
1978: reset_x = PETSC_TRUE;
1979: }
1980: if (transpose) {
1981: MatMultTranspose(ctx->A,x,y);
1982: } else {
1983: MatMult(ctx->A,x,y);
1984: }
1985: if (reset_x) {
1986: VecResetArray(x);
1987: }
1988: if (apply_left) {
1989: PetscScalar *ay;
1990: PetscInt i;
1992: VecGetArray(y,&ay);
1993: for (i=0;i<ctx->benign_n;i++) {
1994: PetscScalar sum,val;
1995: const PetscInt *idxs;
1996: PetscInt nz,j;
1997: ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);
1998: ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);
1999: val = -ay[idxs[nz-1]];
2000: if (ctx->apply_p0) {
2001: sum = 0.;
2002: for (j=0;j<nz-1;j++) {
2003: sum += ay[idxs[j]];
2004: ay[idxs[j]] += val;
2005: }
2006: ay[idxs[nz-1]] += sum;
2007: } else {
2008: for (j=0;j<nz-1;j++) {
2009: ay[idxs[j]] += val;
2010: }
2011: ay[idxs[nz-1]] = 0.;
2012: }
2013: ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);
2014: }
2015: VecRestoreArray(y,&ay);
2016: }
2017: return(0);
2018: }
2020: PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
2021: {
2025: PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);
2026: return(0);
2027: }
2029: PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
2030: {
2034: PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);
2035: return(0);
2036: }
2038: PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
2039: {
2040: PC_IS *pcis = (PC_IS*)pc->data;
2041: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
2042: PCBDDCBenignMatMult_ctx ctx;
2043: PetscErrorCode ierr;
2046: if (!restore) {
2047: Mat A_IB,A_BI;
2048: PetscScalar *work;
2049: PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;
2051: if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored");
2052: if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) return(0);
2053: PetscMalloc1(pcis->n,&work);
2054: MatCreate(PETSC_COMM_SELF,&A_IB);
2055: MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);
2056: MatSetType(A_IB,MATSHELL);
2057: MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);
2058: MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);
2059: PetscNew(&ctx);
2060: MatShellSetContext(A_IB,ctx);
2061: ctx->apply_left = PETSC_TRUE;
2062: ctx->apply_right = PETSC_FALSE;
2063: ctx->apply_p0 = PETSC_FALSE;
2064: ctx->benign_n = pcbddc->benign_n;
2065: if (reuse) {
2066: ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
2067: ctx->free = PETSC_FALSE;
2068: } else { /* TODO: could be optimized for successive solves */
2069: ISLocalToGlobalMapping N_to_D;
2070: PetscInt i;
2072: ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);
2073: PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);
2074: for (i=0;i<pcbddc->benign_n;i++) {
2075: ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);
2076: }
2077: ISLocalToGlobalMappingDestroy(&N_to_D);
2078: ctx->free = PETSC_TRUE;
2079: }
2080: ctx->A = pcis->A_IB;
2081: ctx->work = work;
2082: MatSetUp(A_IB);
2083: MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);
2084: MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);
2085: pcis->A_IB = A_IB;
2087: /* A_BI as A_IB^T */
2088: MatCreateTranspose(A_IB,&A_BI);
2089: pcbddc->benign_original_mat = pcis->A_BI;
2090: pcis->A_BI = A_BI;
2091: } else {
2092: if (!pcbddc->benign_original_mat) {
2093: return(0);
2094: }
2095: MatShellGetContext(pcis->A_IB,&ctx);
2096: MatDestroy(&pcis->A_IB);
2097: pcis->A_IB = ctx->A;
2098: ctx->A = NULL;
2099: MatDestroy(&pcis->A_BI);
2100: pcis->A_BI = pcbddc->benign_original_mat;
2101: pcbddc->benign_original_mat = NULL;
2102: if (ctx->free) {
2103: PetscInt i;
2104: for (i=0;i<ctx->benign_n;i++) {
2105: ISDestroy(&ctx->benign_zerodiag_subs[i]);
2106: }
2107: PetscFree(ctx->benign_zerodiag_subs);
2108: }
2109: PetscFree(ctx->work);
2110: PetscFree(ctx);
2111: }
2112: return(0);
2113: }
2115: /* used just in bddc debug mode */
2116: PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2117: {
2118: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
2119: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
2120: Mat An;
2124: MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);
2125: MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);
2126: if (is1) {
2127: MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);
2128: MatDestroy(&An);
2129: } else {
2130: *B = An;
2131: }
2132: return(0);
2133: }
2135: /* TODO: add reuse flag */
2136: PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2137: {
2138: Mat Bt;
2139: PetscScalar *a,*bdata;
2140: const PetscInt *ii,*ij;
2141: PetscInt m,n,i,nnz,*bii,*bij;
2142: PetscBool flg_row;
2146: MatGetSize(A,&n,&m);
2147: MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2148: MatSeqAIJGetArray(A,&a);
2149: nnz = n;
2150: for (i=0;i<ii[n];i++) {
2151: if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2152: }
2153: PetscMalloc1(n+1,&bii);
2154: PetscMalloc1(nnz,&bij);
2155: PetscMalloc1(nnz,&bdata);
2156: nnz = 0;
2157: bii[0] = 0;
2158: for (i=0;i<n;i++) {
2159: PetscInt j;
2160: for (j=ii[i];j<ii[i+1];j++) {
2161: PetscScalar entry = a[j];
2162: if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2163: bij[nnz] = ij[j];
2164: bdata[nnz] = entry;
2165: nnz++;
2166: }
2167: }
2168: bii[i+1] = nnz;
2169: }
2170: MatSeqAIJRestoreArray(A,&a);
2171: MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);
2172: MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);
2173: {
2174: Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data);
2175: b->free_a = PETSC_TRUE;
2176: b->free_ij = PETSC_TRUE;
2177: }
2178: if (*B == A) {
2179: MatDestroy(&A);
2180: }
2181: *B = Bt;
2182: return(0);
2183: }
2185: PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv)
2186: {
2187: Mat B = NULL;
2188: DM dm;
2189: IS is_dummy,*cc_n;
2190: ISLocalToGlobalMapping l2gmap_dummy;
2191: PCBDDCGraph graph;
2192: PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL;
2193: PetscInt i,n;
2194: PetscInt *xadj,*adjncy;
2195: PetscBool isplex = PETSC_FALSE;
2196: PetscErrorCode ierr;
2199: if (ncc) *ncc = 0;
2200: if (cc) *cc = NULL;
2201: if (primalv) *primalv = NULL;
2202: PCBDDCGraphCreate(&graph);
2203: MatGetDM(pc->pmat,&dm);
2204: if (!dm) {
2205: PCGetDM(pc,&dm);
2206: }
2207: if (dm) {
2208: PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);
2209: }
2210: if (filter) isplex = PETSC_FALSE;
2212: if (isplex) { /* this code has been modified from plexpartition.c */
2213: PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots;
2214: PetscInt *adj = NULL;
2215: IS cellNumbering;
2216: const PetscInt *cellNum;
2217: PetscBool useCone, useClosure;
2218: PetscSection section;
2219: PetscSegBuffer adjBuffer;
2220: PetscSF sfPoint;
2224: DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);
2225: DMGetPointSF(dm, &sfPoint);
2226: PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);
2227: /* Build adjacency graph via a section/segbuffer */
2228: PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);
2229: PetscSectionSetChart(section, pStart, pEnd);
2230: PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);
2231: /* Always use FVM adjacency to create partitioner graph */
2232: DMGetBasicAdjacency(dm, &useCone, &useClosure);
2233: DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE);
2234: DMPlexGetCellNumbering(dm, &cellNumbering);
2235: ISGetIndices(cellNumbering, &cellNum);
2236: for (n = 0, p = pStart; p < pEnd; p++) {
2237: /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2238: if (nroots > 0) {if (cellNum[p] < 0) continue;}
2239: adjSize = PETSC_DETERMINE;
2240: DMPlexGetAdjacency(dm, p, &adjSize, &adj);
2241: for (a = 0; a < adjSize; ++a) {
2242: const PetscInt point = adj[a];
2243: if (pStart <= point && point < pEnd) {
2244: PetscInt *PETSC_RESTRICT pBuf;
2245: PetscSectionAddDof(section, p, 1);
2246: PetscSegBufferGetInts(adjBuffer, 1, &pBuf);
2247: *pBuf = point;
2248: }
2249: }
2250: n++;
2251: }
2252: DMSetBasicAdjacency(dm, useCone, useClosure);
2253: /* Derive CSR graph from section/segbuffer */
2254: PetscSectionSetUp(section);
2255: PetscSectionGetStorageSize(section, &size);
2256: PetscMalloc1(n+1, &xadj);
2257: for (idx = 0, p = pStart; p < pEnd; p++) {
2258: if (nroots > 0) {if (cellNum[p] < 0) continue;}
2259: PetscSectionGetOffset(section, p, &(xadj[idx++]));
2260: }
2261: xadj[n] = size;
2262: PetscSegBufferExtractAlloc(adjBuffer, &adjncy);
2263: /* Clean up */
2264: PetscSegBufferDestroy(&adjBuffer);
2265: PetscSectionDestroy(§ion);
2266: PetscFree(adj);
2267: graph->xadj = xadj;
2268: graph->adjncy = adjncy;
2269: } else {
2270: Mat A;
2271: PetscBool isseqaij, flg_row;
2273: MatISGetLocalMat(pc->pmat,&A);
2274: if (!A->rmap->N || !A->cmap->N) {
2275: PCBDDCGraphDestroy(&graph);
2276: return(0);
2277: }
2278: PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);
2279: if (!isseqaij && filter) {
2280: PetscBool isseqdense;
2282: PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);
2283: if (!isseqdense) {
2284: MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);
2285: } else { /* TODO: rectangular case and LDA */
2286: PetscScalar *array;
2287: PetscReal chop=1.e-6;
2289: MatDuplicate(A,MAT_COPY_VALUES,&B);
2290: MatDenseGetArray(B,&array);
2291: MatGetSize(B,&n,NULL);
2292: for (i=0;i<n;i++) {
2293: PetscInt j;
2294: for (j=i+1;j<n;j++) {
2295: PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)]));
2296: if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.;
2297: if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.;
2298: }
2299: }
2300: MatDenseRestoreArray(B,&array);
2301: MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);
2302: }
2303: } else {
2304: PetscObjectReference((PetscObject)A);
2305: B = A;
2306: }
2307: MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
2309: /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2310: if (filter) {
2311: PetscScalar *data;
2312: PetscInt j,cum;
2314: PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);
2315: MatSeqAIJGetArray(B,&data);
2316: cum = 0;
2317: for (i=0;i<n;i++) {
2318: PetscInt t;
2320: for (j=xadj[i];j<xadj[i+1];j++) {
2321: if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) {
2322: continue;
2323: }
2324: adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j];
2325: }
2326: t = xadj_filtered[i];
2327: xadj_filtered[i] = cum;
2328: cum += t;
2329: }
2330: MatSeqAIJRestoreArray(B,&data);
2331: graph->xadj = xadj_filtered;
2332: graph->adjncy = adjncy_filtered;
2333: } else {
2334: graph->xadj = xadj;
2335: graph->adjncy = adjncy;
2336: }
2337: }
2338: /* compute local connected components using PCBDDCGraph */
2339: ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);
2340: ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);
2341: ISDestroy(&is_dummy);
2342: PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);
2343: ISLocalToGlobalMappingDestroy(&l2gmap_dummy);
2344: PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);
2345: PCBDDCGraphComputeConnectedComponents(graph);
2347: /* partial clean up */
2348: PetscFree2(xadj_filtered,adjncy_filtered);
2349: if (B) {
2350: PetscBool flg_row;
2351: MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
2352: MatDestroy(&B);
2353: }
2354: if (isplex) {
2355: PetscFree(xadj);
2356: PetscFree(adjncy);
2357: }
2359: /* get back data */
2360: if (isplex) {
2361: if (ncc) *ncc = graph->ncc;
2362: if (cc || primalv) {
2363: Mat A;
2364: PetscBT btv,btvt;
2365: PetscSection subSection;
2366: PetscInt *ids,cum,cump,*cids,*pids;
2368: DMPlexGetSubdomainSection(dm,&subSection);
2369: MatISGetLocalMat(pc->pmat,&A);
2370: PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);
2371: PetscBTCreate(A->rmap->n,&btv);
2372: PetscBTCreate(A->rmap->n,&btvt);
2374: cids[0] = 0;
2375: for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2376: PetscInt j;
2378: PetscBTMemzero(A->rmap->n,btvt);
2379: for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) {
2380: PetscInt k, size, *closure = NULL, cell = graph->queue[j];
2382: DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
2383: for (k = 0; k < 2*size; k += 2) {
2384: PetscInt s, pp, p = closure[k], off, dof, cdof;
2386: PetscSectionGetConstraintDof(subSection,p,&cdof);
2387: PetscSectionGetOffset(subSection,p,&off);
2388: PetscSectionGetDof(subSection,p,&dof);
2389: for (s = 0; s < dof-cdof; s++) {
2390: if (PetscBTLookupSet(btvt,off+s)) continue;
2391: if (!PetscBTLookup(btv,off+s)) {
2392: ids[cum++] = off+s;
2393: } else { /* cross-vertex */
2394: pids[cump++] = off+s;
2395: }
2396: }
2397: DMPlexGetTreeParent(dm,p,&pp,NULL);
2398: if (pp != p) {
2399: PetscSectionGetConstraintDof(subSection,pp,&cdof);
2400: PetscSectionGetOffset(subSection,pp,&off);
2401: PetscSectionGetDof(subSection,pp,&dof);
2402: for (s = 0; s < dof-cdof; s++) {
2403: if (PetscBTLookupSet(btvt,off+s)) continue;
2404: if (!PetscBTLookup(btv,off+s)) {
2405: ids[cum++] = off+s;
2406: } else { /* cross-vertex */
2407: pids[cump++] = off+s;
2408: }
2409: }
2410: }
2411: }
2412: DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);
2413: }
2414: cids[i+1] = cum;
2415: /* mark dofs as already assigned */
2416: for (j = cids[i]; j < cids[i+1]; j++) {
2417: PetscBTSet(btv,ids[j]);
2418: }
2419: }
2420: if (cc) {
2421: PetscMalloc1(graph->ncc,&cc_n);
2422: for (i = 0; i < graph->ncc; i++) {
2423: ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);
2424: }
2425: *cc = cc_n;
2426: }
2427: if (primalv) {
2428: ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);
2429: }
2430: PetscFree3(ids,cids,pids);
2431: PetscBTDestroy(&btv);
2432: PetscBTDestroy(&btvt);
2433: }
2434: } else {
2435: if (ncc) *ncc = graph->ncc;
2436: if (cc) {
2437: PetscMalloc1(graph->ncc,&cc_n);
2438: for (i=0;i<graph->ncc;i++) {
2439: ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);
2440: }
2441: *cc = cc_n;
2442: }
2443: }
2444: /* clean up graph */
2445: graph->xadj = NULL;
2446: graph->adjncy = NULL;
2447: PCBDDCGraphDestroy(&graph);
2448: return(0);
2449: }
2451: PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2452: {
2453: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
2454: PC_IS* pcis = (PC_IS*)(pc->data);
2455: IS dirIS = NULL;
2456: PetscInt i;
2460: PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);
2461: if (zerodiag) {
2462: Mat A;
2463: Vec vec3_N;
2464: PetscScalar *vals;
2465: const PetscInt *idxs;
2466: PetscInt nz,*count;
2468: /* p0 */
2469: VecSet(pcis->vec1_N,0.);
2470: PetscMalloc1(pcis->n,&vals);
2471: ISGetLocalSize(zerodiag,&nz);
2472: ISGetIndices(zerodiag,&idxs);
2473: for (i=0;i<nz;i++) vals[i] = 1.;
2474: VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);
2475: VecAssemblyBegin(pcis->vec1_N);
2476: VecAssemblyEnd(pcis->vec1_N);
2477: /* v_I */
2478: VecSetRandom(pcis->vec2_N,NULL);
2479: for (i=0;i<nz;i++) vals[i] = 0.;
2480: VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);
2481: ISRestoreIndices(zerodiag,&idxs);
2482: ISGetIndices(pcis->is_B_local,&idxs);
2483: for (i=0;i<pcis->n_B;i++) vals[i] = 0.;
2484: VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);
2485: ISRestoreIndices(pcis->is_B_local,&idxs);
2486: if (dirIS) {
2487: PetscInt n;
2489: ISGetLocalSize(dirIS,&n);
2490: ISGetIndices(dirIS,&idxs);
2491: for (i=0;i<n;i++) vals[i] = 0.;
2492: VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);
2493: ISRestoreIndices(dirIS,&idxs);
2494: }
2495: VecAssemblyBegin(pcis->vec2_N);
2496: VecAssemblyEnd(pcis->vec2_N);
2497: VecDuplicate(pcis->vec1_N,&vec3_N);
2498: VecSet(vec3_N,0.);
2499: MatISGetLocalMat(pc->pmat,&A);
2500: MatMult(A,pcis->vec1_N,vec3_N);
2501: VecDot(vec3_N,pcis->vec2_N,&vals[0]);
2502: if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0]));
2503: PetscFree(vals);
2504: VecDestroy(&vec3_N);
2506: /* there should not be any pressure dofs lying on the interface */
2507: PetscCalloc1(pcis->n,&count);
2508: ISGetIndices(pcis->is_B_local,&idxs);
2509: for (i=0;i<pcis->n_B;i++) count[idxs[i]]++;
2510: ISRestoreIndices(pcis->is_B_local,&idxs);
2511: ISGetIndices(zerodiag,&idxs);
2512: for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %D is an interface dof",idxs[i]);
2513: ISRestoreIndices(zerodiag,&idxs);
2514: PetscFree(count);
2515: }
2516: ISDestroy(&dirIS);
2518: /* check PCBDDCBenignGetOrSetP0 */
2519: VecSetRandom(pcis->vec1_global,NULL);
2520: for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i;
2521: PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);
2522: for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1;
2523: PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);
2524: for (i=0;i<pcbddc->benign_n;i++) {
2525: PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
2526: if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %D instead of %g",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);
2527: }
2528: return(0);
2529: }
2531: PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal)
2532: {
2533: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
2534: IS pressures = NULL,zerodiag = NULL,*bzerodiag = NULL,zerodiag_save,*zerodiag_subs;
2535: PetscInt nz,n,benign_n,bsp = 1;
2536: PetscInt *interior_dofs,n_interior_dofs,nneu;
2537: PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb;
2541: if (reuse) goto project_b0;
2542: PetscSFDestroy(&pcbddc->benign_sf);
2543: MatDestroy(&pcbddc->benign_B0);
2544: for (n=0;n<pcbddc->benign_n;n++) {
2545: ISDestroy(&pcbddc->benign_zerodiag_subs[n]);
2546: }
2547: PetscFree(pcbddc->benign_zerodiag_subs);
2548: has_null_pressures = PETSC_TRUE;
2549: have_null = PETSC_TRUE;
2550: /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided)
2551: Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field)
2552: Checks if all the pressure dofs in each subdomain have a zero diagonal
2553: If not, a change of basis on pressures is not needed
2554: since the local Schur complements are already SPD
2555: */
2556: if (pcbddc->n_ISForDofsLocal) {
2557: IS iP = NULL;
2558: PetscInt p,*pp;
2559: PetscBool flg;
2561: PetscMalloc1(pcbddc->n_ISForDofsLocal,&pp);
2562: n = pcbddc->n_ISForDofsLocal;
2563: PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");
2564: PetscOptionsIntArray("-pc_bddc_pressure_field","Field id for pressures",NULL,pp,&n,&flg);
2565: PetscOptionsEnd();
2566: if (!flg) {
2567: n = 1;
2568: pp[0] = pcbddc->n_ISForDofsLocal-1;
2569: }
2571: bsp = 0;
2572: for (p=0;p<n;p++) {
2573: PetscInt bs;
2575: if (pp[p] < 0 || pp[p] > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",pp[p]);
2576: ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);
2577: bsp += bs;
2578: }
2579: PetscMalloc1(bsp,&bzerodiag);
2580: bsp = 0;
2581: for (p=0;p<n;p++) {
2582: const PetscInt *idxs;
2583: PetscInt b,bs,npl,*bidxs;
2585: ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);
2586: ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]],&npl);
2587: ISGetIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);
2588: PetscMalloc1(npl/bs,&bidxs);
2589: for (b=0;b<bs;b++) {
2590: PetscInt i;
2592: for (i=0;i<npl/bs;i++) bidxs[i] = idxs[bs*i+b];
2593: ISCreateGeneral(PETSC_COMM_SELF,npl/bs,bidxs,PETSC_COPY_VALUES,&bzerodiag[bsp]);
2594: bsp++;
2595: }
2596: PetscFree(bidxs);
2597: ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);
2598: }
2599: ISConcatenate(PETSC_COMM_SELF,bsp,bzerodiag,&pressures);
2601: /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2602: PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);
2603: if (iP) {
2604: IS newpressures;
2606: ISDifference(pressures,iP,&newpressures);
2607: ISDestroy(&pressures);
2608: pressures = newpressures;
2609: }
2610: ISSorted(pressures,&sorted);
2611: if (!sorted) {
2612: ISSort(pressures);
2613: }
2614: PetscFree(pp);
2615: }
2617: /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2618: MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2619: if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2620: MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);
2621: ISSorted(zerodiag,&sorted);
2622: if (!sorted) {
2623: ISSort(zerodiag);
2624: }
2625: PetscObjectReference((PetscObject)zerodiag);
2626: zerodiag_save = zerodiag;
2627: ISGetLocalSize(zerodiag,&nz);
2628: if (!nz) {
2629: if (n) have_null = PETSC_FALSE;
2630: has_null_pressures = PETSC_FALSE;
2631: ISDestroy(&zerodiag);
2632: }
2633: recompute_zerodiag = PETSC_FALSE;
2635: /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2636: zerodiag_subs = NULL;
2637: benign_n = 0;
2638: n_interior_dofs = 0;
2639: interior_dofs = NULL;
2640: nneu = 0;
2641: if (pcbddc->NeumannBoundariesLocal) {
2642: ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);
2643: }
2644: checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2645: if (checkb) { /* need to compute interior nodes */
2646: PetscInt n,i,j;
2647: PetscInt n_neigh,*neigh,*n_shared,**shared;
2648: PetscInt *iwork;
2650: ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);
2651: ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2652: PetscCalloc1(n,&iwork);
2653: PetscMalloc1(n,&interior_dofs);
2654: for (i=1;i<n_neigh;i++)
2655: for (j=0;j<n_shared[i];j++)
2656: iwork[shared[i][j]] += 1;
2657: for (i=0;i<n;i++)
2658: if (!iwork[i])
2659: interior_dofs[n_interior_dofs++] = i;
2660: PetscFree(iwork);
2661: ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);
2662: }
2663: if (has_null_pressures) {
2664: IS *subs;
2665: PetscInt nsubs,i,j,nl;
2666: const PetscInt *idxs;
2667: PetscScalar *array;
2668: Vec *work;
2669: Mat_IS* matis = (Mat_IS*)(pc->pmat->data);
2671: subs = pcbddc->local_subs;
2672: nsubs = pcbddc->n_local_subs;
2673: /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2674: if (checkb) {
2675: VecDuplicateVecs(matis->y,2,&work);
2676: ISGetLocalSize(zerodiag,&nl);
2677: ISGetIndices(zerodiag,&idxs);
2678: /* work[0] = 1_p */
2679: VecSet(work[0],0.);
2680: VecGetArray(work[0],&array);
2681: for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2682: VecRestoreArray(work[0],&array);
2683: /* work[0] = 1_v */
2684: VecSet(work[1],1.);
2685: VecGetArray(work[1],&array);
2686: for (j=0;j<nl;j++) array[idxs[j]] = 0.;
2687: VecRestoreArray(work[1],&array);
2688: ISRestoreIndices(zerodiag,&idxs);
2689: }
2691: if (nsubs > 1 || bsp > 1) {
2692: IS *is;
2693: PetscInt b,totb;
2695: totb = bsp;
2696: is = bsp > 1 ? bzerodiag : &zerodiag;
2697: nsubs = PetscMax(nsubs,1);
2698: PetscCalloc1(nsubs*totb,&zerodiag_subs);
2699: for (b=0;b<totb;b++) {
2700: for (i=0;i<nsubs;i++) {
2701: ISLocalToGlobalMapping l2g;
2702: IS t_zerodiag_subs;
2703: PetscInt nl;
2705: if (subs) {
2706: ISLocalToGlobalMappingCreateIS(subs[i],&l2g);
2707: } else {
2708: IS tis;
2710: MatGetLocalSize(pcbddc->local_mat,&nl,NULL);
2711: ISCreateStride(PETSC_COMM_SELF,nl,0,1,&tis);
2712: ISLocalToGlobalMappingCreateIS(tis,&l2g);
2713: ISDestroy(&tis);
2714: }
2715: ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,is[b],&t_zerodiag_subs);
2716: ISGetLocalSize(t_zerodiag_subs,&nl);
2717: if (nl) {
2718: PetscBool valid = PETSC_TRUE;
2720: if (checkb) {
2721: VecSet(matis->x,0);
2722: ISGetLocalSize(subs[i],&nl);
2723: ISGetIndices(subs[i],&idxs);
2724: VecGetArray(matis->x,&array);
2725: for (j=0;j<nl;j++) array[idxs[j]] = 1.;
2726: VecRestoreArray(matis->x,&array);
2727: ISRestoreIndices(subs[i],&idxs);
2728: VecPointwiseMult(matis->x,work[0],matis->x);
2729: MatMult(matis->A,matis->x,matis->y);
2730: VecPointwiseMult(matis->y,work[1],matis->y);
2731: VecGetArray(matis->y,&array);
2732: for (j=0;j<n_interior_dofs;j++) {
2733: if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2734: valid = PETSC_FALSE;
2735: break;
2736: }
2737: }
2738: VecRestoreArray(matis->y,&array);
2739: }
2740: if (valid && nneu) {
2741: const PetscInt *idxs;
2742: PetscInt nzb;
2744: ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2745: ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);
2746: ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
2747: if (nzb) valid = PETSC_FALSE;
2748: }
2749: if (valid && pressures) {
2750: IS t_pressure_subs,tmp;
2751: PetscInt i1,i2;
2753: ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);
2754: ISEmbed(t_zerodiag_subs,t_pressure_subs,PETSC_TRUE,&tmp);
2755: ISGetLocalSize(tmp,&i1);
2756: ISGetLocalSize(t_zerodiag_subs,&i2);
2757: if (i2 != i1) valid = PETSC_FALSE;
2758: ISDestroy(&t_pressure_subs);
2759: ISDestroy(&tmp);
2760: }
2761: if (valid) {
2762: ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[benign_n]);
2763: benign_n++;
2764: } else recompute_zerodiag = PETSC_TRUE;
2765: }
2766: ISDestroy(&t_zerodiag_subs);
2767: ISLocalToGlobalMappingDestroy(&l2g);
2768: }
2769: }
2770: } else { /* there's just one subdomain (or zero if they have not been detected */
2771: PetscBool valid = PETSC_TRUE;
2773: if (nneu) valid = PETSC_FALSE;
2774: if (valid && pressures) {
2775: ISEqual(pressures,zerodiag,&valid);
2776: }
2777: if (valid && checkb) {
2778: MatMult(matis->A,work[0],matis->x);
2779: VecPointwiseMult(matis->x,work[1],matis->x);
2780: VecGetArray(matis->x,&array);
2781: for (j=0;j<n_interior_dofs;j++) {
2782: if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2783: valid = PETSC_FALSE;
2784: break;
2785: }
2786: }
2787: VecRestoreArray(matis->x,&array);
2788: }
2789: if (valid) {
2790: benign_n = 1;
2791: PetscMalloc1(benign_n,&zerodiag_subs);
2792: PetscObjectReference((PetscObject)zerodiag);
2793: zerodiag_subs[0] = zerodiag;
2794: }
2795: }
2796: if (checkb) {
2797: VecDestroyVecs(2,&work);
2798: }
2799: }
2800: PetscFree(interior_dofs);
2802: if (!benign_n) {
2803: PetscInt n;
2805: ISDestroy(&zerodiag);
2806: recompute_zerodiag = PETSC_FALSE;
2807: MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2808: if (n) have_null = PETSC_FALSE;
2809: }
2811: /* final check for null pressures */
2812: if (zerodiag && pressures) {
2813: ISEqual(pressures,zerodiag,&have_null);
2814: }
2816: if (recompute_zerodiag) {
2817: ISDestroy(&zerodiag);
2818: if (benign_n == 1) {
2819: PetscObjectReference((PetscObject)zerodiag_subs[0]);
2820: zerodiag = zerodiag_subs[0];
2821: } else {
2822: PetscInt i,nzn,*new_idxs;
2824: nzn = 0;
2825: for (i=0;i<benign_n;i++) {
2826: PetscInt ns;
2827: ISGetLocalSize(zerodiag_subs[i],&ns);
2828: nzn += ns;
2829: }
2830: PetscMalloc1(nzn,&new_idxs);
2831: nzn = 0;
2832: for (i=0;i<benign_n;i++) {
2833: PetscInt ns,*idxs;
2834: ISGetLocalSize(zerodiag_subs[i],&ns);
2835: ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2836: PetscArraycpy(new_idxs+nzn,idxs,ns);
2837: ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);
2838: nzn += ns;
2839: }
2840: PetscSortInt(nzn,new_idxs);
2841: ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);
2842: }
2843: have_null = PETSC_FALSE;
2844: }
2846: /* determines if the coarse solver will be singular or not */
2847: MPIU_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));
2849: /* Prepare matrix to compute no-net-flux */
2850: if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
2851: Mat A,loc_divudotp;
2852: ISLocalToGlobalMapping rl2g,cl2g,l2gmap;
2853: IS row,col,isused = NULL;
2854: PetscInt M,N,n,st,n_isused;
2856: if (pressures) {
2857: isused = pressures;
2858: } else {
2859: isused = zerodiag_save;
2860: }
2861: MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);
2862: MatISGetLocalMat(pc->pmat,&A);
2863: MatGetLocalSize(A,&n,NULL);
2864: if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field");
2865: n_isused = 0;
2866: if (isused) {
2867: ISGetLocalSize(isused,&n_isused);
2868: }
2869: MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
2870: st = st-n_isused;
2871: if (n) {
2872: const PetscInt *gidxs;
2874: MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);
2875: ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
2876: /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
2877: ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2878: ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);
2879: ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
2880: } else {
2881: MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);
2882: ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);
2883: ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);
2884: }
2885: MatGetSize(pc->pmat,NULL,&N);
2886: ISGetSize(row,&M);
2887: ISLocalToGlobalMappingCreateIS(row,&rl2g);
2888: ISLocalToGlobalMappingCreateIS(col,&cl2g);
2889: ISDestroy(&row);
2890: ISDestroy(&col);
2891: MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);
2892: MatSetType(pcbddc->divudotp,MATIS);
2893: MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);
2894: MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);
2895: ISLocalToGlobalMappingDestroy(&rl2g);
2896: ISLocalToGlobalMappingDestroy(&cl2g);
2897: MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);
2898: MatDestroy(&loc_divudotp);
2899: MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2900: MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);
2901: }
2902: ISDestroy(&zerodiag_save);
2903: ISDestroy(&pressures);
2904: if (bzerodiag) {
2905: PetscInt i;
2907: for (i=0;i<bsp;i++) {
2908: ISDestroy(&bzerodiag[i]);
2909: }
2910: PetscFree(bzerodiag);
2911: }
2912: pcbddc->benign_n = benign_n;
2913: pcbddc->benign_zerodiag_subs = zerodiag_subs;
2915: /* determines if the problem has subdomains with 0 pressure block */
2916: have_null = (PetscBool)(!!pcbddc->benign_n);
2917: MPIU_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
2919: project_b0:
2920: MatGetLocalSize(pcbddc->local_mat,&n,NULL);
2921: /* change of basis and p0 dofs */
2922: if (pcbddc->benign_n) {
2923: PetscInt i,s,*nnz;
2925: /* local change of basis for pressures */
2926: MatDestroy(&pcbddc->benign_change);
2927: MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);
2928: MatSetType(pcbddc->benign_change,MATAIJ);
2929: MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);
2930: PetscMalloc1(n,&nnz);
2931: for (i=0;i<n;i++) nnz[i] = 1; /* defaults to identity */
2932: for (i=0;i<pcbddc->benign_n;i++) {
2933: const PetscInt *idxs;
2934: PetscInt nzs,j;
2936: ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nzs);
2937: ISGetIndices(pcbddc->benign_zerodiag_subs[i],&idxs);
2938: for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */
2939: nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */
2940: ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],&idxs);
2941: }
2942: MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);
2943: MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
2944: PetscFree(nnz);
2945: /* set identity by default */
2946: for (i=0;i<n;i++) {
2947: MatSetValue(pcbddc->benign_change,i,i,1.,INSERT_VALUES);
2948: }
2949: PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
2950: PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);
2951: /* set change on pressures */
2952: for (s=0;s<pcbddc->benign_n;s++) {
2953: PetscScalar *array;
2954: const PetscInt *idxs;
2955: PetscInt nzs;
2957: ISGetLocalSize(pcbddc->benign_zerodiag_subs[s],&nzs);
2958: ISGetIndices(pcbddc->benign_zerodiag_subs[s],&idxs);
2959: for (i=0;i<nzs-1;i++) {
2960: PetscScalar vals[2];
2961: PetscInt cols[2];
2963: cols[0] = idxs[i];
2964: cols[1] = idxs[nzs-1];
2965: vals[0] = 1.;
2966: vals[1] = 1.;
2967: MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);
2968: }
2969: PetscMalloc1(nzs,&array);
2970: for (i=0;i<nzs-1;i++) array[i] = -1.;
2971: array[nzs-1] = 1.;
2972: MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);
2973: /* store local idxs for p0 */
2974: pcbddc->benign_p0_lidx[s] = idxs[nzs-1];
2975: ISRestoreIndices(pcbddc->benign_zerodiag_subs[s],&idxs);
2976: PetscFree(array);
2977: }
2978: MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2979: MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);
2981: /* project if needed */
2982: if (pcbddc->benign_change_explicit) {
2983: Mat M;
2985: MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);
2986: MatDestroy(&pcbddc->local_mat);
2987: MatSeqAIJCompress(M,&pcbddc->local_mat);
2988: MatDestroy(&M);
2989: }
2990: /* store global idxs for p0 */
2991: ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);
2992: }
2993: *zerodiaglocal = zerodiag;
2994: return(0);
2995: }
2997: PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
2998: {
2999: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
3000: PetscScalar *array;
3004: if (!pcbddc->benign_sf) {
3005: PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);
3006: PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);
3007: }
3008: if (get) {
3009: VecGetArrayRead(v,(const PetscScalar**)&array);
3010: PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);
3011: PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);
3012: VecRestoreArrayRead(v,(const PetscScalar**)&array);
3013: } else {
3014: VecGetArray(v,&array);
3015: PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);
3016: PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);
3017: VecRestoreArray(v,&array);
3018: }
3019: return(0);
3020: }
3022: PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
3023: {
3024: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
3028: /* TODO: add error checking
3029: - avoid nested pop (or push) calls.
3030: - cannot push before pop.
3031: - cannot call this if pcbddc->local_mat is NULL
3032: */
3033: if (!pcbddc->benign_n) {
3034: return(0);
3035: }
3036: if (pop) {
3037: if (pcbddc->benign_change_explicit) {
3038: IS is_p0;
3039: MatReuse reuse;
3041: /* extract B_0 */
3042: reuse = MAT_INITIAL_MATRIX;
3043: if (pcbddc->benign_B0) {
3044: reuse = MAT_REUSE_MATRIX;
3045: }
3046: ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);
3047: MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);
3048: /* remove rows and cols from local problem */
3049: MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);
3050: MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
3051: MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);
3052: ISDestroy(&is_p0);
3053: } else {
3054: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
3055: PetscScalar *vals;
3056: PetscInt i,n,*idxs_ins;
3058: VecGetLocalSize(matis->y,&n);
3059: PetscMalloc2(n,&idxs_ins,n,&vals);
3060: if (!pcbddc->benign_B0) {
3061: PetscInt *nnz;
3062: MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);
3063: MatSetType(pcbddc->benign_B0,MATAIJ);
3064: MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);
3065: PetscMalloc1(pcbddc->benign_n,&nnz);
3066: for (i=0;i<pcbddc->benign_n;i++) {
3067: ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);
3068: nnz[i] = n - nnz[i];
3069: }
3070: MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);
3071: MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
3072: PetscFree(nnz);
3073: }
3075: for (i=0;i<pcbddc->benign_n;i++) {
3076: PetscScalar *array;
3077: PetscInt *idxs,j,nz,cum;
3079: VecSet(matis->x,0.);
3080: ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);
3081: ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
3082: for (j=0;j<nz;j++) vals[j] = 1.;
3083: VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);
3084: VecAssemblyBegin(matis->x);
3085: VecAssemblyEnd(matis->x);
3086: VecSet(matis->y,0.);
3087: MatMult(matis->A,matis->x,matis->y);
3088: VecGetArray(matis->y,&array);
3089: cum = 0;
3090: for (j=0;j<n;j++) {
3091: if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
3092: vals[cum] = array[j];
3093: idxs_ins[cum] = j;
3094: cum++;
3095: }
3096: }
3097: MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);
3098: VecRestoreArray(matis->y,&array);
3099: ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);
3100: }
3101: MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
3102: MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);
3103: PetscFree2(idxs_ins,vals);
3104: }
3105: } else { /* push */
3106: if (pcbddc->benign_change_explicit) {
3107: PetscInt i;
3109: for (i=0;i<pcbddc->benign_n;i++) {
3110: PetscScalar *B0_vals;
3111: PetscInt *B0_cols,B0_ncol;
3113: MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
3114: MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);
3115: MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);
3116: MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);
3117: MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);
3118: }
3119: MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
3120: MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);
3121: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!");
3122: }
3123: return(0);
3124: }
3126: PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
3127: {
3128: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
3129: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3130: PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork;
3131: PetscBLASInt *B_iwork,*B_ifail;
3132: PetscScalar *work,lwork;
3133: PetscScalar *St,*S,*eigv;
3134: PetscScalar *Sarray,*Starray;
3135: PetscReal *eigs,thresh,lthresh,uthresh;
3136: PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs;
3137: PetscBool allocated_S_St;
3138: #if defined(PETSC_USE_COMPLEX)
3139: PetscReal *rwork;
3140: #endif
3141: PetscErrorCode ierr;
3144: if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data");
3145: if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
3146: if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef);
3147: PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);
3149: if (pcbddc->dbg_flag) {
3150: PetscViewerFlush(pcbddc->dbg_viewer);
3151: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
3152: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");
3153: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
3154: }
3156: if (pcbddc->dbg_flag) {
3157: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %D (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);
3158: }
3160: /* max size of subsets */
3161: mss = 0;
3162: for (i=0;i<sub_schurs->n_subs;i++) {
3163: PetscInt subset_size;
3165: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3166: mss = PetscMax(mss,subset_size);
3167: }
3169: /* min/max and threshold */
3170: nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3171: nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3172: nmax = PetscMax(nmin,nmax);
3173: allocated_S_St = PETSC_FALSE;
3174: if (nmin || !sub_schurs->is_posdef) { /* XXX */
3175: allocated_S_St = PETSC_TRUE;
3176: }
3178: /* allocate lapack workspace */
3179: cum = cum2 = 0;
3180: maxneigs = 0;
3181: for (i=0;i<sub_schurs->n_subs;i++) {
3182: PetscInt n,subset_size;
3184: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3185: n = PetscMin(subset_size,nmax);
3186: cum += subset_size;
3187: cum2 += subset_size*n;
3188: maxneigs = PetscMax(maxneigs,n);
3189: }
3190: lwork = 0;
3191: if (mss) {
3192: if (sub_schurs->is_symmetric) {
3193: PetscScalar sdummy = 0.;
3194: PetscBLASInt B_itype = 1;
3195: PetscBLASInt B_N = mss, idummy = 0;
3196: PetscReal rdummy = 0.,zero = 0.0;
3197: PetscReal eps = 0.0; /* dlamch? */
3199: B_lwork = -1;
3200: /* some implementations may complain about NULL pointers, even if we are querying */
3201: S = &sdummy;
3202: St = &sdummy;
3203: eigs = &rdummy;
3204: eigv = &sdummy;
3205: B_iwork = &idummy;
3206: B_ifail = &idummy;
3207: #if defined(PETSC_USE_COMPLEX)
3208: rwork = &rdummy;
3209: #endif
3210: thresh = 1.0;
3211: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3212: #if defined(PETSC_USE_COMPLEX)
3213: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3214: #else
3215: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr));
3216: #endif
3217: if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr);
3218: PetscFPTrapPop();
3219: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3220: }
3222: nv = 0;
3223: if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3224: ISGetLocalSize(sub_schurs->is_vertices,&nv);
3225: }
3226: PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
3227: if (allocated_S_St) {
3228: PetscMalloc2(mss*mss,&S,mss*mss,&St);
3229: }
3230: PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);
3231: #if defined(PETSC_USE_COMPLEX)
3232: PetscMalloc1(7*mss,&rwork);
3233: #endif
3234: PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n,
3235: nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr,
3236: nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr,
3237: nv+cum,&pcbddc->adaptive_constraints_idxs,
3238: nv+cum2,&pcbddc->adaptive_constraints_data);
3239: PetscArrayzero(pcbddc->adaptive_constraints_n,nv+sub_schurs->n_subs);
3241: maxneigs = 0;
3242: cum = cumarray = 0;
3243: pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3244: pcbddc->adaptive_constraints_data_ptr[0] = 0;
3245: if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3246: const PetscInt *idxs;
3248: ISGetIndices(sub_schurs->is_vertices,&idxs);
3249: for (cum=0;cum<nv;cum++) {
3250: pcbddc->adaptive_constraints_n[cum] = 1;
3251: pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
3252: pcbddc->adaptive_constraints_data[cum] = 1.0;
3253: pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1;
3254: pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1;
3255: }
3256: ISRestoreIndices(sub_schurs->is_vertices,&idxs);
3257: }
3259: if (mss) { /* multilevel */
3260: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3261: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3262: }
3264: lthresh = pcbddc->adaptive_threshold[0];
3265: uthresh = pcbddc->adaptive_threshold[1];
3266: for (i=0;i<sub_schurs->n_subs;i++) {
3267: const PetscInt *idxs;
3268: PetscReal upper,lower;
3269: PetscInt j,subset_size,eigs_start = 0;
3270: PetscBLASInt B_N;
3271: PetscBool same_data = PETSC_FALSE;
3272: PetscBool scal = PETSC_FALSE;
3274: if (pcbddc->use_deluxe_scaling) {
3275: upper = PETSC_MAX_REAL;
3276: lower = uthresh;
3277: } else {
3278: if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling");
3279: upper = 1./uthresh;
3280: lower = 0.;
3281: }
3282: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
3283: ISGetIndices(sub_schurs->is_subs[i],&idxs);
3284: PetscBLASIntCast(subset_size,&B_N);
3285: /* this is experimental: we assume the dofs have been properly grouped to have
3286: the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3287: if (!sub_schurs->is_posdef) {
3288: Mat T;
3290: for (j=0;j<subset_size;j++) {
3291: if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) {
3292: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);
3293: MatScale(T,-1.0);
3294: MatDestroy(&T);
3295: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);
3296: MatScale(T,-1.0);
3297: MatDestroy(&T);
3298: if (sub_schurs->change_primal_sub) {
3299: PetscInt nz,k;
3300: const PetscInt *idxs;
3302: ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);
3303: ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);
3304: for (k=0;k<nz;k++) {
3305: *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0;
3306: *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0;
3307: }
3308: ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);
3309: }
3310: scal = PETSC_TRUE;
3311: break;
3312: }
3313: }
3314: }
3316: if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3317: if (sub_schurs->is_symmetric) {
3318: PetscInt j,k;
3319: if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */
3320: PetscArrayzero(S,subset_size*subset_size);
3321: PetscArrayzero(St,subset_size*subset_size);
3322: }
3323: for (j=0;j<subset_size;j++) {
3324: for (k=j;k<subset_size;k++) {
3325: S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3326: St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3327: }
3328: }
3329: } else {
3330: PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3331: PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3332: }
3333: } else {
3334: S = Sarray + cumarray;
3335: St = Starray + cumarray;
3336: }
3337: /* see if we can save some work */
3338: if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) {
3339: PetscArraycmp(S,St,subset_size*subset_size,&same_data);
3340: }
3342: if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3343: B_neigs = 0;
3344: } else {
3345: if (sub_schurs->is_symmetric) {
3346: PetscBLASInt B_itype = 1;
3347: PetscBLASInt B_IL, B_IU;
3348: PetscReal eps = -1.0; /* dlamch? */
3349: PetscInt nmin_s;
3350: PetscBool compute_range;
3352: B_neigs = 0;
3353: compute_range = (PetscBool)!same_data;
3354: if (nmin >= subset_size) compute_range = PETSC_FALSE;
3356: if (pcbddc->dbg_flag) {
3357: PetscInt nc = 0;
3359: if (sub_schurs->change_primal_sub) {
3360: ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);
3361: }
3362: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %D/%D size %D count %D fid %D (range %d) (change %D).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);
3363: }
3365: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3366: if (compute_range) {
3368: /* ask for eigenvalues larger than thresh */
3369: if (sub_schurs->is_posdef) {
3370: #if defined(PETSC_USE_COMPLEX)
3371: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3372: #else
3373: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3374: #endif
3375: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3376: } else { /* no theory so far, but it works nicely */
3377: PetscInt recipe = 0,recipe_m = 1;
3378: PetscReal bb[2];
3380: PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);
3381: switch (recipe) {
3382: case 0:
3383: if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; }
3384: else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; }
3385: #if defined(PETSC_USE_COMPLEX)
3386: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3387: #else
3388: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3389: #endif
3390: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3391: break;
3392: case 1:
3393: bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh;
3394: #if defined(PETSC_USE_COMPLEX)
3395: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3396: #else
3397: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3398: #endif
3399: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3400: if (!scal) {
3401: PetscBLASInt B_neigs2 = 0;
3403: bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL;
3404: PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3405: PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3406: #if defined(PETSC_USE_COMPLEX)
3407: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3408: #else
3409: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3410: #endif
3411: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3412: B_neigs += B_neigs2;
3413: }
3414: break;
3415: case 2:
3416: if (scal) {
3417: bb[0] = PETSC_MIN_REAL;
3418: bb[1] = 0;
3419: #if defined(PETSC_USE_COMPLEX)
3420: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3421: #else
3422: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3423: #endif
3424: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3425: } else {
3426: PetscBLASInt B_neigs2 = 0;
3427: PetscBool import = PETSC_FALSE;
3429: lthresh = PetscMax(lthresh,0.0);
3430: if (lthresh > 0.0) {
3431: bb[0] = PETSC_MIN_REAL;
3432: bb[1] = lthresh*lthresh;
3434: import = PETSC_TRUE;
3435: #if defined(PETSC_USE_COMPLEX)
3436: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3437: #else
3438: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3439: #endif
3440: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3441: }
3442: bb[0] = PetscMax(lthresh*lthresh,uthresh);
3443: bb[1] = PETSC_MAX_REAL;
3444: if (import) {
3445: PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3446: PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3447: }
3448: #if defined(PETSC_USE_COMPLEX)
3449: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3450: #else
3451: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3452: #endif
3453: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3454: B_neigs += B_neigs2;
3455: }
3456: break;
3457: case 3:
3458: if (scal) {
3459: PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);
3460: } else {
3461: PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);
3462: }
3463: if (!scal) {
3464: bb[0] = uthresh;
3465: bb[1] = PETSC_MAX_REAL;
3466: #if defined(PETSC_USE_COMPLEX)
3467: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3468: #else
3469: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3470: #endif
3471: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3472: }
3473: if (recipe_m > 0 && B_N - B_neigs > 0) {
3474: PetscBLASInt B_neigs2 = 0;
3476: B_IL = 1;
3477: PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);
3478: PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3479: PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3480: #if defined(PETSC_USE_COMPLEX)
3481: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3482: #else
3483: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3484: #endif
3485: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3486: B_neigs += B_neigs2;
3487: }
3488: break;
3489: case 4:
3490: bb[0] = PETSC_MIN_REAL; bb[1] = lthresh;
3491: #if defined(PETSC_USE_COMPLEX)
3492: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3493: #else
3494: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3495: #endif
3496: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3497: {
3498: PetscBLASInt B_neigs2 = 0;
3500: bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL;
3501: PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3502: PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3503: #if defined(PETSC_USE_COMPLEX)
3504: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3505: #else
3506: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3507: #endif
3508: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3509: B_neigs += B_neigs2;
3510: }
3511: break;
3512: case 5: /* same as before: first compute all eigenvalues, then filter */
3513: #if defined(PETSC_USE_COMPLEX)
3514: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3515: #else
3516: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3517: #endif
3518: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3519: {
3520: PetscInt e,k,ne;
3521: for (e=0,ne=0;e<B_neigs;e++) {
3522: if (eigs[e] < lthresh || eigs[e] > uthresh) {
3523: for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k];
3524: eigs[ne] = eigs[e];
3525: ne++;
3526: }
3527: }
3528: PetscArraycpy(eigv,S,B_N*ne);
3529: B_neigs = ne;
3530: }
3531: break;
3532: default:
3533: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe);
3534: break;
3535: }
3536: }
3537: } else if (!same_data) { /* this is just to see all the eigenvalues */
3538: B_IU = PetscMax(1,PetscMin(B_N,nmax));
3539: B_IL = 1;
3540: #if defined(PETSC_USE_COMPLEX)
3541: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3542: #else
3543: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3544: #endif
3545: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3546: } else { /* same_data is true, so just get the adaptive functional requested by the user */
3547: PetscInt k;
3548: if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
3549: ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);
3550: PetscBLASIntCast(nmax,&B_neigs);
3551: nmin = nmax;
3552: PetscArrayzero(eigv,subset_size*nmax);
3553: for (k=0;k<nmax;k++) {
3554: eigs[k] = 1./PETSC_SMALL;
3555: eigv[k*(subset_size+1)] = 1.0;
3556: }
3557: }
3558: PetscFPTrapPop();
3559: if (B_ierr) {
3560: if (B_ierr < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3561: else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3562: else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3563: }
3565: if (B_neigs > nmax) {
3566: if (pcbddc->dbg_flag) {
3567: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %D.\n",B_neigs,nmax);
3568: }
3569: if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax;
3570: B_neigs = nmax;
3571: }
3573: nmin_s = PetscMin(nmin,B_N);
3574: if (B_neigs < nmin_s) {
3575: PetscBLASInt B_neigs2 = 0;
3577: if (pcbddc->use_deluxe_scaling) {
3578: if (scal) {
3579: B_IU = nmin_s;
3580: B_IL = B_neigs + 1;
3581: } else {
3582: B_IL = B_N - nmin_s + 1;
3583: B_IU = B_N - B_neigs;
3584: }
3585: } else {
3586: B_IL = B_neigs + 1;
3587: B_IU = nmin_s;
3588: }
3589: if (pcbddc->dbg_flag) {
3590: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %D. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);
3591: }
3592: if (sub_schurs->is_symmetric) {
3593: PetscInt j,k;
3594: for (j=0;j<subset_size;j++) {
3595: for (k=j;k<subset_size;k++) {
3596: S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k];
3597: St[j*subset_size+k] = Starray[cumarray+j*subset_size+k];
3598: }
3599: }
3600: } else {
3601: PetscArraycpy(S,Sarray+cumarray,subset_size*subset_size);
3602: PetscArraycpy(St,Starray+cumarray,subset_size*subset_size);
3603: }
3604: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
3605: #if defined(PETSC_USE_COMPLEX)
3606: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr));
3607: #else
3608: PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr));
3609: #endif
3610: PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);
3611: PetscFPTrapPop();
3612: B_neigs += B_neigs2;
3613: }
3614: if (B_ierr) {
3615: if (B_ierr < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr);
3616: else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr);
3617: else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1);
3618: }
3619: if (pcbddc->dbg_flag) {
3620: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);
3621: for (j=0;j<B_neigs;j++) {
3622: if (eigs[j] == 0.0) {
3623: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");
3624: } else {
3625: if (pcbddc->use_deluxe_scaling) {
3626: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);
3627: } else {
3628: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);
3629: }
3630: }
3631: }
3632: }
3633: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
3634: }
3635: /* change the basis back to the original one */
3636: if (sub_schurs->change) {
3637: Mat change,phi,phit;
3639: if (pcbddc->dbg_flag > 2) {
3640: PetscInt ii;
3641: for (ii=0;ii<B_neigs;ii++) {
3642: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);
3643: for (j=0;j<B_N;j++) {
3644: #if defined(PETSC_USE_COMPLEX)
3645: PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]);
3646: PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]);
3647: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);
3648: #else
3649: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);
3650: #endif
3651: }
3652: }
3653: }
3654: KSPGetOperators(sub_schurs->change[i],&change,NULL);
3655: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);
3656: MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);
3657: MatCopy(phi,phit,SAME_NONZERO_PATTERN);
3658: MatDestroy(&phit);
3659: MatDestroy(&phi);
3660: }
3661: maxneigs = PetscMax(B_neigs,maxneigs);
3662: pcbddc->adaptive_constraints_n[i+nv] = B_neigs;
3663: if (B_neigs) {
3664: PetscArraycpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size);
3666: if (pcbddc->dbg_flag > 1) {
3667: PetscInt ii;
3668: for (ii=0;ii<B_neigs;ii++) {
3669: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);
3670: for (j=0;j<B_N;j++) {
3671: #if defined(PETSC_USE_COMPLEX)
3672: PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3673: PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3674: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);
3675: #else
3676: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);
3677: #endif
3678: }
3679: }
3680: }
3681: PetscArraycpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size);
3682: pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3683: pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs;
3684: cum++;
3685: }
3686: ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
3687: /* shift for next computation */
3688: cumarray += subset_size*subset_size;
3689: }
3690: if (pcbddc->dbg_flag) {
3691: PetscViewerFlush(pcbddc->dbg_viewer);
3692: }
3694: if (mss) {
3695: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);
3696: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);
3697: /* destroy matrices (junk) */
3698: MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
3699: MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
3700: }
3701: if (allocated_S_St) {
3702: PetscFree2(S,St);
3703: }
3704: PetscFree5(eigv,eigs,work,B_iwork,B_ifail);
3705: #if defined(PETSC_USE_COMPLEX)
3706: PetscFree(rwork);
3707: #endif
3708: if (pcbddc->dbg_flag) {
3709: PetscInt maxneigs_r;
3710: MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));
3711: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %D\n",maxneigs_r);
3712: }
3713: PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);
3714: return(0);
3715: }
3717: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3718: {
3719: PetscScalar *coarse_submat_vals;
3723: /* Setup local scatters R_to_B and (optionally) R_to_D */
3724: /* PCBDDCSetUpLocalWorkVectors should be called first! */
3725: PCBDDCSetUpLocalScatters(pc);
3727: /* Setup local neumann solver ksp_R */
3728: /* PCBDDCSetUpLocalScatters should be called first! */
3729: PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);
3731: /*
3732: Setup local correction and local part of coarse basis.
3733: Gives back the dense local part of the coarse matrix in column major ordering
3734: */
3735: PCBDDCSetUpCorrection(pc,&coarse_submat_vals);
3737: /* Compute total number of coarse nodes and setup coarse solver */
3738: PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);
3740: /* free */
3741: PetscFree(coarse_submat_vals);
3742: return(0);
3743: }
3745: PetscErrorCode PCBDDCResetCustomization(PC pc)
3746: {
3747: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3751: ISDestroy(&pcbddc->user_primal_vertices);
3752: ISDestroy(&pcbddc->user_primal_vertices_local);
3753: ISDestroy(&pcbddc->NeumannBoundaries);
3754: ISDestroy(&pcbddc->NeumannBoundariesLocal);
3755: ISDestroy(&pcbddc->DirichletBoundaries);
3756: MatNullSpaceDestroy(&pcbddc->onearnullspace);
3757: PetscFree(pcbddc->onearnullvecs_state);
3758: ISDestroy(&pcbddc->DirichletBoundariesLocal);
3759: PCBDDCSetDofsSplitting(pc,0,NULL);
3760: PCBDDCSetDofsSplittingLocal(pc,0,NULL);
3761: return(0);
3762: }
3764: PetscErrorCode PCBDDCResetTopography(PC pc)
3765: {
3766: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3767: PetscInt i;
3771: MatDestroy(&pcbddc->nedcG);
3772: ISDestroy(&pcbddc->nedclocal);
3773: MatDestroy(&pcbddc->discretegradient);
3774: MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);
3775: MatDestroy(&pcbddc->ChangeOfBasisMatrix);
3776: MatDestroy(&pcbddc->switch_static_change);
3777: VecDestroy(&pcbddc->work_change);
3778: MatDestroy(&pcbddc->ConstraintMatrix);
3779: MatDestroy(&pcbddc->divudotp);
3780: ISDestroy(&pcbddc->divudotp_vl2l);
3781: PCBDDCGraphDestroy(&pcbddc->mat_graph);
3782: for (i=0;i<pcbddc->n_local_subs;i++) {
3783: ISDestroy(&pcbddc->local_subs[i]);
3784: }
3785: pcbddc->n_local_subs = 0;
3786: PetscFree(pcbddc->local_subs);
3787: PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);
3788: pcbddc->graphanalyzed = PETSC_FALSE;
3789: pcbddc->recompute_topography = PETSC_TRUE;
3790: pcbddc->corner_selected = PETSC_FALSE;
3791: return(0);
3792: }
3794: PetscErrorCode PCBDDCResetSolvers(PC pc)
3795: {
3796: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3800: VecDestroy(&pcbddc->coarse_vec);
3801: if (pcbddc->coarse_phi_B) {
3802: PetscScalar *array;
3803: MatDenseGetArray(pcbddc->coarse_phi_B,&array);
3804: PetscFree(array);
3805: }
3806: MatDestroy(&pcbddc->coarse_phi_B);
3807: MatDestroy(&pcbddc->coarse_phi_D);
3808: MatDestroy(&pcbddc->coarse_psi_B);
3809: MatDestroy(&pcbddc->coarse_psi_D);
3810: VecDestroy(&pcbddc->vec1_P);
3811: VecDestroy(&pcbddc->vec1_C);
3812: MatDestroy(&pcbddc->local_auxmat2);
3813: MatDestroy(&pcbddc->local_auxmat1);
3814: VecDestroy(&pcbddc->vec1_R);
3815: VecDestroy(&pcbddc->vec2_R);
3816: ISDestroy(&pcbddc->is_R_local);
3817: VecScatterDestroy(&pcbddc->R_to_B);
3818: VecScatterDestroy(&pcbddc->R_to_D);
3819: VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
3820: KSPReset(pcbddc->ksp_D);
3821: KSPReset(pcbddc->ksp_R);
3822: KSPReset(pcbddc->coarse_ksp);
3823: MatDestroy(&pcbddc->local_mat);
3824: PetscFree(pcbddc->primal_indices_local_idxs);
3825: PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
3826: PetscFree(pcbddc->global_primal_indices);
3827: ISDestroy(&pcbddc->coarse_subassembling);
3828: MatDestroy(&pcbddc->benign_change);
3829: VecDestroy(&pcbddc->benign_vec);
3830: PCBDDCBenignShellMat(pc,PETSC_TRUE);
3831: MatDestroy(&pcbddc->benign_B0);
3832: PetscSFDestroy(&pcbddc->benign_sf);
3833: if (pcbddc->benign_zerodiag_subs) {
3834: PetscInt i;
3835: for (i=0;i<pcbddc->benign_n;i++) {
3836: ISDestroy(&pcbddc->benign_zerodiag_subs[i]);
3837: }
3838: PetscFree(pcbddc->benign_zerodiag_subs);
3839: }
3840: PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);
3841: return(0);
3842: }
3844: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
3845: {
3846: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
3847: PC_IS *pcis = (PC_IS*)pc->data;
3848: VecType impVecType;
3849: PetscInt n_constraints,n_R,old_size;
3853: n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
3854: n_R = pcis->n - pcbddc->n_vertices;
3855: VecGetType(pcis->vec1_N,&impVecType);
3856: /* local work vectors (try to avoid unneeded work)*/
3857: /* R nodes */
3858: old_size = -1;
3859: if (pcbddc->vec1_R) {
3860: VecGetSize(pcbddc->vec1_R,&old_size);
3861: }
3862: if (n_R != old_size) {
3863: VecDestroy(&pcbddc->vec1_R);
3864: VecDestroy(&pcbddc->vec2_R);
3865: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);
3866: VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);
3867: VecSetType(pcbddc->vec1_R,impVecType);
3868: VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);
3869: }
3870: /* local primal dofs */
3871: old_size = -1;
3872: if (pcbddc->vec1_P) {
3873: VecGetSize(pcbddc->vec1_P,&old_size);
3874: }
3875: if (pcbddc->local_primal_size != old_size) {
3876: VecDestroy(&pcbddc->vec1_P);
3877: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);
3878: VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);
3879: VecSetType(pcbddc->vec1_P,impVecType);
3880: }
3881: /* local explicit constraints */
3882: old_size = -1;
3883: if (pcbddc->vec1_C) {
3884: VecGetSize(pcbddc->vec1_C,&old_size);
3885: }
3886: if (n_constraints && n_constraints != old_size) {
3887: VecDestroy(&pcbddc->vec1_C);
3888: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);
3889: VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);
3890: VecSetType(pcbddc->vec1_C,impVecType);
3891: }
3892: return(0);
3893: }
3895: PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n)
3896: {
3897: PetscErrorCode ierr;
3898: /* pointers to pcis and pcbddc */
3899: PC_IS* pcis = (PC_IS*)pc->data;
3900: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
3901: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3902: /* submatrices of local problem */
3903: Mat A_RV,A_VR,A_VV,local_auxmat2_R;
3904: /* submatrices of local coarse problem */
3905: Mat S_VV,S_CV,S_VC,S_CC;
3906: /* working matrices */
3907: Mat C_CR;
3908: /* additional working stuff */
3909: PC pc_R;
3910: Mat F,Brhs = NULL;
3911: Vec dummy_vec;
3912: PetscBool isLU,isCHOL,need_benign_correction,sparserhs;
3913: PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */
3914: PetscScalar *work;
3915: PetscInt *idx_V_B;
3916: PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I;
3917: PetscInt i,n_R,n_D,n_B;
3918: PetscScalar one=1.0,m_one=-1.0;
3921: if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented");
3922: PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);
3924: /* Set Non-overlapping dimensions */
3925: n_vertices = pcbddc->n_vertices;
3926: n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
3927: n_B = pcis->n_B;
3928: n_D = pcis->n - n_B;
3929: n_R = pcis->n - n_vertices;
3931: /* vertices in boundary numbering */
3932: PetscMalloc1(n_vertices,&idx_V_B);
3933: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);
3934: if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",n_vertices,i);
3936: /* Subdomain contribution (Non-overlapping) to coarse matrix */
3937: PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);
3938: MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);
3939: MatDenseSetLDA(S_VV,pcbddc->local_primal_size);
3940: MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);
3941: MatDenseSetLDA(S_CV,pcbddc->local_primal_size);
3942: MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);
3943: MatDenseSetLDA(S_VC,pcbddc->local_primal_size);
3944: MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);
3945: MatDenseSetLDA(S_CC,pcbddc->local_primal_size);
3947: /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
3948: KSPGetPC(pcbddc->ksp_R,&pc_R);
3949: PCSetUp(pc_R);
3950: PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);
3951: PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);
3952: lda_rhs = n_R;
3953: need_benign_correction = PETSC_FALSE;
3954: if (isLU || isCHOL) {
3955: PCFactorGetMatrix(pc_R,&F);
3956: } else if (sub_schurs && sub_schurs->reuse_solver) {
3957: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
3958: MatFactorType type;
3960: F = reuse_solver->F;
3961: MatGetFactorType(F,&type);
3962: if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
3963: if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE;
3964: MatGetSize(F,&lda_rhs,NULL);
3965: need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
3966: } else F = NULL;
3968: /* determine if we can use a sparse right-hand side */
3969: sparserhs = PETSC_FALSE;
3970: if (F) {
3971: MatSolverType solver;
3973: MatFactorGetSolverType(F,&solver);
3974: PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);
3975: }
3977: /* allocate workspace */
3978: n = 0;
3979: if (n_constraints) {
3980: n += lda_rhs*n_constraints;
3981: }
3982: if (n_vertices) {
3983: n = PetscMax(2*lda_rhs*n_vertices,n);
3984: n = PetscMax((lda_rhs+n_B)*n_vertices,n);
3985: }
3986: if (!pcbddc->symmetric_primal) {
3987: n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n);
3988: }
3989: PetscMalloc1(n,&work);
3991: /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
3992: dummy_vec = NULL;
3993: if (need_benign_correction && lda_rhs != n_R && F) {
3994: VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec);
3995: VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE);
3996: VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name);
3997: }
3999: MatDestroy(&pcbddc->local_auxmat1);
4000: MatDestroy(&pcbddc->local_auxmat2);
4002: /* Precompute stuffs needed for preprocessing and application of BDDC*/
4003: if (n_constraints) {
4004: Mat M3,C_B;
4005: IS is_aux;
4006: PetscScalar *array,*array2;
4008: /* Extract constraints on R nodes: C_{CR} */
4009: ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);
4010: MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);
4011: MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
4013: /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
4014: /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
4015: if (!sparserhs) {
4016: PetscArrayzero(work,lda_rhs*n_constraints);
4017: for (i=0;i<n_constraints;i++) {
4018: const PetscScalar *row_cmat_values;
4019: const PetscInt *row_cmat_indices;
4020: PetscInt size_of_constraint,j;
4022: MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
4023: for (j=0;j<size_of_constraint;j++) {
4024: work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j];
4025: }
4026: MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);
4027: }
4028: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);
4029: } else {
4030: Mat tC_CR;
4032: MatScale(C_CR,-1.0);
4033: if (lda_rhs != n_R) {
4034: PetscScalar *aa;
4035: PetscInt r,*ii,*jj;
4036: PetscBool done;
4038: MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4039: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
4040: MatSeqAIJGetArray(C_CR,&aa);
4041: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);
4042: MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4043: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
4044: } else {
4045: PetscObjectReference((PetscObject)C_CR);
4046: tC_CR = C_CR;
4047: }
4048: MatCreateTranspose(tC_CR,&Brhs);
4049: MatDestroy(&tC_CR);
4050: }
4051: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);
4052: if (F) {
4053: if (need_benign_correction) {
4054: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4056: /* rhs is already zero on interior dofs, no need to change the rhs */
4057: PetscArrayzero(reuse_solver->benign_save_vals,pcbddc->benign_n);
4058: }
4059: MatMatSolve(F,Brhs,local_auxmat2_R);
4060: if (need_benign_correction) {
4061: PetscScalar *marr;
4062: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4064: MatDenseGetArray(local_auxmat2_R,&marr);
4065: if (lda_rhs != n_R) {
4066: for (i=0;i<n_constraints;i++) {
4067: VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4068: PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
4069: VecResetArray(dummy_vec);
4070: }
4071: } else {
4072: for (i=0;i<n_constraints;i++) {
4073: VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4074: PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
4075: VecResetArray(pcbddc->vec1_R);
4076: }
4077: }
4078: MatDenseRestoreArray(local_auxmat2_R,&marr);
4079: }
4080: } else {
4081: PetscScalar *marr;
4083: MatDenseGetArray(local_auxmat2_R,&marr);
4084: for (i=0;i<n_constraints;i++) {
4085: VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
4086: VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);
4087: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4088: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4089: VecResetArray(pcbddc->vec1_R);
4090: VecResetArray(pcbddc->vec2_R);
4091: }
4092: MatDenseRestoreArray(local_auxmat2_R,&marr);
4093: }
4094: if (sparserhs) {
4095: MatScale(C_CR,-1.0);
4096: }
4097: MatDestroy(&Brhs);
4098: if (!pcbddc->switch_static) {
4099: MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);
4100: MatDenseGetArray(pcbddc->local_auxmat2,&array);
4101: MatDenseGetArray(local_auxmat2_R,&array2);
4102: for (i=0;i<n_constraints;i++) {
4103: VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);
4104: VecPlaceArray(pcis->vec1_B,array+i*n_B);
4105: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4106: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4107: VecResetArray(pcis->vec1_B);
4108: VecResetArray(pcbddc->vec1_R);
4109: }
4110: MatDenseRestoreArray(local_auxmat2_R,&array2);
4111: MatDenseRestoreArray(pcbddc->local_auxmat2,&array);
4112: MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
4113: } else {
4114: if (lda_rhs != n_R) {
4115: IS dummy;
4117: ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);
4118: MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);
4119: ISDestroy(&dummy);
4120: } else {
4121: PetscObjectReference((PetscObject)local_auxmat2_R);
4122: pcbddc->local_auxmat2 = local_auxmat2_R;
4123: }
4124: MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);
4125: }
4126: ISDestroy(&is_aux);
4127: /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */
4128: MatScale(M3,m_one);
4129: if (isCHOL) {
4130: MatCholeskyFactor(M3,NULL,NULL);
4131: } else {
4132: MatLUFactor(M3,NULL,NULL,NULL);
4133: }
4134: MatSeqDenseInvertFactors_Private(M3);
4135: /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
4136: MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);
4137: MatDestroy(&C_B);
4138: MatCopy(M3,S_CC,SAME_NONZERO_PATTERN); /* S_CC can have a different LDA, MatMatSolve doesn't support it */
4139: MatDestroy(&M3);
4140: }
4142: /* Get submatrices from subdomain matrix */
4143: if (n_vertices) {
4144: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4145: PetscBool oldpin;
4146: #endif
4147: PetscBool isaij;
4148: IS is_aux;
4150: if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
4151: IS tis;
4153: ISDuplicate(pcbddc->is_R_local,&tis);
4154: ISSort(tis);
4155: ISComplement(tis,0,pcis->n,&is_aux);
4156: ISDestroy(&tis);
4157: } else {
4158: ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);
4159: }
4160: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4161: oldpin = pcbddc->local_mat->boundtocpu;
4162: #endif
4163: MatBindToCPU(pcbddc->local_mat,PETSC_TRUE);
4164: MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);
4165: MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);
4166: PetscObjectBaseTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isaij);
4167: if (!isaij) { /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */
4168: MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4169: }
4170: MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);
4171: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4172: MatBindToCPU(pcbddc->local_mat,oldpin);
4173: #endif
4174: ISDestroy(&is_aux);
4175: }
4177: /* Matrix of coarse basis functions (local) */
4178: if (pcbddc->coarse_phi_B) {
4179: PetscInt on_B,on_primal,on_D=n_D;
4180: if (pcbddc->coarse_phi_D) {
4181: MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);
4182: }
4183: MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);
4184: if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) {
4185: PetscScalar *marray;
4187: MatDenseGetArray(pcbddc->coarse_phi_B,&marray);
4188: PetscFree(marray);
4189: MatDestroy(&pcbddc->coarse_phi_B);
4190: MatDestroy(&pcbddc->coarse_psi_B);
4191: MatDestroy(&pcbddc->coarse_phi_D);
4192: MatDestroy(&pcbddc->coarse_psi_D);
4193: }
4194: }
4196: if (!pcbddc->coarse_phi_B) {
4197: PetscScalar *marr;
4199: /* memory size */
4200: n = n_B*pcbddc->local_primal_size;
4201: if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size;
4202: if (!pcbddc->symmetric_primal) n *= 2;
4203: PetscCalloc1(n,&marr);
4204: MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);
4205: marr += n_B*pcbddc->local_primal_size;
4206: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4207: MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);
4208: marr += n_D*pcbddc->local_primal_size;
4209: }
4210: if (!pcbddc->symmetric_primal) {
4211: MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);
4212: marr += n_B*pcbddc->local_primal_size;
4213: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4214: MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);
4215: }
4216: } else {
4217: PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);
4218: pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
4219: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4220: PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);
4221: pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
4222: }
4223: }
4224: }
4226: /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4227: p0_lidx_I = NULL;
4228: if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4229: const PetscInt *idxs;
4231: ISGetIndices(pcis->is_I_local,&idxs);
4232: PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);
4233: for (i=0;i<pcbddc->benign_n;i++) {
4234: PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);
4235: }
4236: ISRestoreIndices(pcis->is_I_local,&idxs);
4237: }
4239: /* vertices */
4240: if (n_vertices) {
4241: PetscBool restoreavr = PETSC_FALSE;
4243: MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);
4245: if (n_R) {
4246: Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */
4247: PetscBLASInt B_N,B_one = 1;
4248: const PetscScalar *x;
4249: PetscScalar *y;
4251: MatScale(A_RV,m_one);
4252: if (need_benign_correction) {
4253: ISLocalToGlobalMapping RtoN;
4254: IS is_p0;
4255: PetscInt *idxs_p0,n;
4257: PetscMalloc1(pcbddc->benign_n,&idxs_p0);
4258: ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);
4259: ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);
4260: if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %D != %D",n,pcbddc->benign_n);
4261: ISLocalToGlobalMappingDestroy(&RtoN);
4262: ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);
4263: MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);
4264: ISDestroy(&is_p0);
4265: }
4267: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);
4268: if (!sparserhs || need_benign_correction) {
4269: if (lda_rhs == n_R) {
4270: MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);
4271: } else {
4272: PetscScalar *av,*array;
4273: const PetscInt *xadj,*adjncy;
4274: PetscInt n;
4275: PetscBool flg_row;
4277: array = work+lda_rhs*n_vertices;
4278: PetscArrayzero(array,lda_rhs*n_vertices);
4279: MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);
4280: MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4281: MatSeqAIJGetArray(A_RV,&av);
4282: for (i=0;i<n;i++) {
4283: PetscInt j;
4284: for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j];
4285: }
4286: MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4287: MatDestroy(&A_RV);
4288: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);
4289: }
4290: if (need_benign_correction) {
4291: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4292: PetscScalar *marr;
4294: MatDenseGetArray(A_RV,&marr);
4295: /* need \Phi^T A_RV = (I+L)A_RV, L given by
4297: | 0 0 0 | (V)
4298: L = | 0 0 -1 | (P-p0)
4299: | 0 0 -1 | (p0)
4301: */
4302: for (i=0;i<reuse_solver->benign_n;i++) {
4303: const PetscScalar *vals;
4304: const PetscInt *idxs,*idxs_zero;
4305: PetscInt n,j,nz;
4307: ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4308: ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4309: MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4310: for (j=0;j<n;j++) {
4311: PetscScalar val = vals[j];
4312: PetscInt k,col = idxs[j];
4313: for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val;
4314: }
4315: MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4316: ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4317: }
4318: MatDenseRestoreArray(A_RV,&marr);
4319: }
4320: PetscObjectReference((PetscObject)A_RV);
4321: Brhs = A_RV;
4322: } else {
4323: Mat tA_RVT,A_RVT;
4325: if (!pcbddc->symmetric_primal) {
4326: /* A_RV already scaled by -1 */
4327: MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);
4328: } else {
4329: restoreavr = PETSC_TRUE;
4330: MatScale(A_VR,-1.0);
4331: PetscObjectReference((PetscObject)A_VR);
4332: A_RVT = A_VR;
4333: }
4334: if (lda_rhs != n_R) {
4335: PetscScalar *aa;
4336: PetscInt r,*ii,*jj;
4337: PetscBool done;
4339: MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4340: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");
4341: MatSeqAIJGetArray(A_RVT,&aa);
4342: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);
4343: MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);
4344: if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");
4345: } else {
4346: PetscObjectReference((PetscObject)A_RVT);
4347: tA_RVT = A_RVT;
4348: }
4349: MatCreateTranspose(tA_RVT,&Brhs);
4350: MatDestroy(&tA_RVT);
4351: MatDestroy(&A_RVT);
4352: }
4353: if (F) {
4354: /* need to correct the rhs */
4355: if (need_benign_correction) {
4356: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4357: PetscScalar *marr;
4359: MatDenseGetArray(Brhs,&marr);
4360: if (lda_rhs != n_R) {
4361: for (i=0;i<n_vertices;i++) {
4362: VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4363: PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);
4364: VecResetArray(dummy_vec);
4365: }
4366: } else {
4367: for (i=0;i<n_vertices;i++) {
4368: VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4369: PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);
4370: VecResetArray(pcbddc->vec1_R);
4371: }
4372: }
4373: MatDenseRestoreArray(Brhs,&marr);
4374: }
4375: MatMatSolve(F,Brhs,A_RRmA_RV);
4376: if (restoreavr) {
4377: MatScale(A_VR,-1.0);
4378: }
4379: /* need to correct the solution */
4380: if (need_benign_correction) {
4381: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4382: PetscScalar *marr;
4384: MatDenseGetArray(A_RRmA_RV,&marr);
4385: if (lda_rhs != n_R) {
4386: for (i=0;i<n_vertices;i++) {
4387: VecPlaceArray(dummy_vec,marr+i*lda_rhs);
4388: PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);
4389: VecResetArray(dummy_vec);
4390: }
4391: } else {
4392: for (i=0;i<n_vertices;i++) {
4393: VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);
4394: PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);
4395: VecResetArray(pcbddc->vec1_R);
4396: }
4397: }
4398: MatDenseRestoreArray(A_RRmA_RV,&marr);
4399: }
4400: } else {
4401: MatDenseGetArray(Brhs,&y);
4402: for (i=0;i<n_vertices;i++) {
4403: VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);
4404: VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);
4405: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4406: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4407: VecResetArray(pcbddc->vec1_R);
4408: VecResetArray(pcbddc->vec2_R);
4409: }
4410: MatDenseRestoreArray(Brhs,&y);
4411: }
4412: MatDestroy(&A_RV);
4413: MatDestroy(&Brhs);
4414: /* S_VV and S_CV */
4415: if (n_constraints) {
4416: Mat B;
4418: PetscArrayzero(work+lda_rhs*n_vertices,n_B*n_vertices);
4419: for (i=0;i<n_vertices;i++) {
4420: VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);
4421: VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);
4422: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4423: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4424: VecResetArray(pcis->vec1_B);
4425: VecResetArray(pcbddc->vec1_R);
4426: }
4427: MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);
4428: /* Reuse dense S_C = pcbddc->local_auxmat1 * B */
4429: MatProductCreateWithMat(pcbddc->local_auxmat1,B,NULL,S_CV);
4430: MatProductSetType(S_CV,MATPRODUCT_AB);
4431: MatProductSetFromOptions(S_CV);
4432: MatProductSymbolic(S_CV);
4433: MatProductNumeric(S_CV);
4434: MatProductClear(S_CV);
4436: MatDestroy(&B);
4437: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);
4438: /* Reuse B = local_auxmat2_R * S_CV */
4439: MatProductCreateWithMat(local_auxmat2_R,S_CV,NULL,B);
4440: MatProductSetType(B,MATPRODUCT_AB);
4441: MatProductSetFromOptions(B);
4442: MatProductSymbolic(B);
4443: MatProductNumeric(B);
4445: MatScale(S_CV,m_one);
4446: PetscBLASIntCast(lda_rhs*n_vertices,&B_N);
4447: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one));
4448: MatDestroy(&B);
4449: }
4450: if (lda_rhs != n_R) {
4451: MatDestroy(&A_RRmA_RV);
4452: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);
4453: MatDenseSetLDA(A_RRmA_RV,lda_rhs);
4454: }
4455: MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);
4456: /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4457: if (need_benign_correction) {
4458: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4459: PetscScalar *marr,*sums;
4461: PetscMalloc1(n_vertices,&sums);
4462: MatDenseGetArray(S_VVt,&marr);
4463: for (i=0;i<reuse_solver->benign_n;i++) {
4464: const PetscScalar *vals;
4465: const PetscInt *idxs,*idxs_zero;
4466: PetscInt n,j,nz;
4468: ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);
4469: ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4470: for (j=0;j<n_vertices;j++) {
4471: PetscInt k;
4472: sums[j] = 0.;
4473: for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs];
4474: }
4475: MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);
4476: for (j=0;j<n;j++) {
4477: PetscScalar val = vals[j];
4478: PetscInt k;
4479: for (k=0;k<n_vertices;k++) {
4480: marr[idxs[j]+k*n_vertices] += val*sums[k];
4481: }
4482: }
4483: MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);
4484: ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);
4485: }
4486: PetscFree(sums);
4487: MatDenseRestoreArray(S_VVt,&marr);
4488: MatDestroy(&A_RV_bcorr);
4489: }
4490: MatDestroy(&A_RRmA_RV);
4491: PetscBLASIntCast(n_vertices*n_vertices,&B_N);
4492: MatDenseGetArrayRead(A_VV,&x);
4493: MatDenseGetArray(S_VVt,&y);
4494: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one));
4495: MatDenseRestoreArrayRead(A_VV,&x);
4496: MatDenseRestoreArray(S_VVt,&y);
4497: MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);
4498: MatDestroy(&S_VVt);
4499: } else {
4500: MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);
4501: }
4502: MatDestroy(&A_VV);
4504: /* coarse basis functions */
4505: for (i=0;i<n_vertices;i++) {
4506: PetscScalar *y;
4508: VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4509: MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4510: VecPlaceArray(pcis->vec1_B,y+n_B*i);
4511: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4512: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4513: y[n_B*i+idx_V_B[i]] = 1.0;
4514: MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4515: VecResetArray(pcis->vec1_B);
4517: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4518: PetscInt j;
4520: MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4521: VecPlaceArray(pcis->vec1_D,y+n_D*i);
4522: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4523: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4524: VecResetArray(pcis->vec1_D);
4525: for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4526: MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4527: }
4528: VecResetArray(pcbddc->vec1_R);
4529: }
4530: /* if n_R == 0 the object is not destroyed */
4531: MatDestroy(&A_RV);
4532: }
4533: VecDestroy(&dummy_vec);
4535: if (n_constraints) {
4536: Mat B;
4538: MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);
4539: MatScale(S_CC,m_one);
4540: MatProductCreateWithMat(local_auxmat2_R,S_CC,NULL,B);
4541: MatProductSetType(B,MATPRODUCT_AB);
4542: MatProductSetFromOptions(B);
4543: MatProductSymbolic(B);
4544: MatProductNumeric(B);
4546: MatScale(S_CC,m_one);
4547: if (n_vertices) {
4548: if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
4549: MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);
4550: } else {
4551: Mat S_VCt;
4553: if (lda_rhs != n_R) {
4554: MatDestroy(&B);
4555: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);
4556: MatDenseSetLDA(B,lda_rhs);
4557: }
4558: MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);
4559: MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);
4560: MatDestroy(&S_VCt);
4561: }
4562: }
4563: MatDestroy(&B);
4564: /* coarse basis functions */
4565: for (i=0;i<n_constraints;i++) {
4566: PetscScalar *y;
4568: VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);
4569: MatDenseGetArray(pcbddc->coarse_phi_B,&y);
4570: VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));
4571: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4572: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4573: MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);
4574: VecResetArray(pcis->vec1_B);
4575: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4576: PetscInt j;
4578: MatDenseGetArray(pcbddc->coarse_phi_D,&y);
4579: VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));
4580: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4581: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4582: VecResetArray(pcis->vec1_D);
4583: for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0;
4584: MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);
4585: }
4586: VecResetArray(pcbddc->vec1_R);
4587: }
4588: }
4589: if (n_constraints) {
4590: MatDestroy(&local_auxmat2_R);
4591: }
4592: PetscFree(p0_lidx_I);
4594: /* coarse matrix entries relative to B_0 */
4595: if (pcbddc->benign_n) {
4596: Mat B0_B,B0_BPHI;
4597: IS is_dummy;
4598: const PetscScalar *data;
4599: PetscInt j;
4601: ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4602: MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4603: ISDestroy(&is_dummy);
4604: MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4605: MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4606: MatDenseGetArrayRead(B0_BPHI,&data);
4607: for (j=0;j<pcbddc->benign_n;j++) {
4608: PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4609: for (i=0;i<pcbddc->local_primal_size;i++) {
4610: coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j];
4611: coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j];
4612: }
4613: }
4614: MatDenseRestoreArrayRead(B0_BPHI,&data);
4615: MatDestroy(&B0_B);
4616: MatDestroy(&B0_BPHI);
4617: }
4619: /* compute other basis functions for non-symmetric problems */
4620: if (!pcbddc->symmetric_primal) {
4621: Mat B_V=NULL,B_C=NULL;
4622: PetscScalar *marray;
4624: if (n_constraints) {
4625: Mat S_CCT,C_CRT;
4627: MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);
4628: MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);
4629: MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);
4630: MatDestroy(&S_CCT);
4631: if (n_vertices) {
4632: Mat S_VCT;
4634: MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);
4635: MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);
4636: MatDestroy(&S_VCT);
4637: }
4638: MatDestroy(&C_CRT);
4639: } else {
4640: MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);
4641: }
4642: if (n_vertices && n_R) {
4643: PetscScalar *av,*marray;
4644: const PetscInt *xadj,*adjncy;
4645: PetscInt n;
4646: PetscBool flg_row;
4648: /* B_V = B_V - A_VR^T */
4649: MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);
4650: MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4651: MatSeqAIJGetArray(A_VR,&av);
4652: MatDenseGetArray(B_V,&marray);
4653: for (i=0;i<n;i++) {
4654: PetscInt j;
4655: for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j];
4656: }
4657: MatDenseRestoreArray(B_V,&marray);
4658: MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);
4659: MatDestroy(&A_VR);
4660: }
4662: /* currently there's no support for MatTransposeMatSolve(F,B,X) */
4663: if (n_vertices) {
4664: MatDenseGetArray(B_V,&marray);
4665: for (i=0;i<n_vertices;i++) {
4666: VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);
4667: VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4668: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4669: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4670: VecResetArray(pcbddc->vec1_R);
4671: VecResetArray(pcbddc->vec2_R);
4672: }
4673: MatDenseRestoreArray(B_V,&marray);
4674: }
4675: if (B_C) {
4676: MatDenseGetArray(B_C,&marray);
4677: for (i=n_vertices;i<n_constraints+n_vertices;i++) {
4678: VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);
4679: VecPlaceArray(pcbddc->vec2_R,work+i*n_R);
4680: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);
4681: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
4682: VecResetArray(pcbddc->vec1_R);
4683: VecResetArray(pcbddc->vec2_R);
4684: }
4685: MatDenseRestoreArray(B_C,&marray);
4686: }
4687: /* coarse basis functions */
4688: for (i=0;i<pcbddc->local_primal_size;i++) {
4689: PetscScalar *y;
4691: VecPlaceArray(pcbddc->vec1_R,work+i*n_R);
4692: MatDenseGetArray(pcbddc->coarse_psi_B,&y);
4693: VecPlaceArray(pcis->vec1_B,y+n_B*i);
4694: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4695: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
4696: if (i<n_vertices) {
4697: y[n_B*i+idx_V_B[i]] = 1.0;
4698: }
4699: MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);
4700: VecResetArray(pcis->vec1_B);
4702: if (pcbddc->switch_static || pcbddc->dbg_flag) {
4703: MatDenseGetArray(pcbddc->coarse_psi_D,&y);
4704: VecPlaceArray(pcis->vec1_D,y+n_D*i);
4705: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4706: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);
4707: VecResetArray(pcis->vec1_D);
4708: MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);
4709: }
4710: VecResetArray(pcbddc->vec1_R);
4711: }
4712: MatDestroy(&B_V);
4713: MatDestroy(&B_C);
4714: }
4716: /* free memory */
4717: PetscFree(idx_V_B);
4718: MatDestroy(&S_VV);
4719: MatDestroy(&S_CV);
4720: MatDestroy(&S_VC);
4721: MatDestroy(&S_CC);
4722: PetscFree(work);
4723: if (n_vertices) {
4724: MatDestroy(&A_VR);
4725: }
4726: if (n_constraints) {
4727: MatDestroy(&C_CR);
4728: }
4729: PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);
4731: /* Checking coarse_sub_mat and coarse basis functions */
4732: /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4733: /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
4734: if (pcbddc->dbg_flag) {
4735: Mat coarse_sub_mat;
4736: Mat AUXMAT,TM1,TM2,TM3,TM4;
4737: Mat coarse_phi_D,coarse_phi_B;
4738: Mat coarse_psi_D,coarse_psi_B;
4739: Mat A_II,A_BB,A_IB,A_BI;
4740: Mat C_B,CPHI;
4741: IS is_dummy;
4742: Vec mones;
4743: MatType checkmattype=MATSEQAIJ;
4744: PetscReal real_value;
4746: if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
4747: Mat A;
4748: PCBDDCBenignProject(pc,NULL,NULL,&A);
4749: MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);
4750: MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);
4751: MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);
4752: MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);
4753: MatDestroy(&A);
4754: } else {
4755: MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);
4756: MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);
4757: MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);
4758: MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);
4759: }
4760: MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);
4761: MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);
4762: if (!pcbddc->symmetric_primal) {
4763: MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);
4764: MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);
4765: }
4766: MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);
4768: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
4769: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);
4770: PetscViewerFlush(pcbddc->dbg_viewer);
4771: if (!pcbddc->symmetric_primal) {
4772: MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4773: MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);
4774: MatDestroy(&AUXMAT);
4775: MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4776: MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);
4777: MatDestroy(&AUXMAT);
4778: MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4779: MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4780: MatDestroy(&AUXMAT);
4781: MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4782: MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4783: MatDestroy(&AUXMAT);
4784: } else {
4785: MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);
4786: MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);
4787: MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4788: MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);
4789: MatDestroy(&AUXMAT);
4790: MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);
4791: MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);
4792: MatDestroy(&AUXMAT);
4793: }
4794: MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);
4795: MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);
4796: MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);
4797: MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);
4798: if (pcbddc->benign_n) {
4799: Mat B0_B,B0_BPHI;
4800: const PetscScalar *data2;
4801: PetscScalar *data;
4802: PetscInt j;
4804: ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);
4805: MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
4806: MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);
4807: MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);
4808: MatDenseGetArray(TM1,&data);
4809: MatDenseGetArrayRead(B0_BPHI,&data2);
4810: for (j=0;j<pcbddc->benign_n;j++) {
4811: PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
4812: for (i=0;i<pcbddc->local_primal_size;i++) {
4813: data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j];
4814: data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j];
4815: }
4816: }
4817: MatDenseRestoreArray(TM1,&data);
4818: MatDenseRestoreArrayRead(B0_BPHI,&data2);
4819: MatDestroy(&B0_B);
4820: ISDestroy(&is_dummy);
4821: MatDestroy(&B0_BPHI);
4822: }
4823: #if 0
4824: {
4825: PetscViewer viewer;
4826: char filename[256];
4827: sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level);
4828: PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);
4829: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
4830: PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");
4831: MatView(coarse_sub_mat,viewer);
4832: PetscObjectSetName((PetscObject)TM1,"projected");
4833: MatView(TM1,viewer);
4834: if (pcbddc->coarse_phi_B) {
4835: PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");
4836: MatView(pcbddc->coarse_phi_B,viewer);
4837: }
4838: if (pcbddc->coarse_phi_D) {
4839: PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");
4840: MatView(pcbddc->coarse_phi_D,viewer);
4841: }
4842: if (pcbddc->coarse_psi_B) {
4843: PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");
4844: MatView(pcbddc->coarse_psi_B,viewer);
4845: }
4846: if (pcbddc->coarse_psi_D) {
4847: PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");
4848: MatView(pcbddc->coarse_psi_D,viewer);
4849: }
4850: PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");
4851: MatView(pcbddc->local_mat,viewer);
4852: PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");
4853: MatView(pcbddc->ConstraintMatrix,viewer);
4854: PetscObjectSetName((PetscObject)pcis->is_I_local,"I");
4855: ISView(pcis->is_I_local,viewer);
4856: PetscObjectSetName((PetscObject)pcis->is_B_local,"B");
4857: ISView(pcis->is_B_local,viewer);
4858: PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");
4859: ISView(pcbddc->is_R_local,viewer);
4860: PetscViewerDestroy(&viewer);
4861: }
4862: #endif
4863: MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);
4864: MatNorm(TM1,NORM_FROBENIUS,&real_value);
4865: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
4866: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);
4868: /* check constraints */
4869: ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);
4870: MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);
4871: if (!pcbddc->benign_n) { /* TODO: add benign case */
4872: MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);
4873: } else {
4874: PetscScalar *data;
4875: Mat tmat;
4876: MatDenseGetArray(pcbddc->coarse_phi_B,&data);
4877: MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);
4878: MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);
4879: MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);
4880: MatDestroy(&tmat);
4881: }
4882: MatCreateVecs(CPHI,&mones,NULL);
4883: VecSet(mones,-1.0);
4884: MatDiagonalSet(CPHI,mones,ADD_VALUES);
4885: MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4886: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4887: if (!pcbddc->symmetric_primal) {
4888: MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);
4889: VecSet(mones,-1.0);
4890: MatDiagonalSet(CPHI,mones,ADD_VALUES);
4891: MatNorm(CPHI,NORM_FROBENIUS,&real_value);
4892: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);
4893: }
4894: MatDestroy(&C_B);
4895: MatDestroy(&CPHI);
4896: ISDestroy(&is_dummy);
4897: VecDestroy(&mones);
4898: PetscViewerFlush(pcbddc->dbg_viewer);
4899: MatDestroy(&A_II);
4900: MatDestroy(&A_BB);
4901: MatDestroy(&A_IB);
4902: MatDestroy(&A_BI);
4903: MatDestroy(&TM1);
4904: MatDestroy(&TM2);
4905: MatDestroy(&TM3);
4906: MatDestroy(&TM4);
4907: MatDestroy(&coarse_phi_D);
4908: MatDestroy(&coarse_phi_B);
4909: if (!pcbddc->symmetric_primal) {
4910: MatDestroy(&coarse_psi_D);
4911: MatDestroy(&coarse_psi_B);
4912: }
4913: MatDestroy(&coarse_sub_mat);
4914: }
4915: /* FINAL CUDA support (we cannot currently mix viennacl and cuda vectors */
4916: {
4917: PetscBool gpu;
4919: PetscObjectTypeCompare((PetscObject)pcis->vec1_N,VECSEQCUDA,&gpu);
4920: if (gpu) {
4921: if (pcbddc->local_auxmat1) {
4922: MatConvert(pcbddc->local_auxmat1,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat1);
4923: }
4924: if (pcbddc->local_auxmat2) {
4925: MatConvert(pcbddc->local_auxmat2,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->local_auxmat2);
4926: }
4927: if (pcbddc->coarse_phi_B) {
4928: MatConvert(pcbddc->coarse_phi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_B);
4929: }
4930: if (pcbddc->coarse_phi_D) {
4931: MatConvert(pcbddc->coarse_phi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_phi_D);
4932: }
4933: if (pcbddc->coarse_psi_B) {
4934: MatConvert(pcbddc->coarse_psi_B,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_B);
4935: }
4936: if (pcbddc->coarse_psi_D) {
4937: MatConvert(pcbddc->coarse_psi_D,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&pcbddc->coarse_psi_D);
4938: }
4939: }
4940: }
4941: /* get back data */
4942: *coarse_submat_vals_n = coarse_submat_vals;
4943: return(0);
4944: }
4946: PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B)
4947: {
4948: Mat *work_mat;
4949: IS isrow_s,iscol_s;
4950: PetscBool rsorted,csorted;
4951: PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL;
4955: ISSorted(isrow,&rsorted);
4956: ISSorted(iscol,&csorted);
4957: ISGetLocalSize(isrow,&rsize);
4958: ISGetLocalSize(iscol,&csize);
4960: if (!rsorted) {
4961: const PetscInt *idxs;
4962: PetscInt *idxs_sorted,i;
4964: PetscMalloc1(rsize,&idxs_perm_r);
4965: PetscMalloc1(rsize,&idxs_sorted);
4966: for (i=0;i<rsize;i++) {
4967: idxs_perm_r[i] = i;
4968: }
4969: ISGetIndices(isrow,&idxs);
4970: PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);
4971: for (i=0;i<rsize;i++) {
4972: idxs_sorted[i] = idxs[idxs_perm_r[i]];
4973: }
4974: ISRestoreIndices(isrow,&idxs);
4975: ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);
4976: } else {
4977: PetscObjectReference((PetscObject)isrow);
4978: isrow_s = isrow;
4979: }
4981: if (!csorted) {
4982: if (isrow == iscol) {
4983: PetscObjectReference((PetscObject)isrow_s);
4984: iscol_s = isrow_s;
4985: } else {
4986: const PetscInt *idxs;
4987: PetscInt *idxs_sorted,i;
4989: PetscMalloc1(csize,&idxs_perm_c);
4990: PetscMalloc1(csize,&idxs_sorted);
4991: for (i=0;i<csize;i++) {
4992: idxs_perm_c[i] = i;
4993: }
4994: ISGetIndices(iscol,&idxs);
4995: PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);
4996: for (i=0;i<csize;i++) {
4997: idxs_sorted[i] = idxs[idxs_perm_c[i]];
4998: }
4999: ISRestoreIndices(iscol,&idxs);
5000: ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);
5001: }
5002: } else {
5003: PetscObjectReference((PetscObject)iscol);
5004: iscol_s = iscol;
5005: }
5007: MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);
5009: if (!rsorted || !csorted) {
5010: Mat new_mat;
5011: IS is_perm_r,is_perm_c;
5013: if (!rsorted) {
5014: PetscInt *idxs_r,i;
5015: PetscMalloc1(rsize,&idxs_r);
5016: for (i=0;i<rsize;i++) {
5017: idxs_r[idxs_perm_r[i]] = i;
5018: }
5019: PetscFree(idxs_perm_r);
5020: ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);
5021: } else {
5022: ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);
5023: }
5024: ISSetPermutation(is_perm_r);
5026: if (!csorted) {
5027: if (isrow_s == iscol_s) {
5028: PetscObjectReference((PetscObject)is_perm_r);
5029: is_perm_c = is_perm_r;
5030: } else {
5031: PetscInt *idxs_c,i;
5032: if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present");
5033: PetscMalloc1(csize,&idxs_c);
5034: for (i=0;i<csize;i++) {
5035: idxs_c[idxs_perm_c[i]] = i;
5036: }
5037: PetscFree(idxs_perm_c);
5038: ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);
5039: }
5040: } else {
5041: ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);
5042: }
5043: ISSetPermutation(is_perm_c);
5045: MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);
5046: MatDestroy(&work_mat[0]);
5047: work_mat[0] = new_mat;
5048: ISDestroy(&is_perm_r);
5049: ISDestroy(&is_perm_c);
5050: }
5052: PetscObjectReference((PetscObject)work_mat[0]);
5053: *B = work_mat[0];
5054: MatDestroyMatrices(1,&work_mat);
5055: ISDestroy(&isrow_s);
5056: ISDestroy(&iscol_s);
5057: return(0);
5058: }
5060: PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
5061: {
5062: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5063: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
5064: Mat new_mat,lA;
5065: IS is_local,is_global;
5066: PetscInt local_size;
5067: PetscBool isseqaij;
5071: MatDestroy(&pcbddc->local_mat);
5072: MatGetSize(matis->A,&local_size,NULL);
5073: ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);
5074: ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);
5075: ISDestroy(&is_local);
5076: MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);
5077: ISDestroy(&is_global);
5079: if (pcbddc->dbg_flag) {
5080: Vec x,x_change;
5081: PetscReal error;
5083: MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);
5084: VecSetRandom(x,NULL);
5085: MatMult(ChangeOfBasisMatrix,x,x_change);
5086: VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
5087: VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);
5088: MatMult(new_mat,matis->x,matis->y);
5089: if (!pcbddc->change_interior) {
5090: const PetscScalar *x,*y,*v;
5091: PetscReal lerror = 0.;
5092: PetscInt i;
5094: VecGetArrayRead(matis->x,&x);
5095: VecGetArrayRead(matis->y,&y);
5096: VecGetArrayRead(matis->counter,&v);
5097: for (i=0;i<local_size;i++)
5098: if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror)
5099: lerror = PetscAbsScalar(x[i]-y[i]);
5100: VecRestoreArrayRead(matis->x,&x);
5101: VecRestoreArrayRead(matis->y,&y);
5102: VecRestoreArrayRead(matis->counter,&v);
5103: MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));
5104: if (error > PETSC_SMALL) {
5105: if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5106: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",error);
5107: } else {
5108: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",error);
5109: }
5110: }
5111: }
5112: VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
5113: VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);
5114: VecAXPY(x,-1.0,x_change);
5115: VecNorm(x,NORM_INFINITY,&error);
5116: if (error > PETSC_SMALL) {
5117: if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5118: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error);
5119: } else {
5120: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",error);
5121: }
5122: }
5123: VecDestroy(&x);
5124: VecDestroy(&x_change);
5125: }
5127: /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
5128: PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);
5130: /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
5131: PetscObjectBaseTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);
5132: if (isseqaij) {
5133: MatDestroy(&pcbddc->local_mat);
5134: MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
5135: if (lA) {
5136: Mat work;
5137: MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
5138: PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
5139: MatDestroy(&work);
5140: }
5141: } else {
5142: Mat work_mat;
5144: MatDestroy(&pcbddc->local_mat);
5145: MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
5146: MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);
5147: MatDestroy(&work_mat);
5148: if (lA) {
5149: Mat work;
5150: MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);
5151: MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);
5152: PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);
5153: MatDestroy(&work);
5154: }
5155: }
5156: if (matis->A->symmetric_set) {
5157: MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);
5158: #if !defined(PETSC_USE_COMPLEX)
5159: MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);
5160: #endif
5161: }
5162: MatDestroy(&new_mat);
5163: return(0);
5164: }
5166: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
5167: {
5168: PC_IS* pcis = (PC_IS*)(pc->data);
5169: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
5170: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5171: PetscInt *idx_R_local=NULL;
5172: PetscInt n_vertices,i,j,n_R,n_D,n_B;
5173: PetscInt vbs,bs;
5174: PetscBT bitmask=NULL;
5175: PetscErrorCode ierr;
5178: /*
5179: No need to setup local scatters if
5180: - primal space is unchanged
5181: AND
5182: - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
5183: AND
5184: - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
5185: */
5186: if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) {
5187: return(0);
5188: }
5189: /* destroy old objects */
5190: ISDestroy(&pcbddc->is_R_local);
5191: VecScatterDestroy(&pcbddc->R_to_B);
5192: VecScatterDestroy(&pcbddc->R_to_D);
5193: /* Set Non-overlapping dimensions */
5194: n_B = pcis->n_B;
5195: n_D = pcis->n - n_B;
5196: n_vertices = pcbddc->n_vertices;
5198: /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */
5200: /* create auxiliary bitmask and allocate workspace */
5201: if (!sub_schurs || !sub_schurs->reuse_solver) {
5202: PetscMalloc1(pcis->n-n_vertices,&idx_R_local);
5203: PetscBTCreate(pcis->n,&bitmask);
5204: for (i=0;i<n_vertices;i++) {
5205: PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);
5206: }
5208: for (i=0, n_R=0; i<pcis->n; i++) {
5209: if (!PetscBTLookup(bitmask,i)) {
5210: idx_R_local[n_R++] = i;
5211: }
5212: }
5213: } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
5214: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5216: ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
5217: ISGetLocalSize(reuse_solver->is_R,&n_R);
5218: }
5220: /* Block code */
5221: vbs = 1;
5222: MatGetBlockSize(pcbddc->local_mat,&bs);
5223: if (bs>1 && !(n_vertices%bs)) {
5224: PetscBool is_blocked = PETSC_TRUE;
5225: PetscInt *vary;
5226: if (!sub_schurs || !sub_schurs->reuse_solver) {
5227: PetscMalloc1(pcis->n/bs,&vary);
5228: PetscArrayzero(vary,pcis->n/bs);
5229: /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5230: /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
5231: for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++;
5232: for (i=0; i<pcis->n/bs; i++) {
5233: if (vary[i]!=0 && vary[i]!=bs) {
5234: is_blocked = PETSC_FALSE;
5235: break;
5236: }
5237: }
5238: PetscFree(vary);
5239: } else {
5240: /* Verify directly the R set */
5241: for (i=0; i<n_R/bs; i++) {
5242: PetscInt j,node=idx_R_local[bs*i];
5243: for (j=1; j<bs; j++) {
5244: if (node != idx_R_local[bs*i+j]-j) {
5245: is_blocked = PETSC_FALSE;
5246: break;
5247: }
5248: }
5249: }
5250: }
5251: if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5252: vbs = bs;
5253: for (i=0;i<n_R/vbs;i++) {
5254: idx_R_local[i] = idx_R_local[vbs*i]/vbs;
5255: }
5256: }
5257: }
5258: ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);
5259: if (sub_schurs && sub_schurs->reuse_solver) {
5260: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5262: ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);
5263: ISDestroy(&reuse_solver->is_R);
5264: PetscObjectReference((PetscObject)pcbddc->is_R_local);
5265: reuse_solver->is_R = pcbddc->is_R_local;
5266: } else {
5267: PetscFree(idx_R_local);
5268: }
5270: /* print some info if requested */
5271: if (pcbddc->dbg_flag) {
5272: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5273: PetscViewerFlush(pcbddc->dbg_viewer);
5274: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5275: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);
5276: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B);
5277: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);
5278: PetscViewerFlush(pcbddc->dbg_viewer);
5279: }
5281: /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5282: if (!sub_schurs || !sub_schurs->reuse_solver) {
5283: IS is_aux1,is_aux2;
5284: PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local;
5286: ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
5287: PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);
5288: PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);
5289: ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
5290: for (i=0; i<n_D; i++) {
5291: PetscBTSet(bitmask,is_indices[i]);
5292: }
5293: ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);
5294: for (i=0, j=0; i<n_R; i++) {
5295: if (!PetscBTLookup(bitmask,idx_R_local[i])) {
5296: aux_array1[j++] = i;
5297: }
5298: }
5299: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
5300: ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
5301: for (i=0, j=0; i<n_B; i++) {
5302: if (!PetscBTLookup(bitmask,is_indices[i])) {
5303: aux_array2[j++] = i;
5304: }
5305: }
5306: ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);
5307: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);
5308: VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);
5309: ISDestroy(&is_aux1);
5310: ISDestroy(&is_aux2);
5312: if (pcbddc->switch_static || pcbddc->dbg_flag) {
5313: PetscMalloc1(n_D,&aux_array1);
5314: for (i=0, j=0; i<n_R; i++) {
5315: if (PetscBTLookup(bitmask,idx_R_local[i])) {
5316: aux_array1[j++] = i;
5317: }
5318: }
5319: ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);
5320: VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
5321: ISDestroy(&is_aux1);
5322: }
5323: PetscBTDestroy(&bitmask);
5324: ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);
5325: } else {
5326: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5327: IS tis;
5328: PetscInt schur_size;
5330: ISGetLocalSize(reuse_solver->is_B,&schur_size);
5331: ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);
5332: VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);
5333: ISDestroy(&tis);
5334: if (pcbddc->switch_static || pcbddc->dbg_flag) {
5335: ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);
5336: VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);
5337: ISDestroy(&tis);
5338: }
5339: }
5340: return(0);
5341: }
5343: static PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B)
5344: {
5345: MatNullSpace NullSpace;
5346: Mat dmat;
5347: const Vec *nullvecs;
5348: Vec v,v2,*nullvecs2;
5349: VecScatter sct = NULL;
5350: PetscContainer c;
5351: PetscScalar *ddata;
5352: PetscInt k,nnsp_size,bsiz,bsiz2,n,N,bs;
5353: PetscBool nnsp_has_cnst;
5357: if (!is && !B) { /* MATIS */
5358: Mat_IS* matis = (Mat_IS*)A->data;
5360: if (!B) {
5361: MatISGetLocalMat(A,&B);
5362: }
5363: sct = matis->cctx;
5364: PetscObjectReference((PetscObject)sct);
5365: } else {
5366: MatGetNullSpace(B,&NullSpace);
5367: if (!NullSpace) {
5368: MatGetNearNullSpace(B,&NullSpace);
5369: }
5370: if (NullSpace) return(0);
5371: }
5372: MatGetNullSpace(A,&NullSpace);
5373: if (!NullSpace) {
5374: MatGetNearNullSpace(A,&NullSpace);
5375: }
5376: if (!NullSpace) return(0);
5378: MatCreateVecs(A,&v,NULL);
5379: MatCreateVecs(B,&v2,NULL);
5380: if (!sct) {
5381: VecScatterCreate(v,is,v2,NULL,&sct);
5382: }
5383: MatNullSpaceGetVecs(NullSpace,&nnsp_has_cnst,&nnsp_size,(const Vec**)&nullvecs);
5384: bsiz = bsiz2 = nnsp_size+!!nnsp_has_cnst;
5385: PetscMalloc1(bsiz,&nullvecs2);
5386: VecGetBlockSize(v2,&bs);
5387: VecGetSize(v2,&N);
5388: VecGetLocalSize(v2,&n);
5389: PetscMalloc1(n*bsiz,&ddata);
5390: for (k=0;k<nnsp_size;k++) {
5391: VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*k,&nullvecs2[k]);
5392: VecScatterBegin(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);
5393: VecScatterEnd(sct,nullvecs[k],nullvecs2[k],INSERT_VALUES,SCATTER_FORWARD);
5394: }
5395: if (nnsp_has_cnst) {
5396: VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),bs,n,N,ddata + n*nnsp_size,&nullvecs2[nnsp_size]);
5397: VecSet(nullvecs2[nnsp_size],1.0);
5398: }
5399: PCBDDCOrthonormalizeVecs(&bsiz2,nullvecs2);
5400: MatNullSpaceCreate(PetscObjectComm((PetscObject)B),PETSC_FALSE,bsiz2,nullvecs2,&NullSpace);
5402: MatCreateDense(PetscObjectComm((PetscObject)B),n,PETSC_DECIDE,N,bsiz2,ddata,&dmat);
5403: PetscContainerCreate(PetscObjectComm((PetscObject)B),&c);
5404: PetscContainerSetPointer(c,ddata);
5405: PetscContainerSetUserDestroy(c,PetscContainerUserDestroyDefault);
5406: PetscObjectCompose((PetscObject)dmat,"_PBDDC_Null_dmat_arr",(PetscObject)c);
5407: PetscContainerDestroy(&c);
5408: PetscObjectCompose((PetscObject)NullSpace,"_PBDDC_Null_dmat",(PetscObject)dmat);
5409: MatDestroy(&dmat);
5411: for (k=0;k<bsiz;k++) {
5412: VecDestroy(&nullvecs2[k]);
5413: }
5414: PetscFree(nullvecs2);
5415: MatSetNearNullSpace(B,NullSpace);
5416: MatNullSpaceDestroy(&NullSpace);
5417: VecDestroy(&v);
5418: VecDestroy(&v2);
5419: VecScatterDestroy(&sct);
5420: return(0);
5421: }
/*
   PCBDDCSetUpLocalSolvers - Creates (on first use) and sets up the subdomain solvers
   pcbddc->ksp_D (Dirichlet) and pcbddc->ksp_R (Neumann).

   Input Parameters:
+  pc        - the preconditioner; pc->data is read both as PC_BDDC and as PC_IS
.  dirichlet - if true, ksp_D is configured with operators pcis->A_II / pcis->pA_II
-  neumann   - if true, ksp_R is configured with the submatrix of pcbddc->local_mat
               selected by pcbddc->is_R_local (or with the operator of the reused correction solver)

   Notes:
   Options prefixes are built from the prefix of pc ("pc_bddc_dirichlet_" / "pc_bddc_neumann_",
   followed by "l<level>_" on coarser levels).
   When pcbddc->dbg_flag is set, both solvers are checked on a random vector and the infinity
   norm of the error is printed on pcbddc->dbg_viewer.
*/
5423: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5424: {
5425: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
5426: PC_IS *pcis = (PC_IS*)pc->data;
5427: PC pc_temp;
5428: Mat A_RR;
5429: MatNullSpace nnsp;
5430: MatReuse reuse;
5431: PetscScalar m_one = -1.0;
5432: PetscReal value;
5433: PetscInt n_D,n_R;
5434: PetscBool issbaij,opts;
5436: void (*f)(void) = NULL;
5437: char dir_prefix[256],neu_prefix[256],str_level[16];
5438: size_t len;
5441: PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);
5442: /* approximate solver, propagate NearNullSpace if needed */
5443: if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) {
5444: MatNullSpace gnnsp1,gnnsp2;
5445: PetscBool lhas,ghas;
5447: MatGetNearNullSpace(pcbddc->local_mat,&nnsp);
5448: MatGetNearNullSpace(pc->pmat,&gnnsp1);
5449: MatGetNullSpace(pc->pmat,&gnnsp2);
5450: lhas = nnsp ? PETSC_TRUE : PETSC_FALSE;
/* ghas is true if any process of pc's communicator has a near null space on its local matrix */
5451: MPIU_Allreduce(&lhas,&ghas,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
5452: if (!ghas && (gnnsp1 || gnnsp2)) {
5453: MatNullSpacePropagateAny_Private(pc->pmat,NULL,NULL);
5454: }
5455: }
5457: /* compute prefixes */
5458: PetscStrcpy(dir_prefix,"");
5459: PetscStrcpy(neu_prefix,"");
5460: if (!pcbddc->current_level) {
5461: PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix));
5462: PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix));
5463: PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));
5464: PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));
5465: } else {
/* NOTE(review): the arithmetic on len (a size_t) assumes the prefix of a coarse-level pc always ends
   with "pc_bddc_coarse_" (followed by "lX_" beyond the first coarse level) -- confirm where that prefix is set */
5466: PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));
5467: PetscStrlen(((PetscObject)pc)->prefix,&len);
5468: len -= 15; /* remove "pc_bddc_coarse_" */
5469: if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
5470: if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
5471: /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
5472: PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);
5473: PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);
5474: PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));
5475: PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));
5476: PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix));
5477: PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix));
5478: }
5480: /* DIRICHLET PROBLEM */
5481: if (dirichlet) {
5482: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5483: if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5484: if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented");
5485: if (pcbddc->dbg_flag) {
5486: Mat A_IIn;
/* in debug mode pcis->A_II is replaced by the matrix returned by PCBDDCBenignProject on the interior dofs */
5488: PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);
5489: MatDestroy(&pcis->A_II);
5490: pcis->A_II = A_IIn;
5491: }
5492: }
5493: if (pcbddc->local_mat->symmetric_set) {
5494: MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);
5495: }
5496: /* Matrix for Dirichlet problem is pcis->A_II */
5497: n_D = pcis->n - pcis->n_B;
/* opts records that ksp_D is created in this call, so that options are processed only then */
5498: opts = PETSC_FALSE;
5499: if (!pcbddc->ksp_D) { /* create object if not yet build */
5500: opts = PETSC_TRUE;
5501: KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);
5502: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);
5503: /* default */
5504: KSPSetType(pcbddc->ksp_D,KSPPREONLY);
5505: KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);
5506: PetscObjectTypeCompare((PetscObject)pcis->pA_II,MATSEQSBAIJ,&issbaij);
5507: KSPGetPC(pcbddc->ksp_D,&pc_temp);
5508: if (issbaij) {
5509: PCSetType(pc_temp,PCCHOLESKY);
5510: } else {
5511: PCSetType(pc_temp,PCLU);
5512: }
5513: KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure);
5514: }
5515: MatSetOptionsPrefix(pcis->pA_II,((PetscObject)pcbddc->ksp_D)->prefix);
5516: KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->pA_II);
5517: /* Allow user's customization */
5518: if (opts) {
5519: KSPSetFromOptions(pcbddc->ksp_D);
5520: }
5521: MatGetNearNullSpace(pcis->pA_II,&nnsp);
5522: if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5523: MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcis->is_I_local,pcis->pA_II);
5524: }
/* query again: the propagation above may have attached a near null space */
5525: MatGetNearNullSpace(pcis->pA_II,&nnsp);
5526: KSPGetPC(pcbddc->ksp_D,&pc_temp);
5527: PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);
/* pass the coordinates of the interior dofs when the inner PC composes "PCSetCoordinates_C",
   coordinates are available and no near null space is attached */
5528: if (f && pcbddc->mat_graph->cloc && !nnsp) {
5529: PetscReal *coords = pcbddc->mat_graph->coords,*scoords;
5530: const PetscInt *idxs;
5531: PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d;
5533: ISGetLocalSize(pcis->is_I_local,&nl);
5534: ISGetIndices(pcis->is_I_local,&idxs);
5535: PetscMalloc1(nl*cdim,&scoords);
5536: for (i=0;i<nl;i++) {
5537: for (d=0;d<cdim;d++) {
5538: scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5539: }
5540: }
5541: ISRestoreIndices(pcis->is_I_local,&idxs);
5542: PCSetCoordinates(pc_temp,cdim,nl,scoords);
5543: PetscFree(scoords);
5544: }
5545: if (sub_schurs && sub_schurs->reuse_solver) {
5546: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5548: KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);
5549: }
5551: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5552: if (!n_D) {
5553: KSPGetPC(pcbddc->ksp_D,&pc_temp);
5554: PCSetType(pc_temp,PCNONE);
5555: }
5556: KSPSetUp(pcbddc->ksp_D);
5557: /* set ksp_D into pcis data */
5558: PetscObjectReference((PetscObject)pcbddc->ksp_D);
5559: KSPDestroy(&pcis->ksp_D);
5560: pcis->ksp_D = pcbddc->ksp_D;
5561: }
5563: /* NEUMANN PROBLEM */
5564: A_RR = NULL;
5565: if (neumann) {
5566: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5567: PetscInt ibs,mbs;
5568: PetscBool issbaij, reuse_neumann_solver;
5569: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
5571: reuse_neumann_solver = PETSC_FALSE;
5572: if (sub_schurs && sub_schurs->reuse_solver) {
5573: IS iP;
/* the correction solver is reused unless an object is composed with sub_schurs->A under "__KSPFETIDP_iP" */
5575: reuse_neumann_solver = PETSC_TRUE;
5576: PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);
5577: if (iP) reuse_neumann_solver = PETSC_FALSE;
5578: }
5579: /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
5580: ISGetSize(pcbddc->is_R_local,&n_R);
5581: if (pcbddc->ksp_R) { /* already created ksp */
5582: PetscInt nn_R;
5583: KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);
/* hold our own reference: A_RR is destroyed at the end of this routine (or earlier, below) */
5584: PetscObjectReference((PetscObject)A_RR);
5585: MatGetSize(A_RR,&nn_R,NULL);
5586: if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
5587: KSPReset(pcbddc->ksp_R);
5588: MatDestroy(&A_RR);
5589: reuse = MAT_INITIAL_MATRIX;
5590: } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
5591: if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
5592: MatDestroy(&A_RR);
5593: reuse = MAT_INITIAL_MATRIX;
5594: } else { /* safe to reuse the matrix */
5595: reuse = MAT_REUSE_MATRIX;
5596: }
5597: }
5598: /* last check */
5599: if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
5600: MatDestroy(&A_RR);
5601: reuse = MAT_INITIAL_MATRIX;
5602: }
5603: } else { /* first time, so we need to create the matrix */
5604: reuse = MAT_INITIAL_MATRIX;
5605: }
5606: /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection
5607: TODO: Get Rid of these conversions */
5608: MatGetBlockSize(pcbddc->local_mat,&mbs);
5609: ISGetBlockSize(pcbddc->is_R_local,&ibs);
5610: PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);
/* when local_mat is the very same object as matis->A, a converted copy is created instead of
   converting in place, so matis->A itself is not converted */
5611: if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
5612: if (matis->A == pcbddc->local_mat) {
5613: MatDestroy(&pcbddc->local_mat);
5614: MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5615: } else {
5616: MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5617: }
5618: } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */
5619: if (matis->A == pcbddc->local_mat) {
5620: MatDestroy(&pcbddc->local_mat);
5621: MatConvert(matis->A,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);
5622: } else {
5623: MatConvert(pcbddc->local_mat,mbs > 1 ? MATSEQBAIJ : MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);
5624: }
5625: }
5626: /* extract A_RR */
5627: if (reuse_neumann_solver) {
5628: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5630: if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
5631: MatDestroy(&A_RR);
5632: if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
5633: PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);
5634: } else {
5635: MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);
5636: }
5637: } else {
/* no explicit submatrix: use the operator already attached to the reused correction solver */
5638: MatDestroy(&A_RR);
5639: PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);
5640: PetscObjectReference((PetscObject)A_RR);
5641: }
5642: } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
5643: MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);
5644: }
5645: if (pcbddc->local_mat->symmetric_set) {
5646: MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric);
5647: }
5648: opts = PETSC_FALSE;
5649: if (!pcbddc->ksp_R) { /* create object if not present */
5650: opts = PETSC_TRUE;
5651: KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);
5652: PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);
5653: /* default */
5654: KSPSetType(pcbddc->ksp_R,KSPPREONLY);
5655: KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);
5656: KSPGetPC(pcbddc->ksp_R,&pc_temp);
5657: PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);
5658: if (issbaij) {
5659: PCSetType(pc_temp,PCCHOLESKY);
5660: } else {
5661: PCSetType(pc_temp,PCLU);
5662: }
5663: KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure);
5664: }
5665: KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);
5666: MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix);
5667: if (opts) { /* Allow user's customization once */
5668: KSPSetFromOptions(pcbddc->ksp_R);
5669: }
5670: MatGetNearNullSpace(A_RR,&nnsp);
5671: if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */
5672: MatNullSpacePropagateAny_Private(pcbddc->local_mat,pcbddc->is_R_local,A_RR);
5673: }
/* query again: the propagation above may have attached a near null space */
5674: MatGetNearNullSpace(A_RR,&nnsp);
5675: KSPGetPC(pcbddc->ksp_R,&pc_temp);
5676: PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);
/* same coordinate hand-off as in the Dirichlet case, restricted to the dofs in is_R_local */
5677: if (f && pcbddc->mat_graph->cloc && !nnsp) {
5678: PetscReal *coords = pcbddc->mat_graph->coords,*scoords;
5679: const PetscInt *idxs;
5680: PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d;
5682: ISGetLocalSize(pcbddc->is_R_local,&nl);
5683: ISGetIndices(pcbddc->is_R_local,&idxs);
5684: PetscMalloc1(nl*cdim,&scoords);
5685: for (i=0;i<nl;i++) {
5686: for (d=0;d<cdim;d++) {
5687: scoords[i*cdim+d] = coords[idxs[i]*cdim+d];
5688: }
5689: }
5690: ISRestoreIndices(pcbddc->is_R_local,&idxs);
5691: PCSetCoordinates(pc_temp,cdim,nl,scoords);
5692: PetscFree(scoords);
5693: }
5695: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
5696: if (!n_R) {
5697: KSPGetPC(pcbddc->ksp_R,&pc_temp);
5698: PCSetType(pc_temp,PCNONE);
5699: }
5700: /* Reuse solver if it is present */
5701: if (reuse_neumann_solver) {
5702: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5704: KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);
5705: }
5706: KSPSetUp(pcbddc->ksp_R);
5707: }
5709: if (pcbddc->dbg_flag) {
5710: PetscViewerFlush(pcbddc->dbg_viewer);
5711: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
5712: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
5713: }
5714: PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);
5716: /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
5717: if (pcbddc->NullSpace_corr[0]) {
5718: PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);
5719: }
5720: if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) {
5721: PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);
5722: }
5723: if (neumann && pcbddc->NullSpace_corr[2]) {
5724: PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);
5725: }
5726: /* check Dirichlet and Neumann solvers */
5727: if (pcbddc->dbg_flag) {
/* each check: pick a random x, solve with rhs A*x, then print the infinity norm of x minus the computed solution */
5728: if (dirichlet) { /* Dirichlet */
5729: VecSetRandom(pcis->vec1_D,NULL);
5730: MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);
5731: KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);
5732: KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);
5733: VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);
5734: VecNorm(pcis->vec1_D,NORM_INFINITY,&value);
5735: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);
5736: PetscViewerFlush(pcbddc->dbg_viewer);
5737: }
5738: if (neumann) { /* Neumann */
5739: VecSetRandom(pcbddc->vec1_R,NULL);
5740: MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);
5741: KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);
5742: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);
5743: VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);
5744: VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);
5745: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);
5746: PetscViewerFlush(pcbddc->dbg_viewer);
5747: }
5748: }
5749: /* free Neumann problem's matrix */
5750: MatDestroy(&A_RR);
5751: return(0);
5752: }
5754: static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
5755: {
5756: PetscErrorCode ierr;
5757: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5758: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5759: PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE;
5762: if (!reuse_solver) {
5763: VecSet(pcbddc->vec1_R,0.);
5764: }
5765: if (!pcbddc->switch_static) {
5766: if (applytranspose && pcbddc->local_auxmat1) {
5767: MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);
5768: MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5769: }
5770: if (!reuse_solver) {
5771: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5772: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5773: } else {
5774: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5776: VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5777: VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);
5778: }
5779: } else {
5780: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5781: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5782: VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5783: VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5784: if (applytranspose && pcbddc->local_auxmat1) {
5785: MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);
5786: MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);
5787: VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5788: VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);
5789: }
5790: }
5791: if (!reuse_solver || pcbddc->switch_static) {
5792: if (applytranspose) {
5793: KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5794: } else {
5795: KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);
5796: }
5797: KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec1_R);
5798: } else {
5799: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5801: if (applytranspose) {
5802: MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5803: } else {
5804: MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);
5805: }
5806: }
5807: VecSet(inout_B,0.);
5808: if (!pcbddc->switch_static) {
5809: if (!reuse_solver) {
5810: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5811: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5812: } else {
5813: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5815: VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5816: VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);
5817: }
5818: if (!applytranspose && pcbddc->local_auxmat1) {
5819: MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5820: MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);
5821: }
5822: } else {
5823: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5824: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5825: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5826: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5827: if (!applytranspose && pcbddc->local_auxmat1) {
5828: MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);
5829: MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);
5830: }
5831: VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5832: VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);
5833: VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5834: VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);
5835: }
5836: return(0);
5837: }
5839: /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
5840: PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
5841: {
5843: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5844: PC_IS* pcis = (PC_IS*) (pc->data);
5845: const PetscScalar zero = 0.0;
5848: /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
5849: if (!pcbddc->benign_apply_coarse_only) {
5850: if (applytranspose) {
5851: MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);
5852: if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5853: } else {
5854: MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);
5855: if (pcbddc->switch_static) { MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P); }
5856: }
5857: } else {
5858: VecSet(pcbddc->vec1_P,zero);
5859: }
5861: /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
5862: if (pcbddc->benign_n) {
5863: PetscScalar *array;
5864: PetscInt j;
5866: VecGetArray(pcbddc->vec1_P,&array);
5867: for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j];
5868: VecRestoreArray(pcbddc->vec1_P,&array);
5869: }
5871: /* start communications from local primal nodes to rhs of coarse solver */
5872: VecSet(pcbddc->coarse_vec,zero);
5873: PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);
5874: PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);
5876: /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
5877: if (pcbddc->coarse_ksp) {
5878: Mat coarse_mat;
5879: Vec rhs,sol;
5880: MatNullSpace nullsp;
5881: PetscBool isbddc = PETSC_FALSE;
5883: if (pcbddc->benign_have_null) {
5884: PC coarse_pc;
5886: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5887: PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);
5888: /* we need to propagate to coarser levels the need for a possible benign correction */
5889: if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
5890: PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5891: coarsepcbddc->benign_skip_correction = PETSC_FALSE;
5892: coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
5893: }
5894: }
5895: KSPGetRhs(pcbddc->coarse_ksp,&rhs);
5896: KSPGetSolution(pcbddc->coarse_ksp,&sol);
5897: KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
5898: if (applytranspose) {
5899: if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented");
5900: KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);
5901: KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);
5902: MatGetTransposeNullSpace(coarse_mat,&nullsp);
5903: if (nullsp) {
5904: MatNullSpaceRemove(nullsp,sol);
5905: }
5906: } else {
5907: MatGetNullSpace(coarse_mat,&nullsp);
5908: if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
5909: PC coarse_pc;
5911: if (nullsp) {
5912: MatNullSpaceRemove(nullsp,rhs);
5913: }
5914: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5915: PCPreSolve(coarse_pc,pcbddc->coarse_ksp);
5916: PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);
5917: PCPostSolve(coarse_pc,pcbddc->coarse_ksp);
5918: } else {
5919: KSPSolve(pcbddc->coarse_ksp,rhs,sol);
5920: KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);
5921: if (nullsp) {
5922: MatNullSpaceRemove(nullsp,sol);
5923: }
5924: }
5925: }
5926: /* we don't need the benign correction at coarser levels anymore */
5927: if (pcbddc->benign_have_null && isbddc) {
5928: PC coarse_pc;
5929: PC_BDDC* coarsepcbddc;
5931: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
5932: coarsepcbddc = (PC_BDDC*)(coarse_pc->data);
5933: coarsepcbddc->benign_skip_correction = PETSC_TRUE;
5934: coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
5935: }
5936: }
5938: /* Local solution on R nodes */
5939: if (pcis->n && !pcbddc->benign_apply_coarse_only) {
5940: PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);
5941: }
5942: /* communications from coarse sol to local primal nodes */
5943: PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);
5944: PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);
5946: /* Sum contributions from the two levels */
5947: if (!pcbddc->benign_apply_coarse_only) {
5948: if (applytranspose) {
5949: MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5950: if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5951: } else {
5952: MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);
5953: if (pcbddc->switch_static) { MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D); }
5954: }
5955: /* store p0 */
5956: if (pcbddc->benign_n) {
5957: PetscScalar *array;
5958: PetscInt j;
5960: VecGetArray(pcbddc->vec1_P,&array);
5961: for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j];
5962: VecRestoreArray(pcbddc->vec1_P,&array);
5963: }
5964: } else { /* expand the coarse solution */
5965: if (applytranspose) {
5966: MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);
5967: } else {
5968: MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);
5969: }
5970: }
5971: return(0);
5972: }
5974: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode)
5975: {
5976: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
5977: Vec from,to;
5978: const PetscScalar *array;
5979: PetscErrorCode ierr;
5982: if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
5983: from = pcbddc->coarse_vec;
5984: to = pcbddc->vec1_P;
5985: if (pcbddc->coarse_ksp) { /* get array from coarse processes */
5986: Vec tvec;
5988: KSPGetRhs(pcbddc->coarse_ksp,&tvec);
5989: VecResetArray(tvec);
5990: KSPGetSolution(pcbddc->coarse_ksp,&tvec);
5991: VecGetArrayRead(tvec,&array);
5992: VecPlaceArray(from,array);
5993: VecRestoreArrayRead(tvec,&array);
5994: }
5995: } else { /* from local to global -> put data in coarse right hand side */
5996: from = pcbddc->vec1_P;
5997: to = pcbddc->coarse_vec;
5998: }
5999: VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
6000: return(0);
6001: }
6003: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
6004: {
6005: PC_BDDC* pcbddc = (PC_BDDC*)(pc->data);
6006: Vec from,to;
6007: const PetscScalar *array;
6008: PetscErrorCode ierr;
6011: if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
6012: from = pcbddc->coarse_vec;
6013: to = pcbddc->vec1_P;
6014: } else { /* from local to global -> put data in coarse right hand side */
6015: from = pcbddc->vec1_P;
6016: to = pcbddc->coarse_vec;
6017: }
6018: VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);
6019: if (smode == SCATTER_FORWARD) {
6020: if (pcbddc->coarse_ksp) { /* get array from coarse processes */
6021: Vec tvec;
6023: KSPGetRhs(pcbddc->coarse_ksp,&tvec);
6024: VecGetArrayRead(to,&array);
6025: VecPlaceArray(tvec,array);
6026: VecRestoreArrayRead(to,&array);
6027: }
6028: } else {
6029: if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
6030: VecResetArray(from);
6031: }
6032: }
6033: return(0);
6034: }
6036: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
6037: {
6038: PetscErrorCode ierr;
6039: PC_IS* pcis = (PC_IS*)(pc->data);
6040: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
6041: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
6042: /* one and zero */
6043: PetscScalar one=1.0,zero=0.0;
6044: /* space to store constraints and their local indices */
6045: PetscScalar *constraints_data;
6046: PetscInt *constraints_idxs,*constraints_idxs_B;
6047: PetscInt *constraints_idxs_ptr,*constraints_data_ptr;
6048: PetscInt *constraints_n;
6049: /* iterators */
6050: PetscInt i,j,k,total_counts,total_counts_cc,cum;
6051: /* BLAS integers */
6052: PetscBLASInt lwork,lierr;
6053: PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1;
6054: PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC;
6055: /* reuse */
6056: PetscInt olocal_primal_size,olocal_primal_size_cc;
6057: PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult;
6058: /* change of basis */
6059: PetscBool qr_needed;
6060: PetscBT change_basis,qr_needed_idx;
6061: /* auxiliary stuff */
6062: PetscInt *nnz,*is_indices;
6063: PetscInt ncc;
6064: /* some quantities */
6065: PetscInt n_vertices,total_primal_vertices,valid_constraints;
6066: PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints;
6067: PetscReal tol; /* tolerance for retaining eigenmodes */
6070: tol = PetscSqrtReal(PETSC_SMALL);
6071: /* Destroy Mat objects computed previously */
6072: MatDestroy(&pcbddc->ChangeOfBasisMatrix);
6073: MatDestroy(&pcbddc->ConstraintMatrix);
6074: MatDestroy(&pcbddc->switch_static_change);
6075: /* save info on constraints from previous setup (if any) */
6076: olocal_primal_size = pcbddc->local_primal_size;
6077: olocal_primal_size_cc = pcbddc->local_primal_size_cc;
6078: PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);
6079: PetscArraycpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc);
6080: PetscArraycpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc);
6081: PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);
6082: PetscFree(pcbddc->primal_indices_local_idxs);
6084: if (!pcbddc->adaptive_selection) {
6085: IS ISForVertices,*ISForFaces,*ISForEdges;
6086: MatNullSpace nearnullsp;
6087: const Vec *nearnullvecs;
6088: Vec *localnearnullsp;
6089: PetscScalar *array;
6090: PetscInt n_ISForFaces,n_ISForEdges,nnsp_size;
6091: PetscBool nnsp_has_cnst;
6092: /* LAPACK working arrays for SVD or POD */
6093: PetscBool skip_lapack,boolforchange;
6094: PetscScalar *work;
6095: PetscReal *singular_vals;
6096: #if defined(PETSC_USE_COMPLEX)
6097: PetscReal *rwork;
6098: #endif
6099: PetscScalar *temp_basis = NULL,*correlation_mat = NULL;
6100: PetscBLASInt dummy_int=1;
6101: PetscScalar dummy_scalar=1.;
6102: PetscBool use_pod = PETSC_FALSE;
6104: /* MKL SVD with same input gives different results on different processes! */
6105: #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL)
6106: use_pod = PETSC_TRUE;
6107: #endif
6108: /* Get index sets for faces, edges and vertices from graph */
6109: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);
6110: /* print some info */
6111: if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
6112: PetscInt nv;
6114: PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
6115: ISGetSize(ISForVertices,&nv);
6116: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
6117: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
6118: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,nv,pcbddc->use_vertices);
6119: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);
6120: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);
6121: PetscViewerFlush(pcbddc->dbg_viewer);
6122: PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
6123: }
6125: /* free unneeded index sets */
6126: if (!pcbddc->use_vertices) {
6127: ISDestroy(&ISForVertices);
6128: }
6129: if (!pcbddc->use_edges) {
6130: for (i=0;i<n_ISForEdges;i++) {
6131: ISDestroy(&ISForEdges[i]);
6132: }
6133: PetscFree(ISForEdges);
6134: n_ISForEdges = 0;
6135: }
6136: if (!pcbddc->use_faces) {
6137: for (i=0;i<n_ISForFaces;i++) {
6138: ISDestroy(&ISForFaces[i]);
6139: }
6140: PetscFree(ISForFaces);
6141: n_ISForFaces = 0;
6142: }
6144: /* check if near null space is attached to global mat */
6145: if (pcbddc->use_nnsp) {
6146: MatGetNearNullSpace(pc->pmat,&nearnullsp);
6147: } else nearnullsp = NULL;
6149: if (nearnullsp) {
6150: MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);
6151: /* remove any stored info */
6152: MatNullSpaceDestroy(&pcbddc->onearnullspace);
6153: PetscFree(pcbddc->onearnullvecs_state);
6154: /* store information for BDDC solver reuse */
6155: PetscObjectReference((PetscObject)nearnullsp);
6156: pcbddc->onearnullspace = nearnullsp;
6157: PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);
6158: for (i=0;i<nnsp_size;i++) {
6159: PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);
6160: }
6161: } else { /* if near null space is not provided BDDC uses constants by default */
6162: nnsp_size = 0;
6163: nnsp_has_cnst = PETSC_TRUE;
6164: }
6165: /* get max number of constraints on a single cc */
6166: max_constraints = nnsp_size;
6167: if (nnsp_has_cnst) max_constraints++;
6169: /*
6170: Evaluate maximum storage size needed by the procedure
6171: - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
6172: - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
6173: There can be multiple constraints per connected component
6174: */
6175: n_vertices = 0;
6176: if (ISForVertices) {
6177: ISGetSize(ISForVertices,&n_vertices);
6178: }
6179: ncc = n_vertices+n_ISForFaces+n_ISForEdges;
6180: PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);
6182: total_counts = n_ISForFaces+n_ISForEdges;
6183: total_counts *= max_constraints;
6184: total_counts += n_vertices;
6185: PetscBTCreate(total_counts,&change_basis);
6187: total_counts = 0;
6188: max_size_of_constraint = 0;
6189: for (i=0;i<n_ISForEdges+n_ISForFaces;i++) {
6190: IS used_is;
6191: if (i<n_ISForEdges) {
6192: used_is = ISForEdges[i];
6193: } else {
6194: used_is = ISForFaces[i-n_ISForEdges];
6195: }
6196: ISGetSize(used_is,&j);
6197: total_counts += j;
6198: max_size_of_constraint = PetscMax(j,max_size_of_constraint);
6199: }
6200: PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);
6202: /* get local part of global near null space vectors */
6203: PetscMalloc1(nnsp_size,&localnearnullsp);
6204: for (k=0;k<nnsp_size;k++) {
6205: VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);
6206: VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
6207: VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);
6208: }
6210: /* whether or not to skip lapack calls */
6211: skip_lapack = PETSC_TRUE;
6212: if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
6214: /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
6215: if (!skip_lapack) {
6216: PetscScalar temp_work;
6218: if (use_pod) {
6219: /* Proper Orthogonal Decomposition (POD) using the snapshot method */
6220: PetscMalloc1(max_constraints*max_constraints,&correlation_mat);
6221: PetscMalloc1(max_constraints,&singular_vals);
6222: PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);
6223: #if defined(PETSC_USE_COMPLEX)
6224: PetscMalloc1(3*max_constraints,&rwork);
6225: #endif
6226: /* now we evaluate the optimal workspace using query with lwork=-1 */
6227: PetscBLASIntCast(max_constraints,&Blas_N);
6228: PetscBLASIntCast(max_constraints,&Blas_LDA);
6229: lwork = -1;
6230: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6231: #if !defined(PETSC_USE_COMPLEX)
6232: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr));
6233: #else
6234: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr));
6235: #endif
6236: PetscFPTrapPop();
6237: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr);
6238: } else {
6239: #if !defined(PETSC_MISSING_LAPACK_GESVD)
6240: /* SVD */
6241: PetscInt max_n,min_n;
6242: max_n = max_size_of_constraint;
6243: min_n = max_constraints;
6244: if (max_size_of_constraint < max_constraints) {
6245: min_n = max_size_of_constraint;
6246: max_n = max_constraints;
6247: }
6248: PetscMalloc1(min_n,&singular_vals);
6249: #if defined(PETSC_USE_COMPLEX)
6250: PetscMalloc1(5*min_n,&rwork);
6251: #endif
6252: /* now we evaluate the optimal workspace using query with lwork=-1 */
6253: lwork = -1;
6254: PetscBLASIntCast(max_n,&Blas_M);
6255: PetscBLASIntCast(min_n,&Blas_N);
6256: PetscBLASIntCast(max_n,&Blas_LDA);
6257: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6258: #if !defined(PETSC_USE_COMPLEX)
6259: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr));
6260: #else
6261: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr));
6262: #endif
6263: PetscFPTrapPop();
6264: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr);
6265: #else
6266: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen");
6267: #endif /* on missing GESVD */
6268: }
6269: /* Allocate optimal workspace */
6270: PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);
6271: PetscMalloc1(lwork,&work);
6272: }
6273: /* Now we can loop on constraining sets */
6274: total_counts = 0;
6275: constraints_idxs_ptr[0] = 0;
6276: constraints_data_ptr[0] = 0;
6277: /* vertices */
6278: if (n_vertices) {
6279: ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);
6280: PetscArraycpy(constraints_idxs,is_indices,n_vertices);
6281: for (i=0;i<n_vertices;i++) {
6282: constraints_n[total_counts] = 1;
6283: constraints_data[total_counts] = 1.0;
6284: constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1;
6285: constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1;
6286: total_counts++;
6287: }
6288: ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);
6289: n_vertices = total_counts;
6290: }
6292: /* edges and faces */
6293: total_counts_cc = total_counts;
6294: for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) {
6295: IS used_is;
6296: PetscBool idxs_copied = PETSC_FALSE;
6298: if (ncc<n_ISForEdges) {
6299: used_is = ISForEdges[ncc];
6300: boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
6301: } else {
6302: used_is = ISForFaces[ncc-n_ISForEdges];
6303: boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6304: }
6305: temp_constraints = 0; /* zero the number of constraints I have on this conn comp */
6307: ISGetSize(used_is,&size_of_constraint);
6308: ISGetIndices(used_is,(const PetscInt**)&is_indices);
6309: /* change of basis should not be performed on local periodic nodes */
6310: if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE;
6311: if (nnsp_has_cnst) {
6312: PetscScalar quad_value;
6314: PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint);
6315: idxs_copied = PETSC_TRUE;
6317: if (!pcbddc->use_nnsp_true) {
6318: quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint));
6319: } else {
6320: quad_value = 1.0;
6321: }
6322: for (j=0;j<size_of_constraint;j++) {
6323: constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value;
6324: }
6325: temp_constraints++;
6326: total_counts++;
6327: }
6328: for (k=0;k<nnsp_size;k++) {
6329: PetscReal real_value;
6330: PetscScalar *ptr_to_data;
6332: VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
6333: ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint];
6334: for (j=0;j<size_of_constraint;j++) {
6335: ptr_to_data[j] = array[is_indices[j]];
6336: }
6337: VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);
6338: /* check if array is null on the connected component */
6339: PetscBLASIntCast(size_of_constraint,&Blas_N);
6340: PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one));
6341: if (real_value > tol*size_of_constraint) { /* keep indices and values */
6342: temp_constraints++;
6343: total_counts++;
6344: if (!idxs_copied) {
6345: PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint);
6346: idxs_copied = PETSC_TRUE;
6347: }
6348: }
6349: }
6350: ISRestoreIndices(used_is,(const PetscInt**)&is_indices);
6351: valid_constraints = temp_constraints;
6352: if (!pcbddc->use_nnsp_true && temp_constraints) {
6353: if (temp_constraints == 1) { /* just normalize the constraint */
6354: PetscScalar norm,*ptr_to_data;
6356: ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6357: PetscBLASIntCast(size_of_constraint,&Blas_N);
6358: PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one));
6359: norm = 1.0/PetscSqrtReal(PetscRealPart(norm));
6360: PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one));
6361: } else { /* perform SVD */
6362: PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6364: if (use_pod) {
6365: /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6366: POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6367: -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6368: the constraints basis will differ (by a complex factor with absolute value equal to 1)
6369: from that computed using LAPACKgesvd
6370: -> This is due to a different computation of eigenvectors in LAPACKheev
6371: -> The quality of the POD-computed basis will be the same */
6372: PetscArrayzero(correlation_mat,temp_constraints*temp_constraints);
6373: /* Store upper triangular part of correlation matrix */
6374: PetscBLASIntCast(size_of_constraint,&Blas_N);
6375: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6376: for (j=0;j<temp_constraints;j++) {
6377: for (k=0;k<j+1;k++) {
6378: PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one));
6379: }
6380: }
6381: /* compute eigenvalues and eigenvectors of correlation matrix */
6382: PetscBLASIntCast(temp_constraints,&Blas_N);
6383: PetscBLASIntCast(temp_constraints,&Blas_LDA);
6384: #if !defined(PETSC_USE_COMPLEX)
6385: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr));
6386: #else
6387: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr));
6388: #endif
6389: PetscFPTrapPop();
6390: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr);
6391: /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6392: j = 0;
6393: while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++;
6394: total_counts = total_counts-j;
6395: valid_constraints = temp_constraints-j;
6396: /* scale and copy POD basis into used quadrature memory */
6397: PetscBLASIntCast(size_of_constraint,&Blas_M);
6398: PetscBLASIntCast(temp_constraints,&Blas_N);
6399: PetscBLASIntCast(temp_constraints,&Blas_K);
6400: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6401: PetscBLASIntCast(temp_constraints,&Blas_LDB);
6402: PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6403: if (j<temp_constraints) {
6404: PetscInt ii;
6405: for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]);
6406: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6407: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC));
6408: PetscFPTrapPop();
6409: for (k=0;k<temp_constraints-j;k++) {
6410: for (ii=0;ii<size_of_constraint;ii++) {
6411: ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii];
6412: }
6413: }
6414: }
6415: } else {
6416: #if !defined(PETSC_MISSING_LAPACK_GESVD)
6417: PetscBLASIntCast(size_of_constraint,&Blas_M);
6418: PetscBLASIntCast(temp_constraints,&Blas_N);
6419: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6420: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6421: #if !defined(PETSC_USE_COMPLEX)
6422: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr));
6423: #else
6424: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr));
6425: #endif
6426: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr);
6427: PetscFPTrapPop();
6428: /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
6429: k = temp_constraints;
6430: if (k > size_of_constraint) k = size_of_constraint;
6431: j = 0;
6432: while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++;
6433: valid_constraints = k-j;
6434: total_counts = total_counts-temp_constraints+valid_constraints;
6435: #else
6436: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"This should not happen");
6437: #endif /* on missing GESVD */
6438: }
6439: }
6440: }
6441: /* update pointers information */
6442: if (valid_constraints) {
6443: constraints_n[total_counts_cc] = valid_constraints;
6444: constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint;
6445: constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints;
6446: /* set change_of_basis flag */
6447: if (boolforchange) {
6448: PetscBTSet(change_basis,total_counts_cc);
6449: }
6450: total_counts_cc++;
6451: }
6452: }
6453: /* free workspace */
6454: if (!skip_lapack) {
6455: PetscFree(work);
6456: #if defined(PETSC_USE_COMPLEX)
6457: PetscFree(rwork);
6458: #endif
6459: PetscFree(singular_vals);
6460: PetscFree(correlation_mat);
6461: PetscFree(temp_basis);
6462: }
6463: for (k=0;k<nnsp_size;k++) {
6464: VecDestroy(&localnearnullsp[k]);
6465: }
6466: PetscFree(localnearnullsp);
6467: /* free index sets of faces, edges and vertices */
6468: for (i=0;i<n_ISForFaces;i++) {
6469: ISDestroy(&ISForFaces[i]);
6470: }
6471: if (n_ISForFaces) {
6472: PetscFree(ISForFaces);
6473: }
6474: for (i=0;i<n_ISForEdges;i++) {
6475: ISDestroy(&ISForEdges[i]);
6476: }
6477: if (n_ISForEdges) {
6478: PetscFree(ISForEdges);
6479: }
6480: ISDestroy(&ISForVertices);
6481: } else {
6482: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6484: total_counts = 0;
6485: n_vertices = 0;
6486: if (sub_schurs->is_vertices && pcbddc->use_vertices) {
6487: ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
6488: }
6489: max_constraints = 0;
6490: total_counts_cc = 0;
6491: for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6492: total_counts += pcbddc->adaptive_constraints_n[i];
6493: if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6494: max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]);
6495: }
6496: constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6497: constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6498: constraints_idxs = pcbddc->adaptive_constraints_idxs;
6499: constraints_data = pcbddc->adaptive_constraints_data;
6500: /* constraints_n differs from pcbddc->adaptive_constraints_n */
6501: PetscMalloc1(total_counts_cc,&constraints_n);
6502: total_counts_cc = 0;
6503: for (i=0;i<sub_schurs->n_subs+n_vertices;i++) {
6504: if (pcbddc->adaptive_constraints_n[i]) {
6505: constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6506: }
6507: }
6509: max_size_of_constraint = 0;
6510: for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]);
6511: PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);
6512: /* Change of basis */
6513: PetscBTCreate(total_counts_cc,&change_basis);
6514: if (pcbddc->use_change_of_basis) {
6515: for (i=0;i<sub_schurs->n_subs;i++) {
6516: if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) {
6517: PetscBTSet(change_basis,i+n_vertices);
6518: }
6519: }
6520: }
6521: }
6522: pcbddc->local_primal_size = total_counts;
6523: PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);
6525: /* map constraints_idxs in boundary numbering */
6526: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);
6527: if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D",constraints_idxs_ptr[total_counts_cc],i);
6529: /* Create constraint matrix */
6530: MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);
6531: MatSetType(pcbddc->ConstraintMatrix,MATAIJ);
6532: MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);
6534: /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6535: /* determine if a QR strategy is needed for change of basis */
6536: qr_needed = pcbddc->use_qr_single;
6537: PetscBTCreate(total_counts_cc,&qr_needed_idx);
6538: total_primal_vertices=0;
6539: pcbddc->local_primal_size_cc = 0;
6540: for (i=0;i<total_counts_cc;i++) {
6541: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6542: if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
6543: pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
6544: pcbddc->local_primal_size_cc += 1;
6545: } else if (PetscBTLookup(change_basis,i)) {
6546: for (k=0;k<constraints_n[i];k++) {
6547: pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6548: }
6549: pcbddc->local_primal_size_cc += constraints_n[i];
6550: if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
6551: PetscBTSet(qr_needed_idx,i);
6552: qr_needed = PETSC_TRUE;
6553: }
6554: } else {
6555: pcbddc->local_primal_size_cc += 1;
6556: }
6557: }
6558: /* note that the local variable n_vertices used below stores the number of pointwise constraints */
6559: pcbddc->n_vertices = total_primal_vertices;
6560: /* permute indices in order to have a sorted set of vertices */
6561: PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);
6562: PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);
6563: PetscArraycpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices);
6564: for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1;
6566: /* nonzero structure of constraint matrix */
6567: /* and get reference dof for local constraints */
6568: PetscMalloc1(pcbddc->local_primal_size,&nnz);
6569: for (i=0;i<total_primal_vertices;i++) nnz[i] = 1;
6571: j = total_primal_vertices;
6572: total_counts = total_primal_vertices;
6573: cum = total_primal_vertices;
6574: for (i=n_vertices;i<total_counts_cc;i++) {
6575: if (!PetscBTLookup(change_basis,i)) {
6576: pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
6577: pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
6578: cum++;
6579: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6580: for (k=0;k<constraints_n[i];k++) {
6581: pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k];
6582: nnz[j+k] = size_of_constraint;
6583: }
6584: j += constraints_n[i];
6585: }
6586: }
6587: MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);
6588: MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
6589: PetscFree(nnz);
6591: /* set values in constraint matrix */
6592: for (i=0;i<total_primal_vertices;i++) {
6593: MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);
6594: }
6595: total_counts = total_primal_vertices;
6596: for (i=n_vertices;i<total_counts_cc;i++) {
6597: if (!PetscBTLookup(change_basis,i)) {
6598: PetscInt *cols;
6600: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6601: cols = constraints_idxs+constraints_idxs_ptr[i];
6602: for (k=0;k<constraints_n[i];k++) {
6603: PetscInt row = total_counts+k;
6604: PetscScalar *vals;
6606: vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint;
6607: MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);
6608: }
6609: total_counts += constraints_n[i];
6610: }
6611: }
6612: /* assembling */
6613: MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6614: MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);
6615: MatViewFromOptions(pcbddc->ConstraintMatrix,(PetscObject)pc,"-pc_bddc_constraint_mat_view");
6617: /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
6618: if (pcbddc->use_change_of_basis) {
6619: /* dual and primal dofs on a single cc */
6620: PetscInt dual_dofs,primal_dofs;
6621: /* working stuff for GEQRF */
6622: PetscScalar *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t;
6623: PetscBLASInt lqr_work;
6624: /* working stuff for UNGQR */
6625: PetscScalar *gqr_work = NULL,lgqr_work_t=0.0;
6626: PetscBLASInt lgqr_work;
6627: /* working stuff for TRTRS */
6628: PetscScalar *trs_rhs = NULL;
6629: PetscBLASInt Blas_NRHS;
6630: /* pointers for values insertion into change of basis matrix */
6631: PetscInt *start_rows,*start_cols;
6632: PetscScalar *start_vals;
6633: /* working stuff for values insertion */
6634: PetscBT is_primal;
6635: PetscInt *aux_primal_numbering_B;
6636: /* matrix sizes */
6637: PetscInt global_size,local_size;
6638: /* temporary change of basis */
6639: Mat localChangeOfBasisMatrix;
6640: /* extra space for debugging */
6641: PetscScalar *dbg_work = NULL;
6643: /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */
6644: MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);
6645: MatSetType(localChangeOfBasisMatrix,MATAIJ);
6646: MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);
6647: /* nonzeros for local mat */
6648: PetscMalloc1(pcis->n,&nnz);
6649: if (!pcbddc->benign_change || pcbddc->fake_change) {
6650: for (i=0;i<pcis->n;i++) nnz[i]=1;
6651: } else {
6652: const PetscInt *ii;
6653: PetscInt n;
6654: PetscBool flg_row;
6655: MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6656: for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i];
6657: MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);
6658: }
6659: for (i=n_vertices;i<total_counts_cc;i++) {
6660: if (PetscBTLookup(change_basis,i)) {
6661: size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i];
6662: if (PetscBTLookup(qr_needed_idx,i)) {
6663: for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint;
6664: } else {
6665: nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
6666: for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2;
6667: }
6668: }
6669: }
6670: MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);
6671: MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);
6672: PetscFree(nnz);
6673: /* Set interior change in the matrix */
6674: if (!pcbddc->benign_change || pcbddc->fake_change) {
6675: for (i=0;i<pcis->n;i++) {
6676: MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);
6677: }
6678: } else {
6679: const PetscInt *ii,*jj;
6680: PetscScalar *aa;
6681: PetscInt n;
6682: PetscBool flg_row;
6683: MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6684: MatSeqAIJGetArray(pcbddc->benign_change,&aa);
6685: for (i=0;i<n;i++) {
6686: MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);
6687: }
6688: MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);
6689: MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);
6690: }
6692: if (pcbddc->dbg_flag) {
6693: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
6694: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);
6695: }
6698: /* Now we loop on the constraints which need a change of basis */
6699: /*
6700: Change of basis matrix is evaluated similarly to the FIRST APPROACH in
6701: Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
6703: Basic blocks of change of basis matrix T computed by
6705: - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
6707: | 1 0 ... 0 s_1/S |
6708: | 0 1 ... 0 s_2/S |
6709: | ... |
6710: | 0 ... 1 s_{n-1}/S |
6711: | -s_1/s_n ... -s_{n-1}/s_n s_n/S |
6713: with S = \sum_{i=1}^n s_i^2
6714: NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
6715: in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
6717: - QR decomposition of constraints otherwise
6718: */
6719: if (qr_needed && max_size_of_constraint) {
6720: /* space to store Q */
6721: PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);
6722: /* array to store scaling factors for reflectors */
6723: PetscMalloc1(max_constraints,&qr_tau);
6724: /* first we issue queries for optimal work */
6725: PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6726: PetscBLASIntCast(max_constraints,&Blas_N);
6727: PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6728: lqr_work = -1;
6729: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr));
6730: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr);
6731: PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);
6732: PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);
6733: lgqr_work = -1;
6734: PetscBLASIntCast(max_size_of_constraint,&Blas_M);
6735: PetscBLASIntCast(max_size_of_constraint,&Blas_N);
6736: PetscBLASIntCast(max_constraints,&Blas_K);
6737: PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);
6738: if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */
6739: PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr));
6740: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr);
6741: PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);
6742: PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);
6743: /* array to store rhs and solution of triangular solver */
6744: PetscMalloc1(max_constraints*max_constraints,&trs_rhs);
6745: /* allocating workspace for check */
6746: if (pcbddc->dbg_flag) {
6747: PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);
6748: }
6749: }
6750: /* array to store whether a node is primal or not */
6751: PetscBTCreate(pcis->n_B,&is_primal);
6752: PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);
6753: ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);
6754: if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",total_primal_vertices,i);
6755: for (i=0;i<total_primal_vertices;i++) {
6756: PetscBTSet(is_primal,aux_primal_numbering_B[i]);
6757: }
6758: PetscFree(aux_primal_numbering_B);
6760: /* loop on constraints and see whether or not they need a change of basis and compute it */
6761: for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) {
6762: size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts];
6763: if (PetscBTLookup(change_basis,total_counts)) {
6764: /* get constraint info */
6765: primal_dofs = constraints_n[total_counts];
6766: dual_dofs = size_of_constraint-primal_dofs;
6768: if (pcbddc->dbg_flag) {
6769: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %D: %D need a change of basis (size %D)\n",total_counts,primal_dofs,size_of_constraint);
6770: }
6772: if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */
6774: /* copy quadrature constraints for change of basis check */
6775: if (pcbddc->dbg_flag) {
6776: PetscArraycpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);
6777: }
6778: /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
6779: PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);
6781: /* compute QR decomposition of constraints */
6782: PetscBLASIntCast(size_of_constraint,&Blas_M);
6783: PetscBLASIntCast(primal_dofs,&Blas_N);
6784: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6785: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6786: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr));
6787: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr);
6788: PetscFPTrapPop();
6790: /* explicitly compute R^-T */
6791: PetscArrayzero(trs_rhs,primal_dofs*primal_dofs);
6792: for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0;
6793: PetscBLASIntCast(primal_dofs,&Blas_N);
6794: PetscBLASIntCast(primal_dofs,&Blas_NRHS);
6795: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6796: PetscBLASIntCast(primal_dofs,&Blas_LDB);
6797: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6798: PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr));
6799: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr);
6800: PetscFPTrapPop();
6802: /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */
6803: PetscBLASIntCast(size_of_constraint,&Blas_M);
6804: PetscBLASIntCast(size_of_constraint,&Blas_N);
6805: PetscBLASIntCast(primal_dofs,&Blas_K);
6806: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6807: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6808: PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr));
6809: if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr);
6810: PetscFPTrapPop();
6812: /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
6813: i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
6814: where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
6815: PetscBLASIntCast(size_of_constraint,&Blas_M);
6816: PetscBLASIntCast(primal_dofs,&Blas_N);
6817: PetscBLASIntCast(primal_dofs,&Blas_K);
6818: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6819: PetscBLASIntCast(primal_dofs,&Blas_LDB);
6820: PetscBLASIntCast(size_of_constraint,&Blas_LDC);
6821: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6822: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC));
6823: PetscFPTrapPop();
6824: PetscArraycpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs);
6826: /* insert values in change of basis matrix respecting global ordering of new primal dofs */
6827: start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
6828: /* insert cols for primal dofs */
6829: for (j=0;j<primal_dofs;j++) {
6830: start_vals = &qr_basis[j*size_of_constraint];
6831: start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6832: MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6833: }
6834: /* insert cols for dual dofs */
6835: for (j=0,k=0;j<dual_dofs;k++) {
6836: if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) {
6837: start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint];
6838: start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6839: MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);
6840: j++;
6841: }
6842: }
6844: /* check change of basis */
6845: if (pcbddc->dbg_flag) {
6846: PetscInt ii,jj;
6847: PetscBool valid_qr=PETSC_TRUE;
6848: PetscBLASIntCast(primal_dofs,&Blas_M);
6849: PetscBLASIntCast(size_of_constraint,&Blas_N);
6850: PetscBLASIntCast(size_of_constraint,&Blas_K);
6851: PetscBLASIntCast(size_of_constraint,&Blas_LDA);
6852: PetscBLASIntCast(size_of_constraint,&Blas_LDB);
6853: PetscBLASIntCast(primal_dofs,&Blas_LDC);
6854: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
6855: PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC));
6856: PetscFPTrapPop();
6857: for (jj=0;jj<size_of_constraint;jj++) {
6858: for (ii=0;ii<primal_dofs;ii++) {
6859: if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE;
6860: if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
6861: }
6862: }
6863: if (!valid_qr) {
6864: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");
6865: for (jj=0;jj<size_of_constraint;jj++) {
6866: for (ii=0;ii<primal_dofs;ii++) {
6867: if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) {
6868: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not orthogonal to constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6869: }
6870: if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) {
6871: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not unitary w.r.t constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));
6872: }
6873: }
6874: }
6875: } else {
6876: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");
6877: }
6878: }
6879: } else { /* simple transformation block */
6880: PetscInt row,col;
6881: PetscScalar val,norm;
6883: PetscBLASIntCast(size_of_constraint,&Blas_N);
6884: PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one));
6885: for (j=0;j<size_of_constraint;j++) {
6886: PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j];
6887: row = constraints_idxs[constraints_idxs_ptr[total_counts]+j];
6888: if (!PetscBTLookup(is_primal,row_B)) {
6889: col = constraints_idxs[constraints_idxs_ptr[total_counts]];
6890: MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);
6891: MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);
6892: } else {
6893: for (k=0;k<size_of_constraint;k++) {
6894: col = constraints_idxs[constraints_idxs_ptr[total_counts]+k];
6895: if (row != col) {
6896: val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]];
6897: } else {
6898: val = constraints_data[constraints_data_ptr[total_counts]]/norm;
6899: }
6900: MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);
6901: }
6902: }
6903: }
6904: if (pcbddc->dbg_flag) {
6905: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");
6906: }
6907: }
6908: } else {
6909: if (pcbddc->dbg_flag) {
6910: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %D does not need a change of basis (size %D)\n",total_counts,size_of_constraint);
6911: }
6912: }
6913: }
6915: /* free workspace */
6916: if (qr_needed) {
6917: if (pcbddc->dbg_flag) {
6918: PetscFree(dbg_work);
6919: }
6920: PetscFree(trs_rhs);
6921: PetscFree(qr_tau);
6922: PetscFree(qr_work);
6923: PetscFree(gqr_work);
6924: PetscFree(qr_basis);
6925: }
6926: PetscBTDestroy(&is_primal);
6927: MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
6928: MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);
6930: /* assembling of global change of variable */
6931: if (!pcbddc->fake_change) {
6932: Mat tmat;
6933: PetscInt bs;
6935: VecGetSize(pcis->vec1_global,&global_size);
6936: VecGetLocalSize(pcis->vec1_global,&local_size);
6937: MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);
6938: MatISSetLocalMat(tmat,localChangeOfBasisMatrix);
6939: MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY);
6940: MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY);
6941: MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);
6942: MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);
6943: MatGetBlockSize(pc->pmat,&bs);
6944: MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);
6945: MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);
6946: MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);
6947: MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);
6948: MatDestroy(&tmat);
6949: VecSet(pcis->vec1_global,0.0);
6950: VecSet(pcis->vec1_N,1.0);
6951: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6952: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
6953: VecReciprocal(pcis->vec1_global);
6954: MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);
6956: /* check */
6957: if (pcbddc->dbg_flag) {
6958: PetscReal error;
6959: Vec x,x_change;
6961: VecDuplicate(pcis->vec1_global,&x);
6962: VecDuplicate(pcis->vec1_global,&x_change);
6963: VecSetRandom(x,NULL);
6964: VecCopy(x,pcis->vec1_global);
6965: VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6966: VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
6967: MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);
6968: VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6969: VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);
6970: MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);
6971: VecAXPY(x,-1.0,x_change);
6972: VecNorm(x,NORM_INFINITY,&error);
6973: if (error > PETSC_SMALL) {
6974: SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error);
6975: }
6976: VecDestroy(&x);
6977: VecDestroy(&x_change);
6978: }
6979: /* adapt sub_schurs computed (if any) */
6980: if (pcbddc->use_deluxe_scaling) {
6981: PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
6983: if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
6984: if (sub_schurs && sub_schurs->S_Ej_all) {
6985: Mat S_new,tmat;
6986: IS is_all_N,is_V_Sall = NULL;
6988: ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);
6989: MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);
6990: if (pcbddc->deluxe_zerorows) {
6991: ISLocalToGlobalMapping NtoSall;
6992: IS is_V;
6993: ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);
6994: ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);
6995: ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);
6996: ISLocalToGlobalMappingDestroy(&NtoSall);
6997: ISDestroy(&is_V);
6998: }
6999: ISDestroy(&is_all_N);
7000: MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
7001: MatDestroy(&sub_schurs->S_Ej_all);
7002: PetscObjectReference((PetscObject)S_new);
7003: if (pcbddc->deluxe_zerorows) {
7004: const PetscScalar *array;
7005: const PetscInt *idxs_V,*idxs_all;
7006: PetscInt i,n_V;
7008: MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
7009: ISGetLocalSize(is_V_Sall,&n_V);
7010: ISGetIndices(is_V_Sall,&idxs_V);
7011: ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);
7012: VecGetArrayRead(pcis->D,&array);
7013: for (i=0;i<n_V;i++) {
7014: PetscScalar val;
7015: PetscInt idx;
7017: idx = idxs_V[i];
7018: val = array[idxs_all[idxs_V[i]]];
7019: MatSetValue(S_new,idx,idx,val,INSERT_VALUES);
7020: }
7021: MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);
7022: MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);
7023: VecRestoreArrayRead(pcis->D,&array);
7024: ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);
7025: ISRestoreIndices(is_V_Sall,&idxs_V);
7026: }
7027: sub_schurs->S_Ej_all = S_new;
7028: MatDestroy(&S_new);
7029: if (sub_schurs->sum_S_Ej_all) {
7030: MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);
7031: MatDestroy(&sub_schurs->sum_S_Ej_all);
7032: PetscObjectReference((PetscObject)S_new);
7033: if (pcbddc->deluxe_zerorows) {
7034: MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);
7035: }
7036: sub_schurs->sum_S_Ej_all = S_new;
7037: MatDestroy(&S_new);
7038: }
7039: ISDestroy(&is_V_Sall);
7040: MatDestroy(&tmat);
7041: }
7042: /* destroy any change of basis context in sub_schurs */
7043: if (sub_schurs && sub_schurs->change) {
7044: PetscInt i;
7046: for (i=0;i<sub_schurs->n_subs;i++) {
7047: KSPDestroy(&sub_schurs->change[i]);
7048: }
7049: PetscFree(sub_schurs->change);
7050: }
7051: }
7052: if (pcbddc->switch_static) { /* need to save the local change */
7053: pcbddc->switch_static_change = localChangeOfBasisMatrix;
7054: } else {
7055: MatDestroy(&localChangeOfBasisMatrix);
7056: }
7057: /* determine if any process has changed the pressures locally */
7058: pcbddc->change_interior = pcbddc->benign_have_null;
7059: } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
7060: MatDestroy(&pcbddc->ConstraintMatrix);
7061: pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
7062: pcbddc->use_qr_single = qr_needed;
7063: }
7064: } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
7065: if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
7066: PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);
7067: pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
7068: } else {
7069: Mat benign_global = NULL;
7070: if (pcbddc->benign_have_null) {
7071: Mat M;
7073: pcbddc->change_interior = PETSC_TRUE;
7074: VecCopy(matis->counter,pcis->vec1_N);
7075: VecReciprocal(pcis->vec1_N);
7076: MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global);
7077: if (pcbddc->benign_change) {
7078: MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);
7079: MatDiagonalScale(M,pcis->vec1_N,NULL);
7080: } else {
7081: MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M);
7082: MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES);
7083: }
7084: MatISSetLocalMat(benign_global,M);
7085: MatDestroy(&M);
7086: MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY);
7087: MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY);
7088: }
7089: if (pcbddc->user_ChangeOfBasisMatrix) {
7090: MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);
7091: MatDestroy(&benign_global);
7092: } else if (pcbddc->benign_have_null) {
7093: pcbddc->ChangeOfBasisMatrix = benign_global;
7094: }
7095: }
7096: if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
7097: IS is_global;
7098: const PetscInt *gidxs;
7100: ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);
7101: ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);
7102: ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);
7103: MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);
7104: ISDestroy(&is_global);
7105: }
7106: }
7107: if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) {
7108: VecDuplicate(pcis->vec1_global,&pcbddc->work_change);
7109: }
7111: if (!pcbddc->fake_change) {
7112: /* add pressure dofs to set of primal nodes for numbering purposes */
7113: for (i=0;i<pcbddc->benign_n;i++) {
7114: pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
7115: pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
7116: pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
7117: pcbddc->local_primal_size_cc++;
7118: pcbddc->local_primal_size++;
7119: }
7121: /* check if a new primal space has been introduced (also take into account benign trick) */
7122: pcbddc->new_primal_space_local = PETSC_TRUE;
7123: if (olocal_primal_size == pcbddc->local_primal_size) {
7124: PetscArraycmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc,&pcbddc->new_primal_space_local);
7125: pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7126: if (!pcbddc->new_primal_space_local) {
7127: PetscArraycmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc,&pcbddc->new_primal_space_local);
7128: pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7129: }
7130: }
7131: /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
7132: MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
7133: }
7134: PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);
7136: /* flush dbg viewer */
7137: if (pcbddc->dbg_flag) {
7138: PetscViewerFlush(pcbddc->dbg_viewer);
7139: }
7141: /* free workspace */
7142: PetscBTDestroy(&qr_needed_idx);
7143: PetscBTDestroy(&change_basis);
7144: if (!pcbddc->adaptive_selection) {
7145: PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);
7146: PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);
7147: } else {
7148: PetscFree5(pcbddc->adaptive_constraints_n,
7149: pcbddc->adaptive_constraints_idxs_ptr,
7150: pcbddc->adaptive_constraints_data_ptr,
7151: pcbddc->adaptive_constraints_idxs,
7152: pcbddc->adaptive_constraints_data);
7153: PetscFree(constraints_n);
7154: PetscFree(constraints_idxs_B);
7155: }
7156: return(0);
7157: }
7159: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
7160: {
7161: ISLocalToGlobalMapping map;
7162: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
7163: Mat_IS *matis = (Mat_IS*)pc->pmat->data;
7164: PetscInt i,N;
7165: PetscBool rcsr = PETSC_FALSE;
7166: PetscErrorCode ierr;
7169: if (pcbddc->recompute_topography) {
7170: pcbddc->graphanalyzed = PETSC_FALSE;
7171: /* Reset previously computed graph */
7172: PCBDDCGraphReset(pcbddc->mat_graph);
7173: /* Init local Graph struct */
7174: MatGetSize(pc->pmat,&N,NULL);
7175: MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);
7176: PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);
7178: if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) {
7179: PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);
7180: }
7181: /* Check validity of the csr graph passed in by the user */
7182: if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs);
7184: /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
7185: if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
7186: PetscInt *xadj,*adjncy;
7187: PetscInt nvtxs;
7188: PetscBool flg_row=PETSC_FALSE;
7190: MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
7191: if (flg_row) {
7192: PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);
7193: pcbddc->computed_rowadj = PETSC_TRUE;
7194: }
7195: MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);
7196: rcsr = PETSC_TRUE;
7197: }
7198: if (pcbddc->dbg_flag) {
7199: PetscViewerFlush(pcbddc->dbg_viewer);
7200: }
7202: if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
7203: PetscReal *lcoords;
7204: PetscInt n;
7205: MPI_Datatype dimrealtype;
7207: /* TODO: support for blocked */
7208: if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n);
7209: MatGetLocalSize(matis->A,&n,NULL);
7210: PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);
7211: MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);
7212: MPI_Type_commit(&dimrealtype);
7213: PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);
7214: PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);
7215: MPI_Type_free(&dimrealtype);
7216: PetscFree(pcbddc->mat_graph->coords);
7218: pcbddc->mat_graph->coords = lcoords;
7219: pcbddc->mat_graph->cloc = PETSC_TRUE;
7220: pcbddc->mat_graph->cnloc = n;
7221: }
7222: if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs);
7223: pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && !pcbddc->corner_selected);
7225: /* Setup of Graph */
7226: pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */
7227: PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);
7229: /* attach info on disconnected subdomains if present */
7230: if (pcbddc->n_local_subs) {
7231: PetscInt *local_subs,n,totn;
7233: MatGetLocalSize(matis->A,&n,NULL);
7234: PetscMalloc1(n,&local_subs);
7235: for (i=0;i<n;i++) local_subs[i] = pcbddc->n_local_subs;
7236: for (i=0;i<pcbddc->n_local_subs;i++) {
7237: const PetscInt *idxs;
7238: PetscInt nl,j;
7240: ISGetLocalSize(pcbddc->local_subs[i],&nl);
7241: ISGetIndices(pcbddc->local_subs[i],&idxs);
7242: for (j=0;j<nl;j++) local_subs[idxs[j]] = i;
7243: ISRestoreIndices(pcbddc->local_subs[i],&idxs);
7244: }
7245: for (i=0,totn=0;i<n;i++) totn = PetscMax(totn,local_subs[i]);
7246: pcbddc->mat_graph->n_local_subs = totn + 1;
7247: pcbddc->mat_graph->local_subs = local_subs;
7248: }
7249: }
7251: if (!pcbddc->graphanalyzed) {
7252: /* Graph's connected components analysis */
7253: PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);
7254: pcbddc->graphanalyzed = PETSC_TRUE;
7255: pcbddc->corner_selected = pcbddc->corner_selection;
7256: }
7257: if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
7258: return(0);
7259: }
7261: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[])
7262: {
7263: PetscInt i,j,n;
7264: PetscScalar *alphas;
7265: PetscReal norm,*onorms;
7269: n = *nio;
7270: if (!n) return(0);
7271: PetscMalloc2(n,&alphas,n,&onorms);
7272: VecNormalize(vecs[0],&norm);
7273: if (norm < PETSC_SMALL) {
7274: onorms[0] = 0.0;
7275: VecSet(vecs[0],0.0);
7276: } else {
7277: onorms[0] = norm;
7278: }
7280: for (i=1;i<n;i++) {
7281: VecMDot(vecs[i],i,vecs,alphas);
7282: for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]);
7283: VecMAXPY(vecs[i],i,alphas,vecs);
7284: VecNormalize(vecs[i],&norm);
7285: if (norm < PETSC_SMALL) {
7286: onorms[i] = 0.0;
7287: VecSet(vecs[i],0.0);
7288: } else {
7289: onorms[i] = norm;
7290: }
7291: }
7292: /* push nonzero vectors at the beginning */
7293: for (i=0;i<n;i++) {
7294: if (onorms[i] == 0.0) {
7295: for (j=i+1;j<n;j++) {
7296: if (onorms[j] != 0.0) {
7297: VecCopy(vecs[j],vecs[i]);
7298: onorms[j] = 0.0;
7299: }
7300: }
7301: }
7302: }
7303: for (i=0,*nio=0;i<n;i++) *nio += onorms[i] != 0.0 ? 1 : 0;
7304: PetscFree2(alphas,onorms);
7305: return(0);
7306: }
7308: PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void)
7309: {
7310: Mat A;
7311: PetscInt n_neighs,*neighs,*n_shared,**shared;
7312: PetscMPIInt size,rank,color;
7313: PetscInt *xadj,*adjncy;
7314: PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx;
7315: PetscInt im_active,active_procs,N,n,i,j,threshold = 2;
7316: PetscInt void_procs,*procs_candidates = NULL;
7317: PetscInt xadj_count,*count;
7318: PetscBool ismatis,use_vwgt=PETSC_FALSE;
7319: PetscSubcomm psubcomm;
7320: MPI_Comm subcomm;
7325: PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
7326: if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
7329: if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %D",*n_subdomains);
7331: if (have_void) *have_void = PETSC_FALSE;
7332: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
7333: MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
7334: MatISGetLocalMat(mat,&A);
7335: MatGetLocalSize(A,&n,NULL);
7336: im_active = !!n;
7337: MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));
7338: void_procs = size - active_procs;
7339: /* get ranks of non-active processes in mat communicator */
7340: if (void_procs) {
7341: PetscInt ncand;
7343: if (have_void) *have_void = PETSC_TRUE;
7344: PetscMalloc1(size,&procs_candidates);
7345: MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));
7346: for (i=0,ncand=0;i<size;i++) {
7347: if (!procs_candidates[i]) {
7348: procs_candidates[ncand++] = i;
7349: }
7350: }
7351: /* force n_subdomains to be not greater than the number of non-active processes */
7352: *n_subdomains = PetscMin(void_procs,*n_subdomains);
7353: }
7355: /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
7356: number of subdomains requested 1 -> send to master or first candidate in voids */
7357: MatGetSize(mat,&N,NULL);
7358: if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
7359: PetscInt issize,isidx,dest;
7360: if (*n_subdomains == 1) dest = 0;
7361: else dest = rank;
7362: if (im_active) {
7363: issize = 1;
7364: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7365: isidx = procs_candidates[dest];
7366: } else {
7367: isidx = dest;
7368: }
7369: } else {
7370: issize = 0;
7371: isidx = -1;
7372: }
7373: if (*n_subdomains != 1) *n_subdomains = active_procs;
7374: ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);
7375: PetscFree(procs_candidates);
7376: return(0);
7377: }
7378: PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);
7379: PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);
7380: threshold = PetscMax(threshold,2);
7382: /* Get info on mapping */
7383: ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);
7385: /* build local CSR graph of subdomains' connectivity */
7386: PetscMalloc1(2,&xadj);
7387: xadj[0] = 0;
7388: xadj[1] = PetscMax(n_neighs-1,0);
7389: PetscMalloc1(xadj[1],&adjncy);
7390: PetscMalloc1(xadj[1],&adjncy_wgt);
7391: PetscCalloc1(n,&count);
7392: for (i=1;i<n_neighs;i++)
7393: for (j=0;j<n_shared[i];j++)
7394: count[shared[i][j]] += 1;
7396: xadj_count = 0;
7397: for (i=1;i<n_neighs;i++) {
7398: for (j=0;j<n_shared[i];j++) {
7399: if (count[shared[i][j]] < threshold) {
7400: adjncy[xadj_count] = neighs[i];
7401: adjncy_wgt[xadj_count] = n_shared[i];
7402: xadj_count++;
7403: break;
7404: }
7405: }
7406: }
7407: xadj[1] = xadj_count;
7408: PetscFree(count);
7409: ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);
7410: PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
7412: PetscMalloc1(1,&ranks_send_to_idx);
7414: /* Restrict work on active processes only */
7415: PetscMPIIntCast(im_active,&color);
7416: if (void_procs) {
7417: PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);
7418: PetscSubcommSetNumber(psubcomm,2); /* 2 groups, active process and not active processes */
7419: PetscSubcommSetTypeGeneral(psubcomm,color,rank);
7420: subcomm = PetscSubcommChild(psubcomm);
7421: } else {
7422: psubcomm = NULL;
7423: subcomm = PetscObjectComm((PetscObject)mat);
7424: }
7426: v_wgt = NULL;
7427: if (!color) {
7428: PetscFree(xadj);
7429: PetscFree(adjncy);
7430: PetscFree(adjncy_wgt);
7431: } else {
7432: Mat subdomain_adj;
7433: IS new_ranks,new_ranks_contig;
7434: MatPartitioning partitioner;
7435: PetscInt rstart=0,rend=0;
7436: PetscInt *is_indices,*oldranks;
7437: PetscMPIInt size;
7438: PetscBool aggregate;
7440: MPI_Comm_size(subcomm,&size);
7441: if (void_procs) {
7442: PetscInt prank = rank;
7443: PetscMalloc1(size,&oldranks);
7444: MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);
7445: for (i=0;i<xadj[1];i++) {
7446: PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);
7447: }
7448: PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);
7449: } else {
7450: oldranks = NULL;
7451: }
7452: aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7453: if (aggregate) { /* TODO: all this part could be made more efficient */
7454: PetscInt lrows,row,ncols,*cols;
7455: PetscMPIInt nrank;
7456: PetscScalar *vals;
7458: MPI_Comm_rank(subcomm,&nrank);
7459: lrows = 0;
7460: if (nrank<redprocs) {
7461: lrows = size/redprocs;
7462: if (nrank<size%redprocs) lrows++;
7463: }
7464: MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);
7465: MatGetOwnershipRange(subdomain_adj,&rstart,&rend);
7466: MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
7467: MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
7468: row = nrank;
7469: ncols = xadj[1]-xadj[0];
7470: cols = adjncy;
7471: PetscMalloc1(ncols,&vals);
7472: for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i];
7473: MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);
7474: MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);
7475: MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);
7476: PetscFree(xadj);
7477: PetscFree(adjncy);
7478: PetscFree(adjncy_wgt);
7479: PetscFree(vals);
7480: if (use_vwgt) {
7481: Vec v;
7482: const PetscScalar *array;
7483: PetscInt nl;
7485: MatCreateVecs(subdomain_adj,&v,NULL);
7486: VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);
7487: VecAssemblyBegin(v);
7488: VecAssemblyEnd(v);
7489: VecGetLocalSize(v,&nl);
7490: VecGetArrayRead(v,&array);
7491: PetscMalloc1(nl,&v_wgt);
7492: for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7493: VecRestoreArrayRead(v,&array);
7494: VecDestroy(&v);
7495: }
7496: } else {
7497: MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);
7498: if (use_vwgt) {
7499: PetscMalloc1(1,&v_wgt);
7500: v_wgt[0] = n;
7501: }
7502: }
7503: /* MatView(subdomain_adj,0); */
7505: /* Partition */
7506: MatPartitioningCreate(subcomm,&partitioner);
7507: #if defined(PETSC_HAVE_PTSCOTCH)
7508: MatPartitioningSetType(partitioner,MATPARTITIONINGPTSCOTCH);
7509: #elif defined(PETSC_HAVE_PARMETIS)
7510: MatPartitioningSetType(partitioner,MATPARTITIONINGPARMETIS);
7511: #else
7512: MatPartitioningSetType(partitioner,MATPARTITIONINGAVERAGE);
7513: #endif
7514: MatPartitioningSetAdjacency(partitioner,subdomain_adj);
7515: if (v_wgt) {
7516: MatPartitioningSetVertexWeights(partitioner,v_wgt);
7517: }
7518: *n_subdomains = PetscMin((PetscInt)size,*n_subdomains);
7519: MatPartitioningSetNParts(partitioner,*n_subdomains);
7520: MatPartitioningSetFromOptions(partitioner);
7521: MatPartitioningApply(partitioner,&new_ranks);
7522: /* MatPartitioningView(partitioner,0); */
7524: /* renumber new_ranks to avoid "holes" in new set of processors */
7525: ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);
7526: ISDestroy(&new_ranks);
7527: ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);
7528: if (!aggregate) {
7529: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7530: if (PetscUnlikelyDebug(!oldranks)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7531: ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7532: } else if (oldranks) {
7533: ranks_send_to_idx[0] = oldranks[is_indices[0]];
7534: } else {
7535: ranks_send_to_idx[0] = is_indices[0];
7536: }
7537: } else {
7538: PetscInt idx = 0;
7539: PetscMPIInt tag;
7540: MPI_Request *reqs;
7542: PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);
7543: PetscMalloc1(rend-rstart,&reqs);
7544: for (i=rstart;i<rend;i++) {
7545: MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);
7546: }
7547: MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);
7548: MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);
7549: PetscFree(reqs);
7550: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7551: if (PetscUnlikelyDebug(!oldranks)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen");
7552: ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7553: } else if (oldranks) {
7554: ranks_send_to_idx[0] = oldranks[idx];
7555: } else {
7556: ranks_send_to_idx[0] = idx;
7557: }
7558: }
7559: ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);
7560: /* clean up */
7561: PetscFree(oldranks);
7562: ISDestroy(&new_ranks_contig);
7563: MatDestroy(&subdomain_adj);
7564: MatPartitioningDestroy(&partitioner);
7565: }
7566: PetscSubcommDestroy(&psubcomm);
7567: PetscFree(procs_candidates);
7569: /* assemble parallel IS for sends */
7570: i = 1;
7571: if (!color) i=0;
7572: ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);
7573: return(0);
7574: }
/* Private tags identifying the type of a local matrix. PCBDDCMatISSubassemble stores one of these
   (cast to PetscInt) as the first entry of the index message it sends, and the receiver uses the
   received tags to choose the type of the new local matrix. */
7576: typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate;
/*
   PCBDDCMatISSubassemble - Sends the local matrix of a MATIS (values and local-to-global indices)
   to the processes listed in a send pattern, and sums the received contributions into the local
   matrix of a new MATIS. Optionally moves a set of IS and one vector along the same pattern.

   Input Parameters:
+  mat           - the matrix; an error is raised unless it is a MATIS whose local matrix is a square MATSEQDENSE
.  is_sends      - the local entries are the ranks (in the communicator of mat) this process sends to;
                   if NULL, the pattern is obtained from PCBDDCMatISGetSubassemblingPattern()
.  n_subdomains  - target number of subdomains; used, and required to be nonzero, only when is_sends is NULL
.  restrict_comm - if true, some processes are excluded (see restrict_full); on those processes the
                   results are destroyed before returning and *mat_n is set to NULL
.  restrict_full - with restrict_comm: if true, every process receiving nothing is excluded; if false,
                   only the processes that send without receiving are excluded
.  reuse         - if true and *mat_n is not NULL, *mat_n is reused; it must be a MATIS with the same
                   global sizes as mat (and, with restrict_comm, living on a communicator of the expected size)
.  nis           - number of entries of isarray
-  nvecs         - number of entries of nnsp_vec; values larger than 1 raise an error

   Input/Output Parameters:
+  isarray  - each IS is sent along the pattern, destroyed, and replaced by a new IS holding the
              sorted, duplicate-free collection of the indices received for it
-  nnsp_vec - nnsp_vec[0] is sent along the pattern, destroyed, and replaced by a new vector in which
              the received values are accumulated

   Output Parameter:
.  mat_n - the subassembled MATIS

   Index message layout: [type tag, size of the l2g map, global indices of the l2g map].
   Value message layout: the raw array of the dense local matrix (size*size scalars).
   IS message layout: for each IS, [local size, indices].
*/
7578: PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
7579: {
7580: Mat local_mat;
7581: IS is_sends_internal;
7582: PetscInt rows,cols,new_local_rows;
7583: PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs;
7584: PetscBool ismatis,isdense,newisdense,destroy_mat;
7585: ISLocalToGlobalMapping l2gmap;
7586: PetscInt* l2gmap_indices;
7587: const PetscInt* is_indices;
7588: MatType new_local_type;
7589: /* buffers (ptr_* are cursors walking the corresponding receive buffers) */
7590: PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs;
7591: PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is;
7592: PetscInt *recv_buffer_idxs_local;
7593: PetscScalar *ptr_vals,*recv_buffer_vals;
7594: const PetscScalar *send_buffer_vals;
7595: PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs;
7596: /* MPI (ilengths_* are indexed by destination rank, olengths_* and onodes* by incoming message) */
7597: MPI_Comm comm,comm_n;
7598: PetscSubcomm subcomm;
7599: PetscMPIInt n_sends,n_recvs,size;
7600: PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is;
7601: PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals;
7602: PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest;
7603: MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs;
7604: MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs;
7605: PetscErrorCode ierr;
/* only MATIS matrices are supported */
7609: PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);
7610: if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME);
7617: if (nvecs) {
7618: if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported");
7620: }
7621: /* further checks: the local matrix must be a square MATSEQDENSE */
7622: MatISGetLocalMat(mat,&local_mat);
7623: PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);
7624: if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
7625: MatGetSize(local_mat,&rows,&cols);
7626: if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square");
/* a matrix to be reused must be a MATIS with the same global sizes as mat */
7627: if (reuse && *mat_n) {
7628: PetscInt mrows,mcols,mnrows,mncols;
7630: PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);
7631: if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS");
7632: MatGetSize(mat,&mrows,&mcols);
7633: MatGetSize(*mat_n,&mnrows,&mncols);
7634: if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows);
7635: if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols);
7636: }
7637: MatGetBlockSize(local_mat,&bs);
7640: /* prepare IS for sending if not provided */
7641: if (!is_sends) {
7642: if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains");
7643: PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);
7644: } else {
/* take a reference, so that is_sends_internal can be destroyed in both cases */
7645: PetscObjectReference((PetscObject)is_sends);
7646: is_sends_internal = is_sends;
7647: }
7649: /* get comm */
7650: PetscObjectGetComm((PetscObject)mat,&comm);
7652: /* compute number of sends: one message per local entry of the send pattern */
7653: ISGetLocalSize(is_sends_internal,&i);
7654: PetscMPIIntCast(i,&n_sends);
7656: /* compute number of receives: flag the destination ranks and gather the number of incoming messages */
7657: MPI_Comm_size(comm,&size);
7658: PetscMalloc1(size,&iflags);
7659: PetscArrayzero(iflags,size);
7660: ISGetIndices(is_sends_internal,&is_indices);
7661: for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1;
7662: PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);
7663: PetscFree(iflags);
7665: /* restrict comm if requested */
7666: subcomm = NULL;
7667: destroy_mat = PETSC_FALSE;
7668: if (restrict_comm) {
7669: PetscMPIInt color,subcommsize;
/* color 1 marks the processes left out of the new communicator */
7671: color = 0;
7672: if (restrict_full) {
7673: if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */
7674: } else {
7675: if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */
7676: }
/* the sum of the colors counts the excluded processes; the remaining ones form the new communicator */
7677: MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);
7678: subcommsize = size - subcommsize;
7679: /* check if reuse has been requested */
7680: if (reuse) {
7681: if (*mat_n) {
7682: PetscMPIInt subcommsize2;
7683: MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);
7684: if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2);
7685: comm_n = PetscObjectComm((PetscObject)*mat_n);
7686: } else {
/* nothing to reuse on this process: the placeholder matrix created below lives on PETSC_COMM_SELF */
7687: comm_n = PETSC_COMM_SELF;
7688: }
7689: } else { /* MAT_INITIAL_MATRIX */
7690: PetscMPIInt rank;
/* split comm in two groups according to color; comm_n is the group this process belongs to */
7692: MPI_Comm_rank(comm,&rank);
7693: PetscSubcommCreate(comm,&subcomm);
7694: PetscSubcommSetNumber(subcomm,2);
7695: PetscSubcommSetTypeGeneral(subcomm,color,rank);
7696: comm_n = PetscSubcommChild(subcomm);
7697: }
7698: /* flag to destroy *mat_n if not significant */
7699: if (color) destroy_mat = PETSC_TRUE;
7700: } else {
7701: comm_n = comm;
7702: }
7704: /* prepare send/receive buffers */
7705: PetscMalloc1(size,&ilengths_idxs);
7706: PetscArrayzero(ilengths_idxs,size);
7707: PetscMalloc1(size,&ilengths_vals);
7708: PetscArrayzero(ilengths_vals,size);
7709: if (nis) {
7710: PetscCalloc1(size,&ilengths_idxs_is);
7711: }
7713: /* Get data from local matrices */
/* NOTE(review): non-dense local matrices have already been rejected by the check performed right after MatISGetLocalMat, so this test looks redundant */
7714: if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented");
7715: /* TODO: See below some guidelines on how to prepare the local buffers */
7716: /*
7717: send_buffer_vals should contain the raw values of the local matrix
7718: send_buffer_idxs should contain:
7719: - MatType_PRIVATE type
7720: - PetscInt size_of_l2gmap
7721: - PetscInt global_row_indices[size_of_l2gmap]
7722: - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values
7723: */
7724: else {
/* the values are sent directly from the array of the dense matrix (restored in the cleanup section) */
7725: MatDenseGetArrayRead(local_mat,&send_buffer_vals);
7726: ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);
/* index message: type tag, size of the map, then the global indices of the map */
7727: PetscMalloc1(i+2,&send_buffer_idxs);
7728: send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
7729: send_buffer_idxs[1] = i;
7730: ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7731: PetscArraycpy(&send_buffer_idxs[2],ptr_idxs,i);
7732: ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);
7733: PetscMPIIntCast(i,&len);
/* the same two messages go to every destination: len*len values and len indices plus the two header entries */
7734: for (i=0;i<n_sends;i++) {
7735: ilengths_vals[is_indices[i]] = len*len;
7736: ilengths_idxs[is_indices[i]] = len+2;
7737: }
7738: }
/* exchange the message lengths: onodes is used below as the list of sources, olengths_* as the sizes of the incoming messages */
7739: PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);
7740: /* additional is (if any) */
7741: if (nis) {
7742: PetscMPIInt psum;
7743: PetscInt j;
/* first pass: size of the packed message, one length entry plus the indices for each IS */
7744: for (j=0,psum=0;j<nis;j++) {
7745: PetscInt plen;
7746: ISGetLocalSize(isarray[j],&plen);
7747: PetscMPIIntCast(plen,&len);
7748: psum += len+1; /* indices + length */
7749: }
7750: PetscMalloc1(psum,&send_buffer_idxs_is);
/* second pass: pack all the IS in a single buffer */
7751: for (j=0,psum=0;j<nis;j++) {
7752: PetscInt plen;
7753: const PetscInt *is_array_idxs;
7754: ISGetLocalSize(isarray[j],&plen);
7755: send_buffer_idxs_is[psum] = plen;
7756: ISGetIndices(isarray[j],&is_array_idxs);
7757: PetscArraycpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen);
7758: ISRestoreIndices(isarray[j],&is_array_idxs);
7759: psum += plen+1; /* indices + length */
7760: }
7761: for (i=0;i<n_sends;i++) {
7762: ilengths_idxs_is[is_indices[i]] = psum;
7763: }
7764: PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);
7765: }
7766: MatISRestoreLocalMat(mat,&local_mat);
/* size the receive buffers: the incoming messages are stored back to back */
7768: buf_size_idxs = 0;
7769: buf_size_vals = 0;
7770: buf_size_idxs_is = 0;
7771: buf_size_vecs = 0;
7772: for (i=0;i<n_recvs;i++) {
7773: buf_size_idxs += (PetscInt)olengths_idxs[i];
7774: buf_size_vals += (PetscInt)olengths_vals[i];
7775: if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i];
/* NOTE(review): sized with the length of the index message, which is two entries more per message than the number of scalars received below */
7776: if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i];
7777: }
7778: PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);
7779: PetscMalloc1(buf_size_vals,&recv_buffer_vals);
7780: PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);
7781: PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);
7783: /* get new tags for clean communications */
7784: PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);
7785: PetscObjectGetNewTag((PetscObject)mat,&tag_vals);
7786: PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);
7787: PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);
7789: /* allocate for requests (all eight arrays are allocated, even when nis or nvecs is zero) */
7790: PetscMalloc1(n_sends,&send_req_idxs);
7791: PetscMalloc1(n_sends,&send_req_vals);
7792: PetscMalloc1(n_sends,&send_req_idxs_is);
7793: PetscMalloc1(n_sends,&send_req_vecs);
7794: PetscMalloc1(n_recvs,&recv_req_idxs);
7795: PetscMalloc1(n_recvs,&recv_req_vals);
7796: PetscMalloc1(n_recvs,&recv_req_idxs_is);
7797: PetscMalloc1(n_recvs,&recv_req_vecs);
7799: /* communications: post all the receives first, then all the sends */
7800: ptr_idxs = recv_buffer_idxs;
7801: ptr_vals = recv_buffer_vals;
7802: ptr_idxs_is = recv_buffer_idxs_is;
7803: ptr_vecs = recv_buffer_vecs;
7804: for (i=0;i<n_recvs;i++) {
7805: source_dest = onodes[i];
7806: MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);
7807: MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);
7808: ptr_idxs += olengths_idxs[i];
7809: ptr_vals += olengths_vals[i];
7810: if (nis) {
7811: source_dest = onodes_is[i];
7812: MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);
7813: ptr_idxs_is += olengths_idxs_is[i];
7814: }
7815: if (nvecs) {
/* the vector message carries one scalar per index, i.e. the index message length without the two header entries */
7816: source_dest = onodes[i];
7817: MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);
7818: ptr_vecs += olengths_idxs[i]-2;
7819: }
7820: }
7821: for (i=0;i<n_sends;i++) {
7822: PetscMPIIntCast(is_indices[i],&source_dest);
7823: MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);
7824: MPI_Isend((PetscScalar*)send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);
7825: if (nis) {
7826: MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);
7827: }
7828: if (nvecs) {
/* NOTE(review): VecGetArray is called once per send, while the matching VecRestoreArray is called exactly once in the cleanup section, also when n_sends is zero */
7829: VecGetArray(nnsp_vec[0],&send_buffer_vecs);
7830: MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);
7831: }
7832: }
7833: ISRestoreIndices(is_sends_internal,&is_indices);
7834: ISDestroy(&is_sends_internal);
7836: /* assemble new l2g map */
7837: MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);
/* first pass: total number of received indices */
7838: ptr_idxs = recv_buffer_idxs;
7839: new_local_rows = 0;
7840: for (i=0;i<n_recvs;i++) {
7841: new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7842: ptr_idxs += olengths_idxs[i];
7843: }
7844: PetscMalloc1(new_local_rows,&l2gmap_indices);
/* second pass: concatenate the received global indices, skipping the two header entries of each message */
7845: ptr_idxs = recv_buffer_idxs;
7846: new_local_rows = 0;
7847: for (i=0;i<n_recvs;i++) {
7848: PetscArraycpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,*(ptr_idxs+1));
7849: new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */
7850: ptr_idxs += olengths_idxs[i];
7851: }
/* the sorted list without duplicates defines the new map; new_local_rows is updated accordingly */
7852: PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);
7853: ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);
7854: PetscFree(l2gmap_indices);
7856: /* infer new local matrix type from received local matrices type */
7857: /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
7858: /* it also assumes that if the block size is set, then it is the same among all local matrices (see checks at the beginning of the function) */
7859: if (n_recvs) {
/* start from the type of the local data and fall back to AIJ as soon as a received message carries a different tag */
7860: MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
7861: ptr_idxs = recv_buffer_idxs;
7862: for (i=0;i<n_recvs;i++) {
7863: if ((PetscInt)new_local_type_private != *ptr_idxs) {
7864: new_local_type_private = MATAIJ_PRIVATE;
7865: break;
7866: }
7867: ptr_idxs += olengths_idxs[i];
7868: }
7869: switch (new_local_type_private) {
7870: case MATDENSE_PRIVATE:
/* dense contributions are subassembled into a SEQAIJ matrix */
7871: new_local_type = MATSEQAIJ;
7872: bs = 1;
7873: break;
7874: case MATAIJ_PRIVATE:
7875: new_local_type = MATSEQAIJ;
7876: bs = 1;
7877: break;
7878: case MATBAIJ_PRIVATE:
7879: new_local_type = MATSEQBAIJ;
7880: break;
7881: case MATSBAIJ_PRIVATE:
7882: new_local_type = MATSEQSBAIJ;
7883: break;
7884: default:
7885: SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME);
7886: break;
7887: }
7888: } else { /* by default, new_local_type is seqaij */
7889: new_local_type = MATSEQAIJ;
7890: bs = 1;
7891: }
7893: /* create MATIS object if needed */
7894: if (!reuse) {
/* rows and cols now hold the global sizes of mat */
7895: MatGetSize(mat,&rows,&cols);
7896: MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7897: } else {
7898: /* it also destroys the local matrices */
7899: if (*mat_n) {
7900: MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);
7901: } else { /* this is a fake object */
/* NOTE(review): on this path rows and cols still hold the sizes of the local dense matrix, since MatGetSize(mat,...) is called only when reuse is false */
7902: MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);
7903: }
7904: }
7905: MatISGetLocalMat(*mat_n,&local_mat);
7906: MatSetType(local_mat,new_local_type);
7908: MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);
7910: /* Global to local map of received indices */
/* the whole receive buffer is mapped, header entries included; the headers are put back right below */
7911: PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local); /* needed for values insertion */
7912: ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);
7913: ISLocalToGlobalMappingDestroy(&l2gmap);
7915: /* restore attributes -> type of incoming data and its size */
7916: buf_size_idxs = 0;
7917: for (i=0;i<n_recvs;i++) {
7918: recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
7919: recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1];
7920: buf_size_idxs += (PetscInt)olengths_idxs[i];
7921: }
7922: PetscFree(recv_buffer_idxs);
7924: /* set preallocation */
7925: PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);
7926: if (!newisdense) {
7927: PetscInt *new_local_nnz=NULL;
7929: ptr_idxs = recv_buffer_idxs_local;
7930: if (n_recvs) {
7931: PetscCalloc1(new_local_rows,&new_local_nnz);
7932: }
/* every received dense block adds its size to the count of each local row it touches */
7933: for (i=0;i<n_recvs;i++) {
7934: PetscInt j;
7935: if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
7936: for (j=0;j<*(ptr_idxs+1);j++) {
7937: new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1);
7938: }
7939: } else {
7940: /* TODO */
7941: }
7942: ptr_idxs += olengths_idxs[i];
7943: }
7944: if (new_local_nnz) {
/* the per-row counts are capped at the number of local rows */
7945: for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows);
/* the AIJ, BAIJ and SBAIJ preallocation routines are called in sequence on the same array, rescaled in between.
   NOTE(review): presumably only the call matching the actual type of local_mat has an effect - confirm */
7946: MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);
7947: for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs;
7948: MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7949: for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0);
7950: MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);
7951: } else {
7952: MatSetUp(local_mat);
7953: }
7954: PetscFree(new_local_nnz);
7955: } else {
7956: MatSetUp(local_mat);
7957: }
7959: /* set values: each received block is added at the local positions of its indices */
7960: ptr_vals = recv_buffer_vals;
7961: ptr_idxs = recv_buffer_idxs_local;
7962: for (i=0;i<n_recvs;i++) {
7963: if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
/* row orientation is switched off only for the insertion of the received array, then switched back on */
7964: MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);
7965: MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);
7966: MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);
7967: MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);
7968: MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);
7969: } else {
7970: /* TODO */
7971: }
7972: ptr_idxs += olengths_idxs[i];
7973: ptr_vals += olengths_vals[i];
7974: }
7975: MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);
7976: MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);
7977: MatISRestoreLocalMat(*mat_n,&local_mat);
7978: MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);
7979: MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);
7980: PetscFree(recv_buffer_vals);
/* disabled consistency check: compares the action of mat and *mat_n on a random vector */
7982: #if 0
7983: if (!restrict_comm) { /* check */
7984: Vec lvec,rvec;
7985: PetscReal infty_error;
7987: MatCreateVecs(mat,&rvec,&lvec);
7988: VecSetRandom(rvec,NULL);
7989: MatMult(mat,rvec,lvec);
7990: VecScale(lvec,-1.0);
7991: MatMultAdd(*mat_n,rvec,lvec,lvec);
7992: VecNorm(lvec,NORM_INFINITY,&infty_error);
7993: PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error);
7994: VecDestroy(&rvec);
7995: VecDestroy(&lvec);
7996: }
7997: #endif
7999: /* assemble new additional is (if any) */
8000: if (nis) {
8001: PetscInt **temp_idxs,*count_is,j,psum;
8003: MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);
/* first pass: count the indices received for each IS, and their total */
8004: PetscCalloc1(nis,&count_is);
8005: ptr_idxs = recv_buffer_idxs_is;
8006: psum = 0;
8007: for (i=0;i<n_recvs;i++) {
8008: for (j=0;j<nis;j++) {
8009: PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8010: count_is[j] += plen; /* increment counting of buffer for j-th IS */
8011: psum += plen;
8012: ptr_idxs += plen+1; /* shift pointer to received data */
8013: }
8014: }
/* a single allocation holds all the indices; temp_idxs[j] points to the chunk of the j-th IS */
8015: PetscMalloc1(nis,&temp_idxs);
8016: PetscMalloc1(psum,&temp_idxs[0]);
8017: for (i=1;i<nis;i++) {
8018: temp_idxs[i] = temp_idxs[i-1]+count_is[i-1];
8019: }
/* second pass: count_is is reset and reused as the write offset inside each chunk */
8020: PetscArrayzero(count_is,nis);
8021: ptr_idxs = recv_buffer_idxs_is;
8022: for (i=0;i<n_recvs;i++) {
8023: for (j=0;j<nis;j++) {
8024: PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8025: PetscArraycpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen);
8026: count_is[j] += plen; /* increment starting point of buffer for j-th IS */
8027: ptr_idxs += plen+1; /* shift pointer to received data */
8028: }
8029: }
/* replace each input IS with a new one on comm_n made of the received indices, sorted and without duplicates */
8030: for (i=0;i<nis;i++) {
8031: ISDestroy(&isarray[i]);
8032: PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);
8033: ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);
8034: }
8035: PetscFree(count_is);
8036: PetscFree(temp_idxs[0]);
8037: PetscFree(temp_idxs);
8038: }
8039: /* free workspace: each send buffer is released only after the corresponding sends have completed */
8040: PetscFree(recv_buffer_idxs_is);
8041: MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);
8042: PetscFree(send_buffer_idxs);
8043: MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);
8044: if (isdense) {
8045: MatISGetLocalMat(mat,&local_mat);
8046: MatDenseRestoreArrayRead(local_mat,&send_buffer_vals);
8047: MatISRestoreLocalMat(mat,&local_mat);
8048: } else {
8049: /* PetscFree(send_buffer_vals); */
8050: }
8051: if (nis) {
8052: MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);
8053: PetscFree(send_buffer_idxs_is);
8054: }
8056: if (nvecs) {
8057: MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);
8058: MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);
/* the input vector is released and replaced by a new one on comm_n with new_local_rows local entries */
8059: VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
8060: VecDestroy(&nnsp_vec[0]);
8061: VecCreate(comm_n,&nnsp_vec[0]);
8062: VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);
8063: VecSetType(nnsp_vec[0],VECSTANDARD);
8064: /* set values */
8065: ptr_vals = recv_buffer_vecs;
8066: ptr_idxs = recv_buffer_idxs_local;
/* send_buffer_vecs is reused here as the pointer to the array of the new vector.
   NOTE(review): the accumulation below relies on the new vector starting with zero entries - confirm */
8067: VecGetArray(nnsp_vec[0],&send_buffer_vecs);
8068: for (i=0;i<n_recvs;i++) {
8069: PetscInt j;
8070: for (j=0;j<*(ptr_idxs+1);j++) {
8071: send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j);
8072: }
8073: ptr_idxs += olengths_idxs[i];
8074: ptr_vals += olengths_idxs[i]-2;
8075: }
8076: VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);
8077: VecAssemblyBegin(nnsp_vec[0]);
8078: VecAssemblyEnd(nnsp_vec[0]);
8079: }
8081: PetscFree(recv_buffer_vecs);
8082: PetscFree(recv_buffer_idxs_local);
8083: PetscFree(recv_req_idxs);
8084: PetscFree(recv_req_vals);
8085: PetscFree(recv_req_vecs);
8086: PetscFree(recv_req_idxs_is);
8087: PetscFree(send_req_idxs);
8088: PetscFree(send_req_vals);
8089: PetscFree(send_req_vecs);
8090: PetscFree(send_req_idxs_is);
8091: PetscFree(ilengths_vals);
8092: PetscFree(ilengths_idxs);
8093: PetscFree(olengths_vals);
8094: PetscFree(olengths_idxs);
8095: PetscFree(onodes);
8096: if (nis) {
8097: PetscFree(ilengths_idxs_is);
8098: PetscFree(olengths_idxs_is);
8099: PetscFree(onodes_is);
8100: }
8101: PetscSubcommDestroy(&subcomm);
8102: if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */
8103: MatDestroy(mat_n);
8104: for (i=0;i<nis;i++) {
8105: ISDestroy(&isarray[i]);
8106: }
8107: if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
8108: VecDestroy(&nnsp_vec[0]);
8109: }
8110: *mat_n = NULL;
8111: }
8112: return(0);
8113: }
8115: /* temporary hack into ksp private data structure */
8116: #include <petsc/private/kspimpl.h>
8118: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals)
8119: {
8120: PC_BDDC *pcbddc = (PC_BDDC*)pc->data;
8121: PC_IS *pcis = (PC_IS*)pc->data;
8122: Mat coarse_mat,coarse_mat_is,coarse_submat_dense;
8123: Mat coarsedivudotp = NULL;
8124: Mat coarseG,t_coarse_mat_is;
8125: MatNullSpace CoarseNullSpace = NULL;
8126: ISLocalToGlobalMapping coarse_islg;
8127: IS coarse_is,*isarray,corners;
8128: PetscInt i,im_active=-1,active_procs=-1;
8129: PetscInt nis,nisdofs,nisneu,nisvert;
8130: PetscInt coarse_eqs_per_proc;
8131: PC pc_temp;
8132: PCType coarse_pc_type;
8133: KSPType coarse_ksp_type;
8134: PetscBool multilevel_requested,multilevel_allowed;
8135: PetscBool coarse_reuse;
8136: PetscInt ncoarse,nedcfield;
8137: PetscBool compute_vecs = PETSC_FALSE;
8138: PetscScalar *array;
8139: MatReuse coarse_mat_reuse;
8140: PetscBool restr, full_restr, have_void;
8141: PetscMPIInt size;
8142: PetscErrorCode ierr;
8145: PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);
8146: /* Assign global numbering to coarse dofs */
8147: if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
8148: PetscInt ocoarse_size;
8149: compute_vecs = PETSC_TRUE;
8151: pcbddc->new_primal_space = PETSC_TRUE;
8152: ocoarse_size = pcbddc->coarse_size;
8153: PetscFree(pcbddc->global_primal_indices);
8154: PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);
8155: /* see if we can avoid some work */
8156: if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
8157: /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
8158: if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
8159: KSPReset(pcbddc->coarse_ksp);
8160: coarse_reuse = PETSC_FALSE;
8161: } else { /* we can safely reuse already computed coarse matrix */
8162: coarse_reuse = PETSC_TRUE;
8163: }
8164: } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
8165: coarse_reuse = PETSC_FALSE;
8166: }
8167: /* reset any subassembling information */
8168: if (!coarse_reuse || pcbddc->recompute_topography) {
8169: ISDestroy(&pcbddc->coarse_subassembling);
8170: }
8171: } else { /* primal space is unchanged, so we can reuse coarse matrix */
8172: coarse_reuse = PETSC_TRUE;
8173: }
8174: if (coarse_reuse && pcbddc->coarse_ksp) {
8175: KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);
8176: PetscObjectReference((PetscObject)coarse_mat);
8177: coarse_mat_reuse = MAT_REUSE_MATRIX;
8178: } else {
8179: coarse_mat = NULL;
8180: coarse_mat_reuse = MAT_INITIAL_MATRIX;
8181: }
8183: /* creates temporary l2gmap and IS for coarse indexes */
8184: ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);
8185: ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);
8187: /* creates temporary MATIS object for coarse matrix */
8188: MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_submat_dense);
8189: MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);
8190: MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);
8191: MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
8192: MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);
8193: MatDestroy(&coarse_submat_dense);
8195: /* count "active" (i.e. with positive local size) and "void" processes */
8196: im_active = !!(pcis->n);
8197: MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
8199: /* determine number of processes partecipating to coarse solver and compute subassembling pattern */
8200: /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
8201: /* full_restr : just use the receivers from the subassembling pattern */
8202: MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);
8203: coarse_mat_is = NULL;
8204: multilevel_allowed = PETSC_FALSE;
8205: multilevel_requested = PETSC_FALSE;
8206: coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc);
8207: if (coarse_eqs_per_proc < 0) coarse_eqs_per_proc = pcbddc->coarse_size;
8208: if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
8209: if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE;
8210: if (multilevel_requested) {
8211: ncoarse = active_procs/pcbddc->coarsening_ratio;
8212: restr = PETSC_FALSE;
8213: full_restr = PETSC_FALSE;
8214: } else {
8215: ncoarse = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc);
8216: restr = PETSC_TRUE;
8217: full_restr = PETSC_TRUE;
8218: }
8219: if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
8220: ncoarse = PetscMax(1,ncoarse);
8221: if (!pcbddc->coarse_subassembling) {
8222: if (pcbddc->coarsening_ratio > 1) {
8223: if (multilevel_requested) {
8224: PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
8225: } else {
8226: PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);
8227: }
8228: } else {
8229: PetscMPIInt rank;
8231: MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);
8232: have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE;
8233: ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);
8234: }
8235: } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
8236: PetscInt psum;
8237: if (pcbddc->coarse_ksp) psum = 1;
8238: else psum = 0;
8239: MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));
8240: have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE;
8241: }
8242: /* determine if we can go multilevel */
8243: if (multilevel_requested) {
8244: if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
8245: else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */
8246: }
8247: if (multilevel_allowed && have_void) restr = PETSC_TRUE;
8249: /* dump subassembling pattern */
8250: if (pcbddc->dbg_flag && multilevel_allowed) {
8251: ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);
8252: }
8253: /* compute dofs splitting and neumann boundaries for coarse dofs */
8254: nedcfield = -1;
8255: corners = NULL;
8256: if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */
8257: PetscInt *tidxs,*tidxs2,nout,tsize,i;
8258: const PetscInt *idxs;
8259: ISLocalToGlobalMapping tmap;
8261: /* create map between primal indices (in local representative ordering) and local primal numbering */
8262: ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);
8263: /* allocate space for temporary storage */
8264: PetscMalloc1(pcbddc->local_primal_size,&tidxs);
8265: PetscMalloc1(pcbddc->local_primal_size,&tidxs2);
8266: /* allocate for IS array */
8267: nisdofs = pcbddc->n_ISForDofsLocal;
8268: if (pcbddc->nedclocal) {
8269: if (pcbddc->nedfield > -1) {
8270: nedcfield = pcbddc->nedfield;
8271: } else {
8272: nedcfield = 0;
8273: if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%D)",nisdofs);
8274: nisdofs = 1;
8275: }
8276: }
8277: nisneu = !!pcbddc->NeumannBoundariesLocal;
8278: nisvert = 0; /* nisvert is not used */
8279: nis = nisdofs + nisneu + nisvert;
8280: PetscMalloc1(nis,&isarray);
8281: /* dofs splitting */
8282: for (i=0;i<nisdofs;i++) {
8283: /* ISView(pcbddc->ISForDofsLocal[i],0); */
8284: if (nedcfield != i) {
8285: ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);
8286: ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);
8287: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8288: ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);
8289: } else {
8290: ISGetLocalSize(pcbddc->nedclocal,&tsize);
8291: ISGetIndices(pcbddc->nedclocal,&idxs);
8292: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8293: if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %D != %D",tsize,nout);
8294: ISRestoreIndices(pcbddc->nedclocal,&idxs);
8295: }
8296: ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8297: ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);
8298: /* ISView(isarray[i],0); */
8299: }
8300: /* neumann boundaries */
8301: if (pcbddc->NeumannBoundariesLocal) {
8302: /* ISView(pcbddc->NeumannBoundariesLocal,0); */
8303: ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);
8304: ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);
8305: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8306: ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);
8307: ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8308: ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);
8309: /* ISView(isarray[nisdofs],0); */
8310: }
8311: /* coordinates */
8312: if (pcbddc->corner_selected) {
8313: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);
8314: ISGetLocalSize(corners,&tsize);
8315: ISGetIndices(corners,&idxs);
8316: ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);
8317: if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping corners! %D != %D",tsize,nout);
8318: ISRestoreIndices(corners,&idxs);
8319: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&corners);
8320: ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);
8321: ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&corners);
8322: }
8323: PetscFree(tidxs);
8324: PetscFree(tidxs2);
8325: ISLocalToGlobalMappingDestroy(&tmap);
8326: } else {
8327: nis = 0;
8328: nisdofs = 0;
8329: nisneu = 0;
8330: nisvert = 0;
8331: isarray = NULL;
8332: }
8333: /* destroy no longer needed map */
8334: ISLocalToGlobalMappingDestroy(&coarse_islg);
8336: /* subassemble */
8337: if (multilevel_allowed) {
8338: Vec vp[1];
8339: PetscInt nvecs = 0;
8340: PetscBool reuse,reuser;
8342: if (coarse_mat) reuse = PETSC_TRUE;
8343: else reuse = PETSC_FALSE;
8344: MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8345: vp[0] = NULL;
8346: if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
8347: VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);
8348: VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);
8349: VecSetType(vp[0],VECSTANDARD);
8350: nvecs = 1;
8352: if (pcbddc->divudotp) {
8353: Mat B,loc_divudotp;
8354: Vec v,p;
8355: IS dummy;
8356: PetscInt np;
8358: MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);
8359: MatGetSize(loc_divudotp,&np,NULL);
8360: ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);
8361: MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);
8362: MatCreateVecs(B,&v,&p);
8363: VecSet(p,1.);
8364: MatMultTranspose(B,p,v);
8365: VecDestroy(&p);
8366: MatDestroy(&B);
8367: VecGetArray(vp[0],&array);
8368: VecPlaceArray(pcbddc->vec1_P,array);
8369: VecRestoreArray(vp[0],&array);
8370: MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);
8371: VecResetArray(pcbddc->vec1_P);
8372: ISDestroy(&dummy);
8373: VecDestroy(&v);
8374: }
8375: }
8376: if (reuser) {
8377: PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);
8378: } else {
8379: PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);
8380: }
8381: if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
8382: PetscScalar *arraym;
8383: const PetscScalar *arrayv;
8384: PetscInt nl;
8385: VecGetLocalSize(vp[0],&nl);
8386: MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);
8387: MatDenseGetArray(coarsedivudotp,&arraym);
8388: VecGetArrayRead(vp[0],&arrayv);
8389: PetscArraycpy(arraym,arrayv,nl);
8390: VecRestoreArrayRead(vp[0],&arrayv);
8391: MatDenseRestoreArray(coarsedivudotp,&arraym);
8392: VecDestroy(&vp[0]);
8393: } else {
8394: MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);
8395: }
8396: } else {
8397: PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);
8398: }
8399: if (coarse_mat_is || coarse_mat) {
8400: if (!multilevel_allowed) {
8401: MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat);
8402: } else {
8403: /* if this matrix is present, it means we are not reusing the coarse matrix */
8404: if (coarse_mat_is) {
8405: if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen");
8406: PetscObjectReference((PetscObject)coarse_mat_is);
8407: coarse_mat = coarse_mat_is;
8408: }
8409: }
8410: }
8411: MatDestroy(&t_coarse_mat_is);
8412: MatDestroy(&coarse_mat_is);
8414: /* create local to global scatters for coarse problem */
8415: if (compute_vecs) {
8416: PetscInt lrows;
8417: VecDestroy(&pcbddc->coarse_vec);
8418: if (coarse_mat) {
8419: MatGetLocalSize(coarse_mat,&lrows,NULL);
8420: } else {
8421: lrows = 0;
8422: }
8423: VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);
8424: VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);
8425: VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD);
8426: VecScatterDestroy(&pcbddc->coarse_loc_to_glob);
8427: VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);
8428: }
8429: ISDestroy(&coarse_is);
8431: /* set defaults for coarse KSP and PC */
8432: if (multilevel_allowed) {
8433: coarse_ksp_type = KSPRICHARDSON;
8434: coarse_pc_type = PCBDDC;
8435: } else {
8436: coarse_ksp_type = KSPPREONLY;
8437: coarse_pc_type = PCREDUNDANT;
8438: }
8440: /* print some info if requested */
8441: if (pcbddc->dbg_flag) {
8442: if (!multilevel_allowed) {
8443: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8444: if (multilevel_requested) {
8445: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %D (active processes %D, coarsening ratio %D)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);
8446: } else if (pcbddc->max_levels) {
8447: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%D)\n",pcbddc->max_levels);
8448: }
8449: PetscViewerFlush(pcbddc->dbg_viewer);
8450: }
8451: }
8453: /* communicate coarse discrete gradient */
8454: coarseG = NULL;
8455: if (pcbddc->nedcG && multilevel_allowed) {
8456: MPI_Comm ccomm;
8457: if (coarse_mat) {
8458: ccomm = PetscObjectComm((PetscObject)coarse_mat);
8459: } else {
8460: ccomm = MPI_COMM_NULL;
8461: }
8462: MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);
8463: }
8465: /* create the coarse KSP object only once with defaults */
8466: if (coarse_mat) {
8467: PetscBool isredundant,isbddc,force,valid;
8468: PetscViewer dbg_viewer = NULL;
8470: if (pcbddc->dbg_flag) {
8471: dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8472: PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);
8473: }
8474: if (!pcbddc->coarse_ksp) {
8475: char prefix[256],str_level[16];
8476: size_t len;
8478: KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);
8479: KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);
8480: PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);
8481: KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);
8482: KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
8483: KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);
8484: KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);
8485: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8486: /* TODO is this logic correct? should check for coarse_mat type */
8487: PCSetType(pc_temp,coarse_pc_type);
8488: /* prefix */
8489: PetscStrcpy(prefix,"");
8490: PetscStrcpy(str_level,"");
8491: if (!pcbddc->current_level) {
8492: PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix));
8493: PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix));
8494: } else {
8495: PetscStrlen(((PetscObject)pc)->prefix,&len);
8496: if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */
8497: if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */
8498: /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8499: PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);
8500: PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));
8501: PetscStrlcat(prefix,str_level,sizeof(prefix));
8502: }
8503: KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);
8504: /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8505: PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
8506: PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
8507: PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
8508: /* allow user customization */
8509: KSPSetFromOptions(pcbddc->coarse_ksp);
8510: /* get some info after set from options */
8511: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8512: /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8513: force = PETSC_FALSE;
8514: PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL);
8515: PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"");
8516: PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8517: if (multilevel_allowed && !force && !valid) {
8518: isbddc = PETSC_TRUE;
8519: PCSetType(pc_temp,PCBDDC);
8520: PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);
8521: PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);
8522: PCBDDCSetLevels(pc_temp,pcbddc->max_levels);
8523: if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */
8524: PetscObjectOptionsBegin((PetscObject)pc_temp);
8525: (*pc_temp->ops->setfromoptions)(PetscOptionsObject,pc_temp);
8526: PetscObjectProcessOptionsHandlers(PetscOptionsObject,(PetscObject)pc_temp);
8527: PetscOptionsEnd();
8528: pc_temp->setfromoptionscalled++;
8529: }
8530: }
8531: }
8532: /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8533: KSPGetPC(pcbddc->coarse_ksp,&pc_temp);
8534: if (nisdofs) {
8535: PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);
8536: for (i=0;i<nisdofs;i++) {
8537: ISDestroy(&isarray[i]);
8538: }
8539: }
8540: if (nisneu) {
8541: PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);
8542: ISDestroy(&isarray[nisdofs]);
8543: }
8544: if (nisvert) {
8545: PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);
8546: ISDestroy(&isarray[nis-1]);
8547: }
8548: if (coarseG) {
8549: PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);
8550: }
8552: /* get some info after set from options */
8553: PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8555: /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8556: if (isbddc && !multilevel_allowed) {
8557: PCSetType(pc_temp,coarse_pc_type);
8558: }
8559: /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8560: force = PETSC_FALSE;
8561: PetscOptionsGetBool(NULL,((PetscObject)pc_temp)->prefix,"-pc_type_forced",&force,NULL);
8562: PetscObjectTypeCompareAny((PetscObject)pc_temp,&valid,PCBDDC,PCNN,PCHPDDM,"");
8563: if (multilevel_requested && multilevel_allowed && !valid && !force) {
8564: PCSetType(pc_temp,PCBDDC);
8565: }
8566: PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);
8567: if (isredundant) {
8568: KSP inner_ksp;
8569: PC inner_pc;
8571: PCRedundantGetKSP(pc_temp,&inner_ksp);
8572: KSPGetPC(inner_ksp,&inner_pc);
8573: }
8575: /* parameters which miss an API */
8576: PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);
8577: if (isbddc) {
8578: PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data;
8580: pcbddc_coarse->detect_disconnected = PETSC_TRUE;
8581: pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
8582: pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit;
8583: pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
8584: if (pcbddc_coarse->benign_saddle_point) {
8585: Mat coarsedivudotp_is;
8586: ISLocalToGlobalMapping l2gmap,rl2g,cl2g;
8587: IS row,col;
8588: const PetscInt *gidxs;
8589: PetscInt n,st,M,N;
8591: MatGetSize(coarsedivudotp,&n,NULL);
8592: MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));
8593: st = st-n;
8594: ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);
8595: MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);
8596: ISLocalToGlobalMappingGetSize(l2gmap,&n);
8597: ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);
8598: ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);
8599: ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);
8600: ISLocalToGlobalMappingCreateIS(row,&rl2g);
8601: ISLocalToGlobalMappingCreateIS(col,&cl2g);
8602: ISGetSize(row,&M);
8603: MatGetSize(coarse_mat,&N,NULL);
8604: ISDestroy(&row);
8605: ISDestroy(&col);
8606: MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);
8607: MatSetType(coarsedivudotp_is,MATIS);
8608: MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);
8609: MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);
8610: ISLocalToGlobalMappingDestroy(&rl2g);
8611: ISLocalToGlobalMappingDestroy(&cl2g);
8612: MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);
8613: MatDestroy(&coarsedivudotp);
8614: PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);
8615: MatDestroy(&coarsedivudotp_is);
8616: pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
8617: if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
8618: }
8619: }
8621: /* propagate symmetry info of coarse matrix */
8622: MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);
8623: if (pc->pmat->symmetric_set) {
8624: MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);
8625: }
8626: if (pc->pmat->hermitian_set) {
8627: MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);
8628: }
8629: if (pc->pmat->spd_set) {
8630: MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);
8631: }
8632: if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) {
8633: MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);
8634: }
8635: /* set operators */
8636: MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view");
8637: MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix);
8638: KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);
8639: if (pcbddc->dbg_flag) {
8640: PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);
8641: }
8642: }
8643: MatDestroy(&coarseG);
8644: PetscFree(isarray);
8645: #if 0
8646: {
8647: PetscViewer viewer;
8648: char filename[256];
8649: sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level);
8650: PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);
8651: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);
8652: MatView(coarse_mat,viewer);
8653: PetscViewerPopFormat(viewer);
8654: PetscViewerDestroy(&viewer);
8655: }
8656: #endif
8658: if (corners) {
8659: Vec gv;
8660: IS is;
8661: const PetscInt *idxs;
8662: PetscInt i,d,N,n,cdim = pcbddc->mat_graph->cdim;
8663: PetscScalar *coords;
8665: if (!pcbddc->mat_graph->cloc) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing local coordinates");
8666: VecGetSize(pcbddc->coarse_vec,&N);
8667: VecGetLocalSize(pcbddc->coarse_vec,&n);
8668: VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec),&gv);
8669: VecSetBlockSize(gv,cdim);
8670: VecSetSizes(gv,n*cdim,N*cdim);
8671: VecSetType(gv,VECSTANDARD);
8672: VecSetFromOptions(gv);
8673: VecSet(gv,PETSC_MAX_REAL); /* we only propagate coordinates from vertices constraints */
8675: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);
8676: ISGetLocalSize(is,&n);
8677: ISGetIndices(is,&idxs);
8678: PetscMalloc1(n*cdim,&coords);
8679: for (i=0;i<n;i++) {
8680: for (d=0;d<cdim;d++) {
8681: coords[cdim*i+d] = pcbddc->mat_graph->coords[cdim*idxs[i]+d];
8682: }
8683: }
8684: ISRestoreIndices(is,&idxs);
8685: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&is);
8687: ISGetLocalSize(corners,&n);
8688: ISGetIndices(corners,&idxs);
8689: VecSetValuesBlocked(gv,n,idxs,coords,INSERT_VALUES);
8690: ISRestoreIndices(corners,&idxs);
8691: PetscFree(coords);
8692: VecAssemblyBegin(gv);
8693: VecAssemblyEnd(gv);
8694: VecGetArray(gv,&coords);
8695: if (pcbddc->coarse_ksp) {
8696: PC coarse_pc;
8697: PetscBool isbddc;
8699: KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);
8700: PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);
8701: if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */
8702: PetscReal *realcoords;
8704: VecGetLocalSize(gv,&n);
8705: #if defined(PETSC_USE_COMPLEX)
8706: PetscMalloc1(n,&realcoords);
8707: for (i=0;i<n;i++) realcoords[i] = PetscRealPart(coords[i]);
8708: #else
8709: realcoords = coords;
8710: #endif
8711: PCSetCoordinates(coarse_pc,cdim,n/cdim,realcoords);
8712: #if defined(PETSC_USE_COMPLEX)
8713: PetscFree(realcoords);
8714: #endif
8715: }
8716: }
8717: VecRestoreArray(gv,&coords);
8718: VecDestroy(&gv);
8719: }
8720: ISDestroy(&corners);
8722: if (pcbddc->coarse_ksp) {
8723: Vec crhs,csol;
8725: KSPGetSolution(pcbddc->coarse_ksp,&csol);
8726: KSPGetRhs(pcbddc->coarse_ksp,&crhs);
8727: if (!csol) {
8728: MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);
8729: }
8730: if (!crhs) {
8731: MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));
8732: }
8733: }
8734: MatDestroy(&coarsedivudotp);
8736: /* compute null space for coarse solver if the benign trick has been requested */
8737: if (pcbddc->benign_null) {
8739: VecSet(pcbddc->vec1_P,0.);
8740: for (i=0;i<pcbddc->benign_n;i++) {
8741: VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);
8742: }
8743: VecAssemblyBegin(pcbddc->vec1_P);
8744: VecAssemblyEnd(pcbddc->vec1_P);
8745: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8746: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);
8747: if (coarse_mat) {
8748: Vec nullv;
8749: PetscScalar *array,*array2;
8750: PetscInt nl;
8752: MatCreateVecs(coarse_mat,&nullv,NULL);
8753: VecGetLocalSize(nullv,&nl);
8754: VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8755: VecGetArray(nullv,&array2);
8756: PetscArraycpy(array2,array,nl);
8757: VecRestoreArray(nullv,&array2);
8758: VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);
8759: VecNormalize(nullv,NULL);
8760: MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);
8761: VecDestroy(&nullv);
8762: }
8763: }
8764: PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);
8766: PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);
8767: if (pcbddc->coarse_ksp) {
8768: PetscBool ispreonly;
8770: if (CoarseNullSpace) {
8771: PetscBool isnull;
8772: MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);
8773: if (isnull) {
8774: MatSetNullSpace(coarse_mat,CoarseNullSpace);
8775: }
8776: /* TODO: add local nullspaces (if any) */
8777: }
8778: /* setup coarse ksp */
8779: KSPSetUp(pcbddc->coarse_ksp);
8780: /* Check coarse problem if in debug mode or if solving with an iterative method */
8781: PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);
8782: if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) {
8783: KSP check_ksp;
8784: KSPType check_ksp_type;
8785: PC check_pc;
8786: Vec check_vec,coarse_vec;
8787: PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0;
8788: PetscInt its;
8789: PetscBool compute_eigs;
8790: PetscReal *eigs_r,*eigs_c;
8791: PetscInt neigs;
8792: const char *prefix;
8794: /* Create ksp object suitable for estimation of extreme eigenvalues */
8795: KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);
8796: PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0);
8797: KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE);
8798: KSPSetOperators(check_ksp,coarse_mat,coarse_mat);
8799: KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);
8800: /* prevent from setup unneeded object */
8801: KSPGetPC(check_ksp,&check_pc);
8802: PCSetType(check_pc,PCNONE);
8803: if (ispreonly) {
8804: check_ksp_type = KSPPREONLY;
8805: compute_eigs = PETSC_FALSE;
8806: } else {
8807: check_ksp_type = KSPGMRES;
8808: compute_eigs = PETSC_TRUE;
8809: }
8810: KSPSetType(check_ksp,check_ksp_type);
8811: KSPSetComputeSingularValues(check_ksp,compute_eigs);
8812: KSPSetComputeEigenvalues(check_ksp,compute_eigs);
8813: KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);
8814: KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);
8815: KSPSetOptionsPrefix(check_ksp,prefix);
8816: KSPAppendOptionsPrefix(check_ksp,"check_");
8817: KSPSetFromOptions(check_ksp);
8818: KSPSetUp(check_ksp);
8819: KSPGetPC(pcbddc->coarse_ksp,&check_pc);
8820: KSPSetPC(check_ksp,check_pc);
8821: /* create random vec */
8822: MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);
8823: VecSetRandom(check_vec,NULL);
8824: MatMult(coarse_mat,check_vec,coarse_vec);
8825: /* solve coarse problem */
8826: KSPSolve(check_ksp,coarse_vec,coarse_vec);
8827: KSPCheckSolve(check_ksp,pc,coarse_vec);
8828: /* set eigenvalue estimation if preonly has not been requested */
8829: if (compute_eigs) {
8830: PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);
8831: PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);
8832: KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);
8833: if (neigs) {
8834: lambda_max = eigs_r[neigs-1];
8835: lambda_min = eigs_r[0];
8836: if (pcbddc->use_coarse_estimates) {
8837: if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
8838: KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);
8839: KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));
8840: }
8841: }
8842: }
8843: }
8845: /* check coarse problem residual error */
8846: if (pcbddc->dbg_flag) {
8847: PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
8848: PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));
8849: VecAXPY(check_vec,-1.0,coarse_vec);
8850: VecNorm(check_vec,NORM_INFINITY,&infty_error);
8851: MatMult(coarse_mat,check_vec,coarse_vec);
8852: VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);
8853: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);
8854: PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);
8855: PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);
8856: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);
8857: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);
8858: if (CoarseNullSpace) {
8859: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");
8860: }
8861: if (compute_eigs) {
8862: PetscReal lambda_max_s,lambda_min_s;
8863: KSPConvergedReason reason;
8864: KSPGetType(check_ksp,&check_ksp_type);
8865: KSPGetIterationNumber(check_ksp,&its);
8866: KSPGetConvergedReason(check_ksp,&reason);
8867: KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);
8868: PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);
8869: for (i=0;i<neigs;i++) {
8870: PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);
8871: }
8872: }
8873: PetscViewerFlush(dbg_viewer);
8874: PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));
8875: }
8876: VecDestroy(&check_vec);
8877: VecDestroy(&coarse_vec);
8878: KSPDestroy(&check_ksp);
8879: if (compute_eigs) {
8880: PetscFree(eigs_r);
8881: PetscFree(eigs_c);
8882: }
8883: }
8884: }
8885: MatNullSpaceDestroy(&CoarseNullSpace);
8886: /* print additional info */
8887: if (pcbddc->dbg_flag) {
8888: /* wait until all processes reach this point */
8889: PetscBarrier((PetscObject)pc);
8890: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %D\n",pcbddc->current_level);
8891: PetscViewerFlush(pcbddc->dbg_viewer);
8892: }
8894: /* free memory */
8895: MatDestroy(&coarse_mat);
8896: PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);
8897: return(0);
8898: }
8900: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n)
8901: {
8902: PC_BDDC* pcbddc = (PC_BDDC*)pc->data;
8903: PC_IS* pcis = (PC_IS*)pc->data;
8904: Mat_IS* matis = (Mat_IS*)pc->pmat->data;
8905: IS subset,subset_mult,subset_n;
8906: PetscInt local_size,coarse_size=0;
8907: PetscInt *local_primal_indices=NULL;
8908: const PetscInt *t_local_primal_indices;
8912: /* Compute global number of coarse dofs */
8913: if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first");
8914: ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);
8915: ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);
8916: ISDestroy(&subset_n);
8917: ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);
8918: ISRenumber(subset,subset_mult,&coarse_size,&subset_n);
8919: ISDestroy(&subset);
8920: ISDestroy(&subset_mult);
8921: ISGetLocalSize(subset_n,&local_size);
8922: if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size);
8923: PetscMalloc1(local_size,&local_primal_indices);
8924: ISGetIndices(subset_n,&t_local_primal_indices);
8925: PetscArraycpy(local_primal_indices,t_local_primal_indices,local_size);
8926: ISRestoreIndices(subset_n,&t_local_primal_indices);
8927: ISDestroy(&subset_n);
8929: /* check numbering */
8930: if (pcbddc->dbg_flag) {
8931: PetscScalar coarsesum,*array,*array2;
8932: PetscInt i;
8933: PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE;
8935: PetscViewerFlush(pcbddc->dbg_viewer);
8936: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");
8937: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");
8938: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8939: /* counter */
8940: VecSet(pcis->vec1_global,0.0);
8941: VecSet(pcis->vec1_N,1.0);
8942: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8943: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8944: VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8945: VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);
8946: VecSet(pcis->vec1_N,0.0);
8947: for (i=0;i<pcbddc->local_primal_size;i++) {
8948: VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);
8949: }
8950: VecAssemblyBegin(pcis->vec1_N);
8951: VecAssemblyEnd(pcis->vec1_N);
8952: VecSet(pcis->vec1_global,0.0);
8953: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8954: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8955: VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8956: VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);
8957: VecGetArray(pcis->vec1_N,&array);
8958: VecGetArray(pcis->vec2_N,&array2);
8959: for (i=0;i<pcis->n;i++) {
8960: if (array[i] != 0.0 && array[i] != array2[i]) {
8961: PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi;
8962: PetscInt neigh = (PetscInt)PetscRealPart(array2[i]);
8963: set_error = PETSC_TRUE;
8964: ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);
8965: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %D (gid %D) owned by %D processes instead of %D!\n",PetscGlobalRank,i,gi,owned,neigh);
8966: }
8967: }
8968: VecRestoreArray(pcis->vec2_N,&array2);
8969: MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
8970: PetscViewerFlush(pcbddc->dbg_viewer);
8971: for (i=0;i<pcis->n;i++) {
8972: if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]);
8973: }
8974: VecRestoreArray(pcis->vec1_N,&array);
8975: VecSet(pcis->vec1_global,0.0);
8976: VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8977: VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);
8978: VecSum(pcis->vec1_global,&coarsesum);
8979: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %D (%lf)\n",coarse_size,PetscRealPart(coarsesum));
8980: if (pcbddc->dbg_flag > 1 || set_error_reduced) {
8981: PetscInt *gidxs;
8983: PetscMalloc1(pcbddc->local_primal_size,&gidxs);
8984: ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);
8985: PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");
8986: PetscViewerFlush(pcbddc->dbg_viewer);
8987: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);
8988: for (i=0;i<pcbddc->local_primal_size;i++) {
8989: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%D]=%D (%D,%D)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);
8990: }
8991: PetscViewerFlush(pcbddc->dbg_viewer);
8992: PetscFree(gidxs);
8993: }
8994: PetscViewerFlush(pcbddc->dbg_viewer);
8995: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
8996: if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed");
8997: }
8999: /* get back data */
9000: *coarse_size_n = coarse_size;
9001: *local_primal_indices_n = local_primal_indices;
9002: return(0);
9003: }
9005: PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis)
9006: {
9007: IS localis_t;
9008: PetscInt i,lsize,*idxs,n;
9009: PetscScalar *vals;
9013: /* get indices in local ordering exploiting local to global map */
9014: ISGetLocalSize(globalis,&lsize);
9015: PetscMalloc1(lsize,&vals);
9016: for (i=0;i<lsize;i++) vals[i] = 1.0;
9017: ISGetIndices(globalis,(const PetscInt**)&idxs);
9018: VecSet(gwork,0.0);
9019: VecSet(lwork,0.0);
9020: if (idxs) { /* multilevel guard */
9021: VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);
9022: VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);
9023: }
9024: VecAssemblyBegin(gwork);
9025: ISRestoreIndices(globalis,(const PetscInt**)&idxs);
9026: PetscFree(vals);
9027: VecAssemblyEnd(gwork);
9028: /* now compute set in local ordering */
9029: VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
9030: VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);
9031: VecGetArrayRead(lwork,(const PetscScalar**)&vals);
9032: VecGetSize(lwork,&n);
9033: for (i=0,lsize=0;i<n;i++) {
9034: if (PetscRealPart(vals[i]) > 0.5) {
9035: lsize++;
9036: }
9037: }
9038: PetscMalloc1(lsize,&idxs);
9039: for (i=0,lsize=0;i<n;i++) {
9040: if (PetscRealPart(vals[i]) > 0.5) {
9041: idxs[lsize++] = i;
9042: }
9043: }
9044: VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);
9045: ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);
9046: *localis = localis_t;
9047: return(0);
9048: }
9050: PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
9051: {
9052: PC_IS *pcis=(PC_IS*)pc->data;
9053: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
9054: PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs;
9055: Mat S_j;
9056: PetscInt *used_xadj,*used_adjncy;
9057: PetscBool free_used_adj;
9058: PetscErrorCode ierr;
9061: PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);
9062: /* decide the adjacency to be used for determining internal problems for local schur on subsets */
9063: free_used_adj = PETSC_FALSE;
9064: if (pcbddc->sub_schurs_layers == -1) {
9065: used_xadj = NULL;
9066: used_adjncy = NULL;
9067: } else {
9068: if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
9069: used_xadj = pcbddc->mat_graph->xadj;
9070: used_adjncy = pcbddc->mat_graph->adjncy;
9071: } else if (pcbddc->computed_rowadj) {
9072: used_xadj = pcbddc->mat_graph->xadj;
9073: used_adjncy = pcbddc->mat_graph->adjncy;
9074: } else {
9075: PetscBool flg_row=PETSC_FALSE;
9076: const PetscInt *xadj,*adjncy;
9077: PetscInt nvtxs;
9079: MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
9080: if (flg_row) {
9081: PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);
9082: PetscArraycpy(used_xadj,xadj,nvtxs+1);
9083: PetscArraycpy(used_adjncy,adjncy,xadj[nvtxs]);
9084: free_used_adj = PETSC_TRUE;
9085: } else {
9086: pcbddc->sub_schurs_layers = -1;
9087: used_xadj = NULL;
9088: used_adjncy = NULL;
9089: }
9090: MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);
9091: }
9092: }
9094: /* setup sub_schurs data */
9095: MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
9096: if (!sub_schurs->schur_explicit) {
9097: /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
9098: MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
9099: PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);
9100: } else {
9101: Mat change = NULL;
9102: Vec scaling = NULL;
9103: IS change_primal = NULL, iP;
9104: PetscInt benign_n;
9105: PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
9106: PetscBool need_change = PETSC_FALSE;
9107: PetscBool discrete_harmonic = PETSC_FALSE;
9109: if (!pcbddc->use_vertices && reuse_solvers) {
9110: PetscInt n_vertices;
9112: ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);
9113: reuse_solvers = (PetscBool)!n_vertices;
9114: }
9115: if (!pcbddc->benign_change_explicit) {
9116: benign_n = pcbddc->benign_n;
9117: } else {
9118: benign_n = 0;
9119: }
9120: /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
9121: We need a global reduction to avoid possible deadlocks.
9122: We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
9123: if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
9124: PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
9125: MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));
9126: need_change = (PetscBool)(!need_change);
9127: }
9128: /* If the user defines additional constraints, we import them here.
9129: We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */
9130: if (need_change) {
9131: PC_IS *pcisf;
9132: PC_BDDC *pcbddcf;
9133: PC pcf;
9135: if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph");
9136: PCCreate(PetscObjectComm((PetscObject)pc),&pcf);
9137: PCSetOperators(pcf,pc->mat,pc->pmat);
9138: PCSetType(pcf,PCBDDC);
9140: /* hacks */
9141: pcisf = (PC_IS*)pcf->data;
9142: pcisf->is_B_local = pcis->is_B_local;
9143: pcisf->vec1_N = pcis->vec1_N;
9144: pcisf->BtoNmap = pcis->BtoNmap;
9145: pcisf->n = pcis->n;
9146: pcisf->n_B = pcis->n_B;
9147: pcbddcf = (PC_BDDC*)pcf->data;
9148: PetscFree(pcbddcf->mat_graph);
9149: pcbddcf->mat_graph = pcbddc->mat_graph;
9150: pcbddcf->use_faces = PETSC_TRUE;
9151: pcbddcf->use_change_of_basis = PETSC_TRUE;
9152: pcbddcf->use_change_on_faces = PETSC_TRUE;
9153: pcbddcf->use_qr_single = PETSC_TRUE;
9154: pcbddcf->fake_change = PETSC_TRUE;
9156: /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */
9157: PCBDDCConstraintsSetUp(pcf);
9158: sub_schurs->change_with_qr = pcbddcf->use_qr_single;
9159: ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);
9160: change = pcbddcf->ConstraintMatrix;
9161: pcbddcf->ConstraintMatrix = NULL;
9163: /* free unneeded memory allocated in PCBDDCConstraintsSetUp */
9164: PetscFree(pcbddcf->sub_schurs);
9165: MatNullSpaceDestroy(&pcbddcf->onearnullspace);
9166: PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);
9167: PetscFree(pcbddcf->primal_indices_local_idxs);
9168: PetscFree(pcbddcf->onearnullvecs_state);
9169: PetscFree(pcf->data);
9170: pcf->ops->destroy = NULL;
9171: pcf->ops->reset = NULL;
9172: PCDestroy(&pcf);
9173: }
9174: if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;
9176: PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);
9177: if (iP) {
9178: PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");
9179: PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);
9180: PetscOptionsEnd();
9181: }
9182: if (discrete_harmonic) {
9183: Mat A;
9184: MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);
9185: MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);
9186: PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);
9187: PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
9188: MatDestroy(&A);
9189: } else {
9190: PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);
9191: }
9192: MatDestroy(&change);
9193: ISDestroy(&change_primal);
9194: }
9195: MatDestroy(&S_j);
9197: /* free adjacency */
9198: if (free_used_adj) {
9199: PetscFree2(used_xadj,used_adjncy);
9200: }
9201: PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);
9202: return(0);
9203: }
9205: PetscErrorCode PCBDDCInitSubSchurs(PC pc)
9206: {
9207: PC_IS *pcis=(PC_IS*)pc->data;
9208: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
9209: PCBDDCGraph graph;
9210: PetscErrorCode ierr;
9213: /* attach interface graph for determining subsets */
9214: if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
9215: IS verticesIS,verticescomm;
9216: PetscInt vsize,*idxs;
9218: PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
9219: ISGetSize(verticesIS,&vsize);
9220: ISGetIndices(verticesIS,(const PetscInt**)&idxs);
9221: ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);
9222: ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);
9223: PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);
9224: PCBDDCGraphCreate(&graph);
9225: PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);
9226: PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);
9227: ISDestroy(&verticescomm);
9228: PCBDDCGraphComputeConnectedComponents(graph);
9229: } else {
9230: graph = pcbddc->mat_graph;
9231: }
9232: /* print some info */
9233: if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
9234: IS vertices;
9235: PetscInt nv,nedges,nfaces;
9236: PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);
9237: PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
9238: ISGetSize(vertices,&nv);
9239: PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);
9240: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");
9241: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,(int)nv,pcbddc->use_vertices);
9242: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,(int)nedges,pcbddc->use_edges);
9243: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,(int)nfaces,pcbddc->use_faces);
9244: PetscViewerFlush(pcbddc->dbg_viewer);
9245: PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);
9246: PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);
9247: }
9249: /* sub_schurs init */
9250: if (!pcbddc->sub_schurs) {
9251: PCBDDCSubSchursCreate(&pcbddc->sub_schurs);
9252: }
9253: PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);
9255: /* free graph struct */
9256: if (pcbddc->sub_schurs_rebuild) {
9257: PCBDDCGraphDestroy(&graph);
9258: }
9259: return(0);
9260: }
9262: PetscErrorCode PCBDDCCheckOperator(PC pc)
9263: {
9264: PC_IS *pcis=(PC_IS*)pc->data;
9265: PC_BDDC *pcbddc=(PC_BDDC*)pc->data;
9266: PetscErrorCode ierr;
9269: if (pcbddc->n_vertices == pcbddc->local_primal_size) {
9270: IS zerodiag = NULL;
9271: Mat S_j,B0_B=NULL;
9272: Vec dummy_vec=NULL,vec_check_B,vec_scale_P;
9273: PetscScalar *p0_check,*array,*array2;
9274: PetscReal norm;
9275: PetscInt i;
9277: /* B0 and B0_B */
9278: if (zerodiag) {
9279: IS dummy;
9281: ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);
9282: MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);
9283: MatCreateVecs(B0_B,NULL,&dummy_vec);
9284: ISDestroy(&dummy);
9285: }
9286: /* I need a primal vector to scale primal nodes since BDDC sums contibutions */
9287: VecDuplicate(pcbddc->vec1_P,&vec_scale_P);
9288: VecSet(pcbddc->vec1_P,1.0);
9289: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9290: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9291: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
9292: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);
9293: VecReciprocal(vec_scale_P);
9294: /* S_j */
9295: MatCreateSchurComplement(pcis->A_II,pcis->pA_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);
9296: MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);
9298: /* mimic vector in \widetilde{W}_\Gamma */
9299: VecSetRandom(pcis->vec1_N,NULL);
9300: /* continuous in primal space */
9301: VecSetRandom(pcbddc->coarse_vec,NULL);
9302: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9303: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9304: VecGetArray(pcbddc->vec1_P,&array);
9305: PetscCalloc1(pcbddc->benign_n,&p0_check);
9306: for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i];
9307: VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
9308: VecRestoreArray(pcbddc->vec1_P,&array);
9309: VecAssemblyBegin(pcis->vec1_N);
9310: VecAssemblyEnd(pcis->vec1_N);
9311: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
9312: VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);
9313: VecDuplicate(pcis->vec2_B,&vec_check_B);
9314: VecCopy(pcis->vec2_B,vec_check_B);
9316: /* assemble rhs for coarse problem */
9317: /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */
9318: /* local with Schur */
9319: MatMult(S_j,pcis->vec2_B,pcis->vec1_B);
9320: if (zerodiag) {
9321: VecGetArray(dummy_vec,&array);
9322: for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i];
9323: VecRestoreArray(dummy_vec,&array);
9324: MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);
9325: }
9326: /* sum on primal nodes the local contributions */
9327: VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
9328: VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);
9329: VecGetArray(pcis->vec1_N,&array);
9330: VecGetArray(pcbddc->vec1_P,&array2);
9331: for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]];
9332: VecRestoreArray(pcbddc->vec1_P,&array2);
9333: VecRestoreArray(pcis->vec1_N,&array);
9334: VecSet(pcbddc->coarse_vec,0.);
9335: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9336: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);
9337: VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9338: VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);
9339: VecGetArray(pcbddc->vec1_P,&array);
9340: /* scale primal nodes (BDDC sums contibutions) */
9341: VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);
9342: VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);
9343: VecRestoreArray(pcbddc->vec1_P,&array);
9344: VecAssemblyBegin(pcis->vec1_N);
9345: VecAssemblyEnd(pcis->vec1_N);
9346: VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
9347: VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);
9348: /* global: \widetilde{B0}_B w_\Gamma */
9349: if (zerodiag) {
9350: MatMult(B0_B,pcis->vec2_B,dummy_vec);
9351: VecGetArray(dummy_vec,&array);
9352: for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i];
9353: VecRestoreArray(dummy_vec,&array);
9354: }
9355: /* BDDC */
9356: VecSet(pcis->vec1_D,0.);
9357: PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);
9359: VecCopy(pcis->vec1_B,pcis->vec2_B);
9360: VecAXPY(pcis->vec1_B,-1.0,vec_check_B);
9361: VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);
9362: PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);
9363: for (i=0;i<pcbddc->benign_n;i++) {
9364: PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%D] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));
9365: }
9366: PetscFree(p0_check);
9367: VecDestroy(&vec_scale_P);
9368: VecDestroy(&vec_check_B);
9369: VecDestroy(&dummy_vec);
9370: MatDestroy(&S_j);
9371: MatDestroy(&B0_B);
9372: }
9373: return(0);
9374: }
9376: #include <../src/mat/impls/aij/mpi/mpiaij.h>
9377: PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
9378: {
9379: Mat At;
9380: IS rows;
9381: PetscInt rst,ren;
9383: PetscLayout rmap;
9386: rst = ren = 0;
9387: if (ccomm != MPI_COMM_NULL) {
9388: PetscLayoutCreate(ccomm,&rmap);
9389: PetscLayoutSetSize(rmap,A->rmap->N);
9390: PetscLayoutSetBlockSize(rmap,1);
9391: PetscLayoutSetUp(rmap);
9392: PetscLayoutGetRange(rmap,&rst,&ren);
9393: }
9394: ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);
9395: MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);
9396: ISDestroy(&rows);
9398: if (ccomm != MPI_COMM_NULL) {
9399: Mat_MPIAIJ *a,*b;
9400: IS from,to;
9401: Vec gvec;
9402: PetscInt lsize;
9404: MatCreate(ccomm,B);
9405: MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);
9406: MatSetType(*B,MATAIJ);
9407: PetscLayoutDestroy(&((*B)->rmap));
9408: PetscLayoutSetUp((*B)->cmap);
9409: a = (Mat_MPIAIJ*)At->data;
9410: b = (Mat_MPIAIJ*)(*B)->data;
9411: MPI_Comm_size(ccomm,&b->size);
9412: MPI_Comm_rank(ccomm,&b->rank);
9413: PetscObjectReference((PetscObject)a->A);
9414: PetscObjectReference((PetscObject)a->B);
9415: b->A = a->A;
9416: b->B = a->B;
9418: b->donotstash = a->donotstash;
9419: b->roworiented = a->roworiented;
9420: b->rowindices = NULL;
9421: b->rowvalues = NULL;
9422: b->getrowactive = PETSC_FALSE;
9424: (*B)->rmap = rmap;
9425: (*B)->factortype = A->factortype;
9426: (*B)->assembled = PETSC_TRUE;
9427: (*B)->insertmode = NOT_SET_VALUES;
9428: (*B)->preallocated = PETSC_TRUE;
9430: if (a->colmap) {
9431: #if defined(PETSC_USE_CTABLE)
9432: PetscTableCreateCopy(a->colmap,&b->colmap);
9433: #else
9434: PetscMalloc1(At->cmap->N,&b->colmap);
9435: PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));
9436: PetscArraycpy(b->colmap,a->colmap,At->cmap->N);
9437: #endif
9438: } else b->colmap = NULL;
9439: if (a->garray) {
9440: PetscInt len;
9441: len = a->B->cmap->n;
9442: PetscMalloc1(len+1,&b->garray);
9443: PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));
9444: if (len) { PetscArraycpy(b->garray,a->garray,len); }
9445: } else b->garray = NULL;
9447: PetscObjectReference((PetscObject)a->lvec);
9448: b->lvec = a->lvec;
9449: PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);
9451: /* cannot use VecScatterCopy */
9452: VecGetLocalSize(b->lvec,&lsize);
9453: ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);
9454: ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);
9455: MatCreateVecs(*B,&gvec,NULL);
9456: VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);
9457: PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);
9458: ISDestroy(&from);
9459: ISDestroy(&to);
9460: VecDestroy(&gvec);
9461: }
9462: MatDestroy(&At);
9463: return(0);
9464: }