Actual source code: mumps.c
petsc-3.14.0 2020-09-29
2: /*
3: Provides an interface to the MUMPS sparse solver
4: */
5: #include <petscpkg_version.h>
6: #include <../src/mat/impls/aij/mpi/mpiaij.h>
7: #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8: #include <../src/mat/impls/sell/mpi/mpisell.h>
10: EXTERN_C_BEGIN
11: #if defined(PETSC_USE_COMPLEX)
12: #if defined(PETSC_USE_REAL_SINGLE)
13: #include <cmumps_c.h>
14: #else
15: #include <zmumps_c.h>
16: #endif
17: #else
18: #if defined(PETSC_USE_REAL_SINGLE)
19: #include <smumps_c.h>
20: #else
21: #include <dmumps_c.h>
22: #endif
23: #endif
24: EXTERN_C_END
25: #define JOB_INIT -1
26: #define JOB_FACTSYMBOLIC 1
27: #define JOB_FACTNUMERIC 2
28: #define JOB_SOLVE 3
29: #define JOB_END -2
31: /* calls to MUMPS */
32: #if defined(PETSC_USE_COMPLEX)
33: #if defined(PETSC_USE_REAL_SINGLE)
34: #define MUMPS_c cmumps_c
35: #else
36: #define MUMPS_c zmumps_c
37: #endif
38: #else
39: #if defined(PETSC_USE_REAL_SINGLE)
40: #define MUMPS_c smumps_c
41: #else
42: #define MUMPS_c dmumps_c
43: #endif
44: #endif
46: /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
47: number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
48: naming convention in PetscMPIInt, PetscBLASInt etc.
49: */
50: typedef MUMPS_INT PetscMUMPSInt;
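/* Note: in a default (non full 64-bit) MUMPS build, MUMPS_INT is a 32-bit int, so PetscMUMPSInt is 32-bit even
   when PETSc is configured with 64-bit PetscInt; hence the MPI_INT datatype, the 32-bit limits, and the
   range-checked cast defined below. */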
52: #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
53: #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in the MUMPS headers if it is built in full 64-bit mode, therefore checking this macro is more reliable */
54: #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
55: #endif
56: #else
57: #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
58: #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
59: #endif
60: #endif
62: #define MPIU_MUMPSINT MPI_INT
63: #define PETSC_MUMPS_INT_MAX 2147483647
64: #define PETSC_MUMPS_INT_MIN -2147483648
66: /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/col indices or other small integers */
67: PETSC_STATIC_INLINE PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
68: {
70: if (PetscDefined(USE_64BIT_INDICES) && PetscUnlikelyDebug(a > PETSC_MUMPS_INT_MAX || a < PETSC_MUMPS_INT_MIN)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
71: *b = (PetscMUMPSInt)(a);
72: return(0);
73: }
75: /* Put these utility routines here since they are only used in this file */
76: PETSC_STATIC_INLINE PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
77: {
79: PetscInt myval;
80: PetscBool myset;
82: /* PetscInt's size should always be >= PetscMUMPSInt's, so it is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
83: PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub);
84: if (myset) {PetscMUMPSIntCast(myval,value);}
85: if (set) *set = myset;
86: return(0);
87: }
88: #define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)
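/* For illustration only (a hypothetical call, not necessarily part of this file's option set): inside a
   PetscOptionsBegin/End block a MUMPS control parameter would be read roughly as
     PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing","None",mumps->id.ICNTL(4),&icntl,&flg);
   where icntl is a PetscMUMPSInt and flg a PetscBool; the helper above reads a PetscInt and narrows it with a
   range check before storing it. */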
90: /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore the CPUs the master ranks can run on */
91: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
92: #define PetscMUMPS_c(mumps) \
93: do { \
94: if (mumps->use_petsc_omp_support) { \
95: if (mumps->is_omp_master) { \
96: PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl); \
97: MUMPS_c(&mumps->id); \
98: PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl); \
99: } \
100: PetscOmpCtrlBarrier(mumps->omp_ctrl); \
101: /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is specific \
102: to each process, so we only Bcast info[1] (an error code) and leave the others (since they do not have \
103: an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \
104: omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
105: */ \
106: MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm); \
107: MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL, 0,mumps->omp_comm); \
108: MPI_Bcast(mumps->id.info, 1, MPIU_MUMPSINT, 0,mumps->omp_comm); \
109: } else { \
110: MUMPS_c(&mumps->id); \
111: } \
112: } while (0)
113: #else
114: #define PetscMUMPS_c(mumps) \
115: do { MUMPS_c(&mumps->id); } while (0)
116: #endif
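/* Typical usage pattern throughout this file: set the requested job, invoke MUMPS through the wrapper,
   then check the global error code, e.g.
     mumps->id.job = JOB_FACTSYMBOLIC;
     PetscMUMPS_c(mumps);
     if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1));
*/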
118: /* declare MumpsScalar */
119: #if defined(PETSC_USE_COMPLEX)
120: #if defined(PETSC_USE_REAL_SINGLE)
121: #define MumpsScalar mumps_complex
122: #else
123: #define MumpsScalar mumps_double_complex
124: #endif
125: #else
126: #define MumpsScalar PetscScalar
127: #endif
129: /* macros s.t. indices match MUMPS documentation */
130: #define ICNTL(I) icntl[(I)-1]
131: #define CNTL(I) cntl[(I)-1]
132: #define INFOG(I) infog[(I)-1]
133: #define INFO(I) info[(I)-1]
134: #define RINFOG(I) rinfog[(I)-1]
135: #define RINFO(I) rinfo[(I)-1]
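/* For example, mumps->id.ICNTL(7) expands to mumps->id.icntl[6] and mumps->id.INFOG(1) to mumps->id.infog[0],
   so the 1-based numbering used in the MUMPS manual can be used directly in this file. */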
137: typedef struct Mat_MUMPS Mat_MUMPS;
138: struct Mat_MUMPS {
139: #if defined(PETSC_USE_COMPLEX)
140: #if defined(PETSC_USE_REAL_SINGLE)
141: CMUMPS_STRUC_C id;
142: #else
143: ZMUMPS_STRUC_C id;
144: #endif
145: #else
146: #if defined(PETSC_USE_REAL_SINGLE)
147: SMUMPS_STRUC_C id;
148: #else
149: DMUMPS_STRUC_C id;
150: #endif
151: #endif
153: MatStructure matstruc;
154: PetscMPIInt myid,petsc_size;
155: PetscMUMPSInt *irn,*jcn; /* the (i,j,v) triplets passed to mumps. */
156: PetscScalar *val,*val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer, hence val_alloc. */
157: PetscInt64 nnz; /* number of nonzeros. The type is called selective 64-bit in mumps */
158: PetscMUMPSInt sym;
159: MPI_Comm mumps_comm;
160: PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */
161: VecScatter scat_rhs, scat_sol; /* used by MatSolve() */
162: PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */
163: PetscMUMPSInt lrhs_loc,nloc_rhs,*irhs_loc;
164: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
165: PetscInt *rhs_nrow,max_nrhs;
166: PetscMPIInt *rhs_recvcounts,*rhs_disps;
167: PetscScalar *rhs_loc,*rhs_recvbuf;
168: #endif
169: Vec b_seq,x_seq;
170: PetscInt ninfo,*info; /* which INFO to display */
171: PetscInt sizeredrhs;
172: PetscScalar *schur_sol;
173: PetscInt schur_sizesol;
174: PetscMUMPSInt *ia_alloc,*ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
175: PetscInt64 cur_ilen,cur_jlen; /* current len of ia_alloc[], ja_alloc[] */
176: PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*);
178: /* stuff used by petsc/mumps OpenMP support*/
179: PetscBool use_petsc_omp_support;
180: PetscOmpCtrl omp_ctrl; /* an OpenMP controller with which blocked processes release their CPUs (MPI_Barrier does not provide this guarantee) */
181: MPI_Comm petsc_comm,omp_comm; /* petsc_comm is petsc matrix's comm */
182: PetscInt64 *recvcount; /* a collection of nnz on omp_master */
183: PetscMPIInt tag,omp_comm_size;
184: PetscBool is_omp_master; /* is this rank the master of omp_comm */
185: MPI_Request *reqs;
186: };
188: /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
189: Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
190: */
191: static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
192: {
194: PetscInt nnz=ia[nrow]-1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */
197: #if defined(PETSC_USE_64BIT_INDICES)
198: {
199: PetscInt i;
200: if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
201: PetscFree(mumps->ia_alloc);
202: PetscMalloc1(nrow+1,&mumps->ia_alloc);
203: mumps->cur_ilen = nrow+1;
204: }
205: if (nnz > mumps->cur_jlen) {
206: PetscFree(mumps->ja_alloc);
207: PetscMalloc1(nnz,&mumps->ja_alloc);
208: mumps->cur_jlen = nnz;
209: }
210: for (i=0; i<nrow+1; i++) {PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i]));}
211: for (i=0; i<nnz; i++) {PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i]));}
212: *ia_mumps = mumps->ia_alloc;
213: *ja_mumps = mumps->ja_alloc;
214: }
215: #else
216: *ia_mumps = ia;
217: *ja_mumps = ja;
218: #endif
219: PetscMUMPSIntCast(nnz,nnz_mumps);
220: return(0);
221: }
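/* As a small example of the 1-based CSR layout expected here: for the 2x2 identity matrix,
   nrow = 2, ia = {1,2,3}, ja = {1,2}, so nnz = ia[nrow]-1 = 2. */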
223: static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
224: {
228: PetscFree(mumps->id.listvar_schur);
229: PetscFree(mumps->id.redrhs);
230: PetscFree(mumps->schur_sol);
231: mumps->id.size_schur = 0;
232: mumps->id.schur_lld = 0;
233: mumps->id.ICNTL(19) = 0;
234: return(0);
235: }
237: /* solve with rhs in mumps->id.redrhs and return in the same location */
238: static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
239: {
240: Mat_MUMPS *mumps=(Mat_MUMPS*)F->data;
241: Mat S,B,X;
242: MatFactorSchurStatus schurstatus;
243: PetscInt sizesol;
244: PetscErrorCode ierr;
247: MatFactorFactorizeSchurComplement(F);
248: MatFactorGetSchurComplement(F,&S,&schurstatus);
249: MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);
250: MatSetType(B,((PetscObject)S)->type_name);
251: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
252: MatBindToCPU(B,S->boundtocpu);
253: #endif
254: switch (schurstatus) {
255: case MAT_FACTOR_SCHUR_FACTORED:
256: MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);
257: MatSetType(X,((PetscObject)S)->type_name);
258: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
259: MatBindToCPU(X,S->boundtocpu);
260: #endif
261: if (!mumps->id.ICNTL(9)) { /* transpose solve */
262: MatMatSolveTranspose(S,B,X);
263: } else {
264: MatMatSolve(S,B,X);
265: }
266: break;
267: case MAT_FACTOR_SCHUR_INVERTED:
268: sizesol = mumps->id.nrhs*mumps->id.size_schur;
269: if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
270: PetscFree(mumps->schur_sol);
271: PetscMalloc1(sizesol,&mumps->schur_sol);
272: mumps->schur_sizesol = sizesol;
273: }
274: MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);
275: MatSetType(X,((PetscObject)S)->type_name);
276: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
277: MatBindToCPU(X,S->boundtocpu);
278: #endif
279: MatProductCreateWithMat(S,B,NULL,X);
280: if (!mumps->id.ICNTL(9)) { /* transpose solve */
281: MatProductSetType(X,MATPRODUCT_AtB);
282: } else {
283: MatProductSetType(X,MATPRODUCT_AB);
284: }
285: MatProductSetFromOptions(X);
286: MatProductSymbolic(X);
287: MatProductNumeric(X);
289: MatCopy(X,B,SAME_NONZERO_PATTERN);
290: break;
291: default:
292: SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status);
293: break;
294: }
295: MatFactorRestoreSchurComplement(F,&S,schurstatus);
296: MatDestroy(&B);
297: MatDestroy(&X);
298: return(0);
299: }
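/* In short: the reduced right-hand sides stored in id.redrhs are overwritten with the Schur solution either by
   a solve with the factored S (MAT_FACTOR_SCHUR_FACTORED) or by a product with the explicitly inverted S
   (MAT_FACTOR_SCHUR_INVERTED); ICNTL(9) selects the transposed variant in both cases. */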
301: static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
302: {
303: Mat_MUMPS *mumps=(Mat_MUMPS*)F->data;
307: if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
308: return(0);
309: }
310: if (!expansion) { /* prepare for the condensation step */
311: PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
312: /* allocate MUMPS internal array to store reduced right-hand sides */
313: if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
314: PetscFree(mumps->id.redrhs);
315: mumps->id.lredrhs = mumps->id.size_schur;
316: PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);
317: mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
318: }
319: mumps->id.ICNTL(26) = 1; /* condensation phase */
320: } else { /* prepare for the expansion step */
321: /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
322: MatMumpsSolveSchur_Private(F);
323: mumps->id.ICNTL(26) = 2; /* expansion phase */
324: PetscMUMPS_c(mumps);
325: if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
326: /* restore defaults */
327: mumps->id.ICNTL(26) = -1;
328: /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
329: if (mumps->id.nrhs > 1) {
330: PetscFree(mumps->id.redrhs);
331: mumps->id.lredrhs = 0;
332: mumps->sizeredrhs = 0;
333: }
334: }
335: return(0);
336: }
338: /*
339: MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]
341: input:
342: A - matrix in aij,baij or sbaij format
343: shift - 0: C style output triple; 1: Fortran style output triple.
344: reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
345: MAT_REUSE_MATRIX: only the values in v array are updated
346: output:
347: nnz - dim of r, c, and v (number of local nonzero entries of A)
348: r, c, v - row and col index, matrix values (matrix triples)
350: The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
351: freed with PetscFree(mumps->irn). This is not ideal code; the fact that v is ONLY sometimes part of mumps->irn means
352: that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
354: */
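/* For example, for the 2x2 upper triangular matrix A = [a11 a12; 0 a22] with shift = 1 (Fortran-style indices),
   the produced triples are nnz = 3, r = {1,1,2}, c = {1,2,2}, v = {a11,a12,a22}. */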
356: PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
357: {
358: const PetscScalar *av;
359: const PetscInt *ai,*aj,*ajj,M=A->rmap->n;
360: PetscInt64 nz,rnz,i,j,k;
361: PetscErrorCode ierr;
362: PetscMUMPSInt *row,*col;
363: Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data;
366: MatSeqAIJGetArrayRead(A,&av);
367: mumps->val = (PetscScalar*)av;
368: if (reuse == MAT_INITIAL_MATRIX) {
369: nz = aa->nz;
370: ai = aa->i;
371: aj = aa->j;
372: PetscMalloc2(nz,&row,nz,&col);
373: for (i=k=0; i<M; i++) {
374: rnz = ai[i+1] - ai[i];
375: ajj = aj + ai[i];
376: for (j=0; j<rnz; j++) {
377: PetscMUMPSIntCast(i+shift,&row[k]);
378: PetscMUMPSIntCast(ajj[j] + shift,&col[k]);
379: k++;
380: }
381: }
382: mumps->irn = row;
383: mumps->jcn = col;
384: mumps->nnz = nz;
385: }
386: MatSeqAIJRestoreArrayRead(A,&av);
387: return(0);
388: }
390: PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
391: {
393: PetscInt64 nz,i,j,k,r;
394: Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
395: PetscMUMPSInt *row,*col;
398: mumps->val = a->val;
399: if (reuse == MAT_INITIAL_MATRIX) {
400: nz = a->sliidx[a->totalslices];
401: PetscMalloc2(nz,&row,nz,&col);
402: for (i=k=0; i<a->totalslices; i++) {
403: for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) {
404: PetscMUMPSIntCast(8*i+r+shift,&row[k++]);
405: }
406: }
407: for (i=0;i<nz;i++) {PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]);}
408: mumps->irn = row;
409: mumps->jcn = col;
410: mumps->nnz = nz;
411: }
412: return(0);
413: }
415: PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
416: {
417: Mat_SeqBAIJ *aa=(Mat_SeqBAIJ*)A->data;
418: const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
419: PetscInt64 M,nz,idx=0,rnz,i,j,k,m;
420: PetscInt bs;
422: PetscMUMPSInt *row,*col;
425: MatGetBlockSize(A,&bs);
426: M = A->rmap->N/bs;
427: mumps->val = aa->a;
428: if (reuse == MAT_INITIAL_MATRIX) {
429: ai = aa->i; aj = aa->j;
430: nz = bs2*aa->nz;
431: PetscMalloc2(nz,&row,nz,&col);
432: for (i=0; i<M; i++) {
433: ajj = aj + ai[i];
434: rnz = ai[i+1] - ai[i];
435: for (k=0; k<rnz; k++) {
436: for (j=0; j<bs; j++) {
437: for (m=0; m<bs; m++) {
438: PetscMUMPSIntCast(i*bs + m + shift,&row[idx]);
439: PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]);
440: idx++;
441: }
442: }
443: }
444: }
445: mumps->irn = row;
446: mumps->jcn = col;
447: mumps->nnz = nz;
448: }
449: return(0);
450: }
452: PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
453: {
454: const PetscInt *ai, *aj,*ajj;
455: PetscInt bs;
456: PetscInt64 nz,rnz,i,j,k,m;
457: PetscErrorCode ierr;
458: PetscMUMPSInt *row,*col;
459: PetscScalar *val;
460: Mat_SeqSBAIJ *aa=(Mat_SeqSBAIJ*)A->data;
461: const PetscInt bs2=aa->bs2,mbs=aa->mbs;
462: #if defined(PETSC_USE_COMPLEX)
463: PetscBool hermitian;
464: #endif
467: #if defined(PETSC_USE_COMPLEX)
468: MatGetOption(A,MAT_HERMITIAN,&hermitian);
469: if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
470: #endif
471: ai = aa->i;
472: aj = aa->j;
473: MatGetBlockSize(A,&bs);
474: if (reuse == MAT_INITIAL_MATRIX) {
475: nz = aa->nz;
476: PetscMalloc2(bs2*nz,&row,bs2*nz,&col);
477: if (bs>1) {
478: PetscMalloc1(bs2*nz,&mumps->val_alloc);
479: mumps->val = mumps->val_alloc;
480: } else {
481: mumps->val = aa->a;
482: }
483: mumps->irn = row;
484: mumps->jcn = col;
485: } else {
486: if (bs == 1) mumps->val = aa->a;
487: row = mumps->irn;
488: col = mumps->jcn;
489: }
490: val = mumps->val;
492: nz = 0;
493: if (bs>1) {
494: for (i=0; i<mbs; i++) {
495: rnz = ai[i+1] - ai[i];
496: ajj = aj + ai[i];
497: for (j=0; j<rnz; j++) {
498: for (k=0; k<bs; k++) {
499: for (m=0; m<bs; m++) {
500: if (ajj[j]>i || k>=m) {
501: if (reuse == MAT_INITIAL_MATRIX) {
502: PetscMUMPSIntCast(i*bs + m + shift,&row[nz]);
503: PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]);
504: }
505: val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
506: }
507: }
508: }
509: }
510: }
511: } else if (reuse == MAT_INITIAL_MATRIX) {
512: for (i=0; i<mbs; i++) {
513: rnz = ai[i+1] - ai[i];
514: ajj = aj + ai[i];
515: for (j=0; j<rnz; j++) {
516: PetscMUMPSIntCast(i+shift,&row[nz]);
517: PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);
518: nz++;
519: }
520: }
521: if (nz != aa->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %D != %D",nz,aa->nz);
522: }
523: if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
524: return(0);
525: }
527: PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
528: {
529: const PetscInt *ai,*aj,*ajj,*adiag,M=A->rmap->n;
530: PetscInt64 nz,rnz,i,j;
531: const PetscScalar *av,*v1;
532: PetscScalar *val;
533: PetscErrorCode ierr;
534: PetscMUMPSInt *row,*col;
535: Mat_SeqAIJ *aa=(Mat_SeqAIJ*)A->data;
536: PetscBool missing;
537: #if defined(PETSC_USE_COMPLEX)
538: PetscBool hermitian;
539: #endif
542: #if defined(PETSC_USE_COMPLEX)
543: MatGetOption(A,MAT_HERMITIAN,&hermitian);
544: if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
545: #endif
546: MatSeqAIJGetArrayRead(A,&av);
547: ai = aa->i; aj = aa->j;
548: adiag = aa->diag;
549: MatMissingDiagonal_SeqAIJ(A,&missing,NULL);
550: if (reuse == MAT_INITIAL_MATRIX) {
551: /* count nz in the upper triangular part of A */
552: nz = 0;
553: if (missing) {
554: for (i=0; i<M; i++) {
555: if (PetscUnlikely(adiag[i] >= ai[i+1])) {
556: for (j=ai[i];j<ai[i+1];j++) {
557: if (aj[j] < i) continue;
558: nz++;
559: }
560: } else {
561: nz += ai[i+1] - adiag[i];
562: }
563: }
564: } else {
565: for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
566: }
567: PetscMalloc2(nz,&row,nz,&col);
568: PetscMalloc1(nz,&val);
569: mumps->nnz = nz;
570: mumps->irn = row;
571: mumps->jcn = col;
572: mumps->val = mumps->val_alloc = val;
574: nz = 0;
575: if (missing) {
576: for (i=0; i<M; i++) {
577: if (PetscUnlikely(adiag[i] >= ai[i+1])) {
578: for (j=ai[i];j<ai[i+1];j++) {
579: if (aj[j] < i) continue;
580: PetscMUMPSIntCast(i+shift,&row[nz]);
581: PetscMUMPSIntCast(aj[j]+shift,&col[nz]);
582: val[nz] = av[j];
583: nz++;
584: }
585: } else {
586: rnz = ai[i+1] - adiag[i];
587: ajj = aj + adiag[i];
588: v1 = av + adiag[i];
589: for (j=0; j<rnz; j++) {
590: PetscMUMPSIntCast(i+shift,&row[nz]);
591: PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);
592: val[nz++] = v1[j];
593: }
594: }
595: }
596: } else {
597: for (i=0; i<M; i++) {
598: rnz = ai[i+1] - adiag[i];
599: ajj = aj + adiag[i];
600: v1 = av + adiag[i];
601: for (j=0; j<rnz; j++) {
602: PetscMUMPSIntCast(i+shift,&row[nz]);
603: PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);
604: val[nz++] = v1[j];
605: }
606: }
607: }
608: } else {
609: nz = 0;
610: val = mumps->val;
611: if (missing) {
612: for (i=0; i <M; i++) {
613: if (PetscUnlikely(adiag[i] >= ai[i+1])) {
614: for (j=ai[i];j<ai[i+1];j++) {
615: if (aj[j] < i) continue;
616: val[nz++] = av[j];
617: }
618: } else {
619: rnz = ai[i+1] - adiag[i];
620: v1 = av + adiag[i];
621: for (j=0; j<rnz; j++) {
622: val[nz++] = v1[j];
623: }
624: }
625: }
626: } else {
627: for (i=0; i <M; i++) {
628: rnz = ai[i+1] - adiag[i];
629: v1 = av + adiag[i];
630: for (j=0; j<rnz; j++) {
631: val[nz++] = v1[j];
632: }
633: }
634: }
635: }
636: MatSeqAIJRestoreArrayRead(A,&av);
637: return(0);
638: }
640: PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
641: {
642: PetscErrorCode ierr;
643: const PetscInt *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
644: PetscInt bs;
645: PetscInt64 rstart,nz,i,j,k,m,jj,irow,countA,countB;
646: PetscMUMPSInt *row,*col;
647: const PetscScalar *av,*bv,*v1,*v2;
648: PetscScalar *val;
649: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ*)A->data;
650: Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)(mat->A)->data;
651: Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data;
652: const PetscInt bs2=aa->bs2,mbs=aa->mbs;
653: #if defined(PETSC_USE_COMPLEX)
654: PetscBool hermitian;
655: #endif
658: #if defined(PETSC_USE_COMPLEX)
659: MatGetOption(A,MAT_HERMITIAN,&hermitian);
660: if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
661: #endif
662: MatGetBlockSize(A,&bs);
663: rstart = A->rmap->rstart;
664: ai = aa->i;
665: aj = aa->j;
666: bi = bb->i;
667: bj = bb->j;
668: av = aa->a;
669: bv = bb->a;
671: garray = mat->garray;
673: if (reuse == MAT_INITIAL_MATRIX) {
674: nz = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */
675: PetscMalloc2(nz,&row,nz,&col);
676: PetscMalloc1(nz,&val);
677: /* cannot determine the exact mumps->nnz yet because of the SBAIJ format */
678: mumps->irn = row;
679: mumps->jcn = col;
680: mumps->val = mumps->val_alloc = val;
681: } else {
682: val = mumps->val;
683: }
685: jj = 0; irow = rstart;
686: for (i=0; i<mbs; i++) {
687: ajj = aj + ai[i]; /* ptr to the beginning of this row */
688: countA = ai[i+1] - ai[i];
689: countB = bi[i+1] - bi[i];
690: bjj = bj + bi[i];
691: v1 = av + ai[i]*bs2;
692: v2 = bv + bi[i]*bs2;
694: if (bs>1) {
695: /* A-part */
696: for (j=0; j<countA; j++) {
697: for (k=0; k<bs; k++) {
698: for (m=0; m<bs; m++) {
699: if (rstart + ajj[j]*bs>irow || k>=m) {
700: if (reuse == MAT_INITIAL_MATRIX) {
701: PetscMUMPSIntCast(irow + m + shift,&row[jj]);
702: PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]);
703: }
704: val[jj++] = v1[j*bs2 + m + k*bs];
705: }
706: }
707: }
708: }
710: /* B-part */
711: for (j=0; j < countB; j++) {
712: for (k=0; k<bs; k++) {
713: for (m=0; m<bs; m++) {
714: if (reuse == MAT_INITIAL_MATRIX) {
715: PetscMUMPSIntCast(irow + m + shift,&row[jj]);
716: PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]);
717: }
718: val[jj++] = v2[j*bs2 + m + k*bs];
719: }
720: }
721: }
722: } else {
723: /* A-part */
724: for (j=0; j<countA; j++) {
725: if (reuse == MAT_INITIAL_MATRIX) {
726: PetscMUMPSIntCast(irow + shift,&row[jj]);
727: PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);
728: }
729: val[jj++] = v1[j];
730: }
732: /* B-part */
733: for (j=0; j < countB; j++) {
734: if (reuse == MAT_INITIAL_MATRIX) {
735: PetscMUMPSIntCast(irow + shift,&row[jj]);
736: PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);
737: }
738: val[jj++] = v2[j];
739: }
740: }
741: irow+=bs;
742: }
743: mumps->nnz = jj;
744: return(0);
745: }
747: PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
748: {
749: const PetscInt *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
750: PetscErrorCode ierr;
751: PetscInt64 rstart,nz,i,j,jj,irow,countA,countB;
752: PetscMUMPSInt *row,*col;
753: const PetscScalar *av, *bv,*v1,*v2;
754: PetscScalar *val;
755: Mat Ad,Ao;
756: Mat_SeqAIJ *aa;
757: Mat_SeqAIJ *bb;
760: MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);
761: MatSeqAIJGetArrayRead(Ad,&av);
762: MatSeqAIJGetArrayRead(Ao,&bv);
764: aa = (Mat_SeqAIJ*)(Ad)->data;
765: bb = (Mat_SeqAIJ*)(Ao)->data;
766: ai = aa->i;
767: aj = aa->j;
768: bi = bb->i;
769: bj = bb->j;
771: rstart = A->rmap->rstart;
773: if (reuse == MAT_INITIAL_MATRIX) {
774: nz = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
775: PetscMalloc2(nz,&row,nz,&col);
776: PetscMalloc1(nz,&val);
777: mumps->nnz = nz;
778: mumps->irn = row;
779: mumps->jcn = col;
780: mumps->val = mumps->val_alloc = val;
781: } else {
782: val = mumps->val;
783: }
785: jj = 0; irow = rstart;
786: for (i=0; i<m; i++) {
787: ajj = aj + ai[i]; /* ptr to the beginning of this row */
788: countA = ai[i+1] - ai[i];
789: countB = bi[i+1] - bi[i];
790: bjj = bj + bi[i];
791: v1 = av + ai[i];
792: v2 = bv + bi[i];
794: /* A-part */
795: for (j=0; j<countA; j++) {
796: if (reuse == MAT_INITIAL_MATRIX) {
797: PetscMUMPSIntCast(irow + shift,&row[jj]);
798: PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);
799: }
800: val[jj++] = v1[j];
801: }
803: /* B-part */
804: for (j=0; j < countB; j++) {
805: if (reuse == MAT_INITIAL_MATRIX) {
806: PetscMUMPSIntCast(irow + shift,&row[jj]);
807: PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);
808: }
809: val[jj++] = v2[j];
810: }
811: irow++;
812: }
813: MatSeqAIJRestoreArrayRead(Ad,&av);
814: MatSeqAIJRestoreArrayRead(Ao,&bv);
815: return(0);
816: }
818: PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
819: {
820: Mat_MPIBAIJ *mat = (Mat_MPIBAIJ*)A->data;
821: Mat_SeqBAIJ *aa = (Mat_SeqBAIJ*)(mat->A)->data;
822: Mat_SeqBAIJ *bb = (Mat_SeqBAIJ*)(mat->B)->data;
823: const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
824: const PetscInt *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
825: const PetscInt bs2=mat->bs2;
826: PetscErrorCode ierr;
827: PetscInt bs;
828: PetscInt64 nz,i,j,k,n,jj,irow,countA,countB,idx;
829: PetscMUMPSInt *row,*col;
830: const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
831: PetscScalar *val;
834: MatGetBlockSize(A,&bs);
835: if (reuse == MAT_INITIAL_MATRIX) {
836: nz = bs2*(aa->nz + bb->nz);
837: PetscMalloc2(nz,&row,nz,&col);
838: PetscMalloc1(nz,&val);
839: mumps->nnz = nz;
840: mumps->irn = row;
841: mumps->jcn = col;
842: mumps->val = mumps->val_alloc = val;
843: } else {
844: val = mumps->val;
845: }
847: jj = 0; irow = rstart;
848: for (i=0; i<mbs; i++) {
849: countA = ai[i+1] - ai[i];
850: countB = bi[i+1] - bi[i];
851: ajj = aj + ai[i];
852: bjj = bj + bi[i];
853: v1 = av + bs2*ai[i];
854: v2 = bv + bs2*bi[i];
856: idx = 0;
857: /* A-part */
858: for (k=0; k<countA; k++) {
859: for (j=0; j<bs; j++) {
860: for (n=0; n<bs; n++) {
861: if (reuse == MAT_INITIAL_MATRIX) {
862: PetscMUMPSIntCast(irow + n + shift,&row[jj]);
863: PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]);
864: }
865: val[jj++] = v1[idx++];
866: }
867: }
868: }
870: idx = 0;
871: /* B-part */
872: for (k=0; k<countB; k++) {
873: for (j=0; j<bs; j++) {
874: for (n=0; n<bs; n++) {
875: if (reuse == MAT_INITIAL_MATRIX) {
876: PetscMUMPSIntCast(irow + n + shift,&row[jj]);
877: PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]);
878: }
879: val[jj++] = v2[idx++];
880: }
881: }
882: }
883: irow += bs;
884: }
885: return(0);
886: }
888: PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
889: {
890: const PetscInt *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
891: PetscErrorCode ierr;
892: PetscInt64 rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
893: PetscMUMPSInt *row,*col;
894: const PetscScalar *av, *bv,*v1,*v2;
895: PetscScalar *val;
896: Mat Ad,Ao;
897: Mat_SeqAIJ *aa;
898: Mat_SeqAIJ *bb;
899: #if defined(PETSC_USE_COMPLEX)
900: PetscBool hermitian;
901: #endif
904: #if defined(PETSC_USE_COMPLEX)
905: MatGetOption(A,MAT_HERMITIAN,&hermitian);
906: if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
907: #endif
908: MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);
909: MatSeqAIJGetArrayRead(Ad,&av);
910: MatSeqAIJGetArrayRead(Ao,&bv);
912: aa = (Mat_SeqAIJ*)(Ad)->data;
913: bb = (Mat_SeqAIJ*)(Ao)->data;
914: ai = aa->i;
915: aj = aa->j;
916: adiag = aa->diag;
917: bi = bb->i;
918: bj = bb->j;
920: rstart = A->rmap->rstart;
922: if (reuse == MAT_INITIAL_MATRIX) {
923: nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
924: nzb = 0; /* num of upper triangular entries in mat->B */
925: for (i=0; i<m; i++) {
926: nza += (ai[i+1] - adiag[i]);
927: countB = bi[i+1] - bi[i];
928: bjj = bj + bi[i];
929: for (j=0; j<countB; j++) {
930: if (garray[bjj[j]] > rstart) nzb++;
931: }
932: }
934: nz = nza + nzb; /* total nz of upper triangular part of mat */
935: PetscMalloc2(nz,&row,nz,&col);
936: PetscMalloc1(nz,&val);
937: mumps->nnz = nz;
938: mumps->irn = row;
939: mumps->jcn = col;
940: mumps->val = mumps->val_alloc = val;
941: } else {
942: val = mumps->val;
943: }
945: jj = 0; irow = rstart;
946: for (i=0; i<m; i++) {
947: ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
948: v1 = av + adiag[i];
949: countA = ai[i+1] - adiag[i];
950: countB = bi[i+1] - bi[i];
951: bjj = bj + bi[i];
952: v2 = bv + bi[i];
954: /* A-part */
955: for (j=0; j<countA; j++) {
956: if (reuse == MAT_INITIAL_MATRIX) {
957: PetscMUMPSIntCast(irow + shift,&row[jj]);
958: PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);
959: }
960: val[jj++] = v1[j];
961: }
963: /* B-part */
964: for (j=0; j < countB; j++) {
965: if (garray[bjj[j]] > rstart) {
966: if (reuse == MAT_INITIAL_MATRIX) {
967: PetscMUMPSIntCast(irow + shift,&row[jj]);
968: PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);
969: }
970: val[jj++] = v2[j];
971: }
972: }
973: irow++;
974: }
975: MatSeqAIJRestoreArrayRead(Ad,&av);
976: MatSeqAIJRestoreArrayRead(Ao,&bv);
977: return(0);
978: }
980: PetscErrorCode MatDestroy_MUMPS(Mat A)
981: {
983: Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;
986: PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);
987: VecScatterDestroy(&mumps->scat_rhs);
988: VecScatterDestroy(&mumps->scat_sol);
989: VecDestroy(&mumps->b_seq);
990: VecDestroy(&mumps->x_seq);
991: PetscFree(mumps->id.perm_in);
992: PetscFree2(mumps->irn,mumps->jcn);
993: PetscFree(mumps->val_alloc);
994: PetscFree(mumps->info);
995: MatMumpsResetSchur_Private(mumps);
996: mumps->id.job = JOB_END;
997: PetscMUMPS_c(mumps);
998: if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1));
999: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1000: if (mumps->use_petsc_omp_support) {
1001: PetscOmpCtrlDestroy(&mumps->omp_ctrl);
1002: PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);
1003: PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps);
1004: }
1005: #endif
1006: PetscFree(mumps->ia_alloc);
1007: PetscFree(mumps->ja_alloc);
1008: PetscFree(mumps->recvcount);
1009: PetscFree(mumps->reqs);
1010: PetscFree(mumps->irhs_loc);
1011: if (mumps->mumps_comm != MPI_COMM_NULL) {MPI_Comm_free(&mumps->mumps_comm);}
1012: PetscFree(A->data);
1014: /* clear composed functions */
1015: PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);
1016: PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);
1017: PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);
1018: PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);
1019: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);
1020: PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);
1021: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);
1022: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);
1023: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);
1024: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);
1025: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);
1026: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);
1027: PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);
1028: return(0);
1029: }
1031: /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
1032: static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array)
1033: {
1034: PetscErrorCode ierr;
1035: Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;
1036: const PetscMPIInt ompsize=mumps->omp_comm_size;
1037: PetscInt i,m,M,rstart;
1040: MatGetSize(A,&M,NULL);
1041: MatGetLocalSize(A,&m,NULL);
1042: if (M > PETSC_MUMPS_INT_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
1043: if (ompsize == 1) {
1044: if (!mumps->irhs_loc) {
1045: mumps->nloc_rhs = m;
1046: PetscMalloc1(m,&mumps->irhs_loc);
1047: MatGetOwnershipRange(A,&rstart,NULL);
1048: for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */
1049: }
1050: mumps->id.rhs_loc = (MumpsScalar*)array;
1051: } else {
1052: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1053: const PetscInt *ranges;
1054: PetscMPIInt j,k,sendcount,*petsc_ranks,*omp_ranks;
1055: MPI_Group petsc_group,omp_group;
1056: PetscScalar *recvbuf=NULL;
1058: if (mumps->is_omp_master) {
1059: /* Lazily initialize the omp stuff for distributed rhs */
1060: if (!mumps->irhs_loc) {
1061: PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks);
1062: PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps);
1063: MPI_Comm_group(mumps->petsc_comm,&petsc_group);
1064: MPI_Comm_group(mumps->omp_comm,&omp_group);
1065: for (j=0; j<ompsize; j++) omp_ranks[j] = j;
1066: MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks);
1068: /* Populate mumps->irhs_loc[], rhs_nrow[] */
1069: mumps->nloc_rhs = 0;
1070: MatGetOwnershipRanges(A,&ranges);
1071: for (j=0; j<ompsize; j++) {
1072: mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]];
1073: mumps->nloc_rhs += mumps->rhs_nrow[j];
1074: }
1075: PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc);
1076: for (j=k=0; j<ompsize; j++) {
1077: for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */
1078: }
1080: PetscFree2(omp_ranks,petsc_ranks);
1081: MPI_Group_free(&petsc_group);
1082: MPI_Group_free(&omp_group);
1083: }
1085: /* Realloc buffers when current nrhs is bigger than what we have met */
1086: if (nrhs > mumps->max_nrhs) {
1087: PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);
1088: PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf);
1089: mumps->max_nrhs = nrhs;
1090: }
1092: /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1093: for (j=0; j<ompsize; j++) {PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]);}
1094: mumps->rhs_disps[0] = 0;
1095: for (j=1; j<ompsize; j++) {
1096: mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1];
1097: if (mumps->rhs_disps[j] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!");
1098: }
1099: recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1100: }
1102: PetscMPIIntCast(m*nrhs,&sendcount);
1103: MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm);
1105: if (mumps->is_omp_master) {
1106: if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1107: PetscScalar *dst,*dstbase = mumps->rhs_loc;
1108: for (j=0; j<ompsize; j++) {
1109: const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1110: dst = dstbase;
1111: for (i=0; i<nrhs; i++) {
1112: PetscArraycpy(dst,src,mumps->rhs_nrow[j]);
1113: src += mumps->rhs_nrow[j];
1114: dst += mumps->nloc_rhs;
1115: }
1116: dstbase += mumps->rhs_nrow[j];
1117: }
1118: }
1119: mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc;
1120: }
1121: #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1122: }
1123: mumps->id.nrhs = nrhs;
1124: mumps->id.nloc_rhs = mumps->nloc_rhs;
1125: mumps->id.lrhs_loc = mumps->nloc_rhs;
1126: mumps->id.irhs_loc = mumps->irhs_loc;
1127: return(0);
1128: }
1130: PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
1131: {
1132: Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;
1133: const PetscScalar *rarray = NULL;
1134: PetscScalar *array;
1135: IS is_iden,is_petsc;
1136: PetscErrorCode ierr;
1137: PetscInt i;
1138: PetscBool second_solve = PETSC_FALSE;
1139: static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;
1142: PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n",&cite1);
1143: PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n",&cite2);
1145: if (A->factorerrortype) {
1146: PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1147: VecSetInf(x);
1148: return(0);
1149: }
1151: mumps->id.nrhs = 1;
1152: if (mumps->petsc_size > 1) {
1153: if (mumps->ICNTL20 == 10) {
1154: mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1155: VecGetArrayRead(b,&rarray);
1156: MatMumpsSetUpDistRHSInfo(A,1,rarray);
1157: } else {
1158: mumps->id.ICNTL(20) = 0; /* dense centralized RHS; scatter b into a sequential rhs vector */
1159: VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);
1160: VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);
1161: if (!mumps->myid) {
1162: VecGetArray(mumps->b_seq,&array);
1163: mumps->id.rhs = (MumpsScalar*)array;
1164: }
1165: }
1166: } else { /* petsc_size == 1 */
1167: mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1168: VecCopy(b,x);
1169: VecGetArray(x,&array);
1170: mumps->id.rhs = (MumpsScalar*)array;
1171: }
1173: /*
1174: handle condensation step of Schur complement (if any)
1175: We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
1176: According to the MUMPS (5.0.0) manual, any other value could be harmful during the factorization phase.
1177: Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
1178: This requires an extra call to PetscMUMPS_c and the computation of the factors for S
1179: */
1180: if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1181: if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");
1182: second_solve = PETSC_TRUE;
1183: MatMumpsHandleSchur_Private(A,PETSC_FALSE);
1184: }
1185: /* solve phase */
1186: /*-------------*/
1187: mumps->id.job = JOB_SOLVE;
1188: PetscMUMPS_c(mumps);
1189: if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
1191: /* handle expansion step of Schur complement (if any) */
1192: if (second_solve) {
1193: MatMumpsHandleSchur_Private(A,PETSC_TRUE);
1194: }
1196: if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
1197: if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1198: /* when id.ICNTL(9) changes, the contents of isol_loc may change (though not its size, lsol_loc), so recreate scat_sol */
1199: VecScatterDestroy(&mumps->scat_sol);
1200: }
1201: if (!mumps->scat_sol) { /* create scatter scat_sol */
1202: PetscInt *isol2_loc=NULL;
1203: ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden); /* from */
1204: PetscMalloc1(mumps->id.lsol_loc,&isol2_loc);
1205: for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */
1206: ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc); /* to */
1207: VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);
1208: ISDestroy(&is_iden);
1209: ISDestroy(&is_petsc);
1210: mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1211: }
1213: VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);
1214: VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);
1215: }
1217: if (mumps->petsc_size > 1) {
1218: if (mumps->ICNTL20 == 10) {
1219: VecRestoreArrayRead(b,&rarray);
1220: } else if (!mumps->myid) {
1221: VecRestoreArray(mumps->b_seq,&array);
1222: }
1223: } else {VecRestoreArray(x,&array);}
1225: PetscLogFlops(2.0*mumps->id.RINFO(3));
1226: return(0);
1227: }
1229: PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
1230: {
1231: Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;
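  /* ICNTL(9)=1 (the default) makes MUMPS solve A x = b; any other value solves the transposed system A^T x = b,
     so we temporarily set it to 0 around the regular MatSolve and then restore the default. */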
1235: mumps->id.ICNTL(9) = 0;
1236: MatSolve_MUMPS(A,b,x);
1237: mumps->id.ICNTL(9) = 1;
1238: return(0);
1239: }
1241: PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
1242: {
1243: PetscErrorCode ierr;
1244: Mat Bt = NULL;
1245: PetscBool denseX,denseB,flg,flgT;
1246: Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;
1247: PetscInt i,nrhs,M;
1248: PetscScalar *array;
1249: const PetscScalar *rbray;
1250: PetscInt lsol_loc,nlsol_loc,*idxx,iidx = 0;
1251: PetscMUMPSInt *isol_loc,*isol_loc_save;
1252: PetscScalar *bray,*sol_loc,*sol_loc_save;
1253: IS is_to,is_from;
1254: PetscInt k,proc,j,m,myrstart;
1255: const PetscInt *rstart;
1256: Vec v_mpi,msol_loc;
1257: VecScatter scat_sol;
1258: Vec b_seq;
1259: VecScatter scat_rhs;
1260: PetscScalar *aa;
1261: PetscInt spnr,*ia,*ja;
1262: Mat_MPIAIJ *b = NULL;
1265: PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL);
1266: if (!denseX) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
1268: PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL);
1269: if (denseB) {
1270: if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
1271: mumps->id.ICNTL(20)= 0; /* dense RHS */
1272: } else { /* sparse B */
1273: if (X == B) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
1274: PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);
1275: if (flgT) { /* input B is the transpose of the actual RHS matrix,
1276: because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1277: MatTransposeGetMat(B,&Bt);
1278: } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
1279: mumps->id.ICNTL(20)= 1; /* sparse RHS */
1280: }
1282: MatGetSize(B,&M,&nrhs);
1283: mumps->id.nrhs = nrhs;
1284: mumps->id.lrhs = M;
1285: mumps->id.rhs = NULL;
1287: if (mumps->petsc_size == 1) {
1288: PetscScalar *aa;
1289: PetscInt spnr,*ia,*ja;
1290: PetscBool second_solve = PETSC_FALSE;
1292: MatDenseGetArray(X,&array);
1293: mumps->id.rhs = (MumpsScalar*)array;
1295: if (denseB) {
1296: /* copy B to X */
1297: MatDenseGetArrayRead(B,&rbray);
1298: PetscArraycpy(array,rbray,M*nrhs);
1299: MatDenseRestoreArrayRead(B,&rbray);
1300: } else { /* sparse B */
1301: MatSeqAIJGetArray(Bt,&aa);
1302: MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);
1303: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
1304: PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);
1305: mumps->id.rhs_sparse = (MumpsScalar*)aa;
1306: }
1307: /* handle condensation step of Schur complement (if any) */
1308: if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1309: second_solve = PETSC_TRUE;
1310: MatMumpsHandleSchur_Private(A,PETSC_FALSE);
1311: }
1312: /* solve phase */
1313: /*-------------*/
1314: mumps->id.job = JOB_SOLVE;
1315: PetscMUMPS_c(mumps);
1316: if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
1318: /* handle expansion step of Schur complement (if any) */
1319: if (second_solve) {
1320: MatMumpsHandleSchur_Private(A,PETSC_TRUE);
1321: }
1322: if (!denseB) { /* sparse B */
1323: MatSeqAIJRestoreArray(Bt,&aa);
1324: MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);
1325: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
1326: }
1327: MatDenseRestoreArray(X,&array);
1328: return(0);
1329: }
1331: /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
1332: if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");
1334: /* create msol_loc to hold mumps local solution */
1335: isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1336: sol_loc_save = (PetscScalar*)mumps->id.sol_loc;
1338: lsol_loc = mumps->id.lsol_loc;
1339: nlsol_loc = nrhs*lsol_loc; /* length of sol_loc */
1340: PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);
1341: mumps->id.sol_loc = (MumpsScalar*)sol_loc;
1342: mumps->id.isol_loc = isol_loc;
1344: VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);
1346: if (denseB) {
1347: if (mumps->ICNTL20 == 10) {
1348: mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1349: MatDenseGetArrayRead(B,&rbray);
1350: MatMumpsSetUpDistRHSInfo(A,nrhs,rbray);
1351: MatDenseRestoreArrayRead(B,&rbray);
1352: MatGetLocalSize(B,&m,NULL);
1353: VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi);
1354: } else {
1355: mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1356: /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs cannot be implemented with MPI_Gather, resulting in
1357: very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
1358: 0, re-arrange B into the desired order, which is a local operation.
1359: */
1361: /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1362: /* wrap dense rhs matrix B into a vector v_mpi */
1363: MatGetLocalSize(B,&m,NULL);
1364: MatDenseGetArray(B,&bray);
1365: VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);
1366: MatDenseRestoreArray(B,&bray);
1368: /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1369: if (!mumps->myid) {
1370: PetscInt *idx;
1371: /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1372: PetscMalloc1(nrhs*M,&idx);
1373: MatGetOwnershipRanges(B,&rstart);
1374: k = 0;
1375: for (proc=0; proc<mumps->petsc_size; proc++){
1376: for (j=0; j<nrhs; j++){
1377: for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
1378: }
1379: }
1381: VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);
1382: ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);
1383: ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);
1384: } else {
1385: VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);
1386: ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);
1387: ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);
1388: }
1389: VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);
1390: VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);
1391: ISDestroy(&is_to);
1392: ISDestroy(&is_from);
1393: VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);
1395: if (!mumps->myid) { /* define rhs on the host */
1396: VecGetArray(b_seq,&bray);
1397: mumps->id.rhs = (MumpsScalar*)bray;
1398: VecRestoreArray(b_seq,&bray);
1399: }
1400: }
1401: } else { /* sparse B */
1402: b = (Mat_MPIAIJ*)Bt->data;
1404: /* wrap dense X into a vector v_mpi */
1405: MatGetLocalSize(X,&m,NULL);
1406: MatDenseGetArray(X,&bray);
1407: VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);
1408: MatDenseRestoreArray(X,&bray);
1410: if (!mumps->myid) {
1411: MatSeqAIJGetArray(b->A,&aa);
1412: MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);
1413: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
1414: PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);
1415: mumps->id.rhs_sparse = (MumpsScalar*)aa;
1416: } else {
1417: mumps->id.irhs_ptr = NULL;
1418: mumps->id.irhs_sparse = NULL;
1419: mumps->id.nz_rhs = 0;
1420: mumps->id.rhs_sparse = NULL;
1421: }
1422: }
1424: /* solve phase */
1425: /*-------------*/
1426: mumps->id.job = JOB_SOLVE;
1427: PetscMUMPS_c(mumps);
1428: if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
1430: /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
1431: MatDenseGetArray(X,&array);
1432: VecPlaceArray(v_mpi,array);
1434: /* create scatter scat_sol */
1435: MatGetOwnershipRanges(X,&rstart);
1436: /* iidx: index for scatter mumps solution to petsc X */
1438: ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);
1439: PetscMalloc1(nlsol_loc,&idxx);
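  /* v_mpi wraps the column-major local arrays of X: process 'proc' owns the contiguous block
     [rstart[proc]*nrhs, rstart[proc+1]*nrhs) and, within it, column j occupies offsets [j*m, (j+1)*m),
     where m = rstart[proc+1]-rstart[proc]. The loop below maps each MUMPS-owned row index isol_loc[i]
     of right-hand side j to that global position in v_mpi. */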
1440: for (i=0; i<lsol_loc; i++) {
1441: isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
1443: for (proc=0; proc<mumps->petsc_size; proc++){
1444: if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
1445: myrstart = rstart[proc];
1446: k = isol_loc[i] - myrstart; /* local index on 1st column of petsc vector X */
1447: iidx = k + myrstart*nrhs; /* maps mumps isol_loc[i] to petsc index in X */
1448: m = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
1449: break;
1450: }
1451: }
1453: for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
1454: }
1455: ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);
1456: VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);
1457: VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);
1458: ISDestroy(&is_from);
1459: ISDestroy(&is_to);
1460: VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);
1461: MatDenseRestoreArray(X,&array);
1463: /* free spaces */
1464: mumps->id.sol_loc = (MumpsScalar*)sol_loc_save;
1465: mumps->id.isol_loc = isol_loc_save;
1467: PetscFree2(sol_loc,isol_loc);
1468: PetscFree(idxx);
1469: VecDestroy(&msol_loc);
1470: VecDestroy(&v_mpi);
1471: if (!denseB) {
1472: if (!mumps->myid) {
1473: b = (Mat_MPIAIJ*)Bt->data;
1474: MatSeqAIJRestoreArray(b->A,&aa);
1475: MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);
1476: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
1477: }
1478: } else {
1479: if (mumps->ICNTL20 == 0) {
1480: VecDestroy(&b_seq);
1481: VecScatterDestroy(&scat_rhs);
1482: }
1483: }
1484: VecScatterDestroy(&scat_sol);
1485: PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));
1486: return(0);
1487: }
1489: PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
1490: {
1492: PetscBool flg;
1493: Mat B;
1496: PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);
1497: if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix");
1499: /* Create B=Bt^T that uses Bt's data structure */
1500: MatCreateTranspose(Bt,&B);
1502: MatMatSolve_MUMPS(A,B,X);
1503: MatDestroy(&B);
1504: return(0);
1505: }
1507: #if !defined(PETSC_USE_COMPLEX)
1508: /*
1509: input:
1510: F: numeric factor
1511: output:
1512: nneg: total number of negative pivots
1513: nzero: total number of zero pivots
1514: npos: (global dimension of F) - nneg - nzero
1515: */
1516: PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
1517: {
1518: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
1520: PetscMPIInt size;
1523: MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);
1524: /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1525: if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.ICNTL(13));
1527: if (nneg) *nneg = mumps->id.INFOG(12);
1528: if (nzero || npos) {
1529: if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
1530: if (nzero) *nzero = mumps->id.INFOG(28);
1531: if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1532: }
1533: return(0);
1534: }
1535: #endif
1537: PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
1538: {
1540: PetscInt i,nreqs;
1541: PetscMUMPSInt *irn,*jcn;
1542: PetscMPIInt count;
1543: PetscInt64 totnnz,remain;
1544: const PetscInt osize=mumps->omp_comm_size;
1545: PetscScalar *val;
1548: if (osize > 1) {
1549: if (reuse == MAT_INITIAL_MATRIX) {
1550: /* master first gathers counts of nonzeros to receive */
1551: if (mumps->is_omp_master) {PetscMalloc1(osize,&mumps->recvcount);}
1552: MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm);
1554: /* Then each computes number of send/recvs */
1555: if (mumps->is_omp_master) {
1556: /* Start from 1 since self communication is not done in MPI */
1557: nreqs = 0;
1558: for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1559: } else {
1560: nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1561: }
1562: PetscMalloc1(nreqs*3,&mumps->reqs); /* Triple the requests since we send irn, jcn and val separately */
1564: /* The following code does a very simple thing: the omp_master rank gathers irn/jcn/val from the others.
1565: MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
1566: might be a prime number > 2^31-1, we have to slice the messages. Note omp_comm_size
1567: is very small, so the current approach should have no extra overhead compared to MPI_Gatherv.
1568: */
1569: nreqs = 0; /* counter for actual send/recvs */
1570: if (mumps->is_omp_master) {
1571: for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1572: PetscMalloc2(totnnz,&irn,totnnz,&jcn);
1573: PetscMalloc1(totnnz,&val);
1575: /* Self communication */
1576: PetscArraycpy(irn,mumps->irn,mumps->nnz);
1577: PetscArraycpy(jcn,mumps->jcn,mumps->nnz);
1578: PetscArraycpy(val,mumps->val,mumps->nnz);
1580: /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1581: PetscFree2(mumps->irn,mumps->jcn);
1582: PetscFree(mumps->val_alloc);
1583: mumps->nnz = totnnz;
1584: mumps->irn = irn;
1585: mumps->jcn = jcn;
1586: mumps->val = mumps->val_alloc = val;
1588: irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1589: jcn += mumps->recvcount[0];
1590: val += mumps->recvcount[0];
1592: /* Remote communication */
1593: for (i=1; i<osize; i++) {
1594: count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1595: remain = mumps->recvcount[i] - count;
1596: while (count>0) {
1597: MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1598: MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1599: MPI_Irecv(val,count,MPIU_SCALAR, i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1600: irn += count;
1601: jcn += count;
1602: val += count;
1603: count = PetscMin(remain,PETSC_MPI_INT_MAX);
1604: remain -= count;
1605: }
1606: }
1607: } else {
1608: irn = mumps->irn;
1609: jcn = mumps->jcn;
1610: val = mumps->val;
1611: count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1612: remain = mumps->nnz - count;
1613: while (count>0) {
1614: MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1615: MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1616: MPI_Isend(val,count,MPIU_SCALAR, 0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1617: irn += count;
1618: jcn += count;
1619: val += count;
1620: count = PetscMin(remain,PETSC_MPI_INT_MAX);
1621: remain -= count;
1622: }
1623: }
1624: } else {
1625: nreqs = 0;
1626: if (mumps->is_omp_master) {
1627: val = mumps->val + mumps->recvcount[0];
1628: for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */
1629: count = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1630: remain = mumps->recvcount[i] - count;
1631: while (count>0) {
1632: MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1633: val += count;
1634: count = PetscMin(remain,PETSC_MPI_INT_MAX);
1635: remain -= count;
1636: }
1637: }
1638: } else {
1639: val = mumps->val;
1640: count = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1641: remain = mumps->nnz - count;
1642: while (count>0) {
1643: MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);
1644: val += count;
1645: count = PetscMin(remain,PETSC_MPI_INT_MAX);
1646: remain -= count;
1647: }
1648: }
1649: }
1650: MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE);
1651: mumps->tag++; /* It is fine for the above sends/recvs to share one MPI tag */
1652: }
1653: return(0);
1654: }
1656: PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
1657: {
1658: Mat_MUMPS *mumps =(Mat_MUMPS*)(F)->data;
1660: PetscBool isMPIAIJ;
1663: if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* skip the empty-matrix case: INFOG(1)=-16 with INFO(2)=N=0 */
1664: if (mumps->id.INFOG(1) == -6) {
1665: PetscInfo2(A,"MatFactorNumeric is called with a structurally singular matrix, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1666: }
1667: PetscInfo2(A,"MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1668: return(0);
1669: }
1671: (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps);
1672: MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);
1674: /* numerical factorization phase */
1675: /*-------------------------------*/
1676: mumps->id.job = JOB_FACTNUMERIC;
1677: if (!mumps->id.ICNTL(18)) { /* A is centralized */
1678: if (!mumps->myid) {
1679: mumps->id.a = (MumpsScalar*)mumps->val;
1680: }
1681: } else {
1682: mumps->id.a_loc = (MumpsScalar*)mumps->val;
1683: }
1684: PetscMUMPS_c(mumps);
1685: if (mumps->id.INFOG(1) < 0) {
1686: if (A->erroriffailure) {
1687: SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1688: } else {
1689: if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1690: PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1691: F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1692: } else if (mumps->id.INFOG(1) == -13) {
1693: PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1694: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1695: } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1696: PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with work array\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1697: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1698: } else {
1699: PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1700: F->factorerrortype = MAT_FACTOR_OTHER;
1701: }
1702: }
1703: }
1704: if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB," mumps->id.ICNTL(16):=%d\n",mumps->id.ICNTL(16));
1706: F->assembled = PETSC_TRUE;
1707: mumps->matstruc = SAME_NONZERO_PATTERN;
1708: if (F->schur) { /* reset Schur status to unfactored */
1709: #if defined(PETSC_HAVE_CUDA)
1710: F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1711: #endif
1712: if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1713: mumps->id.ICNTL(19) = 2;
1714: MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);
1715: }
1716: MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);
1717: }
1719: /* make sure the ICNTL(19) value returned by MatMumpsGetIcntl() is always consistent */
1720: if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1722: if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1723: if (mumps->petsc_size > 1) {
1724: PetscInt lsol_loc;
1725: PetscScalar *sol_loc;
1727: PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);
1729: /* distributed solution; Create x_seq=sol_loc for repeated use */
1730: if (mumps->x_seq) {
1731: VecScatterDestroy(&mumps->scat_sol);
1732: PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);
1733: VecDestroy(&mumps->x_seq);
1734: }
1735: lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1736: PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);
1737: mumps->id.lsol_loc = lsol_loc;
1738: mumps->id.sol_loc = (MumpsScalar*)sol_loc;
1739: VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);
1740: }
1741: PetscLogFlops(mumps->id.RINFO(2));
1742: return(0);
1743: }
1745: /* Sets MUMPS options from the options database */
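/*
   For example (illustrative values only), one might run with
     -mat_mumps_icntl_14 50 -mat_mumps_icntl_24 1 -mat_mumps_cntl_3 1e-12
   to increase the workspace estimate by 50%, enable null-pivot detection and set the absolute pivoting
   threshold; each option maps onto the corresponding ICNTL()/CNTL() entry read below.
*/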
1746: PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A)
1747: {
1748: Mat_MUMPS *mumps = (Mat_MUMPS*)F->data;
1750: PetscMUMPSInt icntl=0;
1751: PetscInt info[80],i,ninfo=80;
1752: PetscBool flg=PETSC_FALSE;
1755: PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");
1756: PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);
1757: if (flg) mumps->id.ICNTL(1) = icntl;
1758: PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);
1759: if (flg) mumps->id.ICNTL(2) = icntl;
1760: PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);
1761: if (flg) mumps->id.ICNTL(3) = icntl;
1763: PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);
1764: if (flg) mumps->id.ICNTL(4) = icntl;
1765: if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore the MUMPS default id.ICNTL(3) = 6 */
1767: PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);
1768: if (flg) mumps->id.ICNTL(6) = icntl;
1770: PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 1=Petsc (sequential only), 3=Scotch, 4=PORD, 5=Metis, 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg);
1771: if (flg) {
1772: if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not supported by the PETSc/MUMPS interface for parallel matrices\n");
1773: else mumps->id.ICNTL(7) = icntl;
1774: }
1776: PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);
1777: /* PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL); handled by MatSolveTranspose_MUMPS() */
1778: PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);
1779: PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);
1780: PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);
1781: PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);
1782: PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);
1783: PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);
1784: if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any existing Schur data */
1785: MatDestroy(&F->schur);
1786: MatMumpsResetSchur_Private(mumps);
1787: }
1789: /* MPICH Fortran MPI_IN_PLACE binding has a bug that prevents the use of 'mpi4py + mpich + mumps', e.g., by Firedrake.
1790: So we turn off distributed RHS for MPICH. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
1791: and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
1792: */
1793: #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0) && defined(PETSC_HAVE_OMPI_MAJOR_VERSION)
1794: mumps->ICNTL20 = 10; /* Distributed dense RHS*/
1795: #else
1796: mumps->ICNTL20 = 0; /* Centralized dense RHS*/
1797: #endif
1798: PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg);
1799: if (flg && mumps->ICNTL20 != 10 && mumps->ICNTL20 != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10\n",(int)mumps->ICNTL20);
1800: #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0)
1801: if (flg && mumps->ICNTL20 == 10) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 is not supported before MUMPS-5.3.0\n");
1802: #endif
1803: /* PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL); we only use distributed solution vector */
1805: PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);
1806: PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);
1807: PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);
1808: if (mumps->id.ICNTL(24)) {
1809: mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */
1810: }
1812: PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);
1813: PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);
1814: PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);
1815: PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);
1816: PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);
1817: /* PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL); */ /* call MatMumpsGetInverse() directly */
1818: PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);
1819: /* PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL); -- not supported by PETSc API */
1820: PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);
1821: PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);
1822: PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);
1823: PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);
1825: PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);
1826: PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);
1827: PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);
1828: PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);
1829: PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);
1830: PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);
1832: PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL);
1834: PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);
1835: if (ninfo) {
1836: if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must be <= 80\n",ninfo);
1837: PetscMalloc1(ninfo,&mumps->info);
1838: mumps->ninfo = ninfo;
1839: for (i=0; i<ninfo; i++) {
1840: if (info[i] < 1 || info[i] > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must be between 1 and 80\n",info[i]);
1841: else mumps->info[i] = info[i];
1842: }
1843: }
1845: PetscOptionsEnd();
1846: return(0);
1847: }
1849: PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps)
1850: {
1852: PetscInt nthreads=0;
1853: MPI_Comm newcomm=MPI_COMM_NULL;
1856: mumps->petsc_comm = PetscObjectComm((PetscObject)A);
1857: MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);
1858: MPI_Comm_rank(mumps->petsc_comm,&mumps->myid); /* so that code like "if (!myid)" still works even if mumps_comm is different */
1860: PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);
1861: if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
1862: PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);
1863: if (mumps->use_petsc_omp_support) {
1864: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1865: PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);
1866: PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);
1867: #else
1868: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n");
1869: #endif
1870: } else {
1871: mumps->omp_comm = PETSC_COMM_SELF;
1872: mumps->mumps_comm = mumps->petsc_comm;
1873: mumps->is_omp_master = PETSC_TRUE;
1874: }
1875: MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);
1876: mumps->reqs = NULL;
1877: mumps->tag = 0;
1879: /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
1880: if (mumps->mumps_comm != MPI_COMM_NULL) {
1881: MPI_Comm_dup(mumps->mumps_comm,&newcomm);
1882: mumps->mumps_comm = newcomm;
1883: }
1885: mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
1886: mumps->id.job = JOB_INIT;
1887: mumps->id.par = 1; /* host participates in factorization and solve */
1888: mumps->id.sym = mumps->sym;
1890: PetscMUMPS_c(mumps);
1891: if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1));
1893: /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
1894: For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
1895: */
1896: MPI_Bcast(mumps->id.icntl,40,MPI_INT, 0,mumps->omp_comm); /* see MUMPS-5.1.2 Manual Section 9 */
1897: MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);
1899: mumps->scat_rhs = NULL;
1900: mumps->scat_sol = NULL;
1902: /* set PETSc-MUMPS default options - override MUMPS default */
1903: mumps->id.ICNTL(3) = 0;
1904: mumps->id.ICNTL(4) = 0;
1905: if (mumps->petsc_size == 1) {
1906: mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */
1907: mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */
1908: } else {
1909: mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */
1910: mumps->id.ICNTL(21) = 1; /* distributed solution */
1911: }
1913: /* schur */
1914: mumps->id.size_schur = 0;
1915: mumps->id.listvar_schur = NULL;
1916: mumps->id.schur = NULL;
1917: mumps->sizeredrhs = 0;
1918: mumps->schur_sol = NULL;
1919: mumps->schur_sizesol = 0;
1920: return(0);
1921: }
1923: PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps)
1924: {
1928: if (mumps->id.INFOG(1) < 0) {
1929: if (A->erroriffailure) {
1930: SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
1931: } else {
1932: if (mumps->id.INFOG(1) == -6) {
1933: PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1934: F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1935: } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1936: PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1937: F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1938: } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { /* INFOG(1)=-16 with INFO(2)=N=0 */
1939: PetscInfo(F,"Empty matrix\n");
1940: } else {
1941: PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1942: F->factorerrortype = MAT_FACTOR_OTHER;
1943: }
1944: }
1945: }
1946: return(0);
1947: }
1949: /* Note Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */
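/* A sketch of one way this might be exercised from the command line (assuming the ordering computed by the
   PCFactor interface, e.g. -pc_factor_mat_ordering_type nd, is what arrives here as r; values illustrative):
     -pc_type lu -pc_factor_mat_solver_type mumps -pc_factor_mat_ordering_type nd -mat_mumps_icntl_7 1
   so that the PETSc-computed ordering is passed to MUMPS as perm_in. */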
1950: PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1951: {
1952: Mat_MUMPS *mumps = (Mat_MUMPS*)F->data;
1954: Vec b;
1955: const PetscInt M = A->rmap->N;
1958: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
1960: /* Set MUMPS options from the options database */
1961: PetscSetMUMPSFromOptions(F,A);
1963: (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);
1964: MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);
1966: /* analysis phase */
1967: /*----------------*/
1968: mumps->id.job = JOB_FACTSYMBOLIC;
1969: mumps->id.n = M;
1970: switch (mumps->id.ICNTL(18)) {
1971: case 0: /* centralized assembled matrix input */
1972: if (!mumps->myid) {
1973: mumps->id.nnz = mumps->nnz;
1974: mumps->id.irn = mumps->irn;
1975: mumps->id.jcn = mumps->jcn;
1976: if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val;
1977: if (mumps->id.ICNTL(7) == 1) { /* use the user-provided matrix ordering - assuming r = c ordering */
1978: if (!mumps->myid) {
1979: const PetscInt *idx;
1980: PetscInt i;
1982: PetscMalloc1(M,&mumps->id.perm_in);
1983: ISGetIndices(r,&idx);
1984: for (i=0; i<M; i++) {PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]));} /* perm_in[]: start from 1, not 0! */
1985: ISRestoreIndices(r,&idx);
1986: }
1987: }
1988: }
1989: break;
1990: case 3: /* distributed assembled matrix input (size>1) */
1991: mumps->id.nnz_loc = mumps->nnz;
1992: mumps->id.irn_loc = mumps->irn;
1993: mumps->id.jcn_loc = mumps->jcn;
1994: if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val;
1995: if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
1996: MatCreateVecs(A,NULL,&b);
1997: VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);
1998: VecDestroy(&b);
1999: }
2000: break;
2001: }
2002: PetscMUMPS_c(mumps);
2003: MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);
2005: F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2006: F->ops->solve = MatSolve_MUMPS;
2007: F->ops->solvetranspose = MatSolveTranspose_MUMPS;
2008: F->ops->matsolve = MatMatSolve_MUMPS;
2009: F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2010: return(0);
2011: }
2013: /* Note the Petsc r and c permutations are ignored */
2014: PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
2015: {
2016: Mat_MUMPS *mumps = (Mat_MUMPS*)F->data;
2018: Vec b;
2019: const PetscInt M = A->rmap->N;
2022: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
2024: /* Set MUMPS options from the options database */
2025: PetscSetMUMPSFromOptions(F,A);
2027: (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);
2028: MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);
2030: /* analysis phase */
2031: /*----------------*/
2032: mumps->id.job = JOB_FACTSYMBOLIC;
2033: mumps->id.n = M;
2034: switch (mumps->id.ICNTL(18)) {
2035: case 0: /* centralized assembled matrix input */
2036: if (!mumps->myid) {
2037: mumps->id.nnz = mumps->nnz;
2038: mumps->id.irn = mumps->irn;
2039: mumps->id.jcn = mumps->jcn;
2040: if (mumps->id.ICNTL(6)>1) {
2041: mumps->id.a = (MumpsScalar*)mumps->val;
2042: }
2043: }
2044: break;
2045: case 3: /* distributed assembled matrix input (size>1) */
2046: mumps->id.nnz_loc = mumps->nnz;
2047: mumps->id.irn_loc = mumps->irn;
2048: mumps->id.jcn_loc = mumps->jcn;
2049: if (mumps->id.ICNTL(6)>1) {
2050: mumps->id.a_loc = (MumpsScalar*)mumps->val;
2051: }
2052: if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2053: MatCreateVecs(A,NULL,&b);
2054: VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);
2055: VecDestroy(&b);
2056: }
2057: break;
2058: }
2059: PetscMUMPS_c(mumps);
2060: MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);
2062: F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2063: F->ops->solve = MatSolve_MUMPS;
2064: F->ops->solvetranspose = MatSolveTranspose_MUMPS;
2065: return(0);
2066: }
2068: /* Note the Petsc r permutation and factor info are ignored */
2069: PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info)
2070: {
2071: Mat_MUMPS *mumps = (Mat_MUMPS*)F->data;
2073: Vec b;
2074: const PetscInt M = A->rmap->N;
2077: mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
2079: /* Set MUMPS options from the options database */
2080: PetscSetMUMPSFromOptions(F,A);
2082: (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);
2083: MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);
2085: /* analysis phase */
2086: /*----------------*/
2087: mumps->id.job = JOB_FACTSYMBOLIC;
2088: mumps->id.n = M;
2089: switch (mumps->id.ICNTL(18)) {
2090: case 0: /* centralized assembled matrix input */
2091: if (!mumps->myid) {
2092: mumps->id.nnz = mumps->nnz;
2093: mumps->id.irn = mumps->irn;
2094: mumps->id.jcn = mumps->jcn;
2095: if (mumps->id.ICNTL(6)>1) {
2096: mumps->id.a = (MumpsScalar*)mumps->val;
2097: }
2098: }
2099: break;
2100: case 3: /* distributed assembled matrix input (size>1) */
2101: mumps->id.nnz_loc = mumps->nnz;
2102: mumps->id.irn_loc = mumps->irn;
2103: mumps->id.jcn_loc = mumps->jcn;
2104: if (mumps->id.ICNTL(6)>1) {
2105: mumps->id.a_loc = (MumpsScalar*)mumps->val;
2106: }
2107: if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2108: MatCreateVecs(A,NULL,&b);
2109: VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);
2110: VecDestroy(&b);
2111: }
2112: break;
2113: }
2114: PetscMUMPS_c(mumps);
2115: MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);
2117: F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2118: F->ops->solve = MatSolve_MUMPS;
2119: F->ops->solvetranspose = MatSolve_MUMPS;
2120: F->ops->matsolve = MatMatSolve_MUMPS;
2121: F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2122: #if defined(PETSC_USE_COMPLEX)
2123: F->ops->getinertia = NULL;
2124: #else
2125: F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2126: #endif
2127: return(0);
2128: }
2130: PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer)
2131: {
2132: PetscErrorCode ierr;
2133: PetscBool iascii;
2134: PetscViewerFormat format;
2135: Mat_MUMPS *mumps=(Mat_MUMPS*)A->data;
2138: /* check if matrix is mumps type */
2139: if (A->ops->solve != MatSolve_MUMPS) return(0);
2141: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
2142: if (iascii) {
2143: PetscViewerGetFormat(viewer,&format);
2144: if (format == PETSC_VIEWER_ASCII_INFO) {
2145: PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");
2146: PetscViewerASCIIPrintf(viewer," SYM (matrix type): %d \n",mumps->id.sym);
2147: PetscViewerASCIIPrintf(viewer," PAR (host participation): %d \n",mumps->id.par);
2148: PetscViewerASCIIPrintf(viewer," ICNTL(1) (output for error): %d \n",mumps->id.ICNTL(1));
2149: PetscViewerASCIIPrintf(viewer," ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));
2150: PetscViewerASCIIPrintf(viewer," ICNTL(3) (output for global info): %d \n",mumps->id.ICNTL(3));
2151: PetscViewerASCIIPrintf(viewer," ICNTL(4) (level of printing): %d \n",mumps->id.ICNTL(4));
2152: PetscViewerASCIIPrintf(viewer," ICNTL(5) (input mat struct): %d \n",mumps->id.ICNTL(5));
2153: PetscViewerASCIIPrintf(viewer," ICNTL(6) (matrix prescaling): %d \n",mumps->id.ICNTL(6));
2154: PetscViewerASCIIPrintf(viewer," ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));
2155: PetscViewerASCIIPrintf(viewer," ICNTL(8) (scaling strategy): %d \n",mumps->id.ICNTL(8));
2156: PetscViewerASCIIPrintf(viewer," ICNTL(10) (max num of refinements): %d \n",mumps->id.ICNTL(10));
2157: PetscViewerASCIIPrintf(viewer," ICNTL(11) (error analysis): %d \n",mumps->id.ICNTL(11));
2158: if (mumps->id.ICNTL(11)>0) {
2159: PetscViewerASCIIPrintf(viewer," RINFOG(4) (inf norm of input mat): %g\n",mumps->id.RINFOG(4));
2160: PetscViewerASCIIPrintf(viewer," RINFOG(5) (inf norm of solution): %g\n",mumps->id.RINFOG(5));
2161: PetscViewerASCIIPrintf(viewer," RINFOG(6) (inf norm of residual): %g\n",mumps->id.RINFOG(6));
2162: PetscViewerASCIIPrintf(viewer," RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));
2163: PetscViewerASCIIPrintf(viewer," RINFOG(9) (error estimate): %g \n",mumps->id.RINFOG(9));
2164: PetscViewerASCIIPrintf(viewer," RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));
2165: }
2166: PetscViewerASCIIPrintf(viewer," ICNTL(12) (efficiency control): %d \n",mumps->id.ICNTL(12));
2167: PetscViewerASCIIPrintf(viewer," ICNTL(13) (sequential factorization of the root node): %d \n",mumps->id.ICNTL(13));
2168: PetscViewerASCIIPrintf(viewer," ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));
2169: /* ICNTL(15-17) not used */
2170: PetscViewerASCIIPrintf(viewer," ICNTL(18) (input mat struct): %d \n",mumps->id.ICNTL(18));
2171: PetscViewerASCIIPrintf(viewer," ICNTL(19) (Schur complement info): %d \n",mumps->id.ICNTL(19));
2172: PetscViewerASCIIPrintf(viewer," ICNTL(20) (RHS sparse pattern): %d \n",mumps->id.ICNTL(20));
2173: PetscViewerASCIIPrintf(viewer," ICNTL(21) (solution struct): %d \n",mumps->id.ICNTL(21));
2174: PetscViewerASCIIPrintf(viewer," ICNTL(22) (in-core/out-of-core facility): %d \n",mumps->id.ICNTL(22));
2175: PetscViewerASCIIPrintf(viewer," ICNTL(23) (max size of memory that can be allocated locally):%d \n",mumps->id.ICNTL(23));
2177: PetscViewerASCIIPrintf(viewer," ICNTL(24) (detection of null pivot rows): %d \n",mumps->id.ICNTL(24));
2178: PetscViewerASCIIPrintf(viewer," ICNTL(25) (computation of a null space basis): %d \n",mumps->id.ICNTL(25));
2179: PetscViewerASCIIPrintf(viewer," ICNTL(26) (Schur options for RHS or solution): %d \n",mumps->id.ICNTL(26));
2180: PetscViewerASCIIPrintf(viewer," ICNTL(27) (blocking size for multiple RHS): %d \n",mumps->id.ICNTL(27));
2181: PetscViewerASCIIPrintf(viewer," ICNTL(28) (use parallel or sequential ordering): %d \n",mumps->id.ICNTL(28));
2182: PetscViewerASCIIPrintf(viewer," ICNTL(29) (parallel ordering): %d \n",mumps->id.ICNTL(29));
2184: PetscViewerASCIIPrintf(viewer," ICNTL(30) (user-specified set of entries in inv(A)): %d \n",mumps->id.ICNTL(30));
2185: PetscViewerASCIIPrintf(viewer," ICNTL(31) (factors are discarded in the solve phase): %d \n",mumps->id.ICNTL(31));
2186: PetscViewerASCIIPrintf(viewer," ICNTL(33) (compute determinant): %d \n",mumps->id.ICNTL(33));
2187: PetscViewerASCIIPrintf(viewer," ICNTL(35) (activate BLR based factorization): %d \n",mumps->id.ICNTL(35));
2188: PetscViewerASCIIPrintf(viewer," ICNTL(36) (choice of BLR factorization variant): %d \n",mumps->id.ICNTL(36));
2189: PetscViewerASCIIPrintf(viewer," ICNTL(38) (estimated compression rate of LU factors): %d \n",mumps->id.ICNTL(38));
2191: PetscViewerASCIIPrintf(viewer," CNTL(1) (relative pivoting threshold): %g \n",mumps->id.CNTL(1));
2192: PetscViewerASCIIPrintf(viewer," CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));
2193: PetscViewerASCIIPrintf(viewer," CNTL(3) (absolute pivoting threshold): %g \n",mumps->id.CNTL(3));
2194: PetscViewerASCIIPrintf(viewer," CNTL(4) (value of static pivoting): %g \n",mumps->id.CNTL(4));
2195: PetscViewerASCIIPrintf(viewer," CNTL(5) (fixation for null pivots): %g \n",mumps->id.CNTL(5));
2196: PetscViewerASCIIPrintf(viewer," CNTL(7) (dropping parameter for BLR): %g \n",mumps->id.CNTL(7));
2198: /* information local to each processor */
2199: PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis): \n");
2200: PetscViewerASCIIPushSynchronized(viewer);
2201: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(1));
2202: PetscViewerFlush(viewer);
2203: PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization): \n");
2204: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(2));
2205: PetscViewerFlush(viewer);
2206: PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization): \n");
2207: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %g \n",mumps->myid,mumps->id.RINFO(3));
2208: PetscViewerFlush(viewer);
2210: PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");
2211: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(15));
2212: PetscViewerFlush(viewer);
2214: PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");
2215: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(16));
2216: PetscViewerFlush(viewer);
2218: PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization): \n");
2219: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(23));
2220: PetscViewerFlush(viewer);
2222: if (mumps->ninfo && mumps->ninfo <= 80){
2223: PetscInt i;
2224: for (i=0; i<mumps->ninfo; i++){
2225: PetscViewerASCIIPrintf(viewer, " INFO(%d): \n",mumps->info[i]);
2226: PetscViewerASCIISynchronizedPrintf(viewer," [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));
2227: PetscViewerFlush(viewer);
2228: }
2229: }
2230: PetscViewerASCIIPopSynchronized(viewer);
2232: if (!mumps->myid) { /* information from the host */
2233: PetscViewerASCIIPrintf(viewer," RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));
2234: PetscViewerASCIIPrintf(viewer," RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));
2235: PetscViewerASCIIPrintf(viewer," RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));
2236: PetscViewerASCIIPrintf(viewer," (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));
2238: PetscViewerASCIIPrintf(viewer," INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));
2239: PetscViewerASCIIPrintf(viewer," INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(4));
2240: PetscViewerASCIIPrintf(viewer," INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));
2241: PetscViewerASCIIPrintf(viewer," INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));
2242: PetscViewerASCIIPrintf(viewer," INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));
2243: PetscViewerASCIIPrintf(viewer," INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));
2244: PetscViewerASCIIPrintf(viewer," INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));
2245: PetscViewerASCIIPrintf(viewer," INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));
2246: PetscViewerASCIIPrintf(viewer," INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));
2247: PetscViewerASCIIPrintf(viewer," INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));
2248: PetscViewerASCIIPrintf(viewer," INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));
2249: PetscViewerASCIIPrintf(viewer," INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));
2250: PetscViewerASCIIPrintf(viewer," INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));
2251: PetscViewerASCIIPrintf(viewer," INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));
2252: PetscViewerASCIIPrintf(viewer," INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));
2253: PetscViewerASCIIPrintf(viewer," INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));
2254: PetscViewerASCIIPrintf(viewer," INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));
2255: PetscViewerASCIIPrintf(viewer," INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));
2256: PetscViewerASCIIPrintf(viewer," INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));
2257: PetscViewerASCIIPrintf(viewer," INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));
2258: PetscViewerASCIIPrintf(viewer," INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));
2259: PetscViewerASCIIPrintf(viewer," INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));
2260: PetscViewerASCIIPrintf(viewer," INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));
2261: PetscViewerASCIIPrintf(viewer," INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));
2262: PetscViewerASCIIPrintf(viewer," INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));
2263: PetscViewerASCIIPrintf(viewer," INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));
2264: PetscViewerASCIIPrintf(viewer," INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));
2265: PetscViewerASCIIPrintf(viewer," INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));
2266: PetscViewerASCIIPrintf(viewer," INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));
2267: PetscViewerASCIIPrintf(viewer," INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));
2268: PetscViewerASCIIPrintf(viewer," INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));
2269: PetscViewerASCIIPrintf(viewer," INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));
2270: PetscViewerASCIIPrintf(viewer," INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));
2271: PetscViewerASCIIPrintf(viewer," INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));
2272: }
2273: }
2274: }
2275: return(0);
2276: }
2278: PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info)
2279: {
2280: Mat_MUMPS *mumps =(Mat_MUMPS*)A->data;
2283: info->block_size = 1.0;
2284: info->nz_allocated = mumps->id.INFOG(20);
2285: info->nz_used = mumps->id.INFOG(20);
2286: info->nz_unneeded = 0.0;
2287: info->assemblies = 0.0;
2288: info->mallocs = 0.0;
2289: info->memory = 0.0;
2290: info->fill_ratio_given = 0;
2291: info->fill_ratio_needed = 0;
2292: info->factor_mallocs = 0;
2293: return(0);
2294: }
2296: /* -------------------------------------------------------------------------------------------*/
2297: PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2298: {
2299: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2300: const PetscScalar *arr;
2301: const PetscInt *idxs;
2302: PetscInt size,i;
2303: PetscErrorCode ierr;
2306: ISGetLocalSize(is,&size);
2307: if (mumps->petsc_size > 1) {
2308: PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */
2310: ls = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2311: MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);
2312: if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc\n");
2313: }
2315: /* Schur complement matrix */
2316: MatDestroy(&F->schur);
2317: MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);
2318: MatDenseGetArrayRead(F->schur,&arr);
2319: mumps->id.schur = (MumpsScalar*)arr;
2320: mumps->id.size_schur = size;
2321: mumps->id.schur_lld = size;
2322: MatDenseRestoreArrayRead(F->schur,&arr);
2323: if (mumps->sym == 1) {
2324: MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);
2325: }
2327: /* MUMPS expects Fortran style indices */
2328: PetscFree(mumps->id.listvar_schur);
2329: PetscMalloc1(size,&mumps->id.listvar_schur);
2330: ISGetIndices(is,&idxs);
2331: for (i=0; i<size; i++) {PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]));}
2332: ISRestoreIndices(is,&idxs);
2333: if (mumps->petsc_size > 1) {
2334: mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
2335: } else {
2336: if (F->factortype == MAT_FACTOR_LU) {
2337: mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2338: } else {
2339: mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2340: }
2341: }
2342: /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
2343: mumps->id.ICNTL(26) = -1;
2344: return(0);
2345: }
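/* A typical Schur-complement workflow as seen from user code (a sketch: error checking is omitted and the
   MatFactorCreateSchurComplement() calling sequence is assumed from the PETSc 3.14 public API):
     Mat F,S;
     MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);
     MatFactorSetSchurIS(F,is);                  -- is lists the Schur variables; call before the symbolic phase
     ... run the symbolic and numeric factorizations on F ...
     MatFactorCreateSchurComplement(F,&S,NULL);  -- dense copy of the Schur complement
*/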
2347: /* -------------------------------------------------------------------------------------------*/
2348: PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S)
2349: {
2350: Mat St;
2351: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2352: PetscScalar *array;
2353: #if defined(PETSC_USE_COMPLEX)
2354: PetscScalar im = PetscSqrtScalar((PetscScalar)-1.0);
2355: #endif
2359: if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
2360: MatCreate(PETSC_COMM_SELF,&St);
2361: MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);
2362: MatSetType(St,MATDENSE);
2363: MatSetUp(St);
2364: MatDenseGetArray(St,&array);
2365: if (!mumps->sym) { /* MUMPS always returns a full matrix */
2366: if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2367: PetscInt i,j,N=mumps->id.size_schur;
2368: for (i=0;i<N;i++) {
2369: for (j=0;j<N;j++) {
2370: #if !defined(PETSC_USE_COMPLEX)
2371: PetscScalar val = mumps->id.schur[i*N+j];
2372: #else
2373: PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2374: #endif
2375: array[j*N+i] = val;
2376: }
2377: }
2378: } else { /* stored by columns */
2379: PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);
2380: }
2381: } else { /* either full or lower-triangular (not packed) */
2382: if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2383: PetscInt i,j,N=mumps->id.size_schur;
2384: for (i=0;i<N;i++) {
2385: for (j=i;j<N;j++) {
2386: #if !defined(PETSC_USE_COMPLEX)
2387: PetscScalar val = mumps->id.schur[i*N+j];
2388: #else
2389: PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2390: #endif
2391: array[i*N+j] = val;
2392: array[j*N+i] = val;
2393: }
2394: }
2395: } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2396: PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);
2397: } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2398: PetscInt i,j,N=mumps->id.size_schur;
2399: for (i=0;i<N;i++) {
2400: for (j=0;j<i+1;j++) {
2401: #if !defined(PETSC_USE_COMPLEX)
2402: PetscScalar val = mumps->id.schur[i*N+j];
2403: #else
2404: PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2405: #endif
2406: array[i*N+j] = val;
2407: array[j*N+i] = val;
2408: }
2409: }
2410: }
2411: }
2412: MatDenseRestoreArray(St,&array);
2413: *S = St;
2414: return(0);
2415: }
2417: /* -------------------------------------------------------------------------------------------*/
2418: PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
2419: {
2421: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2424: PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl));
2425: return(0);
2426: }
2428: PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival)
2429: {
2430: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2433: *ival = mumps->id.ICNTL(icntl);
2434: return(0);
2435: }
2437: /*@
2438: MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2440: Logically Collective on Mat
2442: Input Parameters:
2443: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2444: . icntl - index of MUMPS parameter array ICNTL()
2445: - ival - value of MUMPS ICNTL(icntl)
2447: Options Database:
2448: . -mat_mumps_icntl_<icntl> <ival>
2450: Level: beginner
2452: References:
2453: . MUMPS Users' Guide
2455: .seealso: MatGetFactor(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2456: @*/
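/* Example (a sketch; error checking omitted). Assume pc is a PCLU/PCCHOLESKY preconditioner configured with
   MatSolverType MUMPS and PCFactorSetUpMatSolverType(pc) has been called, so the factor matrix exists but
   has not yet been factored:
     Mat      F;
     PetscInt ival;
     PCFactorGetMatrix(pc,&F);
     MatMumpsSetIcntl(F,14,30);     -- ICNTL(14)=30: raise the estimated workspace by 30 percent
     MatMumpsGetIcntl(F,14,&ival);  -- read the value back
*/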
2457: PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)
2458: {
2463: if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2466: PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));
2467: return(0);
2468: }
2470: /*@
2471: MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2473: Logically Collective on Mat
2475: Input Parameters:
2476: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2477: - icntl - index of MUMPS parameter array ICNTL()
2479: Output Parameter:
2480: . ival - value of MUMPS ICNTL(icntl)
2482: Level: beginner
2484: References:
2485: . MUMPS Users' Guide
2487: .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2488: @*/
2489: PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival)
2490: {
2495: if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2498: PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2499: return(0);
2500: }
2502: /* -------------------------------------------------------------------------------------------*/
2503: PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val)
2504: {
2505: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2508: mumps->id.CNTL(icntl) = val;
2509: return(0);
2510: }
2512: PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val)
2513: {
2514: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2517: *val = mumps->id.CNTL(icntl);
2518: return(0);
2519: }
2521: /*@
2522: MatMumpsSetCntl - Set MUMPS parameter CNTL()
2524: Logically Collective on Mat
2526: Input Parameters:
2527: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2528: . icntl - index of MUMPS parameter array CNTL()
2529: - val - value of MUMPS CNTL(icntl)
2531: Options Database:
2532: . -mat_mumps_cntl_<icntl> <val>
2534: Level: beginner
2536: References:
2537: . MUMPS Users' Guide
2539: .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2540: @*/
2541: PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val)
2542: {
2547: if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2550: PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));
2551: return(0);
2552: }
2554: /*@
2555: MatMumpsGetCntl - Get MUMPS parameter CNTL()
2557: Logically Collective on Mat
2559: Input Parameters:
2560: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2561: - icntl - index of MUMPS parameter array CNTL()
2563: Output Parameter:
2564: . val - value of MUMPS CNTL(icntl)
2566: Level: beginner
2568: References:
2569: . MUMPS Users' Guide
2571: .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2572: @*/
2573: PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val)
2574: {
2579: if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2582: PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2583: return(0);
2584: }
2586: PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info)
2587: {
2588: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2591: *info = mumps->id.INFO(icntl);
2592: return(0);
2593: }
2595: PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog)
2596: {
2597: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2600: *infog = mumps->id.INFOG(icntl);
2601: return(0);
2602: }
2604: PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo)
2605: {
2606: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2609: *rinfo = mumps->id.RINFO(icntl);
2610: return(0);
2611: }
2613: PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog)
2614: {
2615: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2618: *rinfog = mumps->id.RINFOG(icntl);
2619: return(0);
2620: }
2622: PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS)
2623: {
2625: Mat Bt = NULL,Btseq = NULL;
2626: PetscBool flg;
2627: Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2628: PetscScalar *aa;
2629: PetscInt spnr,*ia,*ja,M,nrhs;
2633: PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);
2634: if (flg) {
2635: MatTransposeGetMat(spRHS,&Bt);
2636: } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be of type MATTRANSPOSEMAT");
2638: MatMumpsSetIcntl(F,30,1);
2640: if (mumps->petsc_size > 1) {
2641: Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data;
2642: Btseq = b->A;
2643: } else {
2644: Btseq = Bt;
2645: }
2647: MatGetSize(spRHS,&M,&nrhs);
2648: mumps->id.nrhs = nrhs;
2649: mumps->id.lrhs = M;
2650: mumps->id.rhs = NULL;
2652: if (!mumps->myid) {
2653: MatSeqAIJGetArray(Btseq,&aa);
2654: MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);
2655: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2656: PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);
2657: mumps->id.rhs_sparse = (MumpsScalar*)aa;
2658: } else {
2659: mumps->id.irhs_ptr = NULL;
2660: mumps->id.irhs_sparse = NULL;
2661: mumps->id.nz_rhs = 0;
2662: mumps->id.rhs_sparse = NULL;
2663: }
2664: mumps->id.ICNTL(20) = 1; /* rhs is sparse */
2665: mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */
2667: /* solve phase */
2668: /*-------------*/
2669: mumps->id.job = JOB_SOLVE;
2670: PetscMUMPS_c(mumps);
2671: if (mumps->id.INFOG(1) < 0)
2672: SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
2674: if (!mumps->myid) {
2675: MatSeqAIJRestoreArray(Btseq,&aa);
2676: MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);
2677: if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2678: }
2679: return(0);
2680: }
2682: /*@
2683: MatMumpsGetInverse - Get user-specified set of entries in inverse of A
2685: Logically Collective on Mat
2687: Input Parameters:
2688: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2689: - spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0]
2691: Output Parameter:
2692: . spRHS - requested entries of inverse of A
2694: Level: beginner
2696: References:
2697: . MUMPS Users' Guide
2699: .seealso: MatGetFactor(), MatCreateTranspose()
2700: @*/
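/* Example (a sketch for the sequential case; n and the requested pattern are illustrative, error checking
   omitted): request the diagonal entries of inv(A) from an LU factor F of an n-by-n matrix A.
     Mat      Bt,spRHS;
     PetscInt i;
     MatCreateSeqAIJ(PETSC_COMM_SELF,n,n,1,NULL,&Bt);
     for (i=0; i<n; i++) MatSetValue(Bt,i,i,0.0,INSERT_VALUES);   -- sparsity pattern of the requested entries
     MatAssemblyBegin(Bt,MAT_FINAL_ASSEMBLY); MatAssemblyEnd(Bt,MAT_FINAL_ASSEMBLY);
     MatCreateTranspose(Bt,&spRHS);
     MatMumpsGetInverse(F,spRHS);    -- the requested entries of inv(A) are written into Bt
     MatDestroy(&spRHS); MatDestroy(&Bt);
*/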
2701: PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS)
2702: {
2707: if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2708: PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));
2709: return(0);
2710: }
2712: PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST)
2713: {
2715: Mat spRHS;
2718: MatCreateTranspose(spRHST,&spRHS);
2719: MatMumpsGetInverse_MUMPS(F,spRHS);
2720: MatDestroy(&spRHS);
2721: return(0);
2722: }
2724: /*@
2725: MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T
2727: Logically Collective on Mat
2729: Input Parameters:
2730: + F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface
2731: - spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0]
2733: Output Parameter:
2734: . spRHST - requested entries of inverse of A^T
2736: Level: beginner
2738: References:
2739: . MUMPS Users' Guide
2741: .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse()
2742: @*/
2743: PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST)
2744: {
2746: PetscBool flg;
2750: if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2751: PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);
2752: if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be a MATAIJ matrix");
2754: PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));
2755: return(0);
2756: }
2758: /*@
2759: MatMumpsGetInfo - Get MUMPS parameter INFO()
2761: Logically Collective on Mat
2763: Input Parameters:
2764: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2765: - icntl - index of MUMPS parameter array INFO()
2767: Output Parameter:
2768: . ival - value of MUMPS INFO(icntl)
2770: Level: beginner
2772: References:
2773: . MUMPS Users' Guide
2775: .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2776: @*/
2777: PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival)
2778: {
2783: if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2785: PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2786: return(0);
2787: }
2789: /*@
2790: MatMumpsGetInfog - Get MUMPS parameter INFOG()
2792: Logically Collective on Mat
2794: Input Parameters:
2795: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2796: - icntl - index of MUMPS parameter array INFOG()
2798: Output Parameter:
2799: . ival - value of MUMPS INFOG(icntl)
2801: Level: beginner
2803: References:
2804: . MUMPS Users' Guide
2806: .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2807: @*/
2808: PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival)
2809: {
2814: if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2816: PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2817: return(0);
2818: }
2820: /*@
2821: MatMumpsGetRinfo - Get MUMPS parameter RINFO()
2823: Logically Collective on Mat
2825: Input Parameters:
2826: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2827: - icntl - index of MUMPS parameter array RINFO()
2829: Output Parameter:
2830: . val - value of MUMPS RINFO(icntl)
2832: Level: beginner
2834: References:
2835: . MUMPS Users' Guide
2837: .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfog()
2838: @*/
2839: PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val)
2840: {
2845: if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2847: PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2848: return(0);
2849: }
2851: /*@
2852: MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
2854: Logically Collective on Mat
2856: Input Parameters:
2857: + F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2858: - icntl - index of MUMPS parameter array RINFOG()
2860: Output Parameter:
2861: . val - value of MUMPS RINFOG(icntl)
2863: Level: beginner
2865: References:
2866: . MUMPS Users' Guide
2868: .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo()
2869: @*/
2870: PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val)
2871: {
2876: if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2878: PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2879: return(0);
2880: }
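/*
   Editor's note (not part of the original source): a sketch combining MatMumpsGetRinfog() with the
   determinant option ICNTL(33)=1 (e.g. -mat_mumps_icntl_33 1); F is a factored matrix, error checking elided.
$     PetscReal rinfog12,rinfog13;
$     MatMumpsGetRinfog(F,12,&rinfog12);   RINFOG(12): real part of the determinant mantissa
$     MatMumpsGetRinfog(F,13,&rinfog13);   RINFOG(13): imaginary part of the determinant mantissa
$     The determinant equals (rinfog12 + i*rinfog13)*2^INFOG(34), with INFOG(34) from MatMumpsGetInfog().
*/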
2882: /*MC
2883: MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for
2884: distributed and sequential matrices via the external package MUMPS.
2886: Works with MATAIJ and MATSBAIJ matrices
2888: Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2890: Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below.
2892: Use -pc_type lu or -pc_type cholesky together with -pc_factor_mat_solver_type mumps to use this direct solver
2894: Options Database Keys:
2895: + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2896: . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings
2897: . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host
2898: . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4)
2899: . -mat_mumps_icntl_6 - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7)
2900: . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 1=PETSc (sequential only), 3=Scotch, 4=PORD, 5=Metis
2901: . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77)
2902: . -mat_mumps_icntl_10 - ICNTL(10): maximum number of iterative refinement steps
2903: . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2904: . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2905: . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2906: . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space
2907: . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement
2908: . -mat_mumps_icntl_20 - ICNTL(20): gives MUMPS a centralized (0) or distributed (10) dense right-hand side
2909: . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
2910: . -mat_mumps_icntl_23 - ICNTL(23): maximum size of the working memory (MB) that MUMPS can allocate per processor
2911: . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1)
2912: . -mat_mumps_icntl_25 - ICNTL(25): computes a solution of a deficient matrix and a null space basis
2913: . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
2914: . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
2915: . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2916: . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
2917: . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2918: . -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2919: . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2920: . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2921: . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2922: . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold
2923: . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement
2924: . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2925: . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2926: . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2927: . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2928: - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
2929: If m is not given, the default is usually the number of cores per CPU package (socket) as reported by hwloc, following the MUMPS manual's suggestion.
2931: If run sequentially, one can use the PETSc-provided ordering with the option -mat_mumps_icntl_7 1.
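   An illustrative command line combining some of the options above (editor's sketch; ./ex2 stands for your own executable):
$    mpirun -n 4 ./ex2 -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_4 2 -mat_mumps_icntl_14 30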
2933: Level: beginner
2935: Notes:
2936: MUMPS Cholesky does not handle (complex) Hermitian matrices (see the MUMPS Users' Guide, http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so using it on a Hermitian matrix generates an error.
2938: When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
2939: $ KSPGetPC(ksp,&pc);
2940: $ PCFactorGetMatrix(pc,&mat);
2941: $ MatMumpsGetInfo(mat,....);
2942: $ MatMumpsGetInfog(mat,....); etc.
2943: Alternatively, run with -ksp_error_if_not_converged: the program will stop and the MUMPS information will be printed in the error message.
2945: Two modes to run MUMPS/PETSc with OpenMP
2947: $ Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
2948: $ threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
2950: $ -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
2951: $ if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
2953: To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
2954: (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
2955: (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
2956: libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
2957: (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
2959: If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
2960: processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
2961: size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
2962: are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (for example,
2963: by the CPU binding policy in a job script) and make them available to the master rank, so that the OpenMP threads spawned by MUMPS can run on those CPUs.
2964: On a multi-socket compute node, MPI rank mapping matters. Continuing the example above, suppose the node has two sockets and MPI ranks are
2965: interleaved across them (even ranks on socket 0, odd ranks on socket 1) and bound to cores: with -mat_mumps_use_omp_threads 16, a master rank
2966: (and the threads it spawns) would then use half of its cores from socket 0 and half from socket 1, which definitely hurts locality. If MPI ranks
2967: are instead mapped consecutively onto the two sockets, the problem does not arise. Therefore, when using -mat_mumps_use_omp_threads, keep an eye
2968: on your MPI rank mapping and CPU binding.
2969: For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
2970: examine the mapping result.
2972: PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
2973: for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc
2974: calls omp_set_num_threads(m) internally before calling MUMPS.
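   A job-script sketch combining the advice above (editor's addition; the executable ./test, the rank count 64, and m=16 are placeholders):
$    export OMP_PROC_BIND=spread
$    export OMP_PLACES=threads
$    srun --cpu-bind=verbose -m block:block -n 64 ./test -mat_mumps_use_omp_threads 16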
2976: References:
2977: + 1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
2978: - 2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
2980: .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix()
2982: M*/
2984: static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type)
2985: {
2987: *type = MATSOLVERMUMPS;
2988: return(0);
2989: }
2991: /* MatGetFactor for Seq and MPI AIJ matrices */
2992: static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
2993: {
2994: Mat B;
2996: Mat_MUMPS *mumps;
2997: PetscBool isSeqAIJ;
2998: PetscMPIInt size;
3001: #if defined(PETSC_USE_COMPLEX)
3002: if (A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
3003: #endif
3004: /* Create the factorization matrix */
3005: PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);
3006: MatCreate(PetscObjectComm((PetscObject)A),&B);
3007: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
3008: PetscStrallocpy("mumps",&((PetscObject)B)->type_name);
3009: MatSetUp(B);
3011: PetscNewLog(B,&mumps);
3013: B->ops->view = MatView_MUMPS;
3014: B->ops->getinfo = MatGetInfo_MUMPS;
3016: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);
3017: PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);
3018: PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);
3019: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);
3020: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);
3021: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);
3022: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);
3023: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);
3024: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);
3025: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);
3026: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);
3027: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);
3028: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);
3030: if (ftype == MAT_FACTOR_LU) {
3031: B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3032: B->factortype = MAT_FACTOR_LU;
3033: if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3034: else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
3035: mumps->sym = 0;
3036: } else {
3037: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3038: B->factortype = MAT_FACTOR_CHOLESKY;
3039: if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3040: else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3041: #if defined(PETSC_USE_COMPLEX)
3042: mumps->sym = 2;
3043: #else
3044: if (A->spd_set && A->spd) mumps->sym = 1;
3045: else mumps->sym = 2;
3046: #endif
3047: }
3049: /* set solvertype */
3050: PetscFree(B->solvertype);
3051: PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);
3052: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3053: if (size == 1) {
3054: /* MUMPS can use the PETSc-provided ordering with "-mat_mumps_icntl_7 1" when sequential, so generate the ordering even though it may not be used */
3055: B->useordering = PETSC_TRUE;
3056: }
3058: B->ops->destroy = MatDestroy_MUMPS;
3059: B->data = (void*)mumps;
3061: PetscInitializeMUMPS(A,mumps);
3063: *F = B;
3064: return(0);
3065: }
3067: /* MatGetFactor for Seq and MPI SBAIJ matrices */
3068: static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
3069: {
3070: Mat B;
3072: Mat_MUMPS *mumps;
3073: PetscBool isSeqSBAIJ;
3074: PetscMPIInt size;
3077: #if defined(PETSC_USE_COMPLEX)
3078: if (A->hermitian && !A->symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
3079: #endif
3080: MatCreate(PetscObjectComm((PetscObject)A),&B);
3081: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
3082: PetscStrallocpy("mumps",&((PetscObject)B)->type_name);
3083: MatSetUp(B);
3085: PetscNewLog(B,&mumps);
3086: PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);
3087: if (isSeqSBAIJ) {
3088: mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3089: } else {
3090: mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3091: }
3093: B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3094: B->ops->view = MatView_MUMPS;
3095: B->ops->getinfo = MatGetInfo_MUMPS;
3097: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);
3098: PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);
3099: PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);
3100: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);
3101: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);
3102: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);
3103: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);
3104: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);
3105: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);
3106: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);
3107: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);
3108: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);
3109: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);
3111: B->factortype = MAT_FACTOR_CHOLESKY;
3112: #if defined(PETSC_USE_COMPLEX)
3113: mumps->sym = 2;
3114: #else
3115: if (A->spd_set && A->spd) mumps->sym = 1;
3116: else mumps->sym = 2;
3117: #endif
3119: /* set solvertype */
3120: PetscFree(B->solvertype);
3121: PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);
3122: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3123: if (size == 1) {
3124: /* MUMPS can use the PETSc-provided ordering with "-mat_mumps_icntl_7 1" when sequential, so generate the ordering even though it may not be used */
3125: B->useordering = PETSC_TRUE;
3126: }
3128: B->ops->destroy = MatDestroy_MUMPS;
3129: B->data = (void*)mumps;
3131: PetscInitializeMUMPS(A,mumps);
3133: *F = B;
3134: return(0);
3135: }
3137: static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
3138: {
3139: Mat B;
3141: Mat_MUMPS *mumps;
3142: PetscBool isSeqBAIJ;
3143: PetscMPIInt size;
3146: /* Create the factorization matrix */
3147: PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);
3148: MatCreate(PetscObjectComm((PetscObject)A),&B);
3149: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
3150: PetscStrallocpy("mumps",&((PetscObject)B)->type_name);
3151: MatSetUp(B);
3153: PetscNewLog(B,&mumps);
3154: if (ftype == MAT_FACTOR_LU) {
3155: B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3156: B->factortype = MAT_FACTOR_LU;
3157: if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3158: else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3159: mumps->sym = 0;
3160: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky; use an SBAIJ or AIJ matrix instead");
3162: B->ops->view = MatView_MUMPS;
3163: B->ops->getinfo = MatGetInfo_MUMPS;
3165: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);
3166: PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);
3167: PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);
3168: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);
3169: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);
3170: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);
3171: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);
3172: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);
3173: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);
3174: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);
3175: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);
3176: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);
3177: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);
3179: /* set solvertype */
3180: PetscFree(B->solvertype);
3181: PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);
3182: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3183: if (size == 1) {
3184: /* MUMPS can use the PETSc-provided ordering with "-mat_mumps_icntl_7 1" when sequential, so generate the ordering even though it may not be used */
3185: B->useordering = PETSC_TRUE;
3186: }
3188: B->ops->destroy = MatDestroy_MUMPS;
3189: B->data = (void*)mumps;
3191: PetscInitializeMUMPS(A,mumps);
3193: *F = B;
3194: return(0);
3195: }
3197: /* MatGetFactor for Seq and MPI SELL matrices */
3198: static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
3199: {
3200: Mat B;
3202: Mat_MUMPS *mumps;
3203: PetscBool isSeqSELL;
3204: PetscMPIInt size;
3207: /* Create the factorization matrix */
3208: PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);
3209: MatCreate(PetscObjectComm((PetscObject)A),&B);
3210: MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
3211: PetscStrallocpy("mumps",&((PetscObject)B)->type_name);
3212: MatSetUp(B);
3214: PetscNewLog(B,&mumps);
3216: B->ops->view = MatView_MUMPS;
3217: B->ops->getinfo = MatGetInfo_MUMPS;
3219: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);
3220: PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);
3221: PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);
3222: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);
3223: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);
3224: PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);
3225: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);
3226: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);
3227: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);
3228: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);
3229: PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);
3231: if (ftype == MAT_FACTOR_LU) {
3232: B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3233: B->factortype = MAT_FACTOR_LU;
3234: if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3235: else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3236: mumps->sym = 0;
3237: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3239: /* set solvertype */
3240: PetscFree(B->solvertype);
3241: PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);
3242: MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
3243: if (size == 1) {
3244: /* MUMPS can use the PETSc-provided ordering with "-mat_mumps_icntl_7 1" when sequential, so generate the ordering even though it may not be used */
3245: B->useordering = PETSC_TRUE;
3246: }
3248: B->ops->destroy = MatDestroy_MUMPS;
3249: B->data = (void*)mumps;
3251: PetscInitializeMUMPS(A,mumps);
3253: *F = B;
3254: return(0);
3255: }
3257: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3258: {
3262: MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);
3263: MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);
3264: MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);
3265: MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);
3266: MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);
3267: MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);
3268: MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);
3269: MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);
3270: MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);
3271: MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);
3272: MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);
3273: return(0);
3274: }