/*
   Actual source code: mpidense.c
   petsc-3.14.0 2020-09-29
*/
2: /*
3: Basic functions for basic parallel dense matrices.
4: */
6: #include <../src/mat/impls/dense/mpi/mpidense.h>
7: #include <../src/mat/impls/aij/mpi/mpiaij.h>
8: #include <petscblaslapack.h>
10: /*@
12: MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
13: matrix that represents the operator. For sequential matrices it returns itself.
15: Input Parameter:
16: . A - the Seq or MPI dense matrix
18: Output Parameter:
19: . B - the inner matrix
21: Level: intermediate
23: @*/
24: PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
25: {
26: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
28: PetscBool flg;
33: PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg);
34: if (flg) *B = mat->A;
35: else {
36: PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg);
37: if (!flg) SETERRQ1(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
38: *B = A;
39: }
40: return(0);
41: }
43: PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
44: {
45: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
47: PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
50: if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
51: lrow = row - rstart;
52: MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);
53: return(0);
54: }
56: PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
57: {
58: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
60: PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;
63: if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
64: lrow = row - rstart;
65: MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);
66: return(0);
67: }
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  PetscInt m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar *array;
  MPI_Comm comm;
  PetscBool flg;
  Mat B;

  /* Return the square on-process diagonal block. It is created once, cached
     on the object under the name "DiagonalBlock", and aliases the storage of
     the local sequential block (no copy is made). */
  MatHasCongruentLayouts(A,&flg);
  if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);
    if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    PetscObjectGetComm((PetscObject)(mdn->A),&comm);
    MatCreate(comm,&B);
    MatSetSizes(B,m,m,m,m);
    MatSetType(B,((PetscObject)mdn->A)->type_name);
    /* alias the local array starting at column rstart
       NOTE(review): the offset m*rstart assumes the local block's leading
       dimension equals m; confirm behavior if MatDenseSetLDA was used */
    MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);
    MatSeqDenseSetPreallocation(B,array+m*rstart);
    MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);
    MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
    /* composing keeps a reference, so our own reference can be dropped now */
    PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);
    *a = B;
    MatDestroy(&B);
  } else *a = B;
  return(0);
}
PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
  PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool roworiented = A->roworiented;

  /* Insert or add a logically dense m x n block of values. Locally owned
     rows go straight into the sequential block; off-process rows are
     buffered in the stash and communicated at assembly time (unless
     MAT_IGNORE_OFF_PROC_ENTRIES was set, in which case they are dropped). */
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row indices are silently skipped */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* global -> local row index */
      if (roworiented) {
        MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);
      } else {
        /* column-oriented input: v is laid out m-by-n column-major */
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE; /* off-process entry: matrix needs reassembly */
      if (roworiented) {
        MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);
      } else {
        MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);
      }
    }
  }
  return(0);
}
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  /* Retrieve an m x n block of entries; only rows owned by this process may
     be requested. The output array v is filled row-oriented. */
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
    if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart; /* global -> local row index */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
        if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  return(0);
}
159: static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
160: {
161: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
165: MatDenseGetLDA(a->A,lda);
166: return(0);
167: }
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscBool iscuda;

  /* If the local block does not exist yet, create it here so the leading
     dimension can be fixed before preallocation. The local block spans the
     local rows but ALL global columns (A->rmap->n x A->cmap->N). */
  if (!a->A) {
    if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
    PetscLayoutSetUp(A->rmap);
    PetscLayoutSetUp(A->cmap);
    MatCreate(PETSC_COMM_SELF,&a->A);
    PetscLogObjectParent((PetscObject)A,(PetscObject)a->A);
    MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);
    /* match the (CUDA or host) flavor of the parallel matrix */
    PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);
    MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);
  }
  MatDenseSetLDA(a->A,lda);
  return(0);
}
190: static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
191: {
192: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
196: if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
197: MatDenseGetArray(a->A,array);
198: return(0);
199: }
201: static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
202: {
203: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
207: if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
208: MatDenseGetArrayRead(a->A,array);
209: return(0);
210: }
212: static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
213: {
214: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
218: if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
219: MatDenseGetArrayWrite(a->A,array);
220: return(0);
221: }
223: static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
224: {
225: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
229: if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
230: if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
231: MatDensePlaceArray(a->A,array);
232: return(0);
233: }
235: static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
236: {
237: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
241: if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
242: if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
243: MatDenseResetArray(a->A);
244: return(0);
245: }
247: static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
248: {
249: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
253: if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
254: if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
255: MatDenseReplaceArray(a->A,array);
256: return(0);
257: }
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd;
  PetscErrorCode ierr;
  PetscInt lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt *irow,*icol;
  const PetscScalar *v;
  PetscScalar *bv;
  Mat newmat;
  IS iscol_local;
  MPI_Comm comm_is,comm_mat;

  /* Extract the submatrix A(isrow,iscol) into a new (or reused) parallel
     dense matrix with the same row partitioning as A. */
  PetscObjectGetComm((PetscObject)A,&comm_mat);
  PetscObjectGetComm((PetscObject)iscol,&comm_is);
  if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  /* every process stores full rows, so it needs ALL requested columns */
  ISAllGather(iscol,&iscol_local);
  ISGetIndices(isrow,&irow);
  ISGetIndices(iscol_local,&icol);
  ISGetLocalSize(isrow,&nrows);
  ISGetLocalSize(iscol,&ncols);
  ISGetSize(iscol,&Ncols); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only submatrix same partitioning as
     original matrix! */
  MatGetLocalSize(A,&nlrows,&nlcols);
  MatGetOwnershipRange(A,&rstart,&rend);

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    MatCreate(PetscObjectComm((PetscObject)A),&newmat);
    MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);
    MatSetType(newmat,((PetscObject)A)->type_name);
    MatMPIDenseSetPreallocation(newmat,NULL);
  }

  /* Now extract the data pointers and do the copy, column at a time */
  /* NOTE(review): writing through bv++ assumes the new local block is stored
     with leading dimension == nrows — confirm for reused matrices */
  newmatd = (Mat_MPIDense*)newmat->data;
  MatDenseGetArray(newmatd->A,&bv);
  MatDenseGetArrayRead(mat->A,&v);
  MatDenseGetLDA(mat->A,&lda);
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i]; /* start of source column icol[i] */
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart]; /* requested (locally owned) rows of that column */
    }
  }
  MatDenseRestoreArrayRead(mat->A,&v);
  MatDenseRestoreArray(newmatd->A,&bv);

  /* Assemble the matrices so that the correct flags are set */
  MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);

  /* Free work space */
  ISRestoreIndices(isrow,&irow);
  ISRestoreIndices(iscol_local,&icol);
  ISDestroy(&iscol_local);
  *B = newmat;
  return(0);
}
329: PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
330: {
331: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
335: MatDenseRestoreArray(a->A,array);
336: return(0);
337: }
339: PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
340: {
341: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
345: MatDenseRestoreArrayRead(a->A,array);
346: return(0);
347: }
349: PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
350: {
351: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
355: MatDenseRestoreArrayWrite(a->A,array);
356: return(0);
357: }
PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  PetscInt nstash,reallocs;

  /* Start sending stashed off-process entries; nothing to do when stashing
     was disabled or no off-process entries were set. */
  if (mdn->donotstash || mat->nooffprocentries) return(0);

  MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);
  MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);
  PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
  return(0);
}
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data;
  PetscInt i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt n;
  PetscScalar *val;

  /* Drain stashed entries arriving from other processes and insert them. */
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);
      if (!flg) break; /* no more incoming messages */

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else ncols = n-i;
        /* Now assemble all these values with a single function call */
        MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);
        i = j;
      }
    }
    MatStashScatterEnd_Private(&mat->stash);
  }

  /* assemble the local sequential block */
  MatAssemblyBegin(mdn->A,mode);
  MatAssemblyEnd(mdn->A,mode);

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    /* first final assembly: set up the parallel multiply infrastructure */
    MatSetUpMultiply_MPIDense(mat);
  }
  return(0);
}
413: PetscErrorCode MatZeroEntries_MPIDense(Mat A)
414: {
416: Mat_MPIDense *l = (Mat_MPIDense*)A->data;
419: MatZeroEntries(l->A);
420: return(0);
421: }
PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt i,len,*lrows;

  /* get locally owned rows */
  PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar *bb;

    /* b[row] = diag * x[row] for each zeroed (locally owned) row */
    VecGetArrayRead(x, &xx);
    VecGetArrayWrite(b, &bb);
    for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]];
    VecRestoreArrayRead(x, &xx);
    VecRestoreArrayWrite(b, &bb);
  }
  /* zero the selected rows of the local block, with zero diagonal... */
  MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);
  if (diag != 0.0) {
    Vec d;

    /* ...then insert `diag` on the diagonal.
       NOTE(review): VecSet + MatDiagonalSet writes `diag` on EVERY diagonal
       entry, not only on the zeroed rows — confirm this is intended */
    MatCreateVecs(A,NULL,&d);
    VecSet(d,diag);
    MatDiagonalSet(A,d,INSERT_VALUES);
    VecDestroy(&d);
  }
  PetscFree(lrows);
  return(0);
}
456: PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
457: PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
458: PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
459: PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);
PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  const PetscScalar *ax;
  PetscScalar *ay;

  /* y = A*x. First gather ALL entries of the distributed x into the local
     work vector lvec via the star-forest broadcast (each process stores full
     rows, so it needs the whole of x); then the product is purely local. */
  VecGetArrayReadInPlace(xx,&ax);
  VecGetArrayInPlace(mdn->lvec,&ay);
  PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);
  PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);
  VecRestoreArrayInPlace(mdn->lvec,&ay);
  VecRestoreArrayReadInPlace(xx,&ax);
  (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);
  return(0);
}
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  const PetscScalar *ax;
  PetscScalar *ay;

  /* z = y + A*x. Same communication pattern as MatMult_MPIDense: broadcast
     the whole of x into lvec, then do the local multiply-add. */
  VecGetArrayReadInPlace(xx,&ax);
  VecGetArrayInPlace(mdn->lvec,&ay);
  PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);
  PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);
  VecRestoreArrayInPlace(mdn->lvec,&ay);
  VecRestoreArrayReadInPlace(xx,&ax);
  (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);
  return(0);
}
PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  const PetscScalar *ax;
  PetscScalar *ay;

  /* y = A^T*x. Each process forms its local contribution in lvec, then the
     contributions are summed into the distributed y via the star-forest
     reduce (y must start at zero for the sum to be correct). */
  VecSet(yy,0.0);
  (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
  VecGetArrayReadInPlace(a->lvec,&ax);
  VecGetArrayInPlace(yy,&ay);
  PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);
  PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);
  VecRestoreArrayReadInPlace(a->lvec,&ax);
  VecRestoreArrayInPlace(yy,&ay);
  return(0);
}
PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  const PetscScalar *ax;
  PetscScalar *ay;

  /* z = y + A^T*x. Seed z with y, then sum the per-process local transpose
     products into z via the star-forest reduce. */
  VecCopy(yy,zz);
  (*a->A->ops->multtranspose)(a->A,xx,a->lvec);
  VecGetArrayReadInPlace(a->lvec,&ax);
  VecGetArrayInPlace(zz,&ay);
  PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);
  PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);
  VecRestoreArrayReadInPlace(a->lvec,&ax);
  VecRestoreArrayInPlace(zz,&ay);
  return(0);
}
535: PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
536: {
537: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
538: PetscErrorCode ierr;
539: PetscInt lda,len,i,n,m = A->rmap->n,radd;
540: PetscScalar *x,zero = 0.0;
541: const PetscScalar *av;
544: VecSet(v,zero);
545: VecGetArray(v,&x);
546: VecGetSize(v,&n);
547: if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
548: len = PetscMin(a->A->rmap->n,a->A->cmap->n);
549: radd = A->rmap->rstart*m;
550: MatDenseGetArrayRead(a->A,&av);
551: MatDenseGetLDA(a->A,&lda);
552: for (i=0; i<len; i++) {
553: x[i] = av[radd + i*lda + i];
554: }
555: MatDenseRestoreArrayRead(a->A,&av);
556: VecRestoreArray(v,&x);
557: return(0);
558: }
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;

  /* Tear down a parallel dense matrix: free the stash, the local block, the
     communication objects and every composed method. */
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  MatStashDestroy_Private(&mat->stash);
  /* refuse to destroy while a column vector or submatrix view is outstanding */
  if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  /* release the local block, scatter objects and cached column/submatrix views */
  MatDestroy(&mdn->A);
  VecDestroy(&mdn->lvec);
  PetscSFDestroy(&mdn->Mvctx);
  VecDestroy(&mdn->cvec);
  MatDestroy(&mdn->cmat);

  PetscFree(mat->data);
  PetscObjectChangeTypeName((PetscObject)mat,NULL);

  /* remove every method composed on the object at creation time */
  PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);
  PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);
  PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);
  PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);
  PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);
  PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);
  PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);
  PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);
  PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);
  PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);
  PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL);
#endif
  PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);
  PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);
  PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);
#if defined (PETSC_HAVE_CUDA)
  PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);
  PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);
#endif
  PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);
  PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);
  PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);
  PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);
  PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);
  PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);
  PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);
  PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);
  PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);
  PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);
  return(0);
}
618: PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);
620: #include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;
  PetscMPIInt rank;
  PetscViewerType vtype;
  PetscBool iascii,isdraw;
  PetscViewer sviewer;
  PetscViewerFormat format;

  /* View a parallel dense matrix. INFO formats print summary data only; all
     other formats gather the whole matrix onto rank 0 and view it there. */
  MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);
  PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
  PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
  if (iascii) {
    PetscViewerGetType(viewer,&vtype);
    PetscViewerGetFormat(viewer,&format);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process summary, no entries */
      MatInfo info;
      MatGetInfo(mat,MAT_LOCAL,&info);
      PetscViewerASCIIPushSynchronized(viewer);
      PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);
      PetscViewerFlush(viewer);
      PetscViewerASCIIPopSynchronized(viewer);
      PetscSFView(mdn->Mvctx,viewer);
      return(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      return(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    /* nothing to draw on a null device */
    PetscViewerDrawGetDraw(viewer,0,&draw);
    PetscDrawIsNull(draw,&isnull);
    if (isnull) return(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat A;
    PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt *cols;
    PetscScalar *vals;

    MatCreate(PetscObjectComm((PetscObject)mat),&A);
    if (!rank) {
      MatSetSizes(A,M,N,M,N);
    } else {
      MatSetSizes(A,0,0,M,N);
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    MatSetType(A,MATMPIDENSE);
    MatMPIDenseSetPreallocation(A,NULL);
    PetscLogObjectParent((PetscObject)mat,(PetscObject)A);

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);
      MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);
      MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);
      row++;
    }

    MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
    PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
    if (!rank) {
      /* rank 0 now owns all rows; view its sequential block */
      PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);
      MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);
    }
    PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);
    PetscViewerFlush(viewer);
    MatDestroy(&A);
  }
  return(0);
}
704: PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
705: {
707: PetscBool iascii,isbinary,isdraw,issocket;
710: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
711: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
712: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);
713: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);
715: if (iascii || issocket || isdraw) {
716: MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);
717: } else if (isbinary) {
718: MatView_Dense_Binary(mat,viewer);
719: }
720: return(0);
721: }
723: PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
724: {
725: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
726: Mat mdn = mat->A;
728: PetscLogDouble isend[5],irecv[5];
731: info->block_size = 1.0;
733: MatGetInfo(mdn,MAT_LOCAL,info);
735: isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
736: isend[3] = info->memory; isend[4] = info->mallocs;
737: if (flag == MAT_LOCAL) {
738: info->nz_used = isend[0];
739: info->nz_allocated = isend[1];
740: info->nz_unneeded = isend[2];
741: info->memory = isend[3];
742: info->mallocs = isend[4];
743: } else if (flag == MAT_GLOBAL_MAX) {
744: MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));
746: info->nz_used = irecv[0];
747: info->nz_allocated = irecv[1];
748: info->nz_unneeded = irecv[2];
749: info->memory = irecv[3];
750: info->mallocs = irecv[4];
751: } else if (flag == MAT_GLOBAL_SUM) {
752: MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));
754: info->nz_used = irecv[0];
755: info->nz_allocated = irecv[1];
756: info->nz_unneeded = irecv[2];
757: info->memory = irecv[3];
758: info->mallocs = irecv[4];
759: }
760: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
761: info->fill_ratio_needed = 0;
762: info->factor_mallocs = 0;
763: return(0);
764: }
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  /* Set an option: some are forwarded to the local block, some are recorded
     locally, and options without meaning for dense storage are ignored. */
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    /* forwarded to the sequential block */
    MatCheckPreallocated(A,1);
    MatSetOption(a->A,op,flg);
    break;
  case MAT_ROW_ORIENTED:
    /* controls the interpretation of the v array in MatSetValues */
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    MatSetOption(a->A,op,flg);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* when set, MatSetValues drops off-process entries instead of stashing */
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  return(0);
}
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  const PetscScalar *l;
  PetscScalar x,*v,*vv,*r;
  PetscErrorCode ierr;
  PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;

  /* A <- diag(ll) * A * diag(rr); either vector may be NULL to skip a side */
  MatDenseGetArray(mdn->A,&vv);
  MatDenseGetLDA(mdn->A,&lda);
  MatGetLocalSize(A,&s2,&s3);
  if (ll) {
    /* left scaling: row i of the local block is scaled by ll[i];
       ll conforms to the row layout, so no communication is needed */
    VecGetLocalSize(ll,&s2a);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
    VecGetArrayRead(ll,&l);
    for (i=0; i<m; i++) {
      x = l[i];
      v = vv + i; /* walk along row i with stride lda */
      for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
    }
    VecRestoreArrayRead(ll,&l);
    PetscLogFlops(1.0*n*m);
  }
  if (rr) {
    const PetscScalar *ar;

    /* right scaling: all n column factors are needed locally, so gather the
       distributed rr into the work vector lvec via the star-forest first */
    VecGetLocalSize(rr,&s3a);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    VecGetArrayRead(rr,&ar);
    VecGetArray(mdn->lvec,&r);
    PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);
    PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);
    VecRestoreArrayRead(rr,&ar);
    for (i=0; i<n; i++) {
      x = r[i];
      v = vv + i*lda; /* column i is contiguous in memory */
      for (j=0; j<m; j++) (*v++) *= x;
    }
    VecRestoreArray(mdn->lvec,&r);
    PetscLogFlops(1.0*n*m);
  }
  MatDenseRestoreArray(mdn->A,&vv);
  return(0);
}
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt i,j;
  PetscMPIInt size;
  PetscReal sum = 0.0;
  const PetscScalar *av,*v;

  /* Matrix norms: Frobenius, 1-norm (max column sum) and infinity norm (max
     row sum) are supported in parallel; the 2-norm is not. */
  MatDenseGetArrayRead(mdn->A,&av);
  v = av;
  MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);
  if (size == 1) {
    /* uniprocessor: the local norm IS the global norm */
    MatNorm(mdn->A,type,nrm);
  } else {
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over the local entries, global sum, then sqrt
         NOTE(review): walks the array densely, assuming lda == local rows */
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
      *nrm = PetscSqrtReal(*nrm);
      PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);
    } else if (type == NORM_1) {
      /* per-column absolute sums, summed over processes, then the max */
      PetscReal *tmp,*tmp2;
      PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);
      *nrm = 0.0;
      v = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscFree2(tmp,tmp2);
      PetscLogFlops(A->cmap->n*A->rmap->n);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      MatNorm(mdn->A,type,&ntemp);
      MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  MatDenseRestoreArrayRead(mdn->A,&av);
  return(0);
}
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat B;
  PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscInt j,i,lda;
  PetscScalar *v;

  /* Form B = A^T by scattering each local column j of A into global row j of
     B with MatSetValues (off-process entries go through the stash). */
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    MatCreate(PetscObjectComm((PetscObject)A),&B);
    MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);
    MatSetType(B,((PetscObject)A)->type_name);
    MatMPIDenseSetPreallocation(B,NULL);
  } else B = *matout;

  m = a->A->rmap->n; n = a->A->cmap->n;
  MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);
  MatDenseGetLDA(a->A,&lda);
  /* rwork holds the global row indices of A, which become column indices in B */
  PetscMalloc1(m,&rwork);
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);
    v += lda; /* advance to the next column of the local block */
  }
  MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);
  PetscFree(rwork);
  MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: replace A's contents by B and discard B's shell */
    MatHeaderMerge(A,&B);
  }
  return(0);
}
940: static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
941: PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);
943: PetscErrorCode MatSetUp_MPIDense(Mat A)
944: {
948: PetscLayoutSetUp(A->rmap);
949: PetscLayoutSetUp(A->cmap);
950: if (!A->preallocated) {
951: MatMPIDenseSetPreallocation(A,NULL);
952: }
953: return(0);
954: }
956: PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
957: {
959: Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;
962: MatAXPY(A->A,alpha,B->A,str);
963: return(0);
964: }
966: PetscErrorCode MatConjugate_MPIDense(Mat mat)
967: {
968: Mat_MPIDense *a = (Mat_MPIDense*)mat->data;
972: MatConjugate(a->A);
973: return(0);
974: }
976: PetscErrorCode MatRealPart_MPIDense(Mat A)
977: {
978: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
982: MatRealPart(a->A);
983: return(0);
984: }
986: PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
987: {
988: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
992: MatImaginaryPart(a->A);
993: return(0);
994: }
996: static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
997: {
999: Mat_MPIDense *a = (Mat_MPIDense*) A->data;
1002: if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
1003: if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
1004: (*a->A->ops->getcolumnvector)(a->A,v,col);
1005: return(0);
1006: }
1008: PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);
1010: PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
1011: {
1013: PetscInt i,n;
1014: Mat_MPIDense *a = (Mat_MPIDense*) A->data;
1015: PetscReal *work;
1018: MatGetSize(A,NULL,&n);
1019: PetscMalloc1(n,&work);
1020: MatGetColumnNorms_SeqDense(a->A,type,work);
1021: if (type == NORM_2) {
1022: for (i=0; i<n; i++) work[i] *= work[i];
1023: }
1024: if (type == NORM_INFINITY) {
1025: MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);
1026: } else {
1027: MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);
1028: }
1029: PetscFree(work);
1030: if (type == NORM_2) {
1031: for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
1032: }
1033: return(0);
1034: }
1036: #if defined(PETSC_HAVE_CUDA)
/*
   MatDenseGetColumnVec_MPIDenseCUDA - Exposes column 'col' of the matrix as
   a CUDA vector without copying. Must be paired with the matching restore;
   only one column view may be outstanding at a time (tracked in vecinuse).
*/
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscInt lda;

  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the reusable wrapper vector; the array is attached below */
    VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);
    PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);
  }
  a->vecinuse = col + 1; /* stored 1-based so that 0 means "no view open" */
  MatDenseGetLDA(a->A,&lda);
  MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);
  VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda); /* columns are lda apart */
  *v = a->cvec;
  return(0);
}
/*
   MatDenseRestoreColumnVec_MPIDenseCUDA - Closes the column view opened by
   MatDenseGetColumnVec_MPIDenseCUDA(): returns the device array to the local
   matrix and detaches it from the wrapper vector.
*/
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0; /* mark the view closed before releasing resources */
  MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);
  VecCUDAResetArray(a->cvec);
  *v = NULL;
  return(0);
}
/*
   MatDenseGetColumnVecRead_MPIDenseCUDA - Read-only variant of the column
   view: fetches the device array with read access and read-locks the wrapper
   vector so callers cannot modify it.
*/
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscInt lda;

  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the reusable wrapper vector; the array is attached below */
    VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);
    PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);
  }
  a->vecinuse = col + 1; /* stored 1-based so that 0 means "no view open" */
  MatDenseGetLDA(a->A,&lda);
  MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);
  VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda); /* columns are lda apart */
  VecLockReadPush(a->cvec); /* enforce read-only access on the view */
  *v = a->cvec;
  return(0);
}
/*
   MatDenseRestoreColumnVecRead_MPIDenseCUDA - Closes a read-only column view:
   returns the device array, drops the read lock, and detaches the array from
   the wrapper vector.
*/
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0; /* mark the view closed before releasing resources */
  MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);
  VecLockReadPop(a->cvec); /* must undo the push done in the matching get */
  VecCUDAResetArray(a->cvec);
  *v = NULL;
  return(0);
}
/*
   MatDenseGetColumnVecWrite_MPIDenseCUDA - Write-only variant of the column
   view: fetches the device array with write access (prior contents need not
   be valid) and wraps the requested column in a vector.
*/
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscInt lda;

  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the reusable wrapper vector; the array is attached below */
    VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);
    PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);
  }
  a->vecinuse = col + 1; /* stored 1-based so that 0 means "no view open" */
  MatDenseGetLDA(a->A,&lda);
  MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);
  VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda); /* columns are lda apart */
  *v = a->cvec;
  return(0);
}
/*
   MatDenseRestoreColumnVecWrite_MPIDenseCUDA - Closes a write-only column
   view: returns the device array to the local matrix and detaches it from
   the wrapper vector.
*/
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0; /* mark the view closed before releasing resources */
  MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);
  VecCUDAResetArray(a->cvec);
  *v = NULL;
  return(0);
}
1147: static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1148: {
1149: Mat_MPIDense *l = (Mat_MPIDense*) A->data;
1153: if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1154: if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1155: MatDenseCUDAPlaceArray(l->A,a);
1156: return(0);
1157: }
1159: static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
1160: {
1161: Mat_MPIDense *l = (Mat_MPIDense*) A->data;
1165: if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1166: if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1167: MatDenseCUDAResetArray(l->A);
1168: return(0);
1169: }
1171: static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
1172: {
1173: Mat_MPIDense *l = (Mat_MPIDense*) A->data;
1177: if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1178: if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1179: MatDenseCUDAReplaceArray(l->A,a);
1180: return(0);
1181: }
1183: static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1184: {
1185: Mat_MPIDense *l = (Mat_MPIDense*) A->data;
1189: MatDenseCUDAGetArrayWrite(l->A,a);
1190: return(0);
1191: }
1193: static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
1194: {
1195: Mat_MPIDense *l = (Mat_MPIDense*) A->data;
1199: MatDenseCUDARestoreArrayWrite(l->A,a);
1200: return(0);
1201: }
1203: static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1204: {
1205: Mat_MPIDense *l = (Mat_MPIDense*) A->data;
1209: MatDenseCUDAGetArrayRead(l->A,a);
1210: return(0);
1211: }
1213: static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
1214: {
1215: Mat_MPIDense *l = (Mat_MPIDense*) A->data;
1219: MatDenseCUDARestoreArrayRead(l->A,a);
1220: return(0);
1221: }
1223: static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1224: {
1225: Mat_MPIDense *l = (Mat_MPIDense*) A->data;
1229: MatDenseCUDAGetArray(l->A,a);
1230: return(0);
1231: }
1233: static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
1234: {
1235: Mat_MPIDense *l = (Mat_MPIDense*) A->data;
1239: MatDenseCUDARestoreArray(l->A,a);
1240: return(0);
1241: }
1243: static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1244: static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1245: static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
1246: static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
1247: static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
1248: static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
1249: static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);
/*
   MatBindToCPU_MPIDenseCUDA - Pins the matrix to the CPU (bind=PETSC_TRUE)
   or allows GPU execution (bind=PETSC_FALSE).

   Binds the inner local matrix, then swaps the MatDenseGet/RestoreColumnVec*
   composed functions between the CUDA and host implementations. When
   unbinding, any cached column vector / submatrix of the wrong (non-CUDA)
   type is destroyed so it will be recreated with the right type on next use.
*/
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense *d = (Mat_MPIDense*)mat->data;

  if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    MatBindToCPU(d->A,bind);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* drop cached helpers that are not CUDA-typed; they get rebuilt lazily */
    PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);
    if (!iscuda) {
      VecDestroy(&d->cvec);
    }
    PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);
    if (!iscuda) {
      MatDestroy(&d->cmat);
    }
    PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);
    PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);
    PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);
    PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);
    PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);
    PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);
  } else {
    PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);
    PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);
    PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);
    PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);
    PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);
    PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);
  }
  if (d->cmat) {
    MatBindToCPU(d->cmat,bind);
  }
  return(0);
}
/*
   MatMPIDenseCUDASetPreallocation - Allocates (or adopts) device storage for
   a MATMPIDENSECUDA matrix.

   Input Parameters:
.  A      - the matrix; silently ignored if it is not of type MATMPIDENSECUDA
.  d_data - optional user-provided device array; NULL lets PETSc allocate

   Creates the inner sequential CUDA dense block (local rows x all columns)
   on first call, then forwards the preallocation to it.
*/
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense *d = (Mat_MPIDense*)A->data;
  PetscBool iscuda;

  PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);
  if (!iscuda) return(0);
  PetscLayoutSetUp(A->rmap);
  PetscLayoutSetUp(A->cmap);
  if (!d->A) {
    /* the local block holds this rank's rows against ALL global columns */
    MatCreate(PETSC_COMM_SELF,&d->A);
    PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);
    MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);
  }
  MatSetType(d->A,MATSEQDENSECUDA);
  MatSeqDenseCUDASetPreallocation(d->A,d_data);
  A->preallocated = PETSC_TRUE;
  return(0);
}
1316: #endif
1318: static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
1319: {
1320: Mat_MPIDense *d = (Mat_MPIDense*)x->data;
1324: MatSetRandom(d->A,rctx);
1325: return(0);
1326: }
/* A dense matrix stores every entry, so no diagonal entry can be missing;
   the optional index output d is left untouched. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  *missing = PETSC_FALSE;
  return(0);
}
1335: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1336: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1337: static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
1338: static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
1339: static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
1340: static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);
/* -------------------------------------------------------------------*/
/* Virtual function table for MATMPIDENSE. Slot indices (the /*NN*-style
   comments) follow the ordering of struct _MatOps in petscmat headers;
   NULL means the operation is unsupported or inherited generically. */
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /* 4*/  MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 10*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 29*/ MatSetUp_MPIDense,
                                        NULL,
                                        NULL,
                                        MatGetDiagonalBlock_MPIDense,
                                        NULL,
                                /* 34*/ MatDuplicate_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        NULL,
                                        MatGetValues_MPIDense,
                                        NULL,
                                /* 44*/ NULL,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        NULL,
                                        NULL,
                                /* 49*/ MatSetRandom_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 54*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        NULL,
                                        NULL,
                                /* 64*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 69*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 74*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 79*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 83*/ MatLoad_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 89*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 94*/ NULL,
                                        NULL,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        NULL,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        NULL,
                                        NULL,
                                        MatConjugate_MPIDense,
                                        NULL,
                                /*104*/ NULL,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        NULL,
                                        NULL,
                                /*109*/ NULL,
                                        NULL,
                                        NULL,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*119*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*124*/ NULL,
                                        MatGetColumnNorms_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*129*/ NULL,
                                        NULL,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        NULL,
                                /*134*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*139*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ NULL,
                                        NULL,
                                        NULL
};
/*
   MatMPIDenseSetPreallocation_MPIDense - Allocates (or adopts) storage for
   the matrix by creating and preallocating the inner sequential block.

   Input Parameters:
.  mat  - the MPI dense matrix
.  data - optional user-provided array (NULL lets PETSc allocate)

   The inner block holds this rank's rows against ALL global columns. It is
   created on first call and typed to match the outer matrix (CUDA or host).
*/
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense *a = (Mat_MPIDense*)mat->data;
  PetscBool iscuda;

  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscLayoutSetUp(mat->rmap);
  PetscLayoutSetUp(mat->cmap);
  if (!a->A) {
    MatCreate(PETSC_COMM_SELF,&a->A);
    PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
    MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);
  }
  /* the inner block's type must track the outer matrix's (CUDA vs host) */
  PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);
  MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);
  MatSeqDenseSetPreallocation(a->A,data);
  mat->preallocated = PETSC_TRUE;
  return(0);
}
1515: #if defined(PETSC_HAVE_ELEMENTAL)
/*
   MatConvert_MPIDense_Elemental - Converts a MATMPIDENSE matrix to
   MATELEMENTAL by re-inserting all locally-owned rows with MatSetValues.

   Each rank inserts its full row slab (all N global columns). The Elemental
   interface only supports ADD_VALUES for off-process entries, so the target
   is zeroed (reuse) or freshly created, and values are ADDed.
*/
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat mat_elemental;
  PetscScalar *v;
  PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    MatZeroEntries(*newmat); /* required since values are ADDed below */
  } else {
    MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);
    MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);
    MatSetType(mat_elemental,MATELEMENTAL);
    MatSetUp(mat_elemental);
    MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE); /* dense storage is column-major */
  }

  PetscMalloc2(m,&rows,N,&cols);
  for (i=0; i<N; i++) cols[i] = i;          /* every global column */
  for (i=0; i<m; i++) rows[i] = rstart + i; /* locally owned global rows */

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  MatDenseGetArray(A,&v);
  MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);
  MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);
  MatDenseRestoreArray(A,&v);
  PetscFree2(rows,cols);

  if (reuse == MAT_INPLACE_MATRIX) {
    MatHeaderReplace(A,&mat_elemental);
  } else {
    *newmat = mat_elemental;
  }
  return(0);
}
1554: #endif
1556: static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
1557: {
1558: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
1562: MatDenseGetColumn(mat->A,col,vals);
1563: return(0);
1564: }
1566: static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
1567: {
1568: Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
1572: MatDenseRestoreColumn(mat->A,vals);
1573: return(0);
1574: }
/*
   MatCreateMPIMatConcatenateSeqMat_MPIDense - Builds a parallel dense matrix
   whose local row slab on each rank is the given sequential matrix.

   Input Parameters:
.  comm  - communicator for the result
.  inmat - this rank's sequential dense block (same global column count everywhere)
.  n     - local column count of the result, or PETSC_DECIDE
.  scall - MAT_INITIAL_MATRIX (create) or MAT_REUSE_MATRIX (fill existing)

   Output Parameter:
.  outmat - the assembled MPI dense matrix
*/
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  Mat_MPIDense *mat;
  PetscInt m,nloc,N;

  MatGetSize(inmat,&m,&N);
  MatGetLocalSize(inmat,NULL,&nloc);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      PetscSplitOwnership(comm,&n,&N);
    }
    /* Check sum(n) = N */
    MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);
  }

  /* numeric phase: copy the sequential block into the local slab */
  mat = (Mat_MPIDense*)(*outmat)->data;
  MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);
  MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);
  return(0);
}
1606: #if defined(PETSC_HAVE_CUDA)
/*
   MatConvert_MPIDenseCUDA_MPIDense - Converts a MATMPIDENSECUDA matrix back
   to a host MATMPIDENSE: rebinds to the CPU, switches the default vector
   type and type name, unregisters all CUDA-specific composed functions, and
   converts the inner sequential block to MATSEQDENSE.
*/
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat B;
  Mat_MPIDense *m;

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(M,MAT_COPY_VALUES,newmat);
  } else if (reuse == MAT_REUSE_MATRIX) {
    MatCopy(M,*newmat,SAME_NONZERO_PATTERN);
  }

  B = *newmat;
  MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE); /* restores the host column-vec callbacks */
  PetscFree(B->defaultvectype);
  PetscStrallocpy(VECSTANDARD,&B->defaultvectype);
  PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);
  /* remove all CUDA-only composed functions (compose with NULL unregisters) */
  PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);
  PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);
  m = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* convert the local block to the host type and rebuild the mult machinery */
    MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);
    MatSetUpMultiply_MPIDense(B);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask = PETSC_OFFLOAD_CPU;
  return(0);
}
/*
   MatConvert_MPIDense_MPIDenseCUDA - Converts a host MATMPIDENSE matrix to
   MATMPIDENSECUDA: switches the default vector type and type name, registers
   the CUDA-specific composed functions, converts the inner sequential block
   to MATSEQDENSECUDA, and unbinds from the CPU.
*/
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat B;
  Mat_MPIDense *m;

  if (reuse == MAT_INITIAL_MATRIX) {
    MatDuplicate(M,MAT_COPY_VALUES,newmat);
  } else if (reuse == MAT_REUSE_MATRIX) {
    MatCopy(M,*newmat,SAME_NONZERO_PATTERN);
  }

  B = *newmat;
  PetscFree(B->defaultvectype);
  PetscStrallocpy(VECCUDA,&B->defaultvectype);
  PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);
  /* register the CUDA-specific composed functions */
  PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense);
  PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense);
  PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);
  PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);
  PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA);
  PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA);
  PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA);
  PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA);
  PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA);
  PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);
  PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA);
  PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA);
  PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA);
  m = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* convert the local block to the device type and rebuild the mult machinery */
    MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);
    MatSetUpMultiply_MPIDense(B);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE); /* installs the CUDA column-vec callbacks */

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  return(0);
}
1693: #endif
1695: PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1696: {
1697: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1699: PetscInt lda;
1702: if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1703: if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1704: if (!a->cvec) {
1705: VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);
1706: }
1707: a->vecinuse = col + 1;
1708: MatDenseGetLDA(a->A,&lda);
1709: MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);
1710: VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);
1711: *v = a->cvec;
1712: return(0);
1713: }
1715: PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
1716: {
1717: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1721: if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1722: if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
1723: a->vecinuse = 0;
1724: MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);
1725: VecResetArray(a->cvec);
1726: *v = NULL;
1727: return(0);
1728: }
1730: PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1731: {
1732: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1734: PetscInt lda;
1737: if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1738: if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1739: if (!a->cvec) {
1740: VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);
1741: }
1742: a->vecinuse = col + 1;
1743: MatDenseGetLDA(a->A,&lda);
1744: MatDenseGetArrayRead(a->A,&a->ptrinuse);
1745: VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);
1746: VecLockReadPush(a->cvec);
1747: *v = a->cvec;
1748: return(0);
1749: }
1751: PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
1752: {
1753: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1757: if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1758: if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1759: a->vecinuse = 0;
1760: MatDenseRestoreArrayRead(a->A,&a->ptrinuse);
1761: VecLockReadPop(a->cvec);
1762: VecResetArray(a->cvec);
1763: *v = NULL;
1764: return(0);
1765: }
1767: PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1768: {
1769: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1771: PetscInt lda;
1774: if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
1775: if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
1776: if (!a->cvec) {
1777: VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);
1778: }
1779: a->vecinuse = col + 1;
1780: MatDenseGetLDA(a->A,&lda);
1781: MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);
1782: VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);
1783: *v = a->cvec;
1784: return(0);
1785: }
1787: PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
1788: {
1789: Mat_MPIDense *a = (Mat_MPIDense*)A->data;
1793: if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
1794: if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
1795: a->vecinuse = 0;
1796: MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);
1797: VecResetArray(a->cvec);
1798: *v = NULL;
1799: return(0);
1800: }
/*
   MatDenseGetSubMatrix_MPIDense - Returns a no-copy view of the contiguous
   column range [cbegin,cend) as an MPI dense matrix. Must be paired with
   MatDenseRestoreSubMatrix(); only one submatrix view may be open at a time.

   The cached view matrix a->cmat shares A's row layout; its column layout is
   (re)built whenever the requested width changes.
*/
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense *c;
  MPI_Comm comm;
  PetscBool setup = PETSC_FALSE;

  PetscObjectGetComm((PetscObject)A,&comm);
  if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cmat) {
    /* first use: create the cached view matrix with A's row layout */
    setup = PETSC_TRUE;
    MatCreate(comm,&a->cmat);
    PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);
    MatSetType(a->cmat,((PetscObject)A)->type_name);
    PetscLayoutReference(A->rmap,&a->cmat->rmap);
    PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);
    PetscLayoutSetUp(a->cmat->cmap);
  } else if (cend-cbegin != a->cmat->cmap->N) {
    /* width changed since last use: rebuild only the column layout */
    setup = PETSC_TRUE;
    PetscLayoutDestroy(&a->cmat->cmap);
    PetscLayoutCreate(comm,&a->cmat->cmap);
    PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);
    PetscLayoutSetUp(a->cmat->cmap);
  }
  c = (Mat_MPIDense*)a->cmat->data;
  if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A); /* inner no-copy column view */
  if (setup) { /* do we really need this? */
    MatSetUpMultiply_MPIDense(a->cmat);
  }
  /* the view borrows A's storage, so it is already allocated and assembled */
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled = PETSC_TRUE;
  a->matinuse = cbegin + 1; /* 1-based so 0 means "no view open" */
  *v = a->cmat;
  return(0);
}
/*
   MatDenseRestoreSubMatrix_MPIDense - Closes the submatrix view opened by
   MatDenseGetSubMatrix_MPIDense() and returns the inner column view.
*/
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense *c;

  if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
  if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
  if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0; /* mark the view closed before releasing resources */
  c = (Mat_MPIDense*)a->cmat->data;
  MatDenseRestoreSubMatrix(a->A,&c->A); /* hands the inner view back; c->A becomes NULL */
  *v = NULL;
  return(0);
}
/*
   MatCreate_MPIDense - Type constructor for MATMPIDENSE.

   Allocates the Mat_MPIDense private data, installs the operations table,
   creates the stash used to cache off-process entries set via MatSetValues(),
   and composes all MatDense* method implementations (and optional package
   conversions) on the object.
*/
1859: PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
1860: {
1861: Mat_MPIDense *a;
1865: PetscNewLog(mat,&a);
1866: mat->data = (void*)a;
1867: PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));
1869: mat->insertmode = NOT_SET_VALUES;
1871: /* build cache for off array entries formed */
1872: a->donotstash = PETSC_FALSE;
1874: MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);
1876: /* stuff used for matrix vector multiply */
1877: a->lvec = NULL;
1878: a->Mvctx = NULL;
1879: a->roworiented = PETSC_TRUE;
/* compose the method implementations queried by the generic MatDense* API */
1881: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);
1882: PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);
1883: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);
1884: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);
1885: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);
1886: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);
1887: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);
1888: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);
1889: PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);
1890: PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);
1891: PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);
1892: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);
1893: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);
1894: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);
1895: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);
1896: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);
1897: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);
1898: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);
1899: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);
/* conversions to external package formats, available only when configured */
1900: #if defined(PETSC_HAVE_ELEMENTAL)
1901: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);
1902: #endif
1903: #if defined(PETSC_HAVE_SCALAPACK)
1904: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK);
1905: #endif
1906: #if defined(PETSC_HAVE_CUDA)
1907: PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);
1908: #endif
1909: PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);
1910: PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);
1911: PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);
1912: #if defined(PETSC_HAVE_CUDA)
1913: PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);
1914: PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);
1915: #endif
1917: PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);
1918: PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);
1919: PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);
1920: return(0);
1921: }
1923: /*MC
1924: MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.
1926: Options Database Keys:
1927: . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()
1929: Level: beginner
1931: .seealso: MATDENSE, MATDENSECUDA, MATMPIDENSE, MATSEQDENSECUDA
1933: M*/
1934: #if defined(PETSC_HAVE_CUDA)
/*
   MatCreate_MPIDenseCUDA - Type constructor for MATMPIDENSECUDA.

   Builds a plain MATMPIDENSE and then converts it in place to the CUDA
   variant; requires CUDA to have been initialized.
*/
1935: PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
1936: {
1940: PetscCUDAInitializeCheck();
1941: MatCreate_MPIDense(B);
1942: MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);
1943: return(0);
1944: }
1945: #endif
1947: /*MC
1948: MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.
1950: This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
1951: and MATMPIDENSE otherwise.
1953: Options Database Keys:
1954: . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()
1956: Level: beginner
1959: .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
1960: M*/
1962: /*MC
1963: MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.
1965: This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
1966: and MATMPIDENSECUDA otherwise.
1968: Options Database Keys:
1969: . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()
1971: Level: beginner
1973: .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
1974: M*/
1976: /*@C
1977: MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries
1979: Collective
1981: Input Parameters:
1982: + B - the matrix
1983: - data - optional location of matrix data. Set data=NULL for PETSc
1984: to control all matrix memory allocation.
1986: Notes:
1987: The dense format is fully compatible with standard Fortran 77
1988: storage by columns.
1990: The data input variable is intended primarily for Fortran programmers
1991: who wish to allocate their own matrix memory space. Most users should
1992: set data=NULL.
1994: Level: intermediate
1996: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
1997: @*/
1998: PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
1999: {
/* dispatch to the type-specific implementation composed on B; silently a
   no-op for matrix types that do not compose "MatMPIDenseSetPreallocation_C" */
2004: PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));
2005: return(0);
2006: }
2008: /*@
2009: MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
2010: array provided by the user. This is useful to avoid copying an array
2011: into a matrix
2013: Not Collective
2015: Input Parameters:
2016: + mat - the matrix
2017: - array - the array in column major order
2019: Notes:
2020: You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
2021: freed when the matrix is destroyed.
2023: Level: developer
2025: .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2027: @*/
2028: PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array)
2029: {
2034: PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));
/* the values may have changed: bump the object state so cached norms etc. are invalidated */
2035: PetscObjectStateIncrease((PetscObject)mat);
2036: #if defined(PETSC_HAVE_CUDA)
/* the user-provided host array is now authoritative */
2037: mat->offloadmask = PETSC_OFFLOAD_CPU;
2038: #endif
2039: return(0);
2040: }
2042: /*@
2043: MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()
2045: Not Collective
2047: Input Parameters:
2048: . mat - the matrix
2050: Notes:
2051: You can only call this after a call to MatDensePlaceArray()
2053: Level: developer
2055: .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()
2057: @*/
2058: PetscErrorCode MatDenseResetArray(Mat mat)
2059: {
2064: PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));
/* the backing storage changed back to the original array: invalidate cached state */
2065: PetscObjectStateIncrease((PetscObject)mat);
2066: return(0);
2067: }
2069: /*@
2070: MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
2071: array provided by the user. This is useful to avoid copying an array
2072: into a matrix
2074: Not Collective
2076: Input Parameters:
2077: + mat - the matrix
2078: - array - the array in column major order
2080: Notes:
2081: The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
2082: freed by the user. It will be freed when the matrix is destroyed.
2084: Level: developer
2086: .seealso: MatDenseGetArray(), VecReplaceArray()
2087: @*/
2088: PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array)
2089: {
2094: PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
/* values changed permanently: bump the state and mark the host copy current */
2095: PetscObjectStateIncrease((PetscObject)mat);
2096: #if defined(PETSC_HAVE_CUDA)
2097: mat->offloadmask = PETSC_OFFLOAD_CPU;
2098: #endif
2099: return(0);
2100: }
2102: #if defined(PETSC_HAVE_CUDA)
2103: /*@C
2104: MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
2105: array provided by the user. This is useful to avoid copying an array
2106: into a matrix
2108: Not Collective
2110: Input Parameters:
2111: + mat - the matrix
2112: - array - the array in column major order
2114: Notes:
2115: You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
2116: freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().
2118: Level: developer
2120: .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
2121: @*/
2122: PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
2123: {
2128: PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));
/* values changed and now live in the user-provided device array */
2129: PetscObjectStateIncrease((PetscObject)mat);
2130: mat->offloadmask = PETSC_OFFLOAD_GPU;
2131: return(0);
2132: }
2134: /*@C
2135: MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()
2137: Not Collective
2139: Input Parameters:
2140: . mat - the matrix
2142: Notes:
2143: You can only call this after a call to MatDenseCUDAPlaceArray()
2145: Level: developer
2147: .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()
2149: @*/
2150: PetscErrorCode MatDenseCUDAResetArray(Mat mat)
2151: {
2156: PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));
/* backing device storage changed back to the original: invalidate cached state */
2157: PetscObjectStateIncrease((PetscObject)mat);
2158: return(0);
2159: }
2161: /*@C
2162: MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
2163: array provided by the user. This is useful to avoid copying an array
2164: into a matrix
2166: Not Collective
2168: Input Parameters:
2169: + mat - the matrix
2170: - array - the array in column major order
2172: Notes:
2173: This permanently replaces the GPU array and frees the memory associated with the old GPU array.
2174: The memory passed in CANNOT be freed by the user. It will be freed
2175: when the matrix is destroyed. The array should respect the matrix leading dimension.
2177: Level: developer
2179: .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
2180: @*/
2181: PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
2182: {
2187: PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
/* values changed permanently; the device copy is now the authoritative one */
2188: PetscObjectStateIncrease((PetscObject)mat);
2189: mat->offloadmask = PETSC_OFFLOAD_GPU;
2190: return(0);
2191: }
2193: /*@C
2194: MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.
2196: Not Collective
2198: Input Parameters:
2199: . A - the matrix
2201: Output Parameters
2202: . array - the GPU array in column major order
2204: Notes:
2205: The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
2207: Level: developer
2209: .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead()
2210: @*/
2211: PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
2212: {
2217: PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));
/* caller is expected to overwrite the values: bump the state eagerly */
2218: PetscObjectStateIncrease((PetscObject)A);
2219: return(0);
2220: }
2222: /*@C
2223: MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().
2225: Not Collective
2227: Input Parameters:
2228: + A - the matrix
2229: - array - the GPU array in column major order
2231: Notes:
   After this call the GPU copy of the data is considered up to date (the offload mask is set to PETSC_OFFLOAD_GPU).
2233: Level: developer
2235: .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2236: @*/
2237: PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
2238: {
2243: PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a))
2244: PetscObjectStateIncrease((PetscObject)A);
/* writes happened on the device: mark the GPU copy as current */
2245: A->offloadmask = PETSC_OFFLOAD_GPU;
2246: return(0);
2247: }
2249: /*@C
2250: MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.
2252: Not Collective
2254: Input Parameters:
2255: . A - the matrix
2257: Output Parameters
2258: . array - the GPU array in column major order
2260: Notes:
2261: Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2263: Level: developer
2265: .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2266: @*/
2267: PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
2268: {
/* read-only access: note the object state is intentionally NOT increased */
2273: PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));
2274: return(0);
2275: }
2277: /*@C
2278: MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2280: Not Collective
2282: Input Parameters:
2283: + A - the matrix
2284: - array - the GPU array in column major order
2286: Notes:
2287: Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().
2289: Level: developer
2291: .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead()
2292: @*/
2293: PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
2294: {
/* no state increase and no offload-mask change: the data was only read */
2298: PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));
2299: return(0);
2300: }
2302: /*@C
2303: MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.
2305: Not Collective
2307: Input Parameters:
2308: . A - the matrix
2310: Output Parameters
2311: . array - the GPU array in column major order
2313: Notes:
2314: Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().
2316: Level: developer
2318: .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
2319: @*/
2320: PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
2321: {
2326: PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));
/* read-write access: assume the caller may modify the values */
2327: PetscObjectStateIncrease((PetscObject)A);
2328: return(0);
2329: }
2331: /*@C
2332: MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().
2334: Not Collective
2336: Input Parameters:
2337: + A - the matrix
2338: - array - the GPU array in column major order
2340: Notes:
   After this call the GPU copy of the data is considered up to date (the offload mask is set to PETSC_OFFLOAD_GPU).
2342: Level: developer
2344: .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
2345: @*/
2346: PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
2347: {
2352: PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));
2353: PetscObjectStateIncrease((PetscObject)A);
/* possible writes happened on the device: mark the GPU copy as current */
2354: A->offloadmask = PETSC_OFFLOAD_GPU;
2355: return(0);
2356: }
2357: #endif
2359: /*@C
2360: MatCreateDense - Creates a matrix in dense format.
2362: Collective
2364: Input Parameters:
2365: + comm - MPI communicator
2366: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2367: . n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2368: . M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2369: . N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2370: - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
2371: to control all matrix memory allocation.
2373: Output Parameter:
2374: . A - the matrix
2376: Notes:
2377: The dense format is fully compatible with standard Fortran 77
2378: storage by columns.
2380: The data input variable is intended primarily for Fortran programmers
2381: who wish to allocate their own matrix memory space. Most users should
2382: set data=NULL (PETSC_NULL_SCALAR for Fortran users).
2384: The user MUST specify either the local or global matrix dimensions
2385: (possibly both).
2387: Level: intermediate
2389: .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
2390: @*/
2391: PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2392: {
2394: PetscMPIInt size;
2397: MatCreate(comm,A);
2398: MatSetSizes(*A,m,n,M,N);
2399: MPI_Comm_size(comm,&size);
2400: if (size > 1) {
2401: PetscBool havedata = (PetscBool)!!data;
2403: MatSetType(*A,MATMPIDENSE);
2404: MatMPIDenseSetPreallocation(*A,data);
/* 'data' may be non-NULL on some ranks only; agree globally so every rank
   takes the same (collective) assembly path below */
2405: MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);
2406: if (havedata) { /* user provided data array, so no need to assemble */
2407: MatSetUpMultiply_MPIDense(*A);
2408: (*A)->assembled = PETSC_TRUE;
2409: }
2410: } else {
2411: MatSetType(*A,MATSEQDENSE);
2412: MatSeqDenseSetPreallocation(*A,data);
2413: }
2414: return(0);
2415: }
2417: #if defined(PETSC_HAVE_CUDA)
2418: /*@C
2419: MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.
2421: Collective
2423: Input Parameters:
2424: + comm - MPI communicator
2425: . m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2426: . n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2427: . M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
2428: . N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
2429: - data - optional location of GPU matrix data. Set data=NULL for PETSc
2430: to control matrix memory allocation.
2432: Output Parameter:
2433: . A - the matrix
2435: Notes:
2437: Level: intermediate
2439: .seealso: MatCreate(), MatCreateDense()
2440: @*/
2441: PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
2442: {
2444: PetscMPIInt size;
2447: MatCreate(comm,A);
2449: MatSetSizes(*A,m,n,M,N);
2450: MPI_Comm_size(comm,&size);
2451: if (size > 1) {
      PetscBool havedata = (PetscBool)!!data;

2452: MatSetType(*A,MATMPIDENSECUDA);
2453: MatMPIDenseCUDASetPreallocation(*A,data);
      /* 'data' may be non-NULL on some ranks only; agree globally (as in
         MatCreateDense()) so every rank takes the same collective path below,
         otherwise ranks would disagree on MatSetUpMultiply_MPIDense()/assembled */
      MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm);
2454: if (havedata) { /* user provided data array, so no need to assemble */
2455: MatSetUpMultiply_MPIDense(*A);
2456: (*A)->assembled = PETSC_TRUE;
2457: }
2458: } else {
2459: MatSetType(*A,MATSEQDENSECUDA);
2460: MatSeqDenseCUDASetPreallocation(*A,data);
2461: }
2462: return(0);
2463: }
2464: #endif
/*
   MatDuplicate_MPIDense - Duplicates an MPI dense matrix, optionally copying
   its values (per cpvalues), sharing the row/column layouts by reference and
   duplicating the local sequential block.
*/
2466: static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
2467: {
2468: Mat mat;
2469: Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;
2473: *newmat = NULL;
2474: MatCreate(PetscObjectComm((PetscObject)A),&mat);
2475: MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);
2476: MatSetType(mat,((PetscObject)A)->type_name);
2477: a = (Mat_MPIDense*)mat->data;
2479: mat->factortype = A->factortype;
2480: mat->assembled = PETSC_TRUE;
2481: mat->preallocated = PETSC_TRUE;
2483: mat->insertmode = NOT_SET_VALUES;
2484: a->donotstash = oldmat->donotstash;
/* layouts are reference-counted, not copied */
2486: PetscLayoutReference(A->rmap,&mat->rmap);
2487: PetscLayoutReference(A->cmap,&mat->cmap);
/* duplicate the local (sequential) dense block and rebuild the mult scatter */
2489: MatDuplicate(oldmat->A,cpvalues,&a->A);
2490: PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);
2491: MatSetUpMultiply_MPIDense(mat);
2493: *newmat = mat;
2494: return(0);
2495: }
/*
   MatLoad_MPIDense - Loads an MPI dense matrix from a viewer; only binary
   and (when configured) HDF5 viewers are supported.
*/
2497: PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
2498: {
2500: PetscBool isbinary;
2501: #if defined(PETSC_HAVE_HDF5)
2502: PetscBool ishdf5;
2503: #endif
2508: /* force binary viewer to load .info file if it has not yet done so */
2509: PetscViewerSetUp(viewer);
2510: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);
2511: #if defined(PETSC_HAVE_HDF5)
2512: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);
2513: #endif
2514: if (isbinary) {
2515: MatLoad_Dense_Binary(newMat,viewer);
2516: #if defined(PETSC_HAVE_HDF5)
2517: } else if (ishdf5) {
2518: MatLoad_Dense_HDF5(newMat,viewer);
2519: #endif
2520: } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
2521: return(0);
2522: }
/*
   MatEqual_MPIDense - Compares the local sequential blocks of A and B, then
   reduces with a logical AND so all ranks return the same answer.
*/
2524: static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
2525: {
2526: Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
2527: Mat a,b;
2528: PetscBool flg;
2532: a = matA->A;
2533: b = matB->A;
2534: MatEqual(a,b,&flg);
2535: MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));
2536: return(0);
2537: }
/*
   MatDestroy_MatTransMatMult_MPIDense_MPIDense - Product-data destructor for
   the A^T*B MPIDense product: frees the send buffer/recvcounts pair and the
   intermediate product matrix.
*/
2539: PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
2540: {
2541: PetscErrorCode ierr;
2542: Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;
2545: PetscFree2(atb->sendbuf,atb->recvcounts);
2546: MatDestroy(&atb->atb);
2547: PetscFree(atb);
2548: return(0);
2549: }
/*
   MatDestroy_MatMatTransMult_MPIDense_MPIDense - Product-data destructor for
   the A*B^T MPIDense product: frees the double buffers and the
   recvcounts/recvdispls arrays used by the allgatherv algorithm.
*/
2551: PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
2552: {
2553: PetscErrorCode ierr;
2554: Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;
2557: PetscFree2(abt->buf[0],abt->buf[1]);
2558: PetscFree2(abt->recvcounts,abt->recvdispls);
2559: PetscFree(abt);
2560: return(0);
2561: }
/*
   MatTransposeMatMultNumeric_MPIDense_MPIDense - Numeric phase of C = A^T*B.

   Each rank computes the full-size local product aseq^T*bseq, packs it by
   destination rank into sendbuf, and a Reduce_scatter sums the contributions
   into each rank's local rows of C. Symbolic-phase data (sendbuf, recvcounts)
   comes from C->product->data.
*/
2563: static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2564: {
2565: Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2566: Mat_TransMatMultDense *atb;
2567: PetscErrorCode ierr;
2568: MPI_Comm comm;
2569: PetscMPIInt size,*recvcounts;
2570: PetscScalar *carray,*sendbuf;
2571: const PetscScalar *atbarray;
2572: PetscInt i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
2573: const PetscInt *ranges;
2576: MatCheckProduct(C,3);
2577: if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2578: atb = (Mat_TransMatMultDense *)C->product->data;
2579: recvcounts = atb->recvcounts;
2580: sendbuf = atb->sendbuf;
2582: PetscObjectGetComm((PetscObject)A,&comm);
2583: MPI_Comm_size(comm,&size);
2585: /* compute atbarray = aseq^T * bseq */
2586: MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);
2588: MatGetOwnershipRanges(C,&ranges);
2590: /* arrange atbarray into sendbuf */
/* pack column-major: for each destination rank, all its rows for every column;
   atbarray is cM x cN with leading dimension cM */
2591: MatDenseGetArrayRead(atb->atb,&atbarray);
2592: for (proc=0, k=0; proc<size; proc++) {
2593: for (j=0; j<cN; j++) {
2594: for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
2595: }
2596: }
2597: MatDenseRestoreArrayRead(atb->atb,&atbarray);
2599: /* sum all atbarray to local values of C */
2600: MatDenseGetArrayWrite(c->A,&carray);
2601: MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);
2602: MatDenseRestoreArrayWrite(c->A,&carray);
2603: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2604: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2605: return(0);
2606: }
/*
   MatTransposeMatMultSymbolic_MPIDense_MPIDense - Symbolic phase of C = A^T*B.

   Verifies that A and B share the same row distribution, sizes C, and
   allocates the Reduce_scatter work data (send buffer of the full global
   product and per-rank receive counts) stored in C->product->data.
*/
2608: static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2609: {
2610: PetscErrorCode ierr;
2611: MPI_Comm comm;
2612: PetscMPIInt size;
2613: PetscInt cm=A->cmap->n,cM,cN=B->cmap->N;
2614: Mat_TransMatMultDense *atb;
2615: PetscBool cisdense;
2616: PetscInt i;
2617: const PetscInt *ranges;
2620: MatCheckProduct(C,3);
2621: if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2622: PetscObjectGetComm((PetscObject)A,&comm);
2623: if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
2624: SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2625: }
2627: /* create matrix product C */
2628: MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);
2629: PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");
2630: if (!cisdense) {
2631: MatSetType(C,((PetscObject)A)->type_name);
2632: }
2633: MatSetUp(C);
2635: /* create data structure for reuse C */
2636: MPI_Comm_size(comm,&size);
2637: PetscNew(&atb);
2638: cM = C->rmap->N;
/* sendbuf holds the entire cM x cN product; size_t cast guards PetscInt overflow */
2639: PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);
2640: MatGetOwnershipRanges(C,&ranges);
2641: for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;
2643: C->product->data = atb;
2644: C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
2645: return(0);
2646: }
/*
   MatMatTransposeMultSymbolic_MPIDense_MPIDense - Symbolic phase of C = A*B^T.

   Sizes C and allocates the reusable communication buffers for one of two
   numeric algorithms selected via the product option: "allgatherv" (alg 0,
   the default case) or "cyclic" (alg 1).
*/
2648: static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
2649: {
2650: PetscErrorCode ierr;
2651: MPI_Comm comm;
2652: PetscMPIInt i, size;
2653: PetscInt maxRows, bufsiz;
2654: PetscMPIInt tag;
2655: PetscInt alg;
2656: Mat_MatTransMultDense *abt;
2657: Mat_Product *product = C->product;
2658: PetscBool flg;
2661: MatCheckProduct(C,4);
2662: if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2663: /* check local size of A and B */
2664: if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);
2666: PetscStrcmp(product->alg,"allgatherv",&flg);
2667: alg = flg ? 0 : 1;
2669: /* setup matrix product C */
2670: MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);
2671: MatSetType(C,MATMPIDENSE);
2672: MatSetUp(C);
/* private tag so the cyclic algorithm's point-to-point messages cannot clash */
2673: PetscObjectGetNewTag((PetscObject)C,&tag);
2675: /* create data structure for reuse C */
2676: PetscObjectGetComm((PetscObject)C,&comm);
2677: MPI_Comm_size(comm,&size);
2678: PetscNew(&abt);
2679: abt->tag = tag;
2680: abt->alg = alg;
2681: switch (alg) {
2682: case 1: /* alg: "cyclic" */
/* double buffer sized for the widest row block of B times A's global column count */
2683: for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
2684: bufsiz = A->cmap->N * maxRows;
2685: PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));
2686: break;
2687: default: /* alg: "allgatherv" */
/* buf[0]: local B packed transposed; buf[1]: the full gathered B^T */
2688: PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));
2689: PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));
2690: for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
2691: for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
2692: break;
2693: }
2695: C->product->data = abt;
2696: C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
2697: C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
2698: return(0);
2699: }
/*
   MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic - Numeric C = A*B^T
   using a ring (cyclic) pipeline.

   Each rank repeatedly GEMMs its local A block against the row block of B
   currently in hand, writing the corresponding columns of C, while
   simultaneously forwarding that block around the ring (nonblocking
   Isend/Irecv with double buffering from abt->buf[0]/buf[1]). After 'size'
   steps every rank has multiplied against every row block of B.
   The statement order (post sends/recvs, GEMM, wait, swap buffers) is
   essential to overlap communication with computation.
*/
2701: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
2702: {
2703: Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2704: Mat_MatTransMultDense *abt;
2705: PetscErrorCode ierr;
2706: MPI_Comm comm;
2707: PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
2708: PetscScalar *sendbuf, *recvbuf=NULL, *cv;
2709: PetscInt i,cK=A->cmap->N,k,j,bn;
2710: PetscScalar _DOne=1.0,_DZero=0.0;
2711: const PetscScalar *av,*bv;
2712: PetscBLASInt cm, cn, ck, alda, blda = 0, clda;
2713: MPI_Request reqs[2];
2714: const PetscInt *ranges;
2717: MatCheckProduct(C,3);
2718: if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2719: abt = (Mat_MatTransMultDense*)C->product->data;
2720: PetscObjectGetComm((PetscObject)C,&comm);
2721: MPI_Comm_rank(comm,&rank);
2722: MPI_Comm_size(comm,&size);
2723: MatDenseGetArrayRead(a->A,&av);
2724: MatDenseGetArrayRead(b->A,&bv);
2725: MatDenseGetArrayWrite(c->A,&cv);
2726: MatDenseGetLDA(a->A,&i);
2727: PetscBLASIntCast(i,&alda);
2728: MatDenseGetLDA(b->A,&i);
2729: PetscBLASIntCast(i,&blda);
2730: MatDenseGetLDA(c->A,&i);
2731: PetscBLASIntCast(i,&clda);
2732: MatGetOwnershipRanges(B,&ranges);
2733: bn = B->rmap->n;
/* if the local B block is contiguous (lda == local rows) send it in place,
   otherwise compact it into buf[0] first */
2734: if (blda == bn) {
2735: sendbuf = (PetscScalar*)bv;
2736: } else {
2737: sendbuf = abt->buf[0];
2738: for (k = 0, i = 0; i < cK; i++) {
2739: for (j = 0; j < bn; j++, k++) {
2740: sendbuf[k] = bv[i * blda + j];
2741: }
2742: }
2743: }
/* ring neighbors: send to the previous rank, receive from the next */
2744: if (size > 1) {
2745: sendto = (rank + size - 1) % size;
2746: recvfrom = (rank + size + 1) % size;
2747: } else {
2748: sendto = recvfrom = 0;
2749: }
2750: PetscBLASIntCast(cK,&ck);
2751: PetscBLASIntCast(c->A->rmap->n,&cm);
2752: recvisfrom = rank;
2753: for (i = 0; i < size; i++) {
2754: /* we have finished receiving in sending, bufs can be read/modified */
2755: PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
2756: PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];
2758: if (nextrecvisfrom != rank) {
2759: /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
2760: sendsiz = cK * bn;
2761: recvsiz = cK * nextbn;
2762: recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
2763: MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);
2764: MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);
2765: }
2767: /* local aseq * sendbuf^T */
/* sendbuf holds B's row block from rank 'recvisfrom' (cn rows, packed with lda cn);
   the result goes into C's columns owned by that rank */
2768: PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);
2769: if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));
2771: if (nextrecvisfrom != rank) {
2772: /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
2773: MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);
2774: }
2775: bn = nextbn;
2776: recvisfrom = nextrecvisfrom;
2777: sendbuf = recvbuf;
2778: }
2779: MatDenseRestoreArrayRead(a->A,&av);
2780: MatDenseRestoreArrayRead(b->A,&bv);
2781: MatDenseRestoreArrayWrite(c->A,&cv);
2782: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2783: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2784: return(0);
2785: }
/*
   MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv - Numeric C = A*B^T
   using one MPI_Allgatherv.

   Each rank packs the transpose of its local B block into buf[0], all ranks
   gather the full B^T into buf[1], and a single local GEMM computes
   C_local = A_local * (B^T).

   Fix: the original code called MatDenseRestoreArrayRead(b->A,&bv) twice --
   once before the Allgatherv (correct, bv is no longer needed) and again
   after the GEMM, restoring an already-restored (NULLed) array pointer.
   The duplicate call is removed.
*/
2787: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
2788: {
2789: Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
2790: Mat_MatTransMultDense *abt;
2791: PetscErrorCode ierr;
2792: MPI_Comm comm;
2793: PetscMPIInt size;
2794: PetscScalar *cv, *sendbuf, *recvbuf;
2795: const PetscScalar *av,*bv;
2796: PetscInt blda,i,cK=A->cmap->N,k,j,bn;
2797: PetscScalar _DOne=1.0,_DZero=0.0;
2798: PetscBLASInt cm, cn, ck, alda, clda;
2801: MatCheckProduct(C,3);
2802: if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2803: abt = (Mat_MatTransMultDense*)C->product->data;
2804: PetscObjectGetComm((PetscObject)A,&comm);
2805: MPI_Comm_size(comm,&size);
2806: MatDenseGetArrayRead(a->A,&av);
2807: MatDenseGetArrayRead(b->A,&bv);
2808: MatDenseGetArrayWrite(c->A,&cv);
2809: MatDenseGetLDA(a->A,&i);
2810: PetscBLASIntCast(i,&alda);
2811: MatDenseGetLDA(b->A,&blda);
2812: MatDenseGetLDA(c->A,&i);
2813: PetscBLASIntCast(i,&clda);
2814: /* copy transpose of B into buf[0] */
2815: bn = B->rmap->n;
2816: sendbuf = abt->buf[0];
2817: recvbuf = abt->buf[1];
2818: for (k = 0, j = 0; j < bn; j++) {
2819: for (i = 0; i < cK; i++, k++) {
2820: sendbuf[k] = bv[i * blda + j];
2821: }
2822: }
/* bv is fully packed into sendbuf; restore it before communicating */
2823: MatDenseRestoreArrayRead(b->A,&bv);
2824: MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);
2825: PetscBLASIntCast(cK,&ck);
2826: PetscBLASIntCast(c->A->rmap->n,&cm);
2827: PetscBLASIntCast(c->A->cmap->n,&cn);
/* recvbuf holds the full B^T (ck x cn, lda ck): C_local = A_local * B^T */
2828: if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
2829: MatDenseRestoreArrayRead(a->A,&av);
2831: MatDenseRestoreArrayWrite(c->A,&cv);
2832: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
2833: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
2834: return(0);
2835: }
2837: static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
2838: {
2839: Mat_MatTransMultDense *abt;
2840: PetscErrorCode ierr;
2843: MatCheckProduct(C,3);
2844: if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
2845: abt = (Mat_MatTransMultDense*)C->product->data;
2846: switch (abt->alg) {
2847: case 1:
2848: MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);
2849: break;
2850: default:
2851: MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);
2852: break;
2853: }
2854: return(0);
2855: }
2857: PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
2858: {
2859: PetscErrorCode ierr;
2860: Mat_MatMultDense *ab = (Mat_MatMultDense*)data;
2863: MatDestroy(&ab->Ce);
2864: MatDestroy(&ab->Ae);
2865: MatDestroy(&ab->Be);
2866: PetscFree(ab);
2867: return(0);
2868: }
2870: #if defined(PETSC_HAVE_ELEMENTAL)
2871: /*
         Numeric phase of C = A*B for MPI dense matrices via Elemental:
         refreshes the Elemental mirrors Ae/Be from A/B, multiplies in
         Elemental, and converts the Elemental result Ce back into C in place.
         Requires the product data set up by MatMatMultSymbolic_MPIDense_MPIDense.
      */
2871: PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
2872: {
2873: PetscErrorCode ierr;
2874: Mat_MatMultDense *ab;
2877: MatCheckProduct(C,3);
2878: if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
2879: ab = (Mat_MatMultDense*)C->product->data;
      /* MAT_REUSE_MATRIX: only copy values into the mirrors built in the symbolic phase */
2880: MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);
2881: MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);
2882: MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);
2883: MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);
2884: return(0);
2885: }
2887: static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
2888: {
2889: PetscErrorCode ierr;
2890: Mat Ae,Be,Ce;
2891: Mat_MatMultDense *ab;
2894: MatCheckProduct(C,4);
2895: if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
2896: /* check local size of A and B */
2897: if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
2898: SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2899: }
2901: /* create elemental matrices Ae and Be */
2902: MatCreate(PetscObjectComm((PetscObject)A), &Ae);
2903: MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);
2904: MatSetType(Ae,MATELEMENTAL);
2905: MatSetUp(Ae);
2906: MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);
2908: MatCreate(PetscObjectComm((PetscObject)B), &Be);
2909: MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);
2910: MatSetType(Be,MATELEMENTAL);
2911: MatSetUp(Be);
2912: MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);
2914: /* compute symbolic Ce = Ae*Be */
2915: MatCreate(PetscObjectComm((PetscObject)C),&Ce);
2916: MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);
2918: /* setup C */
2919: MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
2920: MatSetType(C,MATDENSE);
2921: MatSetUp(C);
2923: /* create data structure for reuse Cdense */
2924: PetscNew(&ab);
2925: ab->Ae = Ae;
2926: ab->Be = Be;
2927: ab->Ce = Ce;
2929: C->product->data = ab;
2930: C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense;
2931: C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense;
2932: return(0);
2933: }
2934: #endif
2935: /* ----------------------------------------------- */
2936: #if defined(PETSC_HAVE_ELEMENTAL)
2937: /*
         Product setup for C = A*B (MATPRODUCT_AB) on MPI dense matrices:
         installs the Elemental-backed symbolic routine.  Only compiled when
         PETSc is configured with Elemental (see the enclosing #if).
      */
2937: static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
2938: {
2940: C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
2941: C->ops->productsymbolic = MatProductSymbolic_AB;
2942: return(0);
2943: }
2944: #endif
2946: /*
         Product setup for C = A^T*B (MATPRODUCT_AtB) on MPI dense matrices.
         A and B must share the same row layout; the check (and error) is
         per-rank, hence PETSC_COMM_SELF in the SETERRQ.
      */
2946: static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
2947: {
2948: Mat_Product *product = C->product;
2949: Mat A = product->A,B=product->B;
2952: if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
2953: SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
2954: C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
2955: C->ops->productsymbolic = MatProductSymbolic_AtB;
2956: return(0);
2957: }
2959: /*
         Product setup for C = A*B^T (MATPRODUCT_ABt) on MPI dense matrices:
         selects the communication algorithm ("allgatherv", the default, or
         "cyclic"), honoring a runtime option whose name depends on whether
         the product was requested through the old MatMatTransposeMult API
         (product->api_user) or the newer MatProduct API, then installs the
         symbolic routines.
      */
2959: static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
2960: {
2962: Mat_Product *product = C->product;
2963: const char *algTypes[2] = {"allgatherv","cyclic"};
2964: PetscInt alg,nalg = 2;
2965: PetscBool flg = PETSC_FALSE;
2968: /* Set default algorithm */
2969: alg = 0; /* default is allgatherv */
2970: PetscStrcmp(product->alg,"default",&flg);
2971: if (flg) {
2972: MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2973: }
2975: /* Get runtime option; flg is reused here: PetscOptionsEList sets it only
        when the user supplied the option on the command line */
2976: if (product->api_user) {
2977: PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");
2978: PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);
2979: PetscOptionsEnd();
2980: } else {
2981: PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");
2982: PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);
2983: PetscOptionsEnd();
2984: }
2985: if (flg) {
2986: MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);
2987: }
2989: C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2990: C->ops->productsymbolic = MatProductSymbolic_ABt;
2991: return(0);
2992: }
2994: /*
         Entry point for MatProductSetFromOptions on MATMPIDENSE: dispatches
         to the per-product-type setup routine.  AB requires Elemental, so it
         is compiled conditionally; unsupported product types fall through
         without error (the caller handles unset ops).
      */
2994: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2995: {
2997: Mat_Product *product = C->product;
3000: switch (product->type) {
3001: #if defined(PETSC_HAVE_ELEMENTAL)
3002: case MATPRODUCT_AB:
3003: MatProductSetFromOptions_MPIDense_AB(C);
3004: break;
3005: #endif
3006: case MATPRODUCT_AtB:
3007: MatProductSetFromOptions_MPIDense_AtB(C);
3008: break;
3009: case MATPRODUCT_ABt:
3010: MatProductSetFromOptions_MPIDense_ABt(C);
3011: break;
3012: default:
3013: break;
3014: }
3015: return(0);
3016: }