Actual source code: ibcgs.c
petsc-3.14.0 2020-09-29
2: #include <petsc/private/kspimpl.h>
3: #include <petsc/private/vecimpl.h>
5: static PetscErrorCode KSPSetUp_IBCGS(KSP ksp)
6: {
8: PetscBool diagonalscale;
11: PCGetDiagonalScale(ksp->pc,&diagonalscale);
12: if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name);
13: KSPSetWorkVecs(ksp,9);
14: return(0);
15: }
17: /*
18: The code below "cheats" from PETSc style
19: 1) VecRestoreArray() is called immediately after VecGetArray() and the array values are still accessed; the reason for the immediate
20: restore is that Vec operations are done on some of the vectors during the solve and if we did not restore immediately it would
21: generate two VecGetArray() (the second one inside the Vec operation) calls without a restore between them.
22: 2) The vector operations on done directly on the arrays instead of with VecXXXX() calls
24: For clarity in the code we name single VECTORS with two names, for example, Rn_1 and R, but they actually always
25: the exact same memory. We do this with macro defines so that compiler won't think they are
26: two different variables.
28: */
29: #define Xn_1 Xn
30: #define xn_1 xn
31: #define Rn_1 Rn
32: #define rn_1 rn
33: #define Un_1 Un
34: #define un_1 un
35: #define Vn_1 Vn
36: #define vn_1 vn
37: #define Qn_1 Qn
38: #define qn_1 qn
39: #define Zn_1 Zn
40: #define zn_1 zn
41: static PetscErrorCode KSPSolve_IBCGS(KSP ksp)
42: {
44: PetscInt i,N;
45: PetscReal rnorm = 0.0,rnormin = 0.0;
46: #if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
47: /* Because of possible instabilities in the algorithm (as indicated by different residual histories for the same problem
48: on the same number of processes with different runs) we support computing the inner products using Intel's 80 bit arithmetic
49: rather than just 64 bit. Thus we copy our double precision values into long doubles (hoping this keeps the 16 extra bits)
50: and tell MPI to do its ALlreduces with MPI_LONG_DOUBLE.
52: Note for developers that does not effect the code. Intel's long double is implemented by storing the 80 bits of extended double
53: precision into a 16 byte space (the rest of the space is ignored) */
54: long double insums[7],outsums[7];
55: #else
56: PetscScalar insums[7],outsums[7];
57: #endif
58: PetscScalar sigman_2, sigman_1, sigman, pin_1, pin, phin_1, phin,tmp1,tmp2;
59: PetscScalar taun_1, taun, rhon, alphan_1, alphan, omegan_1, omegan;
60: const PetscScalar *PETSC_RESTRICT r0, *PETSC_RESTRICT f0, *PETSC_RESTRICT qn, *PETSC_RESTRICT b, *PETSC_RESTRICT un;
61: PetscScalar *PETSC_RESTRICT rn, *PETSC_RESTRICT xn, *PETSC_RESTRICT vn, *PETSC_RESTRICT zn;
62: /* the rest do not have to keep n_1 values */
63: PetscScalar kappan, thetan, etan, gamman, betan, deltan;
64: const PetscScalar *PETSC_RESTRICT tn;
65: PetscScalar *PETSC_RESTRICT sn;
66: Vec R0,Rn,Xn,F0,Vn,Zn,Qn,Tn,Sn,B,Un;
67: Mat A;
70: if (!ksp->vec_rhs->petscnative) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Only coded for PETSc vectors");
72: #if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
73: /* since 80 bit long doubls do not fill the upper bits, we fill them initially so that
74: valgrind won't detect MPI_Allreduce() with uninitialized data */
75: PetscMemzero(insums,sizeof(insums));
76: PetscMemzero(insums,sizeof(insums));
77: #endif
79: PCGetOperators(ksp->pc,&A,NULL);
80: VecGetLocalSize(ksp->vec_sol,&N);
81: Xn = ksp->vec_sol; VecGetArray(Xn_1,(PetscScalar**)&xn_1); VecRestoreArray(Xn_1,NULL);
82: B = ksp->vec_rhs; VecGetArrayRead(B,(const PetscScalar**)&b); VecRestoreArrayRead(B,NULL);
83: R0 = ksp->work[0]; VecGetArrayRead(R0,(const PetscScalar**)&r0); VecRestoreArrayRead(R0,NULL);
84: Rn = ksp->work[1]; VecGetArray(Rn_1,(PetscScalar**)&rn_1); VecRestoreArray(Rn_1,NULL);
85: Un = ksp->work[2]; VecGetArrayRead(Un_1,(const PetscScalar**)&un_1); VecRestoreArrayRead(Un_1,NULL);
86: F0 = ksp->work[3]; VecGetArrayRead(F0,(const PetscScalar**)&f0); VecRestoreArrayRead(F0,NULL);
87: Vn = ksp->work[4]; VecGetArray(Vn_1,(PetscScalar**)&vn_1); VecRestoreArray(Vn_1,NULL);
88: Zn = ksp->work[5]; VecGetArray(Zn_1,(PetscScalar**)&zn_1); VecRestoreArray(Zn_1,NULL);
89: Qn = ksp->work[6]; VecGetArrayRead(Qn_1,(const PetscScalar**)&qn_1); VecRestoreArrayRead(Qn_1,NULL);
90: Tn = ksp->work[7]; VecGetArrayRead(Tn,(const PetscScalar**)&tn); VecRestoreArrayRead(Tn,NULL);
91: Sn = ksp->work[8]; VecGetArrayRead(Sn,(const PetscScalar**)&sn); VecRestoreArrayRead(Sn,NULL);
93: /* r0 = rn_1 = b - A*xn_1; */
94: /* KSP_PCApplyBAorAB(ksp,Xn_1,Rn_1,Tn);
95: VecAYPX(Rn_1,-1.0,B); */
96: KSPInitialResidual(ksp,Xn_1,Tn,Sn,Rn_1,B);
97: if (ksp->normtype != KSP_NORM_NONE) {
98: VecNorm(Rn_1,NORM_2,&rnorm);
99: KSPCheckNorm(ksp,rnorm);
100: }
101: KSPMonitor(ksp,0,rnorm);
102: (*ksp->converged)(ksp,0,rnorm,&ksp->reason,ksp->cnvP);
103: if (ksp->reason) return(0);
105: VecCopy(Rn_1,R0);
107: /* un_1 = A*rn_1; */
108: KSP_PCApplyBAorAB(ksp,Rn_1,Un_1,Tn);
110: /* f0 = A'*rn_1; */
111: if (ksp->pc_side == PC_RIGHT) { /* B' A' */
112: KSP_MatMultTranspose(ksp,A,R0,Tn);
113: KSP_PCApplyTranspose(ksp,Tn,F0);
114: } else if (ksp->pc_side == PC_LEFT) { /* A' B' */
115: KSP_PCApplyTranspose(ksp,R0,Tn);
116: KSP_MatMultTranspose(ksp,A,Tn,F0);
117: }
119: /*qn_1 = vn_1 = zn_1 = 0.0; */
120: VecSet(Qn_1,0.0);
121: VecSet(Vn_1,0.0);
122: VecSet(Zn_1,0.0);
124: sigman_2 = pin_1 = taun_1 = 0.0;
126: /* the paper says phin_1 should be initialized to zero, it is actually R0'R0 */
127: VecDot(R0,R0,&phin_1);
128: KSPCheckDot(ksp,phin_1);
130: /* sigman_1 = rn_1'un_1 */
131: VecDot(R0,Un_1,&sigman_1);
133: alphan_1 = omegan_1 = 1.0;
135: for (ksp->its = 1; ksp->its<ksp->max_it+1; ksp->its++) {
136: rhon = phin_1 - omegan_1*sigman_2 + omegan_1*alphan_1*pin_1;
137: if (ksp->its == 1) deltan = rhon;
138: else deltan = rhon/taun_1;
139: betan = deltan/omegan_1;
140: taun = sigman_1 + betan*taun_1 - deltan*pin_1;
141: if (taun == 0.0) {
142: if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"KSPSolve has not converged due to taun is zero, iteration %D",ksp->its);
143: else {
144: ksp->reason = KSP_DIVERGED_NANORINF;
145: return(0);
146: }
147: }
148: alphan = rhon/taun;
149: PetscLogFlops(15.0);
151: /*
152: zn = alphan*rn_1 + (alphan/alphan_1)betan*zn_1 - alphan*deltan*vn_1
153: vn = un_1 + betan*vn_1 - deltan*qn_1
154: sn = rn_1 - alphan*vn
156: The algorithm in the paper is missing the alphan/alphan_1 term in the zn update
157: */
158: PetscLogEventBegin(VEC_Ops,0,0,0,0);
159: tmp1 = (alphan/alphan_1)*betan;
160: tmp2 = alphan*deltan;
161: for (i=0; i<N; i++) {
162: zn[i] = alphan*rn_1[i] + tmp1*zn_1[i] - tmp2*vn_1[i];
163: vn[i] = un_1[i] + betan*vn_1[i] - deltan*qn_1[i];
164: sn[i] = rn_1[i] - alphan*vn[i];
165: }
166: PetscLogFlops(3.0+11.0*N);
167: PetscLogEventEnd(VEC_Ops,0,0,0,0);
169: /*
170: qn = A*vn
171: */
172: KSP_PCApplyBAorAB(ksp,Vn,Qn,Tn);
174: /*
175: tn = un_1 - alphan*qn
176: */
177: VecWAXPY(Tn,-alphan,Qn,Un_1);
180: /*
181: phin = r0'sn
182: pin = r0'qn
183: gamman = f0'sn
184: etan = f0'tn
185: thetan = sn'tn
186: kappan = tn'tn
187: */
188: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
189: phin = pin = gamman = etan = thetan = kappan = 0.0;
190: for (i=0; i<N; i++) {
191: phin += r0[i]*sn[i];
192: pin += r0[i]*qn[i];
193: gamman += f0[i]*sn[i];
194: etan += f0[i]*tn[i];
195: thetan += sn[i]*tn[i];
196: kappan += tn[i]*tn[i];
197: }
198: PetscLogFlops(12.0*N);
199: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
201: insums[0] = phin;
202: insums[1] = pin;
203: insums[2] = gamman;
204: insums[3] = etan;
205: insums[4] = thetan;
206: insums[5] = kappan;
207: insums[6] = rnormin;
209: PetscLogEventBegin(VEC_ReduceCommunication,0,0,0,0);
210: #if defined(PETSC_HAVE_MPI_LONG_DOUBLE) && !defined(PETSC_USE_COMPLEX) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
211: if (ksp->lagnorm && ksp->its > 1) {
212: MPIU_Allreduce(insums,outsums,7,MPI_LONG_DOUBLE,MPI_SUM,PetscObjectComm((PetscObject)ksp));
213: } else {
214: MPIU_Allreduce(insums,outsums,6,MPI_LONG_DOUBLE,MPI_SUM,PetscObjectComm((PetscObject)ksp));
215: }
216: #else
217: if (ksp->lagnorm && ksp->its > 1 && ksp->normtype != KSP_NORM_NONE) {
218: MPIU_Allreduce(insums,outsums,7,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)ksp));
219: } else {
220: MPIU_Allreduce(insums,outsums,6,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)ksp));
221: }
222: #endif
223: PetscLogEventEnd(VEC_ReduceCommunication,0,0,0,0);
224: phin = outsums[0];
225: pin = outsums[1];
226: gamman = outsums[2];
227: etan = outsums[3];
228: thetan = outsums[4];
229: kappan = outsums[5];
230: if (ksp->lagnorm && ksp->its > 1 && ksp->normtype != KSP_NORM_NONE) rnorm = PetscSqrtReal(PetscRealPart(outsums[6]));
232: if (kappan == 0.0) {
233: if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"KSPSolve has not converged due to kappan is zero, iteration %D",ksp->its);
234: else {
235: ksp->reason = KSP_DIVERGED_NANORINF;
236: return(0);
237: }
238: }
239: if (thetan == 0.0) {
240: if (ksp->errorifnotconverged) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_NOT_CONVERGED,"KSPSolve has not converged due to thetan is zero, iteration %D",ksp->its);
241: else {
242: ksp->reason = KSP_DIVERGED_NANORINF;
243: return(0);
244: }
245: }
246: omegan = thetan/kappan;
247: sigman = gamman - omegan*etan;
249: /*
250: rn = sn - omegan*tn
251: xn = xn_1 + zn + omegan*sn
252: */
253: PetscLogEventBegin(VEC_Ops,0,0,0,0);
254: rnormin = 0.0;
255: for (i=0; i<N; i++) {
256: rn[i] = sn[i] - omegan*tn[i];
257: rnormin += PetscRealPart(PetscConj(rn[i])*rn[i]);
258: xn[i] += zn[i] + omegan*sn[i];
259: }
260: PetscObjectStateIncrease((PetscObject)Xn);
261: PetscLogFlops(7.0*N);
262: PetscLogEventEnd(VEC_Ops,0,0,0,0);
264: if (!ksp->lagnorm && ksp->chknorm < ksp->its && ksp->normtype != KSP_NORM_NONE) {
265: PetscLogEventBegin(VEC_ReduceCommunication,0,0,0,0);
266: MPIU_Allreduce(&rnormin,&rnorm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)ksp));
267: PetscLogEventEnd(VEC_ReduceCommunication,0,0,0,0);
268: rnorm = PetscSqrtReal(rnorm);
269: }
271: /* Test for convergence */
272: KSPMonitor(ksp,ksp->its,rnorm);
273: (*ksp->converged)(ksp,ksp->its,rnorm,&ksp->reason,ksp->cnvP);
274: if (ksp->reason) {
275: KSPUnwindPreconditioner(ksp,Xn,Tn);
276: return(0);
277: }
279: /* un = A*rn */
280: KSP_PCApplyBAorAB(ksp,Rn,Un,Tn);
282: /* Update n-1 locations with n locations */
283: sigman_2 = sigman_1;
284: sigman_1 = sigman;
285: pin_1 = pin;
286: phin_1 = phin;
287: alphan_1 = alphan;
288: taun_1 = taun;
289: omegan_1 = omegan;
290: }
291: if (ksp->its >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
292: KSPUnwindPreconditioner(ksp,Xn,Tn);
293: return(0);
294: }
297: /*MC
298: KSPIBCGS - Implements the IBiCGStab (Improved Stabilized version of BiConjugate Gradient) method
299: in an alternative form to have only a single global reduction operation instead of the usual 3 (or 4)
301: Options Database Keys:
302: . see KSPSolve()
304: Level: beginner
306: Notes:
307: Supports left and right preconditioning
309: See KSPBCGSL for additional stabilization
311: Unlike the Bi-CG-stab algorithm, this requires one multiplication be the transpose of the operator
312: before the iteration starts.
314: The paper has two errors in the algorithm presented, they are fixed in the code in KSPSolve_IBCGS()
316: For maximum reduction in the number of global reduction operations, this solver should be used with
317: KSPSetLagNorm().
319: This is not supported for complex numbers.
321: Reference: The Improved BiCGStab Method for Large and Sparse Unsymmetric Linear Systems on Parallel Distributed Memory
322: Architectures. L. T. Yang and R. Brent, Proceedings of the Fifth International Conference on Algorithms and
323: Architectures for Parallel Processing, 2002, IEEE.
325: .seealso: KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPBICG, KSPBCGSL, KSPIBCGS, KSPSetLagNorm()
326: M*/
328: PETSC_EXTERN PetscErrorCode KSPCreate_IBCGS(KSP ksp)
329: {
334: KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,3);
335: KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,2);
336: KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_RIGHT,1);
338: ksp->ops->setup = KSPSetUp_IBCGS;
339: ksp->ops->solve = KSPSolve_IBCGS;
340: ksp->ops->destroy = KSPDestroyDefault;
341: ksp->ops->buildsolution = KSPBuildSolutionDefault;
342: ksp->ops->buildresidual = KSPBuildResidualDefault;
343: ksp->ops->setfromoptions = NULL;
344: ksp->ops->view = NULL;
345: #if defined(PETSC_USE_COMPLEX)
346: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"This is not supported for complex numbers");
347: #endif
348: return(0);
349: }