Actual source code: pipecgrr.c
petsc-3.14.0 2020-09-29
2: #include <petsc/private/kspimpl.h>
4: /*
5: KSPSetUp_PIPECGRR - Sets up the workspace needed by the PIPECGRR method.
7: This is called once, usually automatically by KSPSolve() or KSPSetUp()
8: but can be called directly by KSPSetUp()
9: */
10: static PetscErrorCode KSPSetUp_PIPECGRR(KSP ksp)
11: {
15: /* get work vectors needed by PIPECGRR */
16: KSPSetWorkVecs(ksp,9);
17: return(0);
18: }
20: /*
21: KSPSolve_PIPECGRR - This routine actually applies the pipelined conjugate gradient method with automated residual replacement
22: */
23: static PetscErrorCode KSPSolve_PIPECGRR(KSP ksp)
24: {
26: PetscInt i = 0,replace = 0,totreplaces = 0,nsize;
27: PetscScalar alpha = 0.0,beta = 0.0,gamma = 0.0,gammaold = 0.0,delta = 0.0,alphap = 0.0,betap = 0.0;
28: PetscReal dp = 0.0,nsi = 0.0,sqn = 0.0,Anorm = 0.0,rnp = 0.0,pnp = 0.0,snp = 0.0,unp = 0.0,wnp = 0.0,xnp = 0.0,qnp = 0.0,znp = 0.0,mnz = 5.0,tol = PETSC_SQRT_MACHINE_EPSILON,eps = PETSC_MACHINE_EPSILON;
29: PetscReal ds = 0.0,dz = 0.0,dx = 0.0,dpp = 0.0,dq = 0.0,dm = 0.0,du = 0.0,dw = 0.0,db = 0.0,errr = 0.0,errrprev = 0.0,errs = 0.0,errw = 0.0,errz = 0.0,errncr = 0.0,errncs = 0.0,errncw = 0.0,errncz = 0.0;
30: Vec X,B,Z,P,W,Q,U,M,N,R,S;
31: Mat Amat,Pmat;
32: PetscBool diagonalscale;
35: PCGetDiagonalScale(ksp->pc,&diagonalscale);
36: if (diagonalscale) SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name);
38: X = ksp->vec_sol;
39: B = ksp->vec_rhs;
40: M = ksp->work[0];
41: Z = ksp->work[1];
42: P = ksp->work[2];
43: N = ksp->work[3];
44: W = ksp->work[4];
45: Q = ksp->work[5];
46: U = ksp->work[6];
47: R = ksp->work[7];
48: S = ksp->work[8];
50: PCGetOperators(ksp->pc,&Amat,&Pmat);
52: ksp->its = 0;
53: if (!ksp->guess_zero) {
54: KSP_MatMult(ksp,Amat,X,R); /* r <- b - Ax */
55: VecAYPX(R,-1.0,B);
56: } else {
57: VecCopy(B,R); /* r <- b (x is 0) */
58: }
60: KSP_PCApply(ksp,R,U); /* u <- Br */
62: switch (ksp->normtype) {
63: case KSP_NORM_PRECONDITIONED:
64: VecNormBegin(U,NORM_2,&dp); /* dp <- u'*u = e'*A'*B'*B*A'*e' */
65: VecNormBegin(B,NORM_2,&db);
66: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)U));
67: KSP_MatMult(ksp,Amat,U,W); /* w <- Au */
68: VecNormEnd(U,NORM_2,&dp);
69: VecNormEnd(B,NORM_2,&db);
70: break;
71: case KSP_NORM_UNPRECONDITIONED:
72: VecNormBegin(R,NORM_2,&dp); /* dp <- r'*r = e'*A'*A*e */
73: VecNormBegin(B,NORM_2,&db);
74: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
75: KSP_MatMult(ksp,Amat,U,W); /* w <- Au */
76: VecNormEnd(R,NORM_2,&dp);
77: VecNormEnd(B,NORM_2,&db);
78: break;
79: case KSP_NORM_NATURAL:
80: VecDotBegin(R,U,&gamma); /* gamma <- u'*r */
81: VecNormBegin(B,NORM_2,&db);
82: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
83: KSP_MatMult(ksp,Amat,U,W); /* w <- Au */
84: VecDotEnd(R,U,&gamma);
85: VecNormEnd(B,NORM_2,&db);
86: KSPCheckDot(ksp,gamma);
87: dp = PetscSqrtReal(PetscAbsScalar(gamma)); /* dp <- r'*u = r'*B*r = e'*A'*B*A*e */
88: break;
89: case KSP_NORM_NONE:
90: KSP_MatMult(ksp,Amat,U,W);
91: dp = 0.0;
92: break;
93: default: SETERRQ1(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"%s",KSPNormTypes[ksp->normtype]);
94: }
95: KSPLogResidualHistory(ksp,dp);
96: KSPMonitor(ksp,0,dp);
97: ksp->rnorm = dp;
98: (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP); /* test for convergence */
99: if (ksp->reason) return(0);
101: MatNorm(Amat,NORM_INFINITY,&Anorm);
102: VecGetSize(B,&nsize);
103: nsi = (PetscReal) nsize;
104: sqn = PetscSqrtReal(nsi);
106: do {
107: if (i > 1) {
108: pnp = dpp;
109: snp = ds;
110: qnp = dq;
111: znp = dz;
112: }
113: if (i > 0) {
114: rnp = dp;
115: unp = du;
116: wnp = dw;
117: xnp = dx;
118: alphap = alpha;
119: betap = beta;
120: }
122: if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
123: VecNormBegin(R,NORM_2,&dp);
124: } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
125: VecNormBegin(U,NORM_2,&dp);
126: }
127: if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) {
128: VecDotBegin(R,U,&gamma);
129: }
130: VecDotBegin(W,U,&delta);
132: if (i > 0) {
133: VecNormBegin(S,NORM_2,&ds);
134: VecNormBegin(Z,NORM_2,&dz);
135: VecNormBegin(P,NORM_2,&dpp);
136: VecNormBegin(Q,NORM_2,&dq);
137: VecNormBegin(M,NORM_2,&dm);
138: }
139: VecNormBegin(X,NORM_2,&dx);
140: VecNormBegin(U,NORM_2,&du);
141: VecNormBegin(W,NORM_2,&dw);
143: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
144: KSP_PCApply(ksp,W,M); /* m <- Bw */
145: KSP_MatMult(ksp,Amat,M,N); /* n <- Am */
147: if (i > 0 && ksp->normtype == KSP_NORM_UNPRECONDITIONED) {
148: VecNormEnd(R,NORM_2,&dp);
149: } else if (i > 0 && ksp->normtype == KSP_NORM_PRECONDITIONED) {
150: VecNormEnd(U,NORM_2,&dp);
151: }
152: if (!(i == 0 && ksp->normtype == KSP_NORM_NATURAL)) {
153: VecDotEnd(R,U,&gamma);
154: }
155: VecDotEnd(W,U,&delta);
157: if (i > 0) {
158: VecNormEnd(S,NORM_2,&ds);
159: VecNormEnd(Z,NORM_2,&dz);
160: VecNormEnd(P,NORM_2,&dpp);
161: VecNormEnd(Q,NORM_2,&dq);
162: VecNormEnd(M,NORM_2,&dm);
163: }
164: VecNormEnd(X,NORM_2,&dx);
165: VecNormEnd(U,NORM_2,&du);
166: VecNormEnd(W,NORM_2,&dw);
168: if (i > 0) {
169: if (ksp->normtype == KSP_NORM_NATURAL) dp = PetscSqrtReal(PetscAbsScalar(gamma));
170: else if (ksp->normtype == KSP_NORM_NONE) dp = 0.0;
172: ksp->rnorm = dp;
173: KSPLogResidualHistory(ksp,dp);
174: KSPMonitor(ksp,i,dp);
175: (*ksp->converged)(ksp,i,dp,&ksp->reason,ksp->cnvP);
176: if (ksp->reason) return(0);
177: }
179: if (i == 0) {
180: alpha = gamma / delta;
181: VecCopy(N,Z); /* z <- n */
182: VecCopy(M,Q); /* q <- m */
183: VecCopy(U,P); /* p <- u */
184: VecCopy(W,S); /* s <- w */
185: } else {
186: beta = gamma / gammaold;
187: alpha = gamma / (delta - beta / alpha * gamma);
188: VecAYPX(Z,beta,N); /* z <- n + beta * z */
189: VecAYPX(Q,beta,M); /* q <- m + beta * q */
190: VecAYPX(P,beta,U); /* p <- u + beta * p */
191: VecAYPX(S,beta,W); /* s <- w + beta * s */
192: }
193: VecAXPY(X, alpha,P); /* x <- x + alpha * p */
194: VecAXPY(U,-alpha,Q); /* u <- u - alpha * q */
195: VecAXPY(W,-alpha,Z); /* w <- w - alpha * z */
196: VecAXPY(R,-alpha,S); /* r <- r - alpha * s */
197: gammaold = gamma;
199: if (i > 0) {
200: errncr = PetscSqrtReal(Anorm*xnp+2.0*Anorm*PetscAbsScalar(alphap)*dpp+rnp+2.0*PetscAbsScalar(alphap)*ds)*eps;
201: errncw = PetscSqrtReal(Anorm*unp+2.0*Anorm*PetscAbsScalar(alphap)*dq+wnp+2.0*PetscAbsScalar(alphap)*dz)*eps;
202: }
203: if (i > 1) {
204: errncs = PetscSqrtReal(Anorm*unp+2.0*Anorm*PetscAbsScalar(betap)*pnp+wnp+2.0*PetscAbsScalar(betap)*snp)*eps;
205: errncz = PetscSqrtReal((mnz*sqn+2)*Anorm*dm+2.0*Anorm*PetscAbsScalar(betap)*qnp+2.0*PetscAbsScalar(betap)*znp)*eps;
206: }
208: if (i > 0) {
209: if (i == 1) {
210: errr = PetscSqrtReal((mnz*sqn+1)*Anorm*xnp+db)*eps+PetscSqrtReal(PetscAbsScalar(alphap)*mnz*sqn*Anorm*dpp)*eps+errncr;
211: errs = PetscSqrtReal(mnz*sqn*Anorm*dpp)*eps;
212: errw = PetscSqrtReal(mnz*sqn*Anorm*unp)*eps+PetscSqrtReal(PetscAbsScalar(alphap)*mnz*sqn*Anorm*dq)*eps+errncw;
213: errz = PetscSqrtReal(mnz*sqn*Anorm*dq)*eps;
214: } else if (replace == 1) {
215: errrprev = errr;
216: errr = PetscSqrtReal((mnz*sqn+1)*Anorm*dx+db)*eps;
217: errs = PetscSqrtReal(mnz*sqn*Anorm*dpp)*eps;
218: errw = PetscSqrtReal(mnz*sqn*Anorm*du)*eps;
219: errz = PetscSqrtReal(mnz*sqn*Anorm*dq)*eps;
220: replace = 0;
221: } else {
222: errrprev = errr;
223: errr = errr+PetscAbsScalar(alphap)*PetscAbsScalar(betap)*errs+PetscAbsScalar(alphap)*errw+errncr+PetscAbsScalar(alphap)*errncs;
224: errs = errw+PetscAbsScalar(betap)*errs+errncs;
225: errw = errw+PetscAbsScalar(alphap)*PetscAbsScalar(betap)*errz+errncw+PetscAbsScalar(alphap)*errncz;
226: errz = PetscAbsScalar(betap)*errz+errncz;
227: }
228: if (i > 1 && errrprev <= (tol * rnp) && errr > (tol * dp)) {
229: KSP_MatMult(ksp,Amat,X,R); /* r <- Ax - b */
230: VecAYPX(R,-1.0,B);
231: KSP_PCApply(ksp,R,U); /* u <- Br */
232: KSP_MatMult(ksp,Amat,U,W); /* w <- Au */
233: KSP_MatMult(ksp,Amat,P,S); /* s <- Ap */
234: KSP_PCApply(ksp,S,Q); /* q <- Bs */
235: KSP_MatMult(ksp,Amat,Q,Z); /* z <- Aq */
236: replace = 1;
237: totreplaces++;
238: }
239: }
241: i++;
242: ksp->its = i;
244: } while (i<=ksp->max_it);
245: if (!ksp->reason) ksp->reason = KSP_DIVERGED_ITS;
246: return(0);
247: }
250: /*MC
251: KSPPIPECGRR - Pipelined conjugate gradient method with automated residual replacements.
253: This method has only a single non-blocking reduction per iteration, compared to 2 blocking for standard CG. The
254: non-blocking reduction is overlapped by the matrix-vector product and preconditioner application.
256: KSPPIPECGRR improves the robustness of KSPPIPECG by adding an automated residual replacement strategy.
257: True residual and other auxiliary variables are computed explicitly in a number of dynamically determined
258: iterations to counteract the accumulation of rounding errors and thus attain a higher maximal final accuracy.
260: See also KSPPIPECG, which is identical to KSPPIPECGRR without residual replacements.
261: See also KSPPIPECR, where the reduction is only overlapped with the matrix-vector product.
263: Level: intermediate
265: Notes:
266: MPI configuration may be necessary for reductions to make asynchronous progress, which is important for
267: performance of pipelined methods. See the FAQ on the PETSc website for details.
269: Contributed by:
270: Siegfried Cools, Universiteit Antwerpen, Dept. Mathematics & Computer Science,
271: European FP7 Project on EXascale Algorithms and Advanced Computational Techniques (EXA2CT) / Research Foundation Flanders (FWO)
273: Reference:
274: S. Cools, E.F. Yetkin, E. Agullo, L. Giraud, W. Vanroose, "Analyzing the effect of local rounding error
275: propagation on the maximal attainable accuracy of the pipelined Conjugate Gradients method",
276: SIAM Journal on Matrix Analysis and Applications (SIMAX), 39(1):426--450, 2018.
278: .seealso: KSPCreate(), KSPSetType(), KSPPIPECR, KSPGROPPCG, KSPPIPECG, KSPPGMRES, KSPCG, KSPPIPEBCGS, KSPCGUseSingleReduction()
279: M*/
280: PETSC_EXTERN PetscErrorCode KSPCreate_PIPECGRR(KSP ksp)
281: {
285: KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_LEFT,2);
286: KSPSetSupportedNorm(ksp,KSP_NORM_PRECONDITIONED,PC_LEFT,2);
287: KSPSetSupportedNorm(ksp,KSP_NORM_NATURAL,PC_LEFT,2);
288: KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_LEFT,1);
290: ksp->ops->setup = KSPSetUp_PIPECGRR;
291: ksp->ops->solve = KSPSolve_PIPECGRR;
292: ksp->ops->destroy = KSPDestroyDefault;
293: ksp->ops->view = NULL;
294: ksp->ops->setfromoptions = NULL;
295: ksp->ops->buildsolution = KSPBuildSolutionDefault;
296: ksp->ops->buildresidual = KSPBuildResidualDefault;
297: return(0);
298: }