Actual source code: nepdefl.c
slepc-3.17.1 2022-04-11
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #include <slepc/private/nepimpl.h>
12: #include <slepcblaslapack.h>
13: #include "nepdefl.h"
15: PetscErrorCode NEPDeflationGetInvariantPair(NEP_EXT_OP extop,BV *X,Mat *H)
16: {
17: if (X) *X = extop->X;
18: if (H) MatCreateSeqDense(PETSC_COMM_SELF,extop->szd+1,extop->szd+1,extop->H,H);
19: PetscFunctionReturn(0);
20: }
22: PetscErrorCode NEPDeflationExtendInvariantPair(NEP_EXT_OP extop,Vec u,PetscScalar lambda,PetscInt k)
23: {
24: Vec uu;
25: PetscInt ld,i;
26: PetscMPIInt np;
27: PetscReal norm;
29: BVGetColumn(extop->X,k,&uu);
30: ld = extop->szd+1;
31: NEPDeflationCopyToExtendedVec(extop,uu,extop->H+k*ld,u,PETSC_TRUE);
32: BVRestoreColumn(extop->X,k,&uu);
33: BVNormColumn(extop->X,k,NORM_2,&norm);
34: BVScaleColumn(extop->X,k,1.0/norm);
35: MPI_Comm_size(PetscObjectComm((PetscObject)u),&np);
36: for (i=0;i<k;i++) extop->H[k*ld+i] *= PetscSqrtReal(np)/norm;
37: extop->H[k*(ld+1)] = lambda;
38: PetscFunctionReturn(0);
39: }
41: PetscErrorCode NEPDeflationExtractEigenpair(NEP_EXT_OP extop,PetscInt k,Vec u,PetscScalar lambda,DS ds)
42: {
43: PetscScalar *Ap;
44: PetscInt ldh=extop->szd+1,ldds,i,j,k1=k+1;
45: PetscScalar *eigr,*eigi,*t,*Z;
46: Vec x;
48: NEPDeflationExtendInvariantPair(extop,u,lambda,k);
49: PetscCalloc3(k1,&eigr,k1,&eigi,extop->szd,&t);
50: DSReset(ds);
51: DSSetType(ds,DSNHEP);
52: DSAllocate(ds,ldh);
53: DSGetLeadingDimension(ds,&ldds);
54: DSGetArray(ds,DS_MAT_A,&Ap);
55: for (j=0;j<k1;j++)
56: for (i=0;i<k1;i++) Ap[j*ldds+i] = extop->H[j*ldh+i];
57: DSRestoreArray(ds,DS_MAT_A,&Ap);
58: DSSetDimensions(ds,k1,0,k1);
59: DSSolve(ds,eigr,eigi);
60: DSVectors(ds,DS_MAT_X,&k,NULL);
61: DSGetArray(ds,DS_MAT_X,&Z);
62: BVMultColumn(extop->X,1.0,Z[k*ldds+k],k,Z+k*ldds);
63: DSRestoreArray(ds,DS_MAT_X,&Z);
64: BVGetColumn(extop->X,k,&x);
65: NEPDeflationCopyToExtendedVec(extop,x,t,u,PETSC_FALSE);
66: BVRestoreColumn(extop->X,k,&x);
67: PetscFree3(eigr,eigi,t);
68: PetscFunctionReturn(0);
69: }
71: PetscErrorCode NEPDeflationCopyToExtendedVec(NEP_EXT_OP extop,Vec v,PetscScalar *a,Vec vex,PetscBool back)
72: {
73: PetscMPIInt np,rk,count;
74: PetscScalar *array1,*array2;
75: PetscInt nloc;
77: if (extop->szd) {
78: MPI_Comm_rank(PetscObjectComm((PetscObject)vex),&rk);
79: MPI_Comm_size(PetscObjectComm((PetscObject)vex),&np);
80: BVGetSizes(extop->nep->V,&nloc,NULL,NULL);
81: if (v) {
82: VecGetArray(v,&array1);
83: VecGetArray(vex,&array2);
84: if (back) PetscArraycpy(array1,array2,nloc);
85: else PetscArraycpy(array2,array1,nloc);
86: VecRestoreArray(v,&array1);
87: VecRestoreArray(vex,&array2);
88: }
89: if (a) {
90: VecGetArray(vex,&array2);
91: if (back) {
92: PetscArraycpy(a,array2+nloc,extop->szd);
93: PetscMPIIntCast(extop->szd,&count);
94: MPI_Bcast(a,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)vex));
95: } else {
96: PetscArraycpy(array2+nloc,a,extop->szd);
97: PetscMPIIntCast(extop->szd,&count);
98: MPI_Bcast(array2+nloc,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)vex));
99: }
100: VecRestoreArray(vex,&array2);
101: }
102: } else {
103: if (back) VecCopy(vex,v);
104: else VecCopy(v,vex);
105: }
106: PetscFunctionReturn(0);
107: }
109: PetscErrorCode NEPDeflationCreateVec(NEP_EXT_OP extop,Vec *v)
110: {
111: PetscInt nloc;
112: Vec u;
113: VecType type;
115: if (extop->szd) {
116: BVGetColumn(extop->nep->V,0,&u);
117: VecGetType(u,&type);
118: BVRestoreColumn(extop->nep->V,0,&u);
119: VecCreate(PetscObjectComm((PetscObject)extop->nep),v);
120: VecSetType(*v,type);
121: BVGetSizes(extop->nep->V,&nloc,NULL,NULL);
122: nloc += extop->szd;
123: VecSetSizes(*v,nloc,PETSC_DECIDE);
124: } else BVCreateVec(extop->nep->V,v);
125: PetscFunctionReturn(0);
126: }
128: PetscErrorCode NEPDeflationCreateBV(NEP_EXT_OP extop,PetscInt sz,BV *V)
129: {
130: PetscInt nloc;
131: BVType type;
132: BVOrthogType otype;
133: BVOrthogRefineType oref;
134: PetscReal oeta;
135: BVOrthogBlockType oblock;
136: NEP nep=extop->nep;
138: if (extop->szd) {
139: BVGetSizes(nep->V,&nloc,NULL,NULL);
140: BVCreate(PetscObjectComm((PetscObject)nep),V);
141: BVSetSizes(*V,nloc+extop->szd,PETSC_DECIDE,sz);
142: BVGetType(nep->V,&type);
143: BVSetType(*V,type);
144: BVGetOrthogonalization(nep->V,&otype,&oref,&oeta,&oblock);
145: BVSetOrthogonalization(*V,otype,oref,oeta,oblock);
146: PetscObjectStateIncrease((PetscObject)*V);
147: PetscLogObjectParent((PetscObject)nep,(PetscObject)*V);
148: } else BVDuplicateResize(nep->V,sz,V);
149: PetscFunctionReturn(0);
150: }
152: PetscErrorCode NEPDeflationSetRandomVec(NEP_EXT_OP extop,Vec v)
153: {
154: PetscInt n,next,i;
155: PetscRandom rand;
156: PetscScalar *array;
157: PetscMPIInt nn,np;
159: BVGetRandomContext(extop->nep->V,&rand);
160: VecSetRandom(v,rand);
161: if (extop->szd) {
162: MPI_Comm_size(PetscObjectComm((PetscObject)v),&np);
163: BVGetSizes(extop->nep->V,&n,NULL,NULL);
164: VecGetLocalSize(v,&next);
165: VecGetArray(v,&array);
166: for (i=n+extop->n;i<next;i++) array[i] = 0.0;
167: for (i=n;i<n+extop->n;i++) array[i] /= PetscSqrtReal(np);
168: PetscMPIIntCast(extop->n,&nn);
169: MPI_Bcast(array+n,nn,MPIU_SCALAR,0,PetscObjectComm((PetscObject)v));
170: VecRestoreArray(v,&array);
171: }
172: PetscFunctionReturn(0);
173: }
175: static PetscErrorCode NEPDeflationEvaluateBasisMat(NEP_EXT_OP extop,PetscInt idx,PetscBool hat,PetscScalar *bval,PetscScalar *Hj,PetscScalar *Hjprev)
176: {
177: PetscInt i,k,n=extop->n,ldhj=extop->szd,ldh=extop->szd+1;
178: PetscScalar sone=1.0,zero=0.0;
179: PetscBLASInt ldh_,ldhj_,n_;
181: i = (idx<0)?extop->szd*extop->szd*(-idx):extop->szd*extop->szd;
182: PetscArrayzero(Hj,i);
183: PetscBLASIntCast(ldhj+1,&ldh_);
184: PetscBLASIntCast(ldhj,&ldhj_);
185: PetscBLASIntCast(n,&n_);
186: if (idx<1) {
187: if (!hat) for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 1.0;
188: else for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 0.0;
189: } else {
190: for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[idx-1];
191: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hjprev,&ldhj_,&zero,Hj,&ldhj_));
192: for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[idx-1];
193: if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[idx-1];
194: }
195: if (idx<0) {
196: idx = -idx;
197: for (k=1;k<idx;k++) {
198: for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[k-1];
199: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hj+(k-1)*ldhj*ldhj,&ldhj_,&zero,Hj+k*ldhj*ldhj,&ldhj_));
200: for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[k-1];
201: if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[k-1];
202: }
203: }
204: PetscFunctionReturn(0);
205: }
207: PetscErrorCode NEPDeflationLocking(NEP_EXT_OP extop,Vec u,PetscScalar lambda)
208: {
209: PetscInt i;
211: NEPDeflationExtendInvariantPair(extop,u,lambda,extop->n);
212: extop->n++;
213: BVSetActiveColumns(extop->X,0,extop->n);
214: if (extop->n <= extop->szd) {
215: /* update XpX */
216: BVDotColumn(extop->X,extop->n-1,extop->XpX+(extop->n-1)*extop->szd);
217: extop->XpX[(extop->n-1)*(1+extop->szd)] = 1.0;
218: for (i=0;i<extop->n-1;i++) extop->XpX[i*extop->szd+extop->n-1] = PetscConj(extop->XpX[(extop->n-1)*extop->szd+i]);
219: /* determine minimality index */
220: extop->midx = PetscMin(extop->max_midx,extop->n);
221: /* polynominal basis coefficients */
222: for (i=0;i<extop->midx;i++) extop->bc[i] = extop->nep->target;
223: /* evaluate the polynomial basis in H */
224: NEPDeflationEvaluateBasisMat(extop,-extop->midx,PETSC_FALSE,NULL,extop->Hj,NULL);
225: }
226: PetscFunctionReturn(0);
227: }
229: static PetscErrorCode NEPDeflationEvaluateHatFunction(NEP_EXT_OP extop, PetscInt idx,PetscScalar lambda,PetscScalar *y,PetscScalar *hfj,PetscScalar *hfjp,PetscInt ld)
230: {
231: PetscInt i,j,k,off,ini,fin,sz,ldh,n=extop->n;
232: Mat A,B;
233: PetscScalar *array;
234: const PetscScalar *barray;
236: if (idx<0) {ini = 0; fin = extop->nep->nt;}
237: else {ini = idx; fin = idx+1;}
238: if (y) sz = hfjp?n+2:n+1;
239: else sz = hfjp?3*n:2*n;
240: ldh = extop->szd+1;
241: MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&A);
242: MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&B);
243: MatDenseGetArray(A,&array);
244: for (j=0;j<n;j++)
245: for (i=0;i<n;i++) array[j*sz+i] = extop->H[j*ldh+i];
246: MatDenseRestoreArrayWrite(A,&array);
247: if (y) {
248: MatDenseGetArray(A,&array);
249: array[extop->n*(sz+1)] = lambda;
250: if (hfjp) { array[(n+1)*sz+n] = 1.0; array[(n+1)*sz+n+1] = lambda;}
251: for (i=0;i<n;i++) array[n*sz+i] = y[i];
252: MatDenseRestoreArrayWrite(A,&array);
253: for (j=ini;j<fin;j++) {
254: FNEvaluateFunctionMat(extop->nep->f[j],A,B);
255: MatDenseGetArrayRead(B,&barray);
256: for (i=0;i<n;i++) hfj[j*ld+i] = barray[n*sz+i];
257: if (hfjp) for (i=0;i<n;i++) hfjp[j*ld+i] = barray[(n+1)*sz+i];
258: MatDenseRestoreArrayRead(B,&barray);
259: }
260: } else {
261: off = idx<0?ld*n:0;
262: MatDenseGetArray(A,&array);
263: for (k=0;k<n;k++) {
264: array[(n+k)*sz+k] = 1.0;
265: array[(n+k)*sz+n+k] = lambda;
266: }
267: if (hfjp) for (k=0;k<n;k++) {
268: array[(2*n+k)*sz+n+k] = 1.0;
269: array[(2*n+k)*sz+2*n+k] = lambda;
270: }
271: MatDenseRestoreArray(A,&array);
272: for (j=ini;j<fin;j++) {
273: FNEvaluateFunctionMat(extop->nep->f[j],A,B);
274: MatDenseGetArrayRead(B,&barray);
275: for (i=0;i<n;i++) for (k=0;k<n;k++) hfj[j*off+i*ld+k] = barray[n*sz+i*sz+k];
276: if (hfjp) for (k=0;k<n;k++) for (i=0;i<n;i++) hfjp[j*off+i*ld+k] = barray[2*n*sz+i*sz+k];
277: MatDenseRestoreArrayRead(B,&barray);
278: }
279: }
280: MatDestroy(&A);
281: MatDestroy(&B);
282: PetscFunctionReturn(0);
283: }
285: static PetscErrorCode MatMult_NEPDeflation(Mat M,Vec x,Vec y)
286: {
287: NEP_DEF_MATSHELL *matctx;
288: NEP_EXT_OP extop;
289: Vec x1,y1;
290: PetscScalar *yy,sone=1.0,zero=0.0;
291: const PetscScalar *xx;
292: PetscInt nloc,i;
293: PetscMPIInt np;
294: PetscBLASInt n_,one=1,szd_;
296: MPI_Comm_size(PetscObjectComm((PetscObject)M),&np);
297: MatShellGetContext(M,&matctx);
298: extop = matctx->extop;
299: if (extop->ref) VecZeroEntries(y);
300: if (extop->szd) {
301: x1 = matctx->w[0]; y1 = matctx->w[1];
302: VecGetArrayRead(x,&xx);
303: VecPlaceArray(x1,xx);
304: VecGetArray(y,&yy);
305: VecPlaceArray(y1,yy);
306: MatMult(matctx->T,x1,y1);
307: if (!extop->ref && extop->n) {
308: VecGetLocalSize(x1,&nloc);
309: /* copy for avoiding warning of constant array xx */
310: for (i=0;i<extop->n;i++) matctx->work[i] = xx[nloc+i]*PetscSqrtReal(np);
311: BVMultVec(matctx->U,1.0,1.0,y1,matctx->work);
312: BVDotVec(extop->X,x1,matctx->work);
313: PetscBLASIntCast(extop->n,&n_);
314: PetscBLASIntCast(extop->szd,&szd_);
315: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->A,&szd_,matctx->work,&one,&zero,yy+nloc,&one));
316: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->B,&szd_,xx+nloc,&one,&sone,yy+nloc,&one));
317: for (i=0;i<extop->n;i++) yy[nloc+i] /= PetscSqrtReal(np);
318: }
319: VecResetArray(x1);
320: VecRestoreArrayRead(x,&xx);
321: VecResetArray(y1);
322: VecRestoreArray(y,&yy);
323: } else MatMult(matctx->T,x,y);
324: PetscFunctionReturn(0);
325: }
327: static PetscErrorCode MatCreateVecs_NEPDeflation(Mat M,Vec *right,Vec *left)
328: {
329: NEP_DEF_MATSHELL *matctx;
331: MatShellGetContext(M,&matctx);
332: if (right) VecDuplicate(matctx->w[0],right);
333: if (left) VecDuplicate(matctx->w[0],left);
334: PetscFunctionReturn(0);
335: }
337: static PetscErrorCode MatDestroy_NEPDeflation(Mat M)
338: {
339: NEP_DEF_MATSHELL *matctx;
341: MatShellGetContext(M,&matctx);
342: if (matctx->extop->szd) {
343: BVDestroy(&matctx->U);
344: PetscFree2(matctx->hfj,matctx->work);
345: PetscFree2(matctx->A,matctx->B);
346: VecDestroy(&matctx->w[0]);
347: VecDestroy(&matctx->w[1]);
348: }
349: if (matctx->P != matctx->T) MatDestroy(&matctx->P);
350: MatDestroy(&matctx->T);
351: PetscFree(matctx);
352: PetscFunctionReturn(0);
353: }
355: static PetscErrorCode NEPDeflationEvaluateBasis(NEP_EXT_OP extop,PetscScalar lambda,PetscInt n,PetscScalar *val,PetscBool jacobian)
356: {
357: PetscScalar p;
358: PetscInt i;
360: if (!jacobian) {
361: val[0] = 1.0;
362: for (i=1;i<extop->n;i++) val[i] = val[i-1]*(lambda-extop->bc[i-1]);
363: } else {
364: val[0] = 0.0;
365: p = 1.0;
366: for (i=1;i<extop->n;i++) {
367: val[i] = val[i-1]*(lambda-extop->bc[i-1])+p;
368: p *= (lambda-extop->bc[i-1]);
369: }
370: }
371: PetscFunctionReturn(0);
372: }
374: static PetscErrorCode NEPDeflationComputeShellMat(NEP_EXT_OP extop,PetscScalar lambda,PetscBool jacobian,Mat *M)
375: {
376: NEP_DEF_MATSHELL *matctx,*matctxC;
377: PetscInt nloc,mloc,n=extop->n,j,i,szd=extop->szd,ldh=szd+1,k;
378: Mat F,Mshell,Mcomp;
379: PetscBool ini=PETSC_FALSE;
380: PetscScalar *hf,*hfj,*hfjp,sone=1.0,*hH,*hHprev,*pts,*B,*A,*Hj=extop->Hj,*basisv,zero=0.0;
381: PetscBLASInt n_,info,szd_;
383: if (!M) Mshell = jacobian?extop->MJ:extop->MF;
384: else Mshell = *M;
385: Mcomp = jacobian?extop->MF:extop->MJ;
386: if (!Mshell) {
387: ini = PETSC_TRUE;
388: PetscNew(&matctx);
389: MatGetLocalSize(extop->nep->function,&mloc,&nloc);
390: nloc += szd; mloc += szd;
391: MatCreateShell(PetscObjectComm((PetscObject)extop->nep),nloc,mloc,PETSC_DETERMINE,PETSC_DETERMINE,matctx,&Mshell);
392: MatShellSetOperation(Mshell,MATOP_MULT,(void(*)(void))MatMult_NEPDeflation);
393: MatShellSetOperation(Mshell,MATOP_CREATE_VECS,(void(*)(void))MatCreateVecs_NEPDeflation);
394: MatShellSetOperation(Mshell,MATOP_DESTROY,(void(*)(void))MatDestroy_NEPDeflation);
395: matctx->nep = extop->nep;
396: matctx->extop = extop;
397: if (!M) {
398: if (jacobian) { matctx->jacob = PETSC_TRUE; matctx->T = extop->nep->jacobian; extop->MJ = Mshell; }
399: else { matctx->jacob = PETSC_FALSE; matctx->T = extop->nep->function; extop->MF = Mshell; }
400: PetscObjectReference((PetscObject)matctx->T);
401: if (!jacobian) {
402: if (extop->nep->function_pre && extop->nep->function_pre != extop->nep->function) {
403: matctx->P = extop->nep->function_pre;
404: PetscObjectReference((PetscObject)matctx->P);
405: } else matctx->P = matctx->T;
406: }
407: } else {
408: matctx->jacob = jacobian;
409: MatDuplicate(jacobian?extop->nep->jacobian:extop->nep->function,MAT_DO_NOT_COPY_VALUES,&matctx->T);
410: *M = Mshell;
411: if (!jacobian) {
412: if (extop->nep->function_pre && extop->nep->function_pre != extop->nep->function) MatDuplicate(extop->nep->function_pre,MAT_DO_NOT_COPY_VALUES,&matctx->P);
413: else matctx->P = matctx->T;
414: }
415: }
416: if (szd) {
417: BVCreateVec(extop->nep->V,matctx->w);
418: VecDuplicate(matctx->w[0],matctx->w+1);
419: BVDuplicateResize(extop->nep->V,szd,&matctx->U);
420: PetscMalloc2(extop->simpU?2*(szd)*(szd):2*(szd)*(szd)*extop->nep->nt,&matctx->hfj,szd,&matctx->work);
421: PetscMalloc2(szd*szd,&matctx->A,szd*szd,&matctx->B);
422: }
423: } else MatShellGetContext(Mshell,&matctx);
424: if (ini || matctx->theta != lambda || matctx->n != extop->n) {
425: if (ini || matctx->theta != lambda) {
426: if (jacobian) NEPComputeJacobian(extop->nep,lambda,matctx->T);
427: else NEPComputeFunction(extop->nep,lambda,matctx->T,matctx->P);
428: }
429: if (n) {
430: matctx->hfjset = PETSC_FALSE;
431: if (!extop->simpU) {
432: /* likely hfjp has been already computed */
433: if (Mcomp) {
434: MatShellGetContext(Mcomp,&matctxC);
435: if (matctxC->hfjset && matctxC->theta == lambda && matctxC->n == extop->n) {
436: PetscArraycpy(matctx->hfj,matctxC->hfj,2*extop->szd*extop->szd*extop->nep->nt);
437: matctx->hfjset = PETSC_TRUE;
438: }
439: }
440: hfj = matctx->hfj; hfjp = matctx->hfj+extop->szd*extop->szd*extop->nep->nt;
441: if (!matctx->hfjset) {
442: NEPDeflationEvaluateHatFunction(extop,-1,lambda,NULL,hfj,hfjp,n);
443: matctx->hfjset = PETSC_TRUE;
444: }
445: BVSetActiveColumns(matctx->U,0,n);
446: hf = jacobian?hfjp:hfj;
447: MatCreateSeqDense(PETSC_COMM_SELF,n,n,hf,&F);
448: BVMatMult(extop->X,extop->nep->A[0],matctx->U);
449: BVMultInPlace(matctx->U,F,0,n);
450: BVSetActiveColumns(extop->W,0,extop->n);
451: for (j=1;j<extop->nep->nt;j++) {
452: BVMatMult(extop->X,extop->nep->A[j],extop->W);
453: MatDensePlaceArray(F,hf+j*n*n);
454: BVMult(matctx->U,1.0,1.0,extop->W,F);
455: MatDenseResetArray(F);
456: }
457: MatDestroy(&F);
458: } else {
459: hfj = matctx->hfj;
460: BVSetActiveColumns(matctx->U,0,n);
461: BVMatMult(extop->X,matctx->T,matctx->U);
462: for (j=0;j<n;j++) {
463: for (i=0;i<n;i++) hfj[j*n+i] = -extop->H[j*ldh+i];
464: hfj[j*(n+1)] += lambda;
465: }
466: PetscBLASIntCast(n,&n_);
467: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
468: PetscStackCallBLAS("LAPACKtrtri",LAPACKtrtri_("U","N",&n_,hfj,&n_,&info));
469: PetscFPTrapPop();
470: SlepcCheckLapackInfo("trtri",info);
471: MatCreateSeqDense(PETSC_COMM_SELF,n,n,hfj,&F);
472: BVMultInPlace(matctx->U,F,0,n);
473: if (jacobian) {
474: NEPDeflationComputeFunction(extop,lambda,NULL);
475: MatShellGetContext(extop->MF,&matctxC);
476: BVMult(matctx->U,-1.0,1.0,matctxC->U,F);
477: }
478: MatDestroy(&F);
479: }
480: PetscCalloc3(n,&basisv,szd*szd,&hH,szd*szd,&hHprev);
481: NEPDeflationEvaluateBasis(extop,lambda,n,basisv,jacobian);
482: A = matctx->A;
483: PetscArrayzero(A,szd*szd);
484: if (!jacobian) for (i=0;i<n;i++) A[i*(szd+1)] = 1.0;
485: for (j=0;j<n;j++)
486: for (i=0;i<n;i++)
487: for (k=1;k<extop->midx;k++) A[j*szd+i] += basisv[k]*PetscConj(Hj[k*szd*szd+i*szd+j]);
488: PetscBLASIntCast(n,&n_);
489: PetscBLASIntCast(szd,&szd_);
490: B = matctx->B;
491: PetscArrayzero(B,szd*szd);
492: for (i=1;i<extop->midx;i++) {
493: NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev);
494: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
495: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,B,&szd_));
496: pts = hHprev; hHprev = hH; hH = pts;
497: }
498: PetscFree3(basisv,hH,hHprev);
499: }
500: matctx->theta = lambda;
501: matctx->n = extop->n;
502: }
503: PetscFunctionReturn(0);
504: }
506: PetscErrorCode NEPDeflationComputeFunction(NEP_EXT_OP extop,PetscScalar lambda,Mat *F)
507: {
508: NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,NULL);
509: if (F) *F = extop->MF;
510: PetscFunctionReturn(0);
511: }
513: PetscErrorCode NEPDeflationComputeJacobian(NEP_EXT_OP extop,PetscScalar lambda,Mat *J)
514: {
515: NEPDeflationComputeShellMat(extop,lambda,PETSC_TRUE,NULL);
516: if (J) *J = extop->MJ;
517: PetscFunctionReturn(0);
518: }
520: PetscErrorCode NEPDeflationSolveSetUp(NEP_EXT_OP extop,PetscScalar lambda)
521: {
522: NEP_DEF_MATSHELL *matctx;
523: NEP_DEF_FUN_SOLVE solve;
524: PetscInt i,j,n=extop->n;
525: Vec u,tu;
526: Mat F;
527: PetscScalar snone=-1.0,sone=1.0;
528: PetscBLASInt n_,szd_,ldh_,*p,info;
529: Mat Mshell;
531: solve = extop->solve;
532: if (lambda!=solve->theta || n!=solve->n) {
533: NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,solve->sincf?NULL:&solve->T);
534: Mshell = (solve->sincf)?extop->MF:solve->T;
535: MatShellGetContext(Mshell,&matctx);
536: NEP_KSPSetOperators(solve->ksp,matctx->T,matctx->P);
537: if (!extop->ref && n) {
538: PetscBLASIntCast(n,&n_);
539: PetscBLASIntCast(extop->szd,&szd_);
540: PetscBLASIntCast(extop->szd+1,&ldh_);
541: if (!extop->simpU) {
542: BVSetActiveColumns(solve->T_1U,0,n);
543: for (i=0;i<n;i++) {
544: BVGetColumn(matctx->U,i,&u);
545: BVGetColumn(solve->T_1U,i,&tu);
546: KSPSolve(solve->ksp,u,tu);
547: BVRestoreColumn(solve->T_1U,i,&tu);
548: BVRestoreColumn(matctx->U,i,&u);
549: }
550: MatCreateSeqDense(PETSC_COMM_SELF,n,n,solve->work,&F);
551: BVDot(solve->T_1U,extop->X,F);
552: MatDestroy(&F);
553: } else {
554: for (j=0;j<n;j++)
555: for (i=0;i<n;i++) solve->work[j*n+i] = extop->XpX[j*extop->szd+i];
556: for (i=0;i<n;i++) extop->H[i*ldh_+i] -= lambda;
557: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n_,&n_,&snone,extop->H,&ldh_,solve->work,&n_));
558: for (i=0;i<n;i++) extop->H[i*ldh_+i] += lambda;
559: }
560: PetscArraycpy(solve->M,matctx->B,extop->szd*extop->szd);
561: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&snone,matctx->A,&szd_,solve->work,&n_,&sone,solve->M,&szd_));
562: PetscMalloc1(n,&p);
563: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
564: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n_,&n_,solve->M,&szd_,p,&info));
565: SlepcCheckLapackInfo("getrf",info);
566: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n_,solve->M,&szd_,p,solve->work,&n_,&info));
567: SlepcCheckLapackInfo("getri",info);
568: PetscFPTrapPop();
569: PetscFree(p);
570: }
571: solve->theta = lambda;
572: solve->n = n;
573: }
574: PetscFunctionReturn(0);
575: }
577: PetscErrorCode NEPDeflationFunctionSolve(NEP_EXT_OP extop,Vec b,Vec x)
578: {
579: Vec b1,x1;
580: PetscScalar *xx,*bb,*x2,*b2,*w,*w2,snone=-1.0,sone=1.0,zero=0.0;
581: NEP_DEF_MATSHELL *matctx;
582: NEP_DEF_FUN_SOLVE solve=extop->solve;
583: PetscBLASInt one=1,szd_,n_,ldh_;
584: PetscInt nloc,i;
585: PetscMPIInt np,count;
587: if (extop->ref) VecZeroEntries(x);
588: if (extop->szd) {
589: x1 = solve->w[0]; b1 = solve->w[1];
590: VecGetArray(x,&xx);
591: VecPlaceArray(x1,xx);
592: VecGetArray(b,&bb);
593: VecPlaceArray(b1,bb);
594: } else {
595: b1 = b; x1 = x;
596: }
597: KSPSolve(extop->solve->ksp,b1,x1);
598: if (!extop->ref && extop->n && extop->szd) {
599: PetscBLASIntCast(extop->szd,&szd_);
600: PetscBLASIntCast(extop->n,&n_);
601: PetscBLASIntCast(extop->szd+1,&ldh_);
602: BVGetSizes(extop->nep->V,&nloc,NULL,NULL);
603: PetscMalloc2(extop->n,&b2,extop->n,&x2);
604: MPI_Comm_size(PetscObjectComm((PetscObject)b),&np);
605: for (i=0;i<extop->n;i++) b2[i] = bb[nloc+i]*PetscSqrtReal(np);
606: w = solve->work; w2 = solve->work+extop->n;
607: MatShellGetContext(solve->sincf?extop->MF:solve->T,&matctx);
608: PetscArraycpy(w2,b2,extop->n);
609: BVDotVec(extop->X,x1,w);
610: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&snone,matctx->A,&szd_,w,&one,&sone,w2,&one));
611: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,solve->M,&szd_,w2,&one,&zero,x2,&one));
612: if (extop->simpU) {
613: for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] -= solve->theta;
614: for (i=0;i<extop->n;i++) w[i] = x2[i];
615: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&one,&snone,extop->H,&ldh_,w,&n_));
616: for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] += solve->theta;
617: BVMultVec(extop->X,-1.0,1.0,x1,w);
618: } else BVMultVec(solve->T_1U,-1.0,1.0,x1,x2);
619: for (i=0;i<extop->n;i++) xx[i+nloc] = x2[i]/PetscSqrtReal(np);
620: PetscMPIIntCast(extop->n,&count);
621: MPI_Bcast(xx+nloc,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)b));
622: }
623: if (extop->szd) {
624: VecResetArray(x1);
625: VecRestoreArray(x,&xx);
626: VecResetArray(b1);
627: VecRestoreArray(b,&bb);
628: if (!extop->ref && extop->n) PetscFree2(b2,x2);
629: }
630: PetscFunctionReturn(0);
631: }
633: PetscErrorCode NEPDeflationSetRefine(NEP_EXT_OP extop,PetscBool ref)
634: {
635: extop->ref = ref;
636: PetscFunctionReturn(0);
637: }
639: PetscErrorCode NEPDeflationReset(NEP_EXT_OP extop)
640: {
641: PetscInt j;
642: NEP_DEF_FUN_SOLVE solve;
644: if (!extop) PetscFunctionReturn(0);
645: PetscFree(extop->H);
646: BVDestroy(&extop->X);
647: if (extop->szd) {
648: VecDestroy(&extop->w);
649: PetscFree3(extop->Hj,extop->XpX,extop->bc);
650: BVDestroy(&extop->W);
651: }
652: MatDestroy(&extop->MF);
653: MatDestroy(&extop->MJ);
654: if (extop->solve) {
655: solve = extop->solve;
656: if (extop->szd) {
657: if (!extop->simpU) BVDestroy(&solve->T_1U);
658: PetscFree2(solve->M,solve->work);
659: VecDestroy(&solve->w[0]);
660: VecDestroy(&solve->w[1]);
661: }
662: if (!solve->sincf) MatDestroy(&solve->T);
663: PetscFree(extop->solve);
664: }
665: if (extop->proj) {
666: if (extop->szd) {
667: for (j=0;j<extop->nep->nt;j++) MatDestroy(&extop->proj->V1pApX[j]);
668: MatDestroy(&extop->proj->XpV1);
669: PetscFree3(extop->proj->V2,extop->proj->V1pApX,extop->proj->work);
670: VecDestroy(&extop->proj->w);
671: BVDestroy(&extop->proj->V1);
672: }
673: PetscFree(extop->proj);
674: }
675: PetscFree(extop);
676: PetscFunctionReturn(0);
677: }
679: PetscErrorCode NEPDeflationInitialize(NEP nep,BV X,KSP ksp,PetscBool sincfun,PetscInt sz,NEP_EXT_OP *extop)
680: {
681: NEP_EXT_OP op;
682: NEP_DEF_FUN_SOLVE solve;
683: PetscInt szd;
684: Vec x;
686: NEPDeflationReset(*extop);
687: PetscNew(&op);
688: *extop = op;
689: op->nep = nep;
690: op->n = 0;
691: op->szd = szd = sz-1;
692: op->max_midx = PetscMin(MAX_MINIDX,szd);
693: op->X = X;
694: if (!X) BVDuplicateResize(nep->V,sz,&op->X);
695: else PetscObjectReference((PetscObject)X);
696: PetscCalloc1(sz*sz,&(op)->H);
697: if (op->szd) {
698: BVGetColumn(op->X,0,&x);
699: VecDuplicate(x,&op->w);
700: BVRestoreColumn(op->X,0,&x);
701: op->simpU = PETSC_FALSE;
702: if (nep->fui==NEP_USER_INTERFACE_SPLIT) {
703: /* undocumented option to use the simple expression for U = T*X*inv(lambda-H) */
704: PetscOptionsGetBool(NULL,NULL,"-nep_deflation_simpleu",&op->simpU,NULL);
705: } else {
706: op->simpU = PETSC_TRUE;
707: }
708: PetscCalloc3(szd*szd*op->max_midx,&(op)->Hj,szd*szd,&(op)->XpX,szd,&op->bc);
709: BVDuplicateResize(op->X,op->szd,&op->W);
710: }
711: if (ksp) {
712: PetscNew(&solve);
713: op->solve = solve;
714: solve->ksp = ksp;
715: solve->sincf = sincfun;
716: solve->n = -1;
717: if (op->szd) {
718: if (!op->simpU) BVDuplicateResize(nep->V,szd,&solve->T_1U);
719: PetscMalloc2(szd*szd,&solve->M,2*szd*szd,&solve->work);
720: BVCreateVec(nep->V,&solve->w[0]);
721: VecDuplicate(solve->w[0],&solve->w[1]);
722: }
723: }
724: PetscFunctionReturn(0);
725: }
727: PetscErrorCode NEPDeflationDSNEPComputeMatrix(DS ds,PetscScalar lambda,PetscBool deriv,DSMatType mat,void *ctx)
728: {
729: PetscScalar *T,*Ei,*w1,*w2,*w=NULL,*ww,*hH,*hHprev,*pts;
730: PetscScalar alpha,alpha2,*AB,sone=1.0,zero=0.0,*basisv,s;
731: const PetscScalar *E;
732: PetscInt i,ldds,nwork=0,szd,nv,j,k,n;
733: PetscBLASInt inc=1,nv_,ldds_,dim_,dim2,szdk,szd_,n_,ldh_;
734: PetscMPIInt np;
735: NEP_DEF_PROJECT proj=(NEP_DEF_PROJECT)ctx;
736: NEP_EXT_OP extop=proj->extop;
737: NEP nep=extop->nep;
739: DSGetDimensions(ds,&nv,NULL,NULL,NULL);
740: DSGetLeadingDimension(ds,&ldds);
741: DSGetArray(ds,mat,&T);
742: PetscArrayzero(T,ldds*nv);
743: PetscBLASIntCast(ldds*nv,&dim2);
744: /* mat = V1^*T(lambda)V1 */
745: for (i=0;i<nep->nt;i++) {
746: if (deriv) FNEvaluateDerivative(nep->f[i],lambda,&alpha);
747: else FNEvaluateFunction(nep->f[i],lambda,&alpha);
748: DSGetArray(ds,DSMatExtra[i],&Ei);
749: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&dim2,&alpha,Ei,&inc,T,&inc));
750: DSRestoreArray(ds,DSMatExtra[i],&Ei);
751: }
752: if (!extop->ref && extop->n) {
753: n = extop->n;
754: szd = extop->szd;
755: PetscArrayzero(proj->work,proj->lwork);
756: PetscBLASIntCast(nv,&nv_);
757: PetscBLASIntCast(n,&n_);
758: PetscBLASIntCast(ldds,&ldds_);
759: PetscBLASIntCast(szd,&szd_);
760: PetscBLASIntCast(proj->dim,&dim_);
761: PetscBLASIntCast(extop->szd+1,&ldh_);
762: w1 = proj->work; w2 = proj->work+proj->dim*proj->dim;
763: nwork += 2*proj->dim*proj->dim;
765: /* mat = mat + V1^*U(lambda)V2 */
766: for (i=0;i<nep->nt;i++) {
767: if (extop->simpU) {
768: if (deriv) FNEvaluateDerivative(nep->f[i],lambda,&alpha);
769: else FNEvaluateFunction(nep->f[i],lambda,&alpha);
770: ww = w1; w = w2;
771: PetscArraycpy(ww,proj->V2,szd*nv);
772: MPI_Comm_size(PetscObjectComm((PetscObject)ds),&np);
773: for (j=0;j<szd*nv;j++) ww[j] *= PetscSqrtReal(np);
774: for (j=0;j<n;j++) extop->H[j*ldh_+j] -= lambda;
775: alpha = -alpha;
776: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha,extop->H,&ldh_,ww,&szd_));
777: if (deriv) {
778: PetscBLASIntCast(szd*nv,&szdk);
779: FNEvaluateFunction(nep->f[i],lambda,&alpha2);
780: PetscArraycpy(w,proj->V2,szd*nv);
781: for (j=0;j<szd*nv;j++) w[j] *= PetscSqrtReal(np);
782: alpha2 = -alpha2;
783: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
784: alpha2 = 1.0;
785: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
786: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&szdk,&sone,w,&inc,ww,&inc));
787: }
788: for (j=0;j<n;j++) extop->H[j*ldh_+j] += lambda;
789: } else {
790: NEPDeflationEvaluateHatFunction(extop,i,lambda,NULL,w1,w2,szd);
791: w = deriv?w2:w1; ww = deriv?w1:w2;
792: MPI_Comm_size(PetscObjectComm((PetscObject)ds),&np);
793: s = PetscSqrtReal(np);
794: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&s,w,&szd_,proj->V2,&szd_,&zero,ww,&szd_));
795: }
796: MatDenseGetArrayRead(proj->V1pApX[i],&E);
797: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&nv_,&nv_,&n_,&sone,E,&dim_,ww,&szd_,&sone,T,&ldds_));
798: MatDenseRestoreArrayRead(proj->V1pApX[i],&E);
799: }
801: /* mat = mat + V2^*A(lambda)V1 */
802: basisv = proj->work+nwork; nwork += szd;
803: hH = proj->work+nwork; nwork += szd*szd;
804: hHprev = proj->work+nwork; nwork += szd*szd;
805: AB = proj->work+nwork;
806: NEPDeflationEvaluateBasis(extop,lambda,n,basisv,deriv);
807: if (!deriv) for (i=0;i<n;i++) AB[i*(szd+1)] = 1.0;
808: for (j=0;j<n;j++)
809: for (i=0;i<n;i++)
810: for (k=1;k<extop->midx;k++) AB[j*szd+i] += basisv[k]*PetscConj(extop->Hj[k*szd*szd+i*szd+j]);
811: MatDenseGetArrayRead(proj->XpV1,&E);
812: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,E,&szd_,&zero,w,&szd_));
813: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
814: MatDenseRestoreArrayRead(proj->XpV1,&E);
816: /* mat = mat + V2^*B(lambda)V2 */
817: PetscArrayzero(AB,szd*szd);
818: for (i=1;i<extop->midx;i++) {
819: NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev);
820: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
821: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,AB,&szd_));
822: pts = hHprev; hHprev = hH; hH = pts;
823: }
824: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,proj->V2,&szd_,&zero,w,&szd_));
825: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
826: }
827: DSRestoreArray(ds,mat,&T);
828: PetscFunctionReturn(0);
829: }
831: PetscErrorCode NEPDeflationProjectOperator(NEP_EXT_OP extop,BV Vext,DS ds,PetscInt j0,PetscInt j1)
832: {
833: PetscInt k,j,n=extop->n,dim;
834: Vec v,ve;
835: BV V1;
836: Mat G;
837: NEP nep=extop->nep;
838: NEP_DEF_PROJECT proj;
840: NEPCheckSplit(extop->nep,1);
841: proj = extop->proj;
842: if (!proj) {
843: /* Initialize the projection data structure */
844: PetscNew(&proj);
845: extop->proj = proj;
846: proj->extop = extop;
847: BVGetSizes(Vext,NULL,NULL,&dim);
848: proj->dim = dim;
849: if (extop->szd) {
850: proj->lwork = 3*dim*dim+2*extop->szd*extop->szd+extop->szd;
851: PetscMalloc3(dim*extop->szd,&proj->V2,nep->nt,&proj->V1pApX,proj->lwork,&proj->work);
852: for (j=0;j<nep->nt;j++) MatCreateSeqDense(PETSC_COMM_SELF,proj->dim,extop->szd,NULL,&proj->V1pApX[j]);
853: MatCreateSeqDense(PETSC_COMM_SELF,extop->szd,proj->dim,NULL,&proj->XpV1);
854: BVCreateVec(extop->X,&proj->w);
855: BVDuplicateResize(extop->X,proj->dim,&proj->V1);
856: }
857: DSNEPSetComputeMatrixFunction(ds,NEPDeflationDSNEPComputeMatrix,(void*)proj);
858: }
860: /* Split Vext in V1 and V2 */
861: if (extop->szd) {
862: for (j=j0;j<j1;j++) {
863: BVGetColumn(Vext,j,&ve);
864: BVGetColumn(proj->V1,j,&v);
865: NEPDeflationCopyToExtendedVec(extop,v,proj->V2+j*extop->szd,ve,PETSC_TRUE);
866: BVRestoreColumn(proj->V1,j,&v);
867: BVRestoreColumn(Vext,j,&ve);
868: }
869: V1 = proj->V1;
870: } else V1 = Vext;
872: /* Compute matrices V1^* A_i V1 */
873: BVSetActiveColumns(V1,j0,j1);
874: for (k=0;k<nep->nt;k++) {
875: DSGetMat(ds,DSMatExtra[k],&G);
876: BVMatProject(V1,nep->A[k],V1,G);
877: DSRestoreMat(ds,DSMatExtra[k],&G);
878: }
880: if (extop->n) {
881: if (extop->szd) {
882: /* Compute matrices V1^* A_i X and V1^* X */
883: BVSetActiveColumns(extop->W,0,n);
884: for (k=0;k<nep->nt;k++) {
885: BVMatMult(extop->X,nep->A[k],extop->W);
886: BVDot(extop->W,V1,proj->V1pApX[k]);
887: }
888: BVDot(V1,extop->X,proj->XpV1);
889: }
890: }
891: PetscFunctionReturn(0);
892: }