Subversion Repositories slepc-dev

Rev

Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2110 jroman 1
/*
2
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3
   SLEPc - Scalable Library for Eigenvalue Problem Computations
2575 eromero 4
   Copyright (c) 2002-2011, Universitat Politecnica de Valencia, Spain
1619 slepc 5
 
2110 jroman 6
   This file is part of SLEPc.
7
 
8
   SLEPc is free software: you can redistribute it and/or modify it under  the
9
   terms of version 3 of the GNU Lesser General Public License as published by
10
   the Free Software Foundation.
11
 
12
   SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
13
   WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
14
   FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
15
   more details.
16
 
17
   You  should have received a copy of the GNU Lesser General  Public  License
18
   along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
19
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
20
*/
21
 
2346 eromero 22
#include <private/vecimplslepc.h>
1985 eromero 23
#include "davidson.h"
1619 slepc 24
 
1745 eromero 25
PetscLogEvent SLEPC_SlepcDenseMatProd = 0;
1750 eromero 26
PetscLogEvent SLEPC_SlepcDenseNorm = 0;
1745 eromero 27
PetscLogEvent SLEPC_SlepcDenseOrth = 0;
28
PetscLogEvent SLEPC_SlepcDenseCopy = 0;
29
PetscLogEvent SLEPC_VecsMult = 0;
1630 slepc 30
 
1634 slepc 31
void dvd_sum_local(void *in, void *out, PetscMPIInt *cnt,MPI_Datatype *t);
1753 eromero 32
PetscErrorCode VecsMultS_copy_func(PetscScalar *out, PetscInt size_out,
33
                                   void *ptr);
1634 slepc 34
 
2223 jroman 35
#undef __FUNCT__  
36
#define __FUNCT__ "SlepcDenseMatProd"
1619 slepc 37
/*
38
  Compute C <- a*A*B + b*C, where
39
    ldC, the leading dimension of C,
40
    ldA, the leading dimension of A,
41
    rA, cA, rows and columns of A,
42
    At, if true use the transpose of A instead,
43
    ldB, the leading dimension of B,
44
    rB, cB, rows and columns of B,
45
    Bt, if true use the transpose of B instead
46
*/
47
PetscErrorCode SlepcDenseMatProd(PetscScalar *C, PetscInt _ldC, PetscScalar b,
48
  PetscScalar a,
2216 jroman 49
  const PetscScalar *A, PetscInt _ldA, PetscInt rA, PetscInt cA, PetscBool At,
50
  const PetscScalar *B, PetscInt _ldB, PetscInt rB, PetscInt cB, PetscBool Bt)
1619 slepc 51
{
1630 slepc 52
  PetscErrorCode  ierr;
53
  PetscInt        tmp;
54
  PetscBLASInt    m, n, k, ldA = _ldA, ldB = _ldB, ldC = _ldC;
55
  const char      *N = "N", *T = "C", *qA = N, *qB = N;
1619 slepc 56
 
57
  PetscFunctionBegin;
1675 slepc 58
 
59
  if ((rA == 0) || (cB == 0)) { PetscFunctionReturn(0); }
2654 jroman 60
  PetscValidScalarPointer(C,1);
61
  PetscValidScalarPointer(A,5);
62
  PetscValidScalarPointer(B,10);
1675 slepc 63
 
1745 eromero 64
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
1619 slepc 65
 
66
  /* Transpose if needed */
2177 jroman 67
  if (At) tmp = rA, rA = cA, cA = tmp, qA = T;
68
  if (Bt) tmp = rB, rB = cB, cB = tmp, qB = T;
1619 slepc 69
 
70
  /* Check size */
71
  if (cA != rB) {
2214 jroman 72
    SETERRQ(PETSC_COMM_SELF,1, "Matrix dimensions do not match");
1619 slepc 73
  }
74
 
75
  /* Do stub */
1744 eromero 76
  if ((rA == 1) && (cA == 1) && (cB == 1)) {
2556 eromero 77
    if (!At && !Bt) *C = *A * *B;
78
    else if (At && !Bt) *C = PetscConj(*A) * *B;
79
    else if (!At && Bt) *C = *A * PetscConj(*B);
80
    else *C = PetscConj(*A) * PetscConj(*B);
1744 eromero 81
    m = n = k = 1;
82
  } else {
83
    m = rA; n = cB; k = cA;
84
    BLASgemm_(qA, qB, &m, &n, &k, &a, (PetscScalar*)A, &ldA, (PetscScalar*)B,
85
              &ldB, &b, C, &ldC);
86
  }
1619 slepc 87
 
1630 slepc 88
  ierr = PetscLogFlops(m*n*2*k);CHKERRQ(ierr);
1745 eromero 89
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
1630 slepc 90
 
1619 slepc 91
  PetscFunctionReturn(0);
92
}
93
 
2223 jroman 94
#undef __FUNCT__  
95
#define __FUNCT__ "SlepcDenseMatProdTriang"
1626 slepc 96
/*
1747 eromero 97
  Compute C <- A*B, where
1988 eromero 98
    sC, structure of C,
1747 eromero 99
    ldC, the leading dimension of C,
1988 eromero 100
    sA, structure of A,
1747 eromero 101
    ldA, the leading dimension of A,
102
    rA, cA, rows and columns of A,
103
    At, if true use the transpose of A instead,
1988 eromero 104
    sB, structure of B,
1747 eromero 105
    ldB, the leading dimension of B,
106
    rB, cB, rows and columns of B,
107
    Bt, if true use the transpose of B instead
108
*/
109
PetscErrorCode SlepcDenseMatProdTriang(
110
  PetscScalar *C, MatType_t sC, PetscInt ldC,
111
  const PetscScalar *A, MatType_t sA, PetscInt ldA, PetscInt rA, PetscInt cA,
2216 jroman 112
  PetscBool At,
1747 eromero 113
  const PetscScalar *B, MatType_t sB, PetscInt ldB, PetscInt rB, PetscInt cB,
2216 jroman 114
  PetscBool Bt)
1747 eromero 115
{
116
  PetscErrorCode  ierr;
1988 eromero 117
  PetscInt        tmp;
118
  PetscScalar     one=1.0, zero=0.0;
1747 eromero 119
  PetscBLASInt    rC, cC, _ldA = ldA, _ldB = ldB, _ldC = ldC;
1750 eromero 120
 
1747 eromero 121
  PetscFunctionBegin;
122
 
123
  if ((rA == 0) || (cB == 0)) { PetscFunctionReturn(0); }
2654 jroman 124
  PetscValidScalarPointer(C,1);
125
  PetscValidScalarPointer(A,4);
126
  PetscValidScalarPointer(B,10);
1747 eromero 127
 
128
  /* Transpose if needed */
2177 jroman 129
  if (At) tmp = rA, rA = cA, cA = tmp;
130
  if (Bt) tmp = rB, rB = cB, cB = tmp;
1747 eromero 131
 
132
  /* Check size */
2214 jroman 133
  if (cA != rB) SETERRQ(PETSC_COMM_SELF,1, "Matrix dimensions do not match");
134
  if (sB != 0) SETERRQ(PETSC_COMM_SELF,1, "Matrix type not supported for B");
1747 eromero 135
 
1750 eromero 136
  /* Optimized version: trivial case */
137
  if ((rA == 1) && (cA == 1) && (cB == 1)) {
2177 jroman 138
    if (!At && !Bt)     *C = *A * *B;
139
    else if (At && !Bt) *C = PetscConj(*A) * *B;
140
    else if (!At && Bt) *C = *A * PetscConj(*B);
141
    else if (At && Bt)  *C = PetscConj(*A) * PetscConj(*B);
1750 eromero 142
    PetscFunctionReturn(0);
143
  }
144
 
145
  /* Optimized versions: sA == 0 && sB == 0 */
146
  if ((sA == 0) && (sB == 0)) {
2177 jroman 147
    if (At) tmp = rA, rA = cA, cA = tmp;
148
    if (Bt) tmp = rB, rB = cB, cB = tmp;
1750 eromero 149
    ierr = SlepcDenseMatProd(C, ldC, 0.0, 1.0, A, ldA, rA, cA, At, B, ldB, rB,
150
                             cB, Bt); CHKERRQ(ierr);
151
    PetscFunctionReturn(ierr);
152
  }
153
 
1988 eromero 154
  /* Optimized versions: A hermitian && (B not triang) */
1756 eromero 155
  if (DVD_IS(sA,DVD_MAT_HERMITIAN) &&
156
      DVD_ISNOT(sB,DVD_MAT_UTRIANG) &&
1988 eromero 157
      DVD_ISNOT(sB,DVD_MAT_LTRIANG)    ) {
1750 eromero 158
    ierr = PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
159
    rC = rA; cC = cB;
1988 eromero 160
    BLAShemm_("L", DVD_ISNOT(sA,DVD_MAT_LTRIANG)?"U":"L", &rC, &cC, &one,
1750 eromero 161
              (PetscScalar*)A, &_ldA, (PetscScalar*)B, &_ldB, &zero, C, &_ldC);
162
    ierr = PetscLogFlops(rA*cB*cA); CHKERRQ(ierr);
163
    ierr = PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
164
    PetscFunctionReturn(0);
165
  }
166
 
1988 eromero 167
  /* Optimized versions: B hermitian && (A not triang) */
168
  if (DVD_IS(sB,DVD_MAT_HERMITIAN) &&
169
      DVD_ISNOT(sA,DVD_MAT_UTRIANG) &&
170
      DVD_ISNOT(sA,DVD_MAT_LTRIANG)    ) {
171
    ierr = PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
172
    rC = rA; cC = cB;
173
    BLAShemm_("R", DVD_ISNOT(sB,DVD_MAT_LTRIANG)?"U":"L", &rC, &cC, &one,
174
              (PetscScalar*)B, &_ldB, (PetscScalar*)A, &_ldA, &zero, C, &_ldC);
175
    ierr = PetscLogFlops(rA*cB*cA); CHKERRQ(ierr);
176
    ierr = PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
177
    PetscFunctionReturn(0);
1747 eromero 178
  }
1988 eromero 179
 
2214 jroman 180
  SETERRQ(PETSC_COMM_SELF,1, "Matrix type not supported for A");
1747 eromero 181
}
182
 
2223 jroman 183
#undef __FUNCT__  
184
#define __FUNCT__ "SlepcDenseNorm"
1750 eromero 185
/*
1965 eromero 186
  Normalize the columns of the matrix A, where
1750 eromero 187
    ldA, the leading dimension of A,
1965 eromero 188
    rA, cA, rows and columns of A.
189
  if eigi is given, the pairs of contiguous columns i i+1 such as eigi[i] != 0
190
  are normalized as being one column.
1750 eromero 191
*/
192
PetscErrorCode SlepcDenseNorm(PetscScalar *A, PetscInt ldA, PetscInt _rA,
1965 eromero 193
                              PetscInt cA, PetscScalar *eigi)
1750 eromero 194
{
195
  PetscErrorCode  ierr;
1965 eromero 196
  PetscInt        i;
197
  PetscScalar     norm, norm0;
1750 eromero 198
  PetscBLASInt    rA = _rA, one=1;
1747 eromero 199
 
1750 eromero 200
  PetscFunctionBegin;
2654 jroman 201
  PetscValidScalarPointer(A,1);
202
  PetscValidScalarPointer(eigi,5);
1750 eromero 203
 
204
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseNorm,0,0,0,0);CHKERRQ(ierr);
205
 
206
  for(i=0; i<cA; i++) {
1965 eromero 207
    if(eigi && eigi[i] != 0.0) {
208
      norm = BLASnrm2_(&rA, &A[i*ldA], &one);
209
      norm0 = BLASnrm2_(&rA, &A[(i+1)*ldA], &one);
210
      norm = 1.0/PetscSqrtScalar(norm*norm + norm0*norm0);
211
      BLASscal_(&rA, &norm, &A[i*ldA], &one);
212
      BLASscal_(&rA, &norm, &A[(i+1)*ldA], &one);
213
      i++;
214
    } else {
215
      norm = BLASnrm2_(&rA, &A[i*ldA], &one);
216
      norm = 1.0 / norm;
217
      BLASscal_(&rA, &norm, &A[i*ldA], &one);
218
     }
1750 eromero 219
  }
220
 
221
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseNorm,0,0,0,0);CHKERRQ(ierr);
222
 
223
  PetscFunctionReturn(0);
224
}
225
 
226
 
2223 jroman 227
#undef __FUNCT__  
228
#define __FUNCT__ "SlepcDenseOrth"
1735 eromero 229
/*
1743 eromero 230
  Compute A <- orth(A), where
231
    ldA, the leading dimension of A,
232
    rA, cA, rows and columns of A,
233
    auxS, auxiliary vector of more size than cA+min(rA,cA),
234
    lauxS, size of auxS,
235
    ncA, new number of columns of A
236
*/
237
PetscErrorCode SlepcDenseOrth(PetscScalar *A, PetscInt _ldA, PetscInt _rA,
238
                              PetscInt _cA, PetscScalar *auxS, PetscInt _lauxS,
239
                              PetscInt *ncA)
240
{
1745 eromero 241
  PetscErrorCode  ierr;
1743 eromero 242
  PetscBLASInt    ldA = _ldA, rA = _rA, cA = _cA,
243
                  info, ltau = PetscMin(_cA, _rA), lw = _lauxS - ltau;
244
  PetscScalar     *tau = auxS, *w = tau + ltau;
245
 
246
  PetscFunctionBegin;
247
 
248
  /* Quick exit */
249
  if ((_rA == 0) || (cA == 0)) { PetscFunctionReturn(0); }
2654 jroman 250
  PetscValidScalarPointer(A,1);
251
  PetscValidScalarPointer(auxS,5);
252
  PetscValidIntPointer(ncA,7);
1743 eromero 253
 
254
  /* Memory check */
2214 jroman 255
  if (lw < cA) SETERRQ(PETSC_COMM_SELF,1, "Insufficient memory for xGEQRF");
1743 eromero 256
 
1745 eromero 257
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseOrth,0,0,0,0);CHKERRQ(ierr);
1743 eromero 258
  LAPACKgeqrf_(&rA, &cA, A, &ldA, tau, w, &lw, &info);
2214 jroman 259
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack xGEQRF %d", info);
1743 eromero 260
  LAPACKorgqr_(&rA, &ltau, &ltau, A, &ldA, tau, w, &lw, &info);
2214 jroman 261
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack xORGQR %d", info);
1745 eromero 262
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseOrth,0,0,0,0);CHKERRQ(ierr);
1743 eromero 263
 
264
  if (ncA) *ncA = ltau;
265
 
266
  PetscFunctionReturn(0);
267
}
268
 
2223 jroman 269
#undef __FUNCT__  
2779 eromero 270
#define __FUNCT__ "SlepcDenseIOrthEig"
271
/*
272
  Compute new_A=A*Y: new_A' * diag(l0) * new_A = l, where
273
    ldA, the leading dimension of A,
274
    rA, cA, rows and columns of A,
275
    l0, vector of size rA,
276
    l, vector of size cA,
277
    auxS, auxiliary vector of more size than 2*cA*cA+rA*cA,
278
    lauxS, size of auxS
279
*/
280
PetscErrorCode SlepcDenseIOrthEig(PetscScalar *A,PetscInt ldA,PetscInt rA,PetscInt cA,PetscReal *l0,PetscReal *l,PetscScalar *auxS, PetscInt lauxS)
281
{
282
  PetscErrorCode  ierr;
283
  PetscInt        i,j;
284
  PetscScalar     *X = auxS, *S = X+cA*cA, *M = S+cA*cA;
285
  PetscReal       n;
286
 
287
  PetscFunctionBegin;
288
 
289
  /* Quick exit */
290
  if ((rA == 0) || (cA == 0)) { PetscFunctionReturn(0); }
291
  PetscValidScalarPointer(A,1);
292
  PetscValidScalarPointer(auxS,5);
293
 
294
  /* Memory check */
295
  if (lauxS < 2*cA*cA+cA*rA) SETERRQ(PETSC_COMM_SELF,1, "Insufficient memory!");
296
 
297
  /* M <- diag(l0) * A */
298
  for (i=0; i<cA; i++) {
299
    for (j=0; j<rA; j++) M[j+i*rA] = l0[j]*A[j+i*ldA];
300
  }
301
 
302
  /* S <- A' * M */
303
  ierr = SlepcDenseMatProd(S,cA,0.0,1.0,A,ldA,rA,cA,PETSC_TRUE,M,rA,rA,cA,PETSC_FALSE);CHKERRQ(ierr);
304
 
305
  /* [l,X] = eig(S) */
306
  ierr = EPSDenseHEP(cA,S,cA,l,X);CHKERRQ(ierr);
307
 
308
  /* A <- A * X / sqrt(abs(l)), l = sign(l) */
309
  ierr = SlepcDenseCopy(M,rA,A,ldA,rA,cA);CHKERRQ(ierr);
310
  for(i=0; i<cA; i++) {
311
    n = PetscSqrtReal(PetscAbs(l[i])); l[i] = PetscSign(l[i]);
312
    for(j=0; j<cA; j++) X[i*cA+j]/= n;
313
  }
314
  ierr = SlepcDenseMatProd(A,ldA,0.0,1.0,M,rA,rA,cA,PETSC_FALSE,X,cA,cA,cA,PETSC_FALSE);CHKERRQ(ierr);
315
 
316
  PetscFunctionReturn(0);
317
}
318
 
319
 
320
#undef __FUNCT__  
321
#define __FUNCT__ "SlepcDenseIOrth"
322
/*
323
  Compute new_A=A*Y: new_A' * diag(l0) * new_A = l, where
324
    ldA, the leading dimension of A,
325
    rA, cA, rows and columns of A,
326
    l0, vector of size rA,
327
    l, vector of size cA,
328
    auxS, auxiliary vector of more size than 2*cA*cA+rA*cA,
329
    lauxS, size of auxS
330
*/
331
PetscErrorCode SlepcDenseIOrth(PetscScalar *A,PetscInt ldA,PetscInt rA,PetscInt cA,PetscReal *l0,PetscReal *l,PetscScalar *auxS, PetscInt lauxS)
332
{
333
  PetscErrorCode  ierr;
334
  PetscInt        i,j,k;
335
  PetscBLASInt    one=1,rA_;
336
  PetscScalar     *m = auxS,*h = m+rA,nr0;
337
  PetscReal       nr_o,nr;
338
 
339
  PetscFunctionBegin;
340
 
341
  /* Quick exit */
342
  if ((rA == 0) || (cA == 0)) { PetscFunctionReturn(0); }
343
  PetscValidScalarPointer(A,1);
344
  PetscValidScalarPointer(auxS,5);
345
  rA_ = PetscBLASIntCast(rA);
346
 
347
  /* Memory check */
348
  if (lauxS < cA+rA) SETERRQ(PETSC_COMM_SELF,1, "Insufficient memory!");
349
 
350
  for (i=0; i<cA; i++) {
351
    /* m <- diag(l0)*A[i] */
352
    for (k=0; k<rA; k++) m[k] = l0[k]*A[k+i*ldA];
353
    /* nr_o <- mynorm(A[i]'*m) */
354
    ierr = SlepcDenseMatProd(&nr0,1,0.0,1.0,&A[ldA*i],ldA,rA,1,PETSC_TRUE,m,rA,rA,1,PETSC_FALSE);CHKERRQ(ierr);
355
    nr = nr_o = PetscSign(PetscRealPart(nr0))*PetscSqrtReal(PetscAbs(nr0));
356
    for (j=0; j<3 && i>0; j++) {
357
      /* h <- A[0:i-1]'*m */
358
      ierr = SlepcDenseMatProd(h,i,0.0,1.0,A,ldA,rA,i,PETSC_TRUE,m,rA,rA,1,PETSC_FALSE);CHKERRQ(ierr);
359
      /* h <- diag(l)*h */
360
      for (k=0; k<i; k++) h[k] *= l[k];
361
      /* A[i] <- A[i] - A[0:i-1]*h */
362
      ierr = SlepcDenseMatProd(&A[ldA*i],ldA,1.0,-1.0,A,ldA,rA,i,PETSC_FALSE,h,i,i,1,PETSC_FALSE);CHKERRQ(ierr);
363
      /* m <- diag(l0)*A[i] */
364
      for (k=0; k<rA; k++) m[k] = l0[k]*A[k+i*ldA];
365
      /* nr_o <- mynorm(A[i]'*m) */
366
      ierr = SlepcDenseMatProd(&nr0,1,0.0,1.0,&A[ldA*i],ldA,rA,1,PETSC_TRUE,m,rA,rA,1,PETSC_FALSE);CHKERRQ(ierr);
367
      nr = PetscSign(PetscRealPart(nr0))*PetscSqrtReal(PetscAbs(nr0));
368
      if (PetscAbs(nr) < PETSC_MACHINE_EPSILON) SETERRQ(PETSC_COMM_SELF,1, "Linear dependency detected!");
369
      if (PetscAbs(nr) > 0.7*PetscAbs(nr_o)) break;
370
      nr_o = nr;
371
    }
372
    l[i] = PetscSign(nr);
373
    /* A[i] <- A[i]/|nr| */
374
    nr = 1.0/PetscAbs(nr);
375
    BLASscal_(&rA_,&nr,&A[i*ldA],&one);
376
  }
377
 
378
  PetscFunctionReturn(0);
379
}
380
 
381
 
382
 
383
#undef __FUNCT__  
2223 jroman 384
#define __FUNCT__ "SlepcDenseCopy"
1743 eromero 385
/*
386
  Y <- X, where
387
  ldX, leading dimension of X,
388
  rX, cX, rows and columns of X
389
  ldY, leading dimension of Y
390
*/
391
PetscErrorCode SlepcDenseCopy(PetscScalar *Y, PetscInt ldY, PetscScalar *X,
392
                              PetscInt ldX, PetscInt rX, PetscInt cX)
393
{
394
  PetscErrorCode  ierr;
1795 eromero 395
  PetscInt        i;
1743 eromero 396
 
397
  PetscFunctionBegin;
2654 jroman 398
  PetscValidScalarPointer(Y,1);
399
  PetscValidScalarPointer(X,3);
1743 eromero 400
 
401
  if ((ldX < rX) || (ldY < rX)) {
2219 jroman 402
    SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");
1743 eromero 403
  }
1805 eromero 404
 
405
  /* Quick exit */
406
  if (Y == X) {
407
    if (ldX != ldY) {
2219 jroman 408
      SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");
1805 eromero 409
    }
410
    PetscFunctionReturn(0);
411
  }
1743 eromero 412
 
1745 eromero 413
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseCopy,0,0,0,0);CHKERRQ(ierr);
1743 eromero 414
  for(i=0; i<cX; i++) {
415
    ierr = PetscMemcpy(&Y[ldY*i], &X[ldX*i], sizeof(PetscScalar)*rX);
416
    CHKERRQ(ierr);
417
  }
1745 eromero 418
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseCopy,0,0,0,0);CHKERRQ(ierr);
1743 eromero 419
 
420
  PetscFunctionReturn(0);
421
}
422
 
2223 jroman 423
#undef __FUNCT__  
424
#define __FUNCT__ "SlepcDenseCopyTriang"
1752 eromero 425
/*
426
  Y <- X, where
427
  ldX, leading dimension of X,
428
  rX, cX, rows and columns of X
429
  ldY, leading dimension of Y
430
*/
431
PetscErrorCode SlepcDenseCopyTriang(PetscScalar *Y, MatType_t sY, PetscInt ldY,
432
                                    PetscScalar *X, MatType_t sX, PetscInt ldX,
433
                                    PetscInt rX, PetscInt cX)
434
{
435
  PetscErrorCode  ierr;
436
  PetscInt        i,j,c;
1743 eromero 437
 
1752 eromero 438
  PetscFunctionBegin;
2654 jroman 439
  PetscValidScalarPointer(Y,1);
440
  PetscValidScalarPointer(X,4);
1752 eromero 441
 
442
  if ((ldX < rX) || (ldY < rX)) {
2219 jroman 443
    SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");
1752 eromero 444
  }
445
 
2605 eromero 446
  if (sY == 0 && sX == 0) {
447
    ierr = SlepcDenseCopy(Y, ldY, X, ldX, rX, cX);CHKERRQ(ierr);
448
    PetscFunctionReturn(0);
449
  }
450
 
1752 eromero 451
  if (rX != cX) {
2219 jroman 452
    SETERRQ(PETSC_COMM_SELF,1, "Rectangular matrices not supported");
1752 eromero 453
  }
454
 
1756 eromero 455
  if (DVD_IS(sX,DVD_MAT_UTRIANG) &&
456
      DVD_ISNOT(sX,DVD_MAT_LTRIANG)) {        /* UpTr to ... */
457
    if (DVD_IS(sY,DVD_MAT_UTRIANG) &&
458
        DVD_ISNOT(sY,DVD_MAT_LTRIANG))        /* ... UpTr, */
1752 eromero 459
      c = 0;                                      /*     so copy */
1756 eromero 460
    else if(DVD_ISNOT(sY,DVD_MAT_UTRIANG) &&
461
            DVD_IS(sY,DVD_MAT_LTRIANG))       /* ... LoTr, */
1752 eromero 462
      c = 1;                                      /*     so transpose */
463
    else                                          /* ... Full, */
464
      c = 2;                                      /*     so reflect from up */
1756 eromero 465
  } else if (DVD_ISNOT(sX,DVD_MAT_UTRIANG) &&
466
             DVD_IS(sX,DVD_MAT_LTRIANG)) {    /* LoTr to ... */
467
    if (DVD_IS(sY,DVD_MAT_UTRIANG) &&
468
        DVD_ISNOT(sY,DVD_MAT_LTRIANG))        /* ... UpTr, */
1752 eromero 469
      c = 1;                                      /*     so transpose */
1756 eromero 470
    else if(DVD_ISNOT(sY,DVD_MAT_UTRIANG) &&
471
            DVD_IS(sY,DVD_MAT_LTRIANG))       /* ... LoTr, */
1752 eromero 472
      c = 0;                                      /*     so copy */
473
    else                                          /* ... Full, */
474
      c = 3;                                      /*     so reflect fr. down */
475
  } else                                          /* Full to any, */
476
    c = 0;                                        /*     so copy */
477
 
478
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseCopy,0,0,0,0);CHKERRQ(ierr);
479
 
480
  switch(c) {
481
  case 0: /* copy */
482
    for(i=0; i<cX; i++) {
483
      ierr = PetscMemcpy(&Y[ldY*i], &X[ldX*i], sizeof(PetscScalar)*rX);
484
      CHKERRQ(ierr);
485
    }
486
    break;
487
 
488
  case 1: /* transpose */
489
    for(i=0; i<cX; i++)
490
      for(j=0; j<rX; j++)
2556 eromero 491
        Y[ldY*j+i] = PetscConj(X[ldX*i+j]);
1752 eromero 492
    break;
493
 
494
  case 2: /* reflection from up */
495
    for(i=0; i<cX; i++)
496
      for(j=0; j<PetscMin(i+1,rX); j++)
1988 eromero 497
        Y[ldY*j+i] = PetscConj(Y[ldY*i+j] = X[ldX*i+j]);
1752 eromero 498
    break;
499
 
500
  case 3: /* reflection from down */
501
    for(i=0; i<cX; i++)
502
      for(j=i; j<rX; j++)
1988 eromero 503
        Y[ldY*j+i] = PetscConj(Y[ldY*i+j] = X[ldX*i+j]);
1752 eromero 504
    break;
505
  }
506
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseCopy,0,0,0,0);CHKERRQ(ierr);
507
 
508
  PetscFunctionReturn(0);
509
}
510
 
511
 
2223 jroman 512
#undef __FUNCT__  
513
#define __FUNCT__ "SlepcUpdateVectorsZ"
1743 eromero 514
/*
1675 slepc 515
  Compute Y[0..cM-1] <- alpha * X[0..cX-1] * M + beta * Y[0..cM-1],
516
  where X and Y are contiguous global vectors.
1626 slepc 517
*/
1675 slepc 518
PetscErrorCode SlepcUpdateVectorsZ(Vec *Y, PetscScalar beta, PetscScalar alpha,
519
  Vec *X, PetscInt cX, const PetscScalar *M, PetscInt ldM, PetscInt rM,
520
  PetscInt cM)
1626 slepc 521
{
522
  PetscErrorCode  ierr;
1960 eromero 523
 
524
  PetscFunctionBegin;
525
 
526
  ierr = SlepcUpdateVectorsS(Y, 1, beta, alpha, X, cX, 1, M, ldM, rM, cM);
527
  CHKERRQ(ierr);
528
 
529
  PetscFunctionReturn(0);
530
}
531
 
532
 
2223 jroman 533
#undef __FUNCT__  
534
#define __FUNCT__ "SlepcUpdateVectorsS"
1960 eromero 535
/*
1965 eromero 536
  Compute Y[0:dY:cM*dY-1] <- alpha * X[0:dX:cX-1] * M + beta * Y[0:dY:cM*dY-1],
1960 eromero 537
  where X and Y are contiguous global vectors.
538
*/
539
PetscErrorCode SlepcUpdateVectorsS(Vec *Y, PetscInt dY, PetscScalar beta,
540
  PetscScalar alpha, Vec *X, PetscInt cX, PetscInt dX, const PetscScalar *M,
541
  PetscInt ldM, PetscInt rM, PetscInt cM)
542
{
2334 jroman 543
  PetscErrorCode    ierr;
544
  const PetscScalar *px;
545
  PetscScalar       *py;
546
  PetscInt          rX, rY, ldX, ldY, i, rcX;
1626 slepc 547
 
548
  PetscFunctionBegin;
2346 eromero 549
  SlepcValidVecsContiguous(Y,cM*dY,1);
550
  SlepcValidVecsContiguous(X,cX,5);
2654 jroman 551
  PetscValidScalarPointer(M,8);
1626 slepc 552
 
1965 eromero 553
  /* Compute the real number of columns */
554
  rcX = cX/dX;
555
  if (rcX != rM) {
2214 jroman 556
    SETERRQ(((PetscObject)*Y)->comm,1, "Matrix dimensions do not match");
1735 eromero 557
  }
558
 
1965 eromero 559
  if ((rcX == 0) || (rM == 0) || (cM == 0)) {
1626 slepc 560
    PetscFunctionReturn(0);
1960 eromero 561
  } else if ((Y + cM <= X) || (X + cX <= Y) ||
562
             ((X != Y) && ((PetscMax(dX,dY))%(PetscMin(dX,dY))!=0))) {
1626 slepc 563
    /* If Y[0..cM-1] and X[0..cX-1] are not overlapped... */
564
 
565
    /* Get the dense matrices and dimensions associated to Y and X */
1960 eromero 566
    ierr = VecGetLocalSize(X[0], &rX); CHKERRQ(ierr);
567
    ierr = VecGetLocalSize(Y[0], &rY); CHKERRQ(ierr);
568
    if (rX != rY) {
2214 jroman 569
      SETERRQ(((PetscObject)*Y)->comm,1, "The multivectors do not have the same dimension");
1626 slepc 570
    }
2334 jroman 571
    ierr = VecGetArrayRead(X[0], &px);CHKERRQ(ierr);
1626 slepc 572
    ierr = VecGetArray(Y[0], &py);CHKERRQ(ierr);
573
 
1960 eromero 574
    /* Update the strides */
575
    ldX = rX*dX; ldY= rY*dY;
576
 
1626 slepc 577
    /* Do operation */
1965 eromero 578
    ierr = SlepcDenseMatProd(py, ldY, beta, alpha, px, ldX, rX, rcX,
1675 slepc 579
                    PETSC_FALSE, M, ldM, rM, cM, PETSC_FALSE); CHKERRQ(ierr);
1626 slepc 580
 
2334 jroman 581
    ierr = VecRestoreArrayRead(X[0], &px);CHKERRQ(ierr);
1626 slepc 582
    ierr = VecRestoreArray(Y[0], &py);CHKERRQ(ierr);
1992 eromero 583
    for(i=1; i<cM; i++) {
584
      ierr = PetscObjectStateIncrease((PetscObject)Y[dY*i]); CHKERRQ(ierr);
585
    }
1626 slepc 586
 
1960 eromero 587
  } else if ((Y >= X) && (beta == 0.0) && (dY == dX)) {
1626 slepc 588
    /* If not, call to SlepcUpdateVectors */
1965 eromero 589
    ierr = SlepcUpdateStrideVectors(cX, X, Y-X, dX, Y-X+cM*dX, M-ldM*(Y-X),
590
                                    ldM, PETSC_FALSE); CHKERRQ(ierr);
1675 slepc 591
    if (alpha != 1.0)
592
      for (i=0; i<cM; i++) {
593
        ierr = VecScale(Y[i], alpha); CHKERRQ(ierr);
594
      }
1626 slepc 595
  } else {
2214 jroman 596
    SETERRQ(((PetscObject)*Y)->comm,1, "Unsupported case");
1626 slepc 597
  }
598
 
599
  PetscFunctionReturn(0);
600
}
601
 
2223 jroman 602
#undef __FUNCT__  
603
#define __FUNCT__ "SlepcUpdateVectorsD"
1965 eromero 604
/*
605
  Compute X <- alpha * X[0:dX:cX-1] * M
606
  where X is a matrix with non-consecutive columns
607
*/
608
PetscErrorCode SlepcUpdateVectorsD(Vec *X, PetscInt cX, PetscScalar alpha,
609
  const PetscScalar *M, PetscInt ldM, PetscInt rM, PetscInt cM,
610
  PetscScalar *work, PetscInt lwork)
611
{
2334 jroman 612
  PetscErrorCode ierr;
613
  PetscScalar    **px, *Y, *Z;
614
  PetscInt       rX, i, j, rY, rY0, ldY;
1960 eromero 615
 
1965 eromero 616
  PetscFunctionBegin;
2346 eromero 617
  SlepcValidVecsContiguous(X,cX,1);
2654 jroman 618
  PetscValidScalarPointer(M,4);
619
  PetscValidScalarPointer(work,8);
1965 eromero 620
 
621
  if (cX != rM) {
2214 jroman 622
    SETERRQ(((PetscObject)*X)->comm,1, "Matrix dimensions do not match");
1965 eromero 623
  }
624
 
625
  rY = (lwork/2)/cX;
626
  if (rY <= 0) {
2214 jroman 627
    SETERRQ(((PetscObject)*X)->comm,1, "Insufficient work space given");
1965 eromero 628
  }
629
  Y = work; Z = &Y[cX*rY]; ldY = rY;
630
 
631
  if ((cX == 0) || (rM == 0) || (cM == 0)) {
632
    PetscFunctionReturn(0);
633
  }
634
 
635
  /* Get the dense vectors associated to the columns of X */
636
  ierr = PetscMalloc(sizeof(Vec)*cX, &px); CHKERRQ(ierr);
637
  for(i=0; i<cX; i++) {
638
    ierr = VecGetArray(X[i], &px[i]); CHKERRQ(ierr);
639
  }
640
  ierr = VecGetLocalSize(X[0], &rX); CHKERRQ(ierr);
641
 
642
  for(i=0, rY0=0; i<rX; i+=rY0) {
643
    rY0 = PetscMin(rY, rX-i);
644
 
645
    /* Y <- X[i0:i1,:] */
646
    for(j=0; j<cX; j++) {
647
      ierr = SlepcDenseCopy(&Y[ldY*j], ldY, px[j]+i, rX, rY0, 1);
648
      CHKERRQ(ierr);
649
    }
650
 
651
    /* Z <- Y * M */
652
    ierr = SlepcDenseMatProd(Z, ldY, 0.0, alpha, Y, ldY, rY0, cX, PETSC_FALSE,
653
                                                 M, ldM, rM, cM, PETSC_FALSE);
654
    CHKERRQ(ierr);
655
 
656
    /* X <- Z */
657
    for(j=0; j<cM; j++) {
658
      ierr = SlepcDenseCopy(px[j]+i, rX, &Z[j*ldY], ldY, rY0, 1);
659
      CHKERRQ(ierr);
660
    }
661
  }
662
 
663
  for(i=0; i<cX; i++) {
664
    ierr = VecRestoreArray(X[i], &px[i]); CHKERRQ(ierr);
665
  }
666
  ierr = PetscFree(px); CHKERRQ(ierr);
667
 
668
  PetscFunctionReturn(0);
669
}
670
 
671
 
672
 
2223 jroman 673
#undef __FUNCT__  
674
#define __FUNCT__ "VecsMult"
1634 slepc 675
/* Computes M <- [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ]
676
                 [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
677
  where W = U' * V.
1640 slepc 678
  workS0 and workS1 are an auxiliary scalar vector of size
2654 jroman 679
  (eU-sU)*sV*(sU!=0)+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
1640 slepc 680
  is needed, and of size eU*eV.
1634 slepc 681
*/
1747 eromero 682
PetscErrorCode VecsMult(PetscScalar *M, MatType_t sM, PetscInt ldM,
1634 slepc 683
                        Vec *U, PetscInt sU, PetscInt eU,
684
                        Vec *V, PetscInt sV, PetscInt eV,
1640 slepc 685
                        PetscScalar *workS0, PetscScalar *workS1)
1634 slepc 686
{
2334 jroman 687
  PetscErrorCode    ierr;
2654 jroman 688
  PetscInt          ldU, ldV, i, j, k, ms = (eU-sU)*sV*(sU==0?0:1)+(eV-sV)*eU;
2334 jroman 689
  const PetscScalar *pu, *pv;
690
  PetscScalar       *W, *Wr;
1634 slepc 691
 
692
  PetscFunctionBegin;
693
 
694
  /* Check if quick exit */
695
  if ((eU-sU == 0) || (eV-sV == 0))
696
    PetscFunctionReturn(0);
2346 eromero 697
 
698
  SlepcValidVecsContiguous(U,eU,4);
2555 eromero 699
  SlepcValidVecsContiguous(V,eV,7);
2654 jroman 700
  PetscValidScalarPointer(M,1);
1634 slepc 701
 
702
  /* Get the dense matrices and dimensions associated to U and V */
703
  ierr = VecGetLocalSize(U[0], &ldU); CHKERRQ(ierr);
704
  ierr = VecGetLocalSize(V[0], &ldV); CHKERRQ(ierr);
705
  if (ldU != ldV) {
2214 jroman 706
    SETERRQ(((PetscObject)*U)->comm,1, "Matrix dimensions do not match");
1634 slepc 707
  }
2334 jroman 708
  ierr = VecGetArrayRead(U[0], &pu);CHKERRQ(ierr);
709
  ierr = VecGetArrayRead(V[0], &pv);CHKERRQ(ierr);
1634 slepc 710
 
2654 jroman 711
  if (workS0) {
712
    PetscValidScalarPointer(workS0,10);
1640 slepc 713
    W = workS0;
2654 jroman 714
  } else {
715
    ierr = PetscMalloc(sizeof(PetscScalar)*ms, &W);
1640 slepc 716
    CHKERRQ(ierr);
1634 slepc 717
  }
718
 
1745 eromero 719
  ierr = PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
720
 
1640 slepc 721
  if ((sU == 0) && (sV == 0) && (eU == ldM)) {
722
    /* Use the smart memory usage version */
1634 slepc 723
 
1640 slepc 724
    /* W <- U' * V */
1747 eromero 725
    ierr = SlepcDenseMatProdTriang(W, sM, eU,
726
                                   pu, 0, ldU, ldU, eU, PETSC_TRUE,
727
                                   pv, 0, ldV, ldV, eV, PETSC_FALSE);
1640 slepc 728
    CHKERRQ(ierr);
729
 
730
    /* ReduceAll(W, SUM) */
1899 eromero 731
    ierr = MPI_Allreduce(W, M, eU*eV, MPIU_SCALAR, MPIU_SUM,
1640 slepc 732
                         ((PetscObject)U[0])->comm); CHKERRQ(ierr);
1747 eromero 733
  /* Full M matrix */
1756 eromero 734
  } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
735
             DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
2605 eromero 736
    if (workS1) {
2654 jroman 737
      PetscValidScalarPointer(workS1,11);
1640 slepc 738
      Wr = workS1;
2654 jroman 739
      if (PetscAbs(PetscMin(W-workS1, workS1-W)) < ms) {
2605 eromero 740
        SETERRQ(PETSC_COMM_SELF,1, "Consistency broken!");
741
      }
742
    } else {
2654 jroman 743
      ierr = PetscMalloc(sizeof(PetscScalar)*ms, &Wr);
1640 slepc 744
      CHKERRQ(ierr);
745
    }
1747 eromero 746
 
1640 slepc 747
    /* W(0:(eU-sU)*sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
2654 jroman 748
    if (sU > 0) {
749
      ierr = SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
750
                               pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
751
                               pv       , ldV, ldV, sV,    PETSC_FALSE);
752
      CHKERRQ(ierr);
753
    }
1640 slepc 754
 
755
    /* W((eU-sU)*sV:(eU-sU)*sV+(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
2654 jroman 756
    ierr = SlepcDenseMatProd(W+(eU-sU)*sV*(sU > 0?1:0), eU, 0.0, 1.0,
1640 slepc 757
                             pu,        ldU, ldU, eU,    PETSC_TRUE,
758
                             pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
759
    CHKERRQ(ierr);
760
 
761
    /* ReduceAll(W, SUM) */
2654 jroman 762
    ierr = MPI_Allreduce(W, Wr, ms, MPIU_SCALAR,
1899 eromero 763
                      MPIU_SUM, ((PetscObject)U[0])->comm); CHKERRQ(ierr);
1640 slepc 764
 
765
    /* M(...,...) <- W */
2654 jroman 766
    k = 0;
767
    if (sU > 0) for (i=0; i<sV; i++)
1640 slepc 768
      for (j=ldM*i+sU; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
769
    for (i=sV; i<eV; i++)
2654 jroman 770
      for (j=ldM*i; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
1640 slepc 771
 
1747 eromero 772
    if (!workS1) {
773
      ierr = PetscFree(Wr); CHKERRQ(ierr);
774
    }
1745 eromero 775
 
1747 eromero 776
  /* Upper triangular M matrix */
1756 eromero 777
  } else if (DVD_IS(sM,DVD_MAT_UTRIANG) &&
778
             DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
2605 eromero 779
    if (workS1) {
2654 jroman 780
      PetscValidScalarPointer(workS1,11);
1747 eromero 781
      Wr = workS1;
2605 eromero 782
      if (PetscAbs(PetscMin(W-workS1,workS1-W)) < (eV-sV)*eU) {
783
        SETERRQ(PETSC_COMM_SELF,1, "Consistency broken!");
784
      }
785
    } else {
1747 eromero 786
      ierr = PetscMalloc(sizeof(PetscScalar)*(eV-sV)*eU, &Wr);
787
      CHKERRQ(ierr);
788
    }
789
 
790
    /* W(0:(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
791
    ierr = SlepcDenseMatProd(W,         eU,  0.0, 1.0,
792
                             pu,        ldU, ldU, eU,    PETSC_TRUE,
793
                             pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
794
    CHKERRQ(ierr);
795
 
796
    /* ReduceAll(W, SUM) */
1899 eromero 797
    ierr = MPI_Allreduce(W, Wr, (eV-sV)*eU, MPIU_SCALAR, MPIU_SUM,
1747 eromero 798
                         ((PetscObject)U[0])->comm); CHKERRQ(ierr);
799
 
800
    /* M(...,...) <- W */
801
    for (i=sV,k=0; i<eV; i++)
802
        for (j=ldM*i; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
803
 
1640 slepc 804
    if (!workS1) {
805
      ierr = PetscFree(Wr); CHKERRQ(ierr);
806
    }
1747 eromero 807
 
808
  /* Lower triangular M matrix */
1756 eromero 809
  } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
810
             DVD_IS(sM,DVD_MAT_LTRIANG)) {
2605 eromero 811
    if (workS1) {
2654 jroman 812
      PetscValidScalarPointer(workS1,11);
1747 eromero 813
      Wr = workS1;
2605 eromero 814
      if (PetscMin(W - workS1, workS1 - W) < (eU-sU)*eV) {
815
        SETERRQ(PETSC_COMM_SELF,1, "Consistency broken!");
816
      }
817
    } else {
1747 eromero 818
      ierr = PetscMalloc(sizeof(PetscScalar)*(eU-sU)*eV, &Wr);
819
      CHKERRQ(ierr);
820
    }
821
 
822
    /* W(0:(eU-sU)*eV-1) <- U(sU:eU-1)' * V(0:eV-1) */
823
    ierr = SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
824
                             pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
825
                             pv       , ldV, ldV, eV,    PETSC_FALSE);
826
    CHKERRQ(ierr);
827
 
828
    /* ReduceAll(W, SUM) */
1899 eromero 829
    ierr = MPI_Allreduce(W, Wr, (eU-sU)*eV, MPIU_SCALAR, MPIU_SUM,
1747 eromero 830
                         ((PetscObject)U[0])->comm); CHKERRQ(ierr);
831
 
832
    /* M(...,...) <- W */
833
    for (i=0,k=0; i<eV; i++)
834
      for (j=ldM*i+sU; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
835
 
836
    if (!workS1) {
837
      ierr = PetscFree(Wr); CHKERRQ(ierr);
838
    }
1634 slepc 839
  }
840
 
1747 eromero 841
  ierr = PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
842
 
1640 slepc 843
  if (!workS0) {
1634 slepc 844
    ierr = PetscFree(W); CHKERRQ(ierr);
845
  }
846
 
2334 jroman 847
  ierr = VecRestoreArrayRead(U[0], &pu); CHKERRQ(ierr);
848
  ierr = VecRestoreArrayRead(V[0], &pv); CHKERRQ(ierr);
1634 slepc 849
  PetscFunctionReturn(0);
850
}
851
 
1795 eromero 852
 
2223 jroman 853
#undef __FUNCT__  
854
#define __FUNCT__ "VecsMultIa"
1753 eromero 855
/* Computes M <- [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ]
856
                 [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
1808 eromero 857
  where W = local_U' * local_V. Needs VecsMultIb for completing the operation!
1795 eromero 858
  workS0 and workS1 are an auxiliary scalar vector of size
859
  (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
860
  is needed, and of size eU*eV.
861
*/
862
PetscErrorCode VecsMultIa(PetscScalar *M, MatType_t sM, PetscInt ldM,
863
                          Vec *U, PetscInt sU, PetscInt eU,
864
                          Vec *V, PetscInt sV, PetscInt eV)
865
{
866
  PetscErrorCode  ierr;
867
  PetscInt        ldU, ldV;
868
  PetscScalar     *pu, *pv;
869
 
870
  PetscFunctionBegin;
871
 
872
  /* Check if quick exit */
873
  if ((eU-sU == 0) || (eV-sV == 0))
874
    PetscFunctionReturn(0);
875
 
2346 eromero 876
  SlepcValidVecsContiguous(U,eU,4);
2555 eromero 877
  SlepcValidVecsContiguous(V,eV,7);
2654 jroman 878
  PetscValidScalarPointer(M,1);
2346 eromero 879
 
1795 eromero 880
  /* Get the dense matrices and dimensions associated to U and V */
881
  ierr = VecGetLocalSize(U[0], &ldU); CHKERRQ(ierr);
882
  ierr = VecGetLocalSize(V[0], &ldV); CHKERRQ(ierr);
883
  if (ldU != ldV) {
2214 jroman 884
    SETERRQ(((PetscObject)*U)->comm,1, "Matrix dimensions do not match");
1795 eromero 885
  }
886
  ierr = VecGetArray(U[0], &pu);CHKERRQ(ierr);
887
  ierr = VecGetArray(V[0], &pv);CHKERRQ(ierr);
888
 
889
  if ((sU == 0) && (sV == 0) && (eU == ldM)) {
890
    /* M <- local_U' * local_V */
891
    ierr = SlepcDenseMatProdTriang(M, sM, eU,
892
                                   pu, 0, ldU, ldU, eU, PETSC_TRUE,
893
                                   pv, 0, ldV, ldV, eV, PETSC_FALSE);
894
    CHKERRQ(ierr);
895
 
896
  /* Full M matrix */
897
  } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
898
             DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
899
    /* M(sU:eU-1,0:sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
900
    ierr = SlepcDenseMatProd(&M[sU], ldM, 0.0, 1.0,
901
                             pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
902
                             pv       , ldV, ldV, sV,    PETSC_FALSE);
903
    CHKERRQ(ierr);
904
 
905
    /* M(0:eU-1,sV:eV-1) <- U(0:eU-1)' * V(sV:eV-1) */
906
    ierr = SlepcDenseMatProd(&M[ldM*sV], ldM, 0.0, 1.0,
907
                             pu,        ldU, ldU, eU,    PETSC_TRUE,
908
                             pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
909
    CHKERRQ(ierr);
910
 
911
  /* Other structures */
2214 jroman 912
  } else SETERRQ(((PetscObject)*U)->comm,1, "Matrix structure not supported");
1795 eromero 913
 
914
  ierr = VecRestoreArray(U[0], &pu); CHKERRQ(ierr);
1992 eromero 915
  ierr = PetscObjectStateDecrease((PetscObject)U[0]); CHKERRQ(ierr);
1795 eromero 916
  ierr = VecRestoreArray(V[0], &pv); CHKERRQ(ierr);
1992 eromero 917
  ierr = PetscObjectStateDecrease((PetscObject)V[0]); CHKERRQ(ierr);
1795 eromero 918
 
919
  PetscFunctionReturn(0);
920
}
921
 
922
 
2223 jroman 923
#undef __FUNCT__  
924
#define __FUNCT__ "VecsMultIc"
1808 eromero 925
/* Computes M <- nprocs*M
926
  where nprocs is the number of processors.
927
*/
928
PetscErrorCode VecsMultIc(PetscScalar *M, MatType_t sM, PetscInt ldM,
929
                          PetscInt rM, PetscInt cM, Vec V)
930
{
2117 eromero 931
  int        i,j,n;
1808 eromero 932
 
933
  PetscFunctionBegin;
934
 
935
  /* Check if quick exit */
936
  if ((rM == 0) || (cM == 0))
937
    PetscFunctionReturn(0);
2654 jroman 938
  PetscValidScalarPointer(M,1);
1808 eromero 939
 
2214 jroman 940
  if (sM != 0) SETERRQ(((PetscObject)V)->comm,1, "Matrix structure not supported");
1808 eromero 941
 
942
  MPI_Comm_size(((PetscObject)V)->comm, &n);
943
 
944
  for(i=0; i<cM; i++)
945
    for(j=0; j<rM; j++)
946
      M[ldM*i+j]/= (PetscScalar)n;
947
 
948
  PetscFunctionReturn(0);
949
}
950
 
951
 
2223 jroman 952
#undef __FUNCT__  
953
#define __FUNCT__ "VecsMultIb"
1795 eromero 954
/* Computes N <- Allreduce( [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ] )
955
                          ( [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ] )
1753 eromero 956
  where W = U' * V.
1795 eromero 957
  workS0 and workS1 are an auxiliary scalar vector of size
958
  (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
959
  is needed, and of size eU*eV.
960
*/
961
PetscErrorCode VecsMultIb(PetscScalar *M, MatType_t sM, PetscInt ldM,
962
                          PetscInt rM, PetscInt cM, PetscScalar *auxS,
963
                          Vec V)
964
{
965
  PetscErrorCode  ierr;
966
  PetscScalar     *W, *Wr;
967
 
968
  PetscFunctionBegin;
969
 
970
  /* Check if quick exit */
971
  if ((rM == 0) || (cM == 0))
972
    PetscFunctionReturn(0);
2654 jroman 973
  PetscValidScalarPointer(M,1);
1795 eromero 974
 
2779 eromero 975
  if (auxS) {
976
    PetscValidScalarPointer(auxS,6);
1795 eromero 977
    W = auxS;
2779 eromero 978
  } else {
1795 eromero 979
    ierr = PetscMalloc(sizeof(PetscScalar)*rM*cM*2, &W);
980
    CHKERRQ(ierr);
981
  }
982
  Wr = W + rM*cM;
983
 
984
  ierr = PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
985
 
986
  if (sM == 0) {
987
    /* W <- M */
988
    ierr = SlepcDenseCopy(W, rM, M, ldM, rM, cM); CHKERRQ(ierr);
989
 
990
    /* Wr <- ReduceAll(W, SUM) */
1899 eromero 991
    ierr = MPI_Allreduce(W, Wr, rM*cM, MPIU_SCALAR, MPIU_SUM,
1795 eromero 992
                         ((PetscObject)V)->comm); CHKERRQ(ierr);
993
 
994
    /* M <- Wr */
995
    ierr = SlepcDenseCopy(M, ldM, Wr, rM, rM, cM); CHKERRQ(ierr);
996
 
997
  /* Other structures */
2214 jroman 998
  } else SETERRQ(((PetscObject)V)->comm,1, "Matrix structure not supported");
1795 eromero 999
 
1000
  ierr = PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
1001
 
1002
  if (!auxS) {
1003
    ierr = PetscFree(W); CHKERRQ(ierr);
1004
  }
1005
 
1006
  PetscFunctionReturn(0);
1007
}
1008
 
1009
 
2223 jroman 1010
#undef __FUNCT__  
1011
#define __FUNCT__ "VecsMultS"
1795 eromero 1012
/* Computes M <- [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ]
1013
                 [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
1014
  where W = U' * V.
1753 eromero 1015
  r, a DvdReduction structure,
1016
  sr, an structure DvdMult_copy_func.
1017
*/
1018
PetscErrorCode VecsMultS(PetscScalar *M, MatType_t sM, PetscInt ldM,
1019
                         Vec *U, PetscInt sU, PetscInt eU,
1020
                         Vec *V, PetscInt sV, PetscInt eV, DvdReduction *r,
1021
                         DvdMult_copy_func *sr)
1022
{
2334 jroman 1023
  PetscErrorCode    ierr;
2682 jroman 1024
  PetscInt          ldU, ldV, ms = (eU-sU)*sV*(sU==0?0:1)+(eV-sV)*eU;
2334 jroman 1025
  const PetscScalar *pu, *pv;
1026
  PetscScalar       *W;
1753 eromero 1027
 
1028
  PetscFunctionBegin;
1029
 
1030
  /* Check if quick exit */
1031
  if ((eU-sU == 0) || (eV-sV == 0))
1032
    PetscFunctionReturn(0);
1033
 
2346 eromero 1034
  SlepcValidVecsContiguous(U,eU,4);
2555 eromero 1035
  SlepcValidVecsContiguous(V,eV,7);
2654 jroman 1036
  PetscValidScalarPointer(M,1);
2346 eromero 1037
 
1753 eromero 1038
  /* Get the dense matrices and dimensions associated to U and V */
1039
  ierr = VecGetLocalSize(U[0], &ldU); CHKERRQ(ierr);
1040
  ierr = VecGetLocalSize(V[0], &ldV); CHKERRQ(ierr);
1041
  if (ldU != ldV) {
2214 jroman 1042
    SETERRQ(((PetscObject)*U)->comm,1, "Matrix dimensions do not match");
1753 eromero 1043
  }
2334 jroman 1044
  ierr = VecGetArrayRead(U[0], &pu);CHKERRQ(ierr);
1045
  ierr = VecGetArrayRead(V[0], &pv);CHKERRQ(ierr);
1753 eromero 1046
 
1047
  ierr = PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
1048
 
2654 jroman 1049
  if ((sU == 0) && (sV == 0)) {
1753 eromero 1050
    /* Use the smart memory usage version */
1051
 
1052
    /* Add the reduction to r */
1053
    ierr = SlepcAllReduceSum(r, eU*eV, VecsMultS_copy_func, sr, &W);
1054
    CHKERRQ(ierr);
1055
 
1056
    /* W <- U' * V */
1057
    ierr = SlepcDenseMatProdTriang(W, sM, eU,
1058
                                   pu, 0, ldU, ldU, eU, PETSC_TRUE,
1059
                                   pv, 0, ldV, ldV, eV, PETSC_FALSE);
1060
    CHKERRQ(ierr);
1061
 
1062
    /* M <- ReduceAll(W, SUM) */
1063
    sr->M = M;    sr->ld = ldM;
1064
    sr->i0 = 0;   sr->i1 = eV;    sr->s0 = sU;    sr->e0 = eU;
1065
                  sr->i2 = eV;
1066
 
1067
  /* Full M matrix */
1756 eromero 1068
  } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
1069
             DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
1753 eromero 1070
    /* Add the reduction to r */
2682 jroman 1071
    ierr = SlepcAllReduceSum(r, ms, VecsMultS_copy_func, sr, &W);
1753 eromero 1072
    CHKERRQ(ierr);
1073
 
1074
    /* W(0:(eU-sU)*sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
1075
    ierr = SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
1076
                             pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
1077
                             pv       , ldV, ldV, sV,    PETSC_FALSE);
1078
    CHKERRQ(ierr);
1079
 
1080
    /* W((eU-sU)*sV:(eU-sU)*sV+(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
2682 jroman 1081
    ierr = SlepcDenseMatProd(W+(eU-sU)*sV*(sU > 0?1:0), eU, 0.0, 1.0,
1753 eromero 1082
                             pu,        ldU, ldU, eU,    PETSC_TRUE,
1083
                             pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
1084
    CHKERRQ(ierr);
1085
 
1086
    /* M <- ReduceAll(W, SUM) */
2682 jroman 1087
    sr->M = M;            sr->ld = ldM;
1088
    sr->i0 = sU>0?0:sV;   sr->i1 = sV;    sr->s0 = sU;    sr->e0 = eU;
1089
                          sr->i2 = eV;    sr->s1 = 0;     sr->e1 = eU;
1753 eromero 1090
 
1091
  /* Upper triangular M matrix */
1756 eromero 1092
  } else if (DVD_IS(sM,DVD_MAT_UTRIANG) &&
1093
             DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
1753 eromero 1094
    /* Add the reduction to r */
1095
    ierr = SlepcAllReduceSum(r, (eV-sV)*eU, VecsMultS_copy_func, sr, &W);
1096
    CHKERRQ(ierr);
1097
 
1098
    /* W(0:(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
1099
    ierr = SlepcDenseMatProd(W,         eU,  0.0, 1.0,
1100
                             pu,        ldU, ldU, eU,    PETSC_TRUE,
1101
                             pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
1102
    CHKERRQ(ierr);
1103
 
1104
    /* M <- ReduceAll(W, SUM) */
1105
    sr->M = M;    sr->ld = ldM;
1106
    sr->i0 = sV;  sr->i1 = eV;    sr->s0 = 0;     sr->e0 = eU;
1107
                  sr->i2 = eV;
1108
 
1109
  /* Lower triangular M matrix */
1756 eromero 1110
  } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
1111
             DVD_IS(sM,DVD_MAT_LTRIANG)) {
1753 eromero 1112
    /* Add the reduction to r */
1113
    ierr = SlepcAllReduceSum(r, (eU-sU)*eV, VecsMultS_copy_func, sr, &W);
1114
    CHKERRQ(ierr);
1115
 
1116
    /* W(0:(eU-sU)*eV-1) <- U(sU:eU-1)' * V(0:eV-1) */
1117
    ierr = SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
1118
                             pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
1119
                             pv       , ldV, ldV, eV,    PETSC_FALSE);
1120
    CHKERRQ(ierr);
1121
 
1122
    /* ReduceAll(W, SUM) */
1123
    sr->M = M;    sr->ld = ldM;
1124
    sr->i0 = 0;   sr->i1 = eV;    sr->s0 = sU;    sr->e0 = eU;
1125
                  sr->i2 = eV;
1126
  }
1127
 
1128
  ierr = PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
1129
 
2334 jroman 1130
  ierr = VecRestoreArrayRead(U[0], &pu); CHKERRQ(ierr);
1131
  ierr = VecRestoreArrayRead(V[0], &pv); CHKERRQ(ierr);
1753 eromero 1132
  PetscFunctionReturn(0);
1133
}
1134
 
1135
#undef __FUNCT__  
1136
#define __FUNCT__ "VecsMultS_copy_func"
1137
PetscErrorCode VecsMultS_copy_func(PetscScalar *out, PetscInt size_out,
1138
                                   void *ptr)
1139
{
1140
  PetscInt        i, j, k;
1141
  DvdMult_copy_func
1142
                  *sr = (DvdMult_copy_func*)ptr;
1143
 
1144
  PetscFunctionBegin;
2654 jroman 1145
  PetscValidScalarPointer(out,1);
1753 eromero 1146
 
1147
  for (i=sr->i0,k=0; i<sr->i1; i++)
1148
    for (j=sr->ld*i+sr->s0; j<sr->ld*i+sr->e0; j++,k++) sr->M[j] = out[k];
1149
  for (i=sr->i1; i<sr->i2; i++)
1150
    for (j=sr->ld*i+sr->s1; j<sr->ld*i+sr->e1; j++,k++) sr->M[j] = out[k];
1151
 
2214 jroman 1152
  if (k != size_out) SETERRQ(PETSC_COMM_SELF,1, "Wrong size");
1753 eromero 1153
 
1154
  PetscFunctionReturn(0);
1155
}
1156
 
2223 jroman 1157
#undef __FUNCT__  
1158
#define __FUNCT__ "VecsOrthonormalize"
1675 slepc 1159
/* Orthonormalize a chunk of parallel vector.
1160
   NOTE: wS0 and wS1 must be of size n*n.
1161
*/
1162
PetscErrorCode VecsOrthonormalize(Vec *V, PetscInt n, PetscScalar *wS0,
1163
                                  PetscScalar *wS1)
1164
{
1165
  PetscErrorCode  ierr;
1166
  PetscBLASInt    nn = n, info, ld;
1992 eromero 1167
  PetscInt        ldV, i;
1675 slepc 1168
  PetscScalar     *H, *T, one=1.0, *pv;
1169
 
1170
  PetscFunctionBegin;
1171
 
1172
  if (!wS0) {
1173
    ierr = PetscMalloc(sizeof(PetscScalar)*n*n, &H); CHKERRQ(ierr);
2654 jroman 1174
  } else {
1175
    PetscValidScalarPointer(wS0,3);
1675 slepc 1176
    H = wS0;
2654 jroman 1177
  }
1675 slepc 1178
  if (!wS1) {
1179
    ierr = PetscMalloc(sizeof(PetscScalar)*n*n, &T); CHKERRQ(ierr);
2654 jroman 1180
  } else {
1181
    PetscValidScalarPointer(wS1,4);
1675 slepc 1182
    T = wS1;
2654 jroman 1183
  }
1675 slepc 1184
 
1185
  /* H <- V' * V */
1747 eromero 1186
  ierr = VecsMult(H, 0, n, V, 0, n, V, 0, n, T, PETSC_NULL); CHKERRQ(ierr);
1675 slepc 1187
 
1188
  /* H <- chol(H) */
1189
  LAPACKpbtrf_("U", &nn, &nn, H, &nn, &info);
2214 jroman 1190
  if (info) SETERRQ1(((PetscObject)*V)->comm,PETSC_ERR_LIB, "Error in Lapack PBTRF %d", info);
1675 slepc 1191
 
1192
  /* V <- V * inv(H) */
1193
  ierr = VecGetLocalSize(V[0], &ldV); CHKERRQ(ierr);
1194
  ierr = VecGetArray(V[0], &pv);CHKERRQ(ierr);
1195
  ld = ldV;
1196
  BLAStrsm_("R", "U", "N", "N", &ld, &nn, &one, H, &nn, pv, &ld);
1197
  ierr = VecRestoreArray(V[0], &pv);CHKERRQ(ierr);
1992 eromero 1198
  for(i=1; i<n; i++) {
1199
    ierr = PetscObjectStateIncrease((PetscObject)V[i]); CHKERRQ(ierr);
1200
  }
1675 slepc 1201
 
1202
  if (!wS0) {
1203
    ierr = PetscFree(H); CHKERRQ(ierr);
1204
  }
1205
  if (!wS1) {
1206
    ierr = PetscFree(T); CHKERRQ(ierr);
1207
  }
1208
 
1209
  PetscFunctionReturn(0);
1210
}
1211
 
2223 jroman 1212
#undef __FUNCT__  
1213
#define __FUNCT__ "SlepcAllReduceSumBegin"
1753 eromero 1214
/*
1215
  Sum up several arrays with only one call to MPIReduce.
1216
*/
1217
PetscErrorCode SlepcAllReduceSumBegin(DvdReductionChunk *ops,
1218
                                      PetscInt max_size_ops,
1219
                                      PetscScalar *in, PetscScalar *out,
1220
                                      PetscInt max_size_in, DvdReduction *r,
1221
                                      MPI_Comm comm)
1222
{
1223
  PetscFunctionBegin;
2654 jroman 1224
  PetscValidScalarPointer(in,3);
1225
  PetscValidScalarPointer(out,4);
1753 eromero 1226
 
1227
  r->in = in;
1228
  r->out = out;
1229
  r->size_in = 0;
1230
  r->max_size_in = max_size_in;
1231
  r->ops = ops;
1232
  r->size_ops = 0;
1233
  r->max_size_ops = max_size_ops;
1234
  r->comm = comm;
1235
 
1236
  PetscFunctionReturn(0);
1237
}
1238
 
1239
#undef __FUNCT__  
1240
#define __FUNCT__ "SlepcAllReduceSum"
1241
PetscErrorCode SlepcAllReduceSum(DvdReduction *r, PetscInt size_in,
1242
                                 DvdReductionPostF f, void *ptr,
1243
                                 PetscScalar **in)
1244
{
1245
  PetscFunctionBegin;
1246
 
1247
  *in = r->in + r->size_in;
1248
  r->ops[r->size_ops].out = r->out + r->size_in;
1249
  r->ops[r->size_ops].size_out = size_in;
1250
  r->ops[r->size_ops].f = f;
1251
  r->ops[r->size_ops].ptr = ptr;
1252
  if (++(r->size_ops) > r->max_size_ops) {
2214 jroman 1253
    SETERRQ(PETSC_COMM_SELF,1, "max_size_ops is not large enough");
1753 eromero 1254
  }
1255
  if ((r->size_in+= size_in) > r->max_size_in) {
2214 jroman 1256
    SETERRQ(PETSC_COMM_SELF,1, "max_size_in is not large enough");
1753 eromero 1257
  }
1258
 
1259
  PetscFunctionReturn(0);
1260
}
1261
 
1262
 
1263
#undef __FUNCT__  
1264
#define __FUNCT__ "SlepcAllReduceSumEnd"
1265
PetscErrorCode SlepcAllReduceSumEnd(DvdReduction *r)
1266
{
1267
  PetscErrorCode  ierr;
1268
  PetscInt        i;
1269
 
1270
  PetscFunctionBegin;
1271
 
1795 eromero 1272
  /* Check if quick exit */
1273
  if (r->size_ops == 0)
1274
    PetscFunctionReturn(0);
1275
 
1753 eromero 1276
  /* Call the MPIAllReduce routine */
1899 eromero 1277
  ierr = MPI_Allreduce(r->in, r->out, r->size_in, MPIU_SCALAR, MPIU_SUM,
1753 eromero 1278
                       r->comm); CHKERRQ(ierr);
1279
 
1280
  /* Call the postponed routines */
1281
  for(i=0; i<r->size_ops; i++) {
1282
    ierr = r->ops[i].f(r->ops[i].out, r->ops[i].size_out, r->ops[i].ptr);
1283
    CHKERRQ(ierr);
1284
  }
1285
 
1286
  /* Tag the operation as done */
1287
  r->size_ops = 0;
1288
 
1289
  PetscFunctionReturn(0);
1290
}
1795 eromero 1291
 
1831 eromero 1292
 
1795 eromero 1293
#undef __FUNCT__  
1294
#define __FUNCT__ "dvd_orthV"
2605 eromero 1295
/* auxS: size_cX+V_new_e */
1989 eromero 1296
PetscErrorCode dvd_orthV(IP ip, Vec *DS, PetscInt size_DS, Vec *cX,
1297
                         PetscInt size_cX, Vec *V, PetscInt V_new_s,
2605 eromero 1298
                         PetscInt V_new_e, PetscScalar *auxS,
1989 eromero 1299
                         PetscRandom rand)
1795 eromero 1300
{
1301
  PetscErrorCode  ierr;
1302
  PetscInt        i, j;
2216 jroman 1303
  PetscBool       lindep;
1795 eromero 1304
  PetscReal       norm;
1869 eromero 1305
  PetscScalar     *auxS0 = auxS;
1831 eromero 1306
 
1795 eromero 1307
  PetscFunctionBegin;
1831 eromero 1308
 
1795 eromero 1309
  /* Orthonormalize V with IP */
1310
  for (i=V_new_s; i<V_new_e; i++) {
1311
    for(j=0; j<3; j++) {
1975 eromero 1312
      if (j>0) { ierr = SlepcVecSetRandom(V[i], rand); CHKERRQ(ierr); }
1831 eromero 1313
      if (cX + size_cX == V) {
1314
        /* If cX and V are contiguous, orthogonalize in one step */
1989 eromero 1315
        ierr = IPOrthogonalize(ip, size_DS, DS, size_cX+i, PETSC_NULL, cX,
1831 eromero 1316
                               V[i], auxS0, &norm, &lindep); CHKERRQ(ierr);
1989 eromero 1317
      } else if (DS) {
1318
        /* Else orthogonalize first against DS, and then against cX and V */
1319
        ierr = IPOrthogonalize(ip, size_DS, DS, size_cX, PETSC_NULL, cX,
1831 eromero 1320
                               V[i], auxS0, PETSC_NULL, &lindep); CHKERRQ(ierr);
2177 jroman 1321
        if(!lindep) {
1831 eromero 1322
          ierr = IPOrthogonalize(ip, 0, PETSC_NULL, i, PETSC_NULL, V,
1323
                                 V[i], auxS0, &norm, &lindep); CHKERRQ(ierr);
1324
        }
1989 eromero 1325
      } else {
1326
        /* Else orthogonalize first against cX and then against V */
1327
        ierr = IPOrthogonalize(ip, size_cX, cX, i, PETSC_NULL, V,
1328
                               V[i], auxS0, &norm, &lindep); CHKERRQ(ierr);
1831 eromero 1329
      }
2177 jroman 1330
      if(!lindep && (norm > PETSC_MACHINE_EPSILON)) break;
2499 jroman 1331
      ierr = PetscInfo1(ip,"Orthonormalization problems adding the vector %D to the searching subspace\n",i);CHKERRQ(ierr);
1795 eromero 1332
    }
2177 jroman 1333
    if(lindep || (norm < PETSC_MACHINE_EPSILON)) {
2214 jroman 1334
        SETERRQ(((PetscObject)ip)->comm,1, "Error during orthonormalization of eigenvectors");
1795 eromero 1335
    }
1336
    ierr = VecScale(V[i], 1.0/norm); CHKERRQ(ierr);
1337
  }
1831 eromero 1338
 
1795 eromero 1339
  PetscFunctionReturn(0);
1340
}
2555 eromero 1341
 
1342
 
1343
#undef __FUNCT__  
1344
#define __FUNCT__ "dvd_BorthV"
2605 eromero 1345
/* auxS: size_cX+V_new_e+1 */
2779 eromero 1346
PetscErrorCode dvd_BorthV(IP ip, Vec *DS, Vec *BDS,PetscReal *BDSn, PetscInt size_DS, Vec *cX,
1347
                         Vec *BcX, PetscReal *BcXn, PetscInt size_cX, Vec *V, Vec *BV,PetscReal *BVn,
2555 eromero 1348
                         PetscInt V_new_s, PetscInt V_new_e,
2605 eromero 1349
                         PetscScalar *auxS, PetscRandom rand)
2555 eromero 1350
{
1351
  PetscErrorCode  ierr;
1352
  PetscInt        i, j;
1353
  PetscBool       lindep;
1354
  PetscReal       norm;
1355
  PetscScalar     *auxS0 = auxS;
1831 eromero 1356
 
2555 eromero 1357
  PetscFunctionBegin;
1358
 
1359
  /* Orthonormalize V with IP */
1360
  for (i=V_new_s; i<V_new_e; i++) {
1361
    for(j=0; j<3; j++) {
1362
      if (j>0) { ierr = SlepcVecSetRandom(V[i], rand); CHKERRQ(ierr); }
2779 eromero 1363
      if (cX + size_cX == V && BcX + size_cX == BV) {
2555 eromero 1364
        /* If cX and V are contiguous, orthogonalize in one step */
2779 eromero 1365
        ierr = IPBOrthogonalize(ip, size_DS, DS, BDS, BDSn, size_cX+i, PETSC_NULL, cX, BcX, BcXn,
2555 eromero 1366
                               V[i], BV[i], auxS0, &norm, &lindep); CHKERRQ(ierr);
1367
      } else if (DS) {
1368
        /* Else orthogonalize first against DS, and then against cX and V */
2779 eromero 1369
        ierr = IPBOrthogonalize(ip, size_DS, DS, BDS, BDSn, size_cX, PETSC_NULL, cX, BcX, BcXn,
2555 eromero 1370
                               V[i], BV[i], auxS0, PETSC_NULL, &lindep); CHKERRQ(ierr);
1371
        if(!lindep) {
2779 eromero 1372
          ierr = IPBOrthogonalize(ip, 0, PETSC_NULL, PETSC_NULL, PETSC_NULL, i, PETSC_NULL, V, BV, BVn,
2555 eromero 1373
                                  V[i], BV[i], auxS0, &norm, &lindep); CHKERRQ(ierr);
1374
        }
1375
      } else {
1376
        /* Else orthogonalize first against cX and then against V */
2779 eromero 1377
        ierr = IPBOrthogonalize(ip, size_cX, cX, BcX, BcXn, i, PETSC_NULL, V, BV, BVn,
2555 eromero 1378
                                V[i], BV[i], auxS0, &norm, &lindep); CHKERRQ(ierr);
1379
      }
2779 eromero 1380
      if(!lindep && (PetscAbs(norm) > PETSC_MACHINE_EPSILON)) break;
2555 eromero 1381
      ierr = PetscInfo1(ip, "Orthonormalization problems adding the vector %d to the searching subspace\n", i);
1382
      CHKERRQ(ierr);
1383
    }
2779 eromero 1384
    if(lindep || (PetscAbs(norm) < PETSC_MACHINE_EPSILON)) {
2555 eromero 1385
        SETERRQ(((PetscObject)ip)->comm,1, "Error during the orthonormalization of the eigenvectors!");
1386
    }
2779 eromero 1387
    if (BVn) BVn[i] = norm > 0.0 ? 1.0 : -1.0;
1388
    norm = PetscAbs(norm);
2555 eromero 1389
    ierr = VecScale(V[i], 1.0/norm); CHKERRQ(ierr);
1390
    ierr = VecScale(BV[i], 1.0/norm); CHKERRQ(ierr);
1391
  }
1392
 
1393
  PetscFunctionReturn(0);
1394
}
1395
 
1968 eromero 1396
#undef __FUNCT__  
1397
#define __FUNCT__ "dvd_compute_eigenvectors"
1398
/*
1399
  Compute eigenvectors associated to the Schur decomposition (S, T) and
1400
  save the left vectors in pY and the right vectors in pX, where
1401
  n, size of the eigenproblem
1402
  ldS, ldT, leading dimension of S and T
1403
  ldpX, ldpY, leading dimension of pX and pY
1404
  auxS, auxiliar scalar of length:
2654 jroman 1405
    double standard 3n, double generalized 6n,
1406
    complex standard 3n, complex generalized 3n
1968 eromero 1407
  size_auxS, the length of auxS
1408
  doProd, if true pX and pY return the eigenvectors premultiplied by the input vectors stored in pX and pY respectively
1409
*/
1410
PetscErrorCode dvd_compute_eigenvectors(PetscInt n_, PetscScalar *S,
2654 jroman 1411
  PetscInt ldS_, PetscScalar *T, PetscInt ldT_, PetscScalar *pX,
1968 eromero 1412
  PetscInt ldpX_, PetscScalar *pY, PetscInt ldpY_, PetscScalar *auxS,
2216 jroman 1413
  PetscInt size_auxS, PetscBool doProd)
1968 eromero 1414
{
2392 jroman 1415
#if defined(SLEPC_MISSING_LAPACK_GGEV)
1416
  PetscFunctionBegin;
1417
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GGEV - Lapack routine is unavailable.");
1418
#else
2654 jroman 1419
  PetscBLASInt    n, ldpX, ldpY, nout, info, ldS, ldT;
1968 eromero 1420
  const char      *side, *howmny;
1421
#if defined(PETSC_USE_COMPLEX)
1422
  PetscReal       *auxR;
1423
#endif
1424
 
1425
  PetscFunctionBegin;
2654 jroman 1426
  PetscValidScalarPointer(S,2);
1427
  if (T) { PetscValidScalarPointer(T,4); }
1428
  if (pX) { PetscValidScalarPointer(pX,6); }
1429
  if (pY) { PetscValidScalarPointer(pY,8); }
1430
  PetscValidScalarPointer(auxS,10);
2117 eromero 1431
  n = PetscBLASIntCast(n_);
2654 jroman 1432
  ldpX = PetscBLASIntCast(PetscMax(ldpX_,1));
1433
  ldpY = PetscBLASIntCast(PetscMax(ldpY_,1));
1434
  ldS = PetscBLASIntCast(PetscMax(ldS_,1));
1435
  ldT = PetscBLASIntCast(PetscMax(ldT_,1));
2117 eromero 1436
 
1968 eromero 1437
  if (pX && pY) side = "B";
1438
  else if (pX)  side = "R";
1439
  else if (pY)  side = "L";
1440
  else { PetscFunctionReturn(0); }
1441
 
2177 jroman 1442
  howmny = doProd?"B":"A";
1968 eromero 1443
 
1444
  if (T) {
1445
    /* [eigr, pX] = eig(S, T) */
1446
#if defined(PETSC_USE_COMPLEX)
1447
    auxR = (PetscReal*)auxS; auxS = (PetscScalar*)(auxR+2*n); size_auxS-= 2*n;
1448
    if (size_auxS < 2*n)
2214 jroman 1449
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Insufficient work space for xTGEVC");
2654 jroman 1450
    LAPACKtgevc_(side,howmny,PETSC_NULL,&n,S,&ldS,T,&ldT,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,auxR,&info);
2394 jroman 1451
    if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTGEVC %d",info);
1968 eromero 1452
#else
2654 jroman 1453
    if (size_auxS < 6*n)
1454
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Insufficient work space for xTGEVC");
1455
    LAPACKtgevc_(side,howmny,PETSC_NULL,&n,S,&ldS,T,&ldT,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,&info);
1968 eromero 1456
#endif
1457
  } else {
1458
    /* [eigr, pX] = eig(S) */
1459
#if defined(PETSC_USE_COMPLEX)
1460
    auxR = (PetscReal*)auxS; auxS = (PetscScalar*)(auxR+n); size_auxS-= n;
1461
    if (size_auxS < 2*n)
2214 jroman 1462
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Insufficient work space for xTREVC");
2654 jroman 1463
    LAPACKtrevc_(side,howmny,PETSC_NULL,&n,S,&ldS,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,auxR,&info);
1968 eromero 1464
#else
2654 jroman 1465
    if (size_auxS < 3*n)
1466
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Insufficient work space for xTREVC");
1467
    LAPACKtrevc_(side,howmny,PETSC_NULL,&n,S,&ldS,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,&info);
1968 eromero 1468
#endif
2394 jroman 1469
    if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTREVC %d",info);
1968 eromero 1470
  }
1471
  PetscFunctionReturn(0);
2392 jroman 1472
#endif
1968 eromero 1473
}
2034 eromero 1474
 
2555 eromero 1475
 
1476
#undef __FUNCT__
1477
#define __FUNCT__ "EPSSortDenseHEP"
2556 eromero 1478
PetscErrorCode EPSSortDenseHEP(EPS eps, PetscInt n, PetscInt k, PetscScalar *w, PetscScalar *V, PetscInt ldV)
2034 eromero 1479
{
2555 eromero 1480
  PetscInt        i, j, result, pos;
2556 eromero 1481
  PetscReal       re;
1482
  PetscScalar     t;
2568 eromero 1483
  PetscBLASInt    n_,one=1;
2034 eromero 1484
  PetscErrorCode  ierr;
2555 eromero 1485
 
2034 eromero 1486
  PetscFunctionBegin;
2654 jroman 1487
  PetscValidScalarPointer(w,4);
1488
  PetscValidScalarPointer(V,5);
2568 eromero 1489
  n_ = PetscBLASIntCast(n);
2555 eromero 1490
 
1491
  /* selection sort */
1492
  for (i=k;i<n-1;i++) {
2556 eromero 1493
    re = PetscRealPart(w[i]);
2555 eromero 1494
    pos = 0;
1495
    /* find minimum eigenvalue */
1496
    for (j=i+1;j<n;j++) {
2556 eromero 1497
      ierr = EPSCompareEigenvalues(eps,re,0,PetscRealPart(w[j]),0,&result);CHKERRQ(ierr);
2555 eromero 1498
      if (result > 0) {
2556 eromero 1499
        re = PetscRealPart(w[j]);
2555 eromero 1500
        pos = j;
1501
      }
2034 eromero 1502
    }
2555 eromero 1503
    /* interchange the pairs i and pos */
1504
    if (pos) {
1505
      BLASswap_(&n_, &V[ldV*pos], &one, &V[ldV*i], &one);
1506
      t = w[i]; w[i] = w[pos]; w[pos] = t;
1507
    }
2034 eromero 1508
  }
2555 eromero 1509
 
2034 eromero 1510
  PetscFunctionReturn(0);
1511
}
2682 jroman 1512
 
1513
#undef __FUNCT__
1514
#define __FUNCT__ "EPSCleanDenseSchur"
1515
/* Write zeros from the column k to n in the lower triangular part of the
1516
   matrices S and T, and inside 2-by-2 diagonal blocks of T in order to
1517
   make (S,T) a valid Schur decompositon.
1518
*/
1519
PetscErrorCode EPSCleanDenseSchur(PetscInt n,PetscInt k,PetscScalar *S,PetscInt ldS,PetscScalar *T,PetscInt ldT,PetscScalar *eigi,PetscScalar *X,PetscInt ldX,PetscBool doProd)
1520
{
1521
  PetscInt        i, j;
1522
#if defined(PETSC_USE_COMPLEX)
1523
  PetscScalar     s;
1524
#endif
1525
 
1526
  PetscFunctionBegin;
1527
  PetscValidScalarPointer(S,3);
1528
  if (T) { PetscValidScalarPointer(T,5); }
1529
 
1530
  if (!doProd && X) {
1531
    for (i=0; i<n; i++) for (j=0; j<n; j++) X[ldX*i+j] = 0.0;
1532
    for (i=0; i<n; i++) X[ldX*i+i] = 1.0;
1533
  }
1534
 
1535
#if defined(PETSC_USE_COMPLEX)
1536
  for (i=k; i<n; i++) {
1537
    /* Some functions need the diagonal elements in T be real */
1538
    if (T && PetscImaginaryPart(T[ldT*i+i]) != 0.0) {
1539
      s = PetscConj(T[ldT*i+i])/PetscAbsScalar(T[ldT*i+i]);
1540
      for(j=0; j<=i; j++)
1541
        T[ldT*i+j]*= s,
1542
        S[ldS*i+j]*= s;
1543
      T[ldT*i+i] = PetscRealPart(T[ldT*i+i]);
1544
      if (X) for(j=0; j<n; j++) X[ldX*i+j]*= s;
1545
    }
1546
    if ((j=i+1) < n) {
1547
      S[ldS*i+j] = 0.0;
1548
      if (T) T[ldT*i+j] = 0.0;
1549
    }
1550
  }
1551
#else
1552
  for (i=k; i<n; i++) {
1553
    if (S[ldS*i+i+1] != 0.0 && eigi && eigi[i] != 0.0) {
1554
      if ((j=i+2) < n) S[ldS*(i+1)+j] = 0.0;
1555
      if (T) {
1556
        /* T[ldT*(i+1)+i] = 0.0; */
1557
        {
1558
          /* Check if T(i+1,i) is negligible */
1559
          if (PetscAbs(T[ldT*(i+1)+i])+PetscAbs(T[ldT*i+i+1]) > (PetscAbs(T[ldT*i+i])+PetscAbs(T[ldT*(i+1)+i+1]))*PETSC_MACHINE_EPSILON) {
1560
            PetscBLASInt    ldS_,ldT_,n_i,n_i_1,one=1,n_,i_1,i_;
1561
            PetscScalar     b11,b22,sr,cr,sl,cl;
1562
            ldS_ = PetscBLASIntCast(ldS);
1563
            ldT_ = PetscBLASIntCast(ldT);
1564
            n_i = PetscBLASIntCast(n-i);
1565
            n_i_1 = n_i - 1;
1566
            i_1 = PetscBLASIntCast(i+1);
1567
            i_ = PetscBLASIntCast(i);
1568
            n_ = PetscBLASIntCast(n);
1569
            LAPACKlasv2_(&T[ldT*i+i],&T[ldT*i+i+1],&T[ldT*(i+1)+i+1],&b22,&b11,&sr,&cr,&sl,&cl);
1570
            if (b11 < 0.0) { cr=-cr; sr=-sr; b11=-b11; b22=-b22; }
1571
            BLASrot_(&n_i,&S[ldS*i+i],&ldS_,&S[ldS*i+i+1],&ldS_,&cl,&sl);
1572
            BLASrot_(&i_1,&S[ldS*i],&one,&S[ldS*(i+1)],&one,&cr,&sr);
1573
            if (n_i_1>0) BLASrot_(&n_i_1,&T[ldT*(i+2)+i],&ldT_,&T[ldT*(i+2)+i],&ldT_,&cl,&sl);
1574
            BLASrot_(&i_,&T[ldT*i],&one,&T[ldT*(i+1)],&one,&cr,&sr);
1575
            if (X) BLASrot_(&n_,&X[ldX*i],&one,&X[ldX*(i+1)],&one,&cr,&sr);
1576
            T[ldT*i+i] = b11; T[ldT*i+i+1] = T[ldT*(i+1)+i] = 0.0; T[ldT*(i+1)+i+1] = b22;
1577
          } else {
1578
            T[ldT*(i+1)+i] = T[ldT*i+i+1] = 0.0;
1579
          }
1580
        }
1581
        if ((j=i+1) < n) T[ldT*i+j] = 0.0;
1582
        if ((j=i+2) < n) T[ldT*(i+1)+j] = 0.0;
1583
      }
1584
      i++;
1585
    } else {
1586
      if ((j=i+1) < n) {
1587
        S[ldS*i+j] = 0.0;
1588
        if (T) T[ldT*i+j] = 0.0;
1589
      }
1590
    }
1591
  }
1592
#endif
1593
 
1594
  PetscFunctionReturn(0);
1595
}