Subversion Repositories slepc-dev

Rev

Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2110 jroman 1
/*
2
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3
   SLEPc - Scalable Library for Eigenvalue Problem Computations
2575 eromero 4
   Copyright (c) 2002-2011, Universitat Politecnica de Valencia, Spain
1619 slepc 5
 
2110 jroman 6
   This file is part of SLEPc.
7
 
8
   SLEPc is free software: you can redistribute it and/or modify it under  the
9
   terms of version 3 of the GNU Lesser General Public License as published by
10
   the Free Software Foundation.
11
 
12
   SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
13
   WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
14
   FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
15
   more details.
16
 
17
   You  should have received a copy of the GNU Lesser General  Public  License
18
   along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
19
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
20
*/
21
 
2346 eromero 22
#include <private/vecimplslepc.h>
1985 eromero 23
#include "davidson.h"
1619 slepc 24
 
1745 eromero 25
PetscLogEvent SLEPC_SlepcDenseMatProd = 0;
1750 eromero 26
PetscLogEvent SLEPC_SlepcDenseNorm = 0;
1745 eromero 27
PetscLogEvent SLEPC_SlepcDenseOrth = 0;
28
PetscLogEvent SLEPC_SlepcDenseCopy = 0;
29
PetscLogEvent SLEPC_VecsMult = 0;
1630 slepc 30
 
1634 slepc 31
void dvd_sum_local(void *in, void *out, PetscMPIInt *cnt,MPI_Datatype *t);
1753 eromero 32
PetscErrorCode VecsMultS_copy_func(PetscScalar *out, PetscInt size_out,
33
                                   void *ptr);
1634 slepc 34
 
2223 jroman 35
#undef __FUNCT__  
36
#define __FUNCT__ "SlepcDenseMatProd"
1619 slepc 37
/*
38
  Compute C <- a*A*B + b*C, where
39
    ldC, the leading dimension of C,
40
    ldA, the leading dimension of A,
41
    rA, cA, rows and columns of A,
42
    At, if true use the transpose of A instead,
43
    ldB, the leading dimension of B,
44
    rB, cB, rows and columns of B,
45
    Bt, if true use the transpose of B instead
46
*/
47
PetscErrorCode SlepcDenseMatProd(PetscScalar *C, PetscInt _ldC, PetscScalar b,
48
  PetscScalar a,
2216 jroman 49
  const PetscScalar *A, PetscInt _ldA, PetscInt rA, PetscInt cA, PetscBool At,
50
  const PetscScalar *B, PetscInt _ldB, PetscInt rB, PetscInt cB, PetscBool Bt)
1619 slepc 51
{
1630 slepc 52
  PetscErrorCode  ierr;
53
  PetscInt        tmp;
54
  PetscBLASInt    m, n, k, ldA = _ldA, ldB = _ldB, ldC = _ldC;
55
  const char      *N = "N", *T = "C", *qA = N, *qB = N;
1619 slepc 56
 
57
  PetscFunctionBegin;
1675 slepc 58
 
59
  if ((rA == 0) || (cB == 0)) { PetscFunctionReturn(0); }
60
 
1745 eromero 61
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
1619 slepc 62
 
63
  /* Transpose if needed */
2177 jroman 64
  if (At) tmp = rA, rA = cA, cA = tmp, qA = T;
65
  if (Bt) tmp = rB, rB = cB, cB = tmp, qB = T;
1619 slepc 66
 
67
  /* Check size */
68
  if (cA != rB) {
2214 jroman 69
    SETERRQ(PETSC_COMM_SELF,1, "Matrix dimensions do not match");
1619 slepc 70
  }
71
 
72
  /* Do stub */
1744 eromero 73
  if ((rA == 1) && (cA == 1) && (cB == 1)) {
2556 eromero 74
    if (!At && !Bt) *C = *A * *B;
75
    else if (At && !Bt) *C = PetscConj(*A) * *B;
76
    else if (!At && Bt) *C = *A * PetscConj(*B);
77
    else *C = PetscConj(*A) * PetscConj(*B);
1744 eromero 78
    m = n = k = 1;
79
  } else {
80
    m = rA; n = cB; k = cA;
81
    BLASgemm_(qA, qB, &m, &n, &k, &a, (PetscScalar*)A, &ldA, (PetscScalar*)B,
82
              &ldB, &b, C, &ldC);
83
  }
1619 slepc 84
 
1630 slepc 85
  ierr = PetscLogFlops(m*n*2*k);CHKERRQ(ierr);
1745 eromero 86
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
1630 slepc 87
 
1619 slepc 88
  PetscFunctionReturn(0);
89
}
90
 
2223 jroman 91
#undef __FUNCT__  
92
#define __FUNCT__ "SlepcDenseMatProdTriang"
1626 slepc 93
/*
1747 eromero 94
  Compute C <- A*B, where
1988 eromero 95
    sC, structure of C,
1747 eromero 96
    ldC, the leading dimension of C,
1988 eromero 97
    sA, structure of A,
1747 eromero 98
    ldA, the leading dimension of A,
99
    rA, cA, rows and columns of A,
100
    At, if true use the transpose of A instead,
1988 eromero 101
    sB, structure of B,
1747 eromero 102
    ldB, the leading dimension of B,
103
    rB, cB, rows and columns of B,
104
    Bt, if true use the transpose of B instead
105
*/
106
PetscErrorCode SlepcDenseMatProdTriang(
107
  PetscScalar *C, MatType_t sC, PetscInt ldC,
108
  const PetscScalar *A, MatType_t sA, PetscInt ldA, PetscInt rA, PetscInt cA,
2216 jroman 109
  PetscBool At,
1747 eromero 110
  const PetscScalar *B, MatType_t sB, PetscInt ldB, PetscInt rB, PetscInt cB,
2216 jroman 111
  PetscBool Bt)
1747 eromero 112
{
113
  PetscErrorCode  ierr;
1988 eromero 114
  PetscInt        tmp;
115
  PetscScalar     one=1.0, zero=0.0;
1747 eromero 116
  PetscBLASInt    rC, cC, _ldA = ldA, _ldB = ldB, _ldC = ldC;
1750 eromero 117
 
1747 eromero 118
  PetscFunctionBegin;
119
 
120
  if ((rA == 0) || (cB == 0)) { PetscFunctionReturn(0); }
121
 
122
  /* Transpose if needed */
2177 jroman 123
  if (At) tmp = rA, rA = cA, cA = tmp;
124
  if (Bt) tmp = rB, rB = cB, cB = tmp;
1747 eromero 125
 
126
  /* Check size */
2214 jroman 127
  if (cA != rB) SETERRQ(PETSC_COMM_SELF,1, "Matrix dimensions do not match");
128
  if (sB != 0) SETERRQ(PETSC_COMM_SELF,1, "Matrix type not supported for B");
1747 eromero 129
 
1750 eromero 130
  /* Optimized version: trivial case */
131
  if ((rA == 1) && (cA == 1) && (cB == 1)) {
2177 jroman 132
    if (!At && !Bt)     *C = *A * *B;
133
    else if (At && !Bt) *C = PetscConj(*A) * *B;
134
    else if (!At && Bt) *C = *A * PetscConj(*B);
135
    else if (At && Bt)  *C = PetscConj(*A) * PetscConj(*B);
1750 eromero 136
    PetscFunctionReturn(0);
137
  }
138
 
139
  /* Optimized versions: sA == 0 && sB == 0 */
140
  if ((sA == 0) && (sB == 0)) {
2177 jroman 141
    if (At) tmp = rA, rA = cA, cA = tmp;
142
    if (Bt) tmp = rB, rB = cB, cB = tmp;
1750 eromero 143
    ierr = SlepcDenseMatProd(C, ldC, 0.0, 1.0, A, ldA, rA, cA, At, B, ldB, rB,
144
                             cB, Bt); CHKERRQ(ierr);
145
    PetscFunctionReturn(ierr);
146
  }
147
 
1988 eromero 148
  /* Optimized versions: A hermitian && (B not triang) */
1756 eromero 149
  if (DVD_IS(sA,DVD_MAT_HERMITIAN) &&
150
      DVD_ISNOT(sB,DVD_MAT_UTRIANG) &&
1988 eromero 151
      DVD_ISNOT(sB,DVD_MAT_LTRIANG)    ) {
1750 eromero 152
    ierr = PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
153
    rC = rA; cC = cB;
1988 eromero 154
    BLAShemm_("L", DVD_ISNOT(sA,DVD_MAT_LTRIANG)?"U":"L", &rC, &cC, &one,
1750 eromero 155
              (PetscScalar*)A, &_ldA, (PetscScalar*)B, &_ldB, &zero, C, &_ldC);
156
    ierr = PetscLogFlops(rA*cB*cA); CHKERRQ(ierr);
157
    ierr = PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
158
    PetscFunctionReturn(0);
159
  }
160
 
1988 eromero 161
  /* Optimized versions: B hermitian && (A not triang) */
162
  if (DVD_IS(sB,DVD_MAT_HERMITIAN) &&
163
      DVD_ISNOT(sA,DVD_MAT_UTRIANG) &&
164
      DVD_ISNOT(sA,DVD_MAT_LTRIANG)    ) {
165
    ierr = PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
166
    rC = rA; cC = cB;
167
    BLAShemm_("R", DVD_ISNOT(sB,DVD_MAT_LTRIANG)?"U":"L", &rC, &cC, &one,
168
              (PetscScalar*)B, &_ldB, (PetscScalar*)A, &_ldA, &zero, C, &_ldC);
169
    ierr = PetscLogFlops(rA*cB*cA); CHKERRQ(ierr);
170
    ierr = PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
171
    PetscFunctionReturn(0);
1747 eromero 172
  }
1988 eromero 173
 
2214 jroman 174
  SETERRQ(PETSC_COMM_SELF,1, "Matrix type not supported for A");
1747 eromero 175
}
176
 
2223 jroman 177
#undef __FUNCT__  
178
#define __FUNCT__ "SlepcDenseNorm"
1750 eromero 179
/*
1965 eromero 180
  Normalize the columns of the matrix A, where
1750 eromero 181
    ldA, the leading dimension of A,
1965 eromero 182
    rA, cA, rows and columns of A.
183
  if eigi is given, the pairs of contiguous columns i i+1 such as eigi[i] != 0
184
  are normalized as being one column.
1750 eromero 185
*/
186
PetscErrorCode SlepcDenseNorm(PetscScalar *A, PetscInt ldA, PetscInt _rA,
1965 eromero 187
                              PetscInt cA, PetscScalar *eigi)
1750 eromero 188
{
189
  PetscErrorCode  ierr;
1965 eromero 190
  PetscInt        i;
191
  PetscScalar     norm, norm0;
1750 eromero 192
  PetscBLASInt    rA = _rA, one=1;
1747 eromero 193
 
1750 eromero 194
  PetscFunctionBegin;
195
 
196
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseNorm,0,0,0,0);CHKERRQ(ierr);
197
 
198
  for(i=0; i<cA; i++) {
1965 eromero 199
    if(eigi && eigi[i] != 0.0) {
200
      norm = BLASnrm2_(&rA, &A[i*ldA], &one);
201
      norm0 = BLASnrm2_(&rA, &A[(i+1)*ldA], &one);
202
      norm = 1.0/PetscSqrtScalar(norm*norm + norm0*norm0);
203
      BLASscal_(&rA, &norm, &A[i*ldA], &one);
204
      BLASscal_(&rA, &norm, &A[(i+1)*ldA], &one);
205
      i++;
206
    } else {
207
      norm = BLASnrm2_(&rA, &A[i*ldA], &one);
208
      norm = 1.0 / norm;
209
      BLASscal_(&rA, &norm, &A[i*ldA], &one);
210
     }
1750 eromero 211
  }
212
 
213
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseNorm,0,0,0,0);CHKERRQ(ierr);
214
 
215
  PetscFunctionReturn(0);
216
}
217
 
218
 
2223 jroman 219
#undef __FUNCT__  
220
#define __FUNCT__ "SlepcDenseOrth"
1735 eromero 221
/*
1743 eromero 222
  Compute A <- orth(A), where
223
    ldA, the leading dimension of A,
224
    rA, cA, rows and columns of A,
225
    auxS, auxiliary vector of more size than cA+min(rA,cA),
226
    lauxS, size of auxS,
227
    ncA, new number of columns of A
228
*/
229
PetscErrorCode SlepcDenseOrth(PetscScalar *A, PetscInt _ldA, PetscInt _rA,
230
                              PetscInt _cA, PetscScalar *auxS, PetscInt _lauxS,
231
                              PetscInt *ncA)
232
{
1745 eromero 233
  PetscErrorCode  ierr;
1743 eromero 234
  PetscBLASInt    ldA = _ldA, rA = _rA, cA = _cA,
235
                  info, ltau = PetscMin(_cA, _rA), lw = _lauxS - ltau;
236
  PetscScalar     *tau = auxS, *w = tau + ltau;
237
 
238
  PetscFunctionBegin;
239
 
240
  /* Quick exit */
241
  if ((_rA == 0) || (cA == 0)) { PetscFunctionReturn(0); }
242
 
243
  /* Memory check */
2214 jroman 244
  if (lw < cA) SETERRQ(PETSC_COMM_SELF,1, "Insufficient memory for xGEQRF");
1743 eromero 245
 
1745 eromero 246
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseOrth,0,0,0,0);CHKERRQ(ierr);
1743 eromero 247
  LAPACKgeqrf_(&rA, &cA, A, &ldA, tau, w, &lw, &info);
2214 jroman 248
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack xGEQRF %d", info);
1743 eromero 249
  LAPACKorgqr_(&rA, &ltau, &ltau, A, &ldA, tau, w, &lw, &info);
2214 jroman 250
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack xORGQR %d", info);
1745 eromero 251
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseOrth,0,0,0,0);CHKERRQ(ierr);
1743 eromero 252
 
253
  if (ncA) *ncA = ltau;
254
 
255
  PetscFunctionReturn(0);
256
}
257
 
2223 jroman 258
#undef __FUNCT__  
259
#define __FUNCT__ "SlepcDenseCopy"
1743 eromero 260
/*
261
  Y <- X, where
262
  ldX, leading dimension of X,
263
  rX, cX, rows and columns of X
264
  ldY, leading dimension of Y
265
*/
266
PetscErrorCode SlepcDenseCopy(PetscScalar *Y, PetscInt ldY, PetscScalar *X,
267
                              PetscInt ldX, PetscInt rX, PetscInt cX)
268
{
269
  PetscErrorCode  ierr;
1795 eromero 270
  PetscInt        i;
1743 eromero 271
 
272
  PetscFunctionBegin;
273
 
274
  if ((ldX < rX) || (ldY < rX)) {
2219 jroman 275
    SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");
1743 eromero 276
  }
1805 eromero 277
 
278
  /* Quick exit */
279
  if (Y == X) {
280
    if (ldX != ldY) {
2219 jroman 281
      SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");
1805 eromero 282
    }
283
    PetscFunctionReturn(0);
284
  }
1743 eromero 285
 
1745 eromero 286
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseCopy,0,0,0,0);CHKERRQ(ierr);
1743 eromero 287
  for(i=0; i<cX; i++) {
288
    ierr = PetscMemcpy(&Y[ldY*i], &X[ldX*i], sizeof(PetscScalar)*rX);
289
    CHKERRQ(ierr);
290
  }
1745 eromero 291
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseCopy,0,0,0,0);CHKERRQ(ierr);
1743 eromero 292
 
293
  PetscFunctionReturn(0);
294
}
295
 
2223 jroman 296
#undef __FUNCT__  
297
#define __FUNCT__ "SlepcDenseCopyTriang"
1752 eromero 298
/*
299
  Y <- X, where
300
  ldX, leading dimension of X,
301
  rX, cX, rows and columns of X
302
  ldY, leading dimension of Y
303
*/
304
PetscErrorCode SlepcDenseCopyTriang(PetscScalar *Y, MatType_t sY, PetscInt ldY,
305
                                    PetscScalar *X, MatType_t sX, PetscInt ldX,
306
                                    PetscInt rX, PetscInt cX)
307
{
308
  PetscErrorCode  ierr;
309
  PetscInt        i,j,c;
1743 eromero 310
 
1752 eromero 311
  PetscFunctionBegin;
312
 
313
  if ((ldX < rX) || (ldY < rX)) {
2219 jroman 314
    SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");
1752 eromero 315
  }
316
 
317
  if (rX != cX) {
2219 jroman 318
    SETERRQ(PETSC_COMM_SELF,1, "Rectangular matrices not supported");
1752 eromero 319
  }
320
 
1756 eromero 321
  if (DVD_IS(sX,DVD_MAT_UTRIANG) &&
322
      DVD_ISNOT(sX,DVD_MAT_LTRIANG)) {        /* UpTr to ... */
323
    if (DVD_IS(sY,DVD_MAT_UTRIANG) &&
324
        DVD_ISNOT(sY,DVD_MAT_LTRIANG))        /* ... UpTr, */
1752 eromero 325
      c = 0;                                      /*     so copy */
1756 eromero 326
    else if(DVD_ISNOT(sY,DVD_MAT_UTRIANG) &&
327
            DVD_IS(sY,DVD_MAT_LTRIANG))       /* ... LoTr, */
1752 eromero 328
      c = 1;                                      /*     so transpose */
329
    else                                          /* ... Full, */
330
      c = 2;                                      /*     so reflect from up */
1756 eromero 331
  } else if (DVD_ISNOT(sX,DVD_MAT_UTRIANG) &&
332
             DVD_IS(sX,DVD_MAT_LTRIANG)) {    /* LoTr to ... */
333
    if (DVD_IS(sY,DVD_MAT_UTRIANG) &&
334
        DVD_ISNOT(sY,DVD_MAT_LTRIANG))        /* ... UpTr, */
1752 eromero 335
      c = 1;                                      /*     so transpose */
1756 eromero 336
    else if(DVD_ISNOT(sY,DVD_MAT_UTRIANG) &&
337
            DVD_IS(sY,DVD_MAT_LTRIANG))       /* ... LoTr, */
1752 eromero 338
      c = 0;                                      /*     so copy */
339
    else                                          /* ... Full, */
340
      c = 3;                                      /*     so reflect fr. down */
341
  } else                                          /* Full to any, */
342
    c = 0;                                        /*     so copy */
343
 
344
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseCopy,0,0,0,0);CHKERRQ(ierr);
345
 
346
  switch(c) {
347
  case 0: /* copy */
348
    for(i=0; i<cX; i++) {
349
      ierr = PetscMemcpy(&Y[ldY*i], &X[ldX*i], sizeof(PetscScalar)*rX);
350
      CHKERRQ(ierr);
351
    }
352
    break;
353
 
354
  case 1: /* transpose */
355
    for(i=0; i<cX; i++)
356
      for(j=0; j<rX; j++)
2556 eromero 357
        Y[ldY*j+i] = PetscConj(X[ldX*i+j]);
1752 eromero 358
    break;
359
 
360
  case 2: /* reflection from up */
361
    for(i=0; i<cX; i++)
362
      for(j=0; j<PetscMin(i+1,rX); j++)
1988 eromero 363
        Y[ldY*j+i] = PetscConj(Y[ldY*i+j] = X[ldX*i+j]);
1752 eromero 364
    break;
365
 
366
  case 3: /* reflection from down */
367
    for(i=0; i<cX; i++)
368
      for(j=i; j<rX; j++)
1988 eromero 369
        Y[ldY*j+i] = PetscConj(Y[ldY*i+j] = X[ldX*i+j]);
1752 eromero 370
    break;
371
  }
372
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseCopy,0,0,0,0);CHKERRQ(ierr);
373
 
374
  PetscFunctionReturn(0);
375
}
376
 
377
 
2223 jroman 378
#undef __FUNCT__  
379
#define __FUNCT__ "SlepcUpdateVectorsZ"
1743 eromero 380
/*
1675 slepc 381
  Compute Y[0..cM-1] <- alpha * X[0..cX-1] * M + beta * Y[0..cM-1],
382
  where X and Y are contiguous global vectors.
1626 slepc 383
*/
1675 slepc 384
PetscErrorCode SlepcUpdateVectorsZ(Vec *Y, PetscScalar beta, PetscScalar alpha,
385
  Vec *X, PetscInt cX, const PetscScalar *M, PetscInt ldM, PetscInt rM,
386
  PetscInt cM)
1626 slepc 387
{
388
  PetscErrorCode  ierr;
1960 eromero 389
 
390
  PetscFunctionBegin;
391
 
392
  ierr = SlepcUpdateVectorsS(Y, 1, beta, alpha, X, cX, 1, M, ldM, rM, cM);
393
  CHKERRQ(ierr);
394
 
395
  PetscFunctionReturn(0);
396
}
397
 
398
 
2223 jroman 399
#undef __FUNCT__  
400
#define __FUNCT__ "SlepcUpdateVectorsS"
1960 eromero 401
/*
1965 eromero 402
  Compute Y[0:dY:cM*dY-1] <- alpha * X[0:dX:cX-1] * M + beta * Y[0:dY:cM*dY-1],
1960 eromero 403
  where X and Y are contiguous global vectors.
404
*/
405
PetscErrorCode SlepcUpdateVectorsS(Vec *Y, PetscInt dY, PetscScalar beta,
406
  PetscScalar alpha, Vec *X, PetscInt cX, PetscInt dX, const PetscScalar *M,
407
  PetscInt ldM, PetscInt rM, PetscInt cM)
408
{
2334 jroman 409
  PetscErrorCode    ierr;
410
  const PetscScalar *px;
411
  PetscScalar       *py;
412
  PetscInt          rX, rY, ldX, ldY, i, rcX;
1626 slepc 413
 
414
  PetscFunctionBegin;
2346 eromero 415
  SlepcValidVecsContiguous(Y,cM*dY,1);
416
  SlepcValidVecsContiguous(X,cX,5);
1626 slepc 417
 
1965 eromero 418
  /* Compute the real number of columns */
419
  rcX = cX/dX;
420
  if (rcX != rM) {
2214 jroman 421
    SETERRQ(((PetscObject)*Y)->comm,1, "Matrix dimensions do not match");
1735 eromero 422
  }
423
 
1965 eromero 424
  if ((rcX == 0) || (rM == 0) || (cM == 0)) {
1626 slepc 425
    PetscFunctionReturn(0);
1960 eromero 426
  } else if ((Y + cM <= X) || (X + cX <= Y) ||
427
             ((X != Y) && ((PetscMax(dX,dY))%(PetscMin(dX,dY))!=0))) {
1626 slepc 428
    /* If Y[0..cM-1] and X[0..cX-1] are not overlapped... */
429
 
430
    /* Get the dense matrices and dimensions associated to Y and X */
1960 eromero 431
    ierr = VecGetLocalSize(X[0], &rX); CHKERRQ(ierr);
432
    ierr = VecGetLocalSize(Y[0], &rY); CHKERRQ(ierr);
433
    if (rX != rY) {
2214 jroman 434
      SETERRQ(((PetscObject)*Y)->comm,1, "The multivectors do not have the same dimension");
1626 slepc 435
    }
2334 jroman 436
    ierr = VecGetArrayRead(X[0], &px);CHKERRQ(ierr);
1626 slepc 437
    ierr = VecGetArray(Y[0], &py);CHKERRQ(ierr);
438
 
1960 eromero 439
    /* Update the strides */
440
    ldX = rX*dX; ldY= rY*dY;
441
 
1626 slepc 442
    /* Do operation */
1965 eromero 443
    ierr = SlepcDenseMatProd(py, ldY, beta, alpha, px, ldX, rX, rcX,
1675 slepc 444
                    PETSC_FALSE, M, ldM, rM, cM, PETSC_FALSE); CHKERRQ(ierr);
1626 slepc 445
 
2334 jroman 446
    ierr = VecRestoreArrayRead(X[0], &px);CHKERRQ(ierr);
1626 slepc 447
    ierr = VecRestoreArray(Y[0], &py);CHKERRQ(ierr);
1992 eromero 448
    for(i=1; i<cM; i++) {
449
      ierr = PetscObjectStateIncrease((PetscObject)Y[dY*i]); CHKERRQ(ierr);
450
    }
1626 slepc 451
 
1960 eromero 452
  } else if ((Y >= X) && (beta == 0.0) && (dY == dX)) {
1626 slepc 453
    /* If not, call to SlepcUpdateVectors */
1965 eromero 454
    ierr = SlepcUpdateStrideVectors(cX, X, Y-X, dX, Y-X+cM*dX, M-ldM*(Y-X),
455
                                    ldM, PETSC_FALSE); CHKERRQ(ierr);
1675 slepc 456
    if (alpha != 1.0)
457
      for (i=0; i<cM; i++) {
458
        ierr = VecScale(Y[i], alpha); CHKERRQ(ierr);
459
      }
1626 slepc 460
  } else {
2214 jroman 461
    SETERRQ(((PetscObject)*Y)->comm,1, "Unsupported case");
1626 slepc 462
  }
463
 
464
  PetscFunctionReturn(0);
465
}
466
 
2223 jroman 467
#undef __FUNCT__  
468
#define __FUNCT__ "SlepcUpdateVectorsD"
1965 eromero 469
/*
470
  Compute X <- alpha * X[0:dX:cX-1] * M
471
  where X is a matrix with non-consecutive columns
472
*/
473
PetscErrorCode SlepcUpdateVectorsD(Vec *X, PetscInt cX, PetscScalar alpha,
474
  const PetscScalar *M, PetscInt ldM, PetscInt rM, PetscInt cM,
475
  PetscScalar *work, PetscInt lwork)
476
{
2334 jroman 477
  PetscErrorCode ierr;
478
  PetscScalar    **px, *Y, *Z;
479
  PetscInt       rX, i, j, rY, rY0, ldY;
1960 eromero 480
 
1965 eromero 481
  PetscFunctionBegin;
2346 eromero 482
  SlepcValidVecsContiguous(X,cX,1);
1965 eromero 483
 
484
  if (cX != rM) {
2214 jroman 485
    SETERRQ(((PetscObject)*X)->comm,1, "Matrix dimensions do not match");
1965 eromero 486
  }
487
 
488
  rY = (lwork/2)/cX;
489
  if (rY <= 0) {
2214 jroman 490
    SETERRQ(((PetscObject)*X)->comm,1, "Insufficient work space given");
1965 eromero 491
  }
492
  Y = work; Z = &Y[cX*rY]; ldY = rY;
493
 
494
  if ((cX == 0) || (rM == 0) || (cM == 0)) {
495
    PetscFunctionReturn(0);
496
  }
497
 
498
  /* Get the dense vectors associated to the columns of X */
499
  ierr = PetscMalloc(sizeof(Vec)*cX, &px); CHKERRQ(ierr);
500
  for(i=0; i<cX; i++) {
501
    ierr = VecGetArray(X[i], &px[i]); CHKERRQ(ierr);
502
  }
503
  ierr = VecGetLocalSize(X[0], &rX); CHKERRQ(ierr);
504
 
505
  for(i=0, rY0=0; i<rX; i+=rY0) {
506
    rY0 = PetscMin(rY, rX-i);
507
 
508
    /* Y <- X[i0:i1,:] */
509
    for(j=0; j<cX; j++) {
510
      ierr = SlepcDenseCopy(&Y[ldY*j], ldY, px[j]+i, rX, rY0, 1);
511
      CHKERRQ(ierr);
512
    }
513
 
514
    /* Z <- Y * M */
515
    ierr = SlepcDenseMatProd(Z, ldY, 0.0, alpha, Y, ldY, rY0, cX, PETSC_FALSE,
516
                                                 M, ldM, rM, cM, PETSC_FALSE);
517
    CHKERRQ(ierr);
518
 
519
    /* X <- Z */
520
    for(j=0; j<cM; j++) {
521
      ierr = SlepcDenseCopy(px[j]+i, rX, &Z[j*ldY], ldY, rY0, 1);
522
      CHKERRQ(ierr);
523
    }
524
  }
525
 
526
  for(i=0; i<cX; i++) {
527
    ierr = VecRestoreArray(X[i], &px[i]); CHKERRQ(ierr);
528
  }
529
  ierr = PetscFree(px); CHKERRQ(ierr);
530
 
531
  PetscFunctionReturn(0);
532
}
533
 
534
 
535
 
2223 jroman 536
#undef __FUNCT__  
537
#define __FUNCT__ "VecsMult"
1634 slepc 538
/* Computes M <- [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ]
539
                 [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
540
  where W = U' * V.
1640 slepc 541
  workS0 and workS1 are an auxiliary scalar vector of size
542
  (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
543
  is needed, and of size eU*eV.
1634 slepc 544
*/
1747 eromero 545
PetscErrorCode VecsMult(PetscScalar *M, MatType_t sM, PetscInt ldM,
1634 slepc 546
                        Vec *U, PetscInt sU, PetscInt eU,
547
                        Vec *V, PetscInt sV, PetscInt eV,
1640 slepc 548
                        PetscScalar *workS0, PetscScalar *workS1)
1634 slepc 549
{
2334 jroman 550
  PetscErrorCode    ierr;
551
  PetscInt          ldU, ldV, i, j, k;
552
  const PetscScalar *pu, *pv;
553
  PetscScalar       *W, *Wr;
1634 slepc 554
 
555
  PetscFunctionBegin;
556
 
557
  /* Check if quick exit */
558
  if ((eU-sU == 0) || (eV-sV == 0))
559
    PetscFunctionReturn(0);
2346 eromero 560
 
561
  SlepcValidVecsContiguous(U,eU,4);
2555 eromero 562
  SlepcValidVecsContiguous(V,eV,7);
1634 slepc 563
 
564
  /* Get the dense matrices and dimensions associated to U and V */
565
  ierr = VecGetLocalSize(U[0], &ldU); CHKERRQ(ierr);
566
  ierr = VecGetLocalSize(V[0], &ldV); CHKERRQ(ierr);
567
  if (ldU != ldV) {
2214 jroman 568
    SETERRQ(((PetscObject)*U)->comm,1, "Matrix dimensions do not match");
1634 slepc 569
  }
2334 jroman 570
  ierr = VecGetArrayRead(U[0], &pu);CHKERRQ(ierr);
571
  ierr = VecGetArrayRead(V[0], &pv);CHKERRQ(ierr);
1634 slepc 572
 
1640 slepc 573
  if (workS0)
574
    W = workS0;
1634 slepc 575
  else {
1640 slepc 576
    ierr = PetscMalloc(sizeof(PetscScalar)*((eU-sU)*sV+(eV-sV)*eU), &W);
577
    CHKERRQ(ierr);
1634 slepc 578
  }
579
 
1745 eromero 580
  ierr = PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
581
 
1640 slepc 582
  if ((sU == 0) && (sV == 0) && (eU == ldM)) {
583
    /* Use the smart memory usage version */
1634 slepc 584
 
1640 slepc 585
    /* W <- U' * V */
1747 eromero 586
    ierr = SlepcDenseMatProdTriang(W, sM, eU,
587
                                   pu, 0, ldU, ldU, eU, PETSC_TRUE,
588
                                   pv, 0, ldV, ldV, eV, PETSC_FALSE);
1640 slepc 589
    CHKERRQ(ierr);
590
 
591
    /* ReduceAll(W, SUM) */
1899 eromero 592
    ierr = MPI_Allreduce(W, M, eU*eV, MPIU_SCALAR, MPIU_SUM,
1640 slepc 593
                         ((PetscObject)U[0])->comm); CHKERRQ(ierr);
1747 eromero 594
  /* Full M matrix */
1756 eromero 595
  } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
596
             DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
1640 slepc 597
    if (workS1)
598
      Wr = workS1;
599
    else {
600
      ierr = PetscMalloc(sizeof(PetscScalar)*((eU-sU)*sV+(eV-sV)*eU), &Wr);
601
      CHKERRQ(ierr);
602
    }
1747 eromero 603
 
1640 slepc 604
    /* W(0:(eU-sU)*sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
605
    ierr = SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
606
                             pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
607
                             pv       , ldV, ldV, sV,    PETSC_FALSE);
608
    CHKERRQ(ierr);
609
 
610
    /* W((eU-sU)*sV:(eU-sU)*sV+(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
611
    ierr = SlepcDenseMatProd(W+(eU-sU)*sV, eU, 0.0, 1.0,
612
                             pu,        ldU, ldU, eU,    PETSC_TRUE,
613
                             pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
614
    CHKERRQ(ierr);
615
 
616
    /* ReduceAll(W, SUM) */
617
    ierr = MPI_Allreduce(W, Wr, (eU-sU)*sV+(eV-sV)*eU, MPIU_SCALAR,
1899 eromero 618
                      MPIU_SUM, ((PetscObject)U[0])->comm); CHKERRQ(ierr);
1640 slepc 619
 
620
    /* M(...,...) <- W */
621
    for (i=0,k=0; i<sV; i++)
622
      for (j=ldM*i+sU; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
623
    for (i=sV; i<eV; i++)
624
        for (j=ldM*i; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
625
 
1747 eromero 626
    if (!workS1) {
627
      ierr = PetscFree(Wr); CHKERRQ(ierr);
628
    }
1745 eromero 629
 
1747 eromero 630
  /* Upper triangular M matrix */
1756 eromero 631
  } else if (DVD_IS(sM,DVD_MAT_UTRIANG) &&
632
             DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
1747 eromero 633
    if (workS1)
634
      Wr = workS1;
635
    else {
636
      ierr = PetscMalloc(sizeof(PetscScalar)*(eV-sV)*eU, &Wr);
637
      CHKERRQ(ierr);
638
    }
639
 
640
    /* W(0:(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
641
    ierr = SlepcDenseMatProd(W,         eU,  0.0, 1.0,
642
                             pu,        ldU, ldU, eU,    PETSC_TRUE,
643
                             pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
644
    CHKERRQ(ierr);
645
 
646
    /* ReduceAll(W, SUM) */
1899 eromero 647
    ierr = MPI_Allreduce(W, Wr, (eV-sV)*eU, MPIU_SCALAR, MPIU_SUM,
1747 eromero 648
                         ((PetscObject)U[0])->comm); CHKERRQ(ierr);
649
 
650
    /* M(...,...) <- W */
651
    for (i=sV,k=0; i<eV; i++)
652
        for (j=ldM*i; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
653
 
1640 slepc 654
    if (!workS1) {
655
      ierr = PetscFree(Wr); CHKERRQ(ierr);
656
    }
1747 eromero 657
 
658
  /* Lower triangular M matrix */
1756 eromero 659
  } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
660
             DVD_IS(sM,DVD_MAT_LTRIANG)) {
1747 eromero 661
    if (workS1)
662
      Wr = workS1;
663
    else {
664
      ierr = PetscMalloc(sizeof(PetscScalar)*(eU-sU)*eV, &Wr);
665
      CHKERRQ(ierr);
666
    }
667
 
668
    /* W(0:(eU-sU)*eV-1) <- U(sU:eU-1)' * V(0:eV-1) */
669
    ierr = SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
670
                             pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
671
                             pv       , ldV, ldV, eV,    PETSC_FALSE);
672
    CHKERRQ(ierr);
673
 
674
    /* ReduceAll(W, SUM) */
1899 eromero 675
    ierr = MPI_Allreduce(W, Wr, (eU-sU)*eV, MPIU_SCALAR, MPIU_SUM,
1747 eromero 676
                         ((PetscObject)U[0])->comm); CHKERRQ(ierr);
677
 
678
    /* M(...,...) <- W */
679
    for (i=0,k=0; i<eV; i++)
680
      for (j=ldM*i+sU; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
681
 
682
    if (!workS1) {
683
      ierr = PetscFree(Wr); CHKERRQ(ierr);
684
    }
1634 slepc 685
  }
686
 
1747 eromero 687
  ierr = PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
688
 
1640 slepc 689
  if (!workS0) {
1634 slepc 690
    ierr = PetscFree(W); CHKERRQ(ierr);
691
  }
692
 
2334 jroman 693
  ierr = VecRestoreArrayRead(U[0], &pu); CHKERRQ(ierr);
694
  ierr = VecRestoreArrayRead(V[0], &pv); CHKERRQ(ierr);
1634 slepc 695
  PetscFunctionReturn(0);
696
}
697
 
1795 eromero 698
 
2223 jroman 699
#undef __FUNCT__  
700
#define __FUNCT__ "VecsMultIa"
1753 eromero 701
/* Computes M <- [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ]
702
                 [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
1808 eromero 703
  where W = local_U' * local_V. Needs VecsMultIb for completing the operation!
1795 eromero 704
  workS0 and workS1 are an auxiliary scalar vector of size
705
  (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
706
  is needed, and of size eU*eV.
707
*/
708
PetscErrorCode VecsMultIa(PetscScalar *M, MatType_t sM, PetscInt ldM,
709
                          Vec *U, PetscInt sU, PetscInt eU,
710
                          Vec *V, PetscInt sV, PetscInt eV)
711
{
712
  PetscErrorCode  ierr;
713
  PetscInt        ldU, ldV;
714
  PetscScalar     *pu, *pv;
715
 
716
  PetscFunctionBegin;
717
 
718
  /* Check if quick exit */
719
  if ((eU-sU == 0) || (eV-sV == 0))
720
    PetscFunctionReturn(0);
721
 
2346 eromero 722
  SlepcValidVecsContiguous(U,eU,4);
2555 eromero 723
  SlepcValidVecsContiguous(V,eV,7);
2346 eromero 724
 
1795 eromero 725
  /* Get the dense matrices and dimensions associated to U and V */
726
  ierr = VecGetLocalSize(U[0], &ldU); CHKERRQ(ierr);
727
  ierr = VecGetLocalSize(V[0], &ldV); CHKERRQ(ierr);
728
  if (ldU != ldV) {
2214 jroman 729
    SETERRQ(((PetscObject)*U)->comm,1, "Matrix dimensions do not match");
1795 eromero 730
  }
731
  ierr = VecGetArray(U[0], &pu);CHKERRQ(ierr);
732
  ierr = VecGetArray(V[0], &pv);CHKERRQ(ierr);
733
 
734
  if ((sU == 0) && (sV == 0) && (eU == ldM)) {
735
    /* M <- local_U' * local_V */
736
    ierr = SlepcDenseMatProdTriang(M, sM, eU,
737
                                   pu, 0, ldU, ldU, eU, PETSC_TRUE,
738
                                   pv, 0, ldV, ldV, eV, PETSC_FALSE);
739
    CHKERRQ(ierr);
740
 
741
  /* Full M matrix */
742
  } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
743
             DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
744
    /* M(sU:eU-1,0:sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
745
    ierr = SlepcDenseMatProd(&M[sU], ldM, 0.0, 1.0,
746
                             pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
747
                             pv       , ldV, ldV, sV,    PETSC_FALSE);
748
    CHKERRQ(ierr);
749
 
750
    /* M(0:eU-1,sV:eV-1) <- U(0:eU-1)' * V(sV:eV-1) */
751
    ierr = SlepcDenseMatProd(&M[ldM*sV], ldM, 0.0, 1.0,
752
                             pu,        ldU, ldU, eU,    PETSC_TRUE,
753
                             pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
754
    CHKERRQ(ierr);
755
 
756
  /* Other structures */
2214 jroman 757
  } else SETERRQ(((PetscObject)*U)->comm,1, "Matrix structure not supported");
1795 eromero 758
 
759
  ierr = VecRestoreArray(U[0], &pu); CHKERRQ(ierr);
1992 eromero 760
  ierr = PetscObjectStateDecrease((PetscObject)U[0]); CHKERRQ(ierr);
1795 eromero 761
  ierr = VecRestoreArray(V[0], &pv); CHKERRQ(ierr);
1992 eromero 762
  ierr = PetscObjectStateDecrease((PetscObject)V[0]); CHKERRQ(ierr);
1795 eromero 763
 
764
  PetscFunctionReturn(0);
765
}
766
 
767
 
2223 jroman 768
#undef __FUNCT__  
769
#define __FUNCT__ "VecsMultIc"
1808 eromero 770
/* Computes M <- nprocs*M
771
  where nprocs is the number of processors.
772
*/
773
PetscErrorCode VecsMultIc(PetscScalar *M, MatType_t sM, PetscInt ldM,
774
                          PetscInt rM, PetscInt cM, Vec V)
775
{
2117 eromero 776
  int        i,j,n;
1808 eromero 777
 
778
  PetscFunctionBegin;
779
 
780
  /* Check if quick exit */
781
  if ((rM == 0) || (cM == 0))
782
    PetscFunctionReturn(0);
783
 
2214 jroman 784
  if (sM != 0) SETERRQ(((PetscObject)V)->comm,1, "Matrix structure not supported");
1808 eromero 785
 
786
  MPI_Comm_size(((PetscObject)V)->comm, &n);
787
 
788
  for(i=0; i<cM; i++)
789
    for(j=0; j<rM; j++)
790
      M[ldM*i+j]/= (PetscScalar)n;
791
 
792
  PetscFunctionReturn(0);
793
}
794
 
795
 
2223 jroman 796
#undef __FUNCT__  
797
#define __FUNCT__ "VecsMultIb"
1795 eromero 798
/* Computes N <- Allreduce( [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ] )
799
                          ( [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ] )
1753 eromero 800
  where W = U' * V.
1795 eromero 801
  workS0 and workS1 are an auxiliary scalar vector of size
802
  (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
803
  is needed, and of size eU*eV.
804
*/
805
PetscErrorCode VecsMultIb(PetscScalar *M, MatType_t sM, PetscInt ldM,
806
                          PetscInt rM, PetscInt cM, PetscScalar *auxS,
807
                          Vec V)
808
{
809
  PetscErrorCode  ierr;
810
  PetscScalar     *W, *Wr;
811
 
812
  PetscFunctionBegin;
813
 
814
  /* Check if quick exit */
815
  if ((rM == 0) || (cM == 0))
816
    PetscFunctionReturn(0);
817
 
818
  if (auxS)
819
    W = auxS;
820
  else {
821
    ierr = PetscMalloc(sizeof(PetscScalar)*rM*cM*2, &W);
822
    CHKERRQ(ierr);
823
  }
824
  Wr = W + rM*cM;
825
 
826
  ierr = PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
827
 
828
  if (sM == 0) {
829
    /* W <- M */
830
    ierr = SlepcDenseCopy(W, rM, M, ldM, rM, cM); CHKERRQ(ierr);
831
 
832
    /* Wr <- ReduceAll(W, SUM) */
1899 eromero 833
    ierr = MPI_Allreduce(W, Wr, rM*cM, MPIU_SCALAR, MPIU_SUM,
1795 eromero 834
                         ((PetscObject)V)->comm); CHKERRQ(ierr);
835
 
836
    /* M <- Wr */
837
    ierr = SlepcDenseCopy(M, ldM, Wr, rM, rM, cM); CHKERRQ(ierr);
838
 
839
  /* Other structures */
2214 jroman 840
  } else SETERRQ(((PetscObject)V)->comm,1, "Matrix structure not supported");
1795 eromero 841
 
842
  ierr = PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
843
 
844
  if (!auxS) {
845
    ierr = PetscFree(W); CHKERRQ(ierr);
846
  }
847
 
848
  PetscFunctionReturn(0);
849
}
850
 
851
 
2223 jroman 852
#undef __FUNCT__  
853
#define __FUNCT__ "VecsMultS"
1795 eromero 854
/* Computes M <- [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ]
855
                 [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
856
  where W = U' * V.
1753 eromero 857
  r, a DvdReduction structure,
858
  sr, an structure DvdMult_copy_func.
859
*/
860
PetscErrorCode VecsMultS(PetscScalar *M, MatType_t sM, PetscInt ldM,
861
                         Vec *U, PetscInt sU, PetscInt eU,
862
                         Vec *V, PetscInt sV, PetscInt eV, DvdReduction *r,
863
                         DvdMult_copy_func *sr)
864
{
2334 jroman 865
  PetscErrorCode    ierr;
866
  PetscInt          ldU, ldV;
867
  const PetscScalar *pu, *pv;
868
  PetscScalar       *W;
1753 eromero 869
 
870
  PetscFunctionBegin;
871
 
872
  /* Check if quick exit */
873
  if ((eU-sU == 0) || (eV-sV == 0))
874
    PetscFunctionReturn(0);
875
 
2346 eromero 876
  SlepcValidVecsContiguous(U,eU,4);
2555 eromero 877
  SlepcValidVecsContiguous(V,eV,7);
2346 eromero 878
 
1753 eromero 879
  /* Get the dense matrices and dimensions associated to U and V */
880
  ierr = VecGetLocalSize(U[0], &ldU); CHKERRQ(ierr);
881
  ierr = VecGetLocalSize(V[0], &ldV); CHKERRQ(ierr);
882
  if (ldU != ldV) {
2214 jroman 883
    SETERRQ(((PetscObject)*U)->comm,1, "Matrix dimensions do not match");
1753 eromero 884
  }
2334 jroman 885
  ierr = VecGetArrayRead(U[0], &pu);CHKERRQ(ierr);
886
  ierr = VecGetArrayRead(V[0], &pv);CHKERRQ(ierr);
1753 eromero 887
 
888
  ierr = PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
889
 
890
  if ((sU == 0) && (sV == 0) && (eU == ldM)) {
891
    /* Use the smart memory usage version */
892
 
893
    /* Add the reduction to r */
894
    ierr = SlepcAllReduceSum(r, eU*eV, VecsMultS_copy_func, sr, &W);
895
    CHKERRQ(ierr);
896
 
897
    /* W <- U' * V */
898
    ierr = SlepcDenseMatProdTriang(W, sM, eU,
899
                                   pu, 0, ldU, ldU, eU, PETSC_TRUE,
900
                                   pv, 0, ldV, ldV, eV, PETSC_FALSE);
901
    CHKERRQ(ierr);
902
 
903
    /* M <- ReduceAll(W, SUM) */
904
    sr->M = M;    sr->ld = ldM;
905
    sr->i0 = 0;   sr->i1 = eV;    sr->s0 = sU;    sr->e0 = eU;
906
                  sr->i2 = eV;
907
 
908
  /* Full M matrix */
1756 eromero 909
  } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
910
             DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
1753 eromero 911
    /* Add the reduction to r */
912
    ierr = SlepcAllReduceSum(r, (eU-sU)*sV+(eV-sV)*eU, VecsMultS_copy_func,
913
                             sr, &W);
914
    CHKERRQ(ierr);
915
 
916
    /* W(0:(eU-sU)*sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
917
    ierr = SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
918
                             pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
919
                             pv       , ldV, ldV, sV,    PETSC_FALSE);
920
    CHKERRQ(ierr);
921
 
922
    /* W((eU-sU)*sV:(eU-sU)*sV+(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
923
    ierr = SlepcDenseMatProd(W+(eU-sU)*sV, eU, 0.0, 1.0,
924
                             pu,        ldU, ldU, eU,    PETSC_TRUE,
925
                             pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
926
    CHKERRQ(ierr);
927
 
928
    /* M <- ReduceAll(W, SUM) */
929
    sr->M = M;    sr->ld = ldM;
930
    sr->i0 = 0;   sr->i1 = sV;    sr->s0 = sU;    sr->e0 = eU;
931
                  sr->i2 = eV;    sr->s1 = 0;     sr->e1 = eU;
932
 
933
  /* Upper triangular M matrix */
1756 eromero 934
  } else if (DVD_IS(sM,DVD_MAT_UTRIANG) &&
935
             DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
1753 eromero 936
    /* Add the reduction to r */
937
    ierr = SlepcAllReduceSum(r, (eV-sV)*eU, VecsMultS_copy_func, sr, &W);
938
    CHKERRQ(ierr);
939
 
940
    /* W(0:(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
941
    ierr = SlepcDenseMatProd(W,         eU,  0.0, 1.0,
942
                             pu,        ldU, ldU, eU,    PETSC_TRUE,
943
                             pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
944
    CHKERRQ(ierr);
945
 
946
    /* M <- ReduceAll(W, SUM) */
947
    sr->M = M;    sr->ld = ldM;
948
    sr->i0 = sV;  sr->i1 = eV;    sr->s0 = 0;     sr->e0 = eU;
949
                  sr->i2 = eV;
950
 
951
  /* Lower triangular M matrix */
1756 eromero 952
  } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
953
             DVD_IS(sM,DVD_MAT_LTRIANG)) {
1753 eromero 954
    /* Add the reduction to r */
955
    ierr = SlepcAllReduceSum(r, (eU-sU)*eV, VecsMultS_copy_func, sr, &W);
956
    CHKERRQ(ierr);
957
 
958
    /* W(0:(eU-sU)*eV-1) <- U(sU:eU-1)' * V(0:eV-1) */
959
    ierr = SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
960
                             pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
961
                             pv       , ldV, ldV, eV,    PETSC_FALSE);
962
    CHKERRQ(ierr);
963
 
964
    /* ReduceAll(W, SUM) */
965
    sr->M = M;    sr->ld = ldM;
966
    sr->i0 = 0;   sr->i1 = eV;    sr->s0 = sU;    sr->e0 = eU;
967
                  sr->i2 = eV;
968
  }
969
 
970
  ierr = PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
971
 
2334 jroman 972
  ierr = VecRestoreArrayRead(U[0], &pu); CHKERRQ(ierr);
973
  ierr = VecRestoreArrayRead(V[0], &pv); CHKERRQ(ierr);
1753 eromero 974
  PetscFunctionReturn(0);
975
}
976
 
977
#undef __FUNCT__  
978
#define __FUNCT__ "VecsMultS_copy_func"
979
PetscErrorCode VecsMultS_copy_func(PetscScalar *out, PetscInt size_out,
980
                                   void *ptr)
981
{
982
  PetscInt        i, j, k;
983
  DvdMult_copy_func
984
                  *sr = (DvdMult_copy_func*)ptr;
985
 
986
  PetscFunctionBegin;
987
 
988
  for (i=sr->i0,k=0; i<sr->i1; i++)
989
    for (j=sr->ld*i+sr->s0; j<sr->ld*i+sr->e0; j++,k++) sr->M[j] = out[k];
990
  for (i=sr->i1; i<sr->i2; i++)
991
    for (j=sr->ld*i+sr->s1; j<sr->ld*i+sr->e1; j++,k++) sr->M[j] = out[k];
992
 
2214 jroman 993
  if (k != size_out) SETERRQ(PETSC_COMM_SELF,1, "Wrong size");
1753 eromero 994
 
995
  PetscFunctionReturn(0);
996
}
997
 
2223 jroman 998
#undef __FUNCT__  
999
#define __FUNCT__ "VecsOrthonormalize"
1675 slepc 1000
/* Orthonormalize a chunk of parallel vector.
1001
   NOTE: wS0 and wS1 must be of size n*n.
1002
*/
1003
PetscErrorCode VecsOrthonormalize(Vec *V, PetscInt n, PetscScalar *wS0,
1004
                                  PetscScalar *wS1)
1005
{
1006
  PetscErrorCode  ierr;
1007
  PetscBLASInt    nn = n, info, ld;
1992 eromero 1008
  PetscInt        ldV, i;
1675 slepc 1009
  PetscScalar     *H, *T, one=1.0, *pv;
1010
 
1011
  PetscFunctionBegin;
1012
 
1013
  if (!wS0) {
1014
    ierr = PetscMalloc(sizeof(PetscScalar)*n*n, &H); CHKERRQ(ierr);
1015
  } else
1016
    H = wS0;
1017
  if (!wS1) {
1018
    ierr = PetscMalloc(sizeof(PetscScalar)*n*n, &T); CHKERRQ(ierr);
1019
  } else
1020
    T = wS1;
1021
 
1022
  /* H <- V' * V */
1747 eromero 1023
  ierr = VecsMult(H, 0, n, V, 0, n, V, 0, n, T, PETSC_NULL); CHKERRQ(ierr);
1675 slepc 1024
 
1025
  /* H <- chol(H) */
1026
  LAPACKpbtrf_("U", &nn, &nn, H, &nn, &info);
2214 jroman 1027
  if (info) SETERRQ1(((PetscObject)*V)->comm,PETSC_ERR_LIB, "Error in Lapack PBTRF %d", info);
1675 slepc 1028
 
1029
  /* V <- V * inv(H) */
1030
  ierr = VecGetLocalSize(V[0], &ldV); CHKERRQ(ierr);
1031
  ierr = VecGetArray(V[0], &pv);CHKERRQ(ierr);
1032
  ld = ldV;
1033
  BLAStrsm_("R", "U", "N", "N", &ld, &nn, &one, H, &nn, pv, &ld);
1034
  ierr = VecRestoreArray(V[0], &pv);CHKERRQ(ierr);
1992 eromero 1035
  for(i=1; i<n; i++) {
1036
    ierr = PetscObjectStateIncrease((PetscObject)V[i]); CHKERRQ(ierr);
1037
  }
1675 slepc 1038
 
1039
  if (!wS0) {
1040
    ierr = PetscFree(H); CHKERRQ(ierr);
1041
  }
1042
  if (!wS1) {
1043
    ierr = PetscFree(T); CHKERRQ(ierr);
1044
  }
1045
 
1046
  PetscFunctionReturn(0);
1047
}
1048
 
2223 jroman 1049
#undef __FUNCT__  
1050
#define __FUNCT__ "SlepcAllReduceSumBegin"
1753 eromero 1051
/*
1052
  Sum up several arrays with only one call to MPIReduce.
1053
*/
1054
PetscErrorCode SlepcAllReduceSumBegin(DvdReductionChunk *ops,
1055
                                      PetscInt max_size_ops,
1056
                                      PetscScalar *in, PetscScalar *out,
1057
                                      PetscInt max_size_in, DvdReduction *r,
1058
                                      MPI_Comm comm)
1059
{
1060
  PetscFunctionBegin;
1061
 
1062
  r->in = in;
1063
  r->out = out;
1064
  r->size_in = 0;
1065
  r->max_size_in = max_size_in;
1066
  r->ops = ops;
1067
  r->size_ops = 0;
1068
  r->max_size_ops = max_size_ops;
1069
  r->comm = comm;
1070
 
1071
  PetscFunctionReturn(0);
1072
}
1073
 
1074
#undef __FUNCT__  
1075
#define __FUNCT__ "SlepcAllReduceSum"
1076
PetscErrorCode SlepcAllReduceSum(DvdReduction *r, PetscInt size_in,
1077
                                 DvdReductionPostF f, void *ptr,
1078
                                 PetscScalar **in)
1079
{
1080
  PetscFunctionBegin;
1081
 
1082
  *in = r->in + r->size_in;
1083
  r->ops[r->size_ops].out = r->out + r->size_in;
1084
  r->ops[r->size_ops].size_out = size_in;
1085
  r->ops[r->size_ops].f = f;
1086
  r->ops[r->size_ops].ptr = ptr;
1087
  if (++(r->size_ops) > r->max_size_ops) {
2214 jroman 1088
    SETERRQ(PETSC_COMM_SELF,1, "max_size_ops is not large enough");
1753 eromero 1089
  }
1090
  if ((r->size_in+= size_in) > r->max_size_in) {
2214 jroman 1091
    SETERRQ(PETSC_COMM_SELF,1, "max_size_in is not large enough");
1753 eromero 1092
  }
1093
 
1094
  PetscFunctionReturn(0);
1095
}
1096
 
1097
 
1098
#undef __FUNCT__  
1099
#define __FUNCT__ "SlepcAllReduceSumEnd"
1100
PetscErrorCode SlepcAllReduceSumEnd(DvdReduction *r)
1101
{
1102
  PetscErrorCode  ierr;
1103
  PetscInt        i;
1104
 
1105
  PetscFunctionBegin;
1106
 
1795 eromero 1107
  /* Check if quick exit */
1108
  if (r->size_ops == 0)
1109
    PetscFunctionReturn(0);
1110
 
1753 eromero 1111
  /* Call the MPIAllReduce routine */
1899 eromero 1112
  ierr = MPI_Allreduce(r->in, r->out, r->size_in, MPIU_SCALAR, MPIU_SUM,
1753 eromero 1113
                       r->comm); CHKERRQ(ierr);
1114
 
1115
  /* Call the postponed routines */
1116
  for(i=0; i<r->size_ops; i++) {
1117
    ierr = r->ops[i].f(r->ops[i].out, r->ops[i].size_out, r->ops[i].ptr);
1118
    CHKERRQ(ierr);
1119
  }
1120
 
1121
  /* Tag the operation as done */
1122
  r->size_ops = 0;
1123
 
1124
  PetscFunctionReturn(0);
1125
}
1795 eromero 1126
 
1831 eromero 1127
 
1795 eromero 1128
#undef __FUNCT__  
1129
#define __FUNCT__ "dvd_orthV"
1989 eromero 1130
PetscErrorCode dvd_orthV(IP ip, Vec *DS, PetscInt size_DS, Vec *cX,
1131
                         PetscInt size_cX, Vec *V, PetscInt V_new_s,
1132
                         PetscInt V_new_e, PetscScalar *auxS, Vec auxV,
1133
                         PetscRandom rand)
1795 eromero 1134
{
1135
  PetscErrorCode  ierr;
1136
  PetscInt        i, j;
2216 jroman 1137
  PetscBool       lindep;
1795 eromero 1138
  PetscReal       norm;
1869 eromero 1139
  PetscScalar     *auxS0 = auxS;
1831 eromero 1140
 
1795 eromero 1141
  PetscFunctionBegin;
1831 eromero 1142
 
1795 eromero 1143
  /* Orthonormalize V with IP */
1144
  for (i=V_new_s; i<V_new_e; i++) {
1145
    for(j=0; j<3; j++) {
1975 eromero 1146
      if (j>0) { ierr = SlepcVecSetRandom(V[i], rand); CHKERRQ(ierr); }
1831 eromero 1147
      if (cX + size_cX == V) {
1148
        /* If cX and V are contiguous, orthogonalize in one step */
1989 eromero 1149
        ierr = IPOrthogonalize(ip, size_DS, DS, size_cX+i, PETSC_NULL, cX,
1831 eromero 1150
                               V[i], auxS0, &norm, &lindep); CHKERRQ(ierr);
1989 eromero 1151
      } else if (DS) {
1152
        /* Else orthogonalize first against DS, and then against cX and V */
1153
        ierr = IPOrthogonalize(ip, size_DS, DS, size_cX, PETSC_NULL, cX,
1831 eromero 1154
                               V[i], auxS0, PETSC_NULL, &lindep); CHKERRQ(ierr);
2177 jroman 1155
        if(!lindep) {
1831 eromero 1156
          ierr = IPOrthogonalize(ip, 0, PETSC_NULL, i, PETSC_NULL, V,
1157
                                 V[i], auxS0, &norm, &lindep); CHKERRQ(ierr);
1158
        }
1989 eromero 1159
      } else {
1160
        /* Else orthogonalize first against cX and then against V */
1161
        ierr = IPOrthogonalize(ip, size_cX, cX, i, PETSC_NULL, V,
1162
                               V[i], auxS0, &norm, &lindep); CHKERRQ(ierr);
1831 eromero 1163
      }
2177 jroman 1164
      if(!lindep && (norm > PETSC_MACHINE_EPSILON)) break;
2499 jroman 1165
      ierr = PetscInfo1(ip,"Orthonormalization problems adding the vector %D to the searching subspace\n",i);CHKERRQ(ierr);
1795 eromero 1166
    }
2177 jroman 1167
    if(lindep || (norm < PETSC_MACHINE_EPSILON)) {
2214 jroman 1168
        SETERRQ(((PetscObject)ip)->comm,1, "Error during orthonormalization of eigenvectors");
1795 eromero 1169
    }
1170
    ierr = VecScale(V[i], 1.0/norm); CHKERRQ(ierr);
1171
  }
1831 eromero 1172
 
1795 eromero 1173
  PetscFunctionReturn(0);
1174
}
2555 eromero 1175
 
1176
 
1177
#undef __FUNCT__  
1178
#define __FUNCT__ "dvd_BorthV"
1179
PetscErrorCode dvd_BorthV(IP ip, Vec *DS, Vec *BDS, PetscInt size_DS, Vec *cX,
1180
                         Vec *BcX, PetscInt size_cX, Vec *V, Vec *BV,
1181
                         PetscInt V_new_s, PetscInt V_new_e,
1182
                         PetscScalar *auxS, Vec auxV, PetscRandom rand)
1183
{
1184
  PetscErrorCode  ierr;
1185
  PetscInt        i, j;
1186
  PetscBool       lindep;
1187
  PetscReal       norm;
1188
  PetscScalar     *auxS0 = auxS;
1831 eromero 1189
 
2555 eromero 1190
  PetscFunctionBegin;
1191
 
1192
  /* Orthonormalize V with IP */
1193
  for (i=V_new_s; i<V_new_e; i++) {
1194
    for(j=0; j<3; j++) {
1195
      if (j>0) { ierr = SlepcVecSetRandom(V[i], rand); CHKERRQ(ierr); }
1196
      if (cX + size_cX == V) {
1197
        /* If cX and V are contiguous, orthogonalize in one step */
1198
        ierr = IPBOrthogonalize(ip, size_DS, DS, BDS, size_cX+i, PETSC_NULL, cX, BcX,
1199
                               V[i], BV[i], auxS0, &norm, &lindep); CHKERRQ(ierr);
1200
      } else if (DS) {
1201
        /* Else orthogonalize first against DS, and then against cX and V */
1202
        ierr = IPBOrthogonalize(ip, size_DS, DS, BDS, size_cX, PETSC_NULL, cX, BcX,
1203
                               V[i], BV[i], auxS0, PETSC_NULL, &lindep); CHKERRQ(ierr);
1204
        if(!lindep) {
1205
          ierr = IPBOrthogonalize(ip, 0, PETSC_NULL, PETSC_NULL, i, PETSC_NULL, V, BV,
1206
                                  V[i], BV[i], auxS0, &norm, &lindep); CHKERRQ(ierr);
1207
        }
1208
      } else {
1209
        /* Else orthogonalize first against cX and then against V */
1210
        ierr = IPBOrthogonalize(ip, size_cX, cX, BcX, i, PETSC_NULL, V, BV,
1211
                                V[i], BV[i], auxS0, &norm, &lindep); CHKERRQ(ierr);
1212
      }
1213
      if(!lindep && (norm > PETSC_MACHINE_EPSILON)) break;
1214
      ierr = PetscInfo1(ip, "Orthonormalization problems adding the vector %d to the searching subspace\n", i);
1215
      CHKERRQ(ierr);
1216
    }
1217
    if(lindep || (norm < PETSC_MACHINE_EPSILON)) {
1218
        SETERRQ(((PetscObject)ip)->comm,1, "Error during the orthonormalization of the eigenvectors!");
1219
    }
1220
    ierr = VecScale(V[i], 1.0/norm); CHKERRQ(ierr);
1221
    ierr = VecScale(BV[i], 1.0/norm); CHKERRQ(ierr);
1222
  }
1223
 
1224
  PetscFunctionReturn(0);
1225
}
1226
 
1968 eromero 1227
#undef __FUNCT__  
1228
#define __FUNCT__ "dvd_compute_eigenvectors"
1229
/*
1230
  Compute eigenvectors associated to the Schur decomposition (S, T) and
1231
  save the left vectors in pY and the right vectors in pX, where
1232
  n, size of the eigenproblem
1233
  ldS, ldT, leading dimension of S and T
1234
  ldpX, ldpY, leading dimension of pX and pY
1235
  auxS, auxiliar scalar of length:
1236
    double standard 3n+n*n, double generalized 11n+4n*n,
2022 eromero 1237
    complex standard 3n+n*n, complex generalized 3n+2n*n
1968 eromero 1238
  size_auxS, the length of auxS
1239
  doProd, if true pX and pY return the eigenvectors premultiplied by the input vectors stored in pX and pY respectively
1240
*/
1241
PetscErrorCode dvd_compute_eigenvectors(PetscInt n_, PetscScalar *S,
1242
  PetscInt ldS, PetscScalar *T, PetscInt ldT, PetscScalar *pX,
1243
  PetscInt ldpX_, PetscScalar *pY, PetscInt ldpY_, PetscScalar *auxS,
2216 jroman 1244
  PetscInt size_auxS, PetscBool doProd)
1968 eromero 1245
{
2392 jroman 1246
#if defined(SLEPC_MISSING_LAPACK_GGEV)
1247
  PetscFunctionBegin;
1248
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GGEV - Lapack routine is unavailable.");
1249
#else
1968 eromero 1250
  PetscErrorCode  ierr;
2152 eromero 1251
  PetscBLASInt    n, ldpX, ldpY, nout, info;
1968 eromero 1252
  PetscScalar     *Sc, *Tc;
1253
  const char      *side, *howmny;
1254
#if defined(PETSC_USE_COMPLEX)
1255
  PetscReal       *auxR;
1256
#else
1257
  PetscScalar     *pA,*pB;
1258
  PetscBLASInt    n1, ldpA,ldpB;
1259
  PetscScalar     *alphar, *alphai, *beta;
1260
#endif
1261
 
1262
  PetscFunctionBegin;
2117 eromero 1263
  n = PetscBLASIntCast(n_);
1264
  ldpX = PetscBLASIntCast(ldpX_);
1265
  ldpY = PetscBLASIntCast(ldpY_);
1266
 
1968 eromero 1267
  if (pX && pY) side = "B";
1268
  else if (pX)  side = "R";
1269
  else if (pY)  side = "L";
1270
  else { PetscFunctionReturn(0); }
1271
 
1272
  if (!pX) ldpX = 1;
1273
  if (!pY) ldpY = 1;
1274
 
2177 jroman 1275
  howmny = doProd?"B":"A";
1968 eromero 1276
 
1277
  Sc = auxS; auxS+= n*n; size_auxS-= n*n;
1278
  if (T) Tc = auxS, auxS+= n*n, size_auxS-= n*n;
1279
  else   Tc = PETSC_NULL;
1280
 
1281
  /* Sc <- S, Tc <- T */
1282
  ierr = SlepcDenseCopy(Sc, n, S, ldS, n, n); CHKERRQ(ierr);
1283
  if (T) {
1284
    ierr = SlepcDenseCopy(Tc, n, T, ldT, n, n); CHKERRQ(ierr);
1285
  }
1286
 
1287
  if (T) {
1288
    /* [eigr, pX] = eig(S, T) */
1289
#if defined(PETSC_USE_COMPLEX)
1290
    auxR = (PetscReal*)auxS; auxS = (PetscScalar*)(auxR+2*n); size_auxS-= 2*n;
1291
    if (size_auxS < 2*n)
2214 jroman 1292
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Insufficient work space for xTGEVC");
1968 eromero 1293
    LAPACKtgevc_(side,howmny,PETSC_NULL,&n,Sc,&n,Tc,&n,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,auxR,&info);
2394 jroman 1294
    if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTGEVC %d",info);
1968 eromero 1295
#else
1296
    alphar = auxS; auxS+= n; size_auxS-= n;
1297
    alphai = auxS; auxS+= n; size_auxS-= n;
1298
    beta = auxS; auxS+= n; size_auxS-= n;
2177 jroman 1299
    if (doProd) {
1968 eromero 1300
      if (pX) pA = auxS, auxS+= n*n, size_auxS-= n*n, ldpA = n;
1301
      else    pA = PETSC_NULL, ldpA = 0;
1302
      if (pY) pB = auxS, auxS+= n*n, size_auxS-= n*n, ldpB = n;
1303
      else    pB = PETSC_NULL, ldpB = 0;
1304
    } else {
1305
      pA = pX; pB = pY; ldpA = ldpX; ldpB = ldpY;
1306
    }
1307
    /* LAPACKtgevc_ needs the element i,i+1 in the 2-by-2 digonal blocs
1308
       of T that represent complex conjugate eigenpairs to be zero */
1309
    n1 = size_auxS;
1310
    if (size_auxS < 8*n)
2214 jroman 1311
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Insufficient work space for xGGEV");
1968 eromero 1312
    LAPACKggev_(pY?"V":"N",pX?"V":"N",&n,Sc,&n,Tc,&n,alphar,alphai,beta,pB,&ldpB,pA,&ldpA,auxS,&n1,&info);
2394 jroman 1313
    if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGGEV %d",info);
2177 jroman 1314
    if (doProd) {
1968 eromero 1315
      if (pX) {
1316
        /* pX <- pX * pA */
1317
        ierr = SlepcDenseCopy(Sc, n, pX, ldpX, n, n); CHKERRQ(ierr);
2022 eromero 1318
        ierr = SlepcDenseMatProd(pX, ldpX, 0.0, 1.0,
1968 eromero 1319
                                 Sc, n, n, n, PETSC_FALSE,
1320
                                 pA, n, n, n, PETSC_FALSE); CHKERRQ(ierr);
1321
      }
1322
      if (pY) {
1323
        /* pY <- pY * pB */
1324
        ierr = SlepcDenseCopy(Sc, n, pY, ldpY, n, n); CHKERRQ(ierr);
2022 eromero 1325
        ierr = SlepcDenseMatProd(pY, ldpY, 0.0, 1.0,
1968 eromero 1326
                                 Sc, n, n, n, PETSC_FALSE,
1327
                                 pB, n, n, n, PETSC_FALSE); CHKERRQ(ierr);
1328
      }
1329
    }
1330
#endif
1331
  } else {
1332
    /* [eigr, pX] = eig(S) */
1333
#if defined(PETSC_USE_COMPLEX)
1334
    auxR = (PetscReal*)auxS; auxS = (PetscScalar*)(auxR+n); size_auxS-= n;
1335
    if (size_auxS < 2*n)
2214 jroman 1336
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Insufficient work space for xTREVC");
1968 eromero 1337
    LAPACKtrevc_(side,howmny,PETSC_NULL,&n,Sc,&n,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,auxR,&info);
1338
#else
1339
    LAPACKtrevc_(side,howmny,PETSC_NULL,&n,Sc,&n,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,&info);
1340
#endif
2394 jroman 1341
    if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTREVC %d",info);
1968 eromero 1342
  }
1343
  PetscFunctionReturn(0);
2392 jroman 1344
#endif
1968 eromero 1345
}
2034 eromero 1346
 
2555 eromero 1347
 
1348
#undef __FUNCT__
1349
#define __FUNCT__ "EPSSortDenseHEP"
2556 eromero 1350
PetscErrorCode EPSSortDenseHEP(EPS eps, PetscInt n, PetscInt k, PetscScalar *w, PetscScalar *V, PetscInt ldV)
2034 eromero 1351
{
2555 eromero 1352
  PetscInt        i, j, result, pos;
2556 eromero 1353
  PetscReal       re;
1354
  PetscScalar     t;
2568 eromero 1355
  PetscBLASInt    n_,one=1;
2034 eromero 1356
  PetscErrorCode  ierr;
2555 eromero 1357
 
2034 eromero 1358
  PetscFunctionBegin;
2568 eromero 1359
  n_ = PetscBLASIntCast(n);
2555 eromero 1360
 
1361
  /* selection sort */
1362
  for (i=k;i<n-1;i++) {
2556 eromero 1363
    re = PetscRealPart(w[i]);
2555 eromero 1364
    pos = 0;
1365
    /* find minimum eigenvalue */
1366
    for (j=i+1;j<n;j++) {
2556 eromero 1367
      ierr = EPSCompareEigenvalues(eps,re,0,PetscRealPart(w[j]),0,&result);CHKERRQ(ierr);
2555 eromero 1368
      if (result > 0) {
2556 eromero 1369
        re = PetscRealPart(w[j]);
2555 eromero 1370
        pos = j;
1371
      }
2034 eromero 1372
    }
2555 eromero 1373
    /* interchange the pairs i and pos */
1374
    if (pos) {
1375
      BLASswap_(&n_, &V[ldV*pos], &one, &V[ldV*i], &one);
1376
      t = w[i]; w[i] = w[pos]; w[pos] = t;
1377
    }
2034 eromero 1378
  }
2555 eromero 1379
 
2034 eromero 1380
  PetscFunctionReturn(0);
1381
}