Subversion Repositories slepc-dev

Rev

Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2110 jroman 1
/*
2
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3
   SLEPc - Scalable Library for Eigenvalue Problem Computations
2116 eromero 4
   Copyright (c) 2002-2010, Universidad Politecnica de Valencia, Spain
1619 slepc 5
 
2110 jroman 6
   This file is part of SLEPc.
7
 
8
   SLEPc is free software: you can redistribute it and/or modify it under  the
9
   terms of version 3 of the GNU Lesser General Public License as published by
10
   the Free Software Foundation.
11
 
12
   SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
13
   WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
14
   FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
15
   more details.
16
 
17
   You  should have received a copy of the GNU Lesser General  Public  License
18
   along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
19
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
20
*/
21
 
2346 eromero 22
#include <private/vecimplslepc.h>
1985 eromero 23
#include "davidson.h"
1619 slepc 24
 
1745 eromero 25
PetscLogEvent SLEPC_SlepcDenseMatProd = 0;
26
PetscLogEvent SLEPC_SlepcDenseMatInvProd = 0;
1750 eromero 27
PetscLogEvent SLEPC_SlepcDenseNorm = 0;
1745 eromero 28
PetscLogEvent SLEPC_SlepcDenseOrth = 0;
29
PetscLogEvent SLEPC_SlepcDenseCopy = 0;
30
PetscLogEvent SLEPC_VecsMult = 0;
1630 slepc 31
 
1634 slepc 32
void dvd_sum_local(void *in, void *out, PetscMPIInt *cnt,MPI_Datatype *t);
1753 eromero 33
PetscErrorCode VecsMultS_copy_func(PetscScalar *out, PetscInt size_out,
34
                                   void *ptr);
1634 slepc 35
 
2223 jroman 36
#undef __FUNCT__  
37
#define __FUNCT__ "SlepcDenseMatProd"
1619 slepc 38
/*
39
  Compute C <- a*A*B + b*C, where
40
    ldC, the leading dimension of C,
41
    ldA, the leading dimension of A,
42
    rA, cA, rows and columns of A,
43
    At, if true use the transpose of A instead,
44
    ldB, the leading dimension of B,
45
    rB, cB, rows and columns of B,
46
    Bt, if true use the transpose of B instead
47
*/
48
PetscErrorCode SlepcDenseMatProd(PetscScalar *C, PetscInt _ldC, PetscScalar b,
49
  PetscScalar a,
2216 jroman 50
  const PetscScalar *A, PetscInt _ldA, PetscInt rA, PetscInt cA, PetscBool At,
51
  const PetscScalar *B, PetscInt _ldB, PetscInt rB, PetscInt cB, PetscBool Bt)
1619 slepc 52
{
1630 slepc 53
  PetscErrorCode  ierr;
54
  PetscInt        tmp;
55
  PetscBLASInt    m, n, k, ldA = _ldA, ldB = _ldB, ldC = _ldC;
56
  const char      *N = "N", *T = "C", *qA = N, *qB = N;
1619 slepc 57
 
58
  PetscFunctionBegin;
1675 slepc 59
 
60
  if ((rA == 0) || (cB == 0)) { PetscFunctionReturn(0); }
61
 
1745 eromero 62
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
1619 slepc 63
 
64
  /* Transpose if needed */
2177 jroman 65
  if (At) tmp = rA, rA = cA, cA = tmp, qA = T;
66
  if (Bt) tmp = rB, rB = cB, cB = tmp, qB = T;
1619 slepc 67
 
68
  /* Check size */
69
  if (cA != rB) {
2214 jroman 70
    SETERRQ(PETSC_COMM_SELF,1, "Matrix dimensions do not match");
1619 slepc 71
  }
72
 
73
  /* Do stub */
1744 eromero 74
  if ((rA == 1) && (cA == 1) && (cB == 1)) {
75
    *C = *A * *B;
76
    m = n = k = 1;
77
  } else {
78
    m = rA; n = cB; k = cA;
79
    BLASgemm_(qA, qB, &m, &n, &k, &a, (PetscScalar*)A, &ldA, (PetscScalar*)B,
80
              &ldB, &b, C, &ldC);
81
  }
1619 slepc 82
 
1630 slepc 83
  ierr = PetscLogFlops(m*n*2*k);CHKERRQ(ierr);
1745 eromero 84
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
1630 slepc 85
 
1619 slepc 86
  PetscFunctionReturn(0);
87
}
88
 
2223 jroman 89
#undef __FUNCT__  
90
#define __FUNCT__ "SlepcDenseMatProdTriang"
1626 slepc 91
/*
1747 eromero 92
  Compute C <- A*B, where
1988 eromero 93
    sC, structure of C,
1747 eromero 94
    ldC, the leading dimension of C,
1988 eromero 95
    sA, structure of A,
1747 eromero 96
    ldA, the leading dimension of A,
97
    rA, cA, rows and columns of A,
98
    At, if true use the transpose of A instead,
1988 eromero 99
    sB, structure of B,
1747 eromero 100
    ldB, the leading dimension of B,
101
    rB, cB, rows and columns of B,
102
    Bt, if true use the transpose of B instead
103
*/
104
PetscErrorCode SlepcDenseMatProdTriang(
105
  PetscScalar *C, MatType_t sC, PetscInt ldC,
106
  const PetscScalar *A, MatType_t sA, PetscInt ldA, PetscInt rA, PetscInt cA,
2216 jroman 107
  PetscBool At,
1747 eromero 108
  const PetscScalar *B, MatType_t sB, PetscInt ldB, PetscInt rB, PetscInt cB,
2216 jroman 109
  PetscBool Bt)
1747 eromero 110
{
111
  PetscErrorCode  ierr;
1988 eromero 112
  PetscInt        tmp;
113
  PetscScalar     one=1.0, zero=0.0;
1747 eromero 114
  PetscBLASInt    rC, cC, _ldA = ldA, _ldB = ldB, _ldC = ldC;
1750 eromero 115
 
1747 eromero 116
  PetscFunctionBegin;
117
 
118
  if ((rA == 0) || (cB == 0)) { PetscFunctionReturn(0); }
119
 
120
  /* Transpose if needed */
2177 jroman 121
  if (At) tmp = rA, rA = cA, cA = tmp;
122
  if (Bt) tmp = rB, rB = cB, cB = tmp;
1747 eromero 123
 
124
  /* Check size */
2214 jroman 125
  if (cA != rB) SETERRQ(PETSC_COMM_SELF,1, "Matrix dimensions do not match");
126
  if (sB != 0) SETERRQ(PETSC_COMM_SELF,1, "Matrix type not supported for B");
1747 eromero 127
 
1750 eromero 128
  /* Optimized version: trivial case */
129
  if ((rA == 1) && (cA == 1) && (cB == 1)) {
2177 jroman 130
    if (!At && !Bt)     *C = *A * *B;
131
    else if (At && !Bt) *C = PetscConj(*A) * *B;
132
    else if (!At && Bt) *C = *A * PetscConj(*B);
133
    else if (At && Bt)  *C = PetscConj(*A) * PetscConj(*B);
1750 eromero 134
    PetscFunctionReturn(0);
135
  }
136
 
137
  /* Optimized versions: sA == 0 && sB == 0 */
138
  if ((sA == 0) && (sB == 0)) {
2177 jroman 139
    if (At) tmp = rA, rA = cA, cA = tmp;
140
    if (Bt) tmp = rB, rB = cB, cB = tmp;
1750 eromero 141
    ierr = SlepcDenseMatProd(C, ldC, 0.0, 1.0, A, ldA, rA, cA, At, B, ldB, rB,
142
                             cB, Bt); CHKERRQ(ierr);
143
    PetscFunctionReturn(ierr);
144
  }
145
 
1988 eromero 146
  /* Optimized versions: A hermitian && (B not triang) */
1756 eromero 147
  if (DVD_IS(sA,DVD_MAT_HERMITIAN) &&
148
      DVD_ISNOT(sB,DVD_MAT_UTRIANG) &&
1988 eromero 149
      DVD_ISNOT(sB,DVD_MAT_LTRIANG)    ) {
1750 eromero 150
    ierr = PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
151
    rC = rA; cC = cB;
1988 eromero 152
    BLAShemm_("L", DVD_ISNOT(sA,DVD_MAT_LTRIANG)?"U":"L", &rC, &cC, &one,
1750 eromero 153
              (PetscScalar*)A, &_ldA, (PetscScalar*)B, &_ldB, &zero, C, &_ldC);
154
    ierr = PetscLogFlops(rA*cB*cA); CHKERRQ(ierr);
155
    ierr = PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
156
    PetscFunctionReturn(0);
157
  }
158
 
1988 eromero 159
  /* Optimized versions: B hermitian && (A not triang) */
160
  if (DVD_IS(sB,DVD_MAT_HERMITIAN) &&
161
      DVD_ISNOT(sA,DVD_MAT_UTRIANG) &&
162
      DVD_ISNOT(sA,DVD_MAT_LTRIANG)    ) {
163
    ierr = PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
164
    rC = rA; cC = cB;
165
    BLAShemm_("R", DVD_ISNOT(sB,DVD_MAT_LTRIANG)?"U":"L", &rC, &cC, &one,
166
              (PetscScalar*)B, &_ldB, (PetscScalar*)A, &_ldA, &zero, C, &_ldC);
167
    ierr = PetscLogFlops(rA*cB*cA); CHKERRQ(ierr);
168
    ierr = PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
169
    PetscFunctionReturn(0);
1747 eromero 170
  }
1988 eromero 171
 
2214 jroman 172
  SETERRQ(PETSC_COMM_SELF,1, "Matrix type not supported for A");
1747 eromero 173
}
174
 
2223 jroman 175
#undef __FUNCT__  
176
#define __FUNCT__ "SlepcDenseNorm"
1750 eromero 177
/*
1965 eromero 178
  Normalize the columns of the matrix A, where
1750 eromero 179
    ldA, the leading dimension of A,
1965 eromero 180
    rA, cA, rows and columns of A.
181
  if eigi is given, the pairs of contiguous columns i i+1 such as eigi[i] != 0
182
  are normalized as being one column.
1750 eromero 183
*/
184
PetscErrorCode SlepcDenseNorm(PetscScalar *A, PetscInt ldA, PetscInt _rA,
1965 eromero 185
                              PetscInt cA, PetscScalar *eigi)
1750 eromero 186
{
187
  PetscErrorCode  ierr;
1965 eromero 188
  PetscInt        i;
189
  PetscScalar     norm, norm0;
1750 eromero 190
  PetscBLASInt    rA = _rA, one=1;
1747 eromero 191
 
1750 eromero 192
  PetscFunctionBegin;
193
 
194
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseNorm,0,0,0,0);CHKERRQ(ierr);
195
 
196
  for(i=0; i<cA; i++) {
1965 eromero 197
    if(eigi && eigi[i] != 0.0) {
198
      norm = BLASnrm2_(&rA, &A[i*ldA], &one);
199
      norm0 = BLASnrm2_(&rA, &A[(i+1)*ldA], &one);
200
      norm = 1.0/PetscSqrtScalar(norm*norm + norm0*norm0);
201
      BLASscal_(&rA, &norm, &A[i*ldA], &one);
202
      BLASscal_(&rA, &norm, &A[(i+1)*ldA], &one);
203
      i++;
204
    } else {
205
      norm = BLASnrm2_(&rA, &A[i*ldA], &one);
206
      norm = 1.0 / norm;
207
      BLASscal_(&rA, &norm, &A[i*ldA], &one);
208
     }
1750 eromero 209
  }
210
 
211
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseNorm,0,0,0,0);CHKERRQ(ierr);
212
 
213
  PetscFunctionReturn(0);
214
}
215
 
216
 
2223 jroman 217
#undef __FUNCT__  
218
#define __FUNCT__ "SlepcDenseMatInvProd"
1747 eromero 219
/*
1735 eromero 220
  Compute B <- A\B, where
221
    ldA, the leading dimension of A,
222
    ldB, the leading dimension of B,
223
    dimA, rows and columns of A,
224
    rB, cB, rows and columns of B,
225
    auxI, auxiliary vector of size dimA,
226
    auxS, auxiliary vector of size cB
227
*/
228
PetscErrorCode SlepcDenseMatInvProd(
229
  PetscScalar *A, PetscInt _ldA, PetscInt _dimA,
230
  PetscScalar *B, PetscInt _ldB, PetscInt rB, PetscInt _cB,
231
  PetscInt *auxI)
232
{
1745 eromero 233
  PetscErrorCode  ierr;
1735 eromero 234
  PetscBLASInt    *p = (PetscBLASInt*)auxI, dimA = _dimA, cB = _cB,
235
                  ldA = _ldA, ldB = _ldB, info;
236
 
237
  PetscFunctionBegin;
238
 
239
  /* Check size */
240
  if (_dimA != rB) {
2214 jroman 241
    SETERRQ(PETSC_COMM_SELF,1, "Matrix dimensions do not match");
1735 eromero 242
  }
243
 
244
  /* Quick exit */
245
  if ((_dimA == 0) || (cB == 0)) { PetscFunctionReturn(0); }
246
 
1745 eromero 247
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseMatInvProd,0,0,0,0);CHKERRQ(ierr);
1744 eromero 248
  if (dimA == 1) {
249
    *B = *B / *A;
1745 eromero 250
    ierr = PetscLogFlops(1);CHKERRQ(ierr);
1744 eromero 251
  } else {
252
   LAPACKgesv_(&dimA, &cB, A, &ldA, p, B, &ldB, &info);
2214 jroman 253
   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack GESV %d", info);
1744 eromero 254
  }
1745 eromero 255
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseMatInvProd,0,0,0,0);CHKERRQ(ierr);
1744 eromero 256
 
1735 eromero 257
  PetscFunctionReturn(0);
258
}
259
 
2223 jroman 260
#undef __FUNCT__  
261
#define __FUNCT__ "SlepcDenseOrth"
1735 eromero 262
/*
1743 eromero 263
  Compute A <- orth(A), where
264
    ldA, the leading dimension of A,
265
    rA, cA, rows and columns of A,
266
    auxS, auxiliary vector of more size than cA+min(rA,cA),
267
    lauxS, size of auxS,
268
    ncA, new number of columns of A
269
*/
270
PetscErrorCode SlepcDenseOrth(PetscScalar *A, PetscInt _ldA, PetscInt _rA,
271
                              PetscInt _cA, PetscScalar *auxS, PetscInt _lauxS,
272
                              PetscInt *ncA)
273
{
1745 eromero 274
  PetscErrorCode  ierr;
1743 eromero 275
  PetscBLASInt    ldA = _ldA, rA = _rA, cA = _cA,
276
                  info, ltau = PetscMin(_cA, _rA), lw = _lauxS - ltau;
277
  PetscScalar     *tau = auxS, *w = tau + ltau;
278
 
279
  PetscFunctionBegin;
280
 
281
  /* Quick exit */
282
  if ((_rA == 0) || (cA == 0)) { PetscFunctionReturn(0); }
283
 
284
  /* Memory check */
2214 jroman 285
  if (lw < cA) SETERRQ(PETSC_COMM_SELF,1, "Insufficient memory for xGEQRF");
1743 eromero 286
 
1745 eromero 287
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseOrth,0,0,0,0);CHKERRQ(ierr);
1743 eromero 288
  LAPACKgeqrf_(&rA, &cA, A, &ldA, tau, w, &lw, &info);
2214 jroman 289
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack xGEQRF %d", info);
1743 eromero 290
  LAPACKorgqr_(&rA, &ltau, &ltau, A, &ldA, tau, w, &lw, &info);
2214 jroman 291
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack xORGQR %d", info);
1745 eromero 292
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseOrth,0,0,0,0);CHKERRQ(ierr);
1743 eromero 293
 
294
  if (ncA) *ncA = ltau;
295
 
296
  PetscFunctionReturn(0);
297
}
298
 
2223 jroman 299
#undef __FUNCT__  
300
#define __FUNCT__ "SlepcDenseCopy"
1743 eromero 301
/*
302
  Y <- X, where
303
  ldX, leading dimension of X,
304
  rX, cX, rows and columns of X
305
  ldY, leading dimension of Y
306
*/
307
PetscErrorCode SlepcDenseCopy(PetscScalar *Y, PetscInt ldY, PetscScalar *X,
308
                              PetscInt ldX, PetscInt rX, PetscInt cX)
309
{
310
  PetscErrorCode  ierr;
1795 eromero 311
  PetscInt        i;
1743 eromero 312
 
313
  PetscFunctionBegin;
314
 
315
  if ((ldX < rX) || (ldY < rX)) {
2219 jroman 316
    SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");
1743 eromero 317
  }
1805 eromero 318
 
319
  /* Quick exit */
320
  if (Y == X) {
321
    if (ldX != ldY) {
2219 jroman 322
      SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");
1805 eromero 323
    }
324
    PetscFunctionReturn(0);
325
  }
1743 eromero 326
 
1745 eromero 327
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseCopy,0,0,0,0);CHKERRQ(ierr);
1743 eromero 328
  for(i=0; i<cX; i++) {
329
    ierr = PetscMemcpy(&Y[ldY*i], &X[ldX*i], sizeof(PetscScalar)*rX);
330
    CHKERRQ(ierr);
331
  }
1745 eromero 332
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseCopy,0,0,0,0);CHKERRQ(ierr);
1743 eromero 333
 
334
  PetscFunctionReturn(0);
335
}
336
 
2223 jroman 337
#undef __FUNCT__  
338
#define __FUNCT__ "SlepcDenseCopyTriang"
1752 eromero 339
/*
340
  Y <- X, where
341
  ldX, leading dimension of X,
342
  rX, cX, rows and columns of X
343
  ldY, leading dimension of Y
344
*/
345
PetscErrorCode SlepcDenseCopyTriang(PetscScalar *Y, MatType_t sY, PetscInt ldY,
346
                                    PetscScalar *X, MatType_t sX, PetscInt ldX,
347
                                    PetscInt rX, PetscInt cX)
348
{
349
  PetscErrorCode  ierr;
350
  PetscInt        i,j,c;
1743 eromero 351
 
1752 eromero 352
  PetscFunctionBegin;
353
 
354
  if ((ldX < rX) || (ldY < rX)) {
2219 jroman 355
    SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");
1752 eromero 356
  }
357
 
358
  if (rX != cX) {
2219 jroman 359
    SETERRQ(PETSC_COMM_SELF,1, "Rectangular matrices not supported");
1752 eromero 360
  }
361
 
1756 eromero 362
  if (DVD_IS(sX,DVD_MAT_UTRIANG) &&
363
      DVD_ISNOT(sX,DVD_MAT_LTRIANG)) {        /* UpTr to ... */
364
    if (DVD_IS(sY,DVD_MAT_UTRIANG) &&
365
        DVD_ISNOT(sY,DVD_MAT_LTRIANG))        /* ... UpTr, */
1752 eromero 366
      c = 0;                                      /*     so copy */
1756 eromero 367
    else if(DVD_ISNOT(sY,DVD_MAT_UTRIANG) &&
368
            DVD_IS(sY,DVD_MAT_LTRIANG))       /* ... LoTr, */
1752 eromero 369
      c = 1;                                      /*     so transpose */
370
    else                                          /* ... Full, */
371
      c = 2;                                      /*     so reflect from up */
1756 eromero 372
  } else if (DVD_ISNOT(sX,DVD_MAT_UTRIANG) &&
373
             DVD_IS(sX,DVD_MAT_LTRIANG)) {    /* LoTr to ... */
374
    if (DVD_IS(sY,DVD_MAT_UTRIANG) &&
375
        DVD_ISNOT(sY,DVD_MAT_LTRIANG))        /* ... UpTr, */
1752 eromero 376
      c = 1;                                      /*     so transpose */
1756 eromero 377
    else if(DVD_ISNOT(sY,DVD_MAT_UTRIANG) &&
378
            DVD_IS(sY,DVD_MAT_LTRIANG))       /* ... LoTr, */
1752 eromero 379
      c = 0;                                      /*     so copy */
380
    else                                          /* ... Full, */
381
      c = 3;                                      /*     so reflect fr. down */
382
  } else                                          /* Full to any, */
383
    c = 0;                                        /*     so copy */
384
 
385
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseCopy,0,0,0,0);CHKERRQ(ierr);
386
 
387
  switch(c) {
388
  case 0: /* copy */
389
    for(i=0; i<cX; i++) {
390
      ierr = PetscMemcpy(&Y[ldY*i], &X[ldX*i], sizeof(PetscScalar)*rX);
391
      CHKERRQ(ierr);
392
    }
393
    break;
394
 
395
  case 1: /* transpose */
396
    for(i=0; i<cX; i++)
397
      for(j=0; j<rX; j++)
398
        Y[ldY*j+i] = X[ldX*i+j];
399
    break;
400
 
401
  case 2: /* reflection from up */
402
    for(i=0; i<cX; i++)
403
      for(j=0; j<PetscMin(i+1,rX); j++)
1988 eromero 404
        Y[ldY*j+i] = PetscConj(Y[ldY*i+j] = X[ldX*i+j]);
1752 eromero 405
    break;
406
 
407
  case 3: /* reflection from down */
408
    for(i=0; i<cX; i++)
409
      for(j=i; j<rX; j++)
1988 eromero 410
        Y[ldY*j+i] = PetscConj(Y[ldY*i+j] = X[ldX*i+j]);
1752 eromero 411
    break;
412
  }
413
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseCopy,0,0,0,0);CHKERRQ(ierr);
414
 
415
  PetscFunctionReturn(0);
416
}
417
 
418
 
2223 jroman 419
#undef __FUNCT__  
420
#define __FUNCT__ "SlepcUpdateVectorsZ"
1743 eromero 421
/*
1675 slepc 422
  Compute Y[0..cM-1] <- alpha * X[0..cX-1] * M + beta * Y[0..cM-1],
423
  where X and Y are contiguous global vectors.
1626 slepc 424
*/
1675 slepc 425
PetscErrorCode SlepcUpdateVectorsZ(Vec *Y, PetscScalar beta, PetscScalar alpha,
426
  Vec *X, PetscInt cX, const PetscScalar *M, PetscInt ldM, PetscInt rM,
427
  PetscInt cM)
1626 slepc 428
{
429
  PetscErrorCode  ierr;
1960 eromero 430
 
431
  PetscFunctionBegin;
432
 
433
  ierr = SlepcUpdateVectorsS(Y, 1, beta, alpha, X, cX, 1, M, ldM, rM, cM);
434
  CHKERRQ(ierr);
435
 
436
  PetscFunctionReturn(0);
437
}
438
 
439
 
2223 jroman 440
#undef __FUNCT__  
441
#define __FUNCT__ "SlepcUpdateVectorsS"
1960 eromero 442
/*
1965 eromero 443
  Compute Y[0:dY:cM*dY-1] <- alpha * X[0:dX:cX-1] * M + beta * Y[0:dY:cM*dY-1],
1960 eromero 444
  where X and Y are contiguous global vectors.
445
*/
446
PetscErrorCode SlepcUpdateVectorsS(Vec *Y, PetscInt dY, PetscScalar beta,
447
  PetscScalar alpha, Vec *X, PetscInt cX, PetscInt dX, const PetscScalar *M,
448
  PetscInt ldM, PetscInt rM, PetscInt cM)
449
{
2334 jroman 450
  PetscErrorCode    ierr;
451
  const PetscScalar *px;
452
  PetscScalar       *py;
453
  PetscInt          rX, rY, ldX, ldY, i, rcX;
1626 slepc 454
 
455
  PetscFunctionBegin;
2346 eromero 456
  SlepcValidVecsContiguous(Y,cM*dY,1);
457
  SlepcValidVecsContiguous(X,cX,5);
1626 slepc 458
 
1965 eromero 459
  /* Compute the real number of columns */
460
  rcX = cX/dX;
461
  if (rcX != rM) {
2214 jroman 462
    SETERRQ(((PetscObject)*Y)->comm,1, "Matrix dimensions do not match");
1735 eromero 463
  }
464
 
1965 eromero 465
  if ((rcX == 0) || (rM == 0) || (cM == 0)) {
1626 slepc 466
    PetscFunctionReturn(0);
1960 eromero 467
  } else if ((Y + cM <= X) || (X + cX <= Y) ||
468
             ((X != Y) && ((PetscMax(dX,dY))%(PetscMin(dX,dY))!=0))) {
1626 slepc 469
    /* If Y[0..cM-1] and X[0..cX-1] are not overlapped... */
470
 
471
    /* Get the dense matrices and dimensions associated to Y and X */
1960 eromero 472
    ierr = VecGetLocalSize(X[0], &rX); CHKERRQ(ierr);
473
    ierr = VecGetLocalSize(Y[0], &rY); CHKERRQ(ierr);
474
    if (rX != rY) {
2214 jroman 475
      SETERRQ(((PetscObject)*Y)->comm,1, "The multivectors do not have the same dimension");
1626 slepc 476
    }
2334 jroman 477
    ierr = VecGetArrayRead(X[0], &px);CHKERRQ(ierr);
1626 slepc 478
    ierr = VecGetArray(Y[0], &py);CHKERRQ(ierr);
479
 
1960 eromero 480
    /* Update the strides */
481
    ldX = rX*dX; ldY= rY*dY;
482
 
1626 slepc 483
    /* Do operation */
1965 eromero 484
    ierr = SlepcDenseMatProd(py, ldY, beta, alpha, px, ldX, rX, rcX,
1675 slepc 485
                    PETSC_FALSE, M, ldM, rM, cM, PETSC_FALSE); CHKERRQ(ierr);
1626 slepc 486
 
2334 jroman 487
    ierr = VecRestoreArrayRead(X[0], &px);CHKERRQ(ierr);
1626 slepc 488
    ierr = VecRestoreArray(Y[0], &py);CHKERRQ(ierr);
1992 eromero 489
    for(i=1; i<cM; i++) {
490
      ierr = PetscObjectStateIncrease((PetscObject)Y[dY*i]); CHKERRQ(ierr);
491
    }
1626 slepc 492
 
1960 eromero 493
  } else if ((Y >= X) && (beta == 0.0) && (dY == dX)) {
1626 slepc 494
    /* If not, call to SlepcUpdateVectors */
1965 eromero 495
    ierr = SlepcUpdateStrideVectors(cX, X, Y-X, dX, Y-X+cM*dX, M-ldM*(Y-X),
496
                                    ldM, PETSC_FALSE); CHKERRQ(ierr);
1675 slepc 497
    if (alpha != 1.0)
498
      for (i=0; i<cM; i++) {
499
        ierr = VecScale(Y[i], alpha); CHKERRQ(ierr);
500
      }
1626 slepc 501
  } else {
2214 jroman 502
    SETERRQ(((PetscObject)*Y)->comm,1, "Unsupported case");
1626 slepc 503
  }
504
 
505
  PetscFunctionReturn(0);
506
}
507
 
2223 jroman 508
#undef __FUNCT__  
509
#define __FUNCT__ "SlepcUpdateVectorsD"
1965 eromero 510
/*
511
  Compute X <- alpha * X[0:dX:cX-1] * M
512
  where X is a matrix with non-consecutive columns
513
*/
514
PetscErrorCode SlepcUpdateVectorsD(Vec *X, PetscInt cX, PetscScalar alpha,
515
  const PetscScalar *M, PetscInt ldM, PetscInt rM, PetscInt cM,
516
  PetscScalar *work, PetscInt lwork)
517
{
2334 jroman 518
  PetscErrorCode ierr;
519
  PetscScalar    **px, *Y, *Z;
520
  PetscInt       rX, i, j, rY, rY0, ldY;
1960 eromero 521
 
1965 eromero 522
  PetscFunctionBegin;
2346 eromero 523
  SlepcValidVecsContiguous(X,cX,1);
1965 eromero 524
 
525
  if (cX != rM) {
2214 jroman 526
    SETERRQ(((PetscObject)*X)->comm,1, "Matrix dimensions do not match");
1965 eromero 527
  }
528
 
529
  rY = (lwork/2)/cX;
530
  if (rY <= 0) {
2214 jroman 531
    SETERRQ(((PetscObject)*X)->comm,1, "Insufficient work space given");
1965 eromero 532
  }
533
  Y = work; Z = &Y[cX*rY]; ldY = rY;
534
 
535
  if ((cX == 0) || (rM == 0) || (cM == 0)) {
536
    PetscFunctionReturn(0);
537
  }
538
 
539
  /* Get the dense vectors associated to the columns of X */
540
  ierr = PetscMalloc(sizeof(Vec)*cX, &px); CHKERRQ(ierr);
541
  for(i=0; i<cX; i++) {
542
    ierr = VecGetArray(X[i], &px[i]); CHKERRQ(ierr);
543
  }
544
  ierr = VecGetLocalSize(X[0], &rX); CHKERRQ(ierr);
545
 
546
  for(i=0, rY0=0; i<rX; i+=rY0) {
547
    rY0 = PetscMin(rY, rX-i);
548
 
549
    /* Y <- X[i0:i1,:] */
550
    for(j=0; j<cX; j++) {
551
      ierr = SlepcDenseCopy(&Y[ldY*j], ldY, px[j]+i, rX, rY0, 1);
552
      CHKERRQ(ierr);
553
    }
554
 
555
    /* Z <- Y * M */
556
    ierr = SlepcDenseMatProd(Z, ldY, 0.0, alpha, Y, ldY, rY0, cX, PETSC_FALSE,
557
                                                 M, ldM, rM, cM, PETSC_FALSE);
558
    CHKERRQ(ierr);
559
 
560
    /* X <- Z */
561
    for(j=0; j<cM; j++) {
562
      ierr = SlepcDenseCopy(px[j]+i, rX, &Z[j*ldY], ldY, rY0, 1);
563
      CHKERRQ(ierr);
564
    }
565
  }
566
 
567
  for(i=0; i<cX; i++) {
568
    ierr = VecRestoreArray(X[i], &px[i]); CHKERRQ(ierr);
569
  }
570
  ierr = PetscFree(px); CHKERRQ(ierr);
571
 
572
  PetscFunctionReturn(0);
573
}
574
 
575
 
576
 
2223 jroman 577
#undef __FUNCT__  
578
#define __FUNCT__ "VecsMult"
1634 slepc 579
/* Computes M <- [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ]
580
                 [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
581
  where W = U' * V.
1640 slepc 582
  workS0 and workS1 are an auxiliary scalar vector of size
583
  (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
584
  is needed, and of size eU*eV.
1634 slepc 585
*/
1747 eromero 586
PetscErrorCode VecsMult(PetscScalar *M, MatType_t sM, PetscInt ldM,
1634 slepc 587
                        Vec *U, PetscInt sU, PetscInt eU,
588
                        Vec *V, PetscInt sV, PetscInt eV,
1640 slepc 589
                        PetscScalar *workS0, PetscScalar *workS1)
1634 slepc 590
{
2334 jroman 591
  PetscErrorCode    ierr;
592
  PetscInt          ldU, ldV, i, j, k;
593
  const PetscScalar *pu, *pv;
594
  PetscScalar       *W, *Wr;
1634 slepc 595
 
596
  PetscFunctionBegin;
597
 
598
  /* Check if quick exit */
599
  if ((eU-sU == 0) || (eV-sV == 0))
600
    PetscFunctionReturn(0);
2346 eromero 601
 
602
  SlepcValidVecsContiguous(U,eU,4);
603
  SlepcValidVecsContiguous(V,eU,7);
1634 slepc 604
 
605
  /* Get the dense matrices and dimensions associated to U and V */
606
  ierr = VecGetLocalSize(U[0], &ldU); CHKERRQ(ierr);
607
  ierr = VecGetLocalSize(V[0], &ldV); CHKERRQ(ierr);
608
  if (ldU != ldV) {
2214 jroman 609
    SETERRQ(((PetscObject)*U)->comm,1, "Matrix dimensions do not match");
1634 slepc 610
  }
2334 jroman 611
  ierr = VecGetArrayRead(U[0], &pu);CHKERRQ(ierr);
612
  ierr = VecGetArrayRead(V[0], &pv);CHKERRQ(ierr);
1634 slepc 613
 
1640 slepc 614
  if (workS0)
615
    W = workS0;
1634 slepc 616
  else {
1640 slepc 617
    ierr = PetscMalloc(sizeof(PetscScalar)*((eU-sU)*sV+(eV-sV)*eU), &W);
618
    CHKERRQ(ierr);
1634 slepc 619
  }
620
 
1745 eromero 621
  ierr = PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
622
 
1640 slepc 623
  if ((sU == 0) && (sV == 0) && (eU == ldM)) {
624
    /* Use the smart memory usage version */
1634 slepc 625
 
1640 slepc 626
    /* W <- U' * V */
1747 eromero 627
    ierr = SlepcDenseMatProdTriang(W, sM, eU,
628
                                   pu, 0, ldU, ldU, eU, PETSC_TRUE,
629
                                   pv, 0, ldV, ldV, eV, PETSC_FALSE);
1640 slepc 630
    CHKERRQ(ierr);
631
 
632
    /* ReduceAll(W, SUM) */
1899 eromero 633
    ierr = MPI_Allreduce(W, M, eU*eV, MPIU_SCALAR, MPIU_SUM,
1640 slepc 634
                         ((PetscObject)U[0])->comm); CHKERRQ(ierr);
1747 eromero 635
  /* Full M matrix */
1756 eromero 636
  } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
637
             DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
1640 slepc 638
    if (workS1)
639
      Wr = workS1;
640
    else {
641
      ierr = PetscMalloc(sizeof(PetscScalar)*((eU-sU)*sV+(eV-sV)*eU), &Wr);
642
      CHKERRQ(ierr);
643
    }
1747 eromero 644
 
1640 slepc 645
    /* W(0:(eU-sU)*sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
646
    ierr = SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
647
                             pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
648
                             pv       , ldV, ldV, sV,    PETSC_FALSE);
649
    CHKERRQ(ierr);
650
 
651
    /* W((eU-sU)*sV:(eU-sU)*sV+(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
652
    ierr = SlepcDenseMatProd(W+(eU-sU)*sV, eU, 0.0, 1.0,
653
                             pu,        ldU, ldU, eU,    PETSC_TRUE,
654
                             pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
655
    CHKERRQ(ierr);
656
 
657
    /* ReduceAll(W, SUM) */
658
    ierr = MPI_Allreduce(W, Wr, (eU-sU)*sV+(eV-sV)*eU, MPIU_SCALAR,
1899 eromero 659
                      MPIU_SUM, ((PetscObject)U[0])->comm); CHKERRQ(ierr);
1640 slepc 660
 
661
    /* M(...,...) <- W */
662
    for (i=0,k=0; i<sV; i++)
663
      for (j=ldM*i+sU; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
664
    for (i=sV; i<eV; i++)
665
        for (j=ldM*i; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
666
 
1747 eromero 667
    if (!workS1) {
668
      ierr = PetscFree(Wr); CHKERRQ(ierr);
669
    }
1745 eromero 670
 
1747 eromero 671
  /* Upper triangular M matrix */
1756 eromero 672
  } else if (DVD_IS(sM,DVD_MAT_UTRIANG) &&
673
             DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
1747 eromero 674
    if (workS1)
675
      Wr = workS1;
676
    else {
677
      ierr = PetscMalloc(sizeof(PetscScalar)*(eV-sV)*eU, &Wr);
678
      CHKERRQ(ierr);
679
    }
680
 
681
    /* W(0:(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
682
    ierr = SlepcDenseMatProd(W,         eU,  0.0, 1.0,
683
                             pu,        ldU, ldU, eU,    PETSC_TRUE,
684
                             pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
685
    CHKERRQ(ierr);
686
 
687
    /* ReduceAll(W, SUM) */
1899 eromero 688
    ierr = MPI_Allreduce(W, Wr, (eV-sV)*eU, MPIU_SCALAR, MPIU_SUM,
1747 eromero 689
                         ((PetscObject)U[0])->comm); CHKERRQ(ierr);
690
 
691
    /* M(...,...) <- W */
692
    for (i=sV,k=0; i<eV; i++)
693
        for (j=ldM*i; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
694
 
1640 slepc 695
    if (!workS1) {
696
      ierr = PetscFree(Wr); CHKERRQ(ierr);
697
    }
1747 eromero 698
 
699
  /* Lower triangular M matrix */
1756 eromero 700
  } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
701
             DVD_IS(sM,DVD_MAT_LTRIANG)) {
1747 eromero 702
    if (workS1)
703
      Wr = workS1;
704
    else {
705
      ierr = PetscMalloc(sizeof(PetscScalar)*(eU-sU)*eV, &Wr);
706
      CHKERRQ(ierr);
707
    }
708
 
709
    /* W(0:(eU-sU)*eV-1) <- U(sU:eU-1)' * V(0:eV-1) */
710
    ierr = SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
711
                             pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
712
                             pv       , ldV, ldV, eV,    PETSC_FALSE);
713
    CHKERRQ(ierr);
714
 
715
    /* ReduceAll(W, SUM) */
1899 eromero 716
    ierr = MPI_Allreduce(W, Wr, (eU-sU)*eV, MPIU_SCALAR, MPIU_SUM,
1747 eromero 717
                         ((PetscObject)U[0])->comm); CHKERRQ(ierr);
718
 
719
    /* M(...,...) <- W */
720
    for (i=0,k=0; i<eV; i++)
721
      for (j=ldM*i+sU; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
722
 
723
    if (!workS1) {
724
      ierr = PetscFree(Wr); CHKERRQ(ierr);
725
    }
1634 slepc 726
  }
727
 
1747 eromero 728
  ierr = PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
729
 
1640 slepc 730
  if (!workS0) {
1634 slepc 731
    ierr = PetscFree(W); CHKERRQ(ierr);
732
  }
733
 
2334 jroman 734
  ierr = VecRestoreArrayRead(U[0], &pu); CHKERRQ(ierr);
735
  ierr = VecRestoreArrayRead(V[0], &pv); CHKERRQ(ierr);
1634 slepc 736
  PetscFunctionReturn(0);
737
}
738
 
1795 eromero 739
 
2223 jroman 740
#undef __FUNCT__  
741
#define __FUNCT__ "VecsMultIa"
1753 eromero 742
/* Computes M <- [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ]
743
                 [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
1808 eromero 744
  where W = local_U' * local_V. Needs VecsMultIb for completing the operation!
1795 eromero 745
  workS0 and workS1 are an auxiliary scalar vector of size
746
  (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
747
  is needed, and of size eU*eV.
748
*/
749
PetscErrorCode VecsMultIa(PetscScalar *M, MatType_t sM, PetscInt ldM,
750
                          Vec *U, PetscInt sU, PetscInt eU,
751
                          Vec *V, PetscInt sV, PetscInt eV)
752
{
753
  PetscErrorCode  ierr;
754
  PetscInt        ldU, ldV;
755
  PetscScalar     *pu, *pv;
756
 
757
  PetscFunctionBegin;
758
 
759
  /* Check if quick exit */
760
  if ((eU-sU == 0) || (eV-sV == 0))
761
    PetscFunctionReturn(0);
762
 
2346 eromero 763
  SlepcValidVecsContiguous(U,eU,4);
764
  SlepcValidVecsContiguous(V,eU,7);
765
 
1795 eromero 766
  /* Get the dense matrices and dimensions associated to U and V */
767
  ierr = VecGetLocalSize(U[0], &ldU); CHKERRQ(ierr);
768
  ierr = VecGetLocalSize(V[0], &ldV); CHKERRQ(ierr);
769
  if (ldU != ldV) {
2214 jroman 770
    SETERRQ(((PetscObject)*U)->comm,1, "Matrix dimensions do not match");
1795 eromero 771
  }
772
  ierr = VecGetArray(U[0], &pu);CHKERRQ(ierr);
773
  ierr = VecGetArray(V[0], &pv);CHKERRQ(ierr);
774
 
775
  if ((sU == 0) && (sV == 0) && (eU == ldM)) {
776
    /* M <- local_U' * local_V */
777
    ierr = SlepcDenseMatProdTriang(M, sM, eU,
778
                                   pu, 0, ldU, ldU, eU, PETSC_TRUE,
779
                                   pv, 0, ldV, ldV, eV, PETSC_FALSE);
780
    CHKERRQ(ierr);
781
 
782
  /* Full M matrix */
783
  } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
784
             DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
785
    /* M(sU:eU-1,0:sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
786
    ierr = SlepcDenseMatProd(&M[sU], ldM, 0.0, 1.0,
787
                             pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
788
                             pv       , ldV, ldV, sV,    PETSC_FALSE);
789
    CHKERRQ(ierr);
790
 
791
    /* M(0:eU-1,sV:eV-1) <- U(0:eU-1)' * V(sV:eV-1) */
792
    ierr = SlepcDenseMatProd(&M[ldM*sV], ldM, 0.0, 1.0,
793
                             pu,        ldU, ldU, eU,    PETSC_TRUE,
794
                             pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
795
    CHKERRQ(ierr);
796
 
797
  /* Other structures */
2214 jroman 798
  } else SETERRQ(((PetscObject)*U)->comm,1, "Matrix structure not supported");
1795 eromero 799
 
800
  ierr = VecRestoreArray(U[0], &pu); CHKERRQ(ierr);
1992 eromero 801
  ierr = PetscObjectStateDecrease((PetscObject)U[0]); CHKERRQ(ierr);
1795 eromero 802
  ierr = VecRestoreArray(V[0], &pv); CHKERRQ(ierr);
1992 eromero 803
  ierr = PetscObjectStateDecrease((PetscObject)V[0]); CHKERRQ(ierr);
1795 eromero 804
 
805
  PetscFunctionReturn(0);
806
}
807
 
808
 
2223 jroman 809
#undef __FUNCT__  
810
#define __FUNCT__ "VecsMultIc"
1808 eromero 811
/* Computes M <- nprocs*M
812
  where nprocs is the number of processors.
813
*/
814
PetscErrorCode VecsMultIc(PetscScalar *M, MatType_t sM, PetscInt ldM,
815
                          PetscInt rM, PetscInt cM, Vec V)
816
{
2117 eromero 817
  int        i,j,n;
1808 eromero 818
 
819
  PetscFunctionBegin;
820
 
821
  /* Check if quick exit */
822
  if ((rM == 0) || (cM == 0))
823
    PetscFunctionReturn(0);
824
 
2214 jroman 825
  if (sM != 0) SETERRQ(((PetscObject)V)->comm,1, "Matrix structure not supported");
1808 eromero 826
 
827
  MPI_Comm_size(((PetscObject)V)->comm, &n);
828
 
829
  for(i=0; i<cM; i++)
830
    for(j=0; j<rM; j++)
831
      M[ldM*i+j]/= (PetscScalar)n;
832
 
833
  PetscFunctionReturn(0);
834
}
835
 
836
 
2223 jroman 837
#undef __FUNCT__  
838
#define __FUNCT__ "VecsMultIb"
1795 eromero 839
/* Computes N <- Allreduce( [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ] )
840
                          ( [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ] )
1753 eromero 841
  where W = U' * V.
1795 eromero 842
  workS0 and workS1 are an auxiliary scalar vector of size
843
  (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
844
  is needed, and of size eU*eV.
845
*/
846
PetscErrorCode VecsMultIb(PetscScalar *M, MatType_t sM, PetscInt ldM,
847
                          PetscInt rM, PetscInt cM, PetscScalar *auxS,
848
                          Vec V)
849
{
850
  PetscErrorCode  ierr;
851
  PetscScalar     *W, *Wr;
852
 
853
  PetscFunctionBegin;
854
 
855
  /* Check if quick exit */
856
  if ((rM == 0) || (cM == 0))
857
    PetscFunctionReturn(0);
858
 
859
  if (auxS)
860
    W = auxS;
861
  else {
862
    ierr = PetscMalloc(sizeof(PetscScalar)*rM*cM*2, &W);
863
    CHKERRQ(ierr);
864
  }
865
  Wr = W + rM*cM;
866
 
867
  ierr = PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
868
 
869
  if (sM == 0) {
870
    /* W <- M */
871
    ierr = SlepcDenseCopy(W, rM, M, ldM, rM, cM); CHKERRQ(ierr);
872
 
873
    /* Wr <- ReduceAll(W, SUM) */
1899 eromero 874
    ierr = MPI_Allreduce(W, Wr, rM*cM, MPIU_SCALAR, MPIU_SUM,
1795 eromero 875
                         ((PetscObject)V)->comm); CHKERRQ(ierr);
876
 
877
    /* M <- Wr */
878
    ierr = SlepcDenseCopy(M, ldM, Wr, rM, rM, cM); CHKERRQ(ierr);
879
 
880
  /* Other structures */
2214 jroman 881
  } else SETERRQ(((PetscObject)V)->comm,1, "Matrix structure not supported");
1795 eromero 882
 
883
  ierr = PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
884
 
885
  if (!auxS) {
886
    ierr = PetscFree(W); CHKERRQ(ierr);
887
  }
888
 
889
  PetscFunctionReturn(0);
890
}
891
 
892
 
2223 jroman 893
#undef __FUNCT__  
894
#define __FUNCT__ "VecsMultS"
1795 eromero 895
/* Computes M <- [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ]
896
                 [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
897
  where W = U' * V.
1753 eromero 898
  r, a DvdReduction structure,
899
  sr, an structure DvdMult_copy_func.
900
*/
901
PetscErrorCode VecsMultS(PetscScalar *M, MatType_t sM, PetscInt ldM,
902
                         Vec *U, PetscInt sU, PetscInt eU,
903
                         Vec *V, PetscInt sV, PetscInt eV, DvdReduction *r,
904
                         DvdMult_copy_func *sr)
905
{
2334 jroman 906
  PetscErrorCode    ierr;
907
  PetscInt          ldU, ldV;
908
  const PetscScalar *pu, *pv;
909
  PetscScalar       *W;
1753 eromero 910
 
911
  PetscFunctionBegin;
912
 
913
  /* Check if quick exit */
914
  if ((eU-sU == 0) || (eV-sV == 0))
915
    PetscFunctionReturn(0);
916
 
2346 eromero 917
  SlepcValidVecsContiguous(U,eU,4);
918
  SlepcValidVecsContiguous(V,eU,7);
919
 
1753 eromero 920
  /* Get the dense matrices and dimensions associated to U and V */
921
  ierr = VecGetLocalSize(U[0], &ldU); CHKERRQ(ierr);
922
  ierr = VecGetLocalSize(V[0], &ldV); CHKERRQ(ierr);
923
  if (ldU != ldV) {
2214 jroman 924
    SETERRQ(((PetscObject)*U)->comm,1, "Matrix dimensions do not match");
1753 eromero 925
  }
2334 jroman 926
  ierr = VecGetArrayRead(U[0], &pu);CHKERRQ(ierr);
927
  ierr = VecGetArrayRead(V[0], &pv);CHKERRQ(ierr);
1753 eromero 928
 
929
  ierr = PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
930
 
931
  if ((sU == 0) && (sV == 0) && (eU == ldM)) {
932
    /* Use the smart memory usage version */
933
 
934
    /* Add the reduction to r */
935
    ierr = SlepcAllReduceSum(r, eU*eV, VecsMultS_copy_func, sr, &W);
936
    CHKERRQ(ierr);
937
 
938
    /* W <- U' * V */
939
    ierr = SlepcDenseMatProdTriang(W, sM, eU,
940
                                   pu, 0, ldU, ldU, eU, PETSC_TRUE,
941
                                   pv, 0, ldV, ldV, eV, PETSC_FALSE);
942
    CHKERRQ(ierr);
943
 
944
    /* M <- ReduceAll(W, SUM) */
945
    sr->M = M;    sr->ld = ldM;
946
    sr->i0 = 0;   sr->i1 = eV;    sr->s0 = sU;    sr->e0 = eU;
947
                  sr->i2 = eV;
948
 
949
  /* Full M matrix */
1756 eromero 950
  } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
951
             DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
1753 eromero 952
    /* Add the reduction to r */
953
    ierr = SlepcAllReduceSum(r, (eU-sU)*sV+(eV-sV)*eU, VecsMultS_copy_func,
954
                             sr, &W);
955
    CHKERRQ(ierr);
956
 
957
    /* W(0:(eU-sU)*sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
958
    ierr = SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
959
                             pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
960
                             pv       , ldV, ldV, sV,    PETSC_FALSE);
961
    CHKERRQ(ierr);
962
 
963
    /* W((eU-sU)*sV:(eU-sU)*sV+(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
964
    ierr = SlepcDenseMatProd(W+(eU-sU)*sV, eU, 0.0, 1.0,
965
                             pu,        ldU, ldU, eU,    PETSC_TRUE,
966
                             pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
967
    CHKERRQ(ierr);
968
 
969
    /* M <- ReduceAll(W, SUM) */
970
    sr->M = M;    sr->ld = ldM;
971
    sr->i0 = 0;   sr->i1 = sV;    sr->s0 = sU;    sr->e0 = eU;
972
                  sr->i2 = eV;    sr->s1 = 0;     sr->e1 = eU;
973
 
974
  /* Upper triangular M matrix */
1756 eromero 975
  } else if (DVD_IS(sM,DVD_MAT_UTRIANG) &&
976
             DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
1753 eromero 977
    /* Add the reduction to r */
978
    ierr = SlepcAllReduceSum(r, (eV-sV)*eU, VecsMultS_copy_func, sr, &W);
979
    CHKERRQ(ierr);
980
 
981
    /* W(0:(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
982
    ierr = SlepcDenseMatProd(W,         eU,  0.0, 1.0,
983
                             pu,        ldU, ldU, eU,    PETSC_TRUE,
984
                             pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
985
    CHKERRQ(ierr);
986
 
987
    /* M <- ReduceAll(W, SUM) */
988
    sr->M = M;    sr->ld = ldM;
989
    sr->i0 = sV;  sr->i1 = eV;    sr->s0 = 0;     sr->e0 = eU;
990
                  sr->i2 = eV;
991
 
992
  /* Lower triangular M matrix */
1756 eromero 993
  } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
994
             DVD_IS(sM,DVD_MAT_LTRIANG)) {
1753 eromero 995
    /* Add the reduction to r */
996
    ierr = SlepcAllReduceSum(r, (eU-sU)*eV, VecsMultS_copy_func, sr, &W);
997
    CHKERRQ(ierr);
998
 
999
    /* W(0:(eU-sU)*eV-1) <- U(sU:eU-1)' * V(0:eV-1) */
1000
    ierr = SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
1001
                             pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
1002
                             pv       , ldV, ldV, eV,    PETSC_FALSE);
1003
    CHKERRQ(ierr);
1004
 
1005
    /* ReduceAll(W, SUM) */
1006
    sr->M = M;    sr->ld = ldM;
1007
    sr->i0 = 0;   sr->i1 = eV;    sr->s0 = sU;    sr->e0 = eU;
1008
                  sr->i2 = eV;
1009
  }
1010
 
1011
  ierr = PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
1012
 
2334 jroman 1013
  ierr = VecRestoreArrayRead(U[0], &pu); CHKERRQ(ierr);
1014
  ierr = VecRestoreArrayRead(V[0], &pv); CHKERRQ(ierr);
1753 eromero 1015
  PetscFunctionReturn(0);
1016
}
1017
 
1018
#undef __FUNCT__  
1019
#define __FUNCT__ "VecsMultS_copy_func"
1020
PetscErrorCode VecsMultS_copy_func(PetscScalar *out, PetscInt size_out,
1021
                                   void *ptr)
1022
{
1023
  PetscInt        i, j, k;
1024
  DvdMult_copy_func
1025
                  *sr = (DvdMult_copy_func*)ptr;
1026
 
1027
  PetscFunctionBegin;
1028
 
1029
  for (i=sr->i0,k=0; i<sr->i1; i++)
1030
    for (j=sr->ld*i+sr->s0; j<sr->ld*i+sr->e0; j++,k++) sr->M[j] = out[k];
1031
  for (i=sr->i1; i<sr->i2; i++)
1032
    for (j=sr->ld*i+sr->s1; j<sr->ld*i+sr->e1; j++,k++) sr->M[j] = out[k];
1033
 
2214 jroman 1034
  if (k != size_out) SETERRQ(PETSC_COMM_SELF,1, "Wrong size");
1753 eromero 1035
 
1036
  PetscFunctionReturn(0);
1037
}
1038
 
2223 jroman 1039
#undef __FUNCT__  
1040
#define __FUNCT__ "VecsOrthonormalize"
1675 slepc 1041
/* Orthonormalize a chunk of parallel vector.
1042
   NOTE: wS0 and wS1 must be of size n*n.
1043
*/
1044
PetscErrorCode VecsOrthonormalize(Vec *V, PetscInt n, PetscScalar *wS0,
1045
                                  PetscScalar *wS1)
1046
{
1047
  PetscErrorCode  ierr;
1048
  PetscBLASInt    nn = n, info, ld;
1992 eromero 1049
  PetscInt        ldV, i;
1675 slepc 1050
  PetscScalar     *H, *T, one=1.0, *pv;
1051
 
1052
  PetscFunctionBegin;
1053
 
1054
  if (!wS0) {
1055
    ierr = PetscMalloc(sizeof(PetscScalar)*n*n, &H); CHKERRQ(ierr);
1056
  } else
1057
    H = wS0;
1058
  if (!wS1) {
1059
    ierr = PetscMalloc(sizeof(PetscScalar)*n*n, &T); CHKERRQ(ierr);
1060
  } else
1061
    T = wS1;
1062
 
1063
  /* H <- V' * V */
1747 eromero 1064
  ierr = VecsMult(H, 0, n, V, 0, n, V, 0, n, T, PETSC_NULL); CHKERRQ(ierr);
1675 slepc 1065
 
1066
  /* H <- chol(H) */
1067
  LAPACKpbtrf_("U", &nn, &nn, H, &nn, &info);
2214 jroman 1068
  if (info) SETERRQ1(((PetscObject)*V)->comm,PETSC_ERR_LIB, "Error in Lapack PBTRF %d", info);
1675 slepc 1069
 
1070
  /* V <- V * inv(H) */
1071
  ierr = VecGetLocalSize(V[0], &ldV); CHKERRQ(ierr);
1072
  ierr = VecGetArray(V[0], &pv);CHKERRQ(ierr);
1073
  ld = ldV;
1074
  BLAStrsm_("R", "U", "N", "N", &ld, &nn, &one, H, &nn, pv, &ld);
1075
  ierr = VecRestoreArray(V[0], &pv);CHKERRQ(ierr);
1992 eromero 1076
  for(i=1; i<n; i++) {
1077
    ierr = PetscObjectStateIncrease((PetscObject)V[i]); CHKERRQ(ierr);
1078
  }
1675 slepc 1079
 
1080
  if (!wS0) {
1081
    ierr = PetscFree(H); CHKERRQ(ierr);
1082
  }
1083
  if (!wS1) {
1084
    ierr = PetscFree(T); CHKERRQ(ierr);
1085
  }
1086
 
1087
  PetscFunctionReturn(0);
1088
}
1089
 
2223 jroman 1090
#undef __FUNCT__  
1091
#define __FUNCT__ "SlepcAllReduceSumBegin"
1753 eromero 1092
/*
1093
  Sum up several arrays with only one call to MPIReduce.
1094
*/
1095
PetscErrorCode SlepcAllReduceSumBegin(DvdReductionChunk *ops,
1096
                                      PetscInt max_size_ops,
1097
                                      PetscScalar *in, PetscScalar *out,
1098
                                      PetscInt max_size_in, DvdReduction *r,
1099
                                      MPI_Comm comm)
1100
{
1101
  PetscFunctionBegin;
1102
 
1103
  r->in = in;
1104
  r->out = out;
1105
  r->size_in = 0;
1106
  r->max_size_in = max_size_in;
1107
  r->ops = ops;
1108
  r->size_ops = 0;
1109
  r->max_size_ops = max_size_ops;
1110
  r->comm = comm;
1111
 
1112
  PetscFunctionReturn(0);
1113
}
1114
 
1115
#undef __FUNCT__  
1116
#define __FUNCT__ "SlepcAllReduceSum"
1117
PetscErrorCode SlepcAllReduceSum(DvdReduction *r, PetscInt size_in,
1118
                                 DvdReductionPostF f, void *ptr,
1119
                                 PetscScalar **in)
1120
{
1121
  PetscFunctionBegin;
1122
 
1123
  *in = r->in + r->size_in;
1124
  r->ops[r->size_ops].out = r->out + r->size_in;
1125
  r->ops[r->size_ops].size_out = size_in;
1126
  r->ops[r->size_ops].f = f;
1127
  r->ops[r->size_ops].ptr = ptr;
1128
  if (++(r->size_ops) > r->max_size_ops) {
2214 jroman 1129
    SETERRQ(PETSC_COMM_SELF,1, "max_size_ops is not large enough");
1753 eromero 1130
  }
1131
  if ((r->size_in+= size_in) > r->max_size_in) {
2214 jroman 1132
    SETERRQ(PETSC_COMM_SELF,1, "max_size_in is not large enough");
1753 eromero 1133
  }
1134
 
1135
  PetscFunctionReturn(0);
1136
}
1137
 
1138
 
1139
#undef __FUNCT__  
1140
#define __FUNCT__ "SlepcAllReduceSumEnd"
1141
PetscErrorCode SlepcAllReduceSumEnd(DvdReduction *r)
1142
{
1143
  PetscErrorCode  ierr;
1144
  PetscInt        i;
1145
 
1146
  PetscFunctionBegin;
1147
 
1795 eromero 1148
  /* Check if quick exit */
1149
  if (r->size_ops == 0)
1150
    PetscFunctionReturn(0);
1151
 
1753 eromero 1152
  /* Call the MPIAllReduce routine */
1899 eromero 1153
  ierr = MPI_Allreduce(r->in, r->out, r->size_in, MPIU_SCALAR, MPIU_SUM,
1753 eromero 1154
                       r->comm); CHKERRQ(ierr);
1155
 
1156
  /* Call the postponed routines */
1157
  for(i=0; i<r->size_ops; i++) {
1158
    ierr = r->ops[i].f(r->ops[i].out, r->ops[i].size_out, r->ops[i].ptr);
1159
    CHKERRQ(ierr);
1160
  }
1161
 
1162
  /* Tag the operation as done */
1163
  r->size_ops = 0;
1164
 
1165
  PetscFunctionReturn(0);
1166
}
1795 eromero 1167
 
1831 eromero 1168
 
1795 eromero 1169
#undef __FUNCT__  
1170
#define __FUNCT__ "dvd_orthV"
1989 eromero 1171
PetscErrorCode dvd_orthV(IP ip, Vec *DS, PetscInt size_DS, Vec *cX,
1172
                         PetscInt size_cX, Vec *V, PetscInt V_new_s,
1173
                         PetscInt V_new_e, PetscScalar *auxS, Vec auxV,
1174
                         PetscRandom rand)
1795 eromero 1175
{
1176
  PetscErrorCode  ierr;
1177
  PetscInt        i, j;
2216 jroman 1178
  PetscBool       lindep;
1795 eromero 1179
  PetscReal       norm;
1869 eromero 1180
  PetscScalar     *auxS0 = auxS;
1831 eromero 1181
 
1795 eromero 1182
  PetscFunctionBegin;
1831 eromero 1183
 
1795 eromero 1184
  /* Orthonormalize V with IP */
1185
  for (i=V_new_s; i<V_new_e; i++) {
1186
    for(j=0; j<3; j++) {
1975 eromero 1187
      if (j>0) { ierr = SlepcVecSetRandom(V[i], rand); CHKERRQ(ierr); }
1831 eromero 1188
      if (cX + size_cX == V) {
1189
        /* If cX and V are contiguous, orthogonalize in one step */
1989 eromero 1190
        ierr = IPOrthogonalize(ip, size_DS, DS, size_cX+i, PETSC_NULL, cX,
1831 eromero 1191
                               V[i], auxS0, &norm, &lindep); CHKERRQ(ierr);
1989 eromero 1192
      } else if (DS) {
1193
        /* Else orthogonalize first against DS, and then against cX and V */
1194
        ierr = IPOrthogonalize(ip, size_DS, DS, size_cX, PETSC_NULL, cX,
1831 eromero 1195
                               V[i], auxS0, PETSC_NULL, &lindep); CHKERRQ(ierr);
2177 jroman 1196
        if(!lindep) {
1831 eromero 1197
          ierr = IPOrthogonalize(ip, 0, PETSC_NULL, i, PETSC_NULL, V,
1198
                                 V[i], auxS0, &norm, &lindep); CHKERRQ(ierr);
1199
        }
1989 eromero 1200
      } else {
1201
        /* Else orthogonalize first against cX and then against V */
1202
        ierr = IPOrthogonalize(ip, size_cX, cX, i, PETSC_NULL, V,
1203
                               V[i], auxS0, &norm, &lindep); CHKERRQ(ierr);
1831 eromero 1204
      }
2177 jroman 1205
      if(!lindep && (norm > PETSC_MACHINE_EPSILON)) break;
2394 jroman 1206
      ierr = PetscInfo1(ip, "Orthonormalization problems adding the vector %D to the searching subspace\n", i);
2013 eromero 1207
      CHKERRQ(ierr);
1795 eromero 1208
    }
2177 jroman 1209
    if(lindep || (norm < PETSC_MACHINE_EPSILON)) {
2214 jroman 1210
        SETERRQ(((PetscObject)ip)->comm,1, "Error during orthonormalization of eigenvectors");
1795 eromero 1211
    }
1212
    ierr = VecScale(V[i], 1.0/norm); CHKERRQ(ierr);
1213
  }
1831 eromero 1214
 
1795 eromero 1215
  PetscFunctionReturn(0);
1216
}
1831 eromero 1217
 
1968 eromero 1218
#undef __FUNCT__  
1219
#define __FUNCT__ "dvd_compute_eigenvectors"
1220
/*
1221
  Compute eigenvectors associated to the Schur decomposition (S, T) and
1222
  save the left vectors in pY and the right vectors in pX, where
1223
  n, size of the eigenproblem
1224
  ldS, ldT, leading dimension of S and T
1225
  ldpX, ldpY, leading dimension of pX and pY
1226
  auxS, auxiliar scalar of length:
1227
    double standard 3n+n*n, double generalized 11n+4n*n,
2022 eromero 1228
    complex standard 3n+n*n, complex generalized 3n+2n*n
1968 eromero 1229
  size_auxS, the length of auxS
1230
  doProd, if true pX and pY return the eigenvectors premultiplied by the input vectors stored in pX and pY respectively
1231
*/
1232
PetscErrorCode dvd_compute_eigenvectors(PetscInt n_, PetscScalar *S,
1233
  PetscInt ldS, PetscScalar *T, PetscInt ldT, PetscScalar *pX,
1234
  PetscInt ldpX_, PetscScalar *pY, PetscInt ldpY_, PetscScalar *auxS,
2216 jroman 1235
  PetscInt size_auxS, PetscBool doProd)
1968 eromero 1236
{
2392 jroman 1237
#if defined(SLEPC_MISSING_LAPACK_GGEV)
1238
  PetscFunctionBegin;
1239
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GGEV - Lapack routine is unavailable.");
1240
#else
1968 eromero 1241
  PetscErrorCode  ierr;
2152 eromero 1242
  PetscBLASInt    n, ldpX, ldpY, nout, info;
1968 eromero 1243
  PetscScalar     *Sc, *Tc;
1244
  const char      *side, *howmny;
1245
#if defined(PETSC_USE_COMPLEX)
1246
  PetscReal       *auxR;
1247
#else
1248
  PetscScalar     *pA,*pB;
1249
  PetscBLASInt    n1, ldpA,ldpB;
1250
  PetscScalar     *alphar, *alphai, *beta;
1251
#endif
1252
 
1253
  PetscFunctionBegin;
2117 eromero 1254
  n = PetscBLASIntCast(n_);
1255
  ldpX = PetscBLASIntCast(ldpX_);
1256
  ldpY = PetscBLASIntCast(ldpY_);
1257
 
1968 eromero 1258
  if (pX && pY) side = "B";
1259
  else if (pX)  side = "R";
1260
  else if (pY)  side = "L";
1261
  else { PetscFunctionReturn(0); }
1262
 
1263
  if (!pX) ldpX = 1;
1264
  if (!pY) ldpY = 1;
1265
 
2177 jroman 1266
  howmny = doProd?"B":"A";
1968 eromero 1267
 
1268
  Sc = auxS; auxS+= n*n; size_auxS-= n*n;
1269
  if (T) Tc = auxS, auxS+= n*n, size_auxS-= n*n;
1270
  else   Tc = PETSC_NULL;
1271
 
1272
  /* Sc <- S, Tc <- T */
1273
  ierr = SlepcDenseCopy(Sc, n, S, ldS, n, n); CHKERRQ(ierr);
1274
  if (T) {
1275
    ierr = SlepcDenseCopy(Tc, n, T, ldT, n, n); CHKERRQ(ierr);
1276
  }
1277
 
1278
  if (T) {
1279
    /* [eigr, pX] = eig(S, T) */
1280
#if defined(PETSC_USE_COMPLEX)
1281
    auxR = (PetscReal*)auxS; auxS = (PetscScalar*)(auxR+2*n); size_auxS-= 2*n;
1282
    if (size_auxS < 2*n)
2214 jroman 1283
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Insufficient work space for xTGEVC");
1968 eromero 1284
    LAPACKtgevc_(side,howmny,PETSC_NULL,&n,Sc,&n,Tc,&n,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,auxR,&info);
2394 jroman 1285
    if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTGEVC %d",info);
1968 eromero 1286
#else
1287
    alphar = auxS; auxS+= n; size_auxS-= n;
1288
    alphai = auxS; auxS+= n; size_auxS-= n;
1289
    beta = auxS; auxS+= n; size_auxS-= n;
2177 jroman 1290
    if (doProd) {
1968 eromero 1291
      if (pX) pA = auxS, auxS+= n*n, size_auxS-= n*n, ldpA = n;
1292
      else    pA = PETSC_NULL, ldpA = 0;
1293
      if (pY) pB = auxS, auxS+= n*n, size_auxS-= n*n, ldpB = n;
1294
      else    pB = PETSC_NULL, ldpB = 0;
1295
    } else {
1296
      pA = pX; pB = pY; ldpA = ldpX; ldpB = ldpY;
1297
    }
1298
    /* LAPACKtgevc_ needs the element i,i+1 in the 2-by-2 digonal blocs
1299
       of T that represent complex conjugate eigenpairs to be zero */
1300
    n1 = size_auxS;
1301
    if (size_auxS < 8*n)
2214 jroman 1302
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Insufficient work space for xGGEV");
1968 eromero 1303
    LAPACKggev_(pY?"V":"N",pX?"V":"N",&n,Sc,&n,Tc,&n,alphar,alphai,beta,pB,&ldpB,pA,&ldpA,auxS,&n1,&info);
2394 jroman 1304
    if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGGEV %d",info);
2177 jroman 1305
    if (doProd) {
1968 eromero 1306
      if (pX) {
1307
        /* pX <- pX * pA */
1308
        ierr = SlepcDenseCopy(Sc, n, pX, ldpX, n, n); CHKERRQ(ierr);
2022 eromero 1309
        ierr = SlepcDenseMatProd(pX, ldpX, 0.0, 1.0,
1968 eromero 1310
                                 Sc, n, n, n, PETSC_FALSE,
1311
                                 pA, n, n, n, PETSC_FALSE); CHKERRQ(ierr);
1312
      }
1313
      if (pY) {
1314
        /* pY <- pY * pB */
1315
        ierr = SlepcDenseCopy(Sc, n, pY, ldpY, n, n); CHKERRQ(ierr);
2022 eromero 1316
        ierr = SlepcDenseMatProd(pY, ldpY, 0.0, 1.0,
1968 eromero 1317
                                 Sc, n, n, n, PETSC_FALSE,
1318
                                 pB, n, n, n, PETSC_FALSE); CHKERRQ(ierr);
1319
      }
1320
    }
1321
#endif
1322
  } else {
1323
    /* [eigr, pX] = eig(S) */
1324
#if defined(PETSC_USE_COMPLEX)
1325
    auxR = (PetscReal*)auxS; auxS = (PetscScalar*)(auxR+n); size_auxS-= n;
1326
    if (size_auxS < 2*n)
2214 jroman 1327
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Insufficient work space for xTREVC");
1968 eromero 1328
    LAPACKtrevc_(side,howmny,PETSC_NULL,&n,Sc,&n,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,auxR,&info);
1329
#else
1330
    LAPACKtrevc_(side,howmny,PETSC_NULL,&n,Sc,&n,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,&info);
1331
#endif
2394 jroman 1332
    if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTREVC %d",info);
1968 eromero 1333
  }
1334
  PetscFunctionReturn(0);
2392 jroman 1335
#endif
1968 eromero 1336
}
2034 eromero 1337
 
1338
#undef __FUNCT__  
1339
#define __FUNCT__ "dvd_compute_eigenvalues"
1340
/*
1341
  Compute the eigenvalues eigr+eigi*i associated to the Schur decomposition
1342
  (S, T), where
1343
  n, size of the eigenproblem
1344
  ldS, ldT, leading dimension of S and T
1345
*/
1346
PetscErrorCode dvd_compute_eigenvalues(PetscInt n, PetscScalar *S,
1347
  PetscInt ldS, PetscScalar *T, PetscInt ldT, PetscScalar *eigr,
1348
  PetscScalar *eigi)
1349
{
2392 jroman 1350
#if defined(SLEPC_MISSING_LAPACK_GGEV)
1351
  PetscFunctionBegin;
1352
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GGEV - Lapack routine is unavailable.");
1353
#else
2034 eromero 1354
  PetscInt        i;
1355
#if !defined(PETSC_USE_COMPLEX)
1356
  PetscErrorCode  ierr;
1357
  PetscScalar     Sc[4], Tc[4], beta[2], auxS[16];
1358
  PetscBLASInt    two=2, info, size_auxS=16;
1359
#endif
1360
 
1361
  PetscFunctionBegin;
1362
  for (i=0; i<n; i++) {
1363
#if !defined(PETSC_USE_COMPLEX)
1364
    if (i<n-1 && S[i*ldS+i+1] != 0.0) {
1365
      ierr = SlepcDenseCopy(Sc, 2, &S[i*ldS+i], ldS, 2, 2); CHKERRQ(ierr);
1366
      ierr = SlepcDenseCopy(Tc, 2, &T[i*ldT+i], ldT, 2, 2); CHKERRQ(ierr);
1367
      LAPACKggev_("N","N",&two,Sc,&two,Tc,&two,&eigr[i],&eigi[i],beta,
1368
                  PETSC_NULL, &two,PETSC_NULL,&two,auxS,&size_auxS,&info);
2394 jroman 1369
      if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGGEV %d",info);
2034 eromero 1370
      eigr[i]  /= beta[0]; eigi[i]  /= beta[0];
1371
      eigr[i+1]/= beta[1]; eigi[i+1]/= beta[1];
1372
      i++;
1373
    } else
1374
#endif
1375
    {
1376
      if (T[i*ldT+i] == 0.0) {
2279 jroman 1377
        if (PetscRealPart(S[i*ldS+i]) < 0.0) eigr[i] = PETSC_MIN_REAL;
2280 jroman 1378
        else eigr[i] = PETSC_MAX_REAL;
2034 eromero 1379
      } else eigr[i] = S[i*ldS+i] / T[i*ldT+i];
1380
      if (eigi) eigi[i] = 0.0;
1381
    }
1382
  }
1383
  PetscFunctionReturn(0);
2392 jroman 1384
#endif
2034 eromero 1385
}