Subversion Repositories slepc-dev

Rev

Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2110 jroman 1
/*
2
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3
   SLEPc - Scalable Library for Eigenvalue Problem Computations
2116 eromero 4
   Copyright (c) 2002-2010, Universidad Politecnica de Valencia, Spain
1619 slepc 5
 
2110 jroman 6
   This file is part of SLEPc.
7
 
8
   SLEPc is free software: you can redistribute it and/or modify it under  the
9
   terms of version 3 of the GNU Lesser General Public License as published by
10
   the Free Software Foundation.
11
 
12
   SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
13
   WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
14
   FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
15
   more details.
16
 
17
   You  should have received a copy of the GNU Lesser General  Public  License
18
   along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
19
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
20
*/
21
 
1619 slepc 22
#include "slepc.h" /*I "slepc.h" I*/
1745 eromero 23
#include "slepceps.h" /*I "slepc.h" I*/
1675 slepc 24
#include "slepcblaslapack.h"
1985 eromero 25
#include "davidson.h"
1619 slepc 26
 
1745 eromero 27
PetscLogEvent SLEPC_SlepcDenseMatProd = 0;
28
PetscLogEvent SLEPC_SlepcDenseMatInvProd = 0;
1750 eromero 29
PetscLogEvent SLEPC_SlepcDenseNorm = 0;
1745 eromero 30
PetscLogEvent SLEPC_SlepcDenseOrth = 0;
31
PetscLogEvent SLEPC_SlepcDenseCopy = 0;
32
PetscLogEvent SLEPC_VecsMult = 0;
1630 slepc 33
 
1634 slepc 34
void dvd_sum_local(void *in, void *out, PetscMPIInt *cnt,MPI_Datatype *t);
1753 eromero 35
PetscErrorCode VecsMultS_copy_func(PetscScalar *out, PetscInt size_out,
36
                                   void *ptr);
1634 slepc 37
 
1745 eromero 38
#undef __FUNCT__  
39
#define __FUNCT__ "dvd_blas_prof_init"
40
PetscErrorCode dvd_blas_prof_init() {
41
  PetscErrorCode  ierr;
42
 
43
  PetscFunctionBegin;
44
 
1978 eromero 45
  if (SLEPC_SlepcDenseMatProd) PetscFunctionReturn(0);
1745 eromero 46
  ierr = PetscLogEventRegister("DenseMatProd", EPS_COOKIE,
47
                               &SLEPC_SlepcDenseMatProd); CHKERRQ(ierr);
48
  ierr = PetscLogEventRegister("DenseOrth", EPS_COOKIE,
49
                               &SLEPC_SlepcDenseOrth); CHKERRQ(ierr);
50
  ierr = PetscLogEventRegister("DenseMatInvProd", EPS_COOKIE,
51
                               &SLEPC_SlepcDenseMatInvProd); CHKERRQ(ierr);
1750 eromero 52
  ierr = PetscLogEventRegister("DenseMatNorm", EPS_COOKIE,
53
                               &SLEPC_SlepcDenseNorm); CHKERRQ(ierr);
1745 eromero 54
  ierr = PetscLogEventRegister("DenseCopy", EPS_COOKIE,
55
                               &SLEPC_SlepcDenseCopy); CHKERRQ(ierr);
56
  ierr = PetscLogEventRegister("VecsMult", EPS_COOKIE,
57
                               &SLEPC_VecsMult); CHKERRQ(ierr);
58
  PetscFunctionReturn(0);
59
}
60
 
1619 slepc 61
/*
62
  Compute C <- a*A*B + b*C, where
63
    ldC, the leading dimension of C,
64
    ldA, the leading dimension of A,
65
    rA, cA, rows and columns of A,
66
    At, if true use the transpose of A instead,
67
    ldB, the leading dimension of B,
68
    rB, cB, rows and columns of B,
69
    Bt, if true use the transpose of B instead
70
*/
1675 slepc 71
#undef __FUNCT__  
72
#define __FUNCT__ "SlepcDenseMatProd"
1619 slepc 73
PetscErrorCode SlepcDenseMatProd(PetscScalar *C, PetscInt _ldC, PetscScalar b,
74
  PetscScalar a,
75
  const PetscScalar *A, PetscInt _ldA, PetscInt rA, PetscInt cA, PetscTruth At,
76
  const PetscScalar *B, PetscInt _ldB, PetscInt rB, PetscInt cB, PetscTruth Bt)
77
{
1630 slepc 78
  PetscErrorCode  ierr;
79
  PetscInt        tmp;
80
  PetscBLASInt    m, n, k, ldA = _ldA, ldB = _ldB, ldC = _ldC;
81
  const char      *N = "N", *T = "C", *qA = N, *qB = N;
1619 slepc 82
 
83
  PetscFunctionBegin;
1675 slepc 84
 
85
  if ((rA == 0) || (cB == 0)) { PetscFunctionReturn(0); }
86
 
1745 eromero 87
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
1619 slepc 88
 
89
  /* Transpose if needed */
90
  if (At == PETSC_TRUE) tmp = rA, rA = cA, cA = tmp, qA = T;
91
  if (Bt == PETSC_TRUE) tmp = rB, rB = cB, cB = tmp, qB = T;
92
 
93
  /* Check size */
94
  if (cA != rB) {
95
    SETERRQ(1, "Matrix dimensions doesn't match!");
96
  }
97
 
98
  /* Do stub */
1744 eromero 99
  if ((rA == 1) && (cA == 1) && (cB == 1)) {
100
    *C = *A * *B;
101
    m = n = k = 1;
102
  } else {
103
    m = rA; n = cB; k = cA;
104
    BLASgemm_(qA, qB, &m, &n, &k, &a, (PetscScalar*)A, &ldA, (PetscScalar*)B,
105
              &ldB, &b, C, &ldC);
106
  }
1619 slepc 107
 
1630 slepc 108
  ierr = PetscLogFlops(m*n*2*k);CHKERRQ(ierr);
1745 eromero 109
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
1630 slepc 110
 
1619 slepc 111
  PetscFunctionReturn(0);
112
}
113
 
1626 slepc 114
/*
1747 eromero 115
  Compute C <- A*B, where
1988 eromero 116
    sC, structure of C,
1747 eromero 117
    ldC, the leading dimension of C,
1988 eromero 118
    sA, structure of A,
1747 eromero 119
    ldA, the leading dimension of A,
120
    rA, cA, rows and columns of A,
121
    At, if true use the transpose of A instead,
1988 eromero 122
    sB, structure of B,
1747 eromero 123
    ldB, the leading dimension of B,
124
    rB, cB, rows and columns of B,
125
    Bt, if true use the transpose of B instead
126
*/
127
#undef __FUNCT__  
128
#define __FUNCT__ "SlepcDenseMatProdTriang"
129
PetscErrorCode SlepcDenseMatProdTriang(
130
  PetscScalar *C, MatType_t sC, PetscInt ldC,
131
  const PetscScalar *A, MatType_t sA, PetscInt ldA, PetscInt rA, PetscInt cA,
132
  PetscTruth At,
133
  const PetscScalar *B, MatType_t sB, PetscInt ldB, PetscInt rB, PetscInt cB,
134
  PetscTruth Bt)
135
{
136
  PetscErrorCode  ierr;
1988 eromero 137
  PetscInt        tmp;
138
  PetscScalar     one=1.0, zero=0.0;
1747 eromero 139
  PetscBLASInt    rC, cC, _ldA = ldA, _ldB = ldB, _ldC = ldC;
1750 eromero 140
 
1747 eromero 141
  PetscFunctionBegin;
142
 
143
  if ((rA == 0) || (cB == 0)) { PetscFunctionReturn(0); }
144
 
145
  /* Transpose if needed */
146
  if (At == PETSC_TRUE) tmp = rA, rA = cA, cA = tmp;
147
  if (Bt == PETSC_TRUE) tmp = rB, rB = cB, cB = tmp;
148
 
149
  /* Check size */
150
  if (cA != rB) {
151
    SETERRQ(1, "Matrix dimensions doesn't match!");
152
    PetscFunctionReturn(1);
153
  }
154
 
155
  if (sB != 0) {
156
    SETERRQ(1, "It doesn't support B matrix type!");
157
    PetscFunctionReturn(1);
158
  }
159
 
1750 eromero 160
  /* Optimized version: trivial case */
161
  if ((rA == 1) && (cA == 1) && (cB == 1)) {
1988 eromero 162
    if ((At == PETSC_FALSE) && (Bt == PETSC_FALSE))     *C = *A * *B;
163
    else if ((At == PETSC_TRUE) && (Bt == PETSC_FALSE)) *C = PetscConj(*A) * *B;
164
    else if ((At == PETSC_FALSE) && (Bt == PETSC_TRUE)) *C = *A * PetscConj(*B);
165
    else if ((At == PETSC_TRUE) && (Bt == PETSC_TRUE))  *C = PetscConj(*A) * PetscConj(*B);
1750 eromero 166
    PetscFunctionReturn(0);
167
  }
168
 
169
  /* Optimized versions: sA == 0 && sB == 0 */
170
  if ((sA == 0) && (sB == 0)) {
171
    if (At == PETSC_TRUE) tmp = rA, rA = cA, cA = tmp;
172
    if (Bt == PETSC_TRUE) tmp = rB, rB = cB, cB = tmp;
173
    ierr = SlepcDenseMatProd(C, ldC, 0.0, 1.0, A, ldA, rA, cA, At, B, ldB, rB,
174
                             cB, Bt); CHKERRQ(ierr);
175
    PetscFunctionReturn(ierr);
176
  }
177
 
1988 eromero 178
  /* Optimized versions: A hermitian && (B not triang) */
1756 eromero 179
  if (DVD_IS(sA,DVD_MAT_HERMITIAN) &&
180
      DVD_ISNOT(sB,DVD_MAT_UTRIANG) &&
1988 eromero 181
      DVD_ISNOT(sB,DVD_MAT_LTRIANG)    ) {
1750 eromero 182
    ierr = PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
183
    rC = rA; cC = cB;
1988 eromero 184
    BLAShemm_("L", DVD_ISNOT(sA,DVD_MAT_LTRIANG)?"U":"L", &rC, &cC, &one,
1750 eromero 185
              (PetscScalar*)A, &_ldA, (PetscScalar*)B, &_ldB, &zero, C, &_ldC);
186
    ierr = PetscLogFlops(rA*cB*cA); CHKERRQ(ierr);
187
    ierr = PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
188
    PetscFunctionReturn(0);
189
  }
190
 
1988 eromero 191
  /* Optimized versions: B hermitian && (A not triang) */
192
  if (DVD_IS(sB,DVD_MAT_HERMITIAN) &&
193
      DVD_ISNOT(sA,DVD_MAT_UTRIANG) &&
194
      DVD_ISNOT(sA,DVD_MAT_LTRIANG)    ) {
195
    ierr = PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
196
    rC = rA; cC = cB;
197
    BLAShemm_("R", DVD_ISNOT(sB,DVD_MAT_LTRIANG)?"U":"L", &rC, &cC, &one,
198
              (PetscScalar*)B, &_ldB, (PetscScalar*)A, &_ldA, &zero, C, &_ldC);
199
    ierr = PetscLogFlops(rA*cB*cA); CHKERRQ(ierr);
200
    ierr = PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
201
    PetscFunctionReturn(0);
1747 eromero 202
  }
1988 eromero 203
 
204
  SETERRQ(1, "It doesn't support A matrix type!");
205
  PetscFunctionReturn(1);
1747 eromero 206
}
207
 
1750 eromero 208
/*
1965 eromero 209
  Normalize the columns of the matrix A, where
1750 eromero 210
    ldA, the leading dimension of A,
1965 eromero 211
    rA, cA, rows and columns of A.
212
  if eigi is given, the pairs of contiguous columns i i+1 such as eigi[i] != 0
213
  are normalized as being one column.
1750 eromero 214
*/
215
#undef __FUNCT__  
216
#define __FUNCT__ "SlepcDenseNorm"
217
PetscErrorCode SlepcDenseNorm(PetscScalar *A, PetscInt ldA, PetscInt _rA,
1965 eromero 218
                              PetscInt cA, PetscScalar *eigi)
1750 eromero 219
{
220
  PetscErrorCode  ierr;
1965 eromero 221
  PetscInt        i;
222
  PetscScalar     norm, norm0;
1750 eromero 223
  PetscBLASInt    rA = _rA, one=1;
1747 eromero 224
 
1750 eromero 225
  PetscFunctionBegin;
226
 
227
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseNorm,0,0,0,0);CHKERRQ(ierr);
228
 
229
  for(i=0; i<cA; i++) {
1965 eromero 230
    if(eigi && eigi[i] != 0.0) {
231
      norm = BLASnrm2_(&rA, &A[i*ldA], &one);
232
      norm0 = BLASnrm2_(&rA, &A[(i+1)*ldA], &one);
233
      norm = 1.0/PetscSqrtScalar(norm*norm + norm0*norm0);
234
      BLASscal_(&rA, &norm, &A[i*ldA], &one);
235
      BLASscal_(&rA, &norm, &A[(i+1)*ldA], &one);
236
      i++;
237
    } else {
238
      norm = BLASnrm2_(&rA, &A[i*ldA], &one);
239
      norm = 1.0 / norm;
240
      BLASscal_(&rA, &norm, &A[i*ldA], &one);
241
     }
1750 eromero 242
  }
243
 
244
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseNorm,0,0,0,0);CHKERRQ(ierr);
245
 
246
  PetscFunctionReturn(0);
247
}
248
 
249
 
1747 eromero 250
/*
1735 eromero 251
  Compute B <- A\B, where
252
    ldA, the leading dimension of A,
253
    ldB, the leading dimension of B,
254
    dimA, rows and columns of A,
255
    rB, cB, rows and columns of B,
256
    auxI, auxiliary vector of size dimA,
257
    auxS, auxiliary vector of size cB
258
*/
259
#undef __FUNCT__  
260
#define __FUNCT__ "SlepcDenseMatInvProd"
261
PetscErrorCode SlepcDenseMatInvProd(
262
  PetscScalar *A, PetscInt _ldA, PetscInt _dimA,
263
  PetscScalar *B, PetscInt _ldB, PetscInt rB, PetscInt _cB,
264
  PetscInt *auxI)
265
{
1745 eromero 266
  PetscErrorCode  ierr;
1735 eromero 267
  PetscBLASInt    *p = (PetscBLASInt*)auxI, dimA = _dimA, cB = _cB,
268
                  ldA = _ldA, ldB = _ldB, info;
269
 
270
  PetscFunctionBegin;
271
 
272
  /* Check size */
273
  if (_dimA != rB) {
274
    SETERRQ(1, "Matrix dimensions doesn't match!");
275
  }
276
 
277
  /* Quick exit */
278
  if ((_dimA == 0) || (cB == 0)) { PetscFunctionReturn(0); }
279
 
1745 eromero 280
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseMatInvProd,0,0,0,0);CHKERRQ(ierr);
1744 eromero 281
  if (dimA == 1) {
282
    *B = *B / *A;
1745 eromero 283
    ierr = PetscLogFlops(1);CHKERRQ(ierr);
1744 eromero 284
  } else {
285
   LAPACKgesv_(&dimA, &cB, A, &ldA, p, B, &ldB, &info);
1745 eromero 286
   if (info) SETERRQ1(PETSC_ERR_LIB, "Error in Lapack GESV %d", info);
1744 eromero 287
  }
1745 eromero 288
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseMatInvProd,0,0,0,0);CHKERRQ(ierr);
1744 eromero 289
 
1735 eromero 290
  PetscFunctionReturn(0);
291
}
292
 
293
/*
1743 eromero 294
  Compute A <- orth(A), where
295
    ldA, the leading dimension of A,
296
    rA, cA, rows and columns of A,
297
    auxS, auxiliary vector of more size than cA+min(rA,cA),
298
    lauxS, size of auxS,
299
    ncA, new number of columns of A
300
*/
301
#undef __FUNCT__  
302
#define __FUNCT__ "SlepcDenseOrth"
303
PetscErrorCode SlepcDenseOrth(PetscScalar *A, PetscInt _ldA, PetscInt _rA,
304
                              PetscInt _cA, PetscScalar *auxS, PetscInt _lauxS,
305
                              PetscInt *ncA)
306
{
1745 eromero 307
  PetscErrorCode  ierr;
1743 eromero 308
  PetscBLASInt    ldA = _ldA, rA = _rA, cA = _cA,
309
                  info, ltau = PetscMin(_cA, _rA), lw = _lauxS - ltau;
310
  PetscScalar     *tau = auxS, *w = tau + ltau;
311
 
312
  PetscFunctionBegin;
313
 
314
  /* Quick exit */
315
  if ((_rA == 0) || (cA == 0)) { PetscFunctionReturn(0); }
316
 
317
  /* Memory check */
318
  if (lw < cA) {
319
    SETERRQ(1, "Insufficient memory for xGEQRF!");
320
    PetscFunctionReturn(1);
321
  }
322
 
1745 eromero 323
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseOrth,0,0,0,0);CHKERRQ(ierr);
1743 eromero 324
  LAPACKgeqrf_(&rA, &cA, A, &ldA, tau, w, &lw, &info);
325
  if (info) SETERRQ1(PETSC_ERR_LIB, "Error in Lapack xGEQRF %d", info);
326
  LAPACKorgqr_(&rA, &ltau, &ltau, A, &ldA, tau, w, &lw, &info);
327
  if (info) SETERRQ1(PETSC_ERR_LIB, "Error in Lapack xORGQR %d", info);
1745 eromero 328
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseOrth,0,0,0,0);CHKERRQ(ierr);
1743 eromero 329
 
330
  if (ncA) *ncA = ltau;
331
 
332
  PetscFunctionReturn(0);
333
}
334
 
335
/*
336
  Y <- X, where
337
  ldX, leading dimension of X,
338
  rX, cX, rows and columns of X
339
  ldY, leading dimension of Y
340
*/
341
#undef __FUNCT__  
342
#define __FUNCT__ "SlepcDenseCopy"
343
PetscErrorCode SlepcDenseCopy(PetscScalar *Y, PetscInt ldY, PetscScalar *X,
344
                              PetscInt ldX, PetscInt rX, PetscInt cX)
345
{
346
  PetscErrorCode  ierr;
1795 eromero 347
  PetscInt        i;
1743 eromero 348
 
349
  PetscFunctionBegin;
350
 
351
  if ((ldX < rX) || (ldY < rX)) {
352
    SETERRQ(1, "Leading dimension error!");
353
  }
1805 eromero 354
 
355
  /* Quick exit */
356
  if (Y == X) {
357
    if (ldX != ldY) {
358
      SETERRQ(1, "Leading dimension error!");
359
    }
360
    PetscFunctionReturn(0);
361
  }
1743 eromero 362
 
1745 eromero 363
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseCopy,0,0,0,0);CHKERRQ(ierr);
1743 eromero 364
  for(i=0; i<cX; i++) {
365
    ierr = PetscMemcpy(&Y[ldY*i], &X[ldX*i], sizeof(PetscScalar)*rX);
366
    CHKERRQ(ierr);
367
  }
1745 eromero 368
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseCopy,0,0,0,0);CHKERRQ(ierr);
1743 eromero 369
 
370
  PetscFunctionReturn(0);
371
}
372
 
1752 eromero 373
/*
374
  Y <- X, where
375
  ldX, leading dimension of X,
376
  rX, cX, rows and columns of X
377
  ldY, leading dimension of Y
378
*/
379
#undef __FUNCT__  
380
#define __FUNCT__ "SlepcDenseCopyTriang"
381
PetscErrorCode SlepcDenseCopyTriang(PetscScalar *Y, MatType_t sY, PetscInt ldY,
382
                                    PetscScalar *X, MatType_t sX, PetscInt ldX,
383
                                    PetscInt rX, PetscInt cX)
384
{
385
  PetscErrorCode  ierr;
386
  PetscInt        i,j,c;
1743 eromero 387
 
1752 eromero 388
  PetscFunctionBegin;
389
 
390
  if ((ldX < rX) || (ldY < rX)) {
391
    SETERRQ(1, "Leading dimension error!");
392
  }
393
 
394
  if (rX != cX) {
395
    SETERRQ(1, "SlepcDenseCopyTriang doesn't support rectangular matrices!");
396
  }
397
 
1756 eromero 398
  if (DVD_IS(sX,DVD_MAT_UTRIANG) &&
399
      DVD_ISNOT(sX,DVD_MAT_LTRIANG)) {        /* UpTr to ... */
400
    if (DVD_IS(sY,DVD_MAT_UTRIANG) &&
401
        DVD_ISNOT(sY,DVD_MAT_LTRIANG))        /* ... UpTr, */
1752 eromero 402
      c = 0;                                      /*     so copy */
1756 eromero 403
    else if(DVD_ISNOT(sY,DVD_MAT_UTRIANG) &&
404
            DVD_IS(sY,DVD_MAT_LTRIANG))       /* ... LoTr, */
1752 eromero 405
      c = 1;                                      /*     so transpose */
406
    else                                          /* ... Full, */
407
      c = 2;                                      /*     so reflect from up */
1756 eromero 408
  } else if (DVD_ISNOT(sX,DVD_MAT_UTRIANG) &&
409
             DVD_IS(sX,DVD_MAT_LTRIANG)) {    /* LoTr to ... */
410
    if (DVD_IS(sY,DVD_MAT_UTRIANG) &&
411
        DVD_ISNOT(sY,DVD_MAT_LTRIANG))        /* ... UpTr, */
1752 eromero 412
      c = 1;                                      /*     so transpose */
1756 eromero 413
    else if(DVD_ISNOT(sY,DVD_MAT_UTRIANG) &&
414
            DVD_IS(sY,DVD_MAT_LTRIANG))       /* ... LoTr, */
1752 eromero 415
      c = 0;                                      /*     so copy */
416
    else                                          /* ... Full, */
417
      c = 3;                                      /*     so reflect fr. down */
418
  } else                                          /* Full to any, */
419
    c = 0;                                        /*     so copy */
420
 
421
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseCopy,0,0,0,0);CHKERRQ(ierr);
422
 
423
  switch(c) {
424
  case 0: /* copy */
425
    for(i=0; i<cX; i++) {
426
      ierr = PetscMemcpy(&Y[ldY*i], &X[ldX*i], sizeof(PetscScalar)*rX);
427
      CHKERRQ(ierr);
428
    }
429
    break;
430
 
431
  case 1: /* transpose */
432
    for(i=0; i<cX; i++)
433
      for(j=0; j<rX; j++)
434
        Y[ldY*j+i] = X[ldX*i+j];
435
    break;
436
 
437
  case 2: /* reflection from up */
438
    for(i=0; i<cX; i++)
439
      for(j=0; j<PetscMin(i+1,rX); j++)
1988 eromero 440
        Y[ldY*j+i] = PetscConj(Y[ldY*i+j] = X[ldX*i+j]);
1752 eromero 441
    break;
442
 
443
  case 3: /* reflection from down */
444
    for(i=0; i<cX; i++)
445
      for(j=i; j<rX; j++)
1988 eromero 446
        Y[ldY*j+i] = PetscConj(Y[ldY*i+j] = X[ldX*i+j]);
1752 eromero 447
    break;
448
  }
449
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseCopy,0,0,0,0);CHKERRQ(ierr);
450
 
451
  PetscFunctionReturn(0);
452
}
453
 
454
 
1743 eromero 455
/*
1675 slepc 456
  Compute Y[0..cM-1] <- alpha * X[0..cX-1] * M + beta * Y[0..cM-1],
457
  where X and Y are contiguous global vectors.
1626 slepc 458
*/
1675 slepc 459
#undef __FUNCT__  
460
#define __FUNCT__ "SlepcUpdateVectorsZ"
461
PetscErrorCode SlepcUpdateVectorsZ(Vec *Y, PetscScalar beta, PetscScalar alpha,
462
  Vec *X, PetscInt cX, const PetscScalar *M, PetscInt ldM, PetscInt rM,
463
  PetscInt cM)
1626 slepc 464
{
465
  PetscErrorCode  ierr;
1960 eromero 466
 
467
  PetscFunctionBegin;
468
 
469
  ierr = SlepcUpdateVectorsS(Y, 1, beta, alpha, X, cX, 1, M, ldM, rM, cM);
470
  CHKERRQ(ierr);
471
 
472
  PetscFunctionReturn(0);
473
}
474
 
475
 
476
/*
1965 eromero 477
  Compute Y[0:dY:cM*dY-1] <- alpha * X[0:dX:cX-1] * M + beta * Y[0:dY:cM*dY-1],
1960 eromero 478
  where X and Y are contiguous global vectors.
479
*/
480
#undef __FUNCT__  
481
#define __FUNCT__ "SlepcUpdateVectorsS"
482
PetscErrorCode SlepcUpdateVectorsS(Vec *Y, PetscInt dY, PetscScalar beta,
483
  PetscScalar alpha, Vec *X, PetscInt cX, PetscInt dX, const PetscScalar *M,
484
  PetscInt ldM, PetscInt rM, PetscInt cM)
485
{
486
  PetscErrorCode  ierr;
1626 slepc 487
  PetscScalar     *px, *py;
1965 eromero 488
  PetscInt        rX, rY, ldX, ldY, i, rcX;
1626 slepc 489
 
490
  PetscFunctionBegin;
491
 
1965 eromero 492
  /* Compute the real number of columns */
493
  rcX = cX/dX;
494
  if (rcX != rM) {
1735 eromero 495
    SETERRQ(1, "Matrix dimensions doesn't match!");
496
  }
497
 
1965 eromero 498
  if ((rcX == 0) || (rM == 0) || (cM == 0)) {
1626 slepc 499
    PetscFunctionReturn(0);
1960 eromero 500
  } else if ((Y + cM <= X) || (X + cX <= Y) ||
501
             ((X != Y) && ((PetscMax(dX,dY))%(PetscMin(dX,dY))!=0))) {
1626 slepc 502
    /* If Y[0..cM-1] and X[0..cX-1] are not overlapped... */
503
 
504
    /* Get the dense matrices and dimensions associated to Y and X */
1960 eromero 505
    ierr = VecGetLocalSize(X[0], &rX); CHKERRQ(ierr);
506
    ierr = VecGetLocalSize(Y[0], &rY); CHKERRQ(ierr);
507
    if (rX != rY) {
508
      SETERRQ(1, "The multivectors doesn't have the same dimension!");
1626 slepc 509
    }
510
    ierr = VecGetArray(X[0], &px);CHKERRQ(ierr);
511
    ierr = VecGetArray(Y[0], &py);CHKERRQ(ierr);
512
 
1960 eromero 513
    /* Update the strides */
514
    ldX = rX*dX; ldY= rY*dY;
515
 
1626 slepc 516
    /* Do operation */
1965 eromero 517
    ierr = SlepcDenseMatProd(py, ldY, beta, alpha, px, ldX, rX, rcX,
1675 slepc 518
                    PETSC_FALSE, M, ldM, rM, cM, PETSC_FALSE); CHKERRQ(ierr);
1626 slepc 519
 
520
    ierr = VecRestoreArray(X[0], &px);CHKERRQ(ierr);
1992 eromero 521
    ierr = PetscObjectStateDecrease((PetscObject)X[0]); CHKERRQ(ierr);
1626 slepc 522
    ierr = VecRestoreArray(Y[0], &py);CHKERRQ(ierr);
1992 eromero 523
    for(i=1; i<cM; i++) {
524
      ierr = PetscObjectStateIncrease((PetscObject)Y[dY*i]); CHKERRQ(ierr);
525
    }
1626 slepc 526
 
1960 eromero 527
  } else if ((Y >= X) && (beta == 0.0) && (dY == dX)) {
1626 slepc 528
    /* If not, call to SlepcUpdateVectors */
1965 eromero 529
    ierr = SlepcUpdateStrideVectors(cX, X, Y-X, dX, Y-X+cM*dX, M-ldM*(Y-X),
530
                                    ldM, PETSC_FALSE); CHKERRQ(ierr);
1675 slepc 531
    if (alpha != 1.0)
532
      for (i=0; i<cM; i++) {
533
        ierr = VecScale(Y[i], alpha); CHKERRQ(ierr);
534
      }
1626 slepc 535
  } else {
536
    SETERRQ(1, "I don't support this case!");
537
  }
538
 
539
  PetscFunctionReturn(0);
540
}
541
 
1965 eromero 542
/*
543
  Compute X <- alpha * X[0:dX:cX-1] * M
544
  where X is a matrix with non-consecutive columns
545
*/
546
#undef __FUNCT__  
547
#define __FUNCT__ "SlepcUpdateVectorsD"
548
PetscErrorCode SlepcUpdateVectorsD(Vec *X, PetscInt cX, PetscScalar alpha,
549
  const PetscScalar *M, PetscInt ldM, PetscInt rM, PetscInt cM,
550
  PetscScalar *work, PetscInt lwork)
551
{
552
  PetscErrorCode  ierr;
553
  PetscScalar     **px, *Y, *Z;
554
  PetscInt        rX, i, j, rY, rY0, ldY;
1960 eromero 555
 
1965 eromero 556
  PetscFunctionBegin;
557
 
558
  if (cX != rM) {
559
    SETERRQ(1, "Matrix dimensions doesn't match!");
560
  }
561
 
562
  rY = (lwork/2)/cX;
563
  if (rY <= 0) {
564
    SETERRQ(1, "Insufficient work space given!");
565
  }
566
  Y = work; Z = &Y[cX*rY]; ldY = rY;
567
 
568
  if ((cX == 0) || (rM == 0) || (cM == 0)) {
569
    PetscFunctionReturn(0);
570
  }
571
 
572
  /* Get the dense vectors associated to the columns of X */
573
  ierr = PetscMalloc(sizeof(Vec)*cX, &px); CHKERRQ(ierr);
574
  for(i=0; i<cX; i++) {
575
    ierr = VecGetArray(X[i], &px[i]); CHKERRQ(ierr);
576
  }
577
  ierr = VecGetLocalSize(X[0], &rX); CHKERRQ(ierr);
578
 
579
  for(i=0, rY0=0; i<rX; i+=rY0) {
580
    rY0 = PetscMin(rY, rX-i);
581
 
582
    /* Y <- X[i0:i1,:] */
583
    for(j=0; j<cX; j++) {
584
      ierr = SlepcDenseCopy(&Y[ldY*j], ldY, px[j]+i, rX, rY0, 1);
585
      CHKERRQ(ierr);
586
    }
587
 
588
    /* Z <- Y * M */
589
    ierr = SlepcDenseMatProd(Z, ldY, 0.0, alpha, Y, ldY, rY0, cX, PETSC_FALSE,
590
                                                 M, ldM, rM, cM, PETSC_FALSE);
591
    CHKERRQ(ierr);
592
 
593
    /* X <- Z */
594
    for(j=0; j<cM; j++) {
595
      ierr = SlepcDenseCopy(px[j]+i, rX, &Z[j*ldY], ldY, rY0, 1);
596
      CHKERRQ(ierr);
597
    }
598
  }
599
 
600
  for(i=0; i<cX; i++) {
601
    ierr = VecRestoreArray(X[i], &px[i]); CHKERRQ(ierr);
602
  }
603
  ierr = PetscFree(px); CHKERRQ(ierr);
604
 
605
  PetscFunctionReturn(0);
606
}
607
 
608
 
609
 
1634 slepc 610
/* Computes M <- [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ]
611
                 [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
612
  where W = U' * V.
1640 slepc 613
  workS0 and workS1 are an auxiliary scalar vector of size
614
  (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
615
  is needed, and of size eU*eV.
1634 slepc 616
*/
617
 
1675 slepc 618
#undef __FUNCT__  
619
#define __FUNCT__ "VecsMult"
1747 eromero 620
PetscErrorCode VecsMult(PetscScalar *M, MatType_t sM, PetscInt ldM,
1634 slepc 621
                        Vec *U, PetscInt sU, PetscInt eU,
622
                        Vec *V, PetscInt sV, PetscInt eV,
1640 slepc 623
                        PetscScalar *workS0, PetscScalar *workS1)
1634 slepc 624
{
625
  PetscErrorCode  ierr;
1640 slepc 626
  PetscInt        ldU, ldV, i, j, k;
627
  PetscScalar     *pu, *pv, *W, *Wr;
1634 slepc 628
 
629
  PetscFunctionBegin;
630
 
631
  /* Check if quick exit */
632
  if ((eU-sU == 0) || (eV-sV == 0))
633
    PetscFunctionReturn(0);
634
 
635
  /* Get the dense matrices and dimensions associated to U and V */
636
  ierr = VecGetLocalSize(U[0], &ldU); CHKERRQ(ierr);
637
  ierr = VecGetLocalSize(V[0], &ldV); CHKERRQ(ierr);
638
  if (ldU != ldV) {
639
    SETERRQ(1, "Matrix dimensions doesn't match!");
640
  }
641
  ierr = VecGetArray(U[0], &pu);CHKERRQ(ierr);
642
  ierr = VecGetArray(V[0], &pv);CHKERRQ(ierr);
643
 
1640 slepc 644
  if (workS0)
645
    W = workS0;
1634 slepc 646
  else {
1640 slepc 647
    ierr = PetscMalloc(sizeof(PetscScalar)*((eU-sU)*sV+(eV-sV)*eU), &W);
648
    CHKERRQ(ierr);
1634 slepc 649
  }
650
 
1745 eromero 651
  ierr = PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
652
 
1640 slepc 653
  if ((sU == 0) && (sV == 0) && (eU == ldM)) {
654
    /* Use the smart memory usage version */
1634 slepc 655
 
1640 slepc 656
    /* W <- U' * V */
1747 eromero 657
    ierr = SlepcDenseMatProdTriang(W, sM, eU,
658
                                   pu, 0, ldU, ldU, eU, PETSC_TRUE,
659
                                   pv, 0, ldV, ldV, eV, PETSC_FALSE);
1640 slepc 660
    CHKERRQ(ierr);
661
 
662
    /* ReduceAll(W, SUM) */
1899 eromero 663
    ierr = MPI_Allreduce(W, M, eU*eV, MPIU_SCALAR, MPIU_SUM,
1640 slepc 664
                         ((PetscObject)U[0])->comm); CHKERRQ(ierr);
1747 eromero 665
  /* Full M matrix */
1756 eromero 666
  } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
667
             DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
1640 slepc 668
    if (workS1)
669
      Wr = workS1;
670
    else {
671
      ierr = PetscMalloc(sizeof(PetscScalar)*((eU-sU)*sV+(eV-sV)*eU), &Wr);
672
      CHKERRQ(ierr);
673
    }
1747 eromero 674
 
1640 slepc 675
    /* W(0:(eU-sU)*sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
676
    ierr = SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
677
                             pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
678
                             pv       , ldV, ldV, sV,    PETSC_FALSE);
679
    CHKERRQ(ierr);
680
 
681
    /* W((eU-sU)*sV:(eU-sU)*sV+(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
682
    ierr = SlepcDenseMatProd(W+(eU-sU)*sV, eU, 0.0, 1.0,
683
                             pu,        ldU, ldU, eU,    PETSC_TRUE,
684
                             pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
685
    CHKERRQ(ierr);
686
 
687
    /* ReduceAll(W, SUM) */
688
    ierr = MPI_Allreduce(W, Wr, (eU-sU)*sV+(eV-sV)*eU, MPIU_SCALAR,
1899 eromero 689
                      MPIU_SUM, ((PetscObject)U[0])->comm); CHKERRQ(ierr);
1640 slepc 690
 
691
    /* M(...,...) <- W */
692
    for (i=0,k=0; i<sV; i++)
693
      for (j=ldM*i+sU; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
694
    for (i=sV; i<eV; i++)
695
        for (j=ldM*i; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
696
 
1747 eromero 697
    if (!workS1) {
698
      ierr = PetscFree(Wr); CHKERRQ(ierr);
699
    }
1745 eromero 700
 
1747 eromero 701
  /* Upper triangular M matrix */
1756 eromero 702
  } else if (DVD_IS(sM,DVD_MAT_UTRIANG) &&
703
             DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
1747 eromero 704
    if (workS1)
705
      Wr = workS1;
706
    else {
707
      ierr = PetscMalloc(sizeof(PetscScalar)*(eV-sV)*eU, &Wr);
708
      CHKERRQ(ierr);
709
    }
710
 
711
    /* W(0:(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
712
    ierr = SlepcDenseMatProd(W,         eU,  0.0, 1.0,
713
                             pu,        ldU, ldU, eU,    PETSC_TRUE,
714
                             pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
715
    CHKERRQ(ierr);
716
 
717
    /* ReduceAll(W, SUM) */
1899 eromero 718
    ierr = MPI_Allreduce(W, Wr, (eV-sV)*eU, MPIU_SCALAR, MPIU_SUM,
1747 eromero 719
                         ((PetscObject)U[0])->comm); CHKERRQ(ierr);
720
 
721
    /* M(...,...) <- W */
722
    for (i=sV,k=0; i<eV; i++)
723
        for (j=ldM*i; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
724
 
1640 slepc 725
    if (!workS1) {
726
      ierr = PetscFree(Wr); CHKERRQ(ierr);
727
    }
1747 eromero 728
 
729
  /* Lower triangular M matrix */
1756 eromero 730
  } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
731
             DVD_IS(sM,DVD_MAT_LTRIANG)) {
1747 eromero 732
    if (workS1)
733
      Wr = workS1;
734
    else {
735
      ierr = PetscMalloc(sizeof(PetscScalar)*(eU-sU)*eV, &Wr);
736
      CHKERRQ(ierr);
737
    }
738
 
739
    /* W(0:(eU-sU)*eV-1) <- U(sU:eU-1)' * V(0:eV-1) */
740
    ierr = SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
741
                             pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
742
                             pv       , ldV, ldV, eV,    PETSC_FALSE);
743
    CHKERRQ(ierr);
744
 
745
    /* ReduceAll(W, SUM) */
1899 eromero 746
    ierr = MPI_Allreduce(W, Wr, (eU-sU)*eV, MPIU_SCALAR, MPIU_SUM,
1747 eromero 747
                         ((PetscObject)U[0])->comm); CHKERRQ(ierr);
748
 
749
    /* M(...,...) <- W */
750
    for (i=0,k=0; i<eV; i++)
751
      for (j=ldM*i+sU; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
752
 
753
    if (!workS1) {
754
      ierr = PetscFree(Wr); CHKERRQ(ierr);
755
    }
1634 slepc 756
  }
757
 
1747 eromero 758
  ierr = PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
759
 
1640 slepc 760
  if (!workS0) {
1634 slepc 761
    ierr = PetscFree(W); CHKERRQ(ierr);
762
  }
763
 
764
  ierr = VecRestoreArray(U[0], &pu); CHKERRQ(ierr);
1992 eromero 765
  ierr = PetscObjectStateDecrease((PetscObject)U[0]); CHKERRQ(ierr);
1634 slepc 766
  ierr = VecRestoreArray(V[0], &pv); CHKERRQ(ierr);
1992 eromero 767
  ierr = PetscObjectStateDecrease((PetscObject)V[0]); CHKERRQ(ierr);
1634 slepc 768
 
769
  PetscFunctionReturn(0);
770
}
771
 
1795 eromero 772
 
773
 
1753 eromero 774
/* Computes M <- [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ]
775
                 [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
1808 eromero 776
  where W = local_U' * local_V. Needs VecsMultIb for completing the operation!
1795 eromero 777
  workS0 and workS1 are an auxiliary scalar vector of size
778
  (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
779
  is needed, and of size eU*eV.
780
*/
781
 
782
#undef __FUNCT__  
783
#define __FUNCT__ "VecsMultIa"
784
PetscErrorCode VecsMultIa(PetscScalar *M, MatType_t sM, PetscInt ldM,
785
                          Vec *U, PetscInt sU, PetscInt eU,
786
                          Vec *V, PetscInt sV, PetscInt eV)
787
{
788
  PetscErrorCode  ierr;
789
  PetscInt        ldU, ldV;
790
  PetscScalar     *pu, *pv;
791
 
792
  PetscFunctionBegin;
793
 
794
  /* Check if quick exit */
795
  if ((eU-sU == 0) || (eV-sV == 0))
796
    PetscFunctionReturn(0);
797
 
798
  /* Get the dense matrices and dimensions associated to U and V */
799
  ierr = VecGetLocalSize(U[0], &ldU); CHKERRQ(ierr);
800
  ierr = VecGetLocalSize(V[0], &ldV); CHKERRQ(ierr);
801
  if (ldU != ldV) {
802
    SETERRQ(1, "Matrix dimensions doesn't match!");
803
  }
804
  ierr = VecGetArray(U[0], &pu);CHKERRQ(ierr);
805
  ierr = VecGetArray(V[0], &pv);CHKERRQ(ierr);
806
 
807
  if ((sU == 0) && (sV == 0) && (eU == ldM)) {
808
    /* M <- local_U' * local_V */
809
    ierr = SlepcDenseMatProdTriang(M, sM, eU,
810
                                   pu, 0, ldU, ldU, eU, PETSC_TRUE,
811
                                   pv, 0, ldV, ldV, eV, PETSC_FALSE);
812
    CHKERRQ(ierr);
813
 
814
  /* Full M matrix */
815
  } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
816
             DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
817
    /* M(sU:eU-1,0:sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
818
    ierr = SlepcDenseMatProd(&M[sU], ldM, 0.0, 1.0,
819
                             pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
820
                             pv       , ldV, ldV, sV,    PETSC_FALSE);
821
    CHKERRQ(ierr);
822
 
823
    /* M(0:eU-1,sV:eV-1) <- U(0:eU-1)' * V(sV:eV-1) */
824
    ierr = SlepcDenseMatProd(&M[ldM*sV], ldM, 0.0, 1.0,
825
                             pu,        ldU, ldU, eU,    PETSC_TRUE,
826
                             pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
827
    CHKERRQ(ierr);
828
 
829
  /* Other structures */
830
  } else {
831
    SETERRQ(1, "Matrix structure doesn't support by VecsMultI!");
832
    PetscFunctionReturn(1);
833
  }
834
 
835
  ierr = VecRestoreArray(U[0], &pu); CHKERRQ(ierr);
1992 eromero 836
  ierr = PetscObjectStateDecrease((PetscObject)U[0]); CHKERRQ(ierr);
1795 eromero 837
  ierr = VecRestoreArray(V[0], &pv); CHKERRQ(ierr);
1992 eromero 838
  ierr = PetscObjectStateDecrease((PetscObject)V[0]); CHKERRQ(ierr);
1795 eromero 839
 
840
  PetscFunctionReturn(0);
841
}
842
 
843
 
1808 eromero 844
/* Computes M <- nprocs*M
845
  where nprocs is the number of processors.
846
*/
847
 
848
#undef __FUNCT__  
849
#define __FUNCT__ "VecsMultIc"
850
PetscErrorCode VecsMultIc(PetscScalar *M, MatType_t sM, PetscInt ldM,
851
                          PetscInt rM, PetscInt cM, Vec V)
852
{
853
  PetscInt        i,j,n;
854
 
855
  PetscFunctionBegin;
856
 
857
  /* Check if quick exit */
858
  if ((rM == 0) || (cM == 0))
859
    PetscFunctionReturn(0);
860
 
861
  if (sM != 0) {
862
    SETERRQ(1, "Matrix structure doesn't support by VecsMultIc!");
863
    PetscFunctionReturn(1);
864
  }
865
 
866
  MPI_Comm_size(((PetscObject)V)->comm, &n);
867
 
868
  for(i=0; i<cM; i++)
869
    for(j=0; j<rM; j++)
870
      M[ldM*i+j]/= (PetscScalar)n;
871
 
872
  PetscFunctionReturn(0);
873
}
874
 
875
 
1795 eromero 876
/* Computes N <- Allreduce( [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ] )
877
                          ( [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ] )
1753 eromero 878
  where W = U' * V.
1795 eromero 879
  workS0 and workS1 are an auxiliary scalar vector of size
880
  (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
881
  is needed, and of size eU*eV.
882
*/
883
 
884
#undef __FUNCT__  
885
#define __FUNCT__ "VecsMultIb"
886
PetscErrorCode VecsMultIb(PetscScalar *M, MatType_t sM, PetscInt ldM,
887
                          PetscInt rM, PetscInt cM, PetscScalar *auxS,
888
                          Vec V)
889
{
890
  PetscErrorCode  ierr;
891
  PetscScalar     *W, *Wr;
892
 
893
  PetscFunctionBegin;
894
 
895
  /* Check if quick exit */
896
  if ((rM == 0) || (cM == 0))
897
    PetscFunctionReturn(0);
898
 
899
  if (auxS)
900
    W = auxS;
901
  else {
902
    ierr = PetscMalloc(sizeof(PetscScalar)*rM*cM*2, &W);
903
    CHKERRQ(ierr);
904
  }
905
  Wr = W + rM*cM;
906
 
907
  ierr = PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
908
 
909
  if (sM == 0) {
910
    /* W <- M */
911
    ierr = SlepcDenseCopy(W, rM, M, ldM, rM, cM); CHKERRQ(ierr);
912
 
913
    /* Wr <- ReduceAll(W, SUM) */
1899 eromero 914
    ierr = MPI_Allreduce(W, Wr, rM*cM, MPIU_SCALAR, MPIU_SUM,
1795 eromero 915
                         ((PetscObject)V)->comm); CHKERRQ(ierr);
916
 
917
    /* M <- Wr */
918
    ierr = SlepcDenseCopy(M, ldM, Wr, rM, rM, cM); CHKERRQ(ierr);
919
 
920
  /* Other structures */
921
  } else {
922
    SETERRQ(1, "Matrix structure doesn't support by VecsMultI!");
923
    PetscFunctionReturn(1);
924
  }
925
 
926
  ierr = PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
927
 
928
  if (!auxS) {
929
    ierr = PetscFree(W); CHKERRQ(ierr);
930
  }
931
 
932
  PetscFunctionReturn(0);
933
}
934
 
935
 
936
/* Computes M <- [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ]
937
                 [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
938
  where W = U' * V.
1753 eromero 939
  r, a DvdReduction structure,
940
  sr, an structure DvdMult_copy_func.
941
*/
1634 slepc 942
 
1753 eromero 943
#undef __FUNCT__  
944
#define __FUNCT__ "VecsMultS"
945
PetscErrorCode VecsMultS(PetscScalar *M, MatType_t sM, PetscInt ldM,
946
                         Vec *U, PetscInt sU, PetscInt eU,
947
                         Vec *V, PetscInt sV, PetscInt eV, DvdReduction *r,
948
                         DvdMult_copy_func *sr)
949
{
950
  PetscErrorCode  ierr;
951
  PetscInt        ldU, ldV;
952
  PetscScalar     *pu, *pv, *W;
953
 
954
  PetscFunctionBegin;
955
 
956
  /* Check if quick exit */
957
  if ((eU-sU == 0) || (eV-sV == 0))
958
    PetscFunctionReturn(0);
959
 
960
  /* Get the dense matrices and dimensions associated to U and V */
961
  ierr = VecGetLocalSize(U[0], &ldU); CHKERRQ(ierr);
962
  ierr = VecGetLocalSize(V[0], &ldV); CHKERRQ(ierr);
963
  if (ldU != ldV) {
964
    SETERRQ(1, "Matrix dimensions doesn't match!");
965
  }
966
  ierr = VecGetArray(U[0], &pu);CHKERRQ(ierr);
967
  ierr = VecGetArray(V[0], &pv);CHKERRQ(ierr);
968
 
969
  ierr = PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
970
 
971
  if ((sU == 0) && (sV == 0) && (eU == ldM)) {
972
    /* Use the smart memory usage version */
973
 
974
    /* Add the reduction to r */
975
    ierr = SlepcAllReduceSum(r, eU*eV, VecsMultS_copy_func, sr, &W);
976
    CHKERRQ(ierr);
977
 
978
    /* W <- U' * V */
979
    ierr = SlepcDenseMatProdTriang(W, sM, eU,
980
                                   pu, 0, ldU, ldU, eU, PETSC_TRUE,
981
                                   pv, 0, ldV, ldV, eV, PETSC_FALSE);
982
    CHKERRQ(ierr);
983
 
984
    /* M <- ReduceAll(W, SUM) */
985
    sr->M = M;    sr->ld = ldM;
986
    sr->i0 = 0;   sr->i1 = eV;    sr->s0 = sU;    sr->e0 = eU;
987
                  sr->i2 = eV;
988
 
989
  /* Full M matrix */
1756 eromero 990
  } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
991
             DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
1753 eromero 992
    /* Add the reduction to r */
993
    ierr = SlepcAllReduceSum(r, (eU-sU)*sV+(eV-sV)*eU, VecsMultS_copy_func,
994
                             sr, &W);
995
    CHKERRQ(ierr);
996
 
997
    /* W(0:(eU-sU)*sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
998
    ierr = SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
999
                             pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
1000
                             pv       , ldV, ldV, sV,    PETSC_FALSE);
1001
    CHKERRQ(ierr);
1002
 
1003
    /* W((eU-sU)*sV:(eU-sU)*sV+(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
1004
    ierr = SlepcDenseMatProd(W+(eU-sU)*sV, eU, 0.0, 1.0,
1005
                             pu,        ldU, ldU, eU,    PETSC_TRUE,
1006
                             pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
1007
    CHKERRQ(ierr);
1008
 
1009
    /* M <- ReduceAll(W, SUM) */
1010
    sr->M = M;    sr->ld = ldM;
1011
    sr->i0 = 0;   sr->i1 = sV;    sr->s0 = sU;    sr->e0 = eU;
1012
                  sr->i2 = eV;    sr->s1 = 0;     sr->e1 = eU;
1013
 
1014
  /* Upper triangular M matrix */
1756 eromero 1015
  } else if (DVD_IS(sM,DVD_MAT_UTRIANG) &&
1016
             DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
1753 eromero 1017
    /* Add the reduction to r */
1018
    ierr = SlepcAllReduceSum(r, (eV-sV)*eU, VecsMultS_copy_func, sr, &W);
1019
    CHKERRQ(ierr);
1020
 
1021
    /* W(0:(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
1022
    ierr = SlepcDenseMatProd(W,         eU,  0.0, 1.0,
1023
                             pu,        ldU, ldU, eU,    PETSC_TRUE,
1024
                             pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
1025
    CHKERRQ(ierr);
1026
 
1027
    /* M <- ReduceAll(W, SUM) */
1028
    sr->M = M;    sr->ld = ldM;
1029
    sr->i0 = sV;  sr->i1 = eV;    sr->s0 = 0;     sr->e0 = eU;
1030
                  sr->i2 = eV;
1031
 
1032
  /* Lower triangular M matrix */
1756 eromero 1033
  } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
1034
             DVD_IS(sM,DVD_MAT_LTRIANG)) {
1753 eromero 1035
    /* Add the reduction to r */
1036
    ierr = SlepcAllReduceSum(r, (eU-sU)*eV, VecsMultS_copy_func, sr, &W);
1037
    CHKERRQ(ierr);
1038
 
1039
    /* W(0:(eU-sU)*eV-1) <- U(sU:eU-1)' * V(0:eV-1) */
1040
    ierr = SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
1041
                             pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
1042
                             pv       , ldV, ldV, eV,    PETSC_FALSE);
1043
    CHKERRQ(ierr);
1044
 
1045
    /* ReduceAll(W, SUM) */
1046
    sr->M = M;    sr->ld = ldM;
1047
    sr->i0 = 0;   sr->i1 = eV;    sr->s0 = sU;    sr->e0 = eU;
1048
                  sr->i2 = eV;
1049
  }
1050
 
1051
  ierr = PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
1052
 
1053
  ierr = VecRestoreArray(U[0], &pu); CHKERRQ(ierr);
1992 eromero 1054
  ierr = PetscObjectStateDecrease((PetscObject)U[0]); CHKERRQ(ierr);
1753 eromero 1055
  ierr = VecRestoreArray(V[0], &pv); CHKERRQ(ierr);
1992 eromero 1056
  ierr = PetscObjectStateDecrease((PetscObject)V[0]); CHKERRQ(ierr);
1753 eromero 1057
 
1058
  PetscFunctionReturn(0);
1059
}
1060
 
1061
#undef __FUNCT__  
1062
#define __FUNCT__ "VecsMultS_copy_func"
1063
PetscErrorCode VecsMultS_copy_func(PetscScalar *out, PetscInt size_out,
1064
                                   void *ptr)
1065
{
1066
  PetscInt        i, j, k;
1067
  DvdMult_copy_func
1068
                  *sr = (DvdMult_copy_func*)ptr;
1069
 
1070
  PetscFunctionBegin;
1071
 
1072
  for (i=sr->i0,k=0; i<sr->i1; i++)
1073
    for (j=sr->ld*i+sr->s0; j<sr->ld*i+sr->e0; j++,k++) sr->M[j] = out[k];
1074
  for (i=sr->i1; i<sr->i2; i++)
1075
    for (j=sr->ld*i+sr->s1; j<sr->ld*i+sr->e1; j++,k++) sr->M[j] = out[k];
1076
 
1077
  if (k != size_out) {
1078
    SETERRQ(1, "Error in VecsMultS_copy_func!");
1079
    PetscFunctionReturn(1);
1080
  }
1081
 
1082
  PetscFunctionReturn(0);
1083
}
1084
 
1675 slepc 1085
/* Orthonormalize a chunk of parallel vector.
1086
   NOTE: wS0 and wS1 must be of size n*n.
1087
*/
1088
#undef __FUNCT__  
1089
#define __FUNCT__ "VecsOrthonormalize"
1090
PetscErrorCode VecsOrthonormalize(Vec *V, PetscInt n, PetscScalar *wS0,
1091
                                  PetscScalar *wS1)
1092
{
1093
  PetscErrorCode  ierr;
1094
  PetscBLASInt    nn = n, info, ld;
1992 eromero 1095
  PetscInt        ldV, i;
1675 slepc 1096
  PetscScalar     *H, *T, one=1.0, *pv;
1097
 
1098
  PetscFunctionBegin;
1099
 
1100
  if (!wS0) {
1101
    ierr = PetscMalloc(sizeof(PetscScalar)*n*n, &H); CHKERRQ(ierr);
1102
  } else
1103
    H = wS0;
1104
  if (!wS1) {
1105
    ierr = PetscMalloc(sizeof(PetscScalar)*n*n, &T); CHKERRQ(ierr);
1106
  } else
1107
    T = wS1;
1108
 
1109
  /* H <- V' * V */
1747 eromero 1110
  ierr = VecsMult(H, 0, n, V, 0, n, V, 0, n, T, PETSC_NULL); CHKERRQ(ierr);
1675 slepc 1111
 
1112
  /* H <- chol(H) */
1113
  LAPACKpbtrf_("U", &nn, &nn, H, &nn, &info);
1114
  if (info) SETERRQ1(PETSC_ERR_LIB, "Error in Lapack PBTRF %d", info);
1115
 
1116
  /* V <- V * inv(H) */
1117
  ierr = VecGetLocalSize(V[0], &ldV); CHKERRQ(ierr);
1118
  ierr = VecGetArray(V[0], &pv);CHKERRQ(ierr);
1119
  ld = ldV;
1120
  BLAStrsm_("R", "U", "N", "N", &ld, &nn, &one, H, &nn, pv, &ld);
1121
  ierr = VecRestoreArray(V[0], &pv);CHKERRQ(ierr);
1992 eromero 1122
  for(i=1; i<n; i++) {
1123
    ierr = PetscObjectStateIncrease((PetscObject)V[i]); CHKERRQ(ierr);
1124
  }
1675 slepc 1125
 
1126
  if (!wS0) {
1127
    ierr = PetscFree(H); CHKERRQ(ierr);
1128
  }
1129
  if (!wS1) {
1130
    ierr = PetscFree(T); CHKERRQ(ierr);
1131
  }
1132
 
1133
  PetscFunctionReturn(0);
1134
}
1135
 
1753 eromero 1136
/*
1137
  Sum up several arrays with only one call to MPIReduce.
1138
*/
1139
 
1140
#undef __FUNCT__  
1141
#define __FUNCT__ "SlepcAllReduceSumBegin"
1142
PetscErrorCode SlepcAllReduceSumBegin(DvdReductionChunk *ops,
1143
                                      PetscInt max_size_ops,
1144
                                      PetscScalar *in, PetscScalar *out,
1145
                                      PetscInt max_size_in, DvdReduction *r,
1146
                                      MPI_Comm comm)
1147
{
1148
  PetscFunctionBegin;
1149
 
1150
  r->in = in;
1151
  r->out = out;
1152
  r->size_in = 0;
1153
  r->max_size_in = max_size_in;
1154
  r->ops = ops;
1155
  r->size_ops = 0;
1156
  r->max_size_ops = max_size_ops;
1157
  r->comm = comm;
1158
 
1159
  PetscFunctionReturn(0);
1160
}
1161
 
1162
#undef __FUNCT__  
1163
#define __FUNCT__ "SlepcAllReduceSum"
1164
PetscErrorCode SlepcAllReduceSum(DvdReduction *r, PetscInt size_in,
1165
                                 DvdReductionPostF f, void *ptr,
1166
                                 PetscScalar **in)
1167
{
1168
  PetscFunctionBegin;
1169
 
1170
  *in = r->in + r->size_in;
1171
  r->ops[r->size_ops].out = r->out + r->size_in;
1172
  r->ops[r->size_ops].size_out = size_in;
1173
  r->ops[r->size_ops].f = f;
1174
  r->ops[r->size_ops].ptr = ptr;
1175
  if (++(r->size_ops) > r->max_size_ops) {
1176
    SETERRQ(1, "max_size_ops is not enought!");
1177
  }
1178
  if ((r->size_in+= size_in) > r->max_size_in) {
1179
    SETERRQ(1, "max_size_in is not enought!");
1180
  }
1181
 
1182
  PetscFunctionReturn(0);
1183
}
1184
 
1185
 
1186
#undef __FUNCT__  
1187
#define __FUNCT__ "SlepcAllReduceSumEnd"
1188
PetscErrorCode SlepcAllReduceSumEnd(DvdReduction *r)
1189
{
1190
  PetscErrorCode  ierr;
1191
  PetscInt        i;
1192
 
1193
  PetscFunctionBegin;
1194
 
1795 eromero 1195
  /* Check if quick exit */
1196
  if (r->size_ops == 0)
1197
    PetscFunctionReturn(0);
1198
 
1753 eromero 1199
  /* Call the MPIAllReduce routine */
1899 eromero 1200
  ierr = MPI_Allreduce(r->in, r->out, r->size_in, MPIU_SCALAR, MPIU_SUM,
1753 eromero 1201
                       r->comm); CHKERRQ(ierr);
1202
 
1203
  /* Call the postponed routines */
1204
  for(i=0; i<r->size_ops; i++) {
1205
    ierr = r->ops[i].f(r->ops[i].out, r->ops[i].size_out, r->ops[i].ptr);
1206
    CHKERRQ(ierr);
1207
  }
1208
 
1209
  /* Tag the operation as done */
1210
  r->size_ops = 0;
1211
 
1212
  PetscFunctionReturn(0);
1213
}
1795 eromero 1214
 
1831 eromero 1215
 
1795 eromero 1216
#undef __FUNCT__  
1217
#define __FUNCT__ "dvd_orthV"
1989 eromero 1218
PetscErrorCode dvd_orthV(IP ip, Vec *DS, PetscInt size_DS, Vec *cX,
1219
                         PetscInt size_cX, Vec *V, PetscInt V_new_s,
1220
                         PetscInt V_new_e, PetscScalar *auxS, Vec auxV,
1221
                         PetscRandom rand)
1795 eromero 1222
{
1223
  PetscErrorCode  ierr;
1224
  PetscInt        i, j;
1225
  PetscTruth      lindep;
1226
  PetscReal       norm;
1869 eromero 1227
  PetscScalar     *auxS0 = auxS;
1831 eromero 1228
 
1795 eromero 1229
  PetscFunctionBegin;
1831 eromero 1230
 
1795 eromero 1231
  /* Orthonormalize V with IP */
1232
  for (i=V_new_s; i<V_new_e; i++) {
1233
    for(j=0; j<3; j++) {
1975 eromero 1234
      if (j>0) { ierr = SlepcVecSetRandom(V[i], rand); CHKERRQ(ierr); }
1831 eromero 1235
      if (cX + size_cX == V) {
1236
        /* If cX and V are contiguous, orthogonalize in one step */
1989 eromero 1237
        ierr = IPOrthogonalize(ip, size_DS, DS, size_cX+i, PETSC_NULL, cX,
1831 eromero 1238
                               V[i], auxS0, &norm, &lindep); CHKERRQ(ierr);
1989 eromero 1239
      } else if (DS) {
1240
        /* Else orthogonalize first against DS, and then against cX and V */
1241
        ierr = IPOrthogonalize(ip, size_DS, DS, size_cX, PETSC_NULL, cX,
1831 eromero 1242
                               V[i], auxS0, PETSC_NULL, &lindep); CHKERRQ(ierr);
1243
        if(lindep == PETSC_FALSE) {
1244
          ierr = IPOrthogonalize(ip, 0, PETSC_NULL, i, PETSC_NULL, V,
1245
                                 V[i], auxS0, &norm, &lindep); CHKERRQ(ierr);
1246
        }
1989 eromero 1247
      } else {
1248
        /* Else orthogonalize first against cX and then against V */
1249
        ierr = IPOrthogonalize(ip, size_cX, cX, i, PETSC_NULL, V,
1250
                               V[i], auxS0, &norm, &lindep); CHKERRQ(ierr);
1831 eromero 1251
      }
1795 eromero 1252
      if((lindep == PETSC_FALSE) && (norm > PETSC_MACHINE_EPSILON)) break;
2007 eromero 1253
      ierr = PetscInfo1(ip, "Orthonormalization problems adding the vector %d to the searching subspace\n", i);
2013 eromero 1254
      CHKERRQ(ierr);
1795 eromero 1255
    }
1256
    if((lindep == PETSC_TRUE) || (norm < PETSC_MACHINE_EPSILON)) {
1257
        SETERRQ(1, "Error during the orthonormalization of the eigenvectors!");
1258
    }
1259
    ierr = VecScale(V[i], 1.0/norm); CHKERRQ(ierr);
1260
  }
1831 eromero 1261
 
1795 eromero 1262
  PetscFunctionReturn(0);
1263
}
1831 eromero 1264
 
1968 eromero 1265
#undef __FUNCT__  
1266
#define __FUNCT__ "dvd_compute_eigenvectors"
1267
/*
1268
  Compute eigenvectors associated to the Schur decomposition (S, T) and
1269
  save the left vectors in pY and the right vectors in pX, where
1270
  n, size of the eigenproblem
1271
  ldS, ldT, leading dimension of S and T
1272
  ldpX, ldpY, leading dimension of pX and pY
1273
  auxS, auxiliar scalar of length:
1274
    double standard 3n+n*n, double generalized 11n+4n*n,
2022 eromero 1275
    complex standard 3n+n*n, complex generalized 3n+2n*n
1968 eromero 1276
  size_auxS, the length of auxS
1277
  doProd, if true pX and pY return the eigenvectors premultiplied by the input vectors stored in pX and pY respectively
1278
*/
1279
PetscErrorCode dvd_compute_eigenvectors(PetscInt n_, PetscScalar *S,
1280
  PetscInt ldS, PetscScalar *T, PetscInt ldT, PetscScalar *pX,
1281
  PetscInt ldpX_, PetscScalar *pY, PetscInt ldpY_, PetscScalar *auxS,
1282
  PetscInt size_auxS, PetscTruth doProd)
1283
{
1284
  PetscErrorCode  ierr;
1285
  PetscBLASInt    n = PetscBLASIntCast(n_), ldpX = PetscBLASIntCast(ldpX_),
1286
                  ldpY = PetscBLASIntCast(ldpY_), nout = n, info;
1287
  PetscScalar     *Sc, *Tc;
1288
  const char      *side, *howmny;
1289
#if defined(PETSC_USE_COMPLEX)
1290
  PetscReal       *auxR;
1291
#else
1292
  PetscScalar     *pA,*pB;
1293
  PetscBLASInt    n1, ldpA,ldpB;
1294
  PetscScalar     *alphar, *alphai, *beta;
1295
#endif
1296
 
1297
  PetscFunctionBegin;
1795 eromero 1298
 
1968 eromero 1299
  if (pX && pY) side = "B";
1300
  else if (pX)  side = "R";
1301
  else if (pY)  side = "L";
1302
  else { PetscFunctionReturn(0); }
1303
 
1304
  if (!pX) ldpX = 1;
1305
  if (!pY) ldpY = 1;
1306
 
1307
  howmny = (doProd == PETSC_TRUE)?"B":"A";
1308
 
1309
  Sc = auxS; auxS+= n*n; size_auxS-= n*n;
1310
  if (T) Tc = auxS, auxS+= n*n, size_auxS-= n*n;
1311
  else   Tc = PETSC_NULL;
1312
 
1313
  /* Sc <- S, Tc <- T */
1314
  ierr = SlepcDenseCopy(Sc, n, S, ldS, n, n); CHKERRQ(ierr);
1315
  if (T) {
1316
    ierr = SlepcDenseCopy(Tc, n, T, ldT, n, n); CHKERRQ(ierr);
1317
  }
1318
 
1319
  if (T) {
1320
    /* [eigr, pX] = eig(S, T) */
1321
#if defined(PETSC_USE_COMPLEX)
1322
    auxR = (PetscReal*)auxS; auxS = (PetscScalar*)(auxR+2*n); size_auxS-= 2*n;
1323
    if (size_auxS < 2*n)
1324
      SETERRQ(PETSC_ERR_LIB,"Insufficient auxiliar memory for xTGEVC");
1325
    LAPACKtgevc_(side,howmny,PETSC_NULL,&n,Sc,&n,Tc,&n,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,auxR,&info);
1326
    if (info) SETERRQ1(PETSC_ERR_LIB,"Error in Lapack xTGEVC %i",info);
1327
#else
1328
    alphar = auxS; auxS+= n; size_auxS-= n;
1329
    alphai = auxS; auxS+= n; size_auxS-= n;
1330
    beta = auxS; auxS+= n; size_auxS-= n;
1331
    if (doProd == PETSC_TRUE) {
1332
      if (pX) pA = auxS, auxS+= n*n, size_auxS-= n*n, ldpA = n;
1333
      else    pA = PETSC_NULL, ldpA = 0;
1334
      if (pY) pB = auxS, auxS+= n*n, size_auxS-= n*n, ldpB = n;
1335
      else    pB = PETSC_NULL, ldpB = 0;
1336
    } else {
1337
      pA = pX; pB = pY; ldpA = ldpX; ldpB = ldpY;
1338
    }
1339
    /* LAPACKtgevc_ needs the element i,i+1 in the 2-by-2 digonal blocs
1340
       of T that represent complex conjugate eigenpairs to be zero */
1341
    n1 = size_auxS;
1342
    if (size_auxS < 8*n)
1343
      SETERRQ(PETSC_ERR_LIB,"Insufficient auxiliar memory for xGGEV");
1344
    LAPACKggev_(pY?"V":"N",pX?"V":"N",&n,Sc,&n,Tc,&n,alphar,alphai,beta,pB,&ldpB,pA,&ldpA,auxS,&n1,&info);
2034 eromero 1345
    if (info) SETERRQ1(PETSC_ERR_LIB,"Error in Lapack xGGEV %i",info);
1968 eromero 1346
    if (doProd == PETSC_TRUE) {
1347
      if (pX) {
1348
        /* pX <- pX * pA */
1349
        ierr = SlepcDenseCopy(Sc, n, pX, ldpX, n, n); CHKERRQ(ierr);
2022 eromero 1350
        ierr = SlepcDenseMatProd(pX, ldpX, 0.0, 1.0,
1968 eromero 1351
                                 Sc, n, n, n, PETSC_FALSE,
1352
                                 pA, n, n, n, PETSC_FALSE); CHKERRQ(ierr);
1353
      }
1354
      if (pY) {
1355
        /* pY <- pY * pB */
1356
        ierr = SlepcDenseCopy(Sc, n, pY, ldpY, n, n); CHKERRQ(ierr);
2022 eromero 1357
        ierr = SlepcDenseMatProd(pY, ldpY, 0.0, 1.0,
1968 eromero 1358
                                 Sc, n, n, n, PETSC_FALSE,
1359
                                 pB, n, n, n, PETSC_FALSE); CHKERRQ(ierr);
1360
      }
1361
    }
1362
#endif
1363
  } else {
1364
    /* [eigr, pX] = eig(S) */
1365
#if defined(PETSC_USE_COMPLEX)
1366
    auxR = (PetscReal*)auxS; auxS = (PetscScalar*)(auxR+n); size_auxS-= n;
1367
    if (size_auxS < 2*n)
1368
      SETERRQ(PETSC_ERR_LIB,"Insufficient auxiliar memory for xTREVC");
1369
    LAPACKtrevc_(side,howmny,PETSC_NULL,&n,Sc,&n,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,auxR,&info);
1370
#else
1371
    LAPACKtrevc_(side,howmny,PETSC_NULL,&n,Sc,&n,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,&info);
1372
#endif
1373
    if (info) SETERRQ1(PETSC_ERR_LIB,"Error in Lapack xTREVC %i",info);
1374
  }
1375
 
1376
  PetscFunctionReturn(0);
1377
}
2034 eromero 1378
 
1379
#undef __FUNCT__  
1380
#define __FUNCT__ "dvd_compute_eigenvalues"
1381
/*
1382
  Compute the eigenvalues eigr+eigi*i associated to the Schur decomposition
1383
  (S, T), where
1384
  n, size of the eigenproblem
1385
  ldS, ldT, leading dimension of S and T
1386
*/
1387
PetscErrorCode dvd_compute_eigenvalues(PetscInt n, PetscScalar *S,
1388
  PetscInt ldS, PetscScalar *T, PetscInt ldT, PetscScalar *eigr,
1389
  PetscScalar *eigi)
1390
{
1391
  PetscInt        i;
1392
#if !defined(PETSC_USE_COMPLEX)
1393
  PetscErrorCode  ierr;
1394
  PetscScalar     Sc[4], Tc[4], beta[2], auxS[16];
1395
  PetscBLASInt    two=2, info, size_auxS=16;
1396
#endif
1397
 
1398
  PetscFunctionBegin;
1399
 
1400
  for (i=0; i<n; i++) {
1401
#if !defined(PETSC_USE_COMPLEX)
1402
    if (i<n-1 && S[i*ldS+i+1] != 0.0) {
1403
      ierr = SlepcDenseCopy(Sc, 2, &S[i*ldS+i], ldS, 2, 2); CHKERRQ(ierr);
1404
      ierr = SlepcDenseCopy(Tc, 2, &T[i*ldT+i], ldT, 2, 2); CHKERRQ(ierr);
1405
      LAPACKggev_("N","N",&two,Sc,&two,Tc,&two,&eigr[i],&eigi[i],beta,
1406
                  PETSC_NULL, &two,PETSC_NULL,&two,auxS,&size_auxS,&info);
1407
      if (info) SETERRQ1(PETSC_ERR_LIB,"Error in Lapack xGGEV %i",info);
1408
      eigr[i]  /= beta[0]; eigi[i]  /= beta[0];
1409
      eigr[i+1]/= beta[1]; eigi[i+1]/= beta[1];
1410
      i++;
1411
    } else
1412
#endif
1413
    {
1414
      if (T[i*ldT+i] == 0.0) {
1415
        if (PetscRealPart(S[i*ldS+i]) < 0.0) eigr[i] = PETSC_MIN;
1416
        else eigr[i] = PETSC_MAX;
1417
      } else eigr[i] = S[i*ldS+i] / T[i*ldT+i];
1418
      if (eigi) eigi[i] = 0.0;
1419
    }
1420
  }
1421
 
1422
  PetscFunctionReturn(0);
1423
}