Subversion Repositories slepc-dev

Rev

Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2110 jroman 1
/*
2
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3
   SLEPc - Scalable Library for Eigenvalue Problem Computations
2575 eromero 4
   Copyright (c) 2002-2011, Universitat Politecnica de Valencia, Spain
1619 slepc 5
 
2110 jroman 6
   This file is part of SLEPc.
7
 
8
   SLEPc is free software: you can redistribute it and/or modify it under  the
9
   terms of version 3 of the GNU Lesser General Public License as published by
10
   the Free Software Foundation.
11
 
12
   SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
13
   WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
14
   FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
15
   more details.
16
 
17
   You  should have received a copy of the GNU Lesser General  Public  License
18
   along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
19
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
20
*/
21
 
2729 jroman 22
#include <slepc-private/vecimplslepc.h>
1985 eromero 23
#include "davidson.h"
1619 slepc 24
 
1745 eromero 25
PetscLogEvent SLEPC_SlepcDenseMatProd = 0;
1750 eromero 26
PetscLogEvent SLEPC_SlepcDenseNorm = 0;
1745 eromero 27
PetscLogEvent SLEPC_SlepcDenseOrth = 0;
28
PetscLogEvent SLEPC_SlepcDenseCopy = 0;
29
PetscLogEvent SLEPC_VecsMult = 0;
1630 slepc 30
 
1634 slepc 31
void dvd_sum_local(void *in, void *out, PetscMPIInt *cnt,MPI_Datatype *t);
1753 eromero 32
PetscErrorCode VecsMultS_copy_func(PetscScalar *out, PetscInt size_out,
33
                                   void *ptr);
1634 slepc 34
 
2223 jroman 35
#undef __FUNCT__  
36
#define __FUNCT__ "SlepcDenseMatProd"
1619 slepc 37
/*
38
  Compute C <- a*A*B + b*C, where
39
    ldC, the leading dimension of C,
40
    ldA, the leading dimension of A,
41
    rA, cA, rows and columns of A,
42
    At, if true use the transpose of A instead,
43
    ldB, the leading dimension of B,
44
    rB, cB, rows and columns of B,
45
    Bt, if true use the transpose of B instead
46
*/
47
PetscErrorCode SlepcDenseMatProd(PetscScalar *C, PetscInt _ldC, PetscScalar b,
48
  PetscScalar a,
2216 jroman 49
  const PetscScalar *A, PetscInt _ldA, PetscInt rA, PetscInt cA, PetscBool At,
50
  const PetscScalar *B, PetscInt _ldB, PetscInt rB, PetscInt cB, PetscBool Bt)
1619 slepc 51
{
1630 slepc 52
  PetscErrorCode  ierr;
53
  PetscInt        tmp;
54
  PetscBLASInt    m, n, k, ldA = _ldA, ldB = _ldB, ldC = _ldC;
55
  const char      *N = "N", *T = "C", *qA = N, *qB = N;
1619 slepc 56
 
57
  PetscFunctionBegin;
1675 slepc 58
 
59
  if ((rA == 0) || (cB == 0)) { PetscFunctionReturn(0); }
2651 eromero 60
  PetscValidScalarPointer(C,1);
61
  PetscValidScalarPointer(A,5);
62
  PetscValidScalarPointer(B,10);
1675 slepc 63
 
1745 eromero 64
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
1619 slepc 65
 
66
  /* Transpose if needed */
2177 jroman 67
  if (At) tmp = rA, rA = cA, cA = tmp, qA = T;
68
  if (Bt) tmp = rB, rB = cB, cB = tmp, qB = T;
1619 slepc 69
 
70
  /* Check size */
71
  if (cA != rB) {
2214 jroman 72
    SETERRQ(PETSC_COMM_SELF,1, "Matrix dimensions do not match");
1619 slepc 73
  }
74
 
75
  /* Do stub */
1744 eromero 76
  if ((rA == 1) && (cA == 1) && (cB == 1)) {
2556 eromero 77
    if (!At && !Bt) *C = *A * *B;
78
    else if (At && !Bt) *C = PetscConj(*A) * *B;
79
    else if (!At && Bt) *C = *A * PetscConj(*B);
80
    else *C = PetscConj(*A) * PetscConj(*B);
1744 eromero 81
    m = n = k = 1;
82
  } else {
83
    m = rA; n = cB; k = cA;
84
    BLASgemm_(qA, qB, &m, &n, &k, &a, (PetscScalar*)A, &ldA, (PetscScalar*)B,
85
              &ldB, &b, C, &ldC);
86
  }
1619 slepc 87
 
1630 slepc 88
  ierr = PetscLogFlops(m*n*2*k);CHKERRQ(ierr);
1745 eromero 89
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
1630 slepc 90
 
1619 slepc 91
  PetscFunctionReturn(0);
92
}
93
 
2223 jroman 94
#undef __FUNCT__  
95
#define __FUNCT__ "SlepcDenseMatProdTriang"
1626 slepc 96
/*
1747 eromero 97
  Compute C <- A*B, where
1988 eromero 98
    sC, structure of C,
1747 eromero 99
    ldC, the leading dimension of C,
1988 eromero 100
    sA, structure of A,
1747 eromero 101
    ldA, the leading dimension of A,
102
    rA, cA, rows and columns of A,
103
    At, if true use the transpose of A instead,
1988 eromero 104
    sB, structure of B,
1747 eromero 105
    ldB, the leading dimension of B,
106
    rB, cB, rows and columns of B,
107
    Bt, if true use the transpose of B instead
108
*/
109
PetscErrorCode SlepcDenseMatProdTriang(
110
  PetscScalar *C, MatType_t sC, PetscInt ldC,
111
  const PetscScalar *A, MatType_t sA, PetscInt ldA, PetscInt rA, PetscInt cA,
2216 jroman 112
  PetscBool At,
1747 eromero 113
  const PetscScalar *B, MatType_t sB, PetscInt ldB, PetscInt rB, PetscInt cB,
2216 jroman 114
  PetscBool Bt)
1747 eromero 115
{
116
  PetscErrorCode  ierr;
1988 eromero 117
  PetscInt        tmp;
118
  PetscScalar     one=1.0, zero=0.0;
1747 eromero 119
  PetscBLASInt    rC, cC, _ldA = ldA, _ldB = ldB, _ldC = ldC;
1750 eromero 120
 
1747 eromero 121
  PetscFunctionBegin;
122
 
123
  if ((rA == 0) || (cB == 0)) { PetscFunctionReturn(0); }
2651 eromero 124
  PetscValidScalarPointer(C,1);
125
  PetscValidScalarPointer(A,4);
126
  PetscValidScalarPointer(B,10);
1747 eromero 127
 
128
  /* Transpose if needed */
2177 jroman 129
  if (At) tmp = rA, rA = cA, cA = tmp;
130
  if (Bt) tmp = rB, rB = cB, cB = tmp;
1747 eromero 131
 
132
  /* Check size */
2214 jroman 133
  if (cA != rB) SETERRQ(PETSC_COMM_SELF,1, "Matrix dimensions do not match");
134
  if (sB != 0) SETERRQ(PETSC_COMM_SELF,1, "Matrix type not supported for B");
1747 eromero 135
 
1750 eromero 136
  /* Optimized version: trivial case */
137
  if ((rA == 1) && (cA == 1) && (cB == 1)) {
2177 jroman 138
    if (!At && !Bt)     *C = *A * *B;
139
    else if (At && !Bt) *C = PetscConj(*A) * *B;
140
    else if (!At && Bt) *C = *A * PetscConj(*B);
141
    else if (At && Bt)  *C = PetscConj(*A) * PetscConj(*B);
1750 eromero 142
    PetscFunctionReturn(0);
143
  }
144
 
145
  /* Optimized versions: sA == 0 && sB == 0 */
146
  if ((sA == 0) && (sB == 0)) {
2177 jroman 147
    if (At) tmp = rA, rA = cA, cA = tmp;
148
    if (Bt) tmp = rB, rB = cB, cB = tmp;
1750 eromero 149
    ierr = SlepcDenseMatProd(C, ldC, 0.0, 1.0, A, ldA, rA, cA, At, B, ldB, rB,
150
                             cB, Bt); CHKERRQ(ierr);
151
    PetscFunctionReturn(ierr);
152
  }
153
 
1988 eromero 154
  /* Optimized versions: A hermitian && (B not triang) */
1756 eromero 155
  if (DVD_IS(sA,DVD_MAT_HERMITIAN) &&
156
      DVD_ISNOT(sB,DVD_MAT_UTRIANG) &&
1988 eromero 157
      DVD_ISNOT(sB,DVD_MAT_LTRIANG)    ) {
1750 eromero 158
    ierr = PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
159
    rC = rA; cC = cB;
2736 jroman 160
    BLASsymm_("L", DVD_ISNOT(sA,DVD_MAT_LTRIANG)?"U":"L", &rC, &cC, &one,
1750 eromero 161
              (PetscScalar*)A, &_ldA, (PetscScalar*)B, &_ldB, &zero, C, &_ldC);
162
    ierr = PetscLogFlops(rA*cB*cA); CHKERRQ(ierr);
163
    ierr = PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
164
    PetscFunctionReturn(0);
165
  }
166
 
1988 eromero 167
  /* Optimized versions: B hermitian && (A not triang) */
168
  if (DVD_IS(sB,DVD_MAT_HERMITIAN) &&
169
      DVD_ISNOT(sA,DVD_MAT_UTRIANG) &&
170
      DVD_ISNOT(sA,DVD_MAT_LTRIANG)    ) {
171
    ierr = PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
172
    rC = rA; cC = cB;
2736 jroman 173
    BLASsymm_("R", DVD_ISNOT(sB,DVD_MAT_LTRIANG)?"U":"L", &rC, &cC, &one,
1988 eromero 174
              (PetscScalar*)B, &_ldB, (PetscScalar*)A, &_ldA, &zero, C, &_ldC);
175
    ierr = PetscLogFlops(rA*cB*cA); CHKERRQ(ierr);
176
    ierr = PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
177
    PetscFunctionReturn(0);
1747 eromero 178
  }
1988 eromero 179
 
2214 jroman 180
  SETERRQ(PETSC_COMM_SELF,1, "Matrix type not supported for A");
1747 eromero 181
}
182
 
2223 jroman 183
#undef __FUNCT__  
184
#define __FUNCT__ "SlepcDenseNorm"
1750 eromero 185
/*
1965 eromero 186
  Normalize the columns of the matrix A, where
1750 eromero 187
    ldA, the leading dimension of A,
1965 eromero 188
    rA, cA, rows and columns of A.
189
  if eigi is given, the pairs of contiguous columns i i+1 such as eigi[i] != 0
190
  are normalized as being one column.
1750 eromero 191
*/
192
PetscErrorCode SlepcDenseNorm(PetscScalar *A, PetscInt ldA, PetscInt _rA,
1965 eromero 193
                              PetscInt cA, PetscScalar *eigi)
1750 eromero 194
{
195
  PetscErrorCode  ierr;
1965 eromero 196
  PetscInt        i;
197
  PetscScalar     norm, norm0;
1750 eromero 198
  PetscBLASInt    rA = _rA, one=1;
1747 eromero 199
 
1750 eromero 200
  PetscFunctionBegin;
2651 eromero 201
  PetscValidScalarPointer(A,1);
202
  PetscValidScalarPointer(eigi,5);
1750 eromero 203
 
204
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseNorm,0,0,0,0);CHKERRQ(ierr);
205
 
206
  for(i=0; i<cA; i++) {
1965 eromero 207
    if(eigi && eigi[i] != 0.0) {
208
      norm = BLASnrm2_(&rA, &A[i*ldA], &one);
209
      norm0 = BLASnrm2_(&rA, &A[(i+1)*ldA], &one);
210
      norm = 1.0/PetscSqrtScalar(norm*norm + norm0*norm0);
211
      BLASscal_(&rA, &norm, &A[i*ldA], &one);
212
      BLASscal_(&rA, &norm, &A[(i+1)*ldA], &one);
213
      i++;
214
    } else {
215
      norm = BLASnrm2_(&rA, &A[i*ldA], &one);
216
      norm = 1.0 / norm;
217
      BLASscal_(&rA, &norm, &A[i*ldA], &one);
218
     }
1750 eromero 219
  }
220
 
221
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseNorm,0,0,0,0);CHKERRQ(ierr);
222
 
223
  PetscFunctionReturn(0);
224
}
225
 
226
 
2223 jroman 227
#undef __FUNCT__  
228
#define __FUNCT__ "SlepcDenseOrth"
1735 eromero 229
/*
1743 eromero 230
  Compute A <- orth(A), where
231
    ldA, the leading dimension of A,
232
    rA, cA, rows and columns of A,
233
    auxS, auxiliary vector of more size than cA+min(rA,cA),
234
    lauxS, size of auxS,
235
    ncA, new number of columns of A
236
*/
237
PetscErrorCode SlepcDenseOrth(PetscScalar *A, PetscInt _ldA, PetscInt _rA,
238
                              PetscInt _cA, PetscScalar *auxS, PetscInt _lauxS,
239
                              PetscInt *ncA)
240
{
1745 eromero 241
  PetscErrorCode  ierr;
1743 eromero 242
  PetscBLASInt    ldA = _ldA, rA = _rA, cA = _cA,
243
                  info, ltau = PetscMin(_cA, _rA), lw = _lauxS - ltau;
244
  PetscScalar     *tau = auxS, *w = tau + ltau;
245
 
246
  PetscFunctionBegin;
247
 
248
  /* Quick exit */
249
  if ((_rA == 0) || (cA == 0)) { PetscFunctionReturn(0); }
2651 eromero 250
  PetscValidScalarPointer(A,1);
251
  PetscValidScalarPointer(auxS,5);
252
  PetscValidIntPointer(ncA,7);
1743 eromero 253
 
254
  /* Memory check */
2214 jroman 255
  if (lw < cA) SETERRQ(PETSC_COMM_SELF,1, "Insufficient memory for xGEQRF");
1743 eromero 256
 
1745 eromero 257
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseOrth,0,0,0,0);CHKERRQ(ierr);
1743 eromero 258
  LAPACKgeqrf_(&rA, &cA, A, &ldA, tau, w, &lw, &info);
2214 jroman 259
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack xGEQRF %d", info);
1743 eromero 260
  LAPACKorgqr_(&rA, &ltau, &ltau, A, &ldA, tau, w, &lw, &info);
2214 jroman 261
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack xORGQR %d", info);
1745 eromero 262
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseOrth,0,0,0,0);CHKERRQ(ierr);
1743 eromero 263
 
264
  if (ncA) *ncA = ltau;
265
 
266
  PetscFunctionReturn(0);
267
}
268
 
2223 jroman 269
#undef __FUNCT__  
270
#define __FUNCT__ "SlepcDenseCopy"
1743 eromero 271
/*
272
  Y <- X, where
273
  ldX, leading dimension of X,
274
  rX, cX, rows and columns of X
275
  ldY, leading dimension of Y
276
*/
277
PetscErrorCode SlepcDenseCopy(PetscScalar *Y, PetscInt ldY, PetscScalar *X,
278
                              PetscInt ldX, PetscInt rX, PetscInt cX)
279
{
280
  PetscErrorCode  ierr;
1795 eromero 281
  PetscInt        i;
1743 eromero 282
 
283
  PetscFunctionBegin;
2651 eromero 284
  PetscValidScalarPointer(Y,1);
285
  PetscValidScalarPointer(X,3);
1743 eromero 286
 
287
  if ((ldX < rX) || (ldY < rX)) {
2219 jroman 288
    SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");
1743 eromero 289
  }
1805 eromero 290
 
291
  /* Quick exit */
292
  if (Y == X) {
293
    if (ldX != ldY) {
2219 jroman 294
      SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");
1805 eromero 295
    }
296
    PetscFunctionReturn(0);
297
  }
1743 eromero 298
 
1745 eromero 299
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseCopy,0,0,0,0);CHKERRQ(ierr);
1743 eromero 300
  for(i=0; i<cX; i++) {
301
    ierr = PetscMemcpy(&Y[ldY*i], &X[ldX*i], sizeof(PetscScalar)*rX);
302
    CHKERRQ(ierr);
303
  }
1745 eromero 304
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseCopy,0,0,0,0);CHKERRQ(ierr);
1743 eromero 305
 
306
  PetscFunctionReturn(0);
307
}
308
 
2223 jroman 309
#undef __FUNCT__  
310
#define __FUNCT__ "SlepcDenseCopyTriang"
1752 eromero 311
/*
312
  Y <- X, where
313
  ldX, leading dimension of X,
314
  rX, cX, rows and columns of X
315
  ldY, leading dimension of Y
316
*/
317
PetscErrorCode SlepcDenseCopyTriang(PetscScalar *Y, MatType_t sY, PetscInt ldY,
318
                                    PetscScalar *X, MatType_t sX, PetscInt ldX,
319
                                    PetscInt rX, PetscInt cX)
320
{
321
  PetscErrorCode  ierr;
322
  PetscInt        i,j,c;
1743 eromero 323
 
1752 eromero 324
  PetscFunctionBegin;
2651 eromero 325
  PetscValidScalarPointer(Y,1);
326
  PetscValidScalarPointer(X,4);
1752 eromero 327
 
328
  if ((ldX < rX) || (ldY < rX)) {
2219 jroman 329
    SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");
1752 eromero 330
  }
331
 
2605 eromero 332
  if (sY == 0 && sX == 0) {
333
    ierr = SlepcDenseCopy(Y, ldY, X, ldX, rX, cX);CHKERRQ(ierr);
334
    PetscFunctionReturn(0);
335
  }
336
 
1752 eromero 337
  if (rX != cX) {
2219 jroman 338
    SETERRQ(PETSC_COMM_SELF,1, "Rectangular matrices not supported");
1752 eromero 339
  }
340
 
1756 eromero 341
  if (DVD_IS(sX,DVD_MAT_UTRIANG) &&
342
      DVD_ISNOT(sX,DVD_MAT_LTRIANG)) {        /* UpTr to ... */
343
    if (DVD_IS(sY,DVD_MAT_UTRIANG) &&
344
        DVD_ISNOT(sY,DVD_MAT_LTRIANG))        /* ... UpTr, */
1752 eromero 345
      c = 0;                                      /*     so copy */
1756 eromero 346
    else if(DVD_ISNOT(sY,DVD_MAT_UTRIANG) &&
347
            DVD_IS(sY,DVD_MAT_LTRIANG))       /* ... LoTr, */
1752 eromero 348
      c = 1;                                      /*     so transpose */
349
    else                                          /* ... Full, */
350
      c = 2;                                      /*     so reflect from up */
1756 eromero 351
  } else if (DVD_ISNOT(sX,DVD_MAT_UTRIANG) &&
352
             DVD_IS(sX,DVD_MAT_LTRIANG)) {    /* LoTr to ... */
353
    if (DVD_IS(sY,DVD_MAT_UTRIANG) &&
354
        DVD_ISNOT(sY,DVD_MAT_LTRIANG))        /* ... UpTr, */
1752 eromero 355
      c = 1;                                      /*     so transpose */
1756 eromero 356
    else if(DVD_ISNOT(sY,DVD_MAT_UTRIANG) &&
357
            DVD_IS(sY,DVD_MAT_LTRIANG))       /* ... LoTr, */
1752 eromero 358
      c = 0;                                      /*     so copy */
359
    else                                          /* ... Full, */
360
      c = 3;                                      /*     so reflect fr. down */
361
  } else                                          /* Full to any, */
362
    c = 0;                                        /*     so copy */
363
 
364
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseCopy,0,0,0,0);CHKERRQ(ierr);
365
 
366
  switch(c) {
367
  case 0: /* copy */
368
    for(i=0; i<cX; i++) {
369
      ierr = PetscMemcpy(&Y[ldY*i], &X[ldX*i], sizeof(PetscScalar)*rX);
370
      CHKERRQ(ierr);
371
    }
372
    break;
373
 
374
  case 1: /* transpose */
375
    for(i=0; i<cX; i++)
376
      for(j=0; j<rX; j++)
2556 eromero 377
        Y[ldY*j+i] = PetscConj(X[ldX*i+j]);
1752 eromero 378
    break;
379
 
380
  case 2: /* reflection from up */
381
    for(i=0; i<cX; i++)
382
      for(j=0; j<PetscMin(i+1,rX); j++)
1988 eromero 383
        Y[ldY*j+i] = PetscConj(Y[ldY*i+j] = X[ldX*i+j]);
1752 eromero 384
    break;
385
 
386
  case 3: /* reflection from down */
387
    for(i=0; i<cX; i++)
388
      for(j=i; j<rX; j++)
1988 eromero 389
        Y[ldY*j+i] = PetscConj(Y[ldY*i+j] = X[ldX*i+j]);
1752 eromero 390
    break;
391
  }
392
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseCopy,0,0,0,0);CHKERRQ(ierr);
393
 
394
  PetscFunctionReturn(0);
395
}
396
 
397
 
2223 jroman 398
#undef __FUNCT__  
399
#define __FUNCT__ "SlepcUpdateVectorsZ"
1743 eromero 400
/*
1675 slepc 401
  Compute Y[0..cM-1] <- alpha * X[0..cX-1] * M + beta * Y[0..cM-1],
402
  where X and Y are contiguous global vectors.
1626 slepc 403
*/
1675 slepc 404
PetscErrorCode SlepcUpdateVectorsZ(Vec *Y, PetscScalar beta, PetscScalar alpha,
405
  Vec *X, PetscInt cX, const PetscScalar *M, PetscInt ldM, PetscInt rM,
406
  PetscInt cM)
1626 slepc 407
{
408
  PetscErrorCode  ierr;
1960 eromero 409
 
410
  PetscFunctionBegin;
411
 
412
  ierr = SlepcUpdateVectorsS(Y, 1, beta, alpha, X, cX, 1, M, ldM, rM, cM);
413
  CHKERRQ(ierr);
414
 
415
  PetscFunctionReturn(0);
416
}
417
 
418
 
2223 jroman 419
#undef __FUNCT__  
420
#define __FUNCT__ "SlepcUpdateVectorsS"
1960 eromero 421
/*
1965 eromero 422
  Compute Y[0:dY:cM*dY-1] <- alpha * X[0:dX:cX-1] * M + beta * Y[0:dY:cM*dY-1],
1960 eromero 423
  where X and Y are contiguous global vectors.
424
*/
425
PetscErrorCode SlepcUpdateVectorsS(Vec *Y, PetscInt dY, PetscScalar beta,
426
  PetscScalar alpha, Vec *X, PetscInt cX, PetscInt dX, const PetscScalar *M,
427
  PetscInt ldM, PetscInt rM, PetscInt cM)
428
{
2334 jroman 429
  PetscErrorCode    ierr;
430
  const PetscScalar *px;
431
  PetscScalar       *py;
432
  PetscInt          rX, rY, ldX, ldY, i, rcX;
1626 slepc 433
 
434
  PetscFunctionBegin;
2346 eromero 435
  SlepcValidVecsContiguous(Y,cM*dY,1);
436
  SlepcValidVecsContiguous(X,cX,5);
2651 eromero 437
  PetscValidScalarPointer(M,8);
1626 slepc 438
 
1965 eromero 439
  /* Compute the real number of columns */
440
  rcX = cX/dX;
441
  if (rcX != rM) {
2214 jroman 442
    SETERRQ(((PetscObject)*Y)->comm,1, "Matrix dimensions do not match");
1735 eromero 443
  }
444
 
1965 eromero 445
  if ((rcX == 0) || (rM == 0) || (cM == 0)) {
1626 slepc 446
    PetscFunctionReturn(0);
1960 eromero 447
  } else if ((Y + cM <= X) || (X + cX <= Y) ||
448
             ((X != Y) && ((PetscMax(dX,dY))%(PetscMin(dX,dY))!=0))) {
1626 slepc 449
    /* If Y[0..cM-1] and X[0..cX-1] are not overlapped... */
450
 
451
    /* Get the dense matrices and dimensions associated to Y and X */
1960 eromero 452
    ierr = VecGetLocalSize(X[0], &rX); CHKERRQ(ierr);
453
    ierr = VecGetLocalSize(Y[0], &rY); CHKERRQ(ierr);
454
    if (rX != rY) {
2214 jroman 455
      SETERRQ(((PetscObject)*Y)->comm,1, "The multivectors do not have the same dimension");
1626 slepc 456
    }
2334 jroman 457
    ierr = VecGetArrayRead(X[0], &px);CHKERRQ(ierr);
1626 slepc 458
    ierr = VecGetArray(Y[0], &py);CHKERRQ(ierr);
459
 
1960 eromero 460
    /* Update the strides */
461
    ldX = rX*dX; ldY= rY*dY;
462
 
1626 slepc 463
    /* Do operation */
1965 eromero 464
    ierr = SlepcDenseMatProd(py, ldY, beta, alpha, px, ldX, rX, rcX,
1675 slepc 465
                    PETSC_FALSE, M, ldM, rM, cM, PETSC_FALSE); CHKERRQ(ierr);
1626 slepc 466
 
2334 jroman 467
    ierr = VecRestoreArrayRead(X[0], &px);CHKERRQ(ierr);
1626 slepc 468
    ierr = VecRestoreArray(Y[0], &py);CHKERRQ(ierr);
1992 eromero 469
    for(i=1; i<cM; i++) {
470
      ierr = PetscObjectStateIncrease((PetscObject)Y[dY*i]); CHKERRQ(ierr);
471
    }
1626 slepc 472
 
1960 eromero 473
  } else if ((Y >= X) && (beta == 0.0) && (dY == dX)) {
1626 slepc 474
    /* If not, call to SlepcUpdateVectors */
1965 eromero 475
    ierr = SlepcUpdateStrideVectors(cX, X, Y-X, dX, Y-X+cM*dX, M-ldM*(Y-X),
476
                                    ldM, PETSC_FALSE); CHKERRQ(ierr);
1675 slepc 477
    if (alpha != 1.0)
478
      for (i=0; i<cM; i++) {
479
        ierr = VecScale(Y[i], alpha); CHKERRQ(ierr);
480
      }
1626 slepc 481
  } else {
2214 jroman 482
    SETERRQ(((PetscObject)*Y)->comm,1, "Unsupported case");
1626 slepc 483
  }
484
 
485
  PetscFunctionReturn(0);
486
}
487
 
2223 jroman 488
#undef __FUNCT__  
489
#define __FUNCT__ "SlepcUpdateVectorsD"
1965 eromero 490
/*
491
  Compute X <- alpha * X[0:dX:cX-1] * M
492
  where X is a matrix with non-consecutive columns
493
*/
494
PetscErrorCode SlepcUpdateVectorsD(Vec *X, PetscInt cX, PetscScalar alpha,
495
  const PetscScalar *M, PetscInt ldM, PetscInt rM, PetscInt cM,
496
  PetscScalar *work, PetscInt lwork)
497
{
2334 jroman 498
  PetscErrorCode ierr;
499
  PetscScalar    **px, *Y, *Z;
500
  PetscInt       rX, i, j, rY, rY0, ldY;
1960 eromero 501
 
1965 eromero 502
  PetscFunctionBegin;
2346 eromero 503
  SlepcValidVecsContiguous(X,cX,1);
2651 eromero 504
  PetscValidScalarPointer(M,4);
505
  PetscValidScalarPointer(work,8);
1965 eromero 506
 
507
  if (cX != rM) {
2214 jroman 508
    SETERRQ(((PetscObject)*X)->comm,1, "Matrix dimensions do not match");
1965 eromero 509
  }
510
 
511
  rY = (lwork/2)/cX;
512
  if (rY <= 0) {
2214 jroman 513
    SETERRQ(((PetscObject)*X)->comm,1, "Insufficient work space given");
1965 eromero 514
  }
515
  Y = work; Z = &Y[cX*rY]; ldY = rY;
516
 
517
  if ((cX == 0) || (rM == 0) || (cM == 0)) {
518
    PetscFunctionReturn(0);
519
  }
520
 
521
  /* Get the dense vectors associated to the columns of X */
522
  ierr = PetscMalloc(sizeof(Vec)*cX, &px); CHKERRQ(ierr);
523
  for(i=0; i<cX; i++) {
524
    ierr = VecGetArray(X[i], &px[i]); CHKERRQ(ierr);
525
  }
526
  ierr = VecGetLocalSize(X[0], &rX); CHKERRQ(ierr);
527
 
528
  for(i=0, rY0=0; i<rX; i+=rY0) {
529
    rY0 = PetscMin(rY, rX-i);
530
 
531
    /* Y <- X[i0:i1,:] */
532
    for(j=0; j<cX; j++) {
533
      ierr = SlepcDenseCopy(&Y[ldY*j], ldY, px[j]+i, rX, rY0, 1);
534
      CHKERRQ(ierr);
535
    }
536
 
537
    /* Z <- Y * M */
538
    ierr = SlepcDenseMatProd(Z, ldY, 0.0, alpha, Y, ldY, rY0, cX, PETSC_FALSE,
539
                                                 M, ldM, rM, cM, PETSC_FALSE);
540
    CHKERRQ(ierr);
541
 
542
    /* X <- Z */
543
    for(j=0; j<cM; j++) {
544
      ierr = SlepcDenseCopy(px[j]+i, rX, &Z[j*ldY], ldY, rY0, 1);
545
      CHKERRQ(ierr);
546
    }
547
  }
548
 
549
  for(i=0; i<cX; i++) {
550
    ierr = VecRestoreArray(X[i], &px[i]); CHKERRQ(ierr);
551
  }
552
  ierr = PetscFree(px); CHKERRQ(ierr);
553
 
554
  PetscFunctionReturn(0);
555
}
556
 
557
 
558
 
2223 jroman 559
#undef __FUNCT__  
560
#define __FUNCT__ "VecsMult"
1634 slepc 561
/* Computes M <- [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ]
562
                 [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
563
  where W = U' * V.
1640 slepc 564
  workS0 and workS1 are an auxiliary scalar vector of size
2650 eromero 565
  (eU-sU)*sV*(sU!=0)+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
1640 slepc 566
  is needed, and of size eU*eV.
1634 slepc 567
*/
1747 eromero 568
PetscErrorCode VecsMult(PetscScalar *M, MatType_t sM, PetscInt ldM,
1634 slepc 569
                        Vec *U, PetscInt sU, PetscInt eU,
570
                        Vec *V, PetscInt sV, PetscInt eV,
1640 slepc 571
                        PetscScalar *workS0, PetscScalar *workS1)
1634 slepc 572
{
2334 jroman 573
  PetscErrorCode    ierr;
2650 eromero 574
  PetscInt          ldU, ldV, i, j, k, ms = (eU-sU)*sV*(sU==0?0:1)+(eV-sV)*eU;
2334 jroman 575
  const PetscScalar *pu, *pv;
576
  PetscScalar       *W, *Wr;
1634 slepc 577
 
578
  PetscFunctionBegin;
579
 
580
  /* Check if quick exit */
581
  if ((eU-sU == 0) || (eV-sV == 0))
582
    PetscFunctionReturn(0);
2346 eromero 583
 
584
  SlepcValidVecsContiguous(U,eU,4);
2555 eromero 585
  SlepcValidVecsContiguous(V,eV,7);
2651 eromero 586
  PetscValidScalarPointer(M,1);
1634 slepc 587
 
588
  /* Get the dense matrices and dimensions associated to U and V */
589
  ierr = VecGetLocalSize(U[0], &ldU); CHKERRQ(ierr);
590
  ierr = VecGetLocalSize(V[0], &ldV); CHKERRQ(ierr);
591
  if (ldU != ldV) {
2214 jroman 592
    SETERRQ(((PetscObject)*U)->comm,1, "Matrix dimensions do not match");
1634 slepc 593
  }
2334 jroman 594
  ierr = VecGetArrayRead(U[0], &pu);CHKERRQ(ierr);
595
  ierr = VecGetArrayRead(V[0], &pv);CHKERRQ(ierr);
1634 slepc 596
 
2651 eromero 597
  if (workS0) {
598
    PetscValidScalarPointer(workS0,10);
1640 slepc 599
    W = workS0;
2651 eromero 600
  } else {
2650 eromero 601
    ierr = PetscMalloc(sizeof(PetscScalar)*ms, &W);
1640 slepc 602
    CHKERRQ(ierr);
1634 slepc 603
  }
604
 
1745 eromero 605
  ierr = PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
606
 
1640 slepc 607
  if ((sU == 0) && (sV == 0) && (eU == ldM)) {
608
    /* Use the smart memory usage version */
1634 slepc 609
 
1640 slepc 610
    /* W <- U' * V */
1747 eromero 611
    ierr = SlepcDenseMatProdTriang(W, sM, eU,
612
                                   pu, 0, ldU, ldU, eU, PETSC_TRUE,
613
                                   pv, 0, ldV, ldV, eV, PETSC_FALSE);
1640 slepc 614
    CHKERRQ(ierr);
615
 
616
    /* ReduceAll(W, SUM) */
1899 eromero 617
    ierr = MPI_Allreduce(W, M, eU*eV, MPIU_SCALAR, MPIU_SUM,
1640 slepc 618
                         ((PetscObject)U[0])->comm); CHKERRQ(ierr);
1747 eromero 619
  /* Full M matrix */
1756 eromero 620
  } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
621
             DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
2605 eromero 622
    if (workS1) {
2651 eromero 623
      PetscValidScalarPointer(workS1,11);
1640 slepc 624
      Wr = workS1;
2650 eromero 625
      if (PetscAbs(PetscMin(W-workS1, workS1-W)) < ms) {
2605 eromero 626
        SETERRQ(PETSC_COMM_SELF,1, "Consistency broken!");
627
      }
628
    } else {
2650 eromero 629
      ierr = PetscMalloc(sizeof(PetscScalar)*ms, &Wr);
1640 slepc 630
      CHKERRQ(ierr);
631
    }
1747 eromero 632
 
1640 slepc 633
    /* W(0:(eU-sU)*sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
2650 eromero 634
    if (sU > 0) {
635
      ierr = SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
636
                               pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
637
                               pv       , ldV, ldV, sV,    PETSC_FALSE);
638
      CHKERRQ(ierr);
639
    }
1640 slepc 640
 
641
    /* W((eU-sU)*sV:(eU-sU)*sV+(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
2650 eromero 642
    ierr = SlepcDenseMatProd(W+(eU-sU)*sV*(sU > 0?1:0), eU, 0.0, 1.0,
1640 slepc 643
                             pu,        ldU, ldU, eU,    PETSC_TRUE,
644
                             pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
645
    CHKERRQ(ierr);
646
 
647
    /* ReduceAll(W, SUM) */
2650 eromero 648
    ierr = MPI_Allreduce(W, Wr, ms, MPIU_SCALAR,
1899 eromero 649
                      MPIU_SUM, ((PetscObject)U[0])->comm); CHKERRQ(ierr);
1640 slepc 650
 
651
    /* M(...,...) <- W */
2650 eromero 652
    k = 0;
653
    if (sU > 0) for (i=0; i<sV; i++)
1640 slepc 654
      for (j=ldM*i+sU; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
655
    for (i=sV; i<eV; i++)
2650 eromero 656
      for (j=ldM*i; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
1640 slepc 657
 
1747 eromero 658
    if (!workS1) {
659
      ierr = PetscFree(Wr); CHKERRQ(ierr);
660
    }
1745 eromero 661
 
1747 eromero 662
  /* Upper triangular M matrix */
1756 eromero 663
  } else if (DVD_IS(sM,DVD_MAT_UTRIANG) &&
664
             DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
2605 eromero 665
    if (workS1) {
2651 eromero 666
      PetscValidScalarPointer(workS1,11);
1747 eromero 667
      Wr = workS1;
2605 eromero 668
      if (PetscAbs(PetscMin(W-workS1,workS1-W)) < (eV-sV)*eU) {
669
        SETERRQ(PETSC_COMM_SELF,1, "Consistency broken!");
670
      }
671
    } else {
1747 eromero 672
      ierr = PetscMalloc(sizeof(PetscScalar)*(eV-sV)*eU, &Wr);
673
      CHKERRQ(ierr);
674
    }
675
 
676
    /* W(0:(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
677
    ierr = SlepcDenseMatProd(W,         eU,  0.0, 1.0,
678
                             pu,        ldU, ldU, eU,    PETSC_TRUE,
679
                             pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
680
    CHKERRQ(ierr);
681
 
682
    /* ReduceAll(W, SUM) */
1899 eromero 683
    ierr = MPI_Allreduce(W, Wr, (eV-sV)*eU, MPIU_SCALAR, MPIU_SUM,
1747 eromero 684
                         ((PetscObject)U[0])->comm); CHKERRQ(ierr);
685
 
686
    /* M(...,...) <- W */
687
    for (i=sV,k=0; i<eV; i++)
688
        for (j=ldM*i; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
689
 
1640 slepc 690
    if (!workS1) {
691
      ierr = PetscFree(Wr); CHKERRQ(ierr);
692
    }
1747 eromero 693
 
694
  /* Lower triangular M matrix */
1756 eromero 695
  } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
696
             DVD_IS(sM,DVD_MAT_LTRIANG)) {
2605 eromero 697
    if (workS1) {
2651 eromero 698
      PetscValidScalarPointer(workS1,11);
1747 eromero 699
      Wr = workS1;
2605 eromero 700
      if (PetscMin(W - workS1, workS1 - W) < (eU-sU)*eV) {
701
        SETERRQ(PETSC_COMM_SELF,1, "Consistency broken!");
702
      }
703
    } else {
1747 eromero 704
      ierr = PetscMalloc(sizeof(PetscScalar)*(eU-sU)*eV, &Wr);
705
      CHKERRQ(ierr);
706
    }
707
 
708
    /* W(0:(eU-sU)*eV-1) <- U(sU:eU-1)' * V(0:eV-1) */
709
    ierr = SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
710
                             pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
711
                             pv       , ldV, ldV, eV,    PETSC_FALSE);
712
    CHKERRQ(ierr);
713
 
714
    /* ReduceAll(W, SUM) */
1899 eromero 715
    ierr = MPI_Allreduce(W, Wr, (eU-sU)*eV, MPIU_SCALAR, MPIU_SUM,
1747 eromero 716
                         ((PetscObject)U[0])->comm); CHKERRQ(ierr);
717
 
718
    /* M(...,...) <- W */
719
    for (i=0,k=0; i<eV; i++)
720
      for (j=ldM*i+sU; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
721
 
722
    if (!workS1) {
723
      ierr = PetscFree(Wr); CHKERRQ(ierr);
724
    }
1634 slepc 725
  }
726
 
1747 eromero 727
  ierr = PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
728
 
1640 slepc 729
  if (!workS0) {
1634 slepc 730
    ierr = PetscFree(W); CHKERRQ(ierr);
731
  }
732
 
2334 jroman 733
  ierr = VecRestoreArrayRead(U[0], &pu); CHKERRQ(ierr);
734
  ierr = VecRestoreArrayRead(V[0], &pv); CHKERRQ(ierr);
1634 slepc 735
  PetscFunctionReturn(0);
736
}
737
 
1795 eromero 738
 
2223 jroman 739
#undef __FUNCT__  
740
#define __FUNCT__ "VecsMultIa"
1753 eromero 741
/* Computes M <- [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ]
742
                 [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
1808 eromero 743
  where W = local_U' * local_V. Needs VecsMultIb for completing the operation!
1795 eromero 744
  workS0 and workS1 are an auxiliary scalar vector of size
745
  (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
746
  is needed, and of size eU*eV.
747
*/
748
PetscErrorCode VecsMultIa(PetscScalar *M, MatType_t sM, PetscInt ldM,
749
                          Vec *U, PetscInt sU, PetscInt eU,
750
                          Vec *V, PetscInt sV, PetscInt eV)
751
{
752
  PetscErrorCode  ierr;
753
  PetscInt        ldU, ldV;
754
  PetscScalar     *pu, *pv;
755
 
756
  PetscFunctionBegin;
757
 
758
  /* Check if quick exit */
759
  if ((eU-sU == 0) || (eV-sV == 0))
760
    PetscFunctionReturn(0);
761
 
2346 eromero 762
  SlepcValidVecsContiguous(U,eU,4);
2555 eromero 763
  SlepcValidVecsContiguous(V,eV,7);
2651 eromero 764
  PetscValidScalarPointer(M,1);
2346 eromero 765
 
1795 eromero 766
  /* Get the dense matrices and dimensions associated to U and V */
767
  ierr = VecGetLocalSize(U[0], &ldU); CHKERRQ(ierr);
768
  ierr = VecGetLocalSize(V[0], &ldV); CHKERRQ(ierr);
769
  if (ldU != ldV) {
2214 jroman 770
    SETERRQ(((PetscObject)*U)->comm,1, "Matrix dimensions do not match");
1795 eromero 771
  }
772
  ierr = VecGetArray(U[0], &pu);CHKERRQ(ierr);
773
  ierr = VecGetArray(V[0], &pv);CHKERRQ(ierr);
774
 
775
  if ((sU == 0) && (sV == 0) && (eU == ldM)) {
776
    /* M <- local_U' * local_V */
777
    ierr = SlepcDenseMatProdTriang(M, sM, eU,
778
                                   pu, 0, ldU, ldU, eU, PETSC_TRUE,
779
                                   pv, 0, ldV, ldV, eV, PETSC_FALSE);
780
    CHKERRQ(ierr);
781
 
782
  /* Full M matrix */
783
  } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
784
             DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
785
    /* M(sU:eU-1,0:sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
786
    ierr = SlepcDenseMatProd(&M[sU], ldM, 0.0, 1.0,
787
                             pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
788
                             pv       , ldV, ldV, sV,    PETSC_FALSE);
789
    CHKERRQ(ierr);
790
 
791
    /* M(0:eU-1,sV:eV-1) <- U(0:eU-1)' * V(sV:eV-1) */
792
    ierr = SlepcDenseMatProd(&M[ldM*sV], ldM, 0.0, 1.0,
793
                             pu,        ldU, ldU, eU,    PETSC_TRUE,
794
                             pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
795
    CHKERRQ(ierr);
796
 
797
  /* Other structures */
2214 jroman 798
  } else SETERRQ(((PetscObject)*U)->comm,1, "Matrix structure not supported");
1795 eromero 799
 
800
  ierr = VecRestoreArray(U[0], &pu); CHKERRQ(ierr);
1992 eromero 801
  ierr = PetscObjectStateDecrease((PetscObject)U[0]); CHKERRQ(ierr);
1795 eromero 802
  ierr = VecRestoreArray(V[0], &pv); CHKERRQ(ierr);
1992 eromero 803
  ierr = PetscObjectStateDecrease((PetscObject)V[0]); CHKERRQ(ierr);
1795 eromero 804
 
805
  PetscFunctionReturn(0);
806
}
807
 
808
 
2223 jroman 809
#undef __FUNCT__  
810
#define __FUNCT__ "VecsMultIc"
1808 eromero 811
/* Computes M <- nprocs*M
812
  where nprocs is the number of processors.
813
*/
814
PetscErrorCode VecsMultIc(PetscScalar *M, MatType_t sM, PetscInt ldM,
815
                          PetscInt rM, PetscInt cM, Vec V)
816
{
2117 eromero 817
  int        i,j,n;
1808 eromero 818
 
819
  PetscFunctionBegin;
820
 
821
  /* Check if quick exit */
822
  if ((rM == 0) || (cM == 0))
823
    PetscFunctionReturn(0);
2651 eromero 824
  PetscValidScalarPointer(M,1);
1808 eromero 825
 
2214 jroman 826
  if (sM != 0) SETERRQ(((PetscObject)V)->comm,1, "Matrix structure not supported");
1808 eromero 827
 
828
  MPI_Comm_size(((PetscObject)V)->comm, &n);
829
 
830
  for(i=0; i<cM; i++)
831
    for(j=0; j<rM; j++)
832
      M[ldM*i+j]/= (PetscScalar)n;
833
 
834
  PetscFunctionReturn(0);
835
}
836
 
837
 
2223 jroman 838
#undef __FUNCT__  
839
#define __FUNCT__ "VecsMultIb"
1795 eromero 840
/* Computes N <- Allreduce( [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ] )
841
                          ( [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ] )
1753 eromero 842
  where W = U' * V.
1795 eromero 843
  workS0 and workS1 are an auxiliary scalar vector of size
844
  (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
845
  is needed, and of size eU*eV.
846
*/
847
PetscErrorCode VecsMultIb(PetscScalar *M, MatType_t sM, PetscInt ldM,
848
                          PetscInt rM, PetscInt cM, PetscScalar *auxS,
849
                          Vec V)
850
{
851
  PetscErrorCode  ierr;
852
  PetscScalar     *W, *Wr;
853
 
854
  PetscFunctionBegin;
855
 
856
  /* Check if quick exit */
857
  if ((rM == 0) || (cM == 0))
858
    PetscFunctionReturn(0);
2651 eromero 859
  PetscValidScalarPointer(M,1);
860
  PetscValidScalarPointer(auxS,6);
1795 eromero 861
 
862
  if (auxS)
863
    W = auxS;
864
  else {
865
    ierr = PetscMalloc(sizeof(PetscScalar)*rM*cM*2, &W);
866
    CHKERRQ(ierr);
867
  }
868
  Wr = W + rM*cM;
869
 
870
  ierr = PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
871
 
872
  if (sM == 0) {
873
    /* W <- M */
874
    ierr = SlepcDenseCopy(W, rM, M, ldM, rM, cM); CHKERRQ(ierr);
875
 
876
    /* Wr <- ReduceAll(W, SUM) */
1899 eromero 877
    ierr = MPI_Allreduce(W, Wr, rM*cM, MPIU_SCALAR, MPIU_SUM,
1795 eromero 878
                         ((PetscObject)V)->comm); CHKERRQ(ierr);
879
 
880
    /* M <- Wr */
881
    ierr = SlepcDenseCopy(M, ldM, Wr, rM, rM, cM); CHKERRQ(ierr);
882
 
883
  /* Other structures */
2214 jroman 884
  } else SETERRQ(((PetscObject)V)->comm,1, "Matrix structure not supported");
1795 eromero 885
 
886
  ierr = PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
887
 
888
  if (!auxS) {
889
    ierr = PetscFree(W); CHKERRQ(ierr);
890
  }
891
 
892
  PetscFunctionReturn(0);
893
}
894
 
895
 
2223 jroman 896
#undef __FUNCT__  
897
#define __FUNCT__ "VecsMultS"
1795 eromero 898
/* Computes M <- [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ]
899
                 [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
900
  where W = U' * V.
1753 eromero 901
  r, a DvdReduction structure,
902
  sr, an structure DvdMult_copy_func.
903
*/
904
PetscErrorCode VecsMultS(PetscScalar *M, MatType_t sM, PetscInt ldM,
905
                         Vec *U, PetscInt sU, PetscInt eU,
906
                         Vec *V, PetscInt sV, PetscInt eV, DvdReduction *r,
907
                         DvdMult_copy_func *sr)
908
{
2334 jroman 909
  PetscErrorCode    ierr;
2675 eromero 910
  PetscInt          ldU, ldV, ms = (eU-sU)*sV*(sU==0?0:1)+(eV-sV)*eU;
2334 jroman 911
  const PetscScalar *pu, *pv;
912
  PetscScalar       *W;
1753 eromero 913
 
914
  PetscFunctionBegin;
915
 
916
  /* Check if quick exit */
917
  if ((eU-sU == 0) || (eV-sV == 0))
918
    PetscFunctionReturn(0);
919
 
2346 eromero 920
  SlepcValidVecsContiguous(U,eU,4);
2555 eromero 921
  SlepcValidVecsContiguous(V,eV,7);
2651 eromero 922
  PetscValidScalarPointer(M,1);
2346 eromero 923
 
1753 eromero 924
  /* Get the dense matrices and dimensions associated to U and V */
925
  ierr = VecGetLocalSize(U[0], &ldU); CHKERRQ(ierr);
926
  ierr = VecGetLocalSize(V[0], &ldV); CHKERRQ(ierr);
927
  if (ldU != ldV) {
2214 jroman 928
    SETERRQ(((PetscObject)*U)->comm,1, "Matrix dimensions do not match");
1753 eromero 929
  }
2334 jroman 930
  ierr = VecGetArrayRead(U[0], &pu);CHKERRQ(ierr);
931
  ierr = VecGetArrayRead(V[0], &pv);CHKERRQ(ierr);
1753 eromero 932
 
933
  ierr = PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
934
 
2650 eromero 935
  if ((sU == 0) && (sV == 0)) {
1753 eromero 936
    /* Use the smart memory usage version */
937
 
938
    /* Add the reduction to r */
939
    ierr = SlepcAllReduceSum(r, eU*eV, VecsMultS_copy_func, sr, &W);
940
    CHKERRQ(ierr);
941
 
942
    /* W <- U' * V */
943
    ierr = SlepcDenseMatProdTriang(W, sM, eU,
944
                                   pu, 0, ldU, ldU, eU, PETSC_TRUE,
945
                                   pv, 0, ldV, ldV, eV, PETSC_FALSE);
946
    CHKERRQ(ierr);
947
 
948
    /* M <- ReduceAll(W, SUM) */
949
    sr->M = M;    sr->ld = ldM;
950
    sr->i0 = 0;   sr->i1 = eV;    sr->s0 = sU;    sr->e0 = eU;
951
                  sr->i2 = eV;
952
 
953
  /* Full M matrix */
1756 eromero 954
  } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
955
             DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
1753 eromero 956
    /* Add the reduction to r */
2675 eromero 957
    ierr = SlepcAllReduceSum(r, ms, VecsMultS_copy_func, sr, &W);
1753 eromero 958
    CHKERRQ(ierr);
959
 
960
    /* W(0:(eU-sU)*sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
961
    ierr = SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
962
                             pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
963
                             pv       , ldV, ldV, sV,    PETSC_FALSE);
964
    CHKERRQ(ierr);
965
 
966
    /* W((eU-sU)*sV:(eU-sU)*sV+(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
2675 eromero 967
    ierr = SlepcDenseMatProd(W+(eU-sU)*sV*(sU > 0?1:0), eU, 0.0, 1.0,
1753 eromero 968
                             pu,        ldU, ldU, eU,    PETSC_TRUE,
969
                             pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
970
    CHKERRQ(ierr);
971
 
972
    /* M <- ReduceAll(W, SUM) */
2675 eromero 973
    sr->M = M;            sr->ld = ldM;
974
    sr->i0 = sU>0?0:sV;   sr->i1 = sV;    sr->s0 = sU;    sr->e0 = eU;
975
                          sr->i2 = eV;    sr->s1 = 0;     sr->e1 = eU;
1753 eromero 976
 
977
  /* Upper triangular M matrix */
1756 eromero 978
  } else if (DVD_IS(sM,DVD_MAT_UTRIANG) &&
979
             DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
1753 eromero 980
    /* Add the reduction to r */
981
    ierr = SlepcAllReduceSum(r, (eV-sV)*eU, VecsMultS_copy_func, sr, &W);
982
    CHKERRQ(ierr);
983
 
984
    /* W(0:(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
985
    ierr = SlepcDenseMatProd(W,         eU,  0.0, 1.0,
986
                             pu,        ldU, ldU, eU,    PETSC_TRUE,
987
                             pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
988
    CHKERRQ(ierr);
989
 
990
    /* M <- ReduceAll(W, SUM) */
991
    sr->M = M;    sr->ld = ldM;
992
    sr->i0 = sV;  sr->i1 = eV;    sr->s0 = 0;     sr->e0 = eU;
993
                  sr->i2 = eV;
994
 
995
  /* Lower triangular M matrix */
1756 eromero 996
  } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
997
             DVD_IS(sM,DVD_MAT_LTRIANG)) {
1753 eromero 998
    /* Add the reduction to r */
999
    ierr = SlepcAllReduceSum(r, (eU-sU)*eV, VecsMultS_copy_func, sr, &W);
1000
    CHKERRQ(ierr);
1001
 
1002
    /* W(0:(eU-sU)*eV-1) <- U(sU:eU-1)' * V(0:eV-1) */
1003
    ierr = SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
1004
                             pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
1005
                             pv       , ldV, ldV, eV,    PETSC_FALSE);
1006
    CHKERRQ(ierr);
1007
 
1008
    /* ReduceAll(W, SUM) */
1009
    sr->M = M;    sr->ld = ldM;
1010
    sr->i0 = 0;   sr->i1 = eV;    sr->s0 = sU;    sr->e0 = eU;
1011
                  sr->i2 = eV;
1012
  }
1013
 
1014
  ierr = PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
1015
 
2334 jroman 1016
  ierr = VecRestoreArrayRead(U[0], &pu); CHKERRQ(ierr);
1017
  ierr = VecRestoreArrayRead(V[0], &pv); CHKERRQ(ierr);
1753 eromero 1018
  PetscFunctionReturn(0);
1019
}
1020
 
1021
#undef __FUNCT__  
1022
#define __FUNCT__ "VecsMultS_copy_func"
1023
PetscErrorCode VecsMultS_copy_func(PetscScalar *out, PetscInt size_out,
1024
                                   void *ptr)
1025
{
1026
  PetscInt        i, j, k;
1027
  DvdMult_copy_func
1028
                  *sr = (DvdMult_copy_func*)ptr;
1029
 
1030
  PetscFunctionBegin;
2651 eromero 1031
  PetscValidScalarPointer(out,1);
1753 eromero 1032
 
1033
  for (i=sr->i0,k=0; i<sr->i1; i++)
1034
    for (j=sr->ld*i+sr->s0; j<sr->ld*i+sr->e0; j++,k++) sr->M[j] = out[k];
1035
  for (i=sr->i1; i<sr->i2; i++)
1036
    for (j=sr->ld*i+sr->s1; j<sr->ld*i+sr->e1; j++,k++) sr->M[j] = out[k];
1037
 
2214 jroman 1038
  if (k != size_out) SETERRQ(PETSC_COMM_SELF,1, "Wrong size");
1753 eromero 1039
 
1040
  PetscFunctionReturn(0);
1041
}
1042
 
2223 jroman 1043
#undef __FUNCT__  
1044
#define __FUNCT__ "VecsOrthonormalize"
1675 slepc 1045
/* Orthonormalize a chunk of parallel vector.
1046
   NOTE: wS0 and wS1 must be of size n*n.
1047
*/
1048
PetscErrorCode VecsOrthonormalize(Vec *V, PetscInt n, PetscScalar *wS0,
1049
                                  PetscScalar *wS1)
1050
{
1051
  PetscErrorCode  ierr;
1052
  PetscBLASInt    nn = n, info, ld;
1992 eromero 1053
  PetscInt        ldV, i;
1675 slepc 1054
  PetscScalar     *H, *T, one=1.0, *pv;
1055
 
1056
  PetscFunctionBegin;
1057
 
1058
  if (!wS0) {
1059
    ierr = PetscMalloc(sizeof(PetscScalar)*n*n, &H); CHKERRQ(ierr);
2651 eromero 1060
  } else {
1061
    PetscValidScalarPointer(wS0,3);
1675 slepc 1062
    H = wS0;
2651 eromero 1063
  }
1675 slepc 1064
  if (!wS1) {
1065
    ierr = PetscMalloc(sizeof(PetscScalar)*n*n, &T); CHKERRQ(ierr);
2651 eromero 1066
  } else {
1067
    PetscValidScalarPointer(wS1,4);
1675 slepc 1068
    T = wS1;
2651 eromero 1069
  }
1675 slepc 1070
 
1071
  /* H <- V' * V */
1747 eromero 1072
  ierr = VecsMult(H, 0, n, V, 0, n, V, 0, n, T, PETSC_NULL); CHKERRQ(ierr);
1675 slepc 1073
 
1074
  /* H <- chol(H) */
1075
  LAPACKpbtrf_("U", &nn, &nn, H, &nn, &info);
2214 jroman 1076
  if (info) SETERRQ1(((PetscObject)*V)->comm,PETSC_ERR_LIB, "Error in Lapack PBTRF %d", info);
1675 slepc 1077
 
1078
  /* V <- V * inv(H) */
1079
  ierr = VecGetLocalSize(V[0], &ldV); CHKERRQ(ierr);
1080
  ierr = VecGetArray(V[0], &pv);CHKERRQ(ierr);
1081
  ld = ldV;
1082
  BLAStrsm_("R", "U", "N", "N", &ld, &nn, &one, H, &nn, pv, &ld);
1083
  ierr = VecRestoreArray(V[0], &pv);CHKERRQ(ierr);
1992 eromero 1084
  for(i=1; i<n; i++) {
1085
    ierr = PetscObjectStateIncrease((PetscObject)V[i]); CHKERRQ(ierr);
1086
  }
1675 slepc 1087
 
1088
  if (!wS0) {
1089
    ierr = PetscFree(H); CHKERRQ(ierr);
1090
  }
1091
  if (!wS1) {
1092
    ierr = PetscFree(T); CHKERRQ(ierr);
1093
  }
1094
 
1095
  PetscFunctionReturn(0);
1096
}
1097
 
2223 jroman 1098
#undef __FUNCT__  
1099
#define __FUNCT__ "SlepcAllReduceSumBegin"
1753 eromero 1100
/*
1101
  Sum up several arrays with only one call to MPIReduce.
1102
*/
1103
PetscErrorCode SlepcAllReduceSumBegin(DvdReductionChunk *ops,
1104
                                      PetscInt max_size_ops,
1105
                                      PetscScalar *in, PetscScalar *out,
1106
                                      PetscInt max_size_in, DvdReduction *r,
1107
                                      MPI_Comm comm)
1108
{
1109
  PetscFunctionBegin;
2651 eromero 1110
  PetscValidScalarPointer(in,3);
1111
  PetscValidScalarPointer(out,4);
1753 eromero 1112
 
1113
  r->in = in;
1114
  r->out = out;
1115
  r->size_in = 0;
1116
  r->max_size_in = max_size_in;
1117
  r->ops = ops;
1118
  r->size_ops = 0;
1119
  r->max_size_ops = max_size_ops;
1120
  r->comm = comm;
1121
 
1122
  PetscFunctionReturn(0);
1123
}
1124
 
1125
#undef __FUNCT__  
1126
#define __FUNCT__ "SlepcAllReduceSum"
1127
PetscErrorCode SlepcAllReduceSum(DvdReduction *r, PetscInt size_in,
1128
                                 DvdReductionPostF f, void *ptr,
1129
                                 PetscScalar **in)
1130
{
1131
  PetscFunctionBegin;
1132
 
1133
  *in = r->in + r->size_in;
1134
  r->ops[r->size_ops].out = r->out + r->size_in;
1135
  r->ops[r->size_ops].size_out = size_in;
1136
  r->ops[r->size_ops].f = f;
1137
  r->ops[r->size_ops].ptr = ptr;
1138
  if (++(r->size_ops) > r->max_size_ops) {
2214 jroman 1139
    SETERRQ(PETSC_COMM_SELF,1, "max_size_ops is not large enough");
1753 eromero 1140
  }
1141
  if ((r->size_in+= size_in) > r->max_size_in) {
2214 jroman 1142
    SETERRQ(PETSC_COMM_SELF,1, "max_size_in is not large enough");
1753 eromero 1143
  }
1144
 
1145
  PetscFunctionReturn(0);
1146
}
1147
 
1148
 
1149
#undef __FUNCT__  
1150
#define __FUNCT__ "SlepcAllReduceSumEnd"
1151
PetscErrorCode SlepcAllReduceSumEnd(DvdReduction *r)
1152
{
1153
  PetscErrorCode  ierr;
1154
  PetscInt        i;
1155
 
1156
  PetscFunctionBegin;
1157
 
1795 eromero 1158
  /* Check if quick exit */
1159
  if (r->size_ops == 0)
1160
    PetscFunctionReturn(0);
1161
 
1753 eromero 1162
  /* Call the MPIAllReduce routine */
1899 eromero 1163
  ierr = MPI_Allreduce(r->in, r->out, r->size_in, MPIU_SCALAR, MPIU_SUM,
1753 eromero 1164
                       r->comm); CHKERRQ(ierr);
1165
 
1166
  /* Call the postponed routines */
1167
  for(i=0; i<r->size_ops; i++) {
1168
    ierr = r->ops[i].f(r->ops[i].out, r->ops[i].size_out, r->ops[i].ptr);
1169
    CHKERRQ(ierr);
1170
  }
1171
 
1172
  /* Tag the operation as done */
1173
  r->size_ops = 0;
1174
 
1175
  PetscFunctionReturn(0);
1176
}
1795 eromero 1177
 
1831 eromero 1178
 
1795 eromero 1179
#undef __FUNCT__  
1180
#define __FUNCT__ "dvd_orthV"
2605 eromero 1181
/* auxS: size_cX+V_new_e */
1989 eromero 1182
PetscErrorCode dvd_orthV(IP ip, Vec *DS, PetscInt size_DS, Vec *cX,
1183
                         PetscInt size_cX, Vec *V, PetscInt V_new_s,
2605 eromero 1184
                         PetscInt V_new_e, PetscScalar *auxS,
1989 eromero 1185
                         PetscRandom rand)
1795 eromero 1186
{
1187
  PetscErrorCode  ierr;
1188
  PetscInt        i, j;
2216 jroman 1189
  PetscBool       lindep;
1795 eromero 1190
  PetscReal       norm;
1869 eromero 1191
  PetscScalar     *auxS0 = auxS;
1831 eromero 1192
 
1795 eromero 1193
  PetscFunctionBegin;
1831 eromero 1194
 
1795 eromero 1195
  /* Orthonormalize V with IP */
1196
  for (i=V_new_s; i<V_new_e; i++) {
1197
    for(j=0; j<3; j++) {
1975 eromero 1198
      if (j>0) { ierr = SlepcVecSetRandom(V[i], rand); CHKERRQ(ierr); }
1831 eromero 1199
      if (cX + size_cX == V) {
1200
        /* If cX and V are contiguous, orthogonalize in one step */
1989 eromero 1201
        ierr = IPOrthogonalize(ip, size_DS, DS, size_cX+i, PETSC_NULL, cX,
1831 eromero 1202
                               V[i], auxS0, &norm, &lindep); CHKERRQ(ierr);
1989 eromero 1203
      } else if (DS) {
1204
        /* Else orthogonalize first against DS, and then against cX and V */
1205
        ierr = IPOrthogonalize(ip, size_DS, DS, size_cX, PETSC_NULL, cX,
1831 eromero 1206
                               V[i], auxS0, PETSC_NULL, &lindep); CHKERRQ(ierr);
2177 jroman 1207
        if(!lindep) {
1831 eromero 1208
          ierr = IPOrthogonalize(ip, 0, PETSC_NULL, i, PETSC_NULL, V,
1209
                                 V[i], auxS0, &norm, &lindep); CHKERRQ(ierr);
1210
        }
1989 eromero 1211
      } else {
1212
        /* Else orthogonalize first against cX and then against V */
1213
        ierr = IPOrthogonalize(ip, size_cX, cX, i, PETSC_NULL, V,
1214
                               V[i], auxS0, &norm, &lindep); CHKERRQ(ierr);
1831 eromero 1215
      }
2177 jroman 1216
      if(!lindep && (norm > PETSC_MACHINE_EPSILON)) break;
2499 jroman 1217
      ierr = PetscInfo1(ip,"Orthonormalization problems adding the vector %D to the searching subspace\n",i);CHKERRQ(ierr);
1795 eromero 1218
    }
2177 jroman 1219
    if(lindep || (norm < PETSC_MACHINE_EPSILON)) {
2214 jroman 1220
        SETERRQ(((PetscObject)ip)->comm,1, "Error during orthonormalization of eigenvectors");
1795 eromero 1221
    }
1222
    ierr = VecScale(V[i], 1.0/norm); CHKERRQ(ierr);
1223
  }
1831 eromero 1224
 
1795 eromero 1225
  PetscFunctionReturn(0);
1226
}
2555 eromero 1227
 
1228
 
1229
#undef __FUNCT__  
1230
#define __FUNCT__ "dvd_BorthV"
2605 eromero 1231
/* auxS: size_cX+V_new_e+1 */
2555 eromero 1232
PetscErrorCode dvd_BorthV(IP ip, Vec *DS, Vec *BDS, PetscInt size_DS, Vec *cX,
1233
                         Vec *BcX, PetscInt size_cX, Vec *V, Vec *BV,
1234
                         PetscInt V_new_s, PetscInt V_new_e,
2605 eromero 1235
                         PetscScalar *auxS, PetscRandom rand)
2555 eromero 1236
{
1237
  PetscErrorCode  ierr;
1238
  PetscInt        i, j;
1239
  PetscBool       lindep;
1240
  PetscReal       norm;
1241
  PetscScalar     *auxS0 = auxS;
1831 eromero 1242
 
2555 eromero 1243
  PetscFunctionBegin;
1244
 
1245
  /* Orthonormalize V with IP */
1246
  for (i=V_new_s; i<V_new_e; i++) {
1247
    for(j=0; j<3; j++) {
1248
      if (j>0) { ierr = SlepcVecSetRandom(V[i], rand); CHKERRQ(ierr); }
1249
      if (cX + size_cX == V) {
1250
        /* If cX and V are contiguous, orthogonalize in one step */
1251
        ierr = IPBOrthogonalize(ip, size_DS, DS, BDS, size_cX+i, PETSC_NULL, cX, BcX,
1252
                               V[i], BV[i], auxS0, &norm, &lindep); CHKERRQ(ierr);
1253
      } else if (DS) {
1254
        /* Else orthogonalize first against DS, and then against cX and V */
1255
        ierr = IPBOrthogonalize(ip, size_DS, DS, BDS, size_cX, PETSC_NULL, cX, BcX,
1256
                               V[i], BV[i], auxS0, PETSC_NULL, &lindep); CHKERRQ(ierr);
1257
        if(!lindep) {
1258
          ierr = IPBOrthogonalize(ip, 0, PETSC_NULL, PETSC_NULL, i, PETSC_NULL, V, BV,
1259
                                  V[i], BV[i], auxS0, &norm, &lindep); CHKERRQ(ierr);
1260
        }
1261
      } else {
1262
        /* Else orthogonalize first against cX and then against V */
1263
        ierr = IPBOrthogonalize(ip, size_cX, cX, BcX, i, PETSC_NULL, V, BV,
1264
                                V[i], BV[i], auxS0, &norm, &lindep); CHKERRQ(ierr);
1265
      }
1266
      if(!lindep && (norm > PETSC_MACHINE_EPSILON)) break;
1267
      ierr = PetscInfo1(ip, "Orthonormalization problems adding the vector %d to the searching subspace\n", i);
1268
      CHKERRQ(ierr);
1269
    }
1270
    if(lindep || (norm < PETSC_MACHINE_EPSILON)) {
1271
        SETERRQ(((PetscObject)ip)->comm,1, "Error during the orthonormalization of the eigenvectors!");
1272
    }
1273
    ierr = VecScale(V[i], 1.0/norm); CHKERRQ(ierr);
1274
    ierr = VecScale(BV[i], 1.0/norm); CHKERRQ(ierr);
1275
  }
1276
 
1277
  PetscFunctionReturn(0);
1278
}
1279
 
1968 eromero 1280
#undef __FUNCT__  
1281
#define __FUNCT__ "dvd_compute_eigenvectors"
1282
/*
1283
  Compute eigenvectors associated to the Schur decomposition (S, T) and
1284
  save the left vectors in pY and the right vectors in pX, where
1285
  n, size of the eigenproblem
1286
  ldS, ldT, leading dimension of S and T
1287
  ldpX, ldpY, leading dimension of pX and pY
1288
  auxS, auxiliar scalar of length:
2650 eromero 1289
    double standard 3n, double generalized 6n,
1290
    complex standard 3n, complex generalized 3n
1968 eromero 1291
  size_auxS, the length of auxS
1292
  doProd, if true pX and pY return the eigenvectors premultiplied by the input vectors stored in pX and pY respectively
1293
*/
1294
PetscErrorCode dvd_compute_eigenvectors(PetscInt n_, PetscScalar *S,
2650 eromero 1295
  PetscInt ldS_, PetscScalar *T, PetscInt ldT_, PetscScalar *pX,
1968 eromero 1296
  PetscInt ldpX_, PetscScalar *pY, PetscInt ldpY_, PetscScalar *auxS,
2216 jroman 1297
  PetscInt size_auxS, PetscBool doProd)
1968 eromero 1298
{
2392 jroman 1299
#if defined(SLEPC_MISSING_LAPACK_GGEV)
1300
  PetscFunctionBegin;
1301
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GGEV - Lapack routine is unavailable.");
1302
#else
2650 eromero 1303
  PetscBLASInt    n, ldpX, ldpY, nout, info, ldS, ldT;
1968 eromero 1304
  const char      *side, *howmny;
1305
#if defined(PETSC_USE_COMPLEX)
1306
  PetscReal       *auxR;
1307
#endif
1308
 
1309
  PetscFunctionBegin;
2651 eromero 1310
  PetscValidScalarPointer(S,2);
1311
  if (T) { PetscValidScalarPointer(T,4); }
1312
  if (pX) { PetscValidScalarPointer(pX,6); }
1313
  if (pY) { PetscValidScalarPointer(pY,8); }
1314
  PetscValidScalarPointer(auxS,10);
2117 eromero 1315
  n = PetscBLASIntCast(n_);
2650 eromero 1316
  ldpX = PetscBLASIntCast(PetscMax(ldpX_,1));
1317
  ldpY = PetscBLASIntCast(PetscMax(ldpY_,1));
1318
  ldS = PetscBLASIntCast(PetscMax(ldS_,1));
1319
  ldT = PetscBLASIntCast(PetscMax(ldT_,1));
2117 eromero 1320
 
1968 eromero 1321
  if (pX && pY) side = "B";
1322
  else if (pX)  side = "R";
1323
  else if (pY)  side = "L";
1324
  else { PetscFunctionReturn(0); }
1325
 
2177 jroman 1326
  howmny = doProd?"B":"A";
1968 eromero 1327
 
1328
  if (T) {
1329
    /* [eigr, pX] = eig(S, T) */
1330
#if defined(PETSC_USE_COMPLEX)
1331
    auxR = (PetscReal*)auxS; auxS = (PetscScalar*)(auxR+2*n); size_auxS-= 2*n;
1332
    if (size_auxS < 2*n)
2214 jroman 1333
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Insufficient work space for xTGEVC");
2650 eromero 1334
    LAPACKtgevc_(side,howmny,PETSC_NULL,&n,S,&ldS,T,&ldT,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,auxR,&info);
2394 jroman 1335
    if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTGEVC %d",info);
1968 eromero 1336
#else
2650 eromero 1337
    if (size_auxS < 6*n)
1338
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Insufficient work space for xTGEVC");
1339
    LAPACKtgevc_(side,howmny,PETSC_NULL,&n,S,&ldS,T,&ldT,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,&info);
1968 eromero 1340
#endif
1341
  } else {
1342
    /* [eigr, pX] = eig(S) */
1343
#if defined(PETSC_USE_COMPLEX)
1344
    auxR = (PetscReal*)auxS; auxS = (PetscScalar*)(auxR+n); size_auxS-= n;
1345
    if (size_auxS < 2*n)
2214 jroman 1346
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Insufficient work space for xTREVC");
2650 eromero 1347
    LAPACKtrevc_(side,howmny,PETSC_NULL,&n,S,&ldS,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,auxR,&info);
1968 eromero 1348
#else
2650 eromero 1349
    if (size_auxS < 3*n)
1350
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Insufficient work space for xTREVC");
1351
    LAPACKtrevc_(side,howmny,PETSC_NULL,&n,S,&ldS,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,&info);
1968 eromero 1352
#endif
2394 jroman 1353
    if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTREVC %d",info);
1968 eromero 1354
  }
1355
  PetscFunctionReturn(0);
2392 jroman 1356
#endif
1968 eromero 1357
}
2034 eromero 1358
 
2555 eromero 1359
 
1360
#undef __FUNCT__
1361
#define __FUNCT__ "EPSSortDenseHEP"
2556 eromero 1362
PetscErrorCode EPSSortDenseHEP(EPS eps, PetscInt n, PetscInt k, PetscScalar *w, PetscScalar *V, PetscInt ldV)
2034 eromero 1363
{
2555 eromero 1364
  PetscInt        i, j, result, pos;
2556 eromero 1365
  PetscReal       re;
1366
  PetscScalar     t;
2568 eromero 1367
  PetscBLASInt    n_,one=1;
2034 eromero 1368
  PetscErrorCode  ierr;
2555 eromero 1369
 
2034 eromero 1370
  PetscFunctionBegin;
2651 eromero 1371
  PetscValidScalarPointer(w,4);
1372
  PetscValidScalarPointer(V,5);
2568 eromero 1373
  n_ = PetscBLASIntCast(n);
2555 eromero 1374
 
1375
  /* selection sort */
1376
  for (i=k;i<n-1;i++) {
2556 eromero 1377
    re = PetscRealPart(w[i]);
2555 eromero 1378
    pos = 0;
1379
    /* find minimum eigenvalue */
1380
    for (j=i+1;j<n;j++) {
2556 eromero 1381
      ierr = EPSCompareEigenvalues(eps,re,0,PetscRealPart(w[j]),0,&result);CHKERRQ(ierr);
2555 eromero 1382
      if (result > 0) {
2556 eromero 1383
        re = PetscRealPart(w[j]);
2555 eromero 1384
        pos = j;
1385
      }
2034 eromero 1386
    }
2555 eromero 1387
    /* interchange the pairs i and pos */
1388
    if (pos) {
1389
      BLASswap_(&n_, &V[ldV*pos], &one, &V[ldV*i], &one);
1390
      t = w[i]; w[i] = w[pos]; w[pos] = t;
1391
    }
2034 eromero 1392
  }
2555 eromero 1393
 
2034 eromero 1394
  PetscFunctionReturn(0);
1395
}
2672 eromero 1396
 
1397
#undef __FUNCT__
1398
#define __FUNCT__ "EPSCleanDenseSchur"
1399
/* Write zeros from the column k to n in the lower triangular part of the
1400
   matrices S and T, and inside 2-by-2 diagonal blocks of T in order to
2680 jroman 1401
   make (S,T) a valid Schur decompositon.
2672 eromero 1402
*/
1403
PetscErrorCode EPSCleanDenseSchur(PetscInt n,PetscInt k,PetscScalar *S,PetscInt ldS,PetscScalar *T,PetscInt ldT,PetscScalar *eigi,PetscScalar *X,PetscInt ldX,PetscBool doProd)
1404
{
1405
  PetscInt        i, j;
2681 eromero 1406
#if defined(PETSC_USE_COMPLEX)
1407
  PetscScalar     s;
1408
#endif
2672 eromero 1409
 
1410
  PetscFunctionBegin;
1411
  PetscValidScalarPointer(S,3);
1412
  if (T) { PetscValidScalarPointer(T,5); }
1413
 
1414
  if (!doProd && X) {
1415
    for (i=0; i<n; i++) for (j=0; j<n; j++) X[ldX*i+j] = 0.0;
1416
    for (i=0; i<n; i++) X[ldX*i+i] = 1.0;
1417
  }
1418
 
1419
#if defined(PETSC_USE_COMPLEX)
1420
  for (i=k; i<n; i++) {
2681 eromero 1421
    /* Some functions need the diagonal elements in T be real */
1422
    if (T && PetscImaginaryPart(T[ldT*i+i]) != 0.0) {
1423
      s = PetscConj(T[ldT*i+i])/PetscAbsScalar(T[ldT*i+i]);
1424
      for(j=0; j<=i; j++)
1425
        T[ldT*i+j]*= s,
1426
        S[ldS*i+j]*= s;
1427
      T[ldT*i+i] = PetscRealPart(T[ldT*i+i]);
1428
      if (X) for(j=0; j<n; j++) X[ldX*i+j]*= s;
1429
    }
1430
    if ((j=i+1) < n) {
2673 eromero 1431
      S[ldS*i+j] = 0.0;
2672 eromero 1432
      if (T) T[ldT*i+j] = 0.0;
1433
    }
1434
  }
1435
#else
1436
  for (i=k; i<n; i++) {
1437
    if (S[ldS*i+i+1] != 0.0 && eigi && eigi[i] != 0.0) {
2681 eromero 1438
      if ((j=i+2) < n) S[ldS*(i+1)+j] = 0.0;
2672 eromero 1439
      if (T) {
1440
        /* T[ldT*(i+1)+i] = 0.0; */
1441
        {
1442
          /* Check if T(i+1,i) is negligible */
1443
          if (PetscAbs(T[ldT*(i+1)+i])+PetscAbs(T[ldT*i+i+1]) > (PetscAbs(T[ldT*i+i])+PetscAbs(T[ldT*(i+1)+i+1]))*PETSC_MACHINE_EPSILON) {
1444
            PetscBLASInt    ldS_,ldT_,n_i,n_i_1,one=1,n_,i_1,i_;
1445
            PetscScalar     b11,b22,sr,cr,sl,cl;
1446
            ldS_ = PetscBLASIntCast(ldS);
1447
            ldT_ = PetscBLASIntCast(ldT);
1448
            n_i = PetscBLASIntCast(n-i);
1449
            n_i_1 = n_i - 1;
1450
            i_1 = PetscBLASIntCast(i+1);
1451
            i_ = PetscBLASIntCast(i);
1452
            n_ = PetscBLASIntCast(n);
1453
            LAPACKlasv2_(&T[ldT*i+i],&T[ldT*i+i+1],&T[ldT*(i+1)+i+1],&b22,&b11,&sr,&cr,&sl,&cl);
1454
            if (b11 < 0.0) { cr=-cr; sr=-sr; b11=-b11; b22=-b22; }
1455
            BLASrot_(&n_i,&S[ldS*i+i],&ldS_,&S[ldS*i+i+1],&ldS_,&cl,&sl);
1456
            BLASrot_(&i_1,&S[ldS*i],&one,&S[ldS*(i+1)],&one,&cr,&sr);
1457
            if (n_i_1>0) BLASrot_(&n_i_1,&T[ldT*(i+2)+i],&ldT_,&T[ldT*(i+2)+i],&ldT_,&cl,&sl);
1458
            BLASrot_(&i_,&T[ldT*i],&one,&T[ldT*(i+1)],&one,&cr,&sr);
1459
            if (X) BLASrot_(&n_,&X[ldX*i],&one,&X[ldX*(i+1)],&one,&cr,&sr);
1460
            T[ldT*i+i] = b11; T[ldT*i+i+1] = T[ldT*(i+1)+i] = 0.0; T[ldT*(i+1)+i+1] = b22;
1461
          } else {
1462
            T[ldT*(i+1)+i] = T[ldT*i+i+1] = 0.0;
1463
          }
1464
        }
2681 eromero 1465
        if ((j=i+1) < n) T[ldT*i+j] = 0.0;
1466
        if ((j=i+2) < n) T[ldT*(i+1)+j] = 0.0;
2672 eromero 1467
      }
1468
      i++;
1469
    } else {
2681 eromero 1470
      if ((j=i+1) < n) {
2672 eromero 1471
        S[ldS*i+j] = 0.0;
1472
        if (T) T[ldT*i+j] = 0.0;
1473
      }
1474
    }
1475
  }
1476
#endif
1477
 
1478
  PetscFunctionReturn(0);
1479
}