Subversion Repositories slepc-dev

Rev

Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2110 jroman 1
/*
2
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3
   SLEPc - Scalable Library for Eigenvalue Problem Computations
2116 eromero 4
   Copyright (c) 2002-2010, Universidad Politecnica de Valencia, Spain
1619 slepc 5
 
2110 jroman 6
   This file is part of SLEPc.
7
 
8
   SLEPc is free software: you can redistribute it and/or modify it under  the
9
   terms of version 3 of the GNU Lesser General Public License as published by
10
   the Free Software Foundation.
11
 
12
   SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
13
   WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
14
   FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
15
   more details.
16
 
17
   You  should have received a copy of the GNU Lesser General  Public  License
18
   along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
19
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
20
*/
21
 
2346 eromero 22
#include <private/vecimplslepc.h>
1985 eromero 23
#include "davidson.h"
1619 slepc 24
 
1745 eromero 25
PetscLogEvent SLEPC_SlepcDenseMatProd = 0;
1750 eromero 26
PetscLogEvent SLEPC_SlepcDenseNorm = 0;
1745 eromero 27
PetscLogEvent SLEPC_SlepcDenseOrth = 0;
28
PetscLogEvent SLEPC_SlepcDenseCopy = 0;
29
PetscLogEvent SLEPC_VecsMult = 0;
1630 slepc 30
 
1634 slepc 31
void dvd_sum_local(void *in, void *out, PetscMPIInt *cnt,MPI_Datatype *t);
1753 eromero 32
PetscErrorCode VecsMultS_copy_func(PetscScalar *out, PetscInt size_out,
33
                                   void *ptr);
1634 slepc 34
 
2223 jroman 35
#undef __FUNCT__  
36
#define __FUNCT__ "SlepcDenseMatProd"
1619 slepc 37
/*
38
  Compute C <- a*A*B + b*C, where
39
    ldC, the leading dimension of C,
40
    ldA, the leading dimension of A,
41
    rA, cA, rows and columns of A,
42
    At, if true use the transpose of A instead,
43
    ldB, the leading dimension of B,
44
    rB, cB, rows and columns of B,
45
    Bt, if true use the transpose of B instead
46
*/
47
PetscErrorCode SlepcDenseMatProd(PetscScalar *C, PetscInt _ldC, PetscScalar b,
48
  PetscScalar a,
2216 jroman 49
  const PetscScalar *A, PetscInt _ldA, PetscInt rA, PetscInt cA, PetscBool At,
50
  const PetscScalar *B, PetscInt _ldB, PetscInt rB, PetscInt cB, PetscBool Bt)
1619 slepc 51
{
1630 slepc 52
  PetscErrorCode  ierr;
53
  PetscInt        tmp;
54
  PetscBLASInt    m, n, k, ldA = _ldA, ldB = _ldB, ldC = _ldC;
55
  const char      *N = "N", *T = "C", *qA = N, *qB = N;
1619 slepc 56
 
57
  PetscFunctionBegin;
1675 slepc 58
 
59
  if ((rA == 0) || (cB == 0)) { PetscFunctionReturn(0); }
60
 
1745 eromero 61
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
1619 slepc 62
 
63
  /* Transpose if needed */
2177 jroman 64
  if (At) tmp = rA, rA = cA, cA = tmp, qA = T;
65
  if (Bt) tmp = rB, rB = cB, cB = tmp, qB = T;
1619 slepc 66
 
67
  /* Check size */
68
  if (cA != rB) {
2214 jroman 69
    SETERRQ(PETSC_COMM_SELF,1, "Matrix dimensions do not match");
1619 slepc 70
  }
71
 
72
  /* Do stub */
1744 eromero 73
  if ((rA == 1) && (cA == 1) && (cB == 1)) {
74
    *C = *A * *B;
75
    m = n = k = 1;
76
  } else {
77
    m = rA; n = cB; k = cA;
78
    BLASgemm_(qA, qB, &m, &n, &k, &a, (PetscScalar*)A, &ldA, (PetscScalar*)B,
79
              &ldB, &b, C, &ldC);
80
  }
1619 slepc 81
 
1630 slepc 82
  ierr = PetscLogFlops(m*n*2*k);CHKERRQ(ierr);
1745 eromero 83
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
1630 slepc 84
 
1619 slepc 85
  PetscFunctionReturn(0);
86
}
87
 
2223 jroman 88
#undef __FUNCT__  
89
#define __FUNCT__ "SlepcDenseMatProdTriang"
1626 slepc 90
/*
1747 eromero 91
  Compute C <- A*B, where
1988 eromero 92
    sC, structure of C,
1747 eromero 93
    ldC, the leading dimension of C,
1988 eromero 94
    sA, structure of A,
1747 eromero 95
    ldA, the leading dimension of A,
96
    rA, cA, rows and columns of A,
97
    At, if true use the transpose of A instead,
1988 eromero 98
    sB, structure of B,
1747 eromero 99
    ldB, the leading dimension of B,
100
    rB, cB, rows and columns of B,
101
    Bt, if true use the transpose of B instead
102
*/
103
PetscErrorCode SlepcDenseMatProdTriang(
104
  PetscScalar *C, MatType_t sC, PetscInt ldC,
105
  const PetscScalar *A, MatType_t sA, PetscInt ldA, PetscInt rA, PetscInt cA,
2216 jroman 106
  PetscBool At,
1747 eromero 107
  const PetscScalar *B, MatType_t sB, PetscInt ldB, PetscInt rB, PetscInt cB,
2216 jroman 108
  PetscBool Bt)
1747 eromero 109
{
110
  PetscErrorCode  ierr;
1988 eromero 111
  PetscInt        tmp;
112
  PetscScalar     one=1.0, zero=0.0;
1747 eromero 113
  PetscBLASInt    rC, cC, _ldA = ldA, _ldB = ldB, _ldC = ldC;
1750 eromero 114
 
1747 eromero 115
  PetscFunctionBegin;
116
 
117
  if ((rA == 0) || (cB == 0)) { PetscFunctionReturn(0); }
118
 
119
  /* Transpose if needed */
2177 jroman 120
  if (At) tmp = rA, rA = cA, cA = tmp;
121
  if (Bt) tmp = rB, rB = cB, cB = tmp;
1747 eromero 122
 
123
  /* Check size */
2214 jroman 124
  if (cA != rB) SETERRQ(PETSC_COMM_SELF,1, "Matrix dimensions do not match");
125
  if (sB != 0) SETERRQ(PETSC_COMM_SELF,1, "Matrix type not supported for B");
1747 eromero 126
 
1750 eromero 127
  /* Optimized version: trivial case */
128
  if ((rA == 1) && (cA == 1) && (cB == 1)) {
2177 jroman 129
    if (!At && !Bt)     *C = *A * *B;
130
    else if (At && !Bt) *C = PetscConj(*A) * *B;
131
    else if (!At && Bt) *C = *A * PetscConj(*B);
132
    else if (At && Bt)  *C = PetscConj(*A) * PetscConj(*B);
1750 eromero 133
    PetscFunctionReturn(0);
134
  }
135
 
136
  /* Optimized versions: sA == 0 && sB == 0 */
137
  if ((sA == 0) && (sB == 0)) {
2177 jroman 138
    if (At) tmp = rA, rA = cA, cA = tmp;
139
    if (Bt) tmp = rB, rB = cB, cB = tmp;
1750 eromero 140
    ierr = SlepcDenseMatProd(C, ldC, 0.0, 1.0, A, ldA, rA, cA, At, B, ldB, rB,
141
                             cB, Bt); CHKERRQ(ierr);
142
    PetscFunctionReturn(ierr);
143
  }
144
 
1988 eromero 145
  /* Optimized versions: A hermitian && (B not triang) */
1756 eromero 146
  if (DVD_IS(sA,DVD_MAT_HERMITIAN) &&
147
      DVD_ISNOT(sB,DVD_MAT_UTRIANG) &&
1988 eromero 148
      DVD_ISNOT(sB,DVD_MAT_LTRIANG)    ) {
1750 eromero 149
    ierr = PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
150
    rC = rA; cC = cB;
1988 eromero 151
    BLAShemm_("L", DVD_ISNOT(sA,DVD_MAT_LTRIANG)?"U":"L", &rC, &cC, &one,
1750 eromero 152
              (PetscScalar*)A, &_ldA, (PetscScalar*)B, &_ldB, &zero, C, &_ldC);
153
    ierr = PetscLogFlops(rA*cB*cA); CHKERRQ(ierr);
154
    ierr = PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
155
    PetscFunctionReturn(0);
156
  }
157
 
1988 eromero 158
  /* Optimized versions: B hermitian && (A not triang) */
159
  if (DVD_IS(sB,DVD_MAT_HERMITIAN) &&
160
      DVD_ISNOT(sA,DVD_MAT_UTRIANG) &&
161
      DVD_ISNOT(sA,DVD_MAT_LTRIANG)    ) {
162
    ierr = PetscLogEventBegin(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
163
    rC = rA; cC = cB;
164
    BLAShemm_("R", DVD_ISNOT(sB,DVD_MAT_LTRIANG)?"U":"L", &rC, &cC, &one,
165
              (PetscScalar*)B, &_ldB, (PetscScalar*)A, &_ldA, &zero, C, &_ldC);
166
    ierr = PetscLogFlops(rA*cB*cA); CHKERRQ(ierr);
167
    ierr = PetscLogEventEnd(SLEPC_SlepcDenseMatProd,0,0,0,0);CHKERRQ(ierr);
168
    PetscFunctionReturn(0);
1747 eromero 169
  }
1988 eromero 170
 
2214 jroman 171
  SETERRQ(PETSC_COMM_SELF,1, "Matrix type not supported for A");
1747 eromero 172
}
173
 
2223 jroman 174
#undef __FUNCT__  
175
#define __FUNCT__ "SlepcDenseNorm"
1750 eromero 176
/*
1965 eromero 177
  Normalize the columns of the matrix A, where
1750 eromero 178
    ldA, the leading dimension of A,
1965 eromero 179
    rA, cA, rows and columns of A.
180
  if eigi is given, the pairs of contiguous columns i i+1 such as eigi[i] != 0
181
  are normalized as being one column.
1750 eromero 182
*/
183
PetscErrorCode SlepcDenseNorm(PetscScalar *A, PetscInt ldA, PetscInt _rA,
1965 eromero 184
                              PetscInt cA, PetscScalar *eigi)
1750 eromero 185
{
186
  PetscErrorCode  ierr;
1965 eromero 187
  PetscInt        i;
188
  PetscScalar     norm, norm0;
1750 eromero 189
  PetscBLASInt    rA = _rA, one=1;
1747 eromero 190
 
1750 eromero 191
  PetscFunctionBegin;
192
 
193
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseNorm,0,0,0,0);CHKERRQ(ierr);
194
 
195
  for(i=0; i<cA; i++) {
1965 eromero 196
    if(eigi && eigi[i] != 0.0) {
197
      norm = BLASnrm2_(&rA, &A[i*ldA], &one);
198
      norm0 = BLASnrm2_(&rA, &A[(i+1)*ldA], &one);
199
      norm = 1.0/PetscSqrtScalar(norm*norm + norm0*norm0);
200
      BLASscal_(&rA, &norm, &A[i*ldA], &one);
201
      BLASscal_(&rA, &norm, &A[(i+1)*ldA], &one);
202
      i++;
203
    } else {
204
      norm = BLASnrm2_(&rA, &A[i*ldA], &one);
205
      norm = 1.0 / norm;
206
      BLASscal_(&rA, &norm, &A[i*ldA], &one);
207
     }
1750 eromero 208
  }
209
 
210
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseNorm,0,0,0,0);CHKERRQ(ierr);
211
 
212
  PetscFunctionReturn(0);
213
}
214
 
215
 
2223 jroman 216
#undef __FUNCT__  
217
#define __FUNCT__ "SlepcDenseOrth"
1735 eromero 218
/*
1743 eromero 219
  Compute A <- orth(A), where
220
    ldA, the leading dimension of A,
221
    rA, cA, rows and columns of A,
222
    auxS, auxiliary vector of more size than cA+min(rA,cA),
223
    lauxS, size of auxS,
224
    ncA, new number of columns of A
225
*/
226
PetscErrorCode SlepcDenseOrth(PetscScalar *A, PetscInt _ldA, PetscInt _rA,
227
                              PetscInt _cA, PetscScalar *auxS, PetscInt _lauxS,
228
                              PetscInt *ncA)
229
{
1745 eromero 230
  PetscErrorCode  ierr;
1743 eromero 231
  PetscBLASInt    ldA = _ldA, rA = _rA, cA = _cA,
232
                  info, ltau = PetscMin(_cA, _rA), lw = _lauxS - ltau;
233
  PetscScalar     *tau = auxS, *w = tau + ltau;
234
 
235
  PetscFunctionBegin;
236
 
237
  /* Quick exit */
238
  if ((_rA == 0) || (cA == 0)) { PetscFunctionReturn(0); }
239
 
240
  /* Memory check */
2214 jroman 241
  if (lw < cA) SETERRQ(PETSC_COMM_SELF,1, "Insufficient memory for xGEQRF");
1743 eromero 242
 
1745 eromero 243
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseOrth,0,0,0,0);CHKERRQ(ierr);
1743 eromero 244
  LAPACKgeqrf_(&rA, &cA, A, &ldA, tau, w, &lw, &info);
2214 jroman 245
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack xGEQRF %d", info);
1743 eromero 246
  LAPACKorgqr_(&rA, &ltau, &ltau, A, &ldA, tau, w, &lw, &info);
2214 jroman 247
  if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB, "Error in Lapack xORGQR %d", info);
1745 eromero 248
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseOrth,0,0,0,0);CHKERRQ(ierr);
1743 eromero 249
 
250
  if (ncA) *ncA = ltau;
251
 
252
  PetscFunctionReturn(0);
253
}
254
 
2223 jroman 255
#undef __FUNCT__  
256
#define __FUNCT__ "SlepcDenseCopy"
1743 eromero 257
/*
258
  Y <- X, where
259
  ldX, leading dimension of X,
260
  rX, cX, rows and columns of X
261
  ldY, leading dimension of Y
262
*/
263
PetscErrorCode SlepcDenseCopy(PetscScalar *Y, PetscInt ldY, PetscScalar *X,
264
                              PetscInt ldX, PetscInt rX, PetscInt cX)
265
{
266
  PetscErrorCode  ierr;
1795 eromero 267
  PetscInt        i;
1743 eromero 268
 
269
  PetscFunctionBegin;
270
 
271
  if ((ldX < rX) || (ldY < rX)) {
2219 jroman 272
    SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");
1743 eromero 273
  }
1805 eromero 274
 
275
  /* Quick exit */
276
  if (Y == X) {
277
    if (ldX != ldY) {
2219 jroman 278
      SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");
1805 eromero 279
    }
280
    PetscFunctionReturn(0);
281
  }
1743 eromero 282
 
1745 eromero 283
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseCopy,0,0,0,0);CHKERRQ(ierr);
1743 eromero 284
  for(i=0; i<cX; i++) {
285
    ierr = PetscMemcpy(&Y[ldY*i], &X[ldX*i], sizeof(PetscScalar)*rX);
286
    CHKERRQ(ierr);
287
  }
1745 eromero 288
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseCopy,0,0,0,0);CHKERRQ(ierr);
1743 eromero 289
 
290
  PetscFunctionReturn(0);
291
}
292
 
2223 jroman 293
#undef __FUNCT__  
294
#define __FUNCT__ "SlepcDenseCopyTriang"
1752 eromero 295
/*
296
  Y <- X, where
297
  ldX, leading dimension of X,
298
  rX, cX, rows and columns of X
299
  ldY, leading dimension of Y
300
*/
301
PetscErrorCode SlepcDenseCopyTriang(PetscScalar *Y, MatType_t sY, PetscInt ldY,
302
                                    PetscScalar *X, MatType_t sX, PetscInt ldX,
303
                                    PetscInt rX, PetscInt cX)
304
{
305
  PetscErrorCode  ierr;
306
  PetscInt        i,j,c;
1743 eromero 307
 
1752 eromero 308
  PetscFunctionBegin;
309
 
310
  if ((ldX < rX) || (ldY < rX)) {
2219 jroman 311
    SETERRQ(PETSC_COMM_SELF,1, "Leading dimension error");
1752 eromero 312
  }
313
 
314
  if (rX != cX) {
2219 jroman 315
    SETERRQ(PETSC_COMM_SELF,1, "Rectangular matrices not supported");
1752 eromero 316
  }
317
 
1756 eromero 318
  if (DVD_IS(sX,DVD_MAT_UTRIANG) &&
319
      DVD_ISNOT(sX,DVD_MAT_LTRIANG)) {        /* UpTr to ... */
320
    if (DVD_IS(sY,DVD_MAT_UTRIANG) &&
321
        DVD_ISNOT(sY,DVD_MAT_LTRIANG))        /* ... UpTr, */
1752 eromero 322
      c = 0;                                      /*     so copy */
1756 eromero 323
    else if(DVD_ISNOT(sY,DVD_MAT_UTRIANG) &&
324
            DVD_IS(sY,DVD_MAT_LTRIANG))       /* ... LoTr, */
1752 eromero 325
      c = 1;                                      /*     so transpose */
326
    else                                          /* ... Full, */
327
      c = 2;                                      /*     so reflect from up */
1756 eromero 328
  } else if (DVD_ISNOT(sX,DVD_MAT_UTRIANG) &&
329
             DVD_IS(sX,DVD_MAT_LTRIANG)) {    /* LoTr to ... */
330
    if (DVD_IS(sY,DVD_MAT_UTRIANG) &&
331
        DVD_ISNOT(sY,DVD_MAT_LTRIANG))        /* ... UpTr, */
1752 eromero 332
      c = 1;                                      /*     so transpose */
1756 eromero 333
    else if(DVD_ISNOT(sY,DVD_MAT_UTRIANG) &&
334
            DVD_IS(sY,DVD_MAT_LTRIANG))       /* ... LoTr, */
1752 eromero 335
      c = 0;                                      /*     so copy */
336
    else                                          /* ... Full, */
337
      c = 3;                                      /*     so reflect fr. down */
338
  } else                                          /* Full to any, */
339
    c = 0;                                        /*     so copy */
340
 
341
  ierr = PetscLogEventBegin(SLEPC_SlepcDenseCopy,0,0,0,0);CHKERRQ(ierr);
342
 
343
  switch(c) {
344
  case 0: /* copy */
345
    for(i=0; i<cX; i++) {
346
      ierr = PetscMemcpy(&Y[ldY*i], &X[ldX*i], sizeof(PetscScalar)*rX);
347
      CHKERRQ(ierr);
348
    }
349
    break;
350
 
351
  case 1: /* transpose */
352
    for(i=0; i<cX; i++)
353
      for(j=0; j<rX; j++)
354
        Y[ldY*j+i] = X[ldX*i+j];
355
    break;
356
 
357
  case 2: /* reflection from up */
358
    for(i=0; i<cX; i++)
359
      for(j=0; j<PetscMin(i+1,rX); j++)
1988 eromero 360
        Y[ldY*j+i] = PetscConj(Y[ldY*i+j] = X[ldX*i+j]);
1752 eromero 361
    break;
362
 
363
  case 3: /* reflection from down */
364
    for(i=0; i<cX; i++)
365
      for(j=i; j<rX; j++)
1988 eromero 366
        Y[ldY*j+i] = PetscConj(Y[ldY*i+j] = X[ldX*i+j]);
1752 eromero 367
    break;
368
  }
369
  ierr = PetscLogEventEnd(SLEPC_SlepcDenseCopy,0,0,0,0);CHKERRQ(ierr);
370
 
371
  PetscFunctionReturn(0);
372
}
373
 
374
 
2223 jroman 375
#undef __FUNCT__  
376
#define __FUNCT__ "SlepcUpdateVectorsZ"
1743 eromero 377
/*
1675 slepc 378
  Compute Y[0..cM-1] <- alpha * X[0..cX-1] * M + beta * Y[0..cM-1],
379
  where X and Y are contiguous global vectors.
1626 slepc 380
*/
1675 slepc 381
PetscErrorCode SlepcUpdateVectorsZ(Vec *Y, PetscScalar beta, PetscScalar alpha,
382
  Vec *X, PetscInt cX, const PetscScalar *M, PetscInt ldM, PetscInt rM,
383
  PetscInt cM)
1626 slepc 384
{
385
  PetscErrorCode  ierr;
1960 eromero 386
 
387
  PetscFunctionBegin;
388
 
389
  ierr = SlepcUpdateVectorsS(Y, 1, beta, alpha, X, cX, 1, M, ldM, rM, cM);
390
  CHKERRQ(ierr);
391
 
392
  PetscFunctionReturn(0);
393
}
394
 
395
 
2223 jroman 396
#undef __FUNCT__  
397
#define __FUNCT__ "SlepcUpdateVectorsS"
1960 eromero 398
/*
1965 eromero 399
  Compute Y[0:dY:cM*dY-1] <- alpha * X[0:dX:cX-1] * M + beta * Y[0:dY:cM*dY-1],
1960 eromero 400
  where X and Y are contiguous global vectors.
401
*/
402
PetscErrorCode SlepcUpdateVectorsS(Vec *Y, PetscInt dY, PetscScalar beta,
403
  PetscScalar alpha, Vec *X, PetscInt cX, PetscInt dX, const PetscScalar *M,
404
  PetscInt ldM, PetscInt rM, PetscInt cM)
405
{
2334 jroman 406
  PetscErrorCode    ierr;
407
  const PetscScalar *px;
408
  PetscScalar       *py;
409
  PetscInt          rX, rY, ldX, ldY, i, rcX;
1626 slepc 410
 
411
  PetscFunctionBegin;
2346 eromero 412
  SlepcValidVecsContiguous(Y,cM*dY,1);
413
  SlepcValidVecsContiguous(X,cX,5);
1626 slepc 414
 
1965 eromero 415
  /* Compute the real number of columns */
416
  rcX = cX/dX;
417
  if (rcX != rM) {
2214 jroman 418
    SETERRQ(((PetscObject)*Y)->comm,1, "Matrix dimensions do not match");
1735 eromero 419
  }
420
 
1965 eromero 421
  if ((rcX == 0) || (rM == 0) || (cM == 0)) {
1626 slepc 422
    PetscFunctionReturn(0);
1960 eromero 423
  } else if ((Y + cM <= X) || (X + cX <= Y) ||
424
             ((X != Y) && ((PetscMax(dX,dY))%(PetscMin(dX,dY))!=0))) {
1626 slepc 425
    /* If Y[0..cM-1] and X[0..cX-1] are not overlapped... */
426
 
427
    /* Get the dense matrices and dimensions associated to Y and X */
1960 eromero 428
    ierr = VecGetLocalSize(X[0], &rX); CHKERRQ(ierr);
429
    ierr = VecGetLocalSize(Y[0], &rY); CHKERRQ(ierr);
430
    if (rX != rY) {
2214 jroman 431
      SETERRQ(((PetscObject)*Y)->comm,1, "The multivectors do not have the same dimension");
1626 slepc 432
    }
2334 jroman 433
    ierr = VecGetArrayRead(X[0], &px);CHKERRQ(ierr);
1626 slepc 434
    ierr = VecGetArray(Y[0], &py);CHKERRQ(ierr);
435
 
1960 eromero 436
    /* Update the strides */
437
    ldX = rX*dX; ldY= rY*dY;
438
 
1626 slepc 439
    /* Do operation */
1965 eromero 440
    ierr = SlepcDenseMatProd(py, ldY, beta, alpha, px, ldX, rX, rcX,
1675 slepc 441
                    PETSC_FALSE, M, ldM, rM, cM, PETSC_FALSE); CHKERRQ(ierr);
1626 slepc 442
 
2334 jroman 443
    ierr = VecRestoreArrayRead(X[0], &px);CHKERRQ(ierr);
1626 slepc 444
    ierr = VecRestoreArray(Y[0], &py);CHKERRQ(ierr);
1992 eromero 445
    for(i=1; i<cM; i++) {
446
      ierr = PetscObjectStateIncrease((PetscObject)Y[dY*i]); CHKERRQ(ierr);
447
    }
1626 slepc 448
 
1960 eromero 449
  } else if ((Y >= X) && (beta == 0.0) && (dY == dX)) {
1626 slepc 450
    /* If not, call to SlepcUpdateVectors */
1965 eromero 451
    ierr = SlepcUpdateStrideVectors(cX, X, Y-X, dX, Y-X+cM*dX, M-ldM*(Y-X),
452
                                    ldM, PETSC_FALSE); CHKERRQ(ierr);
1675 slepc 453
    if (alpha != 1.0)
454
      for (i=0; i<cM; i++) {
455
        ierr = VecScale(Y[i], alpha); CHKERRQ(ierr);
456
      }
1626 slepc 457
  } else {
2214 jroman 458
    SETERRQ(((PetscObject)*Y)->comm,1, "Unsupported case");
1626 slepc 459
  }
460
 
461
  PetscFunctionReturn(0);
462
}
463
 
2223 jroman 464
#undef __FUNCT__  
465
#define __FUNCT__ "SlepcUpdateVectorsD"
1965 eromero 466
/*
467
  Compute X <- alpha * X[0:dX:cX-1] * M
468
  where X is a matrix with non-consecutive columns
469
*/
470
PetscErrorCode SlepcUpdateVectorsD(Vec *X, PetscInt cX, PetscScalar alpha,
471
  const PetscScalar *M, PetscInt ldM, PetscInt rM, PetscInt cM,
472
  PetscScalar *work, PetscInt lwork)
473
{
2334 jroman 474
  PetscErrorCode ierr;
475
  PetscScalar    **px, *Y, *Z;
476
  PetscInt       rX, i, j, rY, rY0, ldY;
1960 eromero 477
 
1965 eromero 478
  PetscFunctionBegin;
2346 eromero 479
  SlepcValidVecsContiguous(X,cX,1);
1965 eromero 480
 
481
  if (cX != rM) {
2214 jroman 482
    SETERRQ(((PetscObject)*X)->comm,1, "Matrix dimensions do not match");
1965 eromero 483
  }
484
 
485
  rY = (lwork/2)/cX;
486
  if (rY <= 0) {
2214 jroman 487
    SETERRQ(((PetscObject)*X)->comm,1, "Insufficient work space given");
1965 eromero 488
  }
489
  Y = work; Z = &Y[cX*rY]; ldY = rY;
490
 
491
  if ((cX == 0) || (rM == 0) || (cM == 0)) {
492
    PetscFunctionReturn(0);
493
  }
494
 
495
  /* Get the dense vectors associated to the columns of X */
496
  ierr = PetscMalloc(sizeof(Vec)*cX, &px); CHKERRQ(ierr);
497
  for(i=0; i<cX; i++) {
498
    ierr = VecGetArray(X[i], &px[i]); CHKERRQ(ierr);
499
  }
500
  ierr = VecGetLocalSize(X[0], &rX); CHKERRQ(ierr);
501
 
502
  for(i=0, rY0=0; i<rX; i+=rY0) {
503
    rY0 = PetscMin(rY, rX-i);
504
 
505
    /* Y <- X[i0:i1,:] */
506
    for(j=0; j<cX; j++) {
507
      ierr = SlepcDenseCopy(&Y[ldY*j], ldY, px[j]+i, rX, rY0, 1);
508
      CHKERRQ(ierr);
509
    }
510
 
511
    /* Z <- Y * M */
512
    ierr = SlepcDenseMatProd(Z, ldY, 0.0, alpha, Y, ldY, rY0, cX, PETSC_FALSE,
513
                                                 M, ldM, rM, cM, PETSC_FALSE);
514
    CHKERRQ(ierr);
515
 
516
    /* X <- Z */
517
    for(j=0; j<cM; j++) {
518
      ierr = SlepcDenseCopy(px[j]+i, rX, &Z[j*ldY], ldY, rY0, 1);
519
      CHKERRQ(ierr);
520
    }
521
  }
522
 
523
  for(i=0; i<cX; i++) {
524
    ierr = VecRestoreArray(X[i], &px[i]); CHKERRQ(ierr);
525
  }
526
  ierr = PetscFree(px); CHKERRQ(ierr);
527
 
528
  PetscFunctionReturn(0);
529
}
530
 
531
 
532
 
2223 jroman 533
#undef __FUNCT__  
534
#define __FUNCT__ "VecsMult"
1634 slepc 535
/* Computes M <- [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ]
536
                 [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
537
  where W = U' * V.
1640 slepc 538
  workS0 and workS1 are an auxiliary scalar vector of size
539
  (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
540
  is needed, and of size eU*eV.
1634 slepc 541
*/
1747 eromero 542
PetscErrorCode VecsMult(PetscScalar *M, MatType_t sM, PetscInt ldM,
1634 slepc 543
                        Vec *U, PetscInt sU, PetscInt eU,
544
                        Vec *V, PetscInt sV, PetscInt eV,
1640 slepc 545
                        PetscScalar *workS0, PetscScalar *workS1)
1634 slepc 546
{
2334 jroman 547
  PetscErrorCode    ierr;
548
  PetscInt          ldU, ldV, i, j, k;
549
  const PetscScalar *pu, *pv;
550
  PetscScalar       *W, *Wr;
1634 slepc 551
 
552
  PetscFunctionBegin;
553
 
554
  /* Check if quick exit */
555
  if ((eU-sU == 0) || (eV-sV == 0))
556
    PetscFunctionReturn(0);
2346 eromero 557
 
558
  SlepcValidVecsContiguous(U,eU,4);
2555 eromero 559
  SlepcValidVecsContiguous(V,eV,7);
1634 slepc 560
 
561
  /* Get the dense matrices and dimensions associated to U and V */
562
  ierr = VecGetLocalSize(U[0], &ldU); CHKERRQ(ierr);
563
  ierr = VecGetLocalSize(V[0], &ldV); CHKERRQ(ierr);
564
  if (ldU != ldV) {
2214 jroman 565
    SETERRQ(((PetscObject)*U)->comm,1, "Matrix dimensions do not match");
1634 slepc 566
  }
2334 jroman 567
  ierr = VecGetArrayRead(U[0], &pu);CHKERRQ(ierr);
568
  ierr = VecGetArrayRead(V[0], &pv);CHKERRQ(ierr);
1634 slepc 569
 
1640 slepc 570
  if (workS0)
571
    W = workS0;
1634 slepc 572
  else {
1640 slepc 573
    ierr = PetscMalloc(sizeof(PetscScalar)*((eU-sU)*sV+(eV-sV)*eU), &W);
574
    CHKERRQ(ierr);
1634 slepc 575
  }
576
 
1745 eromero 577
  ierr = PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
578
 
1640 slepc 579
  if ((sU == 0) && (sV == 0) && (eU == ldM)) {
580
    /* Use the smart memory usage version */
1634 slepc 581
 
1640 slepc 582
    /* W <- U' * V */
1747 eromero 583
    ierr = SlepcDenseMatProdTriang(W, sM, eU,
584
                                   pu, 0, ldU, ldU, eU, PETSC_TRUE,
585
                                   pv, 0, ldV, ldV, eV, PETSC_FALSE);
1640 slepc 586
    CHKERRQ(ierr);
587
 
588
    /* ReduceAll(W, SUM) */
1899 eromero 589
    ierr = MPI_Allreduce(W, M, eU*eV, MPIU_SCALAR, MPIU_SUM,
1640 slepc 590
                         ((PetscObject)U[0])->comm); CHKERRQ(ierr);
1747 eromero 591
  /* Full M matrix */
1756 eromero 592
  } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
593
             DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
1640 slepc 594
    if (workS1)
595
      Wr = workS1;
596
    else {
597
      ierr = PetscMalloc(sizeof(PetscScalar)*((eU-sU)*sV+(eV-sV)*eU), &Wr);
598
      CHKERRQ(ierr);
599
    }
1747 eromero 600
 
1640 slepc 601
    /* W(0:(eU-sU)*sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
602
    ierr = SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
603
                             pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
604
                             pv       , ldV, ldV, sV,    PETSC_FALSE);
605
    CHKERRQ(ierr);
606
 
607
    /* W((eU-sU)*sV:(eU-sU)*sV+(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
608
    ierr = SlepcDenseMatProd(W+(eU-sU)*sV, eU, 0.0, 1.0,
609
                             pu,        ldU, ldU, eU,    PETSC_TRUE,
610
                             pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
611
    CHKERRQ(ierr);
612
 
613
    /* ReduceAll(W, SUM) */
614
    ierr = MPI_Allreduce(W, Wr, (eU-sU)*sV+(eV-sV)*eU, MPIU_SCALAR,
1899 eromero 615
                      MPIU_SUM, ((PetscObject)U[0])->comm); CHKERRQ(ierr);
1640 slepc 616
 
617
    /* M(...,...) <- W */
618
    for (i=0,k=0; i<sV; i++)
619
      for (j=ldM*i+sU; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
620
    for (i=sV; i<eV; i++)
621
        for (j=ldM*i; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
622
 
1747 eromero 623
    if (!workS1) {
624
      ierr = PetscFree(Wr); CHKERRQ(ierr);
625
    }
1745 eromero 626
 
1747 eromero 627
  /* Upper triangular M matrix */
1756 eromero 628
  } else if (DVD_IS(sM,DVD_MAT_UTRIANG) &&
629
             DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
1747 eromero 630
    if (workS1)
631
      Wr = workS1;
632
    else {
633
      ierr = PetscMalloc(sizeof(PetscScalar)*(eV-sV)*eU, &Wr);
634
      CHKERRQ(ierr);
635
    }
636
 
637
    /* W(0:(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
638
    ierr = SlepcDenseMatProd(W,         eU,  0.0, 1.0,
639
                             pu,        ldU, ldU, eU,    PETSC_TRUE,
640
                             pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
641
    CHKERRQ(ierr);
642
 
643
    /* ReduceAll(W, SUM) */
1899 eromero 644
    ierr = MPI_Allreduce(W, Wr, (eV-sV)*eU, MPIU_SCALAR, MPIU_SUM,
1747 eromero 645
                         ((PetscObject)U[0])->comm); CHKERRQ(ierr);
646
 
647
    /* M(...,...) <- W */
648
    for (i=sV,k=0; i<eV; i++)
649
        for (j=ldM*i; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
650
 
1640 slepc 651
    if (!workS1) {
652
      ierr = PetscFree(Wr); CHKERRQ(ierr);
653
    }
1747 eromero 654
 
655
  /* Lower triangular M matrix */
1756 eromero 656
  } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
657
             DVD_IS(sM,DVD_MAT_LTRIANG)) {
1747 eromero 658
    if (workS1)
659
      Wr = workS1;
660
    else {
661
      ierr = PetscMalloc(sizeof(PetscScalar)*(eU-sU)*eV, &Wr);
662
      CHKERRQ(ierr);
663
    }
664
 
665
    /* W(0:(eU-sU)*eV-1) <- U(sU:eU-1)' * V(0:eV-1) */
666
    ierr = SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
667
                             pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
668
                             pv       , ldV, ldV, eV,    PETSC_FALSE);
669
    CHKERRQ(ierr);
670
 
671
    /* ReduceAll(W, SUM) */
1899 eromero 672
    ierr = MPI_Allreduce(W, Wr, (eU-sU)*eV, MPIU_SCALAR, MPIU_SUM,
1747 eromero 673
                         ((PetscObject)U[0])->comm); CHKERRQ(ierr);
674
 
675
    /* M(...,...) <- W */
676
    for (i=0,k=0; i<eV; i++)
677
      for (j=ldM*i+sU; j<ldM*i+eU; j++,k++) M[j] = Wr[k];
678
 
679
    if (!workS1) {
680
      ierr = PetscFree(Wr); CHKERRQ(ierr);
681
    }
1634 slepc 682
  }
683
 
1747 eromero 684
  ierr = PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
685
 
1640 slepc 686
  if (!workS0) {
1634 slepc 687
    ierr = PetscFree(W); CHKERRQ(ierr);
688
  }
689
 
2334 jroman 690
  ierr = VecRestoreArrayRead(U[0], &pu); CHKERRQ(ierr);
691
  ierr = VecRestoreArrayRead(V[0], &pv); CHKERRQ(ierr);
1634 slepc 692
  PetscFunctionReturn(0);
693
}
694
 
1795 eromero 695
 
2223 jroman 696
#undef __FUNCT__  
697
#define __FUNCT__ "VecsMultIa"
1753 eromero 698
/* Computes M <- [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ]
699
                 [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
1808 eromero 700
  where W = local_U' * local_V. Needs VecsMultIb for completing the operation!
1795 eromero 701
  workS0 and workS1 are an auxiliary scalar vector of size
702
  (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
703
  is needed, and of size eU*eV.
704
*/
705
PetscErrorCode VecsMultIa(PetscScalar *M, MatType_t sM, PetscInt ldM,
706
                          Vec *U, PetscInt sU, PetscInt eU,
707
                          Vec *V, PetscInt sV, PetscInt eV)
708
{
709
  PetscErrorCode  ierr;
710
  PetscInt        ldU, ldV;
711
  PetscScalar     *pu, *pv;
712
 
713
  PetscFunctionBegin;
714
 
715
  /* Check if quick exit */
716
  if ((eU-sU == 0) || (eV-sV == 0))
717
    PetscFunctionReturn(0);
718
 
2346 eromero 719
  SlepcValidVecsContiguous(U,eU,4);
2555 eromero 720
  SlepcValidVecsContiguous(V,eV,7);
2346 eromero 721
 
1795 eromero 722
  /* Get the dense matrices and dimensions associated to U and V */
723
  ierr = VecGetLocalSize(U[0], &ldU); CHKERRQ(ierr);
724
  ierr = VecGetLocalSize(V[0], &ldV); CHKERRQ(ierr);
725
  if (ldU != ldV) {
2214 jroman 726
    SETERRQ(((PetscObject)*U)->comm,1, "Matrix dimensions do not match");
1795 eromero 727
  }
728
  ierr = VecGetArray(U[0], &pu);CHKERRQ(ierr);
729
  ierr = VecGetArray(V[0], &pv);CHKERRQ(ierr);
730
 
731
  if ((sU == 0) && (sV == 0) && (eU == ldM)) {
732
    /* M <- local_U' * local_V */
733
    ierr = SlepcDenseMatProdTriang(M, sM, eU,
734
                                   pu, 0, ldU, ldU, eU, PETSC_TRUE,
735
                                   pv, 0, ldV, ldV, eV, PETSC_FALSE);
736
    CHKERRQ(ierr);
737
 
738
  /* Full M matrix */
739
  } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
740
             DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
741
    /* M(sU:eU-1,0:sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
742
    ierr = SlepcDenseMatProd(&M[sU], ldM, 0.0, 1.0,
743
                             pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
744
                             pv       , ldV, ldV, sV,    PETSC_FALSE);
745
    CHKERRQ(ierr);
746
 
747
    /* M(0:eU-1,sV:eV-1) <- U(0:eU-1)' * V(sV:eV-1) */
748
    ierr = SlepcDenseMatProd(&M[ldM*sV], ldM, 0.0, 1.0,
749
                             pu,        ldU, ldU, eU,    PETSC_TRUE,
750
                             pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
751
    CHKERRQ(ierr);
752
 
753
  /* Other structures */
2214 jroman 754
  } else SETERRQ(((PetscObject)*U)->comm,1, "Matrix structure not supported");
1795 eromero 755
 
756
  ierr = VecRestoreArray(U[0], &pu); CHKERRQ(ierr);
1992 eromero 757
  ierr = PetscObjectStateDecrease((PetscObject)U[0]); CHKERRQ(ierr);
1795 eromero 758
  ierr = VecRestoreArray(V[0], &pv); CHKERRQ(ierr);
1992 eromero 759
  ierr = PetscObjectStateDecrease((PetscObject)V[0]); CHKERRQ(ierr);
1795 eromero 760
 
761
  PetscFunctionReturn(0);
762
}
763
 
764
 
2223 jroman 765
#undef __FUNCT__  
766
#define __FUNCT__ "VecsMultIc"
1808 eromero 767
/* Computes M <- nprocs*M
768
  where nprocs is the number of processors.
769
*/
770
PetscErrorCode VecsMultIc(PetscScalar *M, MatType_t sM, PetscInt ldM,
771
                          PetscInt rM, PetscInt cM, Vec V)
772
{
2117 eromero 773
  int        i,j,n;
1808 eromero 774
 
775
  PetscFunctionBegin;
776
 
777
  /* Check if quick exit */
778
  if ((rM == 0) || (cM == 0))
779
    PetscFunctionReturn(0);
780
 
2214 jroman 781
  if (sM != 0) SETERRQ(((PetscObject)V)->comm,1, "Matrix structure not supported");
1808 eromero 782
 
783
  MPI_Comm_size(((PetscObject)V)->comm, &n);
784
 
785
  for(i=0; i<cM; i++)
786
    for(j=0; j<rM; j++)
787
      M[ldM*i+j]/= (PetscScalar)n;
788
 
789
  PetscFunctionReturn(0);
790
}
791
 
792
 
2223 jroman 793
#undef __FUNCT__  
794
#define __FUNCT__ "VecsMultIb"
1795 eromero 795
/* Computes N <- Allreduce( [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ] )
796
                          ( [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ] )
1753 eromero 797
  where W = U' * V.
1795 eromero 798
  workS0 and workS1 are an auxiliary scalar vector of size
799
  (eU-sU)*sV+(eV-sV)*eU. But, if sU == 0, sV == 0 and eU == ldM, only workS0
800
  is needed, and of size eU*eV.
801
*/
802
PetscErrorCode VecsMultIb(PetscScalar *M, MatType_t sM, PetscInt ldM,
803
                          PetscInt rM, PetscInt cM, PetscScalar *auxS,
804
                          Vec V)
805
{
806
  PetscErrorCode  ierr;
807
  PetscScalar     *W, *Wr;
808
 
809
  PetscFunctionBegin;
810
 
811
  /* Check if quick exit */
812
  if ((rM == 0) || (cM == 0))
813
    PetscFunctionReturn(0);
814
 
815
  if (auxS)
816
    W = auxS;
817
  else {
818
    ierr = PetscMalloc(sizeof(PetscScalar)*rM*cM*2, &W);
819
    CHKERRQ(ierr);
820
  }
821
  Wr = W + rM*cM;
822
 
823
  ierr = PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
824
 
825
  if (sM == 0) {
826
    /* W <- M */
827
    ierr = SlepcDenseCopy(W, rM, M, ldM, rM, cM); CHKERRQ(ierr);
828
 
829
    /* Wr <- ReduceAll(W, SUM) */
1899 eromero 830
    ierr = MPI_Allreduce(W, Wr, rM*cM, MPIU_SCALAR, MPIU_SUM,
1795 eromero 831
                         ((PetscObject)V)->comm); CHKERRQ(ierr);
832
 
833
    /* M <- Wr */
834
    ierr = SlepcDenseCopy(M, ldM, Wr, rM, rM, cM); CHKERRQ(ierr);
835
 
836
  /* Other structures */
2214 jroman 837
  } else SETERRQ(((PetscObject)V)->comm,1, "Matrix structure not supported");
1795 eromero 838
 
839
  ierr = PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
840
 
841
  if (!auxS) {
842
    ierr = PetscFree(W); CHKERRQ(ierr);
843
  }
844
 
845
  PetscFunctionReturn(0);
846
}
847
 
848
 
2223 jroman 849
#undef __FUNCT__  
850
#define __FUNCT__ "VecsMultS"
1795 eromero 851
/* Computes M <- [ M(0:sU-1,  0:sV-1) W(0:sU-1,  sV:eV-1) ]
852
                 [ W(sU:eU-1, 0:sV-1) W(sU:eU-1, sV:eV-1) ]
853
  where W = U' * V.
1753 eromero 854
  r, a DvdReduction structure,
855
  sr, an structure DvdMult_copy_func.
856
*/
857
PetscErrorCode VecsMultS(PetscScalar *M, MatType_t sM, PetscInt ldM,
858
                         Vec *U, PetscInt sU, PetscInt eU,
859
                         Vec *V, PetscInt sV, PetscInt eV, DvdReduction *r,
860
                         DvdMult_copy_func *sr)
861
{
2334 jroman 862
  PetscErrorCode    ierr;
863
  PetscInt          ldU, ldV;
864
  const PetscScalar *pu, *pv;
865
  PetscScalar       *W;
1753 eromero 866
 
867
  PetscFunctionBegin;
868
 
869
  /* Check if quick exit */
870
  if ((eU-sU == 0) || (eV-sV == 0))
871
    PetscFunctionReturn(0);
872
 
2346 eromero 873
  SlepcValidVecsContiguous(U,eU,4);
2555 eromero 874
  SlepcValidVecsContiguous(V,eV,7);
2346 eromero 875
 
1753 eromero 876
  /* Get the dense matrices and dimensions associated to U and V */
877
  ierr = VecGetLocalSize(U[0], &ldU); CHKERRQ(ierr);
878
  ierr = VecGetLocalSize(V[0], &ldV); CHKERRQ(ierr);
879
  if (ldU != ldV) {
2214 jroman 880
    SETERRQ(((PetscObject)*U)->comm,1, "Matrix dimensions do not match");
1753 eromero 881
  }
2334 jroman 882
  ierr = VecGetArrayRead(U[0], &pu);CHKERRQ(ierr);
883
  ierr = VecGetArrayRead(V[0], &pv);CHKERRQ(ierr);
1753 eromero 884
 
885
  ierr = PetscLogEventBegin(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
886
 
887
  if ((sU == 0) && (sV == 0) && (eU == ldM)) {
888
    /* Use the smart memory usage version */
889
 
890
    /* Add the reduction to r */
891
    ierr = SlepcAllReduceSum(r, eU*eV, VecsMultS_copy_func, sr, &W);
892
    CHKERRQ(ierr);
893
 
894
    /* W <- U' * V */
895
    ierr = SlepcDenseMatProdTriang(W, sM, eU,
896
                                   pu, 0, ldU, ldU, eU, PETSC_TRUE,
897
                                   pv, 0, ldV, ldV, eV, PETSC_FALSE);
898
    CHKERRQ(ierr);
899
 
900
    /* M <- ReduceAll(W, SUM) */
901
    sr->M = M;    sr->ld = ldM;
902
    sr->i0 = 0;   sr->i1 = eV;    sr->s0 = sU;    sr->e0 = eU;
903
                  sr->i2 = eV;
904
 
905
  /* Full M matrix */
1756 eromero 906
  } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
907
             DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
1753 eromero 908
    /* Add the reduction to r */
909
    ierr = SlepcAllReduceSum(r, (eU-sU)*sV+(eV-sV)*eU, VecsMultS_copy_func,
910
                             sr, &W);
911
    CHKERRQ(ierr);
912
 
913
    /* W(0:(eU-sU)*sV-1) <- U(sU:eU-1)' * V(0:sV-1) */
914
    ierr = SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
915
                             pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
916
                             pv       , ldV, ldV, sV,    PETSC_FALSE);
917
    CHKERRQ(ierr);
918
 
919
    /* W((eU-sU)*sV:(eU-sU)*sV+(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
920
    ierr = SlepcDenseMatProd(W+(eU-sU)*sV, eU, 0.0, 1.0,
921
                             pu,        ldU, ldU, eU,    PETSC_TRUE,
922
                             pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
923
    CHKERRQ(ierr);
924
 
925
    /* M <- ReduceAll(W, SUM) */
926
    sr->M = M;    sr->ld = ldM;
927
    sr->i0 = 0;   sr->i1 = sV;    sr->s0 = sU;    sr->e0 = eU;
928
                  sr->i2 = eV;    sr->s1 = 0;     sr->e1 = eU;
929
 
930
  /* Upper triangular M matrix */
1756 eromero 931
  } else if (DVD_IS(sM,DVD_MAT_UTRIANG) &&
932
             DVD_ISNOT(sM,DVD_MAT_LTRIANG)) {
1753 eromero 933
    /* Add the reduction to r */
934
    ierr = SlepcAllReduceSum(r, (eV-sV)*eU, VecsMultS_copy_func, sr, &W);
935
    CHKERRQ(ierr);
936
 
937
    /* W(0:(eV-sV)*eU-1) <- U(0:eU-1)' * V(sV:eV-1) */
938
    ierr = SlepcDenseMatProd(W,         eU,  0.0, 1.0,
939
                             pu,        ldU, ldU, eU,    PETSC_TRUE,
940
                             pv+ldV*sV, ldV, ldV, eV-sV, PETSC_FALSE);
941
    CHKERRQ(ierr);
942
 
943
    /* M <- ReduceAll(W, SUM) */
944
    sr->M = M;    sr->ld = ldM;
945
    sr->i0 = sV;  sr->i1 = eV;    sr->s0 = 0;     sr->e0 = eU;
946
                  sr->i2 = eV;
947
 
948
  /* Lower triangular M matrix */
1756 eromero 949
  } else if (DVD_ISNOT(sM,DVD_MAT_UTRIANG) &&
950
             DVD_IS(sM,DVD_MAT_LTRIANG)) {
1753 eromero 951
    /* Add the reduction to r */
952
    ierr = SlepcAllReduceSum(r, (eU-sU)*eV, VecsMultS_copy_func, sr, &W);
953
    CHKERRQ(ierr);
954
 
955
    /* W(0:(eU-sU)*eV-1) <- U(sU:eU-1)' * V(0:eV-1) */
956
    ierr = SlepcDenseMatProd(W, eU-sU, 0.0, 1.0,
957
                             pu+ldU*sU, ldU, ldU, eU-sU, PETSC_TRUE,
958
                             pv       , ldV, ldV, eV,    PETSC_FALSE);
959
    CHKERRQ(ierr);
960
 
961
    /* ReduceAll(W, SUM) */
962
    sr->M = M;    sr->ld = ldM;
963
    sr->i0 = 0;   sr->i1 = eV;    sr->s0 = sU;    sr->e0 = eU;
964
                  sr->i2 = eV;
965
  }
966
 
967
  ierr = PetscLogEventEnd(SLEPC_VecsMult,0,0,0,0);CHKERRQ(ierr);
968
 
2334 jroman 969
  ierr = VecRestoreArrayRead(U[0], &pu); CHKERRQ(ierr);
970
  ierr = VecRestoreArrayRead(V[0], &pv); CHKERRQ(ierr);
1753 eromero 971
  PetscFunctionReturn(0);
972
}
973
 
974
#undef __FUNCT__  
975
#define __FUNCT__ "VecsMultS_copy_func"
976
PetscErrorCode VecsMultS_copy_func(PetscScalar *out, PetscInt size_out,
977
                                   void *ptr)
978
{
979
  PetscInt        i, j, k;
980
  DvdMult_copy_func
981
                  *sr = (DvdMult_copy_func*)ptr;
982
 
983
  PetscFunctionBegin;
984
 
985
  for (i=sr->i0,k=0; i<sr->i1; i++)
986
    for (j=sr->ld*i+sr->s0; j<sr->ld*i+sr->e0; j++,k++) sr->M[j] = out[k];
987
  for (i=sr->i1; i<sr->i2; i++)
988
    for (j=sr->ld*i+sr->s1; j<sr->ld*i+sr->e1; j++,k++) sr->M[j] = out[k];
989
 
2214 jroman 990
  if (k != size_out) SETERRQ(PETSC_COMM_SELF,1, "Wrong size");
1753 eromero 991
 
992
  PetscFunctionReturn(0);
993
}
994
 
2223 jroman 995
#undef __FUNCT__  
996
#define __FUNCT__ "VecsOrthonormalize"
1675 slepc 997
/* Orthonormalize a chunk of parallel vector.
998
   NOTE: wS0 and wS1 must be of size n*n.
999
*/
1000
PetscErrorCode VecsOrthonormalize(Vec *V, PetscInt n, PetscScalar *wS0,
1001
                                  PetscScalar *wS1)
1002
{
1003
  PetscErrorCode  ierr;
1004
  PetscBLASInt    nn = n, info, ld;
1992 eromero 1005
  PetscInt        ldV, i;
1675 slepc 1006
  PetscScalar     *H, *T, one=1.0, *pv;
1007
 
1008
  PetscFunctionBegin;
1009
 
1010
  if (!wS0) {
1011
    ierr = PetscMalloc(sizeof(PetscScalar)*n*n, &H); CHKERRQ(ierr);
1012
  } else
1013
    H = wS0;
1014
  if (!wS1) {
1015
    ierr = PetscMalloc(sizeof(PetscScalar)*n*n, &T); CHKERRQ(ierr);
1016
  } else
1017
    T = wS1;
1018
 
1019
  /* H <- V' * V */
1747 eromero 1020
  ierr = VecsMult(H, 0, n, V, 0, n, V, 0, n, T, PETSC_NULL); CHKERRQ(ierr);
1675 slepc 1021
 
1022
  /* H <- chol(H) */
1023
  LAPACKpbtrf_("U", &nn, &nn, H, &nn, &info);
2214 jroman 1024
  if (info) SETERRQ1(((PetscObject)*V)->comm,PETSC_ERR_LIB, "Error in Lapack PBTRF %d", info);
1675 slepc 1025
 
1026
  /* V <- V * inv(H) */
1027
  ierr = VecGetLocalSize(V[0], &ldV); CHKERRQ(ierr);
1028
  ierr = VecGetArray(V[0], &pv);CHKERRQ(ierr);
1029
  ld = ldV;
1030
  BLAStrsm_("R", "U", "N", "N", &ld, &nn, &one, H, &nn, pv, &ld);
1031
  ierr = VecRestoreArray(V[0], &pv);CHKERRQ(ierr);
1992 eromero 1032
  for(i=1; i<n; i++) {
1033
    ierr = PetscObjectStateIncrease((PetscObject)V[i]); CHKERRQ(ierr);
1034
  }
1675 slepc 1035
 
1036
  if (!wS0) {
1037
    ierr = PetscFree(H); CHKERRQ(ierr);
1038
  }
1039
  if (!wS1) {
1040
    ierr = PetscFree(T); CHKERRQ(ierr);
1041
  }
1042
 
1043
  PetscFunctionReturn(0);
1044
}
1045
 
2223 jroman 1046
#undef __FUNCT__  
1047
#define __FUNCT__ "SlepcAllReduceSumBegin"
1753 eromero 1048
/*
1049
  Sum up several arrays with only one call to MPIReduce.
1050
*/
1051
PetscErrorCode SlepcAllReduceSumBegin(DvdReductionChunk *ops,
1052
                                      PetscInt max_size_ops,
1053
                                      PetscScalar *in, PetscScalar *out,
1054
                                      PetscInt max_size_in, DvdReduction *r,
1055
                                      MPI_Comm comm)
1056
{
1057
  PetscFunctionBegin;
1058
 
1059
  r->in = in;
1060
  r->out = out;
1061
  r->size_in = 0;
1062
  r->max_size_in = max_size_in;
1063
  r->ops = ops;
1064
  r->size_ops = 0;
1065
  r->max_size_ops = max_size_ops;
1066
  r->comm = comm;
1067
 
1068
  PetscFunctionReturn(0);
1069
}
1070
 
1071
#undef __FUNCT__  
1072
#define __FUNCT__ "SlepcAllReduceSum"
1073
PetscErrorCode SlepcAllReduceSum(DvdReduction *r, PetscInt size_in,
1074
                                 DvdReductionPostF f, void *ptr,
1075
                                 PetscScalar **in)
1076
{
1077
  PetscFunctionBegin;
1078
 
1079
  *in = r->in + r->size_in;
1080
  r->ops[r->size_ops].out = r->out + r->size_in;
1081
  r->ops[r->size_ops].size_out = size_in;
1082
  r->ops[r->size_ops].f = f;
1083
  r->ops[r->size_ops].ptr = ptr;
1084
  if (++(r->size_ops) > r->max_size_ops) {
2214 jroman 1085
    SETERRQ(PETSC_COMM_SELF,1, "max_size_ops is not large enough");
1753 eromero 1086
  }
1087
  if ((r->size_in+= size_in) > r->max_size_in) {
2214 jroman 1088
    SETERRQ(PETSC_COMM_SELF,1, "max_size_in is not large enough");
1753 eromero 1089
  }
1090
 
1091
  PetscFunctionReturn(0);
1092
}
1093
 
1094
 
1095
#undef __FUNCT__  
1096
#define __FUNCT__ "SlepcAllReduceSumEnd"
1097
PetscErrorCode SlepcAllReduceSumEnd(DvdReduction *r)
1098
{
1099
  PetscErrorCode  ierr;
1100
  PetscInt        i;
1101
 
1102
  PetscFunctionBegin;
1103
 
1795 eromero 1104
  /* Check if quick exit */
1105
  if (r->size_ops == 0)
1106
    PetscFunctionReturn(0);
1107
 
1753 eromero 1108
  /* Call the MPIAllReduce routine */
1899 eromero 1109
  ierr = MPI_Allreduce(r->in, r->out, r->size_in, MPIU_SCALAR, MPIU_SUM,
1753 eromero 1110
                       r->comm); CHKERRQ(ierr);
1111
 
1112
  /* Call the postponed routines */
1113
  for(i=0; i<r->size_ops; i++) {
1114
    ierr = r->ops[i].f(r->ops[i].out, r->ops[i].size_out, r->ops[i].ptr);
1115
    CHKERRQ(ierr);
1116
  }
1117
 
1118
  /* Tag the operation as done */
1119
  r->size_ops = 0;
1120
 
1121
  PetscFunctionReturn(0);
1122
}
1795 eromero 1123
 
1831 eromero 1124
 
1795 eromero 1125
#undef __FUNCT__  
1126
#define __FUNCT__ "dvd_orthV"
1989 eromero 1127
PetscErrorCode dvd_orthV(IP ip, Vec *DS, PetscInt size_DS, Vec *cX,
1128
                         PetscInt size_cX, Vec *V, PetscInt V_new_s,
1129
                         PetscInt V_new_e, PetscScalar *auxS, Vec auxV,
1130
                         PetscRandom rand)
1795 eromero 1131
{
1132
  PetscErrorCode  ierr;
1133
  PetscInt        i, j;
2216 jroman 1134
  PetscBool       lindep;
1795 eromero 1135
  PetscReal       norm;
1869 eromero 1136
  PetscScalar     *auxS0 = auxS;
1831 eromero 1137
 
1795 eromero 1138
  PetscFunctionBegin;
1831 eromero 1139
 
1795 eromero 1140
  /* Orthonormalize V with IP */
1141
  for (i=V_new_s; i<V_new_e; i++) {
1142
    for(j=0; j<3; j++) {
1975 eromero 1143
      if (j>0) { ierr = SlepcVecSetRandom(V[i], rand); CHKERRQ(ierr); }
1831 eromero 1144
      if (cX + size_cX == V) {
1145
        /* If cX and V are contiguous, orthogonalize in one step */
1989 eromero 1146
        ierr = IPOrthogonalize(ip, size_DS, DS, size_cX+i, PETSC_NULL, cX,
1831 eromero 1147
                               V[i], auxS0, &norm, &lindep); CHKERRQ(ierr);
1989 eromero 1148
      } else if (DS) {
1149
        /* Else orthogonalize first against DS, and then against cX and V */
1150
        ierr = IPOrthogonalize(ip, size_DS, DS, size_cX, PETSC_NULL, cX,
1831 eromero 1151
                               V[i], auxS0, PETSC_NULL, &lindep); CHKERRQ(ierr);
2177 jroman 1152
        if(!lindep) {
1831 eromero 1153
          ierr = IPOrthogonalize(ip, 0, PETSC_NULL, i, PETSC_NULL, V,
1154
                                 V[i], auxS0, &norm, &lindep); CHKERRQ(ierr);
1155
        }
1989 eromero 1156
      } else {
1157
        /* Else orthogonalize first against cX and then against V */
1158
        ierr = IPOrthogonalize(ip, size_cX, cX, i, PETSC_NULL, V,
1159
                               V[i], auxS0, &norm, &lindep); CHKERRQ(ierr);
1831 eromero 1160
      }
2177 jroman 1161
      if(!lindep && (norm > PETSC_MACHINE_EPSILON)) break;
2499 jroman 1162
      ierr = PetscInfo1(ip,"Orthonormalization problems adding the vector %D to the searching subspace\n",i);CHKERRQ(ierr);
1795 eromero 1163
    }
2177 jroman 1164
    if(lindep || (norm < PETSC_MACHINE_EPSILON)) {
2214 jroman 1165
        SETERRQ(((PetscObject)ip)->comm,1, "Error during orthonormalization of eigenvectors");
1795 eromero 1166
    }
1167
    ierr = VecScale(V[i], 1.0/norm); CHKERRQ(ierr);
1168
  }
1831 eromero 1169
 
1795 eromero 1170
  PetscFunctionReturn(0);
1171
}
2555 eromero 1172
 
1173
 
1174
#undef __FUNCT__  
1175
#define __FUNCT__ "dvd_BorthV"
1176
PetscErrorCode dvd_BorthV(IP ip, Vec *DS, Vec *BDS, PetscInt size_DS, Vec *cX,
1177
                         Vec *BcX, PetscInt size_cX, Vec *V, Vec *BV,
1178
                         PetscInt V_new_s, PetscInt V_new_e,
1179
                         PetscScalar *auxS, Vec auxV, PetscRandom rand)
1180
{
1181
  PetscErrorCode  ierr;
1182
  PetscInt        i, j;
1183
  PetscBool       lindep;
1184
  PetscReal       norm;
1185
  PetscScalar     *auxS0 = auxS;
1831 eromero 1186
 
2555 eromero 1187
  PetscFunctionBegin;
1188
 
1189
  /* Orthonormalize V with IP */
1190
  for (i=V_new_s; i<V_new_e; i++) {
1191
    for(j=0; j<3; j++) {
1192
      if (j>0) { ierr = SlepcVecSetRandom(V[i], rand); CHKERRQ(ierr); }
1193
      if (cX + size_cX == V) {
1194
        /* If cX and V are contiguous, orthogonalize in one step */
1195
        ierr = IPBOrthogonalize(ip, size_DS, DS, BDS, size_cX+i, PETSC_NULL, cX, BcX,
1196
                               V[i], BV[i], auxS0, &norm, &lindep); CHKERRQ(ierr);
1197
      } else if (DS) {
1198
        /* Else orthogonalize first against DS, and then against cX and V */
1199
        ierr = IPBOrthogonalize(ip, size_DS, DS, BDS, size_cX, PETSC_NULL, cX, BcX,
1200
                               V[i], BV[i], auxS0, PETSC_NULL, &lindep); CHKERRQ(ierr);
1201
        if(!lindep) {
1202
          ierr = IPBOrthogonalize(ip, 0, PETSC_NULL, PETSC_NULL, i, PETSC_NULL, V, BV,
1203
                                  V[i], BV[i], auxS0, &norm, &lindep); CHKERRQ(ierr);
1204
        }
1205
      } else {
1206
        /* Else orthogonalize first against cX and then against V */
1207
        ierr = IPBOrthogonalize(ip, size_cX, cX, BcX, i, PETSC_NULL, V, BV,
1208
                                V[i], BV[i], auxS0, &norm, &lindep); CHKERRQ(ierr);
1209
      }
1210
      if(!lindep && (norm > PETSC_MACHINE_EPSILON)) break;
1211
      ierr = PetscInfo1(ip, "Orthonormalization problems adding the vector %d to the searching subspace\n", i);
1212
      CHKERRQ(ierr);
1213
    }
1214
    if(lindep || (norm < PETSC_MACHINE_EPSILON)) {
1215
        SETERRQ(((PetscObject)ip)->comm,1, "Error during the orthonormalization of the eigenvectors!");
1216
    }
1217
    ierr = VecScale(V[i], 1.0/norm); CHKERRQ(ierr);
1218
    ierr = VecScale(BV[i], 1.0/norm); CHKERRQ(ierr);
1219
  }
1220
 
1221
  PetscFunctionReturn(0);
1222
}
1223
 
1968 eromero 1224
#undef __FUNCT__  
1225
#define __FUNCT__ "dvd_compute_eigenvectors"
1226
/*
1227
  Compute eigenvectors associated to the Schur decomposition (S, T) and
1228
  save the left vectors in pY and the right vectors in pX, where
1229
  n, size of the eigenproblem
1230
  ldS, ldT, leading dimension of S and T
1231
  ldpX, ldpY, leading dimension of pX and pY
1232
  auxS, auxiliar scalar of length:
1233
    double standard 3n+n*n, double generalized 11n+4n*n,
2022 eromero 1234
    complex standard 3n+n*n, complex generalized 3n+2n*n
1968 eromero 1235
  size_auxS, the length of auxS
1236
  doProd, if true pX and pY return the eigenvectors premultiplied by the input vectors stored in pX and pY respectively
1237
*/
1238
PetscErrorCode dvd_compute_eigenvectors(PetscInt n_, PetscScalar *S,
1239
  PetscInt ldS, PetscScalar *T, PetscInt ldT, PetscScalar *pX,
1240
  PetscInt ldpX_, PetscScalar *pY, PetscInt ldpY_, PetscScalar *auxS,
2216 jroman 1241
  PetscInt size_auxS, PetscBool doProd)
1968 eromero 1242
{
2392 jroman 1243
#if defined(SLEPC_MISSING_LAPACK_GGEV)
1244
  PetscFunctionBegin;
1245
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GGEV - Lapack routine is unavailable.");
1246
#else
1968 eromero 1247
  PetscErrorCode  ierr;
2152 eromero 1248
  PetscBLASInt    n, ldpX, ldpY, nout, info;
1968 eromero 1249
  PetscScalar     *Sc, *Tc;
1250
  const char      *side, *howmny;
1251
#if defined(PETSC_USE_COMPLEX)
1252
  PetscReal       *auxR;
1253
#else
1254
  PetscScalar     *pA,*pB;
1255
  PetscBLASInt    n1, ldpA,ldpB;
1256
  PetscScalar     *alphar, *alphai, *beta;
1257
#endif
1258
 
1259
  PetscFunctionBegin;
2117 eromero 1260
  n = PetscBLASIntCast(n_);
1261
  ldpX = PetscBLASIntCast(ldpX_);
1262
  ldpY = PetscBLASIntCast(ldpY_);
1263
 
1968 eromero 1264
  if (pX && pY) side = "B";
1265
  else if (pX)  side = "R";
1266
  else if (pY)  side = "L";
1267
  else { PetscFunctionReturn(0); }
1268
 
1269
  if (!pX) ldpX = 1;
1270
  if (!pY) ldpY = 1;
1271
 
2177 jroman 1272
  howmny = doProd?"B":"A";
1968 eromero 1273
 
1274
  Sc = auxS; auxS+= n*n; size_auxS-= n*n;
1275
  if (T) Tc = auxS, auxS+= n*n, size_auxS-= n*n;
1276
  else   Tc = PETSC_NULL;
1277
 
1278
  /* Sc <- S, Tc <- T */
1279
  ierr = SlepcDenseCopy(Sc, n, S, ldS, n, n); CHKERRQ(ierr);
1280
  if (T) {
1281
    ierr = SlepcDenseCopy(Tc, n, T, ldT, n, n); CHKERRQ(ierr);
1282
  }
1283
 
1284
  if (T) {
1285
    /* [eigr, pX] = eig(S, T) */
1286
#if defined(PETSC_USE_COMPLEX)
1287
    auxR = (PetscReal*)auxS; auxS = (PetscScalar*)(auxR+2*n); size_auxS-= 2*n;
1288
    if (size_auxS < 2*n)
2214 jroman 1289
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Insufficient work space for xTGEVC");
1968 eromero 1290
    LAPACKtgevc_(side,howmny,PETSC_NULL,&n,Sc,&n,Tc,&n,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,auxR,&info);
2394 jroman 1291
    if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTGEVC %d",info);
1968 eromero 1292
#else
1293
    alphar = auxS; auxS+= n; size_auxS-= n;
1294
    alphai = auxS; auxS+= n; size_auxS-= n;
1295
    beta = auxS; auxS+= n; size_auxS-= n;
2177 jroman 1296
    if (doProd) {
1968 eromero 1297
      if (pX) pA = auxS, auxS+= n*n, size_auxS-= n*n, ldpA = n;
1298
      else    pA = PETSC_NULL, ldpA = 0;
1299
      if (pY) pB = auxS, auxS+= n*n, size_auxS-= n*n, ldpB = n;
1300
      else    pB = PETSC_NULL, ldpB = 0;
1301
    } else {
1302
      pA = pX; pB = pY; ldpA = ldpX; ldpB = ldpY;
1303
    }
1304
    /* LAPACKtgevc_ needs the element i,i+1 in the 2-by-2 digonal blocs
1305
       of T that represent complex conjugate eigenpairs to be zero */
1306
    n1 = size_auxS;
1307
    if (size_auxS < 8*n)
2214 jroman 1308
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Insufficient work space for xGGEV");
1968 eromero 1309
    LAPACKggev_(pY?"V":"N",pX?"V":"N",&n,Sc,&n,Tc,&n,alphar,alphai,beta,pB,&ldpB,pA,&ldpA,auxS,&n1,&info);
2394 jroman 1310
    if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xGGEV %d",info);
2177 jroman 1311
    if (doProd) {
1968 eromero 1312
      if (pX) {
1313
        /* pX <- pX * pA */
1314
        ierr = SlepcDenseCopy(Sc, n, pX, ldpX, n, n); CHKERRQ(ierr);
2022 eromero 1315
        ierr = SlepcDenseMatProd(pX, ldpX, 0.0, 1.0,
1968 eromero 1316
                                 Sc, n, n, n, PETSC_FALSE,
1317
                                 pA, n, n, n, PETSC_FALSE); CHKERRQ(ierr);
1318
      }
1319
      if (pY) {
1320
        /* pY <- pY * pB */
1321
        ierr = SlepcDenseCopy(Sc, n, pY, ldpY, n, n); CHKERRQ(ierr);
2022 eromero 1322
        ierr = SlepcDenseMatProd(pY, ldpY, 0.0, 1.0,
1968 eromero 1323
                                 Sc, n, n, n, PETSC_FALSE,
1324
                                 pB, n, n, n, PETSC_FALSE); CHKERRQ(ierr);
1325
      }
1326
    }
1327
#endif
1328
  } else {
1329
    /* [eigr, pX] = eig(S) */
1330
#if defined(PETSC_USE_COMPLEX)
1331
    auxR = (PetscReal*)auxS; auxS = (PetscScalar*)(auxR+n); size_auxS-= n;
1332
    if (size_auxS < 2*n)
2214 jroman 1333
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Insufficient work space for xTREVC");
1968 eromero 1334
    LAPACKtrevc_(side,howmny,PETSC_NULL,&n,Sc,&n,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,auxR,&info);
1335
#else
1336
    LAPACKtrevc_(side,howmny,PETSC_NULL,&n,Sc,&n,pY,&ldpY,pX,&ldpX,&n,&nout,auxS,&info);
1337
#endif
2394 jroman 1338
    if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in Lapack xTREVC %d",info);
1968 eromero 1339
  }
1340
  PetscFunctionReturn(0);
2392 jroman 1341
#endif
1968 eromero 1342
}
2034 eromero 1343
 
2555 eromero 1344
 
1345
EXTERN_C_BEGIN
1346
#undef __FUNCT__
1347
#define __FUNCT__ "EPSSortDenseHEP"
1348
PetscErrorCode EPSSortDenseHEP(EPS eps, PetscInt n, PetscInt k, PetscReal *w, PetscScalar *V, PetscInt ldV)
2034 eromero 1349
{
2555 eromero 1350
  PetscInt        i, j, result, pos;
1351
  PetscReal       t, re;
1352
  PetscBLASInt    n_ = PetscBLASIntCast(n), one=1;
2034 eromero 1353
  PetscErrorCode  ierr;
2555 eromero 1354
 
2034 eromero 1355
  PetscFunctionBegin;
2555 eromero 1356
 
1357
  /* selection sort */
1358
  for (i=k;i<n-1;i++) {
1359
    re = w[i];
1360
    pos = 0;
1361
    /* find minimum eigenvalue */
1362
    for (j=i+1;j<n;j++) {
1363
      ierr = EPSCompareEigenvalues(eps,re,0,w[j],0,&result);CHKERRQ(ierr);
1364
      if (result > 0) {
1365
        re = w[j];
1366
        pos = j;
1367
      }
2034 eromero 1368
    }
2555 eromero 1369
    /* interchange the pairs i and pos */
1370
    if (pos) {
1371
      BLASswap_(&n_, &V[ldV*pos], &one, &V[ldV*i], &one);
1372
      t = w[i]; w[i] = w[pos]; w[pos] = t;
1373
    }
2034 eromero 1374
  }
2555 eromero 1375
 
2034 eromero 1376
  PetscFunctionReturn(0);
1377
}
2555 eromero 1378
EXTERN_C_END