Subversion Repositories slepc-dev

Rev

Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2339 jroman 1
/*
2
   Subroutines related to special Vecs that share a common contiguous storage.
3
 
4
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
5
   SLEPc - Scalable Library for Eigenvalue Problem Computations
6
   Copyright (c) 2002-2010, Universidad Politecnica de Valencia, Spain
7
 
8
   This file is part of SLEPc.
9
 
10
   SLEPc is free software: you can redistribute it and/or modify it under  the
11
   terms of version 3 of the GNU Lesser General Public License as published by
12
   the Free Software Foundation.
13
 
14
   SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
15
   WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
16
   FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
17
   more details.
18
 
19
   You  should have received a copy of the GNU Lesser General  Public  License
20
   along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
21
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
22
*/
23
 
2341 jroman 24
#include <private/vecimplslepc.h>            /*I "slepcvec.h" I*/
2340 jroman 25
#include <petscblaslapack.h>
2339 jroman 26
 
2340 jroman 27
PetscLogEvent SLEPC_UpdateVectors = 0,SLEPC_VecMAXPBY = 0;
28
 
2339 jroman 29
#undef __FUNCT__
30
#define __FUNCT__ "Vecs_ContiguousDestroy"
31
/*
32
  Frees the array of the contiguous vectors when all vectors have been destroyed.
33
*/
34
static PetscErrorCode Vecs_ContiguousDestroy(void *ctx)
35
{
36
  PetscErrorCode  ierr;
37
  Vecs_Contiguous *vc = (Vecs_Contiguous*)ctx;
38
 
39
  PetscFunctionBegin;
40
  ierr = PetscFree(vc->array);CHKERRQ(ierr);
41
  ierr = PetscFree(vc);CHKERRQ(ierr);
42
  PetscFunctionReturn(0);
43
}
44
 
2410 jroman 45
#undef __FUNCT__
46
#define __FUNCT__ "VecDuplicateVecs_Contiguous"
47
/*
48
  Version of VecDuplicateVecs that sets contiguous storage.
49
*/
50
static PetscErrorCode VecDuplicateVecs_Contiguous(Vec v,PetscInt m,Vec *V[])
2339 jroman 51
{
52
  PetscErrorCode  ierr;
53
  PetscInt        i,nloc;
54
  PetscScalar     *pV;
55
  PetscContainer  container;
56
  Vecs_Contiguous *vc;
57
 
58
  PetscFunctionBegin;
59
  /* Allocate array */
60
  ierr = VecGetLocalSize(v,&nloc);CHKERRQ(ierr);
61
  ierr = PetscMalloc(m*nloc*sizeof(PetscScalar),&pV);CHKERRQ(ierr);
62
  /* Create container */
63
  ierr = PetscNew(Vecs_Contiguous,&vc);CHKERRQ(ierr);
64
  vc->nvecs = m;
65
  vc->array = pV;
66
  ierr = PetscContainerCreate(((PetscObject)v)->comm,&container);CHKERRQ(ierr);
67
  ierr = PetscContainerSetPointer(container,vc);CHKERRQ(ierr);
68
  ierr = PetscContainerSetUserDestroy(container,Vecs_ContiguousDestroy);CHKERRQ(ierr);
69
  /* Create vectors */
70
  ierr = PetscMalloc(m*sizeof(Vec),V);CHKERRQ(ierr);
71
  for (i=0;i<m;i++) {
72
    ierr = VecCreateMPIWithArray(((PetscObject)v)->comm,nloc,PETSC_DECIDE,pV+i*nloc,*V+i);CHKERRQ(ierr);
73
    ierr = PetscObjectCompose((PetscObject)*(*V+i),"contiguous",(PetscObject)container);CHKERRQ(ierr);
74
  }
75
  ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
76
  PetscFunctionReturn(0);
77
}
78
 
79
#undef __FUNCT__  
2410 jroman 80
#define __FUNCT__ "SlepcVecSetTemplate"
2339 jroman 81
/*@
2410 jroman 82
   SlepcVecSetTemplate - Sets a vector as a template for contiguous storage.
2339 jroman 83
 
84
   Collective on Vec
85
 
86
   Input Parameters:
2410 jroman 87
.  v - the vector
2339 jroman 88
 
2410 jroman 89
   Note:
90
   Once this function is called, subsequent calls to VecDuplicateVecs()
91
   with this vector will use a special version that generates vectors with
92
   contiguous storage, that is, the array of values of V[1] immediately
93
   follows the array of V[0], and so on.
94
 
2339 jroman 95
   Level: developer
2410 jroman 96
@*/
97
PetscErrorCode SlepcVecSetTemplate(Vec v)
98
{
99
  PetscErrorCode ierr;
100
  PetscBool      flg;
2339 jroman 101
 
2410 jroman 102
  PetscFunctionBegin;
103
  PetscValidHeaderSpecific(v,VEC_CLASSID,1);
104
  ierr = PetscTypeCompareAny((PetscObject)v,&flg,VECSEQ,VECMPI,"");CHKERRQ(ierr);
105
  if (!flg) SETERRQ(((PetscObject)v)->comm,PETSC_ERR_SUP,"Only available for standard vectors (VECSEQ or VECMPI)");
106
  v->ops->duplicatevecs = VecDuplicateVecs_Contiguous;
107
  PetscFunctionReturn(0);
108
}
109
 
110
#undef __FUNCT__  
111
#define __FUNCT__ "SlepcMatGetVecsTemplate"
112
/*@
113
   SlepcMatGetVecsTemplate - Get vectors compatible with a matrix,
114
   i.e. with the same parallel layout, and mark them as templates
115
   for contiguous storage.
116
 
117
   Collective on Mat
118
 
119
   Input Parameter:
120
.  mat - the matrix
121
 
122
   Output Parameters:
123
+  right - (optional) vector that the matrix can be multiplied against
124
-  left  - (optional) vector that the matrix vector product can be stored in
125
 
126
   Options Database Keys:
127
.  -slepc_non_contiguous - Disable contiguous vector storage
128
 
129
   Notes:
130
   Use -slepc_non_contiguous to disable contiguous storage throughout SLEPc.
131
   Contiguous storage is currently also disabled in AIJCUSP matrices.
132
 
133
   Level: developer
134
 
135
.seealso: SlepcVecSetTemplate()
2339 jroman 136
@*/
2410 jroman 137
PetscErrorCode SlepcMatGetVecsTemplate(Mat mat,Vec *right,Vec *left)
2339 jroman 138
{
139
  PetscErrorCode ierr;
2410 jroman 140
  PetscBool      flg;
141
  Vec            v;
2339 jroman 142
 
143
  PetscFunctionBegin;
2410 jroman 144
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
145
  PetscValidType(mat,1);
146
  ierr = MatGetVecs(mat,right,left);CHKERRQ(ierr);
147
  v = right? *right: *left;
148
  ierr = PetscTypeCompareAny((PetscObject)v,&flg,VECSEQ,VECMPI,"");CHKERRQ(ierr);
149
  if (!flg) PetscFunctionReturn(0);
150
  ierr = PetscOptionsHasName(PETSC_NULL,"-slepc_non_contiguous",&flg);CHKERRQ(ierr);
151
  if (!flg) {
152
    if (right) { ierr = SlepcVecSetTemplate(*right);CHKERRQ(ierr); }
153
    if (left) { ierr = SlepcVecSetTemplate(*left);CHKERRQ(ierr); }
2339 jroman 154
  }
155
  PetscFunctionReturn(0);
156
}
157
 
2340 jroman 158
#undef __FUNCT__  
159
#define __FUNCT__ "SlepcUpdateVectors"
160
/*@
161
   SlepcUpdateVectors - Update a set of vectors V as V(:,s:e-1) = V*Q(:,s:e-1).
162
 
163
   Not Collective
164
 
165
   Input parameters:
166
+  n      - number of vectors in V
167
.  s      - first column of V to be overwritten
168
.  e      - first column of V not to be overwritten
169
.  Q      - matrix containing the coefficients of the update
170
.  ldq    - leading dimension of Q
171
-  qtrans - flag indicating if Q is to be transposed
172
 
173
   Input/Output parameter:
174
.  V      - set of vectors
175
 
176
   Notes:
177
   This function computes V(:,s:e-1) = V*Q(:,s:e-1), that is, given a set of
178
   vectors V, columns from s to e-1 are overwritten with columns from s to
179
   e-1 of the matrix-matrix product V*Q.
180
 
181
   Matrix V is represented as an array of Vec, whereas Q is represented as
182
   a column-major dense array of leading dimension ldq. Only columns s to e-1
183
   of Q are referenced.
184
 
185
   If qtrans=PETSC_TRUE, the operation is V*Q'.
186
 
187
   This routine is implemented with a call to BLAS, therefore V is an array
188
   of Vec which have the data stored contiguously in memory as a Fortran matrix.
189
   PETSc does not create such arrays by default.
190
 
191
   Level: developer
192
 
193
@*/
194
PetscErrorCode SlepcUpdateVectors(PetscInt n_,Vec *V,PetscInt s,PetscInt e,const PetscScalar *Q,PetscInt ldq_,PetscBool qtrans)
195
{
196
  PetscErrorCode ierr;
197
 
198
  PetscFunctionBegin;
199
  ierr = SlepcUpdateStrideVectors(n_,V,s,1,e,Q,ldq_,qtrans);CHKERRQ(ierr);
200
  PetscFunctionReturn(0);
201
}
202
 
203
#undef __FUNCT__  
204
#define __FUNCT__ "SlepcUpdateStrideVectors"
205
/*@
206
   SlepcUpdateStrideVectors - Update a set of vectors V as
207
   V(:,s:d:e-1) = V*Q(:,s:e-1).
208
 
209
   Not Collective
210
 
211
   Input parameters:
212
+  n      - number of vectors in V
213
.  s      - first column of V to be overwritten
214
.  d      - stride
215
.  e      - first column of V not to be overwritten
216
.  Q      - matrix containing the coefficients of the update
217
.  ldq    - leading dimension of Q
218
-  qtrans - flag indicating if Q is to be transposed
219
 
220
   Input/Output parameter:
221
.  V      - set of vectors
222
 
223
   Notes:
224
   This function computes V(:,s:d:e-1) = V*Q(:,s:e-1), that is, given a set
225
   of vectors V, columns from s to e-1 are overwritten with columns from s to
226
   e-1 of the matrix-matrix product V*Q.
227
 
228
   Matrix V is represented as an array of Vec, whereas Q is represented as
229
   a column-major dense array of leading dimension ldq. Only columns s to e-1
230
   of Q are referenced.
231
 
232
   If qtrans=PETSC_TRUE, the operation is V*Q'.
233
 
234
   This routine is implemented with a call to BLAS, therefore V is an array
235
   of Vec which have the data stored contiguously in memory as a Fortran matrix.
236
   PETSc does not create such arrays by default.
237
 
238
   Level: developer
239
 
240
@*/
241
PetscErrorCode SlepcUpdateStrideVectors(PetscInt n_,Vec *V,PetscInt s,PetscInt d,PetscInt e,const PetscScalar *Q,PetscInt ldq_,PetscBool qtrans)
242
{
243
  PetscErrorCode ierr;
244
  PetscInt       l;
245
  PetscBLASInt   i,j,k,bs=64,m,n,ldq,ls,ld;
246
  PetscScalar    *pv,*pw,*pq,*work,*pwork,one=1.0,zero=0.0;
247
  const char     *qt;
248
 
249
  PetscFunctionBegin;
250
  n = PetscBLASIntCast(n_/d);
251
  ldq = PetscBLASIntCast(ldq_);
252
  m = (e-s)/d;
253
  if (m==0) PetscFunctionReturn(0);
254
  PetscValidIntPointer(Q,5);
255
  if (m<0 || n<0 || s<0 || m>n) SETERRQ(((PetscObject)*V)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Index argument out of range");
256
  ierr = PetscLogEventBegin(SLEPC_UpdateVectors,0,0,0,0);CHKERRQ(ierr);
257
  ierr = VecGetLocalSize(V[0],&l);CHKERRQ(ierr);
258
  ls = PetscBLASIntCast(l);
259
  ld = ls*PetscBLASIntCast(d);
260
  ierr = VecGetArray(V[0],&pv);CHKERRQ(ierr);
261
  if (qtrans) {
262
    pq = (PetscScalar*)Q+s;
263
    qt = "T";
264
  } else {
265
    pq = (PetscScalar*)Q+s*ldq;
266
    qt = "N";
267
  }
268
  ierr = PetscMalloc(sizeof(PetscScalar)*bs*m,&work);CHKERRQ(ierr);
269
  k = ls % bs;
270
  if (k) {
271
    BLASgemm_("N",qt,&k,&m,&n,&one,pv,&ld,pq,&ldq,&zero,work,&k);
272
    for (j=0;j<m;j++) {
273
      pw = pv+(s+j)*ld;
274
      pwork = work+j*k;
275
      for (i=0;i<k;i++) {
276
        *pw++ = *pwork++;
277
      }
278
    }        
279
  }
280
  for (;k<ls;k+=bs) {
281
    BLASgemm_("N",qt,&bs,&m,&n,&one,pv+k,&ld,pq,&ldq,&zero,work,&bs);
282
    for (j=0;j<m;j++) {
283
      pw = pv+(s+j)*ld+k;
284
      pwork = work+j*bs;
285
      for (i=0;i<bs;i++) {
286
        *pw++ = *pwork++;
287
      }
288
    }
289
  }
290
  ierr = VecRestoreArray(V[0],&pv);CHKERRQ(ierr);
291
  ierr = PetscFree(work);CHKERRQ(ierr);
292
  ierr = PetscLogFlops(m*n*2.0*ls);CHKERRQ(ierr);
293
  ierr = PetscLogEventEnd(SLEPC_UpdateVectors,0,0,0,0);CHKERRQ(ierr);
294
  PetscFunctionReturn(0);
295
}
296
 
297
#undef __FUNCT__  
298
#define __FUNCT__ "SlepcVecMAXPBY"
299
/*@
300
   SlepcVecMAXPBY - Computes y = beta*y + sum alpha*a[j]*x[j]
301
 
302
   Logically Collective on Vec
303
 
304
   Input parameters:
305
+  beta   - scalar beta
306
.  alpha  - scalar alpha
307
.  nv     - number of vectors in x and scalars in a
308
.  a      - array of scalars
309
-  x      - set of vectors
310
 
311
   Input/Output parameter:
312
.  y      - the vector to update
313
 
314
   Notes:
2410 jroman 315
   If x are Vec's with contiguous storage, then the operation is done
316
   through a call to BLAS. Otherwise, VecMAXPY() is called.
2340 jroman 317
 
318
   Level: developer
319
 
2410 jroman 320
.seealso: SlepcVecSetTemplate()
2340 jroman 321
@*/
2386 jroman 322
PetscErrorCode SlepcVecMAXPBY(Vec y,PetscScalar beta,PetscScalar alpha,PetscInt nv,const PetscScalar a[],Vec x[])
2340 jroman 323
{
324
  PetscErrorCode    ierr;
325
  PetscBLASInt      n,m,one=1;
326
  PetscScalar       *py;
327
  const PetscScalar *px;
2386 jroman 328
  PetscContainer    container;
329
  Vec               z;
2340 jroman 330
 
331
  PetscFunctionBegin;
332
  PetscValidHeaderSpecific(y,VEC_CLASSID,1);
333
  if (!nv || !(y)->map->n) PetscFunctionReturn(0);
334
  if (nv < 0) SETERRQ1(((PetscObject)y)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Number of vectors (given %D) cannot be negative",nv);
335
  PetscValidLogicalCollectiveScalar(y,alpha,2);
336
  PetscValidLogicalCollectiveScalar(y,beta,3);
337
  PetscValidLogicalCollectiveInt(y,nv,4);
338
  PetscValidScalarPointer(a,5);
339
  PetscValidPointer(x,6);
340
  PetscValidHeaderSpecific(*x,VEC_CLASSID,6);
341
  PetscValidType(y,1);
342
  PetscValidType(*x,6);
343
  PetscCheckSameTypeAndComm(y,1,*x,6);
344
  if ((*x)->map->N != (y)->map->N) SETERRQ(((PetscObject)y)->comm,PETSC_ERR_ARG_INCOMP,"Incompatible vector global lengths");
345
  if ((*x)->map->n != (y)->map->n) SETERRQ(((PetscObject)y)->comm,PETSC_ERR_ARG_INCOMP,"Incompatible vector local lengths");
346
 
2386 jroman 347
  ierr = PetscObjectQuery((PetscObject)(x[0]),"contiguous",(PetscObject*)&container);CHKERRQ(ierr);
348
  if (container) {
349
    /* assume x Vecs are contiguous, use BLAS calls */
350
    ierr = PetscLogEventBegin(SLEPC_VecMAXPBY,*x,y,0,0);CHKERRQ(ierr);
351
    ierr = VecGetArray(y,&py);CHKERRQ(ierr);
352
    ierr = VecGetArrayRead(*x,&px);CHKERRQ(ierr);
353
    n = PetscBLASIntCast(nv);
354
    m = PetscBLASIntCast((y)->map->n);
355
    BLASgemv_("N",&m,&n,&alpha,px,&m,a,&one,&beta,py,&one);
356
    ierr = VecRestoreArray(y,&py);CHKERRQ(ierr);
357
    ierr = VecRestoreArrayRead(*x,&px);CHKERRQ(ierr);
358
    ierr = PetscLogFlops(nv*2*(y)->map->n);CHKERRQ(ierr);
359
    ierr = PetscLogEventEnd(SLEPC_VecMAXPBY,*x,y,0,0);CHKERRQ(ierr);
360
  } else {
361
    /* use regular Vec operations */
362
    ierr = VecDuplicate(y,&z);CHKERRQ(ierr);
363
    ierr = VecCopy(y,z);CHKERRQ(ierr);
364
    ierr = VecMAXPY(y,nv,a,x);CHKERRQ(ierr);
365
    ierr = VecAXPBY(y,beta-alpha,alpha,z);CHKERRQ(ierr);
366
    ierr = VecDestroy(&z);CHKERRQ(ierr);
367
  }
2340 jroman 368
  PetscFunctionReturn(0);
369
}
370