| 2339 |
jroman |
1 |
/*
|
|
|
2 |
Subroutines related to special Vecs that share a common contiguous storage.
|
|
|
3 |
|
|
|
4 |
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
|
5 |
SLEPc - Scalable Library for Eigenvalue Problem Computations
|
| 2575 |
eromero |
6 |
Copyright (c) 2002-2011, Universitat Politecnica de Valencia, Spain
|
| 2339 |
jroman |
7 |
|
|
|
8 |
This file is part of SLEPc.
|
|
|
9 |
|
|
|
10 |
SLEPc is free software: you can redistribute it and/or modify it under the
|
|
|
11 |
terms of version 3 of the GNU Lesser General Public License as published by
|
|
|
12 |
the Free Software Foundation.
|
|
|
13 |
|
|
|
14 |
SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
|
15 |
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
|
16 |
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
|
|
|
17 |
more details.
|
|
|
18 |
|
|
|
19 |
You should have received a copy of the GNU Lesser General Public License
|
|
|
20 |
along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
|
|
|
21 |
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
|
22 |
*/
|
|
|
23 |
|
| 2341 |
jroman |
24 |
#include <private/vecimplslepc.h> /*I "slepcvec.h" I*/
|
| 2340 |
jroman |
25 |
#include <petscblaslapack.h>
|
| 2339 |
jroman |
26 |
|
| 2340 |
jroman |
27 |
PetscLogEvent SLEPC_UpdateVectors = 0,SLEPC_VecMAXPBY = 0;
|
|
|
28 |
|
| 2339 |
jroman |
29 |
#undef __FUNCT__
|
|
|
30 |
#define __FUNCT__ "Vecs_ContiguousDestroy"
|
|
|
31 |
/*
|
|
|
32 |
Frees the array of the contiguous vectors when all vectors have been destroyed.
|
|
|
33 |
*/
|
|
|
34 |
static PetscErrorCode Vecs_ContiguousDestroy(void *ctx)
|
|
|
35 |
{
|
|
|
36 |
PetscErrorCode ierr;
|
|
|
37 |
Vecs_Contiguous *vc = (Vecs_Contiguous*)ctx;
|
|
|
38 |
|
|
|
39 |
PetscFunctionBegin;
|
|
|
40 |
ierr = PetscFree(vc->array);CHKERRQ(ierr);
|
|
|
41 |
ierr = PetscFree(vc);CHKERRQ(ierr);
|
|
|
42 |
PetscFunctionReturn(0);
|
|
|
43 |
}
|
|
|
44 |
|
| 2410 |
jroman |
45 |
#undef __FUNCT__
|
|
|
46 |
#define __FUNCT__ "VecDuplicateVecs_Contiguous"
|
|
|
47 |
/*
|
|
|
48 |
Version of VecDuplicateVecs that sets contiguous storage.
|
|
|
49 |
*/
|
|
|
50 |
static PetscErrorCode VecDuplicateVecs_Contiguous(Vec v,PetscInt m,Vec *V[])
|
| 2339 |
jroman |
51 |
{
|
|
|
52 |
PetscErrorCode ierr;
|
|
|
53 |
PetscInt i,nloc;
|
|
|
54 |
PetscScalar *pV;
|
|
|
55 |
PetscContainer container;
|
|
|
56 |
Vecs_Contiguous *vc;
|
|
|
57 |
|
|
|
58 |
PetscFunctionBegin;
|
|
|
59 |
/* Allocate array */
|
|
|
60 |
ierr = VecGetLocalSize(v,&nloc);CHKERRQ(ierr);
|
|
|
61 |
ierr = PetscMalloc(m*nloc*sizeof(PetscScalar),&pV);CHKERRQ(ierr);
|
|
|
62 |
/* Create container */
|
|
|
63 |
ierr = PetscNew(Vecs_Contiguous,&vc);CHKERRQ(ierr);
|
|
|
64 |
vc->nvecs = m;
|
|
|
65 |
vc->array = pV;
|
|
|
66 |
ierr = PetscContainerCreate(((PetscObject)v)->comm,&container);CHKERRQ(ierr);
|
|
|
67 |
ierr = PetscContainerSetPointer(container,vc);CHKERRQ(ierr);
|
|
|
68 |
ierr = PetscContainerSetUserDestroy(container,Vecs_ContiguousDestroy);CHKERRQ(ierr);
|
|
|
69 |
/* Create vectors */
|
|
|
70 |
ierr = PetscMalloc(m*sizeof(Vec),V);CHKERRQ(ierr);
|
|
|
71 |
for (i=0;i<m;i++) {
|
|
|
72 |
ierr = VecCreateMPIWithArray(((PetscObject)v)->comm,nloc,PETSC_DECIDE,pV+i*nloc,*V+i);CHKERRQ(ierr);
|
|
|
73 |
ierr = PetscObjectCompose((PetscObject)*(*V+i),"contiguous",(PetscObject)container);CHKERRQ(ierr);
|
|
|
74 |
}
|
|
|
75 |
ierr = PetscContainerDestroy(&container);CHKERRQ(ierr);
|
|
|
76 |
PetscFunctionReturn(0);
|
|
|
77 |
}
|
|
|
78 |
|
|
|
79 |
#undef __FUNCT__
|
| 2410 |
jroman |
80 |
#define __FUNCT__ "SlepcVecSetTemplate"
|
| 2339 |
jroman |
81 |
/*@
|
| 2410 |
jroman |
82 |
SlepcVecSetTemplate - Sets a vector as a template for contiguous storage.
|
| 2339 |
jroman |
83 |
|
|
|
84 |
Collective on Vec
|
|
|
85 |
|
|
|
86 |
Input Parameters:
|
| 2410 |
jroman |
87 |
. v - the vector
|
| 2339 |
jroman |
88 |
|
| 2410 |
jroman |
89 |
Note:
|
|
|
90 |
Once this function is called, subsequent calls to VecDuplicateVecs()
|
|
|
91 |
with this vector will use a special version that generates vectors with
|
|
|
92 |
contiguous storage, that is, the array of values of V[1] immediately
|
|
|
93 |
follows the array of V[0], and so on.
|
|
|
94 |
|
| 2339 |
jroman |
95 |
Level: developer
|
| 2410 |
jroman |
96 |
@*/
|
|
|
97 |
PetscErrorCode SlepcVecSetTemplate(Vec v)
|
|
|
98 |
{
|
|
|
99 |
PetscErrorCode ierr;
|
|
|
100 |
PetscBool flg;
|
| 2339 |
jroman |
101 |
|
| 2410 |
jroman |
102 |
PetscFunctionBegin;
|
|
|
103 |
PetscValidHeaderSpecific(v,VEC_CLASSID,1);
|
|
|
104 |
ierr = PetscTypeCompareAny((PetscObject)v,&flg,VECSEQ,VECMPI,"");CHKERRQ(ierr);
|
|
|
105 |
if (!flg) SETERRQ(((PetscObject)v)->comm,PETSC_ERR_SUP,"Only available for standard vectors (VECSEQ or VECMPI)");
|
|
|
106 |
v->ops->duplicatevecs = VecDuplicateVecs_Contiguous;
|
|
|
107 |
PetscFunctionReturn(0);
|
|
|
108 |
}
|
|
|
109 |
|
|
|
110 |
#undef __FUNCT__
|
|
|
111 |
#define __FUNCT__ "SlepcMatGetVecsTemplate"
|
|
|
112 |
/*@
|
|
|
113 |
SlepcMatGetVecsTemplate - Get vectors compatible with a matrix,
|
|
|
114 |
i.e. with the same parallel layout, and mark them as templates
|
|
|
115 |
for contiguous storage.
|
|
|
116 |
|
|
|
117 |
Collective on Mat
|
|
|
118 |
|
|
|
119 |
Input Parameter:
|
|
|
120 |
. mat - the matrix
|
|
|
121 |
|
|
|
122 |
Output Parameters:
|
|
|
123 |
+ right - (optional) vector that the matrix can be multiplied against
|
|
|
124 |
- left - (optional) vector that the matrix vector product can be stored in
|
|
|
125 |
|
|
|
126 |
Options Database Keys:
|
|
|
127 |
. -slepc_non_contiguous - Disable contiguous vector storage
|
|
|
128 |
|
|
|
129 |
Notes:
|
|
|
130 |
Use -slepc_non_contiguous to disable contiguous storage throughout SLEPc.
|
|
|
131 |
Contiguous storage is currently also disabled in AIJCUSP matrices.
|
|
|
132 |
|
|
|
133 |
Level: developer
|
|
|
134 |
|
|
|
135 |
.seealso: SlepcVecSetTemplate()
|
| 2339 |
jroman |
136 |
@*/
|
| 2410 |
jroman |
137 |
PetscErrorCode SlepcMatGetVecsTemplate(Mat mat,Vec *right,Vec *left)
|
| 2339 |
jroman |
138 |
{
|
|
|
139 |
PetscErrorCode ierr;
|
| 2410 |
jroman |
140 |
PetscBool flg;
|
|
|
141 |
Vec v;
|
| 2339 |
jroman |
142 |
|
|
|
143 |
PetscFunctionBegin;
|
| 2410 |
jroman |
144 |
PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
|
|
|
145 |
PetscValidType(mat,1);
|
|
|
146 |
ierr = MatGetVecs(mat,right,left);CHKERRQ(ierr);
|
|
|
147 |
v = right? *right: *left;
|
|
|
148 |
ierr = PetscTypeCompareAny((PetscObject)v,&flg,VECSEQ,VECMPI,"");CHKERRQ(ierr);
|
|
|
149 |
if (!flg) PetscFunctionReturn(0);
|
|
|
150 |
ierr = PetscOptionsHasName(PETSC_NULL,"-slepc_non_contiguous",&flg);CHKERRQ(ierr);
|
|
|
151 |
if (!flg) {
|
|
|
152 |
if (right) { ierr = SlepcVecSetTemplate(*right);CHKERRQ(ierr); }
|
|
|
153 |
if (left) { ierr = SlepcVecSetTemplate(*left);CHKERRQ(ierr); }
|
| 2339 |
jroman |
154 |
}
|
|
|
155 |
PetscFunctionReturn(0);
|
|
|
156 |
}
|
|
|
157 |
|
| 2340 |
jroman |
158 |
#undef __FUNCT__
|
| 2414 |
jroman |
159 |
#define __FUNCT__ "SlepcUpdateVectors_Noncontiguous_Inplace"
|
| 2415 |
jroman |
160 |
/*
|
| 2414 |
jroman |
161 |
SlepcUpdateVectors_Noncontiguous_Inplace - V = V*Q for regular vectors
|
|
|
162 |
(non-contiguous).
|
| 2415 |
jroman |
163 |
*/
|
| 2417 |
jroman |
164 |
static PetscErrorCode SlepcUpdateVectors_Noncontiguous_Inplace(PetscInt m_,Vec *V,const PetscScalar *Q,PetscInt ldq_,PetscBool qtrans)
|
| 2413 |
jroman |
165 |
{
|
| 2417 |
jroman |
166 |
PetscInt l;
|
|
|
167 |
PetscBLASInt j,ls,bs=64,m,k,ldq;
|
|
|
168 |
PetscScalar *pv,*pq=(PetscScalar*)Q,*work,*out,one=1.0,zero=0.0;
|
| 2413 |
jroman |
169 |
PetscErrorCode ierr;
|
|
|
170 |
|
|
|
171 |
PetscFunctionBegin;
|
|
|
172 |
ierr = PetscLogEventBegin(SLEPC_UpdateVectors,0,0,0,0);CHKERRQ(ierr);
|
| 2417 |
jroman |
173 |
ierr = VecGetLocalSize(V[0],&l);CHKERRQ(ierr);
|
|
|
174 |
ls = PetscBLASIntCast(l);
|
|
|
175 |
m = PetscBLASIntCast(m_);
|
|
|
176 |
ldq = PetscBLASIntCast(ldq_);
|
|
|
177 |
ierr = PetscMalloc(sizeof(PetscScalar)*2*bs*m,&work);CHKERRQ(ierr);
|
|
|
178 |
out = work+m*bs;
|
|
|
179 |
k = ls % bs;
|
|
|
180 |
if (k) {
|
| 2414 |
jroman |
181 |
for (j=0;j<m;j++) {
|
| 2417 |
jroman |
182 |
ierr = VecGetArray(V[j],&pv);CHKERRQ(ierr);
|
|
|
183 |
ierr = PetscMemcpy(work+j*bs,pv,k*sizeof(PetscScalar));CHKERRQ(ierr);
|
|
|
184 |
ierr = VecRestoreArray(V[j],&pv);CHKERRQ(ierr);
|
| 2413 |
jroman |
185 |
}
|
| 2418 |
jroman |
186 |
BLASgemm_("N",qtrans?"C":"N",&k,&m,&m,&one,work,&bs,pq,&ldq,&zero,out,&bs);
|
| 2414 |
jroman |
187 |
for (j=0;j<m;j++) {
|
| 2413 |
jroman |
188 |
ierr = VecGetArray(V[j],&pv);CHKERRQ(ierr);
|
| 2417 |
jroman |
189 |
ierr = PetscMemcpy(pv,out+j*bs,k*sizeof(PetscScalar));CHKERRQ(ierr);
|
| 2413 |
jroman |
190 |
ierr = VecRestoreArray(V[j],&pv);CHKERRQ(ierr);
|
|
|
191 |
}
|
|
|
192 |
}
|
| 2417 |
jroman |
193 |
for (;k<ls;k+=bs) {
|
|
|
194 |
for (j=0;j<m;j++) {
|
|
|
195 |
ierr = VecGetArray(V[j],&pv);CHKERRQ(ierr);
|
|
|
196 |
ierr = PetscMemcpy(work+j*bs,pv+k,bs*sizeof(PetscScalar));CHKERRQ(ierr);
|
|
|
197 |
ierr = VecRestoreArray(V[j],&pv);CHKERRQ(ierr);
|
|
|
198 |
}
|
|
|
199 |
BLASgemm_("N",qtrans?"C":"N",&bs,&m,&m,&one,work,&bs,pq,&ldq,&zero,out,&bs);
|
|
|
200 |
for (j=0;j<m;j++) {
|
|
|
201 |
ierr = VecGetArray(V[j],&pv);CHKERRQ(ierr);
|
|
|
202 |
ierr = PetscMemcpy(pv+k,out+j*bs,bs*sizeof(PetscScalar));CHKERRQ(ierr);
|
|
|
203 |
ierr = VecRestoreArray(V[j],&pv);CHKERRQ(ierr);
|
|
|
204 |
}
|
|
|
205 |
}
|
| 2413 |
jroman |
206 |
ierr = PetscFree(work);CHKERRQ(ierr);
|
| 2414 |
jroman |
207 |
ierr = PetscLogFlops(m*m*2.0*ls);CHKERRQ(ierr);
|
| 2413 |
jroman |
208 |
ierr = PetscLogEventEnd(SLEPC_UpdateVectors,0,0,0,0);CHKERRQ(ierr);
|
|
|
209 |
PetscFunctionReturn(0);
|
|
|
210 |
}
|
|
|
211 |
|
|
|
212 |
#undef __FUNCT__
|
| 2414 |
jroman |
213 |
#define __FUNCT__ "SlepcUpdateVectors_Noncontiguous"
|
| 2415 |
jroman |
214 |
/*
|
| 2414 |
jroman |
215 |
SlepcUpdateVectors_Noncontiguous - V(:,s:e-1) = V*Q(:,s:e-1) for
|
|
|
216 |
regular vectors (non-contiguous).
|
|
|
217 |
|
|
|
218 |
Writing V = [ V1 V2 V3 ] and Q = [ Q1 Q2 Q3 ], where the V2 and Q2
|
|
|
219 |
correspond to the columns s:e-1, the computation is done as
|
|
|
220 |
V2 := V2*Q2 + V1*Q1 + V3*Q3
|
|
|
221 |
(the first term is computed with SlepcUpdateVectors_Noncontiguous_Inplace).
|
| 2415 |
jroman |
222 |
*/
|
| 2414 |
jroman |
223 |
static PetscErrorCode SlepcUpdateVectors_Noncontiguous(PetscInt n,Vec *V,PetscInt s,PetscInt e,const PetscScalar *Q,PetscInt ldq,PetscBool qtrans)
|
|
|
224 |
{
|
|
|
225 |
PetscInt i,j,m,ln;
|
|
|
226 |
PetscScalar *pq,qt[100];
|
|
|
227 |
PetscBool allocated = PETSC_FALSE;
|
|
|
228 |
PetscErrorCode ierr;
|
|
|
229 |
|
|
|
230 |
PetscFunctionBegin;
|
|
|
231 |
m = e-s;
|
|
|
232 |
if (qtrans) {
|
|
|
233 |
ln = PetscMax(s,n-e);
|
|
|
234 |
if (ln<=100) pq = qt;
|
|
|
235 |
else {
|
|
|
236 |
ierr = PetscMalloc(ln*sizeof(PetscScalar),&pq);CHKERRQ(ierr);
|
|
|
237 |
allocated = PETSC_TRUE;
|
|
|
238 |
}
|
|
|
239 |
}
|
|
|
240 |
/* V2 */
|
| 2418 |
jroman |
241 |
ierr = SlepcUpdateVectors_Noncontiguous_Inplace(m,V+s,Q+s*ldq+s,ldq,qtrans);CHKERRQ(ierr);
|
| 2414 |
jroman |
242 |
/* V1 */
|
|
|
243 |
if (s>0) {
|
|
|
244 |
for (i=s;i<e;i++) {
|
|
|
245 |
if (qtrans) {
|
|
|
246 |
for (j=0;j<s;j++) pq[j] = Q[i+j*ldq];
|
|
|
247 |
} else pq = (PetscScalar*)Q+i*ldq;
|
|
|
248 |
ierr = VecMAXPY(V[i],s,pq,V);CHKERRQ(ierr);
|
|
|
249 |
}
|
|
|
250 |
}
|
|
|
251 |
/* V3 */
|
|
|
252 |
if (n>e) {
|
|
|
253 |
for (i=s;i<e;i++) {
|
|
|
254 |
if (qtrans) {
|
|
|
255 |
for (j=0;j<n-e;j++) pq[j] = Q[i+(j+e)*ldq];
|
|
|
256 |
} else pq = (PetscScalar*)Q+i*ldq+e;
|
|
|
257 |
ierr = VecMAXPY(V[i],n-e,pq,V+e);CHKERRQ(ierr);
|
|
|
258 |
}
|
|
|
259 |
}
|
|
|
260 |
if (allocated) { ierr = PetscFree(pq);CHKERRQ(ierr); }
|
|
|
261 |
PetscFunctionReturn(0);
|
|
|
262 |
}
|
|
|
263 |
|
|
|
264 |
#undef __FUNCT__
|
| 2340 |
jroman |
265 |
#define __FUNCT__ "SlepcUpdateVectors"
|
|
|
266 |
/*@
|
|
|
267 |
SlepcUpdateVectors - Update a set of vectors V as V(:,s:e-1) = V*Q(:,s:e-1).
|
|
|
268 |
|
|
|
269 |
Not Collective
|
|
|
270 |
|
|
|
271 |
Input parameters:
|
|
|
272 |
+ n - number of vectors in V
|
|
|
273 |
. s - first column of V to be overwritten
|
|
|
274 |
. e - first column of V not to be overwritten
|
|
|
275 |
. Q - matrix containing the coefficients of the update
|
|
|
276 |
. ldq - leading dimension of Q
|
|
|
277 |
- qtrans - flag indicating if Q is to be transposed
|
|
|
278 |
|
|
|
279 |
Input/Output parameter:
|
|
|
280 |
. V - set of vectors
|
|
|
281 |
|
|
|
282 |
Notes:
|
|
|
283 |
This function computes V(:,s:e-1) = V*Q(:,s:e-1), that is, given a set of
|
|
|
284 |
vectors V, columns from s to e-1 are overwritten with columns from s to
|
|
|
285 |
e-1 of the matrix-matrix product V*Q.
|
|
|
286 |
|
|
|
287 |
Matrix V is represented as an array of Vec, whereas Q is represented as
|
|
|
288 |
a column-major dense array of leading dimension ldq. Only columns s to e-1
|
|
|
289 |
of Q are referenced.
|
|
|
290 |
|
|
|
291 |
If qtrans=PETSC_TRUE, the operation is V*Q'.
|
|
|
292 |
|
|
|
293 |
This routine is implemented with a call to BLAS, therefore V is an array
|
|
|
294 |
of Vec which have the data stored contiguously in memory as a Fortran matrix.
|
|
|
295 |
PETSc does not create such arrays by default.
|
|
|
296 |
|
|
|
297 |
Level: developer
|
|
|
298 |
|
|
|
299 |
@*/
|
| 2413 |
jroman |
300 |
PetscErrorCode SlepcUpdateVectors(PetscInt n,Vec *V,PetscInt s,PetscInt e,const PetscScalar *Q,PetscInt ldq,PetscBool qtrans)
|
| 2340 |
jroman |
301 |
{
|
| 2413 |
jroman |
302 |
PetscContainer container;
|
| 2340 |
jroman |
303 |
PetscErrorCode ierr;
|
|
|
304 |
|
|
|
305 |
PetscFunctionBegin;
|
| 2413 |
jroman |
306 |
if (n<0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Number of vectors (given %D) cannot be negative",n);
|
|
|
307 |
if (n==0 || s>=e) PetscFunctionReturn(0);
|
|
|
308 |
PetscValidPointer(V,2);
|
|
|
309 |
PetscValidHeaderSpecific(*V,VEC_CLASSID,2);
|
|
|
310 |
PetscValidType(*V,2);
|
|
|
311 |
PetscValidScalarPointer(Q,5);
|
|
|
312 |
ierr = PetscObjectQuery((PetscObject)(V[0]),"contiguous",(PetscObject*)&container);CHKERRQ(ierr);
|
|
|
313 |
if (container) {
|
|
|
314 |
/* contiguous Vecs, use BLAS calls */
|
|
|
315 |
ierr = SlepcUpdateStrideVectors(n,V,s,1,e,Q,ldq,qtrans);CHKERRQ(ierr);
|
|
|
316 |
} else {
|
|
|
317 |
/* use regular Vec operations */
|
|
|
318 |
ierr = SlepcUpdateVectors_Noncontiguous(n,V,s,e,Q,ldq,qtrans);CHKERRQ(ierr);
|
|
|
319 |
}
|
| 2340 |
jroman |
320 |
PetscFunctionReturn(0);
|
|
|
321 |
}
|
|
|
322 |
|
|
|
323 |
#undef __FUNCT__
|
|
|
324 |
#define __FUNCT__ "SlepcUpdateStrideVectors"
|
|
|
325 |
/*@
|
|
|
326 |
SlepcUpdateStrideVectors - Update a set of vectors V as
|
|
|
327 |
V(:,s:d:e-1) = V*Q(:,s:e-1).
|
|
|
328 |
|
|
|
329 |
Not Collective
|
|
|
330 |
|
|
|
331 |
Input parameters:
|
|
|
332 |
+ n - number of vectors in V
|
|
|
333 |
. s - first column of V to be overwritten
|
|
|
334 |
. d - stride
|
|
|
335 |
. e - first column of V not to be overwritten
|
|
|
336 |
. Q - matrix containing the coefficients of the update
|
|
|
337 |
. ldq - leading dimension of Q
|
|
|
338 |
- qtrans - flag indicating if Q is to be transposed
|
|
|
339 |
|
|
|
340 |
Input/Output parameter:
|
|
|
341 |
. V - set of vectors
|
|
|
342 |
|
|
|
343 |
Notes:
|
|
|
344 |
This function computes V(:,s:d:e-1) = V*Q(:,s:e-1), that is, given a set
|
|
|
345 |
of vectors V, columns from s to e-1 are overwritten with columns from s to
|
|
|
346 |
e-1 of the matrix-matrix product V*Q.
|
|
|
347 |
|
|
|
348 |
Matrix V is represented as an array of Vec, whereas Q is represented as
|
|
|
349 |
a column-major dense array of leading dimension ldq. Only columns s to e-1
|
|
|
350 |
of Q are referenced.
|
|
|
351 |
|
|
|
352 |
If qtrans=PETSC_TRUE, the operation is V*Q'.
|
|
|
353 |
|
|
|
354 |
This routine is implemented with a call to BLAS, therefore V is an array
|
|
|
355 |
of Vec which have the data stored contiguously in memory as a Fortran matrix.
|
|
|
356 |
PETSc does not create such arrays by default.
|
|
|
357 |
|
|
|
358 |
Level: developer
|
|
|
359 |
|
|
|
360 |
@*/
|
|
|
361 |
PetscErrorCode SlepcUpdateStrideVectors(PetscInt n_,Vec *V,PetscInt s,PetscInt d,PetscInt e,const PetscScalar *Q,PetscInt ldq_,PetscBool qtrans)
|
|
|
362 |
{
|
|
|
363 |
PetscErrorCode ierr;
|
|
|
364 |
PetscInt l;
|
|
|
365 |
PetscBLASInt i,j,k,bs=64,m,n,ldq,ls,ld;
|
|
|
366 |
PetscScalar *pv,*pw,*pq,*work,*pwork,one=1.0,zero=0.0;
|
|
|
367 |
const char *qt;
|
|
|
368 |
|
|
|
369 |
PetscFunctionBegin;
|
|
|
370 |
n = PetscBLASIntCast(n_/d);
|
|
|
371 |
ldq = PetscBLASIntCast(ldq_);
|
|
|
372 |
m = (e-s)/d;
|
|
|
373 |
if (m==0) PetscFunctionReturn(0);
|
|
|
374 |
PetscValidIntPointer(Q,5);
|
|
|
375 |
if (m<0 || n<0 || s<0 || m>n) SETERRQ(((PetscObject)*V)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Index argument out of range");
|
|
|
376 |
ierr = PetscLogEventBegin(SLEPC_UpdateVectors,0,0,0,0);CHKERRQ(ierr);
|
|
|
377 |
ierr = VecGetLocalSize(V[0],&l);CHKERRQ(ierr);
|
|
|
378 |
ls = PetscBLASIntCast(l);
|
|
|
379 |
ld = ls*PetscBLASIntCast(d);
|
|
|
380 |
ierr = VecGetArray(V[0],&pv);CHKERRQ(ierr);
|
|
|
381 |
if (qtrans) {
|
|
|
382 |
pq = (PetscScalar*)Q+s;
|
| 2417 |
jroman |
383 |
qt = "C";
|
| 2340 |
jroman |
384 |
} else {
|
|
|
385 |
pq = (PetscScalar*)Q+s*ldq;
|
|
|
386 |
qt = "N";
|
|
|
387 |
}
|
|
|
388 |
ierr = PetscMalloc(sizeof(PetscScalar)*bs*m,&work);CHKERRQ(ierr);
|
|
|
389 |
k = ls % bs;
|
|
|
390 |
if (k) {
|
|
|
391 |
BLASgemm_("N",qt,&k,&m,&n,&one,pv,&ld,pq,&ldq,&zero,work,&k);
|
|
|
392 |
for (j=0;j<m;j++) {
|
|
|
393 |
pw = pv+(s+j)*ld;
|
|
|
394 |
pwork = work+j*k;
|
|
|
395 |
for (i=0;i<k;i++) {
|
|
|
396 |
*pw++ = *pwork++;
|
|
|
397 |
}
|
|
|
398 |
}
|
|
|
399 |
}
|
|
|
400 |
for (;k<ls;k+=bs) {
|
|
|
401 |
BLASgemm_("N",qt,&bs,&m,&n,&one,pv+k,&ld,pq,&ldq,&zero,work,&bs);
|
|
|
402 |
for (j=0;j<m;j++) {
|
|
|
403 |
pw = pv+(s+j)*ld+k;
|
|
|
404 |
pwork = work+j*bs;
|
|
|
405 |
for (i=0;i<bs;i++) {
|
|
|
406 |
*pw++ = *pwork++;
|
|
|
407 |
}
|
|
|
408 |
}
|
|
|
409 |
}
|
|
|
410 |
ierr = VecRestoreArray(V[0],&pv);CHKERRQ(ierr);
|
|
|
411 |
ierr = PetscFree(work);CHKERRQ(ierr);
|
|
|
412 |
ierr = PetscLogFlops(m*n*2.0*ls);CHKERRQ(ierr);
|
|
|
413 |
ierr = PetscLogEventEnd(SLEPC_UpdateVectors,0,0,0,0);CHKERRQ(ierr);
|
|
|
414 |
PetscFunctionReturn(0);
|
|
|
415 |
}
|
|
|
416 |
|
|
|
417 |
#undef __FUNCT__
|
|
|
418 |
#define __FUNCT__ "SlepcVecMAXPBY"
|
|
|
419 |
/*@
|
|
|
420 |
SlepcVecMAXPBY - Computes y = beta*y + sum alpha*a[j]*x[j]
|
|
|
421 |
|
|
|
422 |
Logically Collective on Vec
|
|
|
423 |
|
|
|
424 |
Input parameters:
|
|
|
425 |
+ beta - scalar beta
|
|
|
426 |
. alpha - scalar alpha
|
|
|
427 |
. nv - number of vectors in x and scalars in a
|
|
|
428 |
. a - array of scalars
|
|
|
429 |
- x - set of vectors
|
|
|
430 |
|
|
|
431 |
Input/Output parameter:
|
|
|
432 |
. y - the vector to update
|
|
|
433 |
|
|
|
434 |
Notes:
|
| 2410 |
jroman |
435 |
If x are Vec's with contiguous storage, then the operation is done
|
|
|
436 |
through a call to BLAS. Otherwise, VecMAXPY() is called.
|
| 2340 |
jroman |
437 |
|
|
|
438 |
Level: developer
|
|
|
439 |
|
| 2410 |
jroman |
440 |
.seealso: SlepcVecSetTemplate()
|
| 2340 |
jroman |
441 |
@*/
|
| 2416 |
jroman |
442 |
PetscErrorCode SlepcVecMAXPBY(Vec y,PetscScalar beta,PetscScalar alpha,PetscInt nv,PetscScalar a[],Vec x[])
|
| 2340 |
jroman |
443 |
{
|
|
|
444 |
PetscErrorCode ierr;
|
| 2416 |
jroman |
445 |
PetscBLASInt i,n,m,one=1;
|
| 2340 |
jroman |
446 |
PetscScalar *py;
|
|
|
447 |
const PetscScalar *px;
|
| 2386 |
jroman |
448 |
PetscContainer container;
|
|
|
449 |
Vec z;
|
| 2340 |
jroman |
450 |
|
|
|
451 |
PetscFunctionBegin;
|
|
|
452 |
PetscValidHeaderSpecific(y,VEC_CLASSID,1);
|
|
|
453 |
if (!nv || !(y)->map->n) PetscFunctionReturn(0);
|
|
|
454 |
if (nv < 0) SETERRQ1(((PetscObject)y)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Number of vectors (given %D) cannot be negative",nv);
|
|
|
455 |
PetscValidLogicalCollectiveScalar(y,alpha,2);
|
|
|
456 |
PetscValidLogicalCollectiveScalar(y,beta,3);
|
|
|
457 |
PetscValidLogicalCollectiveInt(y,nv,4);
|
|
|
458 |
PetscValidScalarPointer(a,5);
|
|
|
459 |
PetscValidPointer(x,6);
|
|
|
460 |
PetscValidHeaderSpecific(*x,VEC_CLASSID,6);
|
|
|
461 |
PetscValidType(y,1);
|
|
|
462 |
PetscValidType(*x,6);
|
|
|
463 |
PetscCheckSameTypeAndComm(y,1,*x,6);
|
|
|
464 |
if ((*x)->map->N != (y)->map->N) SETERRQ(((PetscObject)y)->comm,PETSC_ERR_ARG_INCOMP,"Incompatible vector global lengths");
|
|
|
465 |
if ((*x)->map->n != (y)->map->n) SETERRQ(((PetscObject)y)->comm,PETSC_ERR_ARG_INCOMP,"Incompatible vector local lengths");
|
|
|
466 |
|
| 2386 |
jroman |
467 |
ierr = PetscObjectQuery((PetscObject)(x[0]),"contiguous",(PetscObject*)&container);CHKERRQ(ierr);
|
|
|
468 |
if (container) {
|
|
|
469 |
/* assume x Vecs are contiguous, use BLAS calls */
|
|
|
470 |
ierr = PetscLogEventBegin(SLEPC_VecMAXPBY,*x,y,0,0);CHKERRQ(ierr);
|
|
|
471 |
ierr = VecGetArray(y,&py);CHKERRQ(ierr);
|
|
|
472 |
ierr = VecGetArrayRead(*x,&px);CHKERRQ(ierr);
|
|
|
473 |
n = PetscBLASIntCast(nv);
|
|
|
474 |
m = PetscBLASIntCast((y)->map->n);
|
|
|
475 |
BLASgemv_("N",&m,&n,&alpha,px,&m,a,&one,&beta,py,&one);
|
|
|
476 |
ierr = VecRestoreArray(y,&py);CHKERRQ(ierr);
|
|
|
477 |
ierr = VecRestoreArrayRead(*x,&px);CHKERRQ(ierr);
|
|
|
478 |
ierr = PetscLogFlops(nv*2*(y)->map->n);CHKERRQ(ierr);
|
|
|
479 |
ierr = PetscLogEventEnd(SLEPC_VecMAXPBY,*x,y,0,0);CHKERRQ(ierr);
|
|
|
480 |
} else {
|
|
|
481 |
/* use regular Vec operations */
|
| 2416 |
jroman |
482 |
if (alpha==-beta) {
|
|
|
483 |
for (i=0;i<nv;i++) a[i] = -a[i];
|
|
|
484 |
ierr = VecMAXPY(y,nv,a,x);CHKERRQ(ierr);
|
|
|
485 |
for (i=0;i<nv;i++) a[i] = -a[i];
|
|
|
486 |
ierr = VecScale(y,beta);CHKERRQ(ierr);
|
|
|
487 |
} else {
|
|
|
488 |
ierr = VecDuplicate(y,&z);CHKERRQ(ierr);
|
|
|
489 |
ierr = VecCopy(y,z);CHKERRQ(ierr);
|
|
|
490 |
ierr = VecMAXPY(y,nv,a,x);CHKERRQ(ierr);
|
|
|
491 |
ierr = VecAXPBY(y,beta-alpha,alpha,z);CHKERRQ(ierr);
|
|
|
492 |
ierr = VecDestroy(&z);CHKERRQ(ierr);
|
|
|
493 |
}
|
| 2386 |
jroman |
494 |
}
|
| 2340 |
jroman |
495 |
PetscFunctionReturn(0);
|
|
|
496 |
}
|
|
|
497 |
|