Subversion Repositories slepc-dev

Rev

Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2110 jroman 1
/*
2
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3
   SLEPc - Scalable Library for Eigenvalue Problem Computations
2575 eromero 4
   Copyright (c) 2002-2011, Universitat Politecnica de Valencia, Spain
1960 eromero 5
 
2110 jroman 6
   This file is part of SLEPc.
7
 
8
   SLEPc is free software: you can redistribute it and/or modify it under  the
9
   terms of version 3 of the GNU Lesser General Public License as published by
10
   the Free Software Foundation.
11
 
12
   SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
13
   WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
14
   FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
15
   more details.
16
 
17
   You  should have received a copy of the GNU Lesser General  Public  License
18
   along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
19
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
20
*/
21
 
2729 jroman 22
#include <petsc-private/vecimpl.h>     /*I  "petsvec.h"  I*/
1960 eromero 23
 
24
#ifdef __WITH_MPI__
25
#define __SUF__(A) A##_MPI
26
#else
27
#define __SUF__(A) A##_Seq
28
#endif
2226 eromero 29
#define __QUOTEME_(x) #x
30
#define __QUOTEME(x) __QUOTEME_(x)
1960 eromero 31
#define __SUF_C__(A) __QUOTEME(__SUF__(A))
32
 
33
 
34
#undef __FUNCT__  
35
#define __FUNCT__ __SUF_C__(VecDot_Comp)
2336 jroman 36
PetscErrorCode __SUF__(VecDot_Comp)(Vec a,Vec b,PetscScalar *z)
1960 eromero 37
{
2336 jroman 38
  PetscScalar    sum = 0.0,work;
1960 eromero 39
  PetscInt       i;
40
  PetscErrorCode ierr;
2336 jroman 41
  Vec_Comp       *as = (Vec_Comp*)a->data,*bs = (Vec_Comp*)b->data;
1960 eromero 42
 
43
  PetscFunctionBegin;
1965 eromero 44
  PetscValidVecComp(a);
45
  PetscValidVecComp(b);
1960 eromero 46
  if (as->x[0]->ops->dot_local) {
2336 jroman 47
    for (i=0,sum=0.0;i<as->n->n;i++) {
48
      ierr = as->x[i]->ops->dot_local(as->x[i],bs->x[i],&work);CHKERRQ(ierr);
49
      sum += work;
1960 eromero 50
    }
51
#ifdef __WITH_MPI__
52
    work = sum;
2336 jroman 53
    ierr = MPI_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,((PetscObject)a)->comm);CHKERRQ(ierr);
1960 eromero 54
#endif
55
  } else {
2336 jroman 56
    for(i=0,sum=0.0; i<as->n->n; i++) {
57
      ierr = VecDot(as->x[i],bs->x[i],&work);CHKERRQ(ierr);
58
      sum += work;
1960 eromero 59
    }
60
  }
61
  *z = sum;
62
  PetscFunctionReturn(0);
63
}
64
 
65
#undef __FUNCT__  
66
#define __FUNCT__ __SUF_C__(VecMDot_Comp)
2336 jroman 67
PetscErrorCode __SUF__(VecMDot_Comp)(Vec a,PetscInt n,const Vec b[],PetscScalar *z)
1960 eromero 68
{
2336 jroman 69
  PetscScalar    *work,*work0,*r;
70
  PetscErrorCode ierr;
71
  Vec_Comp       *as = (Vec_Comp*)a->data;
72
  Vec            *bx;
73
  PetscInt       i,j;
1960 eromero 74
 
75
  PetscFunctionBegin;
1965 eromero 76
  PetscValidVecComp(a);
2336 jroman 77
  for (i=0;i<n;i++) PetscValidVecComp(b[i]);
1965 eromero 78
 
2336 jroman 79
  if (as->n->n == 0) {
1960 eromero 80
    *z = 0;
81
    PetscFunctionReturn(0);
82
  }
83
 
2336 jroman 84
  ierr = PetscMalloc(sizeof(PetscScalar)*n,&work0);CHKERRQ(ierr);
85
  ierr = PetscMalloc(sizeof(Vec)*n,&bx);CHKERRQ(ierr);
1960 eromero 86
 
87
#ifdef __WITH_MPI__
88
  if (as->x[0]->ops->mdot_local) {
89
    r = work0; work = z;
90
  } else
91
#endif
92
  {
93
    r = z; work = work0;
94
  }
95
 
96
  /* z[i] <- a.x' * b[i].x */
2336 jroman 97
  for (i=0;i<n;i++) bx[i] = ((Vec_Comp*)b[i]->data)->x[0];
1960 eromero 98
  if (as->x[0]->ops->mdot_local) {
2336 jroman 99
    ierr = as->x[0]->ops->mdot_local(as->x[0],n,bx,r);CHKERRQ(ierr);
1960 eromero 100
  } else {
2336 jroman 101
    ierr = VecMDot(as->x[0],n,bx,r);CHKERRQ(ierr);
1960 eromero 102
  }
2336 jroman 103
  for (j=0;j<as->n->n;j++) {
104
    for (i=0;i<n;i++) bx[i] = ((Vec_Comp*)b[i]->data)->x[j];
1960 eromero 105
    if (as->x[0]->ops->mdot_local) {
2336 jroman 106
      ierr = as->x[j]->ops->mdot_local(as->x[j],n,bx,work);CHKERRQ(ierr);
1960 eromero 107
    } else {
2336 jroman 108
      ierr = VecMDot(as->x[j],n,bx,work);CHKERRQ(ierr);
1960 eromero 109
    }
2336 jroman 110
    for (i=0;i<n;i++) r[i] += work[i];
1960 eromero 111
  }
112
 
113
  /* If def(__WITH_MPI__) and exists mdot_local */
114
#ifdef __WITH_MPI__
115
  if (as->x[0]->ops->mdot_local) {
116
    /* z[i] <- Allreduce(work[i]) */
2336 jroman 117
    ierr = MPI_Allreduce(r,z,n,MPIU_SCALAR,MPIU_SUM,((PetscObject)a)->comm);CHKERRQ(ierr);
1960 eromero 118
  }
119
#endif
120
 
2336 jroman 121
  ierr = PetscFree(work0);CHKERRQ(ierr);
122
  ierr = PetscFree(bx);CHKERRQ(ierr);
1960 eromero 123
  PetscFunctionReturn(0);
124
}
125
 
2733 eromero 126
 
127
#undef __FUNCT__  
128
#define __FUNCT__ __SUF_C__(VecTDot_Comp)
129
PetscErrorCode __SUF__(VecTDot_Comp)(Vec a,Vec b,PetscScalar *z)
130
{
131
  PetscScalar    sum = 0.0,work;
132
  PetscInt       i;
133
  PetscErrorCode ierr;
134
  Vec_Comp       *as = (Vec_Comp*)a->data,*bs = (Vec_Comp*)b->data;
135
 
136
  PetscFunctionBegin;
137
  PetscValidVecComp(a);
138
  PetscValidVecComp(b);
139
  if (as->x[0]->ops->tdot_local) {
140
    for (i=0,sum=0.0;i<as->n->n;i++) {
141
      ierr = as->x[i]->ops->tdot_local(as->x[i],bs->x[i],&work);CHKERRQ(ierr);
142
      sum += work;
143
    }
144
#ifdef __WITH_MPI__
145
    work = sum;
146
    ierr = MPI_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,((PetscObject)a)->comm);CHKERRQ(ierr);
147
#endif
148
  } else {
149
    for(i=0,sum=0.0; i<as->n->n; i++) {
150
      ierr = VecTDot(as->x[i],bs->x[i],&work);CHKERRQ(ierr);
151
      sum += work;
152
    }
153
  }
154
  *z = sum;
155
  PetscFunctionReturn(0);
156
}
157
 
158
#undef __FUNCT__  
159
#define __FUNCT__ __SUF_C__(VecMTDot_Comp)
160
PetscErrorCode __SUF__(VecMTDot_Comp)(Vec a,PetscInt n,const Vec b[],PetscScalar *z)
161
{
162
  PetscScalar    *work,*work0,*r;
163
  PetscErrorCode ierr;
164
  Vec_Comp       *as = (Vec_Comp*)a->data;
165
  Vec            *bx;
166
  PetscInt       i,j;
167
 
168
  PetscFunctionBegin;
169
  PetscValidVecComp(a);
170
  for (i=0;i<n;i++) PetscValidVecComp(b[i]);
171
 
172
  if (as->n->n == 0) {
173
    *z = 0;
174
    PetscFunctionReturn(0);
175
  }
176
 
177
  ierr = PetscMalloc(sizeof(PetscScalar)*n,&work0);CHKERRQ(ierr);
178
  ierr = PetscMalloc(sizeof(Vec)*n,&bx);CHKERRQ(ierr);
179
 
180
#ifdef __WITH_MPI__
181
  if (as->x[0]->ops->mtdot_local) {
182
    r = work0; work = z;
183
  } else
184
#endif
185
  {
186
    r = z; work = work0;
187
  }
188
 
189
  /* z[i] <- a.x' * b[i].x */
190
  for (i=0;i<n;i++) bx[i] = ((Vec_Comp*)b[i]->data)->x[0];
191
  if (as->x[0]->ops->mtdot_local) {
192
    ierr = as->x[0]->ops->mtdot_local(as->x[0],n,bx,r);CHKERRQ(ierr);
193
  } else {
194
    ierr = VecMTDot(as->x[0],n,bx,r);CHKERRQ(ierr);
195
  }
196
  for (j=0;j<as->n->n;j++) {
197
    for (i=0;i<n;i++) bx[i] = ((Vec_Comp*)b[i]->data)->x[j];
198
    if (as->x[0]->ops->mtdot_local) {
199
      ierr = as->x[j]->ops->mtdot_local(as->x[j],n,bx,work);CHKERRQ(ierr);
200
    } else {
201
      ierr = VecMTDot(as->x[j],n,bx,work);CHKERRQ(ierr);
202
    }
203
    for (i=0;i<n;i++) r[i] += work[i];
204
  }
205
 
206
  /* If def(__WITH_MPI__) and exists mtdot_local */
207
#ifdef __WITH_MPI__
208
  if (as->x[0]->ops->mtdot_local) {
209
    /* z[i] <- Allreduce(work[i]) */
210
    ierr = MPI_Allreduce(r,z,n,MPIU_SCALAR,MPIU_SUM,((PetscObject)a)->comm);CHKERRQ(ierr);
211
  }
212
#endif
213
 
214
  ierr = PetscFree(work0);CHKERRQ(ierr);
215
  ierr = PetscFree(bx);CHKERRQ(ierr);
216
  PetscFunctionReturn(0);
217
}
218
 
219
 
2545 eromero 220
#ifndef __VEC_NORM2_FUNCS_
221
#define __VEC_NORM2_FUNCS_
222
PETSC_STATIC_INLINE void SumNorm2(PetscReal *ssq0,PetscReal *scale0,PetscReal *ssq1,PetscReal *scale1)
223
{
224
  PetscReal q;
225
  if (*scale0 > *scale1) { q = *scale1/(*scale0); *ssq1 = *ssq0 + q*q*(*ssq1); *scale1 = *scale0; }
226
  else                   { q = *scale0/(*scale1); *ssq1+=         q*q*(*ssq0); }
227
}
1960 eromero 228
 
2545 eromero 229
PETSC_STATIC_INLINE PetscReal GetNorm2(PetscReal ssq,PetscReal scale)
230
{
231
  return scale*PetscSqrtReal(ssq);
232
}
233
 
234
PETSC_STATIC_INLINE void AddNorm2(PetscReal *ssq,PetscReal *scale,PetscReal x)
235
{
236
  if (x != 0.0) {
237
    PetscReal absx = PetscAbs(x), q;
238
    if (*scale < absx) { q = *scale/absx; *ssq = 1.0 + *ssq*q*q; *scale = absx; }
239
    else               { q = absx/(*scale); *ssq+= q*q; }
1960 eromero 240
  }
2545 eromero 241
}
1960 eromero 242
 
2545 eromero 243
MPI_Datatype MPIU_NORM2=0, MPIU_NORM1_AND_2=0;
244
MPI_Op MPIU_NORM2_SUM=0;
245
 
246
EXTERN_C_BEGIN
247
#undef __FUNCT__
248
#define __FUNCT__ "SlepcSumNorm2_Local"
249
void SlepcSumNorm2_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
250
{
251
  PetscInt       i,count = *cnt;
252
 
253
  PetscFunctionBegin;
254
  if (*datatype == MPIU_NORM2) {
255
    PetscReal *xin = (PetscReal*)in,*xout = (PetscReal*)out;
256
    for (i=0; i<count; i++) {
257
      SumNorm2(&xin[i*2],&xin[i*2+1],&xout[i*2],&xout[i*2+1]);
258
    }
259
  } else if (*datatype == MPIU_NORM1_AND_2) {
260
    PetscReal *xin = (PetscReal*)in,*xout = (PetscReal*)out;
261
    for (i=0; i<count; i++) {
262
      xout[i*3]+= xin[i*3];
263
      SumNorm2(&xin[i*3+1],&xin[i*3+2],&xout[i*3+1],&xout[i*3+2]);
264
    }
265
  } else {
266
    (*PetscErrorPrintf)("Can only handle MPIU_NORM* data types");
267
    MPI_Abort(MPI_COMM_WORLD,1);
1960 eromero 268
  }
2545 eromero 269
  PetscFunctionReturnVoid();
270
}
1960 eromero 271
 
272
#undef __FUNCT__  
2545 eromero 273
#define __FUNCT__ "VecNormCompEnd"
2661 eromero 274
PetscErrorCode VecNormCompEnd(void)
2545 eromero 275
{
276
  PetscErrorCode ierr;
277
  PetscFunctionBegin;
278
 
279
  ierr = MPI_Type_free(&MPIU_NORM2);CHKERRQ(ierr);
280
  ierr = MPI_Type_free(&MPIU_NORM1_AND_2);CHKERRQ(ierr);
281
  ierr = MPI_Op_free(&MPIU_NORM2_SUM);CHKERRQ(ierr);
282
 
283
  PetscFunctionReturn(0);
284
}
285
EXTERN_C_END
286
 
287
#undef __FUNCT__  
288
#define __FUNCT__ "VecNormCompInit"
289
PetscErrorCode VecNormCompInit()
290
{
291
  PetscErrorCode ierr;
292
  PetscFunctionBegin;
293
 
294
  ierr = MPI_Type_contiguous(sizeof(PetscReal)*2,MPI_BYTE,&MPIU_NORM2);CHKERRQ(ierr);
295
  ierr = MPI_Type_commit(&MPIU_NORM2);
296
  ierr = MPI_Type_contiguous(sizeof(PetscReal)*3,MPI_BYTE,&MPIU_NORM1_AND_2);CHKERRQ(ierr);
297
  ierr = MPI_Type_commit(&MPIU_NORM1_AND_2);
298
  ierr = MPI_Op_create(SlepcSumNorm2_Local,1,&MPIU_NORM2_SUM);CHKERRQ(ierr);
299
  ierr = PetscRegisterFinalize(VecNormCompEnd);
300
 
301
  PetscFunctionReturn(0);
302
}
303
#endif
304
 
305
#undef __FUNCT__  
1960 eromero 306
#define __FUNCT__ __SUF_C__(VecNorm_Comp)
2336 jroman 307
PetscErrorCode __SUF__(VecNorm_Comp)(Vec a,NormType t,PetscReal *norm)
1960 eromero 308
{
2545 eromero 309
  PetscReal      work[3],s=0.0;
2336 jroman 310
  PetscErrorCode ierr;
311
  Vec_Comp       *as = (Vec_Comp*)a->data;
312
  PetscInt       i;
1960 eromero 313
 
314
  PetscFunctionBegin;
1965 eromero 315
  PetscValidVecComp(a);
2545 eromero 316
  /* Initialize norm */
317
  switch(t) {
318
  case NORM_1: case NORM_INFINITY: *norm = 0.0; break;
319
  case NORM_2: case NORM_FROBENIUS: *norm = 1.0; s = 0.0; break;
320
  case NORM_1_AND_2: norm[0] = 0.0; norm[1] = 1.0; s = 0.0; break;
321
  }
322
  for (i=0;i<as->n->n;i++) {
323
    if (as->x[0]->ops->norm_local) {
2336 jroman 324
      ierr = as->x[0]->ops->norm_local(as->x[i],t,work);CHKERRQ(ierr);
2545 eromero 325
    } else {
2336 jroman 326
      ierr = VecNorm(as->x[i],t,work);CHKERRQ(ierr);
1960 eromero 327
    }
2545 eromero 328
    /* norm+= work */
329
    switch(t) {
330
    case NORM_1: *norm+= *work; break;
331
    case NORM_2: case NORM_FROBENIUS: AddNorm2(norm,&s,*work); break;
332
    case NORM_1_AND_2: norm[0]+= work[0]; AddNorm2(&norm[1],&s,work[1]); break;
333
    case NORM_INFINITY: *norm = PetscMax(*norm,*work); break;
334
    }
1960 eromero 335
  }
336
 
337
  /* If def(__WITH_MPI__) and exists norm_local */
338
#ifdef __WITH_MPI__
339
  if (as->x[0]->ops->norm_local) {
2545 eromero 340
    PetscReal work0[3];
1960 eromero 341
    /* norm <- Allreduce(work) */
2545 eromero 342
    switch(t) {
343
    case NORM_1:
344
      work[0] = *norm;
345
      ierr = MPI_Allreduce(work,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)a)->comm);CHKERRQ(ierr);
346
      break;
347
    case NORM_2: case NORM_FROBENIUS:
348
      work[0] = *norm; work[1] = s;
349
      ierr = MPI_Allreduce(work,work0,1,MPIU_NORM2,MPIU_NORM2_SUM,((PetscObject)a)->comm);CHKERRQ(ierr);
350
      *norm = GetNorm2(work0[0],work0[1]);
351
      break;
352
    case NORM_1_AND_2:
353
      work[0] = norm[0]; work[1] = norm[1]; work[2] = s;
354
      ierr = MPI_Allreduce(work,work0,1,MPIU_NORM1_AND_2,MPIU_NORM2_SUM,((PetscObject)a)->comm);CHKERRQ(ierr);
355
      norm[0] = work0[0];
356
      norm[1] = GetNorm2(work0[1],work0[2]);
357
      break;
358
    case NORM_INFINITY:
359
      work[0] = *norm;
360
      ierr = MPI_Allreduce(work,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)a)->comm);CHKERRQ(ierr);
361
      break;
362
    }
1960 eromero 363
  }
2545 eromero 364
#else
365
  /* Norm correction */
366
  switch(t) {
367
  case NORM_2: case NORM_FROBENIUS: *norm = GetNorm2(*norm,s); break;
368
  case NORM_1_AND_2: norm[1] = GetNorm2(norm[1],s); break;
369
  default: ;
370
  }
1960 eromero 371
#endif
372
 
373
  PetscFunctionReturn(0);
374
}
375
 
2009 eromero 376
#undef __FUNCT__  
377
#define __FUNCT__ __SUF_C__(VecDotNorm2_Comp)
2336 jroman 378
PetscErrorCode __SUF__(VecDotNorm2_Comp)(Vec v,Vec w,PetscScalar *dp,PetscScalar *nm)
2009 eromero 379
{
2336 jroman 380
  PetscScalar    *vx,*wx,dp0,nm0,dp1,nm1;
381
  PetscErrorCode ierr;
382
  Vec_Comp       *vs = (Vec_Comp*)v->data,*ws = (Vec_Comp*)w->data;
383
  PetscInt       i,n;
384
  PetscBool      t0,t1;
2009 eromero 385
#ifdef __WITH_MPI__
2336 jroman 386
  PetscScalar    work[4];
2009 eromero 387
#endif
1960 eromero 388
 
2009 eromero 389
  PetscFunctionBegin;
390
  /* Compute recursively the local part */
391
  dp0 = nm0 = 0.0;
2823 jroman 392
  ierr = PetscObjectTypeCompare((PetscObject)v,VECCOMP,&t0);CHKERRQ(ierr);
393
  ierr = PetscObjectTypeCompare((PetscObject)w,VECCOMP,&t1);CHKERRQ(ierr);
2009 eromero 394
  if (t0 == PETSC_TRUE && t1 == PETSC_TRUE) {
395
    PetscValidVecComp(v);
396
    PetscValidVecComp(w);
2336 jroman 397
    for (i=0;i<vs->n->n;i++) {
398
      ierr = VecDotNorm2_Comp_Seq(vs->x[i],ws->x[i],&dp1,&nm1);CHKERRQ(ierr);
399
      dp0 += dp1;
400
      nm0 += nm1;
2009 eromero 401
    }
402
  } else if (t0 == PETSC_FALSE && t1 == PETSC_FALSE) {
2336 jroman 403
    ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
404
    ierr = VecGetArray(v,&vx);CHKERRQ(ierr);
405
    ierr = VecGetArray(w,&wx);CHKERRQ(ierr);
406
    for (i=0;i<n;i++) {
2009 eromero 407
      dp0 += vx[i]*PetscConj(wx[i]);
408
      nm0 += wx[i]*PetscConj(wx[i]);
409
    }
2336 jroman 410
    ierr = VecRestoreArray(v,&vx);CHKERRQ(ierr);
411
    ierr = VecRestoreArray(w,&wx);CHKERRQ(ierr);
2009 eromero 412
  } else
2214 jroman 413
    SETERRQ(((PetscObject)v)->comm,PETSC_ERR_ARG_INCOMP,"Incompatible vector types");
2009 eromero 414
 
415
#ifdef __WITH_MPI__
416
    /* [dp, nm] <- Allreduce([dp0, nm0]) */
417
    work[0] = dp0; work[1] = nm0;
2336 jroman 418
    ierr = MPI_Allreduce(&work,&work[2],2,MPIU_SCALAR,MPIU_SUM,((PetscObject)v)->comm);CHKERRQ(ierr);
2009 eromero 419
    *dp = work[2]; *nm = work[3];
420
#else
421
    *dp = dp0; *nm = nm0;
422
#endif
423
  PetscFunctionReturn(0);
424
}
425
 
1960 eromero 426
#undef __SUF__
427
#undef __QUOTEME
428
#undef __SUF_C__
429