Subversion Repositories slepc-dev

Rev

Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2110 jroman 1
/*
2
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3
   SLEPc - Scalable Library for Eigenvalue Problem Computations
2575 eromero 4
   Copyright (c) 2002-2011, Universitat Politecnica de Valencia, Spain
1960 eromero 5
 
2110 jroman 6
   This file is part of SLEPc.
7
 
8
   SLEPc is free software: you can redistribute it and/or modify it under  the
9
   terms of version 3 of the GNU Lesser General Public License as published by
10
   the Free Software Foundation.
11
 
12
   SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
13
   WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
14
   FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
15
   more details.
16
 
17
   You  should have received a copy of the GNU Lesser General  Public  License
18
   along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
19
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
20
*/
21
 
2283 jroman 22
#include <private/vecimpl.h>     /*I  "petsvec.h"  I*/
1960 eromero 23
 
24
#ifdef __WITH_MPI__
25
#define __SUF__(A) A##_MPI
26
#else
27
#define __SUF__(A) A##_Seq
28
#endif
2226 eromero 29
#define __QUOTEME_(x) #x
30
#define __QUOTEME(x) __QUOTEME_(x)
1960 eromero 31
#define __SUF_C__(A) __QUOTEME(__SUF__(A))
32
 
33
 
34
#undef __FUNCT__  
35
#define __FUNCT__ __SUF_C__(VecDot_Comp)
2336 jroman 36
PetscErrorCode __SUF__(VecDot_Comp)(Vec a,Vec b,PetscScalar *z)
1960 eromero 37
{
2336 jroman 38
  PetscScalar    sum = 0.0,work;
1960 eromero 39
  PetscInt       i;
40
  PetscErrorCode ierr;
2336 jroman 41
  Vec_Comp       *as = (Vec_Comp*)a->data,*bs = (Vec_Comp*)b->data;
1960 eromero 42
 
43
  PetscFunctionBegin;
1965 eromero 44
  PetscValidVecComp(a);
45
  PetscValidVecComp(b);
1960 eromero 46
  if (as->x[0]->ops->dot_local) {
2336 jroman 47
    for (i=0,sum=0.0;i<as->n->n;i++) {
48
      ierr = as->x[i]->ops->dot_local(as->x[i],bs->x[i],&work);CHKERRQ(ierr);
49
      sum += work;
1960 eromero 50
    }
51
#ifdef __WITH_MPI__
52
    work = sum;
2336 jroman 53
    ierr = MPI_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,((PetscObject)a)->comm);CHKERRQ(ierr);
1960 eromero 54
#endif
55
  } else {
2336 jroman 56
    for(i=0,sum=0.0; i<as->n->n; i++) {
57
      ierr = VecDot(as->x[i],bs->x[i],&work);CHKERRQ(ierr);
58
      sum += work;
1960 eromero 59
    }
60
  }
61
  *z = sum;
62
  PetscFunctionReturn(0);
63
}
64
 
65
#undef __FUNCT__  
66
#define __FUNCT__ __SUF_C__(VecMDot_Comp)
2336 jroman 67
PetscErrorCode __SUF__(VecMDot_Comp)(Vec a,PetscInt n,const Vec b[],PetscScalar *z)
1960 eromero 68
{
2336 jroman 69
  PetscScalar    *work,*work0,*r;
70
  PetscErrorCode ierr;
71
  Vec_Comp       *as = (Vec_Comp*)a->data;
72
  Vec            *bx;
73
  PetscInt       i,j;
1960 eromero 74
 
75
  PetscFunctionBegin;
1965 eromero 76
  PetscValidVecComp(a);
2336 jroman 77
  for (i=0;i<n;i++) PetscValidVecComp(b[i]);
1965 eromero 78
 
2336 jroman 79
  if (as->n->n == 0) {
1960 eromero 80
    *z = 0;
81
    PetscFunctionReturn(0);
82
  }
83
 
2336 jroman 84
  ierr = PetscMalloc(sizeof(PetscScalar)*n,&work0);CHKERRQ(ierr);
85
  ierr = PetscMalloc(sizeof(Vec)*n,&bx);CHKERRQ(ierr);
1960 eromero 86
 
87
#ifdef __WITH_MPI__
88
  if (as->x[0]->ops->mdot_local) {
89
    r = work0; work = z;
90
  } else
91
#endif
92
  {
93
    r = z; work = work0;
94
  }
95
 
96
  /* z[i] <- a.x' * b[i].x */
2336 jroman 97
  for (i=0;i<n;i++) bx[i] = ((Vec_Comp*)b[i]->data)->x[0];
1960 eromero 98
  if (as->x[0]->ops->mdot_local) {
2336 jroman 99
    ierr = as->x[0]->ops->mdot_local(as->x[0],n,bx,r);CHKERRQ(ierr);
1960 eromero 100
  } else {
2336 jroman 101
    ierr = VecMDot(as->x[0],n,bx,r);CHKERRQ(ierr);
1960 eromero 102
  }
2336 jroman 103
  for (j=0;j<as->n->n;j++) {
104
    for (i=0;i<n;i++) bx[i] = ((Vec_Comp*)b[i]->data)->x[j];
1960 eromero 105
    if (as->x[0]->ops->mdot_local) {
2336 jroman 106
      ierr = as->x[j]->ops->mdot_local(as->x[j],n,bx,work);CHKERRQ(ierr);
1960 eromero 107
    } else {
2336 jroman 108
      ierr = VecMDot(as->x[j],n,bx,work);CHKERRQ(ierr);
1960 eromero 109
    }
2336 jroman 110
    for (i=0;i<n;i++) r[i] += work[i];
1960 eromero 111
  }
112
 
113
  /* If def(__WITH_MPI__) and exists mdot_local */
114
#ifdef __WITH_MPI__
115
  if (as->x[0]->ops->mdot_local) {
116
    /* z[i] <- Allreduce(work[i]) */
2336 jroman 117
    ierr = MPI_Allreduce(r,z,n,MPIU_SCALAR,MPIU_SUM,((PetscObject)a)->comm);CHKERRQ(ierr);
1960 eromero 118
  }
119
#endif
120
 
2336 jroman 121
  ierr = PetscFree(work0);CHKERRQ(ierr);
122
  ierr = PetscFree(bx);CHKERRQ(ierr);
1960 eromero 123
  PetscFunctionReturn(0);
124
}
125
 
2545 eromero 126
#ifndef __VEC_NORM2_FUNCS_
127
#define __VEC_NORM2_FUNCS_
128
PETSC_STATIC_INLINE void SumNorm2(PetscReal *ssq0,PetscReal *scale0,PetscReal *ssq1,PetscReal *scale1)
129
{
130
  PetscReal q;
131
  if (*scale0 > *scale1) { q = *scale1/(*scale0); *ssq1 = *ssq0 + q*q*(*ssq1); *scale1 = *scale0; }
132
  else                   { q = *scale0/(*scale1); *ssq1+=         q*q*(*ssq0); }
133
}
1960 eromero 134
 
2545 eromero 135
PETSC_STATIC_INLINE PetscReal GetNorm2(PetscReal ssq,PetscReal scale)
136
{
137
  return scale*PetscSqrtReal(ssq);
138
}
139
 
140
PETSC_STATIC_INLINE void AddNorm2(PetscReal *ssq,PetscReal *scale,PetscReal x)
141
{
142
  if (x != 0.0) {
143
    PetscReal absx = PetscAbs(x), q;
144
    if (*scale < absx) { q = *scale/absx; *ssq = 1.0 + *ssq*q*q; *scale = absx; }
145
    else               { q = absx/(*scale); *ssq+= q*q; }
1960 eromero 146
  }
2545 eromero 147
}
1960 eromero 148
 
2545 eromero 149
MPI_Datatype MPIU_NORM2=0, MPIU_NORM1_AND_2=0;
150
MPI_Op MPIU_NORM2_SUM=0;
151
 
152
EXTERN_C_BEGIN
153
#undef __FUNCT__
154
#define __FUNCT__ "SlepcSumNorm2_Local"
155
void SlepcSumNorm2_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
156
{
157
  PetscInt       i,count = *cnt;
158
 
159
  PetscFunctionBegin;
160
  if (*datatype == MPIU_NORM2) {
161
    PetscReal *xin = (PetscReal*)in,*xout = (PetscReal*)out;
162
    for (i=0; i<count; i++) {
163
      SumNorm2(&xin[i*2],&xin[i*2+1],&xout[i*2],&xout[i*2+1]);
164
    }
165
  } else if (*datatype == MPIU_NORM1_AND_2) {
166
    PetscReal *xin = (PetscReal*)in,*xout = (PetscReal*)out;
167
    for (i=0; i<count; i++) {
168
      xout[i*3]+= xin[i*3];
169
      SumNorm2(&xin[i*3+1],&xin[i*3+2],&xout[i*3+1],&xout[i*3+2]);
170
    }
171
  } else {
172
    (*PetscErrorPrintf)("Can only handle MPIU_NORM* data types");
173
    MPI_Abort(MPI_COMM_WORLD,1);
1960 eromero 174
  }
2545 eromero 175
  PetscFunctionReturnVoid();
176
}
1960 eromero 177
 
178
#undef __FUNCT__  
2545 eromero 179
#define __FUNCT__ "VecNormCompEnd"
180
PetscErrorCode VecNormCompEnd()
181
{
182
  PetscErrorCode ierr;
183
  PetscFunctionBegin;
184
 
185
  ierr = MPI_Type_free(&MPIU_NORM2);CHKERRQ(ierr);
186
  ierr = MPI_Type_free(&MPIU_NORM1_AND_2);CHKERRQ(ierr);
187
  ierr = MPI_Op_free(&MPIU_NORM2_SUM);CHKERRQ(ierr);
188
 
189
  PetscFunctionReturn(0);
190
}
191
EXTERN_C_END
192
 
193
#undef __FUNCT__  
194
#define __FUNCT__ "VecNormCompInit"
195
PetscErrorCode VecNormCompInit()
196
{
197
  PetscErrorCode ierr;
198
  PetscFunctionBegin;
199
 
200
  ierr = MPI_Type_contiguous(sizeof(PetscReal)*2,MPI_BYTE,&MPIU_NORM2);CHKERRQ(ierr);
201
  ierr = MPI_Type_commit(&MPIU_NORM2);
202
  ierr = MPI_Type_contiguous(sizeof(PetscReal)*3,MPI_BYTE,&MPIU_NORM1_AND_2);CHKERRQ(ierr);
203
  ierr = MPI_Type_commit(&MPIU_NORM1_AND_2);
204
  ierr = MPI_Op_create(SlepcSumNorm2_Local,1,&MPIU_NORM2_SUM);CHKERRQ(ierr);
205
  ierr = PetscRegisterFinalize(VecNormCompEnd);
206
 
207
  PetscFunctionReturn(0);
208
}
209
#endif
210
 
211
#undef __FUNCT__  
1960 eromero 212
#define __FUNCT__ __SUF_C__(VecNorm_Comp)
2336 jroman 213
PetscErrorCode __SUF__(VecNorm_Comp)(Vec a,NormType t,PetscReal *norm)
1960 eromero 214
{
2545 eromero 215
  PetscReal      work[3],s=0.0;
2336 jroman 216
  PetscErrorCode ierr;
217
  Vec_Comp       *as = (Vec_Comp*)a->data;
218
  PetscInt       i;
1960 eromero 219
 
220
  PetscFunctionBegin;
1965 eromero 221
  PetscValidVecComp(a);
2545 eromero 222
  /* Initialize norm */
223
  switch(t) {
224
  case NORM_1: case NORM_INFINITY: *norm = 0.0; break;
225
  case NORM_2: case NORM_FROBENIUS: *norm = 1.0; s = 0.0; break;
226
  case NORM_1_AND_2: norm[0] = 0.0; norm[1] = 1.0; s = 0.0; break;
227
  }
228
  for (i=0;i<as->n->n;i++) {
229
    if (as->x[0]->ops->norm_local) {
2336 jroman 230
      ierr = as->x[0]->ops->norm_local(as->x[i],t,work);CHKERRQ(ierr);
2545 eromero 231
    } else {
2336 jroman 232
      ierr = VecNorm(as->x[i],t,work);CHKERRQ(ierr);
1960 eromero 233
    }
2545 eromero 234
    /* norm+= work */
235
    switch(t) {
236
    case NORM_1: *norm+= *work; break;
237
    case NORM_2: case NORM_FROBENIUS: AddNorm2(norm,&s,*work); break;
238
    case NORM_1_AND_2: norm[0]+= work[0]; AddNorm2(&norm[1],&s,work[1]); break;
239
    case NORM_INFINITY: *norm = PetscMax(*norm,*work); break;
240
    }
1960 eromero 241
  }
242
 
243
  /* If def(__WITH_MPI__) and exists norm_local */
244
#ifdef __WITH_MPI__
245
  if (as->x[0]->ops->norm_local) {
2545 eromero 246
    PetscReal work0[3];
1960 eromero 247
    /* norm <- Allreduce(work) */
2545 eromero 248
    switch(t) {
249
    case NORM_1:
250
      work[0] = *norm;
251
      ierr = MPI_Allreduce(work,norm,1,MPIU_REAL,MPIU_SUM,((PetscObject)a)->comm);CHKERRQ(ierr);
252
      break;
253
    case NORM_2: case NORM_FROBENIUS:
254
      work[0] = *norm; work[1] = s;
255
      ierr = MPI_Allreduce(work,work0,1,MPIU_NORM2,MPIU_NORM2_SUM,((PetscObject)a)->comm);CHKERRQ(ierr);
256
      *norm = GetNorm2(work0[0],work0[1]);
257
      break;
258
    case NORM_1_AND_2:
259
      work[0] = norm[0]; work[1] = norm[1]; work[2] = s;
260
      ierr = MPI_Allreduce(work,work0,1,MPIU_NORM1_AND_2,MPIU_NORM2_SUM,((PetscObject)a)->comm);CHKERRQ(ierr);
261
      norm[0] = work0[0];
262
      norm[1] = GetNorm2(work0[1],work0[2]);
263
      break;
264
    case NORM_INFINITY:
265
      work[0] = *norm;
266
      ierr = MPI_Allreduce(work,norm,1,MPIU_REAL,MPIU_MAX,((PetscObject)a)->comm);CHKERRQ(ierr);
267
      break;
268
    }
1960 eromero 269
  }
2545 eromero 270
#else
271
  /* Norm correction */
272
  switch(t) {
273
  case NORM_2: case NORM_FROBENIUS: *norm = GetNorm2(*norm,s); break;
274
  case NORM_1_AND_2: norm[1] = GetNorm2(norm[1],s); break;
275
  default: ;
276
  }
1960 eromero 277
#endif
278
 
279
  PetscFunctionReturn(0);
280
}
281
 
2009 eromero 282
#undef __FUNCT__  
283
#define __FUNCT__ __SUF_C__(VecDotNorm2_Comp)
2336 jroman 284
PetscErrorCode __SUF__(VecDotNorm2_Comp)(Vec v,Vec w,PetscScalar *dp,PetscScalar *nm)
2009 eromero 285
{
2336 jroman 286
  PetscScalar    *vx,*wx,dp0,nm0,dp1,nm1;
287
  PetscErrorCode ierr;
288
  Vec_Comp       *vs = (Vec_Comp*)v->data,*ws = (Vec_Comp*)w->data;
289
  PetscInt       i,n;
290
  PetscBool      t0,t1;
2009 eromero 291
#ifdef __WITH_MPI__
2336 jroman 292
  PetscScalar    work[4];
2009 eromero 293
#endif
1960 eromero 294
 
2009 eromero 295
  PetscFunctionBegin;
296
  /* Compute recursively the local part */
297
  dp0 = nm0 = 0.0;
2336 jroman 298
  ierr = PetscTypeCompare((PetscObject)v,VECCOMP,&t0);CHKERRQ(ierr);
299
  ierr = PetscTypeCompare((PetscObject)w,VECCOMP,&t1);CHKERRQ(ierr);
2009 eromero 300
  if (t0 == PETSC_TRUE && t1 == PETSC_TRUE) {
301
    PetscValidVecComp(v);
302
    PetscValidVecComp(w);
2336 jroman 303
    for (i=0;i<vs->n->n;i++) {
304
      ierr = VecDotNorm2_Comp_Seq(vs->x[i],ws->x[i],&dp1,&nm1);CHKERRQ(ierr);
305
      dp0 += dp1;
306
      nm0 += nm1;
2009 eromero 307
    }
308
  } else if (t0 == PETSC_FALSE && t1 == PETSC_FALSE) {
2336 jroman 309
    ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
310
    ierr = VecGetArray(v,&vx);CHKERRQ(ierr);
311
    ierr = VecGetArray(w,&wx);CHKERRQ(ierr);
312
    for (i=0;i<n;i++) {
2009 eromero 313
      dp0 += vx[i]*PetscConj(wx[i]);
314
      nm0 += wx[i]*PetscConj(wx[i]);
315
    }
2336 jroman 316
    ierr = VecRestoreArray(v,&vx);CHKERRQ(ierr);
317
    ierr = VecRestoreArray(w,&wx);CHKERRQ(ierr);
2009 eromero 318
  } else
2214 jroman 319
    SETERRQ(((PetscObject)v)->comm,PETSC_ERR_ARG_INCOMP,"Incompatible vector types");
2009 eromero 320
 
321
#ifdef __WITH_MPI__
322
    /* [dp, nm] <- Allreduce([dp0, nm0]) */
323
    work[0] = dp0; work[1] = nm0;
2336 jroman 324
    ierr = MPI_Allreduce(&work,&work[2],2,MPIU_SCALAR,MPIU_SUM,((PetscObject)v)->comm);CHKERRQ(ierr);
2009 eromero 325
    *dp = work[2]; *nm = work[3];
326
#else
327
    *dp = dp0; *nm = nm0;
328
#endif
329
  PetscFunctionReturn(0);
330
}
331
 
1960 eromero 332
#undef __SUF__
333
#undef __QUOTEME
334
#undef __SUF_C__
335