Subversion Repositories slepc-dev

Rev

Go to most recent revision | Show entire file | Ignore whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 2416 Rev 2417
Line 159... Line 159...
#define __FUNCT__ "SlepcUpdateVectors_Noncontiguous_Inplace"
#define __FUNCT__ "SlepcUpdateVectors_Noncontiguous_Inplace"
/*
/*
   SlepcUpdateVectors_Noncontiguous_Inplace - V = V*Q for regular vectors
   SlepcUpdateVectors_Noncontiguous_Inplace - V = V*Q for regular vectors
   (non-contiguous).
   (non-contiguous).
*/
*/
static PetscErrorCode SlepcUpdateVectors_Noncontiguous_Inplace(PetscInt m,Vec *V,const PetscScalar *Q,PetscInt ldq,PetscBool qtrans)
static PetscErrorCode SlepcUpdateVectors_Noncontiguous_Inplace(PetscInt m_,Vec *V,const PetscScalar *Q,PetscInt ldq_,PetscBool qtrans)
{
{
  PetscInt       i,j,k,ls;
  PetscInt       l;
  PetscScalar    t,*pv,*work;
  PetscBLASInt   j,ls,bs=64,m,k,ldq;
 
  PetscScalar    *pv,*pq=(PetscScalar*)Q,*work,*out,one=1.0,zero=0.0;
  PetscErrorCode ierr;
  PetscErrorCode ierr;
 
 
  PetscFunctionBegin;
  PetscFunctionBegin;
  ierr = PetscLogEventBegin(SLEPC_UpdateVectors,0,0,0,0);CHKERRQ(ierr);
  ierr = PetscLogEventBegin(SLEPC_UpdateVectors,0,0,0,0);CHKERRQ(ierr);
  ierr = VecGetLocalSize(V[0],&ls);CHKERRQ(ierr);
  ierr = VecGetLocalSize(V[0],&l);CHKERRQ(ierr);
  ierr = PetscMalloc(sizeof(PetscScalar)*m,&work);CHKERRQ(ierr);
  ls = PetscBLASIntCast(l);  
  for (i=0;i<ls;i++) {
  m = PetscBLASIntCast(m_);
 
  ldq = PetscBLASIntCast(ldq_);
 
  ierr = PetscMalloc(sizeof(PetscScalar)*2*bs*m,&work);CHKERRQ(ierr);
 
  out = work+m*bs;
 
  k = ls % bs;
 
  if (k) {
    for (j=0;j<m;j++) {
    for (j=0;j<m;j++) {
      t = 0;
      ierr = VecGetArray(V[j],&pv);CHKERRQ(ierr);
      for (k=0;k<m;k++) {
      ierr = PetscMemcpy(work+j*bs,pv,k*sizeof(PetscScalar));CHKERRQ(ierr);
        ierr = VecGetArray(V[k],&pv);CHKERRQ(ierr);
      ierr = VecRestoreArray(V[j],&pv);CHKERRQ(ierr);    
        if (qtrans) t += pv[i]*Q[k*ldq+j];
    }
        else        t += pv[i]*Q[j*ldq+k];
    BLASgemm_("N",qtrans?"T":"N",&k,&m,&m,&one,work,&bs,pq,&ldq,&zero,out,&bs);
        ierr = VecRestoreArray(V[k],&pv);CHKERRQ(ierr);
    for (j=0;j<m;j++) {
      }
      ierr = VecGetArray(V[j],&pv);CHKERRQ(ierr);
      work[j] = t;
      ierr = PetscMemcpy(pv,out+j*bs,k*sizeof(PetscScalar));CHKERRQ(ierr);
 
      ierr = VecRestoreArray(V[j],&pv);CHKERRQ(ierr);    
 
    }
 
  }
 
  for (;k<ls;k+=bs) {
 
    for (j=0;j<m;j++) {
 
      ierr = VecGetArray(V[j],&pv);CHKERRQ(ierr);
 
      ierr = PetscMemcpy(work+j*bs,pv+k,bs*sizeof(PetscScalar));CHKERRQ(ierr);
 
      ierr = VecRestoreArray(V[j],&pv);CHKERRQ(ierr);    
    }
    }
 
    BLASgemm_("N",qtrans?"C":"N",&bs,&m,&m,&one,work,&bs,pq,&ldq,&zero,out,&bs);
    for (j=0;j<m;j++) {
    for (j=0;j<m;j++) {
      ierr = VecGetArray(V[j],&pv);CHKERRQ(ierr);
      ierr = VecGetArray(V[j],&pv);CHKERRQ(ierr);
      pv[i] = work[j];
      ierr = PetscMemcpy(pv+k,out+j*bs,bs*sizeof(PetscScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(V[j],&pv);CHKERRQ(ierr);    
      ierr = VecRestoreArray(V[j],&pv);CHKERRQ(ierr);    
    }
    }
  }
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  ierr = PetscLogFlops(m*m*2.0*ls);CHKERRQ(ierr);
  ierr = PetscLogFlops(m*m*2.0*ls);CHKERRQ(ierr);
Line 364... Line 379...
  ls = PetscBLASIntCast(l);
  ls = PetscBLASIntCast(l);
  ld = ls*PetscBLASIntCast(d);
  ld = ls*PetscBLASIntCast(d);
  ierr = VecGetArray(V[0],&pv);CHKERRQ(ierr);
  ierr = VecGetArray(V[0],&pv);CHKERRQ(ierr);
  if (qtrans) {
  if (qtrans) {
    pq = (PetscScalar*)Q+s;
    pq = (PetscScalar*)Q+s;
    qt = "T";
    qt = "C";
  } else {
  } else {
    pq = (PetscScalar*)Q+s*ldq;
    pq = (PetscScalar*)Q+s*ldq;
    qt = "N";
    qt = "N";
  }
  }
  ierr = PetscMalloc(sizeof(PetscScalar)*bs*m,&work);CHKERRQ(ierr);
  ierr = PetscMalloc(sizeof(PetscScalar)*bs*m,&work);CHKERRQ(ierr);