| Line 159... |
Line 159... |
#define __FUNCT__ "SlepcUpdateVectors_Noncontiguous_Inplace"
|
#define __FUNCT__ "SlepcUpdateVectors_Noncontiguous_Inplace"
|
/*
|
/*
|
SlepcUpdateVectors_Noncontiguous_Inplace - V = V*Q for regular vectors
|
SlepcUpdateVectors_Noncontiguous_Inplace - V = V*Q for regular vectors
|
(non-contiguous).
|
(non-contiguous).
|
*/
|
*/
|
static PetscErrorCode SlepcUpdateVectors_Noncontiguous_Inplace(PetscInt m,Vec *V,const PetscScalar *Q,PetscInt ldq,PetscBool qtrans)
|
static PetscErrorCode SlepcUpdateVectors_Noncontiguous_Inplace(PetscInt m_,Vec *V,const PetscScalar *Q,PetscInt ldq_,PetscBool qtrans)
|
{
|
{
|
PetscInt i,j,k,ls;
|
PetscInt l;
|
PetscScalar t,*pv,*work;
|
PetscBLASInt j,ls,bs=64,m,k,ldq;
|
|
PetscScalar *pv,*pq=(PetscScalar*)Q,*work,*out,one=1.0,zero=0.0;
|
PetscErrorCode ierr;
|
PetscErrorCode ierr;
|
|
|
PetscFunctionBegin;
|
PetscFunctionBegin;
|
ierr = PetscLogEventBegin(SLEPC_UpdateVectors,0,0,0,0);CHKERRQ(ierr);
|
ierr = PetscLogEventBegin(SLEPC_UpdateVectors,0,0,0,0);CHKERRQ(ierr);
|
ierr = VecGetLocalSize(V[0],&ls);CHKERRQ(ierr);
|
ierr = VecGetLocalSize(V[0],&l);CHKERRQ(ierr);
|
ierr = PetscMalloc(sizeof(PetscScalar)*m,&work);CHKERRQ(ierr);
|
ls = PetscBLASIntCast(l);
|
for (i=0;i<ls;i++) {
|
m = PetscBLASIntCast(m_);
|
|
ldq = PetscBLASIntCast(ldq_);
|
|
ierr = PetscMalloc(sizeof(PetscScalar)*2*bs*m,&work);CHKERRQ(ierr);
|
|
out = work+m*bs;
|
|
k = ls % bs;
|
|
if (k) {
|
for (j=0;j<m;j++) {
|
for (j=0;j<m;j++) {
|
t = 0;
|
ierr = VecGetArray(V[j],&pv);CHKERRQ(ierr);
|
for (k=0;k<m;k++) {
|
ierr = PetscMemcpy(work+j*bs,pv,k*sizeof(PetscScalar));CHKERRQ(ierr);
|
ierr = VecGetArray(V[k],&pv);CHKERRQ(ierr);
|
ierr = VecRestoreArray(V[j],&pv);CHKERRQ(ierr);
|
if (qtrans) t += pv[i]*Q[k*ldq+j];
|
}
|
else t += pv[i]*Q[j*ldq+k];
|
BLASgemm_("N",qtrans?"T":"N",&k,&m,&m,&one,work,&bs,pq,&ldq,&zero,out,&bs);
|
ierr = VecRestoreArray(V[k],&pv);CHKERRQ(ierr);
|
for (j=0;j<m;j++) {
|
}
|
ierr = VecGetArray(V[j],&pv);CHKERRQ(ierr);
|
work[j] = t;
|
ierr = PetscMemcpy(pv,out+j*bs,k*sizeof(PetscScalar));CHKERRQ(ierr);
|
|
ierr = VecRestoreArray(V[j],&pv);CHKERRQ(ierr);
|
|
}
|
|
}
|
|
for (;k<ls;k+=bs) {
|
|
for (j=0;j<m;j++) {
|
|
ierr = VecGetArray(V[j],&pv);CHKERRQ(ierr);
|
|
ierr = PetscMemcpy(work+j*bs,pv+k,bs*sizeof(PetscScalar));CHKERRQ(ierr);
|
|
ierr = VecRestoreArray(V[j],&pv);CHKERRQ(ierr);
|
}
|
}
|
|
BLASgemm_("N",qtrans?"C":"N",&bs,&m,&m,&one,work,&bs,pq,&ldq,&zero,out,&bs);
|
for (j=0;j<m;j++) {
|
for (j=0;j<m;j++) {
|
ierr = VecGetArray(V[j],&pv);CHKERRQ(ierr);
|
ierr = VecGetArray(V[j],&pv);CHKERRQ(ierr);
|
pv[i] = work[j];
|
ierr = PetscMemcpy(pv+k,out+j*bs,bs*sizeof(PetscScalar));CHKERRQ(ierr);
|
ierr = VecRestoreArray(V[j],&pv);CHKERRQ(ierr);
|
ierr = VecRestoreArray(V[j],&pv);CHKERRQ(ierr);
|
}
|
}
|
}
|
}
|
ierr = PetscFree(work);CHKERRQ(ierr);
|
ierr = PetscFree(work);CHKERRQ(ierr);
|
ierr = PetscLogFlops(m*m*2.0*ls);CHKERRQ(ierr);
|
ierr = PetscLogFlops(m*m*2.0*ls);CHKERRQ(ierr);
|
| Line 364... |
Line 379... |
ls = PetscBLASIntCast(l);
|
ls = PetscBLASIntCast(l);
|
ld = ls*PetscBLASIntCast(d);
|
ld = ls*PetscBLASIntCast(d);
|
ierr = VecGetArray(V[0],&pv);CHKERRQ(ierr);
|
ierr = VecGetArray(V[0],&pv);CHKERRQ(ierr);
|
if (qtrans) {
|
if (qtrans) {
|
pq = (PetscScalar*)Q+s;
|
pq = (PetscScalar*)Q+s;
|
qt = "T";
|
qt = "C";
|
} else {
|
} else {
|
pq = (PetscScalar*)Q+s*ldq;
|
pq = (PetscScalar*)Q+s*ldq;
|
qt = "N";
|
qt = "N";
|
}
|
}
|
ierr = PetscMalloc(sizeof(PetscScalar)*bs*m,&work);CHKERRQ(ierr);
|
ierr = PetscMalloc(sizeof(PetscScalar)*bs*m,&work);CHKERRQ(ierr);
|