/*
Interface for MatDense class: MPI reduction operations
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
SLEPc - Scalable Library for Eigenvalue Problem Computations
Copyright (c) 2002-2010, Universidad Politecnica de Valencia, Spain
This file is part of SLEPc.
SLEPc is free software: you can redistribute it and/or modify it under the
terms of version 3 of the GNU Lesser General Public License as published by
the Free Software Foundation.
SLEPc is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
more details.
You should have received a copy of the GNU Lesser General Public License
along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
#include <private/matdenseimpl.h> /*I "slepcmatdense.h" I*/
#include <petscblaslapack.h>
/* Structures for MatDenseReduce */
typedef struct MatDenseReductionChunk_ {
MatDense A; /* object matrix to reduce */
PetscScalar *v; /* pointer returned by MatDenseGetArray */
struct MatDenseReductionChunk_ *next; /* next node in this list */
} MatDenseReductionChunk;
typedef struct {
MatDenseReductionChunk *r; /* a list of reduction to perform */
PetscInt nr, /* total number of PetscScalar to reduce */
nobjects; /* number of pending objects of this reduction */
MPI_Op op; /* reduction operation */
MPI_Comm comm; /* communicator */
} MatDenseReduction;
static PetscErrorCode MatDensePerformReduction_Private(MatDenseReductionChunk *rc0,PetscInt nr,MPI_Op op,MPI_Comm comm,PetscBool free_nr);
#undef __FUNCT__
#define __FUNCT__ "MatDenseReduce"
/*@
MatDenseReduce - Performs the MPI_Allreduce with the MPI operation op over a matrix elements.
Neighbor-wise Collective on MatDense
Input Parameters:
+ A - dense matrix
- op - mpi reduction operation
Level: intermediate
@*/
PetscErrorCode MatDenseReduce(MatDense A,MPI_Op op)
{
PetscErrorCode ierr;
PetscFunctionBegin;
ierr = PetscCheckMatDenseForUpdate(A);CHKERRQ(ierr);
if (A->use_mpi_queue) {
ierr = MatDensePerformReduction(A,op,PETSC_FALSE);CHKERRQ(ierr);
} else {
MatDenseReductionChunk rc;
PetscInt l;
ierr = MatDenseSerialize(A,&l,PETSC_NULL);CHKERRQ(ierr);
ierr = MatDenseGetArray(A,&rc.v);CHKERRQ(ierr);
rc.A = A;
rc.next = PETSC_NULL;
ierr = MatDensePerformReduction_Private(&rc,l,op,((PetscObject)A)->comm,PETSC_FALSE);CHKERRQ(ierr);
}
PetscFunctionReturn(0);
}
#include <omp.h>
#undef __FUNCT__
#define __FUNCT__ "MatDensePerformReduction"
PetscErrorCode MatDensePerformReduction(MatDense A,MPI_Op op,PetscBool immediate)
{
PetscErrorCode ierr;
PetscInt l;
MatDenseReductionChunk nr0,*nr;
PetscScalar *v;
static MatDenseReduction *currentr = PETSC_NULL; /* reduction queue */
static PetscBool locks_initialized = PETSC_FALSE;
static omp_lock_t lock; /* reduction in curse */
static omp_lock_t lockq; /* processing the queue */
PetscBool new_thread,waitq;
PetscFunctionBegin;
/* Quick return */
if (!A && (!locks_initialized || !immediate)) PetscFunctionReturn(0);
/* Init lock if needed */
if (!locks_initialized) {
omp_init_lock(&lock);
omp_init_lock(&lockq);
locks_initialized = PETSC_TRUE;
}
if (A) {
/* Prepare reduction */
ierr = MatDenseSerialize(A,&l,PETSC_NULL);CHKERRQ(ierr);
ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
A->is_pending = PETSC_TRUE;
nr0.A = A;
nr0.v = v;
nr0.next = PETSC_NULL;
if (omp_test_lock(&lock)) {
ierr = MatDensePerformReduction_Private(&nr0,l,op,((PetscObject)A)->comm,PETSC_FALSE);CHKERRQ(ierr);
omp_unset_lock(&lock);
PetscFunctionReturn(0);
}
#pragma omp critical lock_currentr
{
/* If the communicator or the operation of the currect reduction are not compatible the reduction is not accumulated to them */
waitq = (currentr && (((PetscObject)A)->comm != currentr->comm || currentr->op != op))?PETSC_TRUE:PETSC_FALSE;
}
if (waitq) {
omp_set_lock(&lockq);
omp_unset_lock(&lockq);
}
#pragma omp critical lock_currentr
{
/* Create the list of reductions if needed */
if (!currentr) {
ierr = PetscMalloc(sizeof(MatDenseReduction),¤tr);CHKERRQ(ierr);
currentr->r = PETSC_NULL;
currentr->nr = 0;
currentr->nobjects = 0;
currentr->op = op;
currentr->comm = ((PetscObject)A)->comm;
new_thread = PETSC_TRUE;
} else {
new_thread = PETSC_FALSE;
}
/* Accumulate reduction */
ierr = PetscMalloc(sizeof(MatDenseReductionChunk),&nr);CHKERRQ(ierr);
nr->A = A;
nr->v = v;
nr->next = currentr->r;
currentr->r = nr;
currentr->nobjects++;
currentr->nr+= l;
}
/* Perform the reduction if it is immediate */
if (new_thread) {
omp_set_lock(&lockq);
#pragma omp parallel
#pragma omp single nowait
{
PetscErrorCode ierr;
MatDenseReduction *currentr0;
omp_set_lock(&lock);
#pragma omp critical lock_currentr
{
currentr0 = currentr;
currentr = PETSC_NULL;
}
omp_unset_lock(&lockq);
ierr = MatDensePerformReduction_Private(currentr0->r,currentr0->nr,currentr0->op,currentr0->comm,PETSC_TRUE);CHKERRABORT(currentr0->comm,ierr);
ierr = PetscFree(currentr0);CHKERRABORT(currentr0->comm,ierr);
omp_unset_lock(&lock);
}
}
}
if (immediate) {
omp_set_lock(&lockq);
omp_unset_lock(&lockq);
omp_set_lock(&lock);
omp_unset_lock(&lock);
}
PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatDensePerformReduction_Private"
static PetscErrorCode MatDensePerformReduction_Private(MatDenseReductionChunk *rc0,PetscInt nr,MPI_Op op,MPI_Comm comm,PetscBool free_nr)
{
PetscErrorCode ierr;
PetscInt l;
MatDenseReductionChunk *oldr,*rc;
PetscScalar *in,*v;
PetscFunctionBegin;
ierr = PetscMalloc(sizeof(PetscScalar)*nr,&in);CHKERRQ(ierr);
/* Copy the matrix elements into the array */
for(rc = rc0, v = in; rc; rc = rc->next, v+= l) {
ierr = MatDenseSerialize(rc->A,&l,v);CHKERRQ(ierr);
}
/* Perform the reduction */
MPI_Allreduce(MPI_IN_PLACE,in,nr,MPIU_SCALAR,op,comm);
/* Copy back the reduced elements and destroy the list */
for(rc = rc0, v = in; rc; ) {
ierr = MatDenseDeserialize(rc->A,-1,v);CHKERRQ(ierr);
rc->A->is_pending = PETSC_FALSE;
ierr = MatDenseRestoreArray(rc->A,&rc->v);CHKERRQ(ierr);
oldr = rc;
rc = rc->next;
if (free_nr) {
ierr = PetscFree(oldr);CHKERRQ(ierr);
}
}
ierr = PetscFree(in);CHKERRQ(ierr);
PetscFunctionReturn(0);
}