Subversion Repositories slepc-dev

Rev

Blame | Last modification | View Log | RSS feed

/*
   Interface for MatDense class: MPI reduction operations

   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   SLEPc - Scalable Library for Eigenvalue Problem Computations
   Copyright (c) 2002-2010, Universidad Politecnica de Valencia, Spain

   This file is part of SLEPc.
     
   SLEPc is free software: you can redistribute it and/or modify it under  the
   terms of version 3 of the GNU Lesser General Public License as published by
   the Free Software Foundation.

   SLEPc  is  distributed in the hope that it will be useful, but WITHOUT  ANY
   WARRANTY;  without even the implied warranty of MERCHANTABILITY or  FITNESS
   FOR  A  PARTICULAR PURPOSE. See the GNU Lesser General Public  License  for
   more details.

   You  should have received a copy of the GNU Lesser General  Public  License
   along with SLEPc. If not, see <http://www.gnu.org/licenses/>.
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/


#include <private/matdenseimpl.h>            /*I "slepcmatdense.h" I*/
#include <petscblaslapack.h>

/* Structures for MatDenseReduce */
typedef struct MatDenseReductionChunk_ {
  MatDense        A;            /* object matrix to reduce */
  PetscScalar     *v;           /* pointer returned by MatDenseGetArray */
  struct MatDenseReductionChunk_ *next; /* next node in this list */
} MatDenseReductionChunk;

typedef struct {
  MatDenseReductionChunk *r;    /* a list of reduction to perform */
  PetscInt        nr,           /* total number of PetscScalar to reduce */
    nobjects;                   /* number of pending objects of this reduction */
  MPI_Op          op;           /* reduction operation */
  MPI_Comm        comm;         /* communicator */
} MatDenseReduction;

static PetscErrorCode  MatDensePerformReduction_Private(MatDenseReductionChunk *rc0,PetscInt nr,MPI_Op op,MPI_Comm comm,PetscBool free_nr);

#undef __FUNCT__
#define __FUNCT__ "MatDenseReduce"
/*@
   MatDenseReduce - Performs the MPI_Allreduce with the MPI operation op over a matrix elements.

   Neighbor-wise Collective on MatDense

   Input Parameters:
+  A - dense matrix
-  op - mpi reduction operation

   Level: intermediate

@*/

PetscErrorCode  MatDenseReduce(MatDense A,MPI_Op op)
{
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  ierr = PetscCheckMatDenseForUpdate(A);CHKERRQ(ierr);
  if (A->use_mpi_queue) {
    ierr = MatDensePerformReduction(A,op,PETSC_FALSE);CHKERRQ(ierr);
  } else {
    MatDenseReductionChunk rc;
    PetscInt l;
    ierr = MatDenseSerialize(A,&l,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(A,&rc.v);CHKERRQ(ierr);
    rc.A = A;
    rc.next = PETSC_NULL;
    ierr = MatDensePerformReduction_Private(&rc,l,op,((PetscObject)A)->comm,PETSC_FALSE);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#include <omp.h>

#undef __FUNCT__
#define __FUNCT__ "MatDensePerformReduction"
PetscErrorCode  MatDensePerformReduction(MatDense A,MPI_Op op,PetscBool immediate)
{
  PetscErrorCode  ierr;
  PetscInt        l;
  MatDenseReductionChunk nr0,*nr;
  PetscScalar     *v;
  static MatDenseReduction *currentr = PETSC_NULL; /* reduction queue */
  static PetscBool locks_initialized = PETSC_FALSE;
  static omp_lock_t lock; /* reduction in curse */
  static omp_lock_t lockq; /* processing the queue */
  PetscBool   new_thread,waitq;

  PetscFunctionBegin;
  /* Quick return */
  if (!A && (!locks_initialized || !immediate)) PetscFunctionReturn(0);

  /* Init lock if needed */
  if (!locks_initialized) {
    omp_init_lock(&lock);
    omp_init_lock(&lockq);
    locks_initialized = PETSC_TRUE;
  }

  if (A) {
    /* Prepare reduction */
    ierr = MatDenseSerialize(A,&l,PETSC_NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
    A->is_pending = PETSC_TRUE;
    nr0.A = A;
    nr0.v = v;
    nr0.next = PETSC_NULL;
 
    if (omp_test_lock(&lock)) {
      ierr = MatDensePerformReduction_Private(&nr0,l,op,((PetscObject)A)->comm,PETSC_FALSE);CHKERRQ(ierr);
      omp_unset_lock(&lock);
      PetscFunctionReturn(0);
    }
 
    #pragma omp critical lock_currentr
    {
      /* If the communicator or the operation of the currect reduction are not compatible the reduction is not accumulated to them */
      waitq = (currentr && (((PetscObject)A)->comm != currentr->comm || currentr->op != op))?PETSC_TRUE:PETSC_FALSE;
    }
    if (waitq) {
      omp_set_lock(&lockq);
      omp_unset_lock(&lockq);
    }
    #pragma omp critical lock_currentr
    {
      /* Create the list of reductions if needed */
      if (!currentr) {
        ierr = PetscMalloc(sizeof(MatDenseReduction),&currentr);CHKERRQ(ierr);
        currentr->r = PETSC_NULL;
        currentr->nr = 0;
        currentr->nobjects = 0;
        currentr->op = op;
        currentr->comm = ((PetscObject)A)->comm;
        new_thread = PETSC_TRUE;
      } else {
        new_thread = PETSC_FALSE;
      }
 
      /* Accumulate reduction */
      ierr = PetscMalloc(sizeof(MatDenseReductionChunk),&nr);CHKERRQ(ierr);
      nr->A = A;
      nr->v = v;
      nr->next = currentr->r;
      currentr->r = nr;
      currentr->nobjects++;
      currentr->nr+= l;
    }
   
    /* Perform the reduction if it is immediate */
    if (new_thread) {
      omp_set_lock(&lockq);
      #pragma omp parallel
      #pragma omp single nowait
      {
        PetscErrorCode  ierr;
        MatDenseReduction *currentr0;
        omp_set_lock(&lock);
        #pragma omp critical lock_currentr
        {
          currentr0 = currentr;
          currentr = PETSC_NULL;
        }
        omp_unset_lock(&lockq);
        ierr = MatDensePerformReduction_Private(currentr0->r,currentr0->nr,currentr0->op,currentr0->comm,PETSC_TRUE);CHKERRABORT(currentr0->comm,ierr);
        ierr = PetscFree(currentr0);CHKERRABORT(currentr0->comm,ierr);
        omp_unset_lock(&lock);
      }
    }
  }

  if (immediate) {
    omp_set_lock(&lockq);
    omp_unset_lock(&lockq);
    omp_set_lock(&lock);
    omp_unset_lock(&lock);
  }

  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatDensePerformReduction_Private"
static PetscErrorCode  MatDensePerformReduction_Private(MatDenseReductionChunk *rc0,PetscInt nr,MPI_Op op,MPI_Comm comm,PetscBool free_nr)
{
  PetscErrorCode  ierr;
  PetscInt        l;
  MatDenseReductionChunk *oldr,*rc;
  PetscScalar     *in,*v;

  PetscFunctionBegin;

  ierr = PetscMalloc(sizeof(PetscScalar)*nr,&in);CHKERRQ(ierr);
  /* Copy the matrix elements into the array */
  for(rc = rc0, v = in; rc; rc = rc->next, v+= l) {
    ierr = MatDenseSerialize(rc->A,&l,v);CHKERRQ(ierr);
  }
  /* Perform the reduction */
  MPI_Allreduce(MPI_IN_PLACE,in,nr,MPIU_SCALAR,op,comm);
  /* Copy back the reduced elements and destroy the list */
  for(rc = rc0, v = in; rc; ) {
    ierr = MatDenseDeserialize(rc->A,-1,v);CHKERRQ(ierr);
    rc->A->is_pending = PETSC_FALSE;
    ierr = MatDenseRestoreArray(rc->A,&rc->v);CHKERRQ(ierr);
    oldr = rc;
    rc = rc->next;
    if (free_nr) {
      ierr = PetscFree(oldr);CHKERRQ(ierr);
    }
  }
  ierr = PetscFree(in);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}