377 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			C++
		
	
	
	
		
		
			
		
	
	
			377 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			C++
		
	
	
	
|  | // Copyright (C) 2005-2006 Douglas Gregor <doug.gregor@gmail.com>.
 | ||
|  | // Copyright (C) 2004 The Trustees of Indiana University
 | ||
|  | 
 | ||
|  | // Use, modification and distribution is subject to the Boost Software
 | ||
|  | // License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
 | ||
|  | // http://www.boost.org/LICENSE_1_0.txt)
 | ||
|  | 
 | ||
|  | //   Authors: Douglas Gregor
 | ||
|  | //            Andrew Lumsdaine
 | ||
|  | 
 | ||
|  | // Message Passing Interface 1.1 -- Section 4.9.1. Reduce
 | ||
|  | #ifndef BOOST_MPI_REDUCE_HPP
 | ||
|  | #define BOOST_MPI_REDUCE_HPP
 | ||
|  | 
 | ||
|  | #include <boost/mpi/exception.hpp>
 | ||
|  | #include <boost/mpi/datatype.hpp>
 | ||
|  | 
 | ||
|  | // For (de-)serializing sends and receives
 | ||
|  | #include <boost/mpi/packed_oarchive.hpp>
 | ||
|  | #include <boost/mpi/packed_iarchive.hpp>
 | ||
|  | 
 | ||
|  | // For packed_[io]archive sends and receives
 | ||
|  | #include <boost/mpi/detail/point_to_point.hpp>
 | ||
|  | 
 | ||
|  | #include <boost/mpi/communicator.hpp>
 | ||
|  | #include <boost/mpi/environment.hpp>
 | ||
|  | #include <boost/mpi/detail/computation_tree.hpp>
 | ||
|  | #include <boost/mpi/operations.hpp>
 | ||
|  | #include <algorithm>
 | ||
|  | #include <exception>
 | ||
|  | #include <boost/assert.hpp>
 | ||
|  | #include <boost/scoped_array.hpp>
 | ||
|  | 
 | ||
|  | namespace boost { namespace mpi { | ||
|  | 
 | ||
|  | 
 | ||
|  | /************************************************************************
 | ||
|  |  * Implementation details                                               * | ||
|  |  ************************************************************************/ | ||
|  | namespace detail { | ||
|  |   /**********************************************************************
 | ||
|  |    * Simple reduction with MPI_Reduce                                   * | ||
|  |    **********************************************************************/ | ||
|  |   // We are reducing at the root for a type that has an associated MPI
 | ||
|  |   // datatype and operation, so we'll use MPI_Reduce directly.
 | ||
|  |   template<typename T, typename Op> | ||
|  |   void | ||
|  |   reduce_impl(const communicator& comm, const T* in_values, int n, | ||
|  |               T* out_values, Op /*op*/, int root, mpl::true_ /*is_mpi_op*/, | ||
|  |               mpl::true_/*is_mpi_datatype*/) | ||
|  |   { | ||
|  |     BOOST_MPI_CHECK_RESULT(MPI_Reduce, | ||
|  |                            (const_cast<T*>(in_values), out_values, n, | ||
|  |                             boost::mpi::get_mpi_datatype<T>(*in_values), | ||
|  |                             (is_mpi_op<Op, T>::op()), root, comm)); | ||
|  |   } | ||
|  | 
 | ||
|  |   // We are reducing to the root for a type that has an associated MPI
 | ||
|  |   // datatype and operation, so we'll use MPI_Reduce directly.
 | ||
|  |   template<typename T, typename Op> | ||
|  |   void | ||
|  |   reduce_impl(const communicator& comm, const T* in_values, int n, Op /*op*/, | ||
|  |               int root, mpl::true_ /*is_mpi_op*/, mpl::true_/*is_mpi_datatype*/) | ||
|  |   { | ||
|  |     BOOST_MPI_CHECK_RESULT(MPI_Reduce, | ||
|  |                            (const_cast<T*>(in_values), 0, n, | ||
|  |                             boost::mpi::get_mpi_datatype<T>(*in_values), | ||
|  |                             (is_mpi_op<Op, T>::op()), root, comm)); | ||
|  |   } | ||
|  | 
 | ||
|  |   /**********************************************************************
 | ||
|  |    * User-defined reduction with MPI_Reduce                             * | ||
|  |    **********************************************************************/ | ||
|  | 
 | ||
|  |   // We are reducing at the root for a type that has an associated MPI
 | ||
|  |   // datatype but with a custom operation. We'll use MPI_Reduce
 | ||
|  |   // directly, but we'll need to create an MPI_Op manually.
 | ||
|  |   template<typename T, typename Op> | ||
|  |   void | ||
|  |   reduce_impl(const communicator& comm, const T* in_values, int n, | ||
|  |               T* out_values, Op op, int root, mpl::false_ /*is_mpi_op*/, | ||
|  |               mpl::true_/*is_mpi_datatype*/) | ||
|  |   { | ||
|  |     user_op<Op, T> mpi_op(op); | ||
|  |     BOOST_MPI_CHECK_RESULT(MPI_Reduce, | ||
|  |                            (const_cast<T*>(in_values), out_values, n, | ||
|  |                             boost::mpi::get_mpi_datatype<T>(*in_values), | ||
|  |                             mpi_op.get_mpi_op(), root, comm)); | ||
|  |   } | ||
|  | 
 | ||
|  |   // We are reducing to the root for a type that has an associated MPI
 | ||
|  |   // datatype but with a custom operation. We'll use MPI_Reduce
 | ||
|  |   // directly, but we'll need to create an MPI_Op manually.
 | ||
|  |   template<typename T, typename Op> | ||
|  |   void | ||
|  |   reduce_impl(const communicator& comm, const T* in_values, int n, Op op, | ||
|  |               int root, mpl::false_/*is_mpi_op*/, mpl::true_/*is_mpi_datatype*/) | ||
|  |   { | ||
|  |     user_op<Op, T> mpi_op(op); | ||
|  |     BOOST_MPI_CHECK_RESULT(MPI_Reduce, | ||
|  |                            (const_cast<T*>(in_values), 0, n, | ||
|  |                             boost::mpi::get_mpi_datatype<T>(*in_values), | ||
|  |                             mpi_op.get_mpi_op(), root, comm)); | ||
|  |   } | ||
|  | 
 | ||
|  |   /**********************************************************************
 | ||
|  |    * User-defined, tree-based reduction for non-MPI data types          * | ||
|  |    **********************************************************************/ | ||
|  | 
 | ||
|  |   // Commutative reduction
 | ||
|  |   template<typename T, typename Op> | ||
|  |   void | ||
|  |   tree_reduce_impl(const communicator& comm, const T* in_values, int n, | ||
|  |                    T* out_values, Op op, int root, | ||
|  |                    mpl::true_ /*is_commutative*/) | ||
|  |   { | ||
|  |     std::copy(in_values, in_values + n, out_values); | ||
|  | 
 | ||
|  |     int size = comm.size(); | ||
|  |     int rank = comm.rank(); | ||
|  | 
 | ||
|  |     // The computation tree we will use.
 | ||
|  |     detail::computation_tree tree(rank, size, root); | ||
|  | 
 | ||
|  |     int tag = environment::collectives_tag(); | ||
|  | 
 | ||
|  |     MPI_Status status; | ||
|  |     int children = 0; | ||
|  |     for (int child = tree.child_begin(); | ||
|  |          children < tree.branching_factor() && child != root; | ||
|  |          ++children, child = (child + 1) % size) { | ||
|  |       // Receive archive
 | ||
|  |       packed_iarchive ia(comm); | ||
|  |       detail::packed_archive_recv(comm, child, tag, ia, status); | ||
|  | 
 | ||
|  |       T incoming; | ||
|  |       for (int i = 0; i < n; ++i) { | ||
|  |         ia >> incoming; | ||
|  |         out_values[i] = op(out_values[i], incoming); | ||
|  |       } | ||
|  |     } | ||
|  | 
 | ||
|  |     // For non-roots, send the result to the parent.
 | ||
|  |     if (tree.parent() != rank) { | ||
|  |       packed_oarchive oa(comm); | ||
|  |       for (int i = 0; i < n; ++i) | ||
|  |         oa << out_values[i]; | ||
|  |       detail::packed_archive_send(comm, tree.parent(), tag, oa); | ||
|  |     } | ||
|  |   } | ||
|  | 
 | ||
|  |   // Commutative reduction from a non-root.
 | ||
|  |   template<typename T, typename Op> | ||
|  |   void | ||
|  |   tree_reduce_impl(const communicator& comm, const T* in_values, int n, Op op, | ||
|  |                    int root, mpl::true_ /*is_commutative*/) | ||
|  |   { | ||
|  |     scoped_array<T> results(new T[n]); | ||
|  |     detail::tree_reduce_impl(comm, in_values, n, results.get(), op, root, | ||
|  |                              mpl::true_()); | ||
|  |   } | ||
|  | 
 | ||
|  |   // Non-commutative reduction
 | ||
|  |   template<typename T, typename Op> | ||
|  |   void | ||
|  |   tree_reduce_impl(const communicator& comm, const T* in_values, int n, | ||
|  |                    T* out_values, Op op, int root, | ||
|  |                    mpl::false_ /*is_commutative*/) | ||
|  |   { | ||
|  |     int tag = environment::collectives_tag(); | ||
|  | 
 | ||
|  |     int left_child = root / 2; | ||
|  |     int right_child = (root + comm.size()) / 2; | ||
|  | 
 | ||
|  |     MPI_Status status; | ||
|  |     if (left_child != root) { | ||
|  |       // Receive value from the left child and merge it with the value
 | ||
|  |       // we had incoming.
 | ||
|  |       packed_iarchive ia(comm); | ||
|  |       detail::packed_archive_recv(comm, left_child, tag, ia, status); | ||
|  |       T incoming; | ||
|  |       for (int i = 0; i < n; ++i) { | ||
|  |         ia >> incoming; | ||
|  |         out_values[i] = op(incoming, in_values[i]); | ||
|  |       } | ||
|  |     } else { | ||
|  |       // There was no left value, so copy our incoming value.
 | ||
|  |       std::copy(in_values, in_values + n, out_values); | ||
|  |     } | ||
|  | 
 | ||
|  |     if (right_child != root) { | ||
|  |       // Receive value from the right child and merge it with the
 | ||
|  |       // value we had incoming.
 | ||
|  |       packed_iarchive ia(comm); | ||
|  |       detail::packed_archive_recv(comm, right_child, tag, ia, status); | ||
|  |       T incoming; | ||
|  |       for (int i = 0; i < n; ++i) { | ||
|  |         ia >> incoming; | ||
|  |         out_values[i] = op(out_values[i], incoming); | ||
|  |       } | ||
|  |     } | ||
|  |   } | ||
|  | 
 | ||
|  |   // Non-commutative reduction from a non-root.
 | ||
|  |   template<typename T, typename Op> | ||
|  |   void | ||
|  |   tree_reduce_impl(const communicator& comm, const T* in_values, int n, Op op, | ||
|  |                    int root, mpl::false_ /*is_commutative*/) | ||
|  |   { | ||
|  |     int size = comm.size(); | ||
|  |     int rank = comm.rank(); | ||
|  | 
 | ||
|  |     int tag = environment::collectives_tag(); | ||
|  | 
 | ||
|  |     // Determine our parents and children in the commutative binary
 | ||
|  |     // computation tree.
 | ||
|  |     int grandparent = root; | ||
|  |     int parent = root; | ||
|  |     int left_bound = 0; | ||
|  |     int right_bound = size; | ||
|  |     int left_child, right_child; | ||
|  |     do { | ||
|  |       left_child = (left_bound + parent) / 2; | ||
|  |       right_child = (parent + right_bound) / 2; | ||
|  | 
 | ||
|  |       if (rank < parent) { | ||
|  |         // Go left.
 | ||
|  |         grandparent = parent; | ||
|  |         right_bound = parent; | ||
|  |         parent = left_child; | ||
|  |       } else if (rank > parent) { | ||
|  |         // Go right.
 | ||
|  |         grandparent = parent; | ||
|  |         left_bound = parent + 1; | ||
|  |         parent = right_child; | ||
|  |       } else { | ||
|  |         // We've found the parent
 | ||
|  |         break; | ||
|  |       } | ||
|  |     } while (true); | ||
|  | 
 | ||
|  |     // Our parent is the grandparent of our children. This is a slight
 | ||
|  |     // abuse of notation, but it makes the send-to-parent below make
 | ||
|  |     // more sense.
 | ||
|  |     parent = grandparent; | ||
|  | 
 | ||
|  |     MPI_Status status; | ||
|  |     scoped_array<T> out_values(new T[n]); | ||
|  |     if (left_child != rank) { | ||
|  |       // Receive value from the left child and merge it with the value
 | ||
|  |       // we had incoming.
 | ||
|  |       packed_iarchive ia(comm); | ||
|  |       detail::packed_archive_recv(comm, left_child, tag, ia, status); | ||
|  |       T incoming; | ||
|  |       for (int i = 0; i < n; ++i) { | ||
|  |         ia >> incoming; | ||
|  |         out_values[i] = op(incoming, in_values[i]); | ||
|  |       } | ||
|  |     } else { | ||
|  |       // There was no left value, so copy our incoming value.
 | ||
|  |       std::copy(in_values, in_values + n, out_values.get()); | ||
|  |     } | ||
|  | 
 | ||
|  |     if (right_child != rank) { | ||
|  |       // Receive value from the right child and merge it with the
 | ||
|  |       // value we had incoming.
 | ||
|  |       packed_iarchive ia(comm); | ||
|  |       detail::packed_archive_recv(comm, right_child, tag, ia, status); | ||
|  |       T incoming; | ||
|  |       for (int i = 0; i < n; ++i) { | ||
|  |         ia >> incoming; | ||
|  |         out_values[i] = op(out_values[i], incoming); | ||
|  |       } | ||
|  |     } | ||
|  | 
 | ||
|  |     // Send the combined value to our parent.
 | ||
|  |     packed_oarchive oa(comm); | ||
|  |     for (int i = 0; i < n; ++i) | ||
|  |       oa << out_values[i]; | ||
|  |     detail::packed_archive_send(comm, parent, tag, oa); | ||
|  |   } | ||
|  | 
 | ||
|  |   // We are reducing at the root for a type that has no associated MPI
 | ||
|  |   // datatype and operation, so we'll use a simple tree-based
 | ||
|  |   // algorithm.
 | ||
|  |   template<typename T, typename Op> | ||
|  |   void | ||
|  |   reduce_impl(const communicator& comm, const T* in_values, int n, | ||
|  |               T* out_values, Op op, int root, mpl::false_ /*is_mpi_op*/, | ||
|  |               mpl::false_ /*is_mpi_datatype*/) | ||
|  |   { | ||
|  |     detail::tree_reduce_impl(comm, in_values, n, out_values, op, root, | ||
|  |                              is_commutative<Op, T>()); | ||
|  |   } | ||
|  | 
 | ||
|  |   // We are reducing to the root for a type that has no associated MPI
 | ||
|  |   // datatype and operation, so we'll use a simple tree-based
 | ||
|  |   // algorithm.
 | ||
|  |   template<typename T, typename Op> | ||
|  |   void | ||
|  |   reduce_impl(const communicator& comm, const T* in_values, int n, Op op, | ||
|  |               int root, mpl::false_ /*is_mpi_op*/, | ||
|  |               mpl::false_ /*is_mpi_datatype*/) | ||
|  |   { | ||
|  |     detail::tree_reduce_impl(comm, in_values, n, op, root, | ||
|  |                              is_commutative<Op, T>()); | ||
|  |   } | ||
|  | } // end namespace detail
 | ||
|  | 
 | ||
|  | template<typename T, typename Op> | ||
|  | void | ||
|  | reduce(const communicator& comm, const T* in_values, int n, T* out_values, | ||
|  |        Op op, int root) | ||
|  | { | ||
|  |   if (comm.rank() == root) | ||
|  |     detail::reduce_impl(comm, in_values, n, out_values, op, root, | ||
|  |                         is_mpi_op<Op, T>(), is_mpi_datatype<T>()); | ||
|  |   else | ||
|  |     detail::reduce_impl(comm, in_values, n, op, root, | ||
|  |                         is_mpi_op<Op, T>(), is_mpi_datatype<T>()); | ||
|  | } | ||
|  | 
 | ||
|  | template<typename T, typename Op> | ||
|  | void | ||
|  | reduce(const communicator& comm, const T* in_values, int n, Op op, int root) | ||
|  | { | ||
|  |   BOOST_ASSERT(comm.rank() != root); | ||
|  | 
 | ||
|  |   detail::reduce_impl(comm, in_values, n, op, root, | ||
|  |                       is_mpi_op<Op, T>(), is_mpi_datatype<T>()); | ||
|  | } | ||
|  | 
 | ||
|  | template<typename T, typename Op> | ||
|  | void | ||
|  | reduce(const communicator & comm, std::vector<T> const & in_values, Op op, | ||
|  |        int root) | ||
|  | { | ||
|  |   reduce(comm, &in_values.front(), in_values.size(), op, root); | ||
|  | } | ||
|  | 
 | ||
|  | template<typename T, typename Op> | ||
|  | void | ||
|  | reduce(const communicator & comm, std::vector<T> const & in_values, | ||
|  |        std::vector<T> & out_values, Op op, int root) | ||
|  | { | ||
|  |   if (root == comm.rank()) out_values.resize(in_values.size()); | ||
|  |   reduce(comm, &in_values.front(), in_values.size(), &out_values.front(), op, | ||
|  |          root); | ||
|  | } | ||
|  | 
 | ||
|  | 
 | ||
|  | template<typename T, typename Op> | ||
|  | void | ||
|  | reduce(const communicator& comm, const T& in_value, T& out_value, Op op, | ||
|  |        int root) | ||
|  | { | ||
|  |   if (comm.rank() == root) | ||
|  |     detail::reduce_impl(comm, &in_value, 1, &out_value, op, root, | ||
|  |                         is_mpi_op<Op, T>(), is_mpi_datatype<T>()); | ||
|  |   else | ||
|  |     detail::reduce_impl(comm, &in_value, 1, op, root, | ||
|  |                         is_mpi_op<Op, T>(), is_mpi_datatype<T>()); | ||
|  | } | ||
|  | 
 | ||
|  | template<typename T, typename Op> | ||
|  | void reduce(const communicator& comm, const T& in_value, Op op, int root) | ||
|  | { | ||
|  |   BOOST_ASSERT(comm.rank() != root); | ||
|  | 
 | ||
|  |   detail::reduce_impl(comm, &in_value, 1, op, root, | ||
|  |                       is_mpi_op<Op, T>(), is_mpi_datatype<T>()); | ||
|  | } | ||
|  | 
 | ||
|  | } } // end namespace boost::mpi
 | ||
|  | 
 | ||
|  | #endif // BOOST_MPI_REDUCE_HPP
 |