138032Speter// -*- C++ -*-
264562Sgshapiro
364562Sgshapiro// Copyright (C) 2007-2020 Free Software Foundation, Inc.
438032Speter//
538032Speter// This file is part of the GNU ISO C++ Library.  This library is free
638032Speter// software; you can redistribute it and/or modify it under the terms
738032Speter// of the GNU General Public License as published by the Free Software
838032Speter// Foundation; either version 3, or (at your option) any later
938032Speter// version.
1038032Speter
1138032Speter// This library is distributed in the hope that it will be useful, but
1238032Speter// WITHOUT ANY WARRANTY; without even the implied warranty of
1338032Speter// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
1464562Sgshapiro// General Public License for more details.
1538032Speter
1638032Speter// Under Section 7 of GPL version 3, you are granted additional
1764562Sgshapiro// permissions described in the GCC Runtime Library Exception, version
1871345Sgshapiro// 3.1, as published by the Free Software Foundation.
1964562Sgshapiro
2071345Sgshapiro// You should have received a copy of the GNU General Public License and
2164562Sgshapiro// a copy of the GCC Runtime Library Exception along with this program;
2264562Sgshapiro// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
2338032Speter// <http://www.gnu.org/licenses/>.
2464562Sgshapiro
2538032Speter/** @file parallel/partial_sum.h
2638032Speter *  @brief Parallel implementation of std::partial_sum(), i.e. prefix
2764562Sgshapiro*  sums.
2838032Speter *  This file is a GNU parallel extension to the Standard C++ Library.
2938032Speter */
3038032Speter
3138032Speter// Written by Johannes Singler.
3238032Speter
3338032Speter#ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H
3438032Speter#define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1
3538032Speter
3638032Speter#include <omp.h>
3738032Speter#include <new>
3864562Sgshapiro#include <bits/stl_algobase.h>
3964562Sgshapiro#include <parallel/parallel.h>
4064562Sgshapiro#include <parallel/numericfwd.h>
4138032Speter
4238032Speternamespace __gnu_parallel
4338032Speter{
4438032Speter  // Problem: there is no 0-element given.
4538032Speter
4638032Speter  /** @brief Base case prefix sum routine.
4738032Speter   *  @param __begin Begin iterator of input sequence.
4864562Sgshapiro   *  @param __end End iterator of input sequence.
4964562Sgshapiro   *  @param __result Begin iterator of output sequence.
5064562Sgshapiro   *  @param __bin_op Associative binary function.
5138032Speter   *  @param __value Start value. Must be passed since the neutral
5238032Speter   *  element is unknown in general.
5338032Speter   *  @return End iterator of output sequence. */
5464562Sgshapiro  template<typename _IIter,
5564562Sgshapiro	   typename _OutputIterator,
5664562Sgshapiro	   typename _BinaryOperation>
5738032Speter    _OutputIterator
5864562Sgshapiro    __parallel_partial_sum_basecase(_IIter __begin, _IIter __end,
5964562Sgshapiro				    _OutputIterator __result,
6064562Sgshapiro				    _BinaryOperation __bin_op,
6138032Speter      typename std::iterator_traits <_IIter>::value_type __value)
6264562Sgshapiro    {
6364562Sgshapiro      if (__begin == __end)
6464562Sgshapiro	return __result;
6538032Speter
6664562Sgshapiro      while (__begin != __end)
6764562Sgshapiro	{
6864562Sgshapiro	  __value = __bin_op(__value, *__begin);
6938032Speter	  *__result = __value;
7064562Sgshapiro	  ++__result;
7164562Sgshapiro	  ++__begin;
7264562Sgshapiro	}
7338032Speter      return __result;
7464562Sgshapiro    }
7564562Sgshapiro
7664562Sgshapiro  /** @brief Parallel partial sum implementation, two-phase approach,
7764562Sgshapiro      no recursion.
7864562Sgshapiro      *  @param __begin Begin iterator of input sequence.
7964562Sgshapiro      *  @param __end End iterator of input sequence.
8064562Sgshapiro      *  @param __result Begin iterator of output sequence.
8164562Sgshapiro      *  @param __bin_op Associative binary function.
8264562Sgshapiro      *  @param __n Length of sequence.
8364562Sgshapiro      *  @return End iterator of output sequence.
8464562Sgshapiro      */
8538032Speter  template<typename _IIter,
8638032Speter	   typename _OutputIterator,
8738032Speter	   typename _BinaryOperation>
8838032Speter    _OutputIterator
8938032Speter    __parallel_partial_sum_linear(_IIter __begin, _IIter __end,
9038032Speter				  _OutputIterator __result,
9164562Sgshapiro				  _BinaryOperation __bin_op,
9264562Sgshapiro      typename std::iterator_traits<_IIter>::difference_type __n)
9338032Speter    {
9438032Speter      typedef std::iterator_traits<_IIter> _TraitsType;
9538032Speter      typedef typename _TraitsType::value_type _ValueType;
9638032Speter      typedef typename _TraitsType::difference_type _DifferenceType;
9738032Speter
9838032Speter      if (__begin == __end)
9938032Speter	return __result;
10038032Speter
10138032Speter      _ThreadIndex __num_threads =
10238032Speter        std::min<_DifferenceType>(__get_max_threads(), __n - 1);
10338032Speter
10438032Speter      if (__num_threads < 2)
10538032Speter	{
10664562Sgshapiro	  *__result = *__begin;
10738032Speter	  return __parallel_partial_sum_basecase(__begin + 1, __end,
10838032Speter						 __result + 1, __bin_op,
10964562Sgshapiro						 *__begin);
11038032Speter	}
11138032Speter
11238032Speter      _DifferenceType* __borders;
11338032Speter      _ValueType* __sums;
11438032Speter
11538032Speter      const _Settings& __s = _Settings::get();
11638032Speter
11738032Speter#     pragma omp parallel num_threads(__num_threads)
11838032Speter      {
11938032Speter#       pragma omp single
12038032Speter	{
12138032Speter	  __num_threads = omp_get_num_threads();
12238032Speter
12338032Speter	  __borders = new _DifferenceType[__num_threads + 2];
12438032Speter
12564562Sgshapiro	  if (__s.partial_sum_dilation == 1.0f)
12638032Speter	    __equally_split(__n, __num_threads + 1, __borders);
12738032Speter	  else
12838032Speter	    {
12964562Sgshapiro	      _DifferenceType __first_part_length =
13038032Speter		  std::max<_DifferenceType>(1,
13138032Speter		    __n / (1.0f + __s.partial_sum_dilation * __num_threads));
13264562Sgshapiro	      _DifferenceType __chunk_length =
13364562Sgshapiro		  (__n - __first_part_length) / __num_threads;
13438032Speter	      _DifferenceType __borderstart =
13538032Speter		  __n - __num_threads * __chunk_length;
13638032Speter	      __borders[0] = 0;
13738032Speter	      for (_ThreadIndex __i = 1; __i < (__num_threads + 1); ++__i)
13838032Speter		{
13938032Speter		  __borders[__i] = __borderstart;
14038032Speter		  __borderstart += __chunk_length;
14138032Speter		}
14238032Speter	      __borders[__num_threads + 1] = __n;
14338032Speter	    }
14464562Sgshapiro
14564562Sgshapiro	  __sums = static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
14664562Sgshapiro                                                           * __num_threads));
14764562Sgshapiro	  _OutputIterator __target_end;
14864562Sgshapiro	} //single
14964562Sgshapiro
15038032Speter        _ThreadIndex __iam = omp_get_thread_num();
15138032Speter        if (__iam == 0)
15238032Speter          {
15338032Speter            *__result = *__begin;
15438032Speter            __parallel_partial_sum_basecase(__begin + 1,
15538032Speter					    __begin + __borders[1],
15638032Speter					    __result + 1,
15738032Speter					    __bin_op, *__begin);
15838032Speter            ::new(&(__sums[__iam])) _ValueType(*(__result + __borders[1] - 1));
15938032Speter          }
16038032Speter        else
16138032Speter          {
16238032Speter            ::new(&(__sums[__iam]))
16338032Speter              _ValueType(__gnu_parallel::accumulate(
16438032Speter                                         __begin + __borders[__iam] + 1,
16538032Speter                                         __begin + __borders[__iam + 1],
16638032Speter                                         *(__begin + __borders[__iam]),
16738032Speter                                         __bin_op,
16838032Speter                                         __gnu_parallel::sequential_tag()));
16938032Speter          }
17038032Speter
17138032Speter#       pragma omp barrier
17238032Speter
17338032Speter#       pragma omp single
17464562Sgshapiro	__parallel_partial_sum_basecase(__sums + 1, __sums + __num_threads,
17538032Speter					__sums + 1, __bin_op, __sums[0]);
17638032Speter
17738032Speter#       pragma omp barrier
17838032Speter
17938032Speter	// Still same team.
18064562Sgshapiro        __parallel_partial_sum_basecase(__begin + __borders[__iam + 1],
18138032Speter					__begin + __borders[__iam + 2],
18238032Speter					__result + __borders[__iam + 1],
18338032Speter					__bin_op, __sums[__iam]);
18438032Speter      } //parallel
18538032Speter
18638032Speter      for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
18764562Sgshapiro	__sums[__i].~_ValueType();
18838032Speter      ::operator delete(__sums);
18964562Sgshapiro
19038032Speter      delete[] __borders;
19138032Speter
19238032Speter      return __result + __n;
19338032Speter    }
19438032Speter
19538032Speter  /** @brief Parallel partial sum front-__end.
19638032Speter   *  @param __begin Begin iterator of input sequence.
19738032Speter   *  @param __end End iterator of input sequence.
19838032Speter   *  @param __result Begin iterator of output sequence.
19938032Speter   *  @param __bin_op Associative binary function.
20038032Speter   *  @return End iterator of output sequence. */
20164562Sgshapiro  template<typename _IIter,
20264562Sgshapiro	   typename _OutputIterator,
20364562Sgshapiro	   typename _BinaryOperation>
20464562Sgshapiro    _OutputIterator
20538032Speter    __parallel_partial_sum(_IIter __begin, _IIter __end,
20638032Speter			   _OutputIterator __result, _BinaryOperation __bin_op)
20738032Speter    {
20838032Speter      _GLIBCXX_CALL(__begin - __end)
20938032Speter
21038032Speter      typedef std::iterator_traits<_IIter> _TraitsType;
21138032Speter      typedef typename _TraitsType::value_type _ValueType;
21238032Speter      typedef typename _TraitsType::difference_type _DifferenceType;
21338032Speter
21438032Speter      _DifferenceType __n = __end - __begin;
21538032Speter
21638032Speter      switch (_Settings::get().partial_sum_algorithm)
21738032Speter	{
21838032Speter	case LINEAR:
21938032Speter	  // Need an initial offset.
22064562Sgshapiro	  return __parallel_partial_sum_linear(__begin, __end, __result,
22138032Speter					       __bin_op, __n);
22238032Speter	default:
22338032Speter	  // Partial_sum algorithm not implemented.
22438032Speter	  _GLIBCXX_PARALLEL_ASSERT(0);
22538032Speter	  return __result + __n;
22638032Speter	}
22738032Speter    }
22838032Speter}
22938032Speter
23038032Speter#endif /* _GLIBCXX_PARALLEL_PARTIAL_SUM_H */
23164562Sgshapiro