138032Speter// -*- C++ -*- 264562Sgshapiro 364562Sgshapiro// Copyright (C) 2007-2020 Free Software Foundation, Inc. 438032Speter// 538032Speter// This file is part of the GNU ISO C++ Library. This library is free 638032Speter// software; you can redistribute it and/or modify it under the terms 738032Speter// of the GNU General Public License as published by the Free Software 838032Speter// Foundation; either version 3, or (at your option) any later 938032Speter// version. 1038032Speter 1138032Speter// This library is distributed in the hope that it will be useful, but 1238032Speter// WITHOUT ANY WARRANTY; without even the implied warranty of 1338032Speter// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1464562Sgshapiro// General Public License for more details. 1538032Speter 1638032Speter// Under Section 7 of GPL version 3, you are granted additional 1764562Sgshapiro// permissions described in the GCC Runtime Library Exception, version 1871345Sgshapiro// 3.1, as published by the Free Software Foundation. 1964562Sgshapiro 2071345Sgshapiro// You should have received a copy of the GNU General Public License and 2164562Sgshapiro// a copy of the GCC Runtime Library Exception along with this program; 2264562Sgshapiro// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 2338032Speter// <http://www.gnu.org/licenses/>. 2464562Sgshapiro 2538032Speter/** @file parallel/partial_sum.h 2638032Speter * @brief Parallel implementation of std::partial_sum(), i.e. prefix 2764562Sgshapiro* sums. 2838032Speter * This file is a GNU parallel extension to the Standard C++ Library. 2938032Speter */ 3038032Speter 3138032Speter// Written by Johannes Singler. 3238032Speter 3338032Speter#ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H 3438032Speter#define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1 3538032Speter 3638032Speter#include <omp.h> 3738032Speter#include <new> 3864562Sgshapiro#include <bits/stl_algobase.h> 3964562Sgshapiro#include <parallel/parallel.h> 4064562Sgshapiro#include <parallel/numericfwd.h> 4138032Speter 4238032Speternamespace __gnu_parallel 4338032Speter{ 4438032Speter // Problem: there is no 0-element given. 4538032Speter 4638032Speter /** @brief Base case prefix sum routine. 4738032Speter * @param __begin Begin iterator of input sequence. 4864562Sgshapiro * @param __end End iterator of input sequence. 4964562Sgshapiro * @param __result Begin iterator of output sequence. 5064562Sgshapiro * @param __bin_op Associative binary function. 5138032Speter * @param __value Start value. Must be passed since the neutral 5238032Speter * element is unknown in general. 5338032Speter * @return End iterator of output sequence. */ 5464562Sgshapiro template<typename _IIter, 5564562Sgshapiro typename _OutputIterator, 5664562Sgshapiro typename _BinaryOperation> 5738032Speter _OutputIterator 5864562Sgshapiro __parallel_partial_sum_basecase(_IIter __begin, _IIter __end, 5964562Sgshapiro _OutputIterator __result, 6064562Sgshapiro _BinaryOperation __bin_op, 6138032Speter typename std::iterator_traits <_IIter>::value_type __value) 6264562Sgshapiro { 6364562Sgshapiro if (__begin == __end) 6464562Sgshapiro return __result; 6538032Speter 6664562Sgshapiro while (__begin != __end) 6764562Sgshapiro { 6864562Sgshapiro __value = __bin_op(__value, *__begin); 6938032Speter *__result = __value; 7064562Sgshapiro ++__result; 7164562Sgshapiro ++__begin; 7264562Sgshapiro } 7338032Speter return __result; 7464562Sgshapiro } 7564562Sgshapiro 7664562Sgshapiro /** @brief Parallel partial sum implementation, two-phase approach, 7764562Sgshapiro no recursion. 7864562Sgshapiro * @param __begin Begin iterator of input sequence. 7964562Sgshapiro * @param __end End iterator of input sequence. 8064562Sgshapiro * @param __result Begin iterator of output sequence. 8164562Sgshapiro * @param __bin_op Associative binary function. 8264562Sgshapiro * @param __n Length of sequence. 8364562Sgshapiro * @return End iterator of output sequence. 8464562Sgshapiro */ 8538032Speter template<typename _IIter, 8638032Speter typename _OutputIterator, 8738032Speter typename _BinaryOperation> 8838032Speter _OutputIterator 8938032Speter __parallel_partial_sum_linear(_IIter __begin, _IIter __end, 9038032Speter _OutputIterator __result, 9164562Sgshapiro _BinaryOperation __bin_op, 9264562Sgshapiro typename std::iterator_traits<_IIter>::difference_type __n) 9338032Speter { 9438032Speter typedef std::iterator_traits<_IIter> _TraitsType; 9538032Speter typedef typename _TraitsType::value_type _ValueType; 9638032Speter typedef typename _TraitsType::difference_type _DifferenceType; 9738032Speter 9838032Speter if (__begin == __end) 9938032Speter return __result; 10038032Speter 10138032Speter _ThreadIndex __num_threads = 10238032Speter std::min<_DifferenceType>(__get_max_threads(), __n - 1); 10338032Speter 10438032Speter if (__num_threads < 2) 10538032Speter { 10664562Sgshapiro *__result = *__begin; 10738032Speter return __parallel_partial_sum_basecase(__begin + 1, __end, 10838032Speter __result + 1, __bin_op, 10964562Sgshapiro *__begin); 11038032Speter } 11138032Speter 11238032Speter _DifferenceType* __borders; 11338032Speter _ValueType* __sums; 11438032Speter 11538032Speter const _Settings& __s = _Settings::get(); 11638032Speter 11738032Speter# pragma omp parallel num_threads(__num_threads) 11838032Speter { 11938032Speter# pragma omp single 12038032Speter { 12138032Speter __num_threads = omp_get_num_threads(); 12238032Speter 12338032Speter __borders = new _DifferenceType[__num_threads + 2]; 12438032Speter 12564562Sgshapiro if (__s.partial_sum_dilation == 1.0f) 12638032Speter __equally_split(__n, __num_threads + 1, __borders); 12738032Speter else 12838032Speter { 12964562Sgshapiro _DifferenceType __first_part_length = 13038032Speter std::max<_DifferenceType>(1, 13138032Speter __n / (1.0f + __s.partial_sum_dilation * __num_threads)); 13264562Sgshapiro _DifferenceType __chunk_length = 13364562Sgshapiro (__n - __first_part_length) / __num_threads; 13438032Speter _DifferenceType __borderstart = 13538032Speter __n - __num_threads * __chunk_length; 13638032Speter __borders[0] = 0; 13738032Speter for (_ThreadIndex __i = 1; __i < (__num_threads + 1); ++__i) 13838032Speter { 13938032Speter __borders[__i] = __borderstart; 14038032Speter __borderstart += __chunk_length; 14138032Speter } 14238032Speter __borders[__num_threads + 1] = __n; 14338032Speter } 14464562Sgshapiro 14564562Sgshapiro __sums = static_cast<_ValueType*>(::operator new(sizeof(_ValueType) 14664562Sgshapiro * __num_threads)); 14764562Sgshapiro _OutputIterator __target_end; 14864562Sgshapiro } //single 14964562Sgshapiro 15038032Speter _ThreadIndex __iam = omp_get_thread_num(); 15138032Speter if (__iam == 0) 15238032Speter { 15338032Speter *__result = *__begin; 15438032Speter __parallel_partial_sum_basecase(__begin + 1, 15538032Speter __begin + __borders[1], 15638032Speter __result + 1, 15738032Speter __bin_op, *__begin); 15838032Speter ::new(&(__sums[__iam])) _ValueType(*(__result + __borders[1] - 1)); 15938032Speter } 16038032Speter else 16138032Speter { 16238032Speter ::new(&(__sums[__iam])) 16338032Speter _ValueType(__gnu_parallel::accumulate( 16438032Speter __begin + __borders[__iam] + 1, 16538032Speter __begin + __borders[__iam + 1], 16638032Speter *(__begin + __borders[__iam]), 16738032Speter __bin_op, 16838032Speter __gnu_parallel::sequential_tag())); 16938032Speter } 17038032Speter 17138032Speter# pragma omp barrier 17238032Speter 17338032Speter# pragma omp single 17464562Sgshapiro __parallel_partial_sum_basecase(__sums + 1, __sums + __num_threads, 17538032Speter __sums + 1, __bin_op, __sums[0]); 17638032Speter 17738032Speter# pragma omp barrier 17838032Speter 17938032Speter // Still same team. 18064562Sgshapiro __parallel_partial_sum_basecase(__begin + __borders[__iam + 1], 18138032Speter __begin + __borders[__iam + 2], 18238032Speter __result + __borders[__iam + 1], 18338032Speter __bin_op, __sums[__iam]); 18438032Speter } //parallel 18538032Speter 18638032Speter for (_ThreadIndex __i = 0; __i < __num_threads; ++__i) 18764562Sgshapiro __sums[__i].~_ValueType(); 18838032Speter ::operator delete(__sums); 18964562Sgshapiro 19038032Speter delete[] __borders; 19138032Speter 19238032Speter return __result + __n; 19338032Speter } 19438032Speter 19538032Speter /** @brief Parallel partial sum front-__end. 19638032Speter * @param __begin Begin iterator of input sequence. 19738032Speter * @param __end End iterator of input sequence. 19838032Speter * @param __result Begin iterator of output sequence. 19938032Speter * @param __bin_op Associative binary function. 20038032Speter * @return End iterator of output sequence. */ 20164562Sgshapiro template<typename _IIter, 20264562Sgshapiro typename _OutputIterator, 20364562Sgshapiro typename _BinaryOperation> 20464562Sgshapiro _OutputIterator 20538032Speter __parallel_partial_sum(_IIter __begin, _IIter __end, 20638032Speter _OutputIterator __result, _BinaryOperation __bin_op) 20738032Speter { 20838032Speter _GLIBCXX_CALL(__begin - __end) 20938032Speter 21038032Speter typedef std::iterator_traits<_IIter> _TraitsType; 21138032Speter typedef typename _TraitsType::value_type _ValueType; 21238032Speter typedef typename _TraitsType::difference_type _DifferenceType; 21338032Speter 21438032Speter _DifferenceType __n = __end - __begin; 21538032Speter 21638032Speter switch (_Settings::get().partial_sum_algorithm) 21738032Speter { 21838032Speter case LINEAR: 21938032Speter // Need an initial offset. 22064562Sgshapiro return __parallel_partial_sum_linear(__begin, __end, __result, 22138032Speter __bin_op, __n); 22238032Speter default: 22338032Speter // Partial_sum algorithm not implemented. 22438032Speter _GLIBCXX_PARALLEL_ASSERT(0); 22538032Speter return __result + __n; 22638032Speter } 22738032Speter } 22838032Speter} 22938032Speter 23038032Speter#endif /* _GLIBCXX_PARALLEL_PARTIAL_SUM_H */ 23164562Sgshapiro