• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /asuswrt-rt-n18u-9.0.0.4.380.2695/release/src-rt-6.x.4708/toolchains/hndtools-armeabi-2013.11/arm-none-eabi/include/c++/4.8.1/parallel/
1// -*- C++ -*-
2
3// Copyright (C) 2007-2013 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library.  This library is free
6// software; you can redistribute it and/or modify it under the terms
7// of the GNU General Public License as published by the Free Software
8// Foundation; either version 3, or (at your option) any later
9// version.
10
11// This library is distributed in the hope that it will be useful, but
12// WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14// General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23// <http://www.gnu.org/licenses/>.
24
25/** @file parallel/partial_sum.h
26 *  @brief Parallel implementation of std::partial_sum(), i.e. prefix
27*  sums.
28 *  This file is a GNU parallel extension to the Standard C++ Library.
29 */
30
31// Written by Johannes Singler.
32
33#ifndef _GLIBCXX_PARALLEL_PARTIAL_SUM_H
34#define _GLIBCXX_PARALLEL_PARTIAL_SUM_H 1
35
36#include <omp.h>
37#include <new>
38#include <bits/stl_algobase.h>
39#include <parallel/parallel.h>
40#include <parallel/numericfwd.h>
41
42namespace __gnu_parallel
43{
44  // Problem: there is no 0-element given.
45
46  /** @brief Base case prefix sum routine.
47   *  @param __begin Begin iterator of input sequence.
48   *  @param __end End iterator of input sequence.
49   *  @param __result Begin iterator of output sequence.
50   *  @param __bin_op Associative binary function.
51   *  @param __value Start value. Must be passed since the neutral
52   *  element is unknown in general.
53   *  @return End iterator of output sequence. */
54  template<typename _IIter,
55	   typename _OutputIterator,
56	   typename _BinaryOperation>
57    _OutputIterator
58    __parallel_partial_sum_basecase(_IIter __begin, _IIter __end,
59				    _OutputIterator __result,
60				    _BinaryOperation __bin_op,
61      typename std::iterator_traits <_IIter>::value_type __value)
62    {
63      if (__begin == __end)
64	return __result;
65
66      while (__begin != __end)
67	{
68	  __value = __bin_op(__value, *__begin);
69	  *__result = __value;
70	  ++__result;
71	  ++__begin;
72	}
73      return __result;
74    }
75
76  /** @brief Parallel partial sum implementation, two-phase approach,
77      no recursion.
78      *  @param __begin Begin iterator of input sequence.
79      *  @param __end End iterator of input sequence.
80      *  @param __result Begin iterator of output sequence.
81      *  @param __bin_op Associative binary function.
82      *  @param __n Length of sequence.
83      *  @return End iterator of output sequence.
84      */
85  template<typename _IIter,
86	   typename _OutputIterator,
87	   typename _BinaryOperation>
88    _OutputIterator
89    __parallel_partial_sum_linear(_IIter __begin, _IIter __end,
90				  _OutputIterator __result,
91				  _BinaryOperation __bin_op,
92      typename std::iterator_traits<_IIter>::difference_type __n)
93    {
94      typedef std::iterator_traits<_IIter> _TraitsType;
95      typedef typename _TraitsType::value_type _ValueType;
96      typedef typename _TraitsType::difference_type _DifferenceType;
97
98      if (__begin == __end)
99	return __result;
100
101      _ThreadIndex __num_threads =
102        std::min<_DifferenceType>(__get_max_threads(), __n - 1);
103
104      if (__num_threads < 2)
105	{
106	  *__result = *__begin;
107	  return __parallel_partial_sum_basecase(__begin + 1, __end,
108						 __result + 1, __bin_op,
109						 *__begin);
110	}
111
112      _DifferenceType* __borders;
113      _ValueType* __sums;
114
115      const _Settings& __s = _Settings::get();
116
117#     pragma omp parallel num_threads(__num_threads)
118      {
119#       pragma omp single
120	{
121	  __num_threads = omp_get_num_threads();
122
123	  __borders = new _DifferenceType[__num_threads + 2];
124
125	  if (__s.partial_sum_dilation == 1.0f)
126	    __equally_split(__n, __num_threads + 1, __borders);
127	  else
128	    {
129	      _DifferenceType __first_part_length =
130		  std::max<_DifferenceType>(1,
131		    __n / (1.0f + __s.partial_sum_dilation * __num_threads));
132	      _DifferenceType __chunk_length =
133		  (__n - __first_part_length) / __num_threads;
134	      _DifferenceType __borderstart =
135		  __n - __num_threads * __chunk_length;
136	      __borders[0] = 0;
137	      for (_ThreadIndex __i = 1; __i < (__num_threads + 1); ++__i)
138		{
139		  __borders[__i] = __borderstart;
140		  __borderstart += __chunk_length;
141		}
142	      __borders[__num_threads + 1] = __n;
143	    }
144
145	  __sums = static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
146                                                           * __num_threads));
147	  _OutputIterator __target_end;
148	} //single
149
150        _ThreadIndex __iam = omp_get_thread_num();
151        if (__iam == 0)
152          {
153            *__result = *__begin;
154            __parallel_partial_sum_basecase(__begin + 1,
155					    __begin + __borders[1],
156					    __result + 1,
157					    __bin_op, *__begin);
158            ::new(&(__sums[__iam])) _ValueType(*(__result + __borders[1] - 1));
159          }
160        else
161          {
162            ::new(&(__sums[__iam]))
163              _ValueType(__gnu_parallel::accumulate(
164                                         __begin + __borders[__iam] + 1,
165                                         __begin + __borders[__iam + 1],
166                                         *(__begin + __borders[__iam]),
167                                         __bin_op,
168                                         __gnu_parallel::sequential_tag()));
169          }
170
171#       pragma omp barrier
172
173#       pragma omp single
174	__parallel_partial_sum_basecase(__sums + 1, __sums + __num_threads,
175					__sums + 1, __bin_op, __sums[0]);
176
177#       pragma omp barrier
178
179	// Still same team.
180        __parallel_partial_sum_basecase(__begin + __borders[__iam + 1],
181					__begin + __borders[__iam + 2],
182					__result + __borders[__iam + 1],
183					__bin_op, __sums[__iam]);
184      } //parallel
185
186      for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
187	__sums[__i].~_ValueType();
188      ::operator delete(__sums);
189
190      delete[] __borders;
191
192      return __result + __n;
193    }
194
195  /** @brief Parallel partial sum front-__end.
196   *  @param __begin Begin iterator of input sequence.
197   *  @param __end End iterator of input sequence.
198   *  @param __result Begin iterator of output sequence.
199   *  @param __bin_op Associative binary function.
200   *  @return End iterator of output sequence. */
201  template<typename _IIter,
202	   typename _OutputIterator,
203	   typename _BinaryOperation>
204    _OutputIterator
205    __parallel_partial_sum(_IIter __begin, _IIter __end,
206			   _OutputIterator __result, _BinaryOperation __bin_op)
207    {
208      _GLIBCXX_CALL(__begin - __end)
209
210      typedef std::iterator_traits<_IIter> _TraitsType;
211      typedef typename _TraitsType::value_type _ValueType;
212      typedef typename _TraitsType::difference_type _DifferenceType;
213
214      _DifferenceType __n = __end - __begin;
215
216      switch (_Settings::get().partial_sum_algorithm)
217	{
218	case LINEAR:
219	  // Need an initial offset.
220	  return __parallel_partial_sum_linear(__begin, __end, __result,
221					       __bin_op, __n);
222	default:
223	  // Partial_sum algorithm not implemented.
224	  _GLIBCXX_PARALLEL_ASSERT(0);
225	  return __result + __n;
226	}
227    }
228}
229
230#endif /* _GLIBCXX_PARALLEL_PARTIAL_SUM_H */
231