• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /asuswrt-rt-n18u-9.0.0.4.380.2695/release/src-rt-6.x.4708/toolchains/hndtools-armeabi-2013.11/arm-none-eabi/include/c++/4.8.1/parallel/
1// -*- C++ -*-
2
3// Copyright (C) 2007-2013 Free Software Foundation, Inc.
4//
5// This file is part of the GNU ISO C++ Library.  This library is free
6// software; you can redistribute it and/or modify it under the terms
7// of the GNU General Public License as published by the Free Software
8// Foundation; either version 3, or (at your option) any later
9// version.
10
11// This library is distributed in the hope that it will be useful, but
12// WITHOUT ANY WARRANTY; without even the implied warranty of
13// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14// General Public License for more details.
15
16// Under Section 7 of GPL version 3, you are granted additional
17// permissions described in the GCC Runtime Library Exception, version
18// 3.1, as published by the Free Software Foundation.
19
20// You should have received a copy of the GNU General Public License and
21// a copy of the GCC Runtime Library Exception along with this program;
22// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23// <http://www.gnu.org/licenses/>.
24
25/** @file parallel/multiway_mergesort.h
26 *  @brief Parallel multiway merge sort.
27 *  This file is a GNU parallel extension to the Standard C++ Library.
28 */
29
30// Written by Johannes Singler.
31
32#ifndef _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H
33#define _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H 1
34
35#include <vector>
36
37#include <parallel/basic_iterator.h>
38#include <bits/stl_algo.h>
39#include <parallel/parallel.h>
40#include <parallel/multiway_merge.h>
41
42namespace __gnu_parallel
43{
44  /** @brief Subsequence description. */
45  template<typename _DifferenceTp>
46    struct _Piece
47    {
48      typedef _DifferenceTp _DifferenceType;
49
50      /** @brief Begin of subsequence. */
51      _DifferenceType _M_begin;
52
53      /** @brief End of subsequence. */
54      _DifferenceType _M_end;
55    };
56
57  /** @brief Data accessed by all threads.
58   *
59   *  PMWMS = parallel multiway mergesort */
60  template<typename _RAIter>
61    struct _PMWMSSortingData
62    {
63      typedef std::iterator_traits<_RAIter> _TraitsType;
64      typedef typename _TraitsType::value_type _ValueType;
65      typedef typename _TraitsType::difference_type _DifferenceType;
66
67      /** @brief Number of threads involved. */
68      _ThreadIndex _M_num_threads;
69
70      /** @brief Input __begin. */
71      _RAIter _M_source;
72
73      /** @brief Start indices, per thread. */
74      _DifferenceType* _M_starts;
75
76      /** @brief Storage in which to sort. */
77      _ValueType** _M_temporary;
78
79      /** @brief Samples. */
80      _ValueType* _M_samples;
81
82      /** @brief Offsets to add to the found positions. */
83      _DifferenceType* _M_offsets;
84
85      /** @brief Pieces of data to merge @c [thread][__sequence] */
86      std::vector<_Piece<_DifferenceType> >* _M_pieces;
87  };
88
89  /**
90   *  @brief Select _M_samples from a sequence.
91   *  @param __sd Pointer to algorithm data. _Result will be placed in
92   *  @c __sd->_M_samples.
93   *  @param __num_samples Number of _M_samples to select.
94   */
95  template<typename _RAIter, typename _DifferenceTp>
96    void
97    __determine_samples(_PMWMSSortingData<_RAIter>* __sd,
98			_DifferenceTp __num_samples)
99    {
100      typedef std::iterator_traits<_RAIter> _TraitsType;
101      typedef typename _TraitsType::value_type _ValueType;
102      typedef _DifferenceTp _DifferenceType;
103
104      _ThreadIndex __iam = omp_get_thread_num();
105
106      _DifferenceType* __es = new _DifferenceType[__num_samples + 2];
107
108      __equally_split(__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam],
109		      __num_samples + 1, __es);
110
111      for (_DifferenceType __i = 0; __i < __num_samples; ++__i)
112	::new(&(__sd->_M_samples[__iam * __num_samples + __i]))
113	    _ValueType(__sd->_M_source[__sd->_M_starts[__iam]
114				       + __es[__i + 1]]);
115
116      delete[] __es;
117    }
118
119  /** @brief Split consistently. */
120  template<bool __exact, typename _RAIter,
121	   typename _Compare, typename _SortingPlacesIterator>
122    struct _SplitConsistently
123    { };
124
125  /** @brief Split by exact splitting. */
126  template<typename _RAIter, typename _Compare,
127	   typename _SortingPlacesIterator>
128    struct _SplitConsistently<true, _RAIter, _Compare, _SortingPlacesIterator>
129    {
130      void
131      operator()(const _ThreadIndex __iam,
132		 _PMWMSSortingData<_RAIter>* __sd,
133		 _Compare& __comp,
134		 const typename
135		 std::iterator_traits<_RAIter>::difference_type
136		 __num_samples) const
137      {
138#       pragma omp barrier
139
140	std::vector<std::pair<_SortingPlacesIterator,
141	                      _SortingPlacesIterator> >
142	  __seqs(__sd->_M_num_threads);
143	for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
144	  __seqs[__s] = std::make_pair(__sd->_M_temporary[__s],
145				       __sd->_M_temporary[__s]
146				       + (__sd->_M_starts[__s + 1]
147					  - __sd->_M_starts[__s]));
148
149	std::vector<_SortingPlacesIterator> __offsets(__sd->_M_num_threads);
150
151	// if not last thread
152	if (__iam < __sd->_M_num_threads - 1)
153	  multiseq_partition(__seqs.begin(), __seqs.end(),
154			     __sd->_M_starts[__iam + 1], __offsets.begin(),
155			     __comp);
156
157	for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
158	  {
159	    // for each sequence
160	    if (__iam < (__sd->_M_num_threads - 1))
161	      __sd->_M_pieces[__iam][__seq]._M_end
162		= __offsets[__seq] - __seqs[__seq].first;
163	    else
164	      // very end of this sequence
165	      __sd->_M_pieces[__iam][__seq]._M_end =
166		__sd->_M_starts[__seq + 1] - __sd->_M_starts[__seq];
167	  }
168
169#       pragma omp barrier
170
171	for (_ThreadIndex __seq = 0; __seq < __sd->_M_num_threads; __seq++)
172	  {
173	    // For each sequence.
174	    if (__iam > 0)
175	      __sd->_M_pieces[__iam][__seq]._M_begin =
176		__sd->_M_pieces[__iam - 1][__seq]._M_end;
177	    else
178	      // Absolute beginning.
179	      __sd->_M_pieces[__iam][__seq]._M_begin = 0;
180	  }
181      }
182  };
183
184  /** @brief Split by sampling. */
185  template<typename _RAIter, typename _Compare,
186	   typename _SortingPlacesIterator>
187    struct _SplitConsistently<false, _RAIter, _Compare, _SortingPlacesIterator>
188    {
189      void
190      operator()(const _ThreadIndex __iam,
191		 _PMWMSSortingData<_RAIter>* __sd,
192		 _Compare& __comp,
193		 const typename
194		 std::iterator_traits<_RAIter>::difference_type
195		 __num_samples) const
196      {
197	typedef std::iterator_traits<_RAIter> _TraitsType;
198	typedef typename _TraitsType::value_type _ValueType;
199	typedef typename _TraitsType::difference_type _DifferenceType;
200
201	__determine_samples(__sd, __num_samples);
202
203#       pragma omp barrier
204
205#       pragma omp single
206	__gnu_sequential::sort(__sd->_M_samples,
207			       __sd->_M_samples
208			       + (__num_samples * __sd->_M_num_threads),
209			       __comp);
210
211#       pragma omp barrier
212
213	for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
214	  {
215	    // For each sequence.
216	    if (__num_samples * __iam > 0)
217	      __sd->_M_pieces[__iam][__s]._M_begin =
218                std::lower_bound(__sd->_M_temporary[__s],
219				 __sd->_M_temporary[__s]
220				 + (__sd->_M_starts[__s + 1]
221				    - __sd->_M_starts[__s]),
222				 __sd->_M_samples[__num_samples * __iam],
223				 __comp)
224                - __sd->_M_temporary[__s];
225	    else
226	      // Absolute beginning.
227	      __sd->_M_pieces[__iam][__s]._M_begin = 0;
228
229	    if ((__num_samples * (__iam + 1)) <
230		(__num_samples * __sd->_M_num_threads))
231	      __sd->_M_pieces[__iam][__s]._M_end =
232                std::lower_bound(__sd->_M_temporary[__s],
233				 __sd->_M_temporary[__s]
234				 + (__sd->_M_starts[__s + 1]
235				    - __sd->_M_starts[__s]),
236				 __sd->_M_samples[__num_samples * (__iam + 1)],
237				 __comp)
238                - __sd->_M_temporary[__s];
239	    else
240	      // Absolute end.
241	      __sd->_M_pieces[__iam][__s]._M_end = (__sd->_M_starts[__s + 1]
242						    - __sd->_M_starts[__s]);
243	  }
244      }
245  };
246
247  template<bool __stable, typename _RAIter, typename _Compare>
248    struct __possibly_stable_sort
249    { };
250
251  template<typename _RAIter, typename _Compare>
252    struct __possibly_stable_sort<true, _RAIter, _Compare>
253    {
254      void operator()(const _RAIter& __begin,
255		      const _RAIter& __end, _Compare& __comp) const
256      { __gnu_sequential::stable_sort(__begin, __end, __comp); }
257    };
258
259  template<typename _RAIter, typename _Compare>
260    struct __possibly_stable_sort<false, _RAIter, _Compare>
261    {
262      void operator()(const _RAIter __begin,
263		      const _RAIter __end, _Compare& __comp) const
264      { __gnu_sequential::sort(__begin, __end, __comp); }
265    };
266
267  template<bool __stable, typename Seq_RAIter,
268	   typename _RAIter, typename _Compare,
269	   typename DiffType>
270    struct __possibly_stable_multiway_merge
271    { };
272
273  template<typename Seq_RAIter, typename _RAIter,
274	   typename _Compare, typename _DiffType>
275    struct __possibly_stable_multiway_merge<true, Seq_RAIter,
276					    _RAIter, _Compare, _DiffType>
277    {
278      void operator()(const Seq_RAIter& __seqs_begin,
279		      const Seq_RAIter& __seqs_end,
280		      const _RAIter& __target,
281		      _Compare& __comp,
282		      _DiffType __length_am) const
283      { stable_multiway_merge(__seqs_begin, __seqs_end, __target,
284			      __length_am, __comp, sequential_tag()); }
285    };
286
287  template<typename Seq_RAIter, typename _RAIter,
288	   typename _Compare, typename _DiffType>
289    struct __possibly_stable_multiway_merge<false, Seq_RAIter,
290					    _RAIter, _Compare, _DiffType>
291    {
292      void operator()(const Seq_RAIter& __seqs_begin,
293                      const Seq_RAIter& __seqs_end,
294                      const _RAIter& __target,
295                      _Compare& __comp,
296                      _DiffType __length_am) const
297      { multiway_merge(__seqs_begin, __seqs_end, __target, __length_am,
298		       __comp, sequential_tag()); }
299    };
300
301  /** @brief PMWMS code executed by each thread.
302   *  @param __sd Pointer to algorithm data.
303   *  @param __comp Comparator.
304   */
305  template<bool __stable, bool __exact, typename _RAIter,
306	   typename _Compare>
307    void
308    parallel_sort_mwms_pu(_PMWMSSortingData<_RAIter>* __sd,
309			  _Compare& __comp)
310    {
311      typedef std::iterator_traits<_RAIter> _TraitsType;
312      typedef typename _TraitsType::value_type _ValueType;
313      typedef typename _TraitsType::difference_type _DifferenceType;
314
315      _ThreadIndex __iam = omp_get_thread_num();
316
317      // Length of this thread's chunk, before merging.
318      _DifferenceType __length_local =
319	__sd->_M_starts[__iam + 1] - __sd->_M_starts[__iam];
320
321      // Sort in temporary storage, leave space for sentinel.
322
323      typedef _ValueType* _SortingPlacesIterator;
324
325      __sd->_M_temporary[__iam] =
326        static_cast<_ValueType*>(::operator new(sizeof(_ValueType)
327						* (__length_local + 1)));
328
329      // Copy there.
330      std::uninitialized_copy(__sd->_M_source + __sd->_M_starts[__iam],
331			      __sd->_M_source + __sd->_M_starts[__iam]
332			      + __length_local,
333			      __sd->_M_temporary[__iam]);
334
335      __possibly_stable_sort<__stable, _SortingPlacesIterator, _Compare>()
336        (__sd->_M_temporary[__iam],
337	 __sd->_M_temporary[__iam] + __length_local,
338         __comp);
339
340      // Invariant: locally sorted subsequence in sd->_M_temporary[__iam],
341      // __sd->_M_temporary[__iam] + __length_local.
342
343      // No barrier here: Synchronization is done by the splitting routine.
344
345      _DifferenceType __num_samples =
346        _Settings::get().sort_mwms_oversampling * __sd->_M_num_threads - 1;
347      _SplitConsistently<__exact, _RAIter, _Compare, _SortingPlacesIterator>()
348        (__iam, __sd, __comp, __num_samples);
349
350      // Offset from __target __begin, __length after merging.
351      _DifferenceType __offset = 0, __length_am = 0;
352      for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; __s++)
353	{
354	  __length_am += (__sd->_M_pieces[__iam][__s]._M_end
355			  - __sd->_M_pieces[__iam][__s]._M_begin);
356	  __offset += __sd->_M_pieces[__iam][__s]._M_begin;
357	}
358
359      typedef std::vector<
360        std::pair<_SortingPlacesIterator, _SortingPlacesIterator> >
361        _SeqVector;
362      _SeqVector __seqs(__sd->_M_num_threads);
363
364      for (_ThreadIndex __s = 0; __s < __sd->_M_num_threads; ++__s)
365	{
366	  __seqs[__s] =
367	    std::make_pair(__sd->_M_temporary[__s]
368			   + __sd->_M_pieces[__iam][__s]._M_begin,
369			   __sd->_M_temporary[__s]
370			   + __sd->_M_pieces[__iam][__s]._M_end);
371	}
372
373      __possibly_stable_multiway_merge<
374        __stable, typename _SeqVector::iterator,
375	_RAIter, _Compare, _DifferenceType>()(__seqs.begin(), __seqs.end(),
376				     __sd->_M_source + __offset, __comp,
377				     __length_am);
378
379#     pragma omp barrier
380
381      for (_DifferenceType __i = 0; __i < __length_local; ++__i)
382	__sd->_M_temporary[__iam][__i].~_ValueType();
383      ::operator delete(__sd->_M_temporary[__iam]);
384    }
385
386  /** @brief PMWMS main call.
387   *  @param __begin Begin iterator of sequence.
388   *  @param __end End iterator of sequence.
389   *  @param __comp Comparator.
390   *  @param __num_threads Number of threads to use.
391   */
392  template<bool __stable, bool __exact, typename _RAIter,
393           typename _Compare>
394    void
395    parallel_sort_mwms(_RAIter __begin, _RAIter __end,
396		       _Compare __comp,
397		       _ThreadIndex __num_threads)
398    {
399      _GLIBCXX_CALL(__end - __begin)
400
401      typedef std::iterator_traits<_RAIter> _TraitsType;
402      typedef typename _TraitsType::value_type _ValueType;
403      typedef typename _TraitsType::difference_type _DifferenceType;
404
405      _DifferenceType __n = __end - __begin;
406
407      if (__n <= 1)
408	return;
409
410      // at least one element per thread
411      if (__num_threads > __n)
412	__num_threads = static_cast<_ThreadIndex>(__n);
413
414      // shared variables
415      _PMWMSSortingData<_RAIter> __sd;
416      _DifferenceType* __starts;
417      _DifferenceType __size;
418
419#     pragma omp parallel num_threads(__num_threads)
420      {
421        __num_threads = omp_get_num_threads(); //no more threads than requested
422
423#       pragma omp single
424	{
425	  __sd._M_num_threads = __num_threads;
426	  __sd._M_source = __begin;
427
428	  __sd._M_temporary = new _ValueType*[__num_threads];
429
430	  if (!__exact)
431	    {
432	      __size =
433		(_Settings::get().sort_mwms_oversampling * __num_threads - 1)
434		* __num_threads;
435	      __sd._M_samples = static_cast<_ValueType*>
436		(::operator new(__size * sizeof(_ValueType)));
437	    }
438	  else
439	    __sd._M_samples = 0;
440
441	  __sd._M_offsets = new _DifferenceType[__num_threads - 1];
442	  __sd._M_pieces
443	    = new std::vector<_Piece<_DifferenceType> >[__num_threads];
444	  for (_ThreadIndex __s = 0; __s < __num_threads; ++__s)
445	    __sd._M_pieces[__s].resize(__num_threads);
446	  __starts = __sd._M_starts = new _DifferenceType[__num_threads + 1];
447
448	  _DifferenceType __chunk_length = __n / __num_threads;
449	  _DifferenceType __split = __n % __num_threads;
450	  _DifferenceType __pos = 0;
451	  for (_ThreadIndex __i = 0; __i < __num_threads; ++__i)
452	    {
453	      __starts[__i] = __pos;
454	      __pos += ((__i < __split)
455			? (__chunk_length + 1) : __chunk_length);
456	    }
457	  __starts[__num_threads] = __pos;
458	} //single
459
460        // Now sort in parallel.
461        parallel_sort_mwms_pu<__stable, __exact>(&__sd, __comp);
462      } //parallel
463
464      delete[] __starts;
465      delete[] __sd._M_temporary;
466
467      if (!__exact)
468	{
469	  for (_DifferenceType __i = 0; __i < __size; ++__i)
470	    __sd._M_samples[__i].~_ValueType();
471	  ::operator delete(__sd._M_samples);
472	}
473
474      delete[] __sd._M_offsets;
475      delete[] __sd._M_pieces;
476    }
477
478} //namespace __gnu_parallel
479
480#endif /* _GLIBCXX_PARALLEL_MULTIWAY_MERGESORT_H */
481