// -*- C++ -*-

// Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the terms
// of the GNU General Public License as published by the Free Software
// Foundation; either version 3, or (at your option) any later
// version.

// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

/** @file parallel/compatibility.h
 *  @brief Compatibility layer, mostly concerned with atomic operations.
 *  This file is a GNU parallel extension to the Standard C++ Library.
 */

// Written by Felix Putze.

#ifndef _GLIBCXX_PARALLEL_COMPATIBILITY_H
#define _GLIBCXX_PARALLEL_COMPATIBILITY_H 1

#include <parallel/types.h>
#include <parallel/base.h>

#if defined(__SUNPRO_CC) && defined(__sparc)
#include <sys/atomic.h>
#endif

#if !defined(_WIN32) || defined (__CYGWIN__)
#include <sched.h>
#endif

#if defined(_MSC_VER)
#include <Windows.h>
#include <intrin.h>
#undef max
#undef min
#endif

#ifdef __MINGW32__
// Including <windows.h> will drag in all the windows32 names.  Since
// that can cause user code portability problems, we just declare the
// one needed function here.
extern "C"
__attribute((dllimport)) void __attribute__((stdcall)) Sleep (unsigned long);
#endif

namespace __gnu_parallel
{
#if defined(__ICC)
  // Inline-assembly fetch-and-add helpers for the Intel compiler.
  template<typename _MustBeInt = int>
  int32_t __faa32(int32_t* __x, int32_t __inc)
  {
    asm volatile("lock xadd %0,%1"
                 : "=r" (__inc), "=m" (*__x)
                 : "0" (__inc)
                 : "memory");
    return __inc;
  }
#if defined(__x86_64)
  template<typename _MustBeInt = int>
  int64_t __faa64(int64_t* __x, int64_t __inc)
  {
    asm volatile("lock xadd %0,%1"
                 : "=r" (__inc), "=m" (*__x)
                 : "0" (__inc)
                 : "memory");
    return __inc;
  }
#endif
#endif

  // atomic functions only work on integers

  /** @brief Add a value to a variable, atomically.
   *
   *  Implementation is heavily platform-dependent.
   *  @param __ptr Pointer to a 32-bit signed integer.
   *  @param __addend Value to add.
   */
  inline int32_t
  __fetch_and_add_32(volatile int32_t* __ptr, int32_t __addend)
  {
#if defined(__ICC)      //x86 version
    return _InterlockedExchangeAdd((void*)__ptr, __addend);
#elif defined(__ECC)    //IA-64 version
    return _InterlockedExchangeAdd((void*)__ptr, __addend);
#elif defined(__ICL) || defined(_MSC_VER)
    return _InterlockedExchangeAdd(reinterpret_cast<volatile long*>(__ptr),
                                   __addend);
#elif defined(__GNUC__)
    return __sync_fetch_and_add(__ptr, __addend);
#elif defined(__SUNPRO_CC) && defined(__sparc)
    volatile int32_t __before, __after;
    do
      {
        __before = *__ptr;
        __after = __before + __addend;
      } while (atomic_cas_32((volatile unsigned int*)__ptr, __before,
                             __after) != __before);
    return __before;
#else   //fallback, slow
#pragma message("slow __fetch_and_add_32")
    int32_t __res;
#pragma omp critical
    {
      __res = *__ptr;
      *(__ptr) += __addend;
    }
    return __res;
#endif
  }

  /** @brief Add a value to a variable, atomically.
   *
   *  Implementation is heavily platform-dependent.
   *  @param __ptr Pointer to a 64-bit signed integer.
   *  @param __addend Value to add.
   */
  inline int64_t
  __fetch_and_add_64(volatile int64_t* __ptr, int64_t __addend)
  {
#if defined(__ICC) && defined(__x86_64) //x86 version
    return __faa64<int>((int64_t*)__ptr, __addend);
#elif defined(__ECC)    //IA-64 version
    return _InterlockedExchangeAdd64((void*)__ptr, __addend);
#elif defined(__ICL) || defined(_MSC_VER)
#ifndef _WIN64
    _GLIBCXX_PARALLEL_ASSERT(false);    //not available in this case
    return 0;
#else
    return _InterlockedExchangeAdd64(__ptr, __addend);
#endif
#elif defined(__GNUC__) && defined(__x86_64)
    return __sync_fetch_and_add(__ptr, __addend);
#elif defined(__GNUC__) && defined(__i386) &&                   \
  (defined(__i686) || defined(__pentium4) || defined(__athlon)  \
   || defined(__k8) || defined(__core2))
    return __sync_fetch_and_add(__ptr, __addend);
#elif defined(__SUNPRO_CC) && defined(__sparc)
    volatile int64_t __before, __after;
    do
      {
        __before = *__ptr;
        __after = __before + __addend;
      } while (atomic_cas_64((volatile unsigned long long*)__ptr, __before,
                             __after) != __before);
    return __before;
#else   //fallback, slow
#if defined(__GNUC__) && defined(__i386)
    // XXX doesn't work with -march=native
    //#warning "please compile with -march=i686 or better"
#endif
#pragma message("slow __fetch_and_add_64")
    int64_t __res;
#pragma omp critical
    {
      __res = *__ptr;
      *(__ptr) += __addend;
    }
    return __res;
#endif
  }

  /** @brief Add a value to a variable, atomically.
   *
   *  Implementation is heavily platform-dependent.
   *  @param __ptr Pointer to a signed integer.
   *  @param __addend Value to add.
   */
  template<typename _Tp>
  inline _Tp
  __fetch_and_add(volatile _Tp* __ptr, _Tp __addend)
  {
    if (sizeof(_Tp) == sizeof(int32_t))
      return
        (_Tp)__fetch_and_add_32((volatile int32_t*) __ptr, (int32_t)__addend);
    else if (sizeof(_Tp) == sizeof(int64_t))
      return
        (_Tp)__fetch_and_add_64((volatile int64_t*) __ptr, (int64_t)__addend);
    else
      _GLIBCXX_PARALLEL_ASSERT(false);
  }
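
  // Illustrative usage (not part of the library interface): the generic
  // __fetch_and_add dispatches on the operand size, so a 32-bit counter
  // shared between threads can be incremented without a critical section.
  // A minimal sketch, assuming this header is included; the variable names
  // are hypothetical:
  //
  //   int32_t __counter = 0;
  //   // Returns the value held *before* the addition (fetch-and-add).
  //   int32_t __old =
  //     __gnu_parallel::__fetch_and_add(&__counter, int32_t(1));
  //   // In a single-threaded run, __old == 0 and __counter == 1.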


#if defined(__ICC)
  // Inline-assembly compare-and-swap helpers for the Intel compiler.
  template<typename _MustBeInt = int>
  inline int32_t
  __cas32(volatile int32_t* __ptr, int32_t __old, int32_t __nw)
  {
    int32_t __before;
    __asm__ __volatile__("lock; cmpxchgl %1,%2"
                         : "=a"(__before)
                         : "q"(__nw), "m"(*(volatile long long*)(__ptr)),
                               "0"(__old)
                         : "memory");
    return __before;
  }

#if defined(__x86_64)
  template<typename _MustBeInt = int>
  inline int64_t
  __cas64(volatile int64_t *__ptr, int64_t __old, int64_t __nw)
  {
    int64_t __before;
    __asm__ __volatile__("lock; cmpxchgq %1,%2"
                         : "=a"(__before)
                         : "q"(__nw), "m"(*(volatile long long*)(__ptr)),
                               "0"(__old)
                         : "memory");
    return __before;
  }
#endif

#endif

  /** @brief Compare @c *__ptr and @c __comparand. If equal, let @c
   * *__ptr=__replacement and return @c true; otherwise return @c false.
   *
   *  Implementation is heavily platform-dependent.
   *  @param __ptr Pointer to 32-bit signed integer.
   *  @param __comparand Compare value.
   *  @param __replacement Replacement value.
   */
  inline bool
  __compare_and_swap_32(volatile int32_t* __ptr, int32_t __comparand,
                        int32_t __replacement)
  {
#if defined(__ICC)      //x86 version
    return _InterlockedCompareExchange((void*)__ptr, __replacement,
                                       __comparand) == __comparand;
#elif defined(__ECC)    //IA-64 version
    return _InterlockedCompareExchange((void*)__ptr, __replacement,
                                       __comparand) == __comparand;
#elif defined(__ICL) || defined(_MSC_VER)
    return _InterlockedCompareExchange(
               reinterpret_cast<volatile long*>(__ptr),
               __replacement, __comparand)
             == __comparand;
#elif defined(__GNUC__)
    return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement);
#elif defined(__SUNPRO_CC) && defined(__sparc)
    return atomic_cas_32((volatile unsigned int*)__ptr, __comparand,
                         __replacement) == __comparand;
#else
#pragma message("slow __compare_and_swap_32")
    bool __res = false;
#pragma omp critical
    {
      if (*__ptr == __comparand)
        {
          *__ptr = __replacement;
          __res = true;
        }
    }
    return __res;
#endif
  }

  /** @brief Compare @c *__ptr and @c __comparand. If equal, let @c
   * *__ptr=__replacement and return @c true; otherwise return @c false.
   *
   *  Implementation is heavily platform-dependent.
   *  @param __ptr Pointer to 64-bit signed integer.
   *  @param __comparand Compare value.
   *  @param __replacement Replacement value.
   */
  inline bool
  __compare_and_swap_64(volatile int64_t* __ptr, int64_t __comparand,
                        int64_t __replacement)
  {
#if defined(__ICC) && defined(__x86_64) //x86 version
    return __cas64<int>(__ptr, __comparand, __replacement) == __comparand;
#elif defined(__ECC)    //IA-64 version
    return _InterlockedCompareExchange64((void*)__ptr, __replacement,
                                         __comparand) == __comparand;
#elif defined(__ICL) || defined(_MSC_VER)
#ifndef _WIN64
    _GLIBCXX_PARALLEL_ASSERT(false);    //not available in this case
    return false;
#else
    return _InterlockedCompareExchange64(__ptr, __replacement,
                                         __comparand) == __comparand;
#endif

#elif defined(__GNUC__) && defined(__x86_64)
    return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement);
#elif defined(__GNUC__) && defined(__i386) &&                   \
  (defined(__i686) || defined(__pentium4) || defined(__athlon)  \
   || defined(__k8) || defined(__core2))
    return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement);
#elif defined(__SUNPRO_CC) && defined(__sparc)
    return atomic_cas_64((volatile unsigned long long*)__ptr,
                         __comparand, __replacement) == __comparand;
#else
#if defined(__GNUC__) && defined(__i386)
    // XXX -march=native
    //#warning "please compile with -march=i686 or better"
#endif
#pragma message("slow __compare_and_swap_64")
    bool __res = false;
#pragma omp critical
    {
      if (*__ptr == __comparand)
        {
          *__ptr = __replacement;
          __res = true;
        }
    }
    return __res;
#endif
  }

  /** @brief Compare @c *__ptr and @c __comparand. If equal, let @c
   * *__ptr=__replacement and return @c true; otherwise return @c false.
   *
   *  Implementation is heavily platform-dependent.
   *  @param __ptr Pointer to signed integer.
   *  @param __comparand Compare value.
   *  @param __replacement Replacement value. */
  template<typename _Tp>
  inline bool
  __compare_and_swap(volatile _Tp* __ptr, _Tp __comparand, _Tp __replacement)
  {
    if (sizeof(_Tp) == sizeof(int32_t))
      return __compare_and_swap_32((volatile int32_t*) __ptr,
                                   (int32_t)__comparand,
                                   (int32_t)__replacement);
    else if (sizeof(_Tp) == sizeof(int64_t))
      return __compare_and_swap_64((volatile int64_t*) __ptr,
                                   (int64_t)__comparand,
                                   (int64_t)__replacement);
    else
      _GLIBCXX_PARALLEL_ASSERT(false);
  }
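
  // Illustrative usage (not part of the library interface): a typical CAS
  // retry loop built on __compare_and_swap, here updating a shared maximum.
  // A minimal sketch; the helper name __atomic_max and its parameters are
  // hypothetical, assuming a 32-bit shared variable:
  //
  //   inline void
  //   __atomic_max(volatile int32_t* __shared, int32_t __candidate)
  //   {
  //     int32_t __observed;
  //     do
  //       {
  //         __observed = *__shared;
  //         if (__candidate <= __observed)
  //           return;   // current maximum is already at least as large
  //       }
  //     while (!__gnu_parallel::__compare_and_swap(__shared, __observed,
  //                                                __candidate));
  //   }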

  /** @brief Yield control to another thread, without waiting for
      the end of the time slice. */
  inline void
  __yield()
  {
#if defined (_WIN32) && !defined (__CYGWIN__)
    Sleep(0);
#else
    sched_yield();
#endif
  }
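
  // Illustrative usage (not part of the library interface): __yield is
  // typically called inside a busy-wait loop so that a spinning thread
  // gives up its time slice instead of burning CPU. A minimal sketch,
  // assuming a hypothetical shared volatile int32_t __lock initialized
  // to 0, where 1 means "held":
  //
  //   while (!__gnu_parallel::__compare_and_swap(&__lock, int32_t(0),
  //                                              int32_t(1)))
  //     __gnu_parallel::__yield();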
} // end namespace

#endif /* _GLIBCXX_PARALLEL_COMPATIBILITY_H */