1326943Sdim/* Copyright (C) 2012-2015 Free Software Foundation, Inc. 2326943Sdim Contributed by Richard Henderson <rth@redhat.com>. 3353358Sdim 4353358Sdim This file is part of the GNU Atomic Library (libatomic). 5353358Sdim 6326943Sdim Libatomic is free software; you can redistribute it and/or modify it 7326943Sdim under the terms of the GNU General Public License as published by 8326943Sdim the Free Software Foundation; either version 3 of the License, or 9326943Sdim (at your option) any later version. 10326943Sdim 11326943Sdim Libatomic is distributed in the hope that it will be useful, but WITHOUT ANY 12326943Sdim WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 13326943Sdim FOR A PARTICULAR PURPOSE. See the GNU General Public License for 14326943Sdim more details. 15326943Sdim 16326943Sdim Under Section 7 of GPL version 3, you are granted additional 17326943Sdim permissions described in the GCC Runtime Library Exception, version 18336817Sdim 3.1, as published by the Free Software Foundation. 19326943Sdim 20326943Sdim You should have received a copy of the GNU General Public License and 21326943Sdim a copy of the GCC Runtime Library Exception along with this program; 22326943Sdim see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23326943Sdim <http://www.gnu.org/licenses/>. */ 24326943Sdim 25326943Sdim#include "libatomic_i.h" 26326943Sdim 27326943Sdim 28326943Sdim/* If we natively support the exchange, and if we're unconcerned with extra 29326943Sdim barriers (e.g. fully in-order cpu for which barriers are a nop), then 30326943Sdim go ahead and expand the operation inline. */ 31326943Sdim#if !defined(WANT_SPECIALCASE_RELAXED) && !defined(__OPTIMIZE_SIZE__) 32326943Sdim# define EXACT_INLINE(N) \ 33326943Sdim if (C2(HAVE_ATOMIC_EXCHANGE_,N)) \ 34326943Sdim { \ 35326943Sdim *PTR(N,rptr) = __atomic_exchange_n \ 36326943Sdim (PTR(N,mptr), *PTR(N,vptr), __ATOMIC_SEQ_CST); \ 37326943Sdim return; \ 38326943Sdim } 39326943Sdim#else 40326943Sdim# define EXACT_INLINE(N) 41326943Sdim#endif 42326943Sdim 43326943Sdim 44326943Sdim#define EXACT(N) \ 45326943Sdim do { \ 46326943Sdim if (!C2(HAVE_INT,N)) break; \ 47326943Sdim if ((uintptr_t)mptr & (N - 1)) break; \ 48326943Sdim EXACT_INLINE (N); \ 49326943Sdim *PTR(N,rptr) = C3(local_,exchange_,N) \ 50326943Sdim (PTR(N,mptr), *PTR(N,vptr), smodel); \ 51326943Sdim return; \ 52326943Sdim } while (0) 53326943Sdim 54326943Sdim 55326943Sdim#define LARGER(N) \ 56326943Sdim do { \ 57326943Sdim if (!C2(HAVE_INT,N)) break; \ 58326943Sdim if (!C2(MAYBE_HAVE_ATOMIC_CAS_,N)) break; \ 59326943Sdim r = (uintptr_t)mptr & (N - 1); \ 60326943Sdim a = (uintptr_t)mptr & -N; \ 61326943Sdim if (r + n <= N) \ 62326943Sdim { \ 63326943Sdim pre_barrier (smodel); \ 64326943Sdim u.C2(i,N) = *PTR(N,a); \ 65326943Sdim do { \ 66326943Sdim v = u; \ 67326943Sdim memcpy (v.b + r, vptr, n); \ 68326943Sdim } while (!(C2(HAVE_ATOMIC_CAS_,N) \ 69326943Sdim ? __atomic_compare_exchange_n (PTR(N,a), \ 70326943Sdim &u.C2(i,N), v.C2(i,N), true, \ 71326943Sdim __ATOMIC_RELAXED, __ATOMIC_RELAXED) \ 72326943Sdim : C3(local_,compare_exchange_,N) (PTR(N,a), \ 73326943Sdim &u.C2(i,N), v.C2(i,N), \ 74326943Sdim __ATOMIC_RELAXED, __ATOMIC_RELAXED))); \ 75326943Sdim goto Lfinish; \ 76326943Sdim } \ 77326943Sdim } while (0) 78326943Sdim 79326943Sdim 80326943Sdimstatic void __attribute__((noinline)) 81326943Sdimlibat_exchange_large_inplace (size_t n, void *mptr, void *vptr) 82326943Sdim{ 83326943Sdim#define BUF 1024 84326943Sdim 85326943Sdim char temp[BUF]; 86326943Sdim size_t i = 0; 87326943Sdim 88326943Sdim for (i = 0; n >= BUF; i += BUF, n -= BUF) 89326943Sdim { 90326943Sdim memcpy (temp, mptr + i, BUF); 91326943Sdim memcpy (mptr + i, vptr + i, BUF); 92326943Sdim memcpy (vptr + i, temp, BUF); 93326943Sdim } 94326943Sdim if (n > 0) 95326943Sdim { 96326943Sdim memcpy (temp, mptr + i, n); 97326943Sdim memcpy (mptr + i, vptr + i, n); 98326943Sdim memcpy (vptr + i, temp, n); 99326943Sdim } 100326943Sdim 101326943Sdim#undef BUF 102326943Sdim} 103326943Sdim 104326943Sdimvoid 105326943Sdimlibat_exchange (size_t n, void *mptr, void *vptr, void *rptr, int smodel) 106326943Sdim{ 107326943Sdim union max_size_u u, v; 108326943Sdim uintptr_t r, a; 109326943Sdim 110326943Sdim switch (n) 111326943Sdim { 112326943Sdim case 0: return; 113326943Sdim case 1: EXACT(1); goto L4; 114326943Sdim case 2: EXACT(2); goto L4; 115326943Sdim case 4: EXACT(4); goto L8; 116326943Sdim case 8: EXACT(8); goto L16; 117326943Sdim case 16: EXACT(16); break; 118326943Sdim 119326943Sdim case 3: L4: LARGER(4); /* FALLTHRU */ 120326943Sdim case 5 ... 7: L8: LARGER(8); /* FALLTHRU */ 121326943Sdim case 9 ... 15: L16: LARGER(16); break; 122326943Sdim 123326943Sdim Lfinish: 124326943Sdim post_barrier (smodel); 125326943Sdim memcpy (rptr, u.b + r, n); 126326943Sdim return; 127326943Sdim } 128326943Sdim 129326943Sdim pre_seq_barrier (smodel); 130326943Sdim libat_lock_n (mptr, n); 131326943Sdim 132326943Sdim if (vptr != rptr) 133326943Sdim { 134326943Sdim memcpy (rptr, mptr, n); 135326943Sdim memcpy (mptr, vptr, n); 136326943Sdim } 137326943Sdim else 138326943Sdim libat_exchange_large_inplace (n, mptr, vptr); 139326943Sdim 140326943Sdim libat_unlock_n (mptr, n); 141326943Sdim post_seq_barrier (smodel); 142326943Sdim} 143326943Sdim 144326943SdimEXPORT_ALIAS (exchange); 145326943Sdim