1326943Sdim/* Copyright (C) 2012-2015 Free Software Foundation, Inc.
2326943Sdim   Contributed by Richard Henderson <rth@redhat.com>.
3353358Sdim
4353358Sdim   This file is part of the GNU Atomic Library (libatomic).
5353358Sdim
6326943Sdim   Libatomic is free software; you can redistribute it and/or modify it
7326943Sdim   under the terms of the GNU General Public License as published by
8326943Sdim   the Free Software Foundation; either version 3 of the License, or
9326943Sdim   (at your option) any later version.
10326943Sdim
11326943Sdim   Libatomic is distributed in the hope that it will be useful, but WITHOUT ANY
12326943Sdim   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13326943Sdim   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
14326943Sdim   more details.
15326943Sdim
16326943Sdim   Under Section 7 of GPL version 3, you are granted additional
17326943Sdim   permissions described in the GCC Runtime Library Exception, version
18336817Sdim   3.1, as published by the Free Software Foundation.
19326943Sdim
20326943Sdim   You should have received a copy of the GNU General Public License and
21326943Sdim   a copy of the GCC Runtime Library Exception along with this program;
22326943Sdim   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23326943Sdim   <http://www.gnu.org/licenses/>.  */
24326943Sdim
25326943Sdim#include "libatomic_i.h"
26326943Sdim
27326943Sdim
/* Inline fast path used by EXACT.

   Unless the build asks for relaxed special-casing
   (WANT_SPECIALCASE_RELAXED) or optimizes for size (__OPTIMIZE_SIZE__),
   and provided HAVE_ATOMIC_EXCHANGE_<N> is true, perform the N-byte
   exchange with the compiler builtin and return from the enclosing
   function.  The builtin is always issued as __ATOMIC_SEQ_CST, whatever
   model the caller supplied: this path assumes the extra barriers do not
   matter (e.g. a fully in-order cpu for which barriers are a nop).

   In every other configuration the macro expands to nothing.

   The expansion refers to MPTR, VPTR and RPTR, which must be in scope.  */
#if defined(WANT_SPECIALCASE_RELAXED) || defined(__OPTIMIZE_SIZE__)
# define EXACT_INLINE(N)
#else
# define EXACT_INLINE(N)					\
  if (C2(HAVE_ATOMIC_EXCHANGE_,N))				\
    {								\
      *PTR(N,rptr)						\
	= __atomic_exchange_n (PTR(N,mptr), *PTR(N,vptr),	\
			       __ATOMIC_SEQ_CST);		\
      return;							\
    }
#endif
42326943Sdim
43326943Sdim
/* Attempt the exchange as a single N-byte operation.

   The attempt is made only when HAVE_INT<N> is true and MPTR is aligned
   to N bytes.  In that case EXACT_INLINE gets the first chance (it may
   return on its own); otherwise the size-specific routine named by
   C3(local_,exchange_,N) is called with SMODEL, its result is stored
   through RPTR, and the enclosing function returns.

   When either precondition fails the macro does nothing and control
   continues with the statement that follows the expansion.

   The expansion refers to MPTR, VPTR, RPTR and SMODEL.  */
#define EXACT(N)						\
  do {								\
    if (C2(HAVE_INT,N) && ((uintptr_t)mptr & (N - 1)) == 0)	\
      {								\
	EXACT_INLINE (N);					\
	*PTR(N,rptr) = C3(local_,exchange_,N)			\
	  (PTR(N,mptr), *PTR(N,vptr), smodel);			\
	return;							\
      }								\
  } while (0)
53326943Sdim
54326943Sdim
/* Attempt the exchange by rewriting the naturally aligned N-byte word
   that contains the object, using a compare-and-swap retry loop.

   The expansion relies on the enclosing function for: the parameters
   MPTR, VPTR, N (as `n') and SMODEL; the locals R, A, U and V; and the
   label Lfinish.

   Steps:
     - Give up (fall out of the do/while) if HAVE_INT<N> or
       MAYBE_HAVE_ATOMIC_CAS_<N> is false.
     - R becomes the byte offset of MPTR inside the aligned word and A
       the address of that word.
     - Proceed only if the object lies entirely inside the word
       (r + n <= N); otherwise give up as above.
     - Call pre_barrier (SMODEL), then read the word into U with a plain
       load.
     - Loop: copy U to V, overlay the N new bytes from VPTR at offset R,
       and try to replace U by V in memory.  The builtin form is used
       when HAVE_ATOMIC_CAS_<N> is true (its `true' argument requests
       the weak variant, hence the loop); otherwise the routine named by
       C3(local_,compare_exchange_,N) is called.  Both are invoked with
       __ATOMIC_RELAXED for success and failure.
       NOTE(review): ordering is presumably provided by pre_barrier here
       and post_barrier at Lfinish -- confirm against their definitions.
     - On success jump to Lfinish, which is expected to extract the old
       bytes from U at offset R.

   The retry loop depends on a failed compare-exchange refreshing U with
   the current memory contents; the builtin is documented to do so.
   NOTE(review): the local_ fallback presumably does the same -- confirm.  */
#define LARGER(N)						\
  do {								\
    if (!C2(HAVE_INT,N)) break;					\
    if (!C2(MAYBE_HAVE_ATOMIC_CAS_,N)) break;			\
    r = (uintptr_t)mptr & (N - 1);				\
    a = (uintptr_t)mptr & -N;					\
    if (r + n <= N)						\
      {								\
	pre_barrier (smodel);					\
	u.C2(i,N) = *PTR(N,a);					\
	do {							\
	  v = u;						\
	  memcpy (v.b + r, vptr, n);				\
	} while (!(C2(HAVE_ATOMIC_CAS_,N)			\
		   ? __atomic_compare_exchange_n (PTR(N,a),	\
			&u.C2(i,N), v.C2(i,N), true,		\
			__ATOMIC_RELAXED, __ATOMIC_RELAXED)	\
		   : C3(local_,compare_exchange_,N) (PTR(N,a),	\
			&u.C2(i,N), v.C2(i,N),			\
			__ATOMIC_RELAXED, __ATOMIC_RELAXED)));	\
	goto Lfinish;						\
      }								\
  } while (0)
78326943Sdim
79326943Sdim
/* Swap the N bytes at MPTR with the N bytes at VPTR.

   The swap is staged through a fixed 1024-byte stack buffer, one chunk
   at a time, so no allocation proportional to N is needed.  Full-size
   chunks are processed first and any shorter remainder last.  N == 0
   leaves both regions untouched.

   The function performs no locking of its own.  */
static void __attribute__((noinline))
libat_exchange_large_inplace (size_t n, void *mptr, void *vptr)
{
  enum { chunk_size = 1024 };

  char scratch[chunk_size];
  char *mem = mptr;
  char *val = vptr;

  while (n != 0)
    {
      /* Either a whole chunk or, on the final pass, whatever is left.  */
      size_t len = n < chunk_size ? n : chunk_size;

      memcpy (scratch, mem, len);
      memcpy (mem, val, len);
      memcpy (val, scratch, len);

      mem += len;
      val += len;
      n -= len;
    }
}
103326943Sdim
/* Generic exchange of an object of N bytes: store the N bytes found at
   VPTR into the object at MPTR and write the object's previous contents
   to RPTR.  SMODEL is handed to the barrier helpers and to the
   size-specific exchange routine.

   Three strategies are tried in order:
     1. EXACT: a single exchange of exactly N bytes, for N in
	{1, 2, 4, 8, 16} when MPTR is suitably aligned.
     2. LARGER: a compare-and-swap loop on an aligned 4-, 8- or 16-byte
	word that fully contains the object.
     3. Otherwise: copy under the lock taken by libat_lock_n.

   EXACT returns from this function when it succeeds and LARGER jumps to
   Lfinish when it succeeds; when they do not apply, control simply
   continues with the next statement.  */
void
libat_exchange (size_t n, void *mptr, void *vptr, void *rptr, int smodel)
{
  /* Scratch state written by LARGER and read at Lfinish: U ends up
     holding the containing word's old value, V the proposed new value,
     R the byte offset of MPTR inside that word and A the word's
     address.  */
  union max_size_u u, v;
  uintptr_t r, a;

  switch (n)
    {
    case 0:				return;
    /* Power-of-two sizes: try an exact-size exchange first; if that does
       not apply, jump to the LARGER attempt for a wider word.  */
    case 1:		EXACT(1);	goto L4;
    case 2:		EXACT(2);	goto L4;
    case 4:		EXACT(4);	goto L8;
    case 8:		EXACT(8);	goto L16;
    case 16:		EXACT(16);	break;

    /* Remaining sizes below 16, plus the jump targets used above: try
       successively wider containing words, then give up and leave the
       switch for the locked path.  */
    case 3: L4:		LARGER(4);	/* FALLTHRU */
    case 5 ... 7: L8:	LARGER(8);	/* FALLTHRU */
    case 9 ... 15: L16:	LARGER(16);	break;

    /* Reached only through the goto inside LARGER (the case above ends
       in break).  Extract the object's old bytes from the old word.  */
    Lfinish:
      post_barrier (smodel);
      memcpy (rptr, u.b + r, n);
      return;
    }

  /* Locked path: sizes above 16 and anything the attempts above could
     not handle.  */
  pre_seq_barrier (smodel);
  libat_lock_n (mptr, n);

  if (vptr != rptr)
    {
      memcpy (rptr, mptr, n);
      memcpy (mptr, vptr, n);
    }
  else
    /* Input and output share one buffer, so copying the old value out
       first would overwrite the new value before it is stored; swap the
       two regions in place instead.  */
    libat_exchange_large_inplace (n, mptr, vptr);

  libat_unlock_n (mptr, n);
  post_seq_barrier (smodel);
}
143326943Sdim
/* NOTE(review): EXPORT_ALIAS is defined outside this file; presumably it
   publishes libat_exchange under the library's exported `exchange'
   symbol name -- confirm against libatomic_i.h.  */
EXPORT_ALIAS (exchange);
145326943Sdim