/* Out-of-line LSE atomics for AArch64 architecture.
   Copyright (C) 2019-2022 Free Software Foundation, Inc.
   Contributed by Linaro Ltd.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

/*
 * The problem that we are trying to solve is operating system deployment
 * of ARMv8.1-Atomics, also known as Large System Extensions (LSE).
 *
 * There are a number of potential solutions for this problem which have
 * been proposed and rejected for various reasons.  To recap:
 *
 * (1) Multiple builds.  The dynamic linker will examine /lib64/atomics/
 * if HWCAP_ATOMICS is set, allowing entire libraries to be overwritten.
 * However, not all Linux distributions are happy with multiple builds,
 * and anyway it has no effect on main applications.
 *
 * (2) IFUNC.  We could put these functions into libgcc_s.so, and have
 * a single copy of each function for all DSOs.  However, ARM is concerned
 * that the branch-to-indirect-branch that is implied by using a PLT,
 * as required by IFUNC, is too much overhead for smaller cpus.
 *
 * (3) Statically predicted direct branches.  This is the approach that
 * is taken here.  These functions are linked into every DSO that uses them.
 * All of the symbols are hidden, so that the functions are called via a
 * direct branch.  The choice of LSE vs non-LSE is done via one byte load
 * followed by a well-predicted direct branch.  The functions are compiled
 * separately to minimize code size.
 */

#include "auto-target.h"

/* Tell the assembler to accept LSE instructions.  When HAVE_AS_LSE is
   not defined the LSE instructions in this file are emitted as raw
   .inst encodings instead of mnemonics, so plain armv8-a is sufficient.
   NOTE(review): HAVE_AS_LSE is presumably provided by auto-target.h --
   confirm against the configure machinery.  */
#ifdef HAVE_AS_LSE
	.arch armv8-a+lse
#else
	.arch armv8-a
#endif

/* Declare the symbol gating the LSE implementations.  It is only
   referenced here (one byte is loaded from it by JUMP_IF_NOT_LSE), not
   defined; it is marked hidden so the reference does not leave the
   current DSO.  */
	.hidden	__aarch64_have_lse_atomics

/* Turn the SIZE define (operand width in bytes) into mnemonic fragments:

     S    size suffix pasted onto the load/store/atomic mnemonics
	  (b, h, or nothing)
     UXT  instruction used by the cas fallback to copy the expected
	  value into a temporary, zero-extending it for sub-word sizes
     B    size bits added into the raw .inst encodings that are used
	  when the assembler lacks LSE support

   B is intentionally not defined for SIZE == 16: the only 16-byte
   encoding in this file (CASP) does not use it.  */
#if SIZE == 1
# define S     b
# define UXT   uxtb
# define B     0x00000000
#elif SIZE == 2
# define S     h
# define UXT   uxth
# define B     0x40000000
#elif SIZE == 4 || SIZE == 8 || SIZE == 16
# define S
# define UXT   mov
# if SIZE == 4
#  define B    0x80000000
# elif SIZE == 8
#  define B    0xc0000000
# endif
#else
# error "SIZE must be defined as 1, 2, 4, 8 or 16"
#endif

/* Turn the MODEL define into mnemonic and encoding fragments:

     SUFF     suffix of the generated function name
     A, L     acquire / release letters pasted into the mnemonics
     M        bits added to the raw CAS/CASP .inst encodings
     N        bits added to the raw SWP/LD<op> .inst encodings
     BARRIER  instruction placed at the end of the load/store-exclusive
	      fallback path; empty for every model except _sync  */
#if MODEL == 1
# define SUFF  _relax
# define A
# define L
# define M     0x000000
# define N     0x000000
# define BARRIER
#elif MODEL == 2
# define SUFF  _acq
# define A     a
# define L
# define M     0x400000
# define N     0x800000
# define BARRIER
#elif MODEL == 3
# define SUFF  _rel
# define A
# define L     l
# define M     0x008000
# define N     0x400000
# define BARRIER
#elif MODEL == 4
# define SUFF  _acq_rel
# define A     a
# define L     l
# define M     0x408000
# define N     0xc00000
# define BARRIER
#elif MODEL == 5
# define SUFF  _sync
# ifdef L_swp
/* swp has _acq semantics.  */
#  define A    a
#  define L
#  define M    0x400000
#  define N    0x800000
# else
/* All other _sync functions have _seq semantics.  */
#  define A    a
#  define L    l
#  define M    0x408000
#  define N    0xc00000
# endif
/* The _sync fallback paths finish with a full barrier; their exclusive
   load is emitted without the acquire letter (see LDXR / LDXP).  */
# define BARRIER dmb		ish
#else
# error "MODEL must be defined as 1 (relax), 2 (acq), 3 (rel), 4 (acq_rel) or 5 (sync)"
#endif

/* Concatenate symbols.  Each glueN goes through a glueN_ helper so that
   the arguments are macro-expanded before being pasted; an argument
   that expands to nothing (e.g. an empty A, L or S) simply vanishes
   from the result.  The parameter names are deliberately distinct from
   the object-like macros A, B, L, M, N and S defined in this file.  */
#define glue2_(P0, P1)		P0 ## P1
#define glue2(P0, P1)		glue2_(P0, P1)
#define glue3_(P0, P1, P2)	P0 ## P1 ## P2
#define glue3(P0, P1, P2)	glue3_(P0, P1, P2)
#define glue4_(P0, P1, P2, P3)	P0 ## P1 ## P2 ## P3
#define glue4(P0, P1, P2, P3)	glue4_(P0, P1, P2, P3)

/* Select the size of a register, given a regno.
     x(REGNO)  64-bit view, e.g. x(tmp0) -> x16
     w(REGNO)  32-bit view, e.g. w(tmp0) -> w16
     s(REGNO)  view matching the operand: w for SIZE < 8, x otherwise  */
#define x(REGNO)		glue2(x, REGNO)
#define w(REGNO)		glue2(w, REGNO)
#if SIZE < 8
# define s(REGNO)		w(REGNO)
#else
# define s(REGNO)		x(REGNO)
#endif

/* NAME(BASE) builds the function symbol __aarch64_<BASE><SIZE><SUFF>.  */
#define NAME(BASE)		glue4(__aarch64_, BASE, SIZE, SUFF)

/* Exclusive load/store mnemonics for the fallback path: ld[a]xr[b|h]
   and st[l]xr[b|h], built from the A, L and S fragments.  */
#if MODEL == 5
/* Drop A for _sync functions.  */
# define LDXR			glue3(ld, xr, S)
#else
# define LDXR			glue4(ld, A, xr, S)
#endif
#define STXR			glue4(st, L, xr, S)

/* Temporary registers used (x16, x17 and x15).  Other than these, only
   the return value register (x0, plus x1 for the 16-byte cas, which
   returns a register pair) and the flags are modified.  */
#define tmp0	16
#define tmp1	17
#define tmp2	15

/* Landing pad placed at every function entry by STARTFN.  It is spelled
   as a numbered hint rather than a mnemonic; hint 34 is the encoding of
   "bti c", which executes as a NOP where BTI is not implemented.  */
#define BTI_C	hint	34

/* STARTFN name: start a function.
   Switches to .text, aligns to 16 bytes, declares NAME as a global but
   hidden function symbol, opens the CFI region, defines the label and
   emits the BTI_C landing pad as the first instruction.  Must be paired
   with ENDFN for the same name.  */
.macro	STARTFN name
	.text
	.balign	16
	.globl	\name
	.hidden	\name
	.type	\name, %function
	.cfi_startproc
\name:
	BTI_C
.endm

/* ENDFN name: end a function opened with STARTFN.
   Closes the CFI region and records the symbol size as the distance
   from the function label to the current location.  */
.macro	ENDFN name
	.cfi_endproc
	.size	\name, . - \name
.endm

/* Branch to LABEL if LSE is disabled.
   Loads the byte at __aarch64_have_lse_atomics (page address via adrp,
   low 12 bits via :lo12:) and branches to LABEL when it is zero; falls
   through when it is non-zero.  Clobbers tmp0 (x16) only; the flags are
   left untouched because cbz does not use them.  */
.macro	JUMP_IF_NOT_LSE label
	adrp	x(tmp0), __aarch64_have_lse_atomics
	ldrb	w(tmp0), [x(tmp0), :lo12:__aarch64_have_lse_atomics]
	cbz	w(tmp0), \label
.endm

#ifdef L_cas

/* NAME(cas): compare-and-swap.

   SIZE < 16:   s(0) = expected value, s(1) = desired value,
		x2 = address.  The value read from memory is returned
		in s(0).
   SIZE == 16:  x0:x1 = expected pair, x2:x3 = desired pair,
		x4 = address.  The pair read from memory is returned
		in x0:x1.

   The first path uses the LSE CAS/CASP instruction; label 8 is the
   load/store-exclusive fallback taken when JUMP_IF_NOT_LSE finds LSE
   disabled.  Clobbers tmp0, tmp1 (and tmp2 for SIZE == 16) and, on the
   fallback path, the flags.  */
STARTFN	NAME(cas)
	JUMP_IF_NOT_LSE	8f

#if SIZE < 16
/* With an LSE-capable assembler use the mnemonic; otherwise emit the
   raw encoding for the same fixed operands, adding the size bits (B)
   and the memory-model bits (M).  */
#ifdef HAVE_AS_LSE
# define CAS	glue4(cas, A, L, S)	s(0), s(1), [x2]
#else
# define CAS	.inst 0x08a07c41 + B + M
#endif

	CAS		/* s(0), s(1), [x2] */
	ret

	/* Fallback.  Keep a copy of the expected value in tmp0 (zero-
	   extended for byte/halfword sizes) so it can be compared with
	   what LDXR loads into s(0).  */
8:	UXT		s(tmp0), s(0)
0:	LDXR		s(0), [x2]
	cmp		s(0), s(tmp0)
	bne		1f		/* Mismatch: return the loaded value.  */
	STXR		w(tmp1), s(1), [x2]
	cbnz		w(tmp1), 0b	/* Exclusive store failed: retry.  */
1:	BARRIER
	ret

#else
/* 16-byte variant: exclusive pair load/store mnemonics ld[a]xp and
   st[l]xp, and the CASP instruction or its raw encoding.  */
#if MODEL == 5
/* Drop A for _sync functions.  */
# define LDXP	glue2(ld, xp)
#else
# define LDXP	glue3(ld, A, xp)
#endif
#define STXP	glue3(st, L, xp)
#ifdef HAVE_AS_LSE
# define CASP	glue3(casp, A, L)	x0, x1, x2, x3, [x4]
#else
# define CASP	.inst 0x48207c82 + M
#endif

	CASP		/* x0, x1, x2, x3, [x4] */
	ret

	/* Fallback.  Save the expected pair in tmp0:tmp1, then loop until
	   either the loaded pair differs or the exclusive store succeeds.  */
8:	mov		x(tmp0), x0
	mov		x(tmp1), x1
0:	LDXP		x0, x1, [x4]
	cmp		x0, x(tmp0)
	/* Compare the high halves only if the low halves matched;
	   otherwise force the flags to "not equal".  */
	ccmp		x1, x(tmp1), #0, eq
	bne		1f
	STXP		w(tmp2), x2, x3, [x4]
	cbnz		w(tmp2), 0b	/* Exclusive store failed: retry.  */
1:	BARRIER
	ret

#endif

ENDFN	NAME(cas)
#endif

#ifdef L_swp
/* With an LSE-capable assembler use the mnemonic; otherwise emit the
   raw encoding for the same fixed operands, adding the size bits (B)
   and the memory-model bits (N).  */
#ifdef HAVE_AS_LSE
# define SWP	glue4(swp, A, L, S)	s(0), s(0), [x1]
#else
# define SWP	.inst 0x38208020 + B + N
#endif

/* NAME(swp): atomic exchange.
   s(0) = new value, x1 = address.  The previous memory contents are
   returned in s(0).  Label 8 is the load/store-exclusive fallback taken
   when JUMP_IF_NOT_LSE finds LSE disabled.  Clobbers tmp0 and, on the
   fallback path, tmp1.  */
STARTFN	NAME(swp)
	JUMP_IF_NOT_LSE	8f

	SWP		/* s(0), s(0), [x1] */
	ret

	/* Fallback.  Move the new value out of the way so s(0) can receive
	   the old contents, then retry until the exclusive store succeeds.  */
8:	mov		s(tmp0), s(0)
0:	LDXR		s(0), [x1]
	STXR		w(tmp1), s(tmp0), [x1]
	cbnz		w(tmp1), 0b
	BARRIER
	ret

ENDFN	NAME(swp)
#endif

#if defined(L_ldadd) || defined(L_ldclr) \
    || defined(L_ldeor) || defined(L_ldset)

/* Per-operation fragments:
     LDNM  LSE mnemonic stem, also used as the function base name
     OP    ALU instruction computing the new value on the fallback path
     OPN   operation bits added to the raw .inst encoding  */
#ifdef L_ldadd
#define LDNM	ldadd
#define OP	add
#define OPN	0x0000
#elif defined(L_ldclr)
#define LDNM	ldclr
#define OP	bic
#define OPN	0x1000
#elif defined(L_ldeor)
#define LDNM	ldeor
#define OP	eor
#define OPN	0x2000
#elif defined(L_ldset)
#define LDNM	ldset
#define OP	orr
#define OPN	0x3000
#else
#error
#endif
/* With an LSE-capable assembler use the mnemonic; otherwise emit the
   raw encoding for the same fixed operands, adding the operation bits
   (OPN), the size bits (B) and the memory-model bits (N).  */
#ifdef HAVE_AS_LSE
# define LDOP	glue4(LDNM, A, L, S)	s(0), s(0), [x1]
#else
# define LDOP	.inst 0x38200020 + OPN + B + N
#endif

/* NAME(LDNM): atomic fetch-and-<op>.
   s(0) = operand, x1 = address.  Memory is updated to
   "old OP operand" and the previous memory contents are returned in
   s(0).  Label 8 is the load/store-exclusive fallback taken when
   JUMP_IF_NOT_LSE finds LSE disabled.  Clobbers tmp0 and, on the
   fallback path, tmp1 and tmp2.  */
STARTFN	NAME(LDNM)
	JUMP_IF_NOT_LSE	8f

	LDOP		/* s(0), s(0), [x1] */
	ret

	/* Fallback.  Keep the operand in tmp0, load the old value into
	   s(0), compute the new value into tmp1 and retry until the
	   exclusive store succeeds.  */
8:	mov		s(tmp0), s(0)
0:	LDXR		s(0), [x1]
	OP		s(tmp1), s(0), s(tmp0)
	STXR		w(tmp2), s(tmp1), [x1]
	cbnz		w(tmp2), 0b
	BARRIER
	ret

ENDFN	NAME(LDNM)
#endif

/* GNU_PROPERTY_AARCH64_* macros from elf.h for use in asm code.  */
#define FEATURE_1_AND 0xc0000000
#define FEATURE_1_BTI 1
#define FEATURE_1_PAC 2

/* Supported features based on the code generation options.  BTI_FLAG
   and PAC_FLAG are the FEATURE_1_* bits to advertise, or 0 when the
   corresponding protection is not enabled for this build.  */
#if defined(__ARM_FEATURE_BTI_DEFAULT)
# define BTI_FLAG FEATURE_1_BTI
#else
# define BTI_FLAG 0
#endif

/* Test for the macro explicitly instead of relying on an undefined
   identifier evaluating to 0 inside #if (same result, but clean under
   -Wundef).  */
#if defined(__ARM_FEATURE_PAC_DEFAULT) && (__ARM_FEATURE_PAC_DEFAULT & 3)
# define PAC_FLAG FEATURE_1_PAC
#else
# define PAC_FLAG 0
#endif

/* Add a NT_GNU_PROPERTY_TYPE_0 note.
   Emits, into .note.gnu.property and 8-byte aligned, in order:
     .word 4        size of the name ("GNU" plus its NUL)
     .word 16       size of the descriptor that follows the name
     .word 5        note type
     .asciz "GNU"   note name
     .word type     property type
     .word 4        size of the property data
     .word value    property data
     .word 0        padding to complete the 16-byte descriptor
   TYPE and VALUE are supplied by the caller.  */
#define GNU_PROPERTY(type, value)	\
  .section .note.gnu.property, "a";	\
  .p2align 3;				\
  .word 4;				\
  .word 16;				\
  .word 5;				\
  .asciz "GNU";				\
  .word type;				\
  .word 4;				\
  .word value;				\
  .word 0;

/* On Linux and FreeBSD, emit an empty .note.GNU-stack section (the
   conventional marker that this object does not need an executable
   stack) and, when applicable, the GNU property note.  */
#if defined(__linux__) || defined(__FreeBSD__)
.section .note.GNU-stack, "", %progbits

/* Add GNU property note if built with branch protection.  */
# if (BTI_FLAG|PAC_FLAG) != 0
GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG)
# endif
#endif
