/* Out-of-line LSE atomics for AArch64 architecture.
   Copyright (C) 2019-2022 Free Software Foundation, Inc.
   Contributed by Linaro Ltd.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

/*
 * The problem that we are trying to solve is operating system deployment
 * of ARMv8.1-Atomics, also known as Large System Extensions (LSE).
 *
 * There are a number of potential solutions for this problem which have
 * been proposed and rejected for various reasons.  To recap:
 *
 * (1) Multiple builds.  The dynamic linker will examine /lib64/atomics/
 * if HWCAP_ATOMICS is set, allowing entire libraries to be overwritten.
 * However, not all Linux distributions are happy with multiple builds,
 * and anyway it has no effect on main applications.
 *
 * (2) IFUNC.  We could put these functions into libgcc_s.so, and have
 * a single copy of each function for all DSOs.  However, ARM is concerned
 * that the branch-to-indirect-branch that is implied by using a PLT,
 * as required by IFUNC, is too much overhead for smaller cpus.
 *
 * (3) Statically predicted direct branches.
This is the approach that
 * is taken here.  These functions are linked into every DSO that uses them.
 * All of the symbols are hidden, so that the functions are called via a
 * direct branch.  The choice of LSE vs non-LSE is done via one byte load
 * followed by a well-predicted direct branch.  The functions are compiled
 * separately to minimize code size.
 */

/* Build-time inputs consumed below.  None of them is defined in this
   file; presumably they come from the build system or auto-target.h.
     SIZE         operand size in bytes: 1, 2, 4, 8 or 16.
     MODEL        memory model selector, 1 through 5 (see SUFF below).
     L_cas, L_swp, L_ldadd, L_ldclr, L_ldeor, L_ldset
                  select which function this translation unit emits.
     HAVE_AS_LSE  when defined, LSE instructions are written as
                  mnemonics; otherwise as raw .inst words.  */

#include "auto-target.h"

/* Tell the assembler to accept LSE instructions.  */
#ifdef HAVE_AS_LSE
        .arch armv8-a+lse
#else
        .arch armv8-a
#endif

/* Declare the symbol gating the LSE implementations.  */
        .hidden __aarch64_have_lse_atomics

/* Turn size and memory model defines into mnemonic fragments.
     S    size suffix pasted onto mnemonics (b, h, or empty).
     UXT  instruction that copies the expected value in the cas
          fallback: uxtb/uxth for sub-word sizes, plain mov otherwise.
     B    size-dependent addend for the raw .inst encodings.  It is not
          defined for SIZE == 16; the CASP encoding does not use it.  */
#if SIZE == 1
# define S     b
# define UXT   uxtb
# define B     0x00000000
#elif SIZE == 2
# define S     h
# define UXT   uxth
# define B     0x40000000
#elif SIZE == 4 || SIZE == 8 || SIZE == 16
# define S
# define UXT   mov
# if SIZE == 4
#  define B    0x80000000
# elif SIZE == 8
#  define B    0xc0000000
# endif
#else
# error
#endif

/* Per-model fragments.
     SUFF     suffix of the function name.
     A, L     acquire / release letters pasted into mnemonics.
     M        model-dependent addend for the CAS and CASP .inst words.
     N        model-dependent addend for the SWP and LDOP .inst words.
     BARRIER  statement placed at the end of the load/store-exclusive
              fallback; empty except for MODEL == 5.  */
#if MODEL == 1
# define SUFF  _relax
# define A
# define L
# define M     0x000000
# define N     0x000000
# define BARRIER
#elif MODEL == 2
# define SUFF  _acq
# define A     a
# define L
# define M     0x400000
# define N     0x800000
# define BARRIER
#elif MODEL == 3
# define SUFF  _rel
# define A
# define L     l
# define M     0x008000
# define N     0x400000
# define BARRIER
#elif MODEL == 4
# define SUFF  _acq_rel
# define A     a
# define L     l
# define M     0x408000
# define N     0xc00000
# define BARRIER
#elif MODEL == 5
# define SUFF  _sync
#ifdef L_swp
/* swp has _acq semantics.  */
#  define A    a
#  define L
#  define M    0x400000
#  define N    0x800000
#else
/* All other _sync functions have _seq semantics.  */
#  define A    a
#  define L    l
#  define M    0x408000
#  define N    0xc00000
#endif
# define BARRIER dmb ish
#else
# error
#endif

/* Concatenate symbols.  Each glueN forwards to glueN_ so that arguments
   which are themselves macros are expanded before ## pastes them.  */
#define glue2_(A, B)            A ## B
#define glue2(A, B)             glue2_(A, B)
#define glue3_(A, B, C)         A ## B ## C
#define glue3(A, B, C)          glue3_(A, B, C)
#define glue4_(A, B, C, D)      A ## B ## C ## D
#define glue4(A, B, C, D)       glue4_(A, B, C, D)

/* Select the size of a register, given a regno.  s(N) is the register
   view matching SIZE: w for SIZE < 8, x otherwise.  */
#define x(N)                    glue2(x, N)
#define w(N)                    glue2(w, N)
#if SIZE < 8
# define s(N)                   w(N)
#else
# define s(N)                   x(N)
#endif

/* Function name: __aarch64_ followed by BASE, SIZE and SUFF.  */
#define NAME(BASE)              glue4(__aarch64_, BASE, SIZE, SUFF)

/* Load-exclusive / store-exclusive mnemonics for the fallback loops.  */
#if MODEL == 5
/* Drop A for _sync functions.  */
# define LDXR                   glue3(ld, xr, S)
#else
# define LDXR                   glue4(ld, A, xr, S)
#endif
#define STXR                    glue4(st, L, xr, S)

/* Temporary registers used.  Other than these, only the return value
   register (x0) and the flags are modified.  */
#define tmp0    16
#define tmp1    17
#define tmp2    15

/* Landing pad placed at every function entry by STARTFN; written as a
   hint instruction rather than as a bti mnemonic.  */
#define BTI_C   hint    34

/* Start and end a function.  */

/* STARTFN name: switch to .text, align to 16 bytes, declare NAME as a
   global but hidden function symbol, open the CFI region, define the
   label and emit BTI_C as the first instruction.  */
.macro  STARTFN name
        .text
        .balign 16
        .globl  \name
        .hidden \name
        .type   \name, %function
        .cfi_startproc
\name:
        BTI_C
.endm

/* ENDFN name: close the CFI region and record the size of NAME.  */
.macro  ENDFN name
        .cfi_endproc
        .size   \name, . - \name
.endm

/* Branch to LABEL if LSE is disabled.
The flag byte is loaded through x(tmp0), so tmp0 is clobbered.  */
.macro  JUMP_IF_NOT_LSE label
        adrp    x(tmp0), __aarch64_have_lse_atomics
        ldrb    w(tmp0), [x(tmp0), :lo12:__aarch64_have_lse_atomics]
        cbz     w(tmp0), \label
.endm

#ifdef L_cas

/* Compare and swap.
     SIZE < 16:   s(0) = expected value, s(1) = new value, x2 = address.
     SIZE == 16:  x0:x1 = expected value, x2:x3 = new value, x4 = address.
   The value found in memory is returned in s(0), or in x0:x1.
   Label 8 is the load/store-exclusive fallback used when the LSE flag
   is zero; label 0 is its retry point and label 1 its exit.  */
STARTFN NAME(cas)
        JUMP_IF_NOT_LSE 8f

#if SIZE < 16
#ifdef HAVE_AS_LSE
# define CAS    glue4(cas, A, L, S)     s(0), s(1), [x2]
#else
# define CAS    .inst 0x08a07c41 + B + M
#endif

        CAS             /* s(0), s(1), [x2] */
        ret

        /* Keep the expected value in tmp0; s(0) receives the loaded value.  */
8:      UXT             s(tmp0), s(0)
0:      LDXR            s(0), [x2]
        cmp             s(0), s(tmp0)
        bne             1f
        STXR            w(tmp1), s(1), [x2]
        /* Non-zero status: the exclusive store did not succeed; retry.  */
        cbnz            w(tmp1), 0b
1:      BARRIER
        ret

#else
#if MODEL == 5
/* Drop A for _sync functions.  */
# define LDXP   glue2(ld, xp)
#else
# define LDXP   glue3(ld, A, xp)
#endif
#define STXP    glue3(st, L, xp)
#ifdef HAVE_AS_LSE
# define CASP   glue3(casp, A, L)       x0, x1, x2, x3, [x4]
#else
# define CASP   .inst 0x48207c82 + M
#endif

        CASP            /* x0, x1, x2, x3, [x4] */
        ret

        /* Keep the expected pair in tmp0:tmp1; x0:x1 receive the loaded pair.  */
8:      mov             x(tmp0), x0
        mov             x(tmp1), x1
0:      LDXP            x0, x1, [x4]
        cmp             x0, x(tmp0)
        /* Compare the second half only if the first matched; otherwise
           the #0 flags value makes the following bne taken.  */
        ccmp            x1, x(tmp1), #0, eq
        bne             1f
        STXP            w(tmp2), x2, x3, [x4]
        cbnz            w(tmp2), 0b
1:      BARRIER
        ret

#endif

ENDFN   NAME(cas)
#endif

#ifdef L_swp
#ifdef HAVE_AS_LSE
# define SWP    glue4(swp, A, L, S)     s(0), s(0), [x1]
#else
# define SWP    .inst 0x38208020 + B + N
#endif

/* Atomic exchange.
     s(0) = new value, x1 = address.
   The previous memory contents are returned in s(0).  */
STARTFN NAME(swp)
        JUMP_IF_NOT_LSE 8f

        SWP             /* s(0), s(0), [x1] */
        ret

        /* Fallback: tmp0 holds the new value while s(0) takes the old one.  */
8:      mov             s(tmp0), s(0)
0:      LDXR            s(0), [x1]
        STXR            w(tmp1), s(tmp0), [x1]
        cbnz            w(tmp1), 0b
        BARRIER
        ret

ENDFN   NAME(swp)
#endif

#if defined(L_ldadd) || defined(L_ldclr) \
    || defined(L_ldeor) || defined(L_ldset)

/* LDNM  base name of the function and of the LSE mnemonic.
   OP    instruction the fallback loop uses to combine the loaded value
         with the operand.
   OPN   operation-dependent addend for the raw .inst encoding.  */
#ifdef L_ldadd
#define LDNM    ldadd
#define OP      add
#define OPN     0x0000
#elif defined(L_ldclr)
#define LDNM    ldclr
#define OP      bic
#define OPN     0x1000
#elif defined(L_ldeor)
#define LDNM    ldeor
#define OP      eor
#define OPN     0x2000
#elif defined(L_ldset)
#define LDNM    ldset
#define OP      orr
#define OPN     0x3000
#else
#error
#endif
#ifdef HAVE_AS_LSE
# define LDOP   glue4(LDNM, A, L, S)    s(0), s(0), [x1]
#else
# define LDOP   .inst 0x38200020 + OPN + B + N
#endif

/* Atomic fetch-and-OP.
     s(0) = operand, x1 = address.
   The previous memory contents are returned in s(0); memory receives
   OP applied to the previous contents and the operand.  */
STARTFN NAME(LDNM)
        JUMP_IF_NOT_LSE 8f

        LDOP            /* s(0), s(0), [x1] */
        ret

        /* Fallback: tmp0 = operand, s(0) = old value, tmp1 = new value.  */
8:      mov             s(tmp0), s(0)
0:      LDXR            s(0), [x1]
        OP              s(tmp1), s(0), s(tmp0)
        STXR            w(tmp2), s(tmp1), [x1]
        cbnz            w(tmp2), 0b
        BARRIER
        ret

ENDFN   NAME(LDNM)
#endif

/* GNU_PROPERTY_AARCH64_* macros from elf.h for use in asm code.  */
#define FEATURE_1_AND 0xc0000000
#define FEATURE_1_BTI 1
#define FEATURE_1_PAC 2

/* Supported features based on the code generation options.  */
#if defined(__ARM_FEATURE_BTI_DEFAULT)
# define BTI_FLAG FEATURE_1_BTI
#else
# define BTI_FLAG 0
#endif

#if __ARM_FEATURE_PAC_DEFAULT & 3
# define PAC_FLAG FEATURE_1_PAC
#else
# define PAC_FLAG 0
#endif

/* Add a NT_GNU_PROPERTY_TYPE_0 note.  The statements are separated by
   semicolons because the macro expands onto a single line.  */
#define GNU_PROPERTY(type, value)       \
  .section .note.gnu.property, "a";     \
  .p2align 3;                           \
  .word 4;                              \
  .word 16;                             \
  .word 5;                              \
  .asciz "GNU";                         \
  .word type;                           \
  .word 4;                              \
  .word value;                          \
  .word 0;

#if defined(__linux__) || defined(__FreeBSD__)
.section .note.GNU-stack, "", %progbits

/* Add GNU property note if built with branch protection.  */
# if (BTI_FLAG|PAC_FLAG) != 0
GNU_PROPERTY (FEATURE_1_AND, BTI_FLAG|PAC_FLAG)
# endif
#endif