1/* SPDX-License-Identifier: GPL-2.0-only */
2/*
3 * FP/SIMD state saving and restoring macros
4 *
5 * Copyright (C) 2012 ARM Ltd.
6 * Author: Catalin Marinas <catalin.marinas@arm.com>
7 */
8
9#include <asm/assembler.h>
10
/*
 * Store the FP/SIMD register file plus FPSR and FPCR to memory.
 *
 *   state: X register (e.g. x0) holding the address of the save area
 *   tmpnr: number of a scratch general-purpose register; it is used
 *          through both its x<tmpnr> and w<tmpnr> views
 *
 * Layout, relative to the incoming value of state:
 *   bytes 0..511   q0..q31, 16 bytes each
 *   byte  512      FPSR (32 bits)
 *   byte  516      FPCR (32 bits)
 *
 * Clobbers state (the last stp uses pre-index writeback, leaving it
 * advanced by 16 * 30 bytes) and x<tmpnr>.
 */
.macro fpsimd_save state, tmpnr
	stp	q0, q1, [\state, #16 * 0]
	stp	q2, q3, [\state, #16 * 2]
	stp	q4, q5, [\state, #16 * 4]
	stp	q6, q7, [\state, #16 * 6]
	stp	q8, q9, [\state, #16 * 8]
	stp	q10, q11, [\state, #16 * 10]
	stp	q12, q13, [\state, #16 * 12]
	stp	q14, q15, [\state, #16 * 14]
	stp	q16, q17, [\state, #16 * 16]
	stp	q18, q19, [\state, #16 * 18]
	stp	q20, q21, [\state, #16 * 20]
	stp	q22, q23, [\state, #16 * 22]
	stp	q24, q25, [\state, #16 * 24]
	stp	q26, q27, [\state, #16 * 26]
	stp	q28, q29, [\state, #16 * 28]
	/* Writeback form: state now points at the q30 slot. */
	stp	q30, q31, [\state, #16 * 30]!

	/* Two slots past q30 is the first byte after q31. */
	mrs	x\tmpnr, fpsr
	str	w\tmpnr, [\state, #16 * 2]
	mrs	x\tmpnr, fpcr
	str	w\tmpnr, [\state, #16 * 2 + 4]
.endm
33
/*
 * Install a new FPCR value, skipping the system register write when the
 * register already holds that value.
 *
 *   state: X register holding the FPCR value to install
 *   tmp:   scratch X register; receives the current FPCR
 *
 * Clobbers tmp and the condition flags.
 */
.macro fpsimd_restore_fpcr state, tmp
	mrs	\tmp, fpcr
	cmp	\tmp, \state
	/*
	 * Writes to fpcr may be self-synchronising, so do not issue one
	 * when the value is already in place.
	 */
	b.eq	9999f
	msr	fpcr, \state
9999:
.endm
45
/*
 * Reload the FP/SIMD register file plus FPSR and FPCR from memory.
 * Reads the layout written by fpsimd_save: q0..q31 first, then FPSR
 * and FPCR as two consecutive 32-bit words.
 *
 *   state: X register (e.g. x0) holding the address of the save area
 *   tmpnr: number of a scratch general-purpose register
 *
 * Clobbers \state (advanced by the writeback ldp, then reused as the
 * scratch register of fpsimd_restore_fpcr), x<tmpnr> and the condition
 * flags.
 */
.macro fpsimd_restore state, tmpnr
	ldp	q0, q1, [\state, #16 * 0]
	ldp	q2, q3, [\state, #16 * 2]
	ldp	q4, q5, [\state, #16 * 4]
	ldp	q6, q7, [\state, #16 * 6]
	ldp	q8, q9, [\state, #16 * 8]
	ldp	q10, q11, [\state, #16 * 10]
	ldp	q12, q13, [\state, #16 * 12]
	ldp	q14, q15, [\state, #16 * 14]
	ldp	q16, q17, [\state, #16 * 16]
	ldp	q18, q19, [\state, #16 * 18]
	ldp	q20, q21, [\state, #16 * 20]
	ldp	q22, q23, [\state, #16 * 22]
	ldp	q24, q25, [\state, #16 * 24]
	ldp	q26, q27, [\state, #16 * 26]
	ldp	q28, q29, [\state, #16 * 28]
	/* Writeback form: state now points at the q30 slot. */
	ldp	q30, q31, [\state, #16 * 30]!

	ldr	w\tmpnr, [\state, #16 * 2]
	msr	fpsr, x\tmpnr

	/* The address is no longer needed, so state doubles as scratch. */
	ldr	w\tmpnr, [\state, #16 * 2 + 4]
	fpsimd_restore_fpcr x\tmpnr, \state
.endm
69
70/* Sanity-check macros to help avoid encoding garbage instructions */
71
/*
 * Assemble-time check that nr is a general-purpose register number in
 * the range 0..30; otherwise raise an assembler error.
 */
.macro _check_general_reg nr
	.if (\nr) > 30 || (\nr) < 0
		.error "Bad register number \nr."
	.endif
.endm
77
/*
 * Assemble-time check that znr is an SVE vector register number in the
 * range 0..31; otherwise raise an assembler error.
 */
.macro _sve_check_zreg znr
	.if (\znr) > 31 || (\znr) < 0
		.error "Bad Scalable Vector Extension vector register number \znr."
	.endif
.endm
83
/*
 * Assemble-time check that pnr is an SVE predicate register number in
 * the range 0..15; otherwise raise an assembler error.
 */
.macro _sve_check_preg pnr
	.if (\pnr) > 15 || (\pnr) < 0
		.error "Bad Scalable Vector Extension predicate register number \pnr."
	.endif
.endm
89
/*
 * Assemble-time check that n lies in the inclusive range [min, max];
 * otherwise raise an assembler error.
 */
.macro _check_num n, min, max
	.if (\n) > (\max) || (\n) < (\min)
		.error "Number \n out of range [\min,\max]"
	.endif
.endm
95
/*
 * Assemble-time check that v is a valid SME vector select register
 * number, i.e. one of 12..15; otherwise raise an assembler error.
 */
.macro _sme_check_wv v
	.if (\v) > 15 || (\v) < 12
		.error "Bad vector select register \v."
	.endif
.endm
101
/*
 * SVE instruction encodings for assemblers without SVE support
 * (binutils older than 2.28; every clang version that can build the
 * kernel already supports SVE).
 */
104
/*
 * STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL]
 *
 *   nz:     vector register number, 0..31
 *   nxbase: base address register number, 0..30
 *   offset: signed 9-bit immediate, -256..255 (default 0)
 *
 * Field placement: offset[8:3] -> bits 21:16, offset[2:0] -> bits 12:10,
 * nxbase -> bits 9:5, nz -> bits 4:0.
 */
.macro _sve_str_v nz, nxbase, offset=0
	_sve_check_zreg \nz
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe5804000			\
		| (((\offset) & 0x1f8) << 13)	\
		| (((\offset) & 7) << 10)	\
		| ((\nxbase) << 5)		\
		| (\nz)
.endm
116
/*
 * LDR (vector): LDR Z\nz, [X\nxbase, #\offset, MUL VL]
 *
 *   nz:     vector register number, 0..31
 *   nxbase: base address register number, 0..30
 *   offset: signed 9-bit immediate, -256..255 (default 0)
 *
 * Field placement: offset[8:3] -> bits 21:16, offset[2:0] -> bits 12:10,
 * nxbase -> bits 9:5, nz -> bits 4:0.
 */
.macro _sve_ldr_v nz, nxbase, offset=0
	_sve_check_zreg \nz
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0x85804000			\
		| (((\offset) & 0x1f8) << 13)	\
		| (((\offset) & 7) << 10)	\
		| ((\nxbase) << 5)		\
		| (\nz)
.endm
128
/*
 * STR (predicate): STR P\np, [X\nxbase, #\offset, MUL VL]
 *
 *   np:     predicate register number, 0..15
 *   nxbase: base address register number, 0..30
 *   offset: signed 9-bit immediate, -256..255 (default 0)
 *
 * Field placement: offset[8:3] -> bits 21:16, offset[2:0] -> bits 12:10,
 * nxbase -> bits 9:5, np -> bits 3:0.
 */
.macro _sve_str_p np, nxbase, offset=0
	_sve_check_preg \np
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe5800000			\
		| (((\offset) & 0x1f8) << 13)	\
		| (((\offset) & 7) << 10)	\
		| ((\nxbase) << 5)		\
		| (\np)
.endm
140
/*
 * LDR (predicate): LDR P\np, [X\nxbase, #\offset, MUL VL]
 *
 *   np:     predicate register number, 0..15
 *   nxbase: base address register number, 0..30
 *   offset: signed 9-bit immediate, -256..255 (default 0)
 *
 * Field placement: offset[8:3] -> bits 21:16, offset[2:0] -> bits 12:10,
 * nxbase -> bits 9:5, np -> bits 3:0.
 */
.macro _sve_ldr_p np, nxbase, offset=0
	_sve_check_preg \np
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0x85800000			\
		| (((\offset) & 0x1f8) << 13)	\
		| (((\offset) & 7) << 10)	\
		| ((\nxbase) << 5)		\
		| (\np)
.endm
152
/*
 * RDVL X\nx, #\imm
 *
 *   nx:  destination register number, 0..30
 *   imm: signed 6-bit immediate, -32..31, placed in bits 10:5
 */
.macro _sve_rdvl nx, imm
	_check_general_reg \nx
	_check_num (\imm), -0x20, 0x1f
	.inst	0x04bf5000			\
		| (((\imm) & 0x3f) << 5)	\
		| (\nx)
.endm
161
/*
 * RDFFR (unpredicated): RDFFR P\np.B
 *
 *   np: destination predicate register number, 0..15 (bits 3:0)
 */
.macro _sve_rdffr np
	_sve_check_preg \np
	.inst	0x2519f000 | (\np)
.endm
168
/*
 * WRFFR P\np.B
 *
 *   np: source predicate register number, 0..15 (placed at bit 5 upwards)
 */
.macro _sve_wrffr np
	_sve_check_preg \np
	.inst	0x25289000 | ((\np) << 5)
.endm
175
/*
 * PFALSE P\np.B
 *
 *   np: destination predicate register number, 0..15 (bits 3:0)
 */
.macro _sve_pfalse np
	_sve_check_preg \np
	.inst	0x2518e400 | (\np)
.endm
182
183/* SME instruction encodings for non-SME-capable assemblers */
184/* (pre binutils 2.38/LLVM 13) */
185
/*
 * RDSVL X\nx, #\imm
 *
 *   nx:  destination register number, 0..30
 *   imm: signed 6-bit immediate, -32..31, placed in bits 10:5
 */
.macro _sme_rdsvl nx, imm
	_check_general_reg \nx
	_check_num (\imm), -0x20, 0x1f
	.inst	0x04bf5800			\
		| (((\imm) & 0x3f) << 5)	\
		| (\nx)
.endm
194
/*
 * STR (vector from ZA array):
 *	STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
 *
 *   nw:     vector select register number, 12..15 (low two bits are
 *           encoded in bits 14:13)
 *   nxbase: base address register number, 0..30 (bits 9:5)
 *   offset: unsigned 4-bit immediate, 0..15 (bits 3:0, default 0); it
 *           is both the vector select offset and the memory offset
 *
 * The range check matches the width of the encoded field, so an offset
 * that does not fit is rejected at assembly time rather than silently
 * truncated into a different instruction.
 */
.macro _sme_str_zav nw, nxbase, offset=0
	_sme_check_wv \nw
	_check_general_reg \nxbase
	_check_num (\offset), 0, 15
	.inst	0xe1200000			\
		| (((\nw) & 3) << 13)		\
		| ((\nxbase) << 5)		\
		| ((\offset) & 0xf)
.endm
208
/*
 * LDR (vector to ZA array):
 *	LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
 *
 *   nw:     vector select register number, 12..15 (low two bits are
 *           encoded in bits 14:13)
 *   nxbase: base address register number, 0..30 (bits 9:5)
 *   offset: unsigned 4-bit immediate, 0..15 (bits 3:0, default 0); it
 *           is both the vector select offset and the memory offset
 *
 * The range check matches the width of the encoded field, so an offset
 * that does not fit is rejected at assembly time rather than silently
 * truncated into a different instruction.
 */
.macro _sme_ldr_zav nw, nxbase, offset=0
	_sme_check_wv \nw
	_check_general_reg \nxbase
	_check_num (\offset), 0, 15
	.inst	0xe1000000			\
		| (((\nw) & 3) << 13)		\
		| ((\nxbase) << 5)		\
		| ((\offset) & 0xf)
.endm
222
/*
 * LDR (ZT0)
 *
 *	LDR ZT0, [X\nx]
 *
 *   nx: base address register number, 0..30 (bits 9:5)
 *
 * The argument is parenthesised before shifting so that an expression
 * argument is placed in the register field as a whole.
 */
.macro _ldr_zt nx
	_check_general_reg \nx
	.inst	0xe11f8000			\
		| ((\nx) << 5)
.endm
233
/*
 * STR (ZT0)
 *
 *	STR ZT0, [X\nx]
 *
 *   nx: base address register number, 0..30 (bits 9:5)
 *
 * The argument is parenthesised before shifting so that an expression
 * argument is placed in the register field as a whole.
 */
.macro _str_zt nx
	_check_general_reg \nx
	.inst	0xe13f8000			\
		| ((\nx) << 5)
.endm
244
/*
 * Recursive worker behind _for; expanded while .altmacro is in effect.
 *
 * Invokes _for__body once for every integer in [from, to], in ascending
 * order. A range of more than one value is split at its midpoint and
 * each half is handled by a nested expansion. The % prefix makes the
 * assembler evaluate each bound to a plain number before it is passed
 * on.
 */
.macro __for from:req, to:req
	.ifne (\from) - (\to)
		__for %\from, %((\from) + ((\to) - (\from)) / 2)
		__for %((\from) + ((\to) - (\from)) / 2 + 1), %\to
	.else
		_for__body %\from
	.endif
.endm
253
/*
 * Assemble-time loop: expand insn once for each integer value of var in
 * the range [from, to].
 *
 *   var:  name of the loop variable; insn refers to it with a leading
 *         backslash
 *   from: first value
 *   to:   last value (inclusive)
 *   insn: the statement to repeat (rest of the line)
 *
 * A temporary macro _for__body, taking var as its only parameter, is
 * defined around insn, driven by __for, and purged again afterwards.
 * The range is walked in .altmacro mode, while insn itself is expanded
 * in .noaltmacro mode; the assembler is left in .noaltmacro mode.
 */
.macro _for var:req, from:req, to:req, insn:vararg
	.macro _for__body \var:req
		.noaltmacro
		\insn
		.altmacro
	.endm

	.altmacro
	__for \from, \to
	.noaltmacro

	.purgem _for__body
.endm
267
/*
 * Update ZCR_EL1.LEN with the new VQ.
 *
 *   xvqminus1: X register whose value is ORed into the LEN field
 *   xtmp:      scratch X register; receives the current ZCR_EL1
 *   xtmp2:     scratch X register; receives the updated value
 *
 * The register is written only when the value actually changes.
 * Clobbers xtmp, xtmp2 and the condition flags.
 */
.macro sve_load_vq xvqminus1, xtmp, xtmp2
	mrs_s	\xtmp, SYS_ZCR_EL1
	bic	\xtmp2, \xtmp, ZCR_ELx_LEN_MASK
	orr	\xtmp2, \xtmp2, \xvqminus1
	cmp	\xtmp2, \xtmp
	b.eq	921f
	msr_s	SYS_ZCR_EL1, \xtmp2	/* self-synchronising */
921:
.endm
278
/*
 * Update SMCR_EL1.LEN with the new VQ.
 *
 *   xvqminus1: X register whose value is ORed into the LEN field
 *   xtmp:      scratch X register; receives the current SMCR_EL1
 *   xtmp2:     scratch X register; receives the updated value
 *
 * The register is written only when the value actually changes.
 * Clobbers xtmp, xtmp2 and the condition flags.
 */
.macro sme_load_vq xvqminus1, xtmp, xtmp2
	mrs_s	\xtmp, SYS_SMCR_EL1
	bic	\xtmp2, \xtmp, SMCR_ELx_LEN_MASK
	orr	\xtmp2, \xtmp2, \xvqminus1
	cmp	\xtmp2, \xtmp
	b.eq	921f
	msr_s	SYS_SMCR_EL1, \xtmp2	/* self-synchronising */
921:
.endm
289
/*
 * Preserve the first 128 bits of Z\nz and zero the rest, by copying the
 * register onto itself through its V (128-bit) view.
 *
 *   nz: vector register number, 0..31
 */
.macro _sve_flush_z nz
	_sve_check_zreg \nz
	mov	v\nz\().16b, v\nz\().16b
.endm
295
/* Apply _sve_flush_z to every vector register, Z0..Z31. */
.macro sve_flush_z
	_for n, 0, 31, _sve_flush_z \n
.endm
/* Set every predicate register, P0..P15, to all-false. */
.macro sve_flush_p
	_for n, 0, 15, _sve_pfalse \n
.endm
/* Write the current contents of P0 to the FFR. */
.macro sve_flush_ffr
	_sve_wrffr 0
.endm
305
/*
 * Save the SVE register state plus FPSR and FPCR.
 *
 *   nxbase:   number of the X register holding the base address
 *   xpfpsr:   X register pointing at the FPSR word; FPCR is stored in
 *             the 32-bit word that follows it
 *   save_ffr: register tested with cbz; when zero, an all-false
 *             predicate is stored in place of the FFR
 *   nxtmp:    number of a scratch general-purpose register
 *
 * Offsets relative to the base, in MUL VL units of each instruction:
 *   Zn  at n - 34   (n = 0..31)
 *   Pn  at n - 16   (n = 0..15)
 *   FFR at 0
 *
 * P0 is used as the staging register for the FFR and is reloaded from
 * its own save slot afterwards. Clobbers x<nxtmp>.
 */
.macro sve_save nxbase, xpfpsr, save_ffr, nxtmp
	_for n, 0, 31, _sve_str_v \n, \nxbase, \n - 34
	_for n, 0, 15, _sve_str_p \n, \nxbase, \n - 16

	cbz	\save_ffr, 921f
	_sve_rdffr 0
	b	922f
921:
	_sve_pfalse 0			/* Zero out FFR */
922:
	_sve_str_p 0, \nxbase
	_sve_ldr_p 0, \nxbase, -16	/* put the saved P0 back */

	mrs	x\nxtmp, fpsr
	str	w\nxtmp, [\xpfpsr]
	mrs	x\nxtmp, fpcr
	str	w\nxtmp, [\xpfpsr, #4]
.endm
322
/*
 * Reload the SVE register state plus FPSR and FPCR, using the layout
 * written by sve_save.
 *
 *   nxbase:      number of the X register holding the base address
 *   xpfpsr:      X register pointing at the FPSR word; FPCR is read
 *                from the 32-bit word that follows it
 *   restore_ffr: register tested with cbz; when zero, the FFR is left
 *                untouched
 *   nxtmp:       number of a scratch general-purpose register
 *
 * The FFR is restored through P0, so it is done before the predicate
 * registers themselves are reloaded. Clobbers x<nxtmp>.
 */
.macro sve_load nxbase, xpfpsr, restore_ffr, nxtmp
	_for n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34

	cbz	\restore_ffr, 921f
	_sve_ldr_p 0, \nxbase
	_sve_wrffr 0
921:
	_for n, 0, 15, _sve_ldr_p \n, \nxbase, \n - 16

	ldr	w\nxtmp, [\xpfpsr]
	msr	fpsr, x\nxtmp
	ldr	w\nxtmp, [\xpfpsr, #4]
	msr	fpcr, x\nxtmp
.endm
336
/*
 * Store the ZA array to memory, one vector per loop iteration.
 *
 *   nxbase: number of the X register holding the destination address
 *   xvl:    X register that serves both as the number of iterations and
 *           as the amount added to the address after each store
 *   nw:     number of the vector select register, 12..15; used as the
 *           loop counter
 *
 * Clobbers x<nxbase> (left pointing past the last store), x<nw> and
 * the condition flags.
 */
.macro sme_save_za nxbase, xvl, nw
	mov	w\nw, #0			/* start at vector 0 */

423:
	_sme_str_zav \nw, \nxbase
	add	x\nxbase, x\nxbase, \xvl	/* next slot in memory */
	add	x\nw, x\nw, #1			/* next vector */
	cmp	\xvl, x\nw
	b.ne	423b
.endm
347
/*
 * Load the ZA array from memory, one vector per loop iteration.
 *
 *   nxbase: number of the X register holding the source address
 *   xvl:    X register that serves both as the number of iterations and
 *           as the amount added to the address after each load
 *   nw:     number of the vector select register, 12..15; used as the
 *           loop counter
 *
 * Clobbers x<nxbase> (left pointing past the last load), x<nw> and the
 * condition flags.
 */
.macro sme_load_za nxbase, xvl, nw
	mov	w\nw, #0			/* start at vector 0 */

423:
	_sme_ldr_zav \nw, \nxbase
	add	x\nxbase, x\nxbase, \xvl	/* next slot in memory */
	add	x\nw, x\nw, #1			/* next vector */
	cmp	\xvl, x\nw
	b.ne	423b
.endm
358