// SPDX-License-Identifier: GPL-2.0
/*
 * BPF JIT compiler for PA-RISC (64-bit)
 *
 * Copyright(c) 2023 Helge Deller <deller@gmx.de>
 *
 * The code is based on the BPF JIT compiler for RV64 by Björn Töpel.
 *
 * TODO:
 * - check if bpf_jit_needs_zext() is needed (currently enabled)
 * - implement arch_prepare_bpf_trampoline(), poke(), ...
 */

#include <linux/bitfield.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/libgcc.h>
#include "bpf_jit.h"

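/*
 * Map eBPF registers to hppa registers.  BPF_REG_0 shares the hppa return
 * value register; the remaining BPF registers live in the general registers
 * r5-r15, which are callee-saved in the hppa calling convention, so their
 * values survive the calls emitted by the JIT.
 */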
static const int regmap[] = {
	[BPF_REG_0] =	HPPA_REG_RET0,
	[BPF_REG_1] =	HPPA_R(5),
	[BPF_REG_2] =	HPPA_R(6),
	[BPF_REG_3] =	HPPA_R(7),
	[BPF_REG_4] =	HPPA_R(8),
	[BPF_REG_5] =	HPPA_R(9),
	[BPF_REG_6] =	HPPA_R(10),
	[BPF_REG_7] =	HPPA_R(11),
	[BPF_REG_8] =	HPPA_R(12),
	[BPF_REG_9] =	HPPA_R(13),
	[BPF_REG_FP] =	HPPA_R(14),
	[BPF_REG_AX] =	HPPA_R(15),
};

/*
 * Stack layout during BPF program execution (note: stack grows up):
 *
 *                     high
 *   HPPA64 sp =>  +----------+ <= HPPA64 fp
 *                 | saved sp |
 *                 | saved rp |
 *                 |   ...    | HPPA64 callee-saved registers
 *                 | curr args|
 *                 | local var|
 *                 +----------+ <= (BPF FP)
 *                 |          |
 *                 |   ...    | BPF program stack
 *                 |          |
 *                 |   ...    | Function call stack
 *                 |          |
 *                 +----------+
 *                     low
 */

/* Alignment used when rounding up the frame and BPF stack sizes. */
#define STACK_ALIGN	FRAME_SIZE

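/*
 * The address of the outermost program's epilogue ("exit pointer") is kept
 * at sp - FRAME_SIZE.  The prologue stores it there, and tail calls and the
 * exit path load and jump through it, so a chain of tail calls eventually
 * returns via the first program's epilogue.
 */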
#define EXIT_PTR_LOAD(reg)	hppa64_ldd_im16(-FRAME_SIZE, HPPA_REG_SP, reg)
#define EXIT_PTR_STORE(reg)	hppa64_std_im16(reg, -FRAME_SIZE, HPPA_REG_SP)
#define EXIT_PTR_JUMP(reg, nop)	hppa_bv(HPPA_REG_ZERO, reg, nop)

static u8 bpf_to_hppa_reg(int bpf_reg, struct hppa_jit_context *ctx)
{
	u8 reg = regmap[bpf_reg];

	REG_SET_SEEN(ctx, reg);
	return reg;
}

static void emit_hppa_copy(const s8 rs, const s8 rd, struct hppa_jit_context *ctx)
{
	REG_SET_SEEN(ctx, rd);
	if (OPTIMIZE_HPPA && (rs == rd))
		return;
	REG_SET_SEEN(ctx, rs);
	emit(hppa_copy(rs, rd), ctx);
}

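/*
 * Emit a 64-bit deposit instruction (depd or depd,z): a "len"-bit field
 * taken from "src" is inserted into "target" at position "pos".  When
 * no_zero is set, the bits outside the deposited field are preserved,
 * otherwise they are cleared.  The helpers below use this for left shifts
 * and for patching the upper 32 bits of 64-bit immediates.
 */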
static void emit_hppa64_depd(u8 src, u8 pos, u8 len, u8 target, bool no_zero, struct hppa_jit_context *ctx)
{
	int c;

	pos &= (BITS_PER_LONG - 1);
	pos = 63 - pos;
	len = 64 - len;
	c =  (len < 32)  ? 0x4 : 0;
	c |= (pos >= 32) ? 0x2 : 0;
	c |= (no_zero)   ? 0x1 : 0;
	emit(hppa_t10_insn(0x3c, target, src, 0, c, pos & 0x1f, len & 0x1f), ctx);
}

static void emit_hppa64_shld(u8 src, int num, u8 target, struct hppa_jit_context *ctx)
{
	emit_hppa64_depd(src, 63-num, 64-num, target, 0, ctx);
}

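/*
 * Emit 64-bit (extrd) and 32-bit (extrw) extract instructions: a "len"-bit
 * field ending at bit "pos" of "src" is copied into the low bits of
 * "target", zero- or sign-extended depending on signed_op.  These back the
 * right-shift helpers and the zext32/sext32 macros below.
 */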
static void emit_hppa64_extrd(u8 src, u8 pos, u8 len, u8 target, bool signed_op, struct hppa_jit_context *ctx)
{
	int c;

	pos &= (BITS_PER_LONG - 1);
	len = 64 - len;
	c =  (len <  32) ? 0x4 : 0;
	c |= (pos >= 32) ? 0x2 : 0;
	c |= signed_op   ? 0x1 : 0;
	emit(hppa_t10_insn(0x36, src, target, 0, c, pos & 0x1f, len & 0x1f), ctx);
}

static void emit_hppa64_extrw(u8 src, u8 pos, u8 len, u8 target, bool signed_op, struct hppa_jit_context *ctx)
{
	int c;

	pos &= (32 - 1);
	len = 32 - len;
	c = 0x06 | (signed_op ? 1 : 0);
	emit(hppa_t10_insn(0x34, src, target, 0, c, pos, len), ctx);
}

#define emit_hppa64_zext32(r, target, ctx) \
	emit_hppa64_extrd(r, 63, 32, target, false, ctx)
#define emit_hppa64_sext32(r, target, ctx) \
	emit_hppa64_extrd(r, 63, 32, target, true, ctx)

static void emit_hppa64_shrd(u8 src, int num, u8 target, bool signed_op, struct hppa_jit_context *ctx)
{
	emit_hppa64_extrd(src, 63-num, 64-num, target, signed_op, ctx);
}

static void emit_hppa64_shrw(u8 src, int num, u8 target, bool signed_op, struct hppa_jit_context *ctx)
{
	emit_hppa64_extrw(src, 31-num, 32-num, target, signed_op, ctx);
}

/* Emit variable-length instructions for 32-bit imm */
static void emit_imm32(u8 rd, s32 imm, struct hppa_jit_context *ctx)
{
	u32 lower = im11(imm);

	REG_SET_SEEN(ctx, rd);
	if (OPTIMIZE_HPPA && relative_bits_ok(imm, 14)) {
		emit(hppa_ldi(imm, rd), ctx);
		return;
	}
	if (OPTIMIZE_HPPA && lower == imm) {
		emit(hppa_ldo(lower, HPPA_REG_ZERO, rd), ctx);
		return;
	}
	emit(hppa_ldil(imm, rd), ctx);
	if (OPTIMIZE_HPPA && (lower == 0))
		return;
	emit(hppa_ldo(lower, rd, rd), ctx);
}

static bool is_32b_int(s64 val)
{
	return val == (s32) val;
}

/* Emit variable-length instructions for 64-bit imm */
static void emit_imm(u8 rd, s64 imm, u8 tmpreg, struct hppa_jit_context *ctx)
{
	u32 upper32;

	/* get lower 32-bits into rd, sign extended */
	emit_imm32(rd, imm, ctx);

	/* do we have upper 32-bits too ? */
	if (OPTIMIZE_HPPA && is_32b_int(imm))
		return;

	/* load upper 32-bits into lower tmpreg and deposit into rd */
	upper32 = imm >> 32;
	if (upper32 || !OPTIMIZE_HPPA) {
		emit_imm32(tmpreg, upper32, ctx);
		emit_hppa64_depd(tmpreg, 31, 32, rd, 1, ctx);
	} else
		emit_hppa64_depd(HPPA_REG_ZERO, 31, 32, rd, 1, ctx);
}

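/*
 * Emit a jump by "paoff" instruction words relative to the current emit
 * position.  Short distances fit into a single bl; otherwise the absolute
 * target address (kernel text sits below 4GB) is loaded into r31 and an
 * external branch through it is emitted.
 */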
static int emit_jump(signed long paoff, bool force_far,
			       struct hppa_jit_context *ctx)
{
	unsigned long pc, addr;

	/* Note: Use 2 instructions for jumps if force_far is set. */
	if (relative_bits_ok(paoff - HPPA_BRANCH_DISPLACEMENT, 22)) {
		/* use BL,long branch followed by nop() */
		emit(hppa64_bl_long(paoff - HPPA_BRANCH_DISPLACEMENT), ctx);
		if (force_far)
			emit(hppa_nop(), ctx);
		return 0;
	}

	pc = (uintptr_t) &ctx->insns[ctx->ninsns];
	addr = pc + (paoff * HPPA_INSN_SIZE);
	/* even the 64-bit kernel runs in memory below 4GB */
	if (WARN_ON_ONCE(addr >> 32))
		return -E2BIG;
	emit(hppa_ldil(addr, HPPA_REG_R31), ctx);
	emit(hppa_be_l(im11(addr) >> 2, HPPA_REG_R31, NOP_NEXT_INSTR), ctx);
	return 0;
}

static void __build_epilogue(bool is_tail_call, struct hppa_jit_context *ctx)
{
	int i;

	if (is_tail_call) {
		/*
		 * goto *(t0 + 4);
		 * Skips first instruction of prologue which initializes tail
		 * call counter. Assumes t0 contains address of target program,
		 * see emit_bpf_tail_call.
		 */
		emit(hppa_ldo(1 * HPPA_INSN_SIZE, HPPA_REG_T0, HPPA_REG_T0), ctx);
		emit(hppa_bv(HPPA_REG_ZERO, HPPA_REG_T0, EXEC_NEXT_INSTR), ctx);
		/* in delay slot: */
		emit(hppa_copy(HPPA_REG_TCC, HPPA_REG_TCC_IN_INIT), ctx);

		return;
	}

	/* load epilogue function pointer and jump to it. */
	/* exit point is either at the next instruction, or the outermost TCC exit function */
	emit(EXIT_PTR_LOAD(HPPA_REG_RP), ctx);
	emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);

	/* NOTE: we are 64-bit and big-endian, so return lower sign-extended 32-bit value */
	emit_hppa64_sext32(regmap[BPF_REG_0], HPPA_REG_RET0, ctx);

	/* Restore callee-saved registers. */
	for (i = 3; i <= 15; i++) {
		if (OPTIMIZE_HPPA && !REG_WAS_SEEN(ctx, HPPA_R(i)))
			continue;
		emit(hppa64_ldd_im16(-REG_SIZE * i, HPPA_REG_SP, HPPA_R(i)), ctx);
	}

	/* load original return pointer (stored by outermost TCC function) */
	emit(hppa64_ldd_im16(-2*REG_SIZE, HPPA_REG_SP, HPPA_REG_RP), ctx);
	emit(hppa_bv(HPPA_REG_ZERO, HPPA_REG_RP, EXEC_NEXT_INSTR), ctx);
	/* in delay slot: */
	emit(hppa64_ldd_im5(-REG_SIZE, HPPA_REG_SP, HPPA_REG_SP), ctx);

	emit(hppa_nop(), ctx); // XXX why is there one instruction too few ??
}

static int emit_branch(u8 op, u8 rd, u8 rs, signed long paoff,
			struct hppa_jit_context *ctx)
{
	int e, s;
	bool far = false;
	int off;

	if (op == BPF_JSET) {
		/*
		 * BPF_JSET is a special case: it has no inverse so translate
		 * to and() function and compare against zero
		 */
		emit(hppa_and(rd, rs, HPPA_REG_T0), ctx);
		paoff -= 1; /* reduce offset due to hppa_and() above */
		rd = HPPA_REG_T0;
		rs = HPPA_REG_ZERO;
		op = BPF_JNE;
	}

	/* set start after BPF_JSET */
	s = ctx->ninsns;

	if (!relative_branch_ok(paoff - HPPA_BRANCH_DISPLACEMENT + 1, 12)) {
		op = invert_bpf_cond(op);
		far = true;
	}

	/*
	 * For a far branch, the condition is negated and we jump over the
	 * branch itself, and the two instructions from emit_jump.
	 * For a near branch, just use paoff.
	 */
	off = far ? (2 - HPPA_BRANCH_DISPLACEMENT) : paoff - HPPA_BRANCH_DISPLACEMENT;

	switch (op) {
	/* IF (dst COND src) JUMP off */
	case BPF_JEQ:
		emit(hppa_beq(rd, rs, off), ctx);
		break;
	case BPF_JGT:
		emit(hppa_bgtu(rd, rs, off), ctx);
		break;
	case BPF_JLT:
		emit(hppa_bltu(rd, rs, off), ctx);
		break;
	case BPF_JGE:
		emit(hppa_bgeu(rd, rs, off), ctx);
		break;
	case BPF_JLE:
		emit(hppa_bleu(rd, rs, off), ctx);
		break;
	case BPF_JNE:
		emit(hppa_bne(rd, rs, off), ctx);
		break;
	case BPF_JSGT:
		emit(hppa_bgt(rd, rs, off), ctx);
		break;
	case BPF_JSLT:
		emit(hppa_blt(rd, rs, off), ctx);
		break;
	case BPF_JSGE:
		emit(hppa_bge(rd, rs, off), ctx);
		break;
	case BPF_JSLE:
		emit(hppa_ble(rd, rs, off), ctx);
		break;
	default:
		WARN_ON(1);
	}

	if (far) {
		int ret;

		e = ctx->ninsns;
		/* Adjust for extra insns. */
		paoff -= (e - s);
		ret = emit_jump(paoff, true, ctx);
		if (ret)
			return ret;
	} else {
		/*
		 * always allocate 2 nops instead of the far branch to
		 * reduce translation loops
		 */
		emit(hppa_nop(), ctx);
		emit(hppa_nop(), ctx);
	}
	return 0;
}

static void emit_zext_32(u8 reg, struct hppa_jit_context *ctx)
{
	emit_hppa64_zext32(reg, reg, ctx);
}

static void emit_bpf_tail_call(int insn, struct hppa_jit_context *ctx)
{
	/*
	 * R1 -> &ctx
	 * R2 -> &array
	 * R3 -> index
	 */
	int off;
	const s8 arr_reg = regmap[BPF_REG_2];
	const s8 idx_reg = regmap[BPF_REG_3];
	struct bpf_array bpfa;
	struct bpf_prog bpfp;

	/* if there is any tail call, we need to save & restore all registers */
	REG_SET_SEEN_ALL(ctx);

	/* get address of TCC main exit function for error case into rp */
	emit(EXIT_PTR_LOAD(HPPA_REG_RP), ctx);

	/* max_entries = array->map.max_entries; */
	off = offsetof(struct bpf_array, map.max_entries);
	BUILD_BUG_ON(sizeof(bpfa.map.max_entries) != 4);
	emit(hppa_ldw(off, arr_reg, HPPA_REG_T1), ctx);

	/*
	 * if (index >= max_entries)
	 *   goto out;
	 */
	emit(hppa_bltu(idx_reg, HPPA_REG_T1, 2 - HPPA_BRANCH_DISPLACEMENT), ctx);
	emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);

	/*
	 * if (--tcc < 0)
	 *   goto out;
	 */
	REG_FORCE_SEEN(ctx, HPPA_REG_TCC);
	emit(hppa_ldo(-1, HPPA_REG_TCC, HPPA_REG_TCC), ctx);
	emit(hppa_bge(HPPA_REG_TCC, HPPA_REG_ZERO, 2 - HPPA_BRANCH_DISPLACEMENT), ctx);
	emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);

	/*
	 * prog = array->ptrs[index];
	 * if (!prog)
	 *   goto out;
	 */
	BUILD_BUG_ON(sizeof(bpfa.ptrs[0]) != 8);
	emit(hppa64_shladd(idx_reg, 3, arr_reg, HPPA_REG_T0), ctx);
	off = offsetof(struct bpf_array, ptrs);
	BUILD_BUG_ON(off < 16);
	emit(hppa64_ldd_im16(off, HPPA_REG_T0, HPPA_REG_T0), ctx);
	emit(hppa_bne(HPPA_REG_T0, HPPA_REG_ZERO, 2 - HPPA_BRANCH_DISPLACEMENT), ctx);
	emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);

	/*
	 * tcc = temp_tcc;
	 * goto *(prog->bpf_func + 4);
	 */
	off = offsetof(struct bpf_prog, bpf_func);
	BUILD_BUG_ON(off < 16);
	BUILD_BUG_ON(sizeof(bpfp.bpf_func) != 8);
	emit(hppa64_ldd_im16(off, HPPA_REG_T0, HPPA_REG_T0), ctx);
	/* Epilogue jumps to *(t0 + 4). */
	__build_epilogue(true, ctx);
}

static void init_regs(u8 *rd, u8 *rs, const struct bpf_insn *insn,
		      struct hppa_jit_context *ctx)
{
	u8 code = insn->code;

	switch (code) {
	case BPF_JMP | BPF_JA:
	case BPF_JMP | BPF_CALL:
	case BPF_JMP | BPF_EXIT:
	case BPF_JMP | BPF_TAIL_CALL:
		break;
	default:
		*rd = bpf_to_hppa_reg(insn->dst_reg, ctx);
	}

	if (code & (BPF_ALU | BPF_X) || code & (BPF_ALU64 | BPF_X) ||
	    code & (BPF_JMP | BPF_X) || code & (BPF_JMP32 | BPF_X) ||
	    code & BPF_LDX || code & BPF_STX)
		*rs = bpf_to_hppa_reg(insn->src_reg, ctx);
}

static void emit_zext_32_rd_rs(u8 *rd, u8 *rs, struct hppa_jit_context *ctx)
{
	emit_hppa64_zext32(*rd, HPPA_REG_T2, ctx);
	*rd = HPPA_REG_T2;
	emit_hppa64_zext32(*rs, HPPA_REG_T1, ctx);
	*rs = HPPA_REG_T1;
}

static void emit_sext_32_rd_rs(u8 *rd, u8 *rs, struct hppa_jit_context *ctx)
{
	emit_hppa64_sext32(*rd, HPPA_REG_T2, ctx);
	*rd = HPPA_REG_T2;
	emit_hppa64_sext32(*rs, HPPA_REG_T1, ctx);
	*rs = HPPA_REG_T1;
}

static void emit_zext_32_rd_t1(u8 *rd, struct hppa_jit_context *ctx)
{
	emit_hppa64_zext32(*rd, HPPA_REG_T2, ctx);
	*rd = HPPA_REG_T2;
	emit_zext_32(HPPA_REG_T1, ctx);
}

static void emit_sext_32_rd(u8 *rd, struct hppa_jit_context *ctx)
{
	emit_hppa64_sext32(*rd, HPPA_REG_T2, ctx);
	*rd = HPPA_REG_T2;
}

static bool is_signed_bpf_cond(u8 cond)
{
	return cond == BPF_JSGT || cond == BPF_JSLT ||
		cond == BPF_JSGE || cond == BPF_JSLE;
}

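/*
 * Emit a call to the helper or BPF function at "addr": copy the BPF
 * argument registers into the hppa argument registers, dereference the
 * Elf64_Fdesc function descriptor to obtain the real entry point and gp,
 * and branch there with bve,l.  A fixed-length load sequence is used so
 * the number of emitted instructions does not depend on the address value.
 */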
static void emit_call(u64 addr, bool fixed, struct hppa_jit_context *ctx)
{
	const int offset_sp = 2*FRAME_SIZE;

	emit(hppa_ldo(offset_sp, HPPA_REG_SP, HPPA_REG_SP), ctx);

	emit_hppa_copy(regmap[BPF_REG_1], HPPA_REG_ARG0, ctx);
	emit_hppa_copy(regmap[BPF_REG_2], HPPA_REG_ARG1, ctx);
	emit_hppa_copy(regmap[BPF_REG_3], HPPA_REG_ARG2, ctx);
	emit_hppa_copy(regmap[BPF_REG_4], HPPA_REG_ARG3, ctx);
	emit_hppa_copy(regmap[BPF_REG_5], HPPA_REG_ARG4, ctx);

	/* Backup TCC. */
	REG_FORCE_SEEN(ctx, HPPA_REG_TCC_SAVED);
	if (REG_WAS_SEEN(ctx, HPPA_REG_TCC))
		emit(hppa_copy(HPPA_REG_TCC, HPPA_REG_TCC_SAVED), ctx);

	/*
	 * Use ldil() to load absolute address. Don't use emit_imm as the
	 * number of emitted instructions should not depend on the value of
	 * addr.
	 */
	WARN_ON(addr >> 32);
	/* load function address and gp from Elf64_Fdesc descriptor */
	emit(hppa_ldil(addr, HPPA_REG_R31), ctx);
	emit(hppa_ldo(im11(addr), HPPA_REG_R31, HPPA_REG_R31), ctx);
	emit(hppa64_ldd_im16(offsetof(struct elf64_fdesc, addr),
			     HPPA_REG_R31, HPPA_REG_RP), ctx);
	emit(hppa64_bve_l_rp(HPPA_REG_RP), ctx);
	emit(hppa64_ldd_im16(offsetof(struct elf64_fdesc, gp),
			     HPPA_REG_R31, HPPA_REG_GP), ctx);

	/* Restore TCC. */
	if (REG_WAS_SEEN(ctx, HPPA_REG_TCC))
		emit(hppa_copy(HPPA_REG_TCC_SAVED, HPPA_REG_TCC), ctx);

	emit(hppa_ldo(-offset_sp, HPPA_REG_SP, HPPA_REG_SP), ctx);

	/* Set return value. */
	emit_hppa_copy(HPPA_REG_RET0, regmap[BPF_REG_0], ctx);
}

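/*
 * Call a 64-bit libgcc-style helper (e.g. __muldi3 or the division
 * routines) with arg0/arg1 as operands and put the result back into arg0.
 * For BPF_DIV/BPF_MOD a divisor of zero skips the call and yields the BPF
 * semantics: dst = 0 for DIV, dst left unchanged for MOD.
 */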
static void emit_call_libgcc_ll(void *func, const s8 arg0,
		const s8 arg1, u8 opcode, struct hppa_jit_context *ctx)
{
	u64 func_addr;

	if (BPF_CLASS(opcode) == BPF_ALU) {
		emit_hppa64_zext32(arg0, HPPA_REG_ARG0, ctx);
		emit_hppa64_zext32(arg1, HPPA_REG_ARG1, ctx);
	} else {
		emit_hppa_copy(arg0, HPPA_REG_ARG0, ctx);
		emit_hppa_copy(arg1, HPPA_REG_ARG1, ctx);
	}

	/* libgcc overwrites HPPA_REG_RET0, so keep a copy in HPPA_REG_TCC_SAVED */
	if (arg0 != HPPA_REG_RET0) {
		REG_SET_SEEN(ctx, HPPA_REG_TCC_SAVED);
		emit(hppa_copy(HPPA_REG_RET0, HPPA_REG_TCC_SAVED), ctx);
	}

	/* set up stack */
	emit(hppa_ldo(FRAME_SIZE, HPPA_REG_SP, HPPA_REG_SP), ctx);

	func_addr = (uintptr_t) func;
	/* load function address and gp from Elf64_Fdesc descriptor */
	emit_imm(HPPA_REG_R31, func_addr, arg0, ctx);
	emit(hppa64_ldd_im16(offsetof(struct elf64_fdesc, addr),
			     HPPA_REG_R31, HPPA_REG_RP), ctx);
	/* skip the following bve_l instruction if divisor is 0. */
	if (BPF_OP(opcode) == BPF_DIV || BPF_OP(opcode) == BPF_MOD) {
		if (BPF_OP(opcode) == BPF_DIV)
			emit_hppa_copy(HPPA_REG_ZERO, HPPA_REG_RET0, ctx);
		else
			emit_hppa_copy(HPPA_REG_ARG0, HPPA_REG_RET0, ctx);
		emit(hppa_beq(HPPA_REG_ARG1, HPPA_REG_ZERO, 2 - HPPA_BRANCH_DISPLACEMENT), ctx);
	}
	emit(hppa64_bve_l_rp(HPPA_REG_RP), ctx);
	emit(hppa64_ldd_im16(offsetof(struct elf64_fdesc, gp),
			     HPPA_REG_R31, HPPA_REG_GP), ctx);

	emit(hppa_ldo(-FRAME_SIZE, HPPA_REG_SP, HPPA_REG_SP), ctx);

	emit_hppa_copy(HPPA_REG_RET0, arg0, ctx);

	/* restore HPPA_REG_RET0 */
	if (arg0 != HPPA_REG_RET0)
		emit(hppa_copy(HPPA_REG_TCC_SAVED, HPPA_REG_RET0), ctx);
}

static void emit_store(const s8 rd, const s8 rs, s16 off,
			  struct hppa_jit_context *ctx, const u8 size,
			  const u8 mode)
{
	s8 dstreg;

	/* need to calculate address since offset does not fit in 14 bits? */
	if (relative_bits_ok(off, 14))
		dstreg = rd;
	else {
		/* need to use R1 here, since addil puts result into R1 */
		dstreg = HPPA_REG_R1;
		emit(hppa_addil(off, rd), ctx);
		off = im11(off);
	}

	switch (size) {
	case BPF_B:
		emit(hppa_stb(rs, off, dstreg), ctx);
		break;
	case BPF_H:
		emit(hppa_sth(rs, off, dstreg), ctx);
		break;
	case BPF_W:
		emit(hppa_stw(rs, off, dstreg), ctx);
		break;
	case BPF_DW:
		if (off & 7) {
			emit(hppa_ldo(off, dstreg, HPPA_REG_R1), ctx);
			emit(hppa64_std_im5(rs, 0, HPPA_REG_R1), ctx);
		} else if (off >= -16 && off <= 15)
			emit(hppa64_std_im5(rs, off, dstreg), ctx);
		else
			emit(hppa64_std_im16(rs, off, dstreg), ctx);
		break;
	}
}

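/*
 * Translate a single eBPF instruction into hppa code.  Returns 0 on
 * success, 1 if a second BPF instruction was consumed as well (64-bit
 * immediate loads and fused zero-extensions), or a negative errno for
 * unsupported instructions.
 */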
int bpf_jit_emit_insn(const struct bpf_insn *insn, struct hppa_jit_context *ctx,
		      bool extra_pass)
{
	bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 ||
		    BPF_CLASS(insn->code) == BPF_JMP;
	int s, e, ret, i = insn - ctx->prog->insnsi;
	s64 paoff;
	struct bpf_prog_aux *aux = ctx->prog->aux;
	u8 rd = -1, rs = -1, code = insn->code;
	s16 off = insn->off;
	s32 imm = insn->imm;

	init_regs(&rd, &rs, insn, ctx);

	switch (code) {
	/* dst = src */
	case BPF_ALU | BPF_MOV | BPF_X:
	case BPF_ALU64 | BPF_MOV | BPF_X:
		if (imm == 1) {
			/* Special mov32 for zext */
			emit_zext_32(rd, ctx);
			break;
		}
		if (!is64 && !aux->verifier_zext)
			emit_hppa64_zext32(rs, rd, ctx);
		else
			emit_hppa_copy(rs, rd, ctx);
		break;

	/* dst = dst OP src */
	case BPF_ALU | BPF_ADD | BPF_X:
	case BPF_ALU64 | BPF_ADD | BPF_X:
		emit(hppa_add(rd, rs, rd), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_SUB | BPF_X:
	case BPF_ALU64 | BPF_SUB | BPF_X:
		emit(hppa_sub(rd, rs, rd), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_AND | BPF_X:
	case BPF_ALU64 | BPF_AND | BPF_X:
		emit(hppa_and(rd, rs, rd), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_OR | BPF_X:
	case BPF_ALU64 | BPF_OR | BPF_X:
		emit(hppa_or(rd, rs, rd), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_XOR | BPF_X:
	case BPF_ALU64 | BPF_XOR | BPF_X:
		emit(hppa_xor(rd, rs, rd), ctx);
		if (!is64 && !aux->verifier_zext && rs != rd)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_MUL | BPF_K:
	case BPF_ALU64 | BPF_MUL | BPF_K:
		emit_imm(HPPA_REG_T1, is64 ? (s64)(s32)imm : (u32)imm, HPPA_REG_T2, ctx);
		rs = HPPA_REG_T1;
		fallthrough;
	case BPF_ALU | BPF_MUL | BPF_X:
	case BPF_ALU64 | BPF_MUL | BPF_X:
		emit_call_libgcc_ll(__muldi3, rd, rs, code, ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_DIV | BPF_K:
	case BPF_ALU64 | BPF_DIV | BPF_K:
		emit_imm(HPPA_REG_T1, is64 ? (s64)(s32)imm : (u32)imm, HPPA_REG_T2, ctx);
		rs = HPPA_REG_T1;
		fallthrough;
	case BPF_ALU | BPF_DIV | BPF_X:
	case BPF_ALU64 | BPF_DIV | BPF_X:
		emit_call_libgcc_ll(&hppa_div64, rd, rs, code, ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_MOD | BPF_K:
	case BPF_ALU64 | BPF_MOD | BPF_K:
		emit_imm(HPPA_REG_T1, is64 ? (s64)(s32)imm : (u32)imm, HPPA_REG_T2, ctx);
		rs = HPPA_REG_T1;
		fallthrough;
	case BPF_ALU | BPF_MOD | BPF_X:
	case BPF_ALU64 | BPF_MOD | BPF_X:
		emit_call_libgcc_ll(&hppa_div64_rem, rd, rs, code, ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;

	case BPF_ALU | BPF_LSH | BPF_X:
	case BPF_ALU64 | BPF_LSH | BPF_X:
		emit_hppa64_sext32(rs, HPPA_REG_T0, ctx);
		emit(hppa64_mtsarcm(HPPA_REG_T0), ctx);
		if (is64)
			emit(hppa64_depdz_sar(rd, rd), ctx);
		else
			emit(hppa_depwz_sar(rd, rd), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_RSH | BPF_X:
	case BPF_ALU64 | BPF_RSH | BPF_X:
		emit(hppa_mtsar(rs), ctx);
		if (is64)
			emit(hppa64_shrpd_sar(rd, rd), ctx);
		else
			emit(hppa_shrpw_sar(rd, rd), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_ARSH | BPF_X:
	case BPF_ALU64 | BPF_ARSH | BPF_X:
		emit_hppa64_sext32(rs, HPPA_REG_T0, ctx);
		emit(hppa64_mtsarcm(HPPA_REG_T0), ctx);
		if (is64)
			emit(hppa_extrd_sar(rd, rd, 1), ctx);
		else
			emit(hppa_extrws_sar(rd, rd), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;

	/* dst = -dst */
	case BPF_ALU | BPF_NEG:
	case BPF_ALU64 | BPF_NEG:
		emit(hppa_sub(HPPA_REG_ZERO, rd, rd), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;

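	/*
	 * Byte swaps: hppa is big-endian, so BPF_FROM_BE only has to
	 * truncate/zero-extend to the requested width, while BPF_FROM_LE
	 * below has to perform a real byte swap.
	 */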
	/* dst = BSWAP##imm(dst) */
	case BPF_ALU | BPF_END | BPF_FROM_BE:
		switch (imm) {
		case 16:
			/* zero-extend 16 bits into 64 bits */
			emit_hppa64_depd(HPPA_REG_ZERO, 63-16, 64-16, rd, 1, ctx);
			break;
		case 32:
			if (!aux->verifier_zext)
				emit_zext_32(rd, ctx);
			break;
		case 64:
			/* Do nothing */
			break;
		}
		break;

	case BPF_ALU | BPF_END | BPF_FROM_LE:
		switch (imm) {
		case 16:
			emit(hppa_extru(rd, 31 - 8, 8, HPPA_REG_T1), ctx);
			emit(hppa_depwz(rd, 23, 8, HPPA_REG_T1), ctx);
			emit(hppa_extru(HPPA_REG_T1, 31, 16, rd), ctx);
			emit_hppa64_extrd(HPPA_REG_T1, 63, 16, rd, 0, ctx);
			break;
		case 32:
			emit(hppa_shrpw(rd, rd, 16, HPPA_REG_T1), ctx);
			emit_hppa64_depd(HPPA_REG_T1, 63-16, 8, HPPA_REG_T1, 1, ctx);
			emit(hppa_shrpw(rd, HPPA_REG_T1, 8, HPPA_REG_T1), ctx);
			emit_hppa64_extrd(HPPA_REG_T1, 63, 32, rd, 0, ctx);
			break;
		case 64:
			emit(hppa64_permh_3210(rd, HPPA_REG_T1), ctx);
			emit(hppa64_hshl(HPPA_REG_T1, 8, HPPA_REG_T2), ctx);
			emit(hppa64_hshr_u(HPPA_REG_T1, 8, HPPA_REG_T1), ctx);
			emit(hppa_or(HPPA_REG_T2, HPPA_REG_T1, rd), ctx);
			break;
		default:
			pr_err("bpf-jit: BPF_END imm %d invalid\n", imm);
			return -1;
		}
		break;

	/* dst = imm */
	case BPF_ALU | BPF_MOV | BPF_K:
	case BPF_ALU64 | BPF_MOV | BPF_K:
		emit_imm(rd, imm, HPPA_REG_T2, ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;

	/* dst = dst OP imm */
	case BPF_ALU | BPF_ADD | BPF_K:
	case BPF_ALU64 | BPF_ADD | BPF_K:
		if (relative_bits_ok(imm, 14)) {
			emit(hppa_ldo(imm, rd, rd), ctx);
		} else {
			emit_imm(HPPA_REG_T1, imm, HPPA_REG_T2, ctx);
			emit(hppa_add(rd, HPPA_REG_T1, rd), ctx);
		}
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_SUB | BPF_K:
	case BPF_ALU64 | BPF_SUB | BPF_K:
		if (relative_bits_ok(-imm, 14)) {
			emit(hppa_ldo(-imm, rd, rd), ctx);
		} else {
			emit_imm(HPPA_REG_T1, imm, HPPA_REG_T2, ctx);
			emit(hppa_sub(rd, HPPA_REG_T1, rd), ctx);
		}
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_AND | BPF_K:
	case BPF_ALU64 | BPF_AND | BPF_K:
		emit_imm(HPPA_REG_T1, imm, HPPA_REG_T2, ctx);
		emit(hppa_and(rd, HPPA_REG_T1, rd), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_OR | BPF_K:
	case BPF_ALU64 | BPF_OR | BPF_K:
		emit_imm(HPPA_REG_T1, imm, HPPA_REG_T2, ctx);
		emit(hppa_or(rd, HPPA_REG_T1, rd), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_XOR | BPF_K:
	case BPF_ALU64 | BPF_XOR | BPF_K:
		emit_imm(HPPA_REG_T1, imm, HPPA_REG_T2, ctx);
		emit(hppa_xor(rd, HPPA_REG_T1, rd), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_LSH | BPF_K:
	case BPF_ALU64 | BPF_LSH | BPF_K:
		if (imm != 0)
			emit_hppa64_shld(rd, imm, rd, ctx);

		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_RSH | BPF_K:
	case BPF_ALU64 | BPF_RSH | BPF_K:
		if (imm != 0) {
			if (is64)
				emit_hppa64_shrd(rd, imm, rd, false, ctx);
			else
				emit_hppa64_shrw(rd, imm, rd, false, ctx);
		}

		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_ARSH | BPF_K:
	case BPF_ALU64 | BPF_ARSH | BPF_K:
		if (imm != 0) {
			if (is64)
				emit_hppa64_shrd(rd, imm, rd, true, ctx);
			else
				emit_hppa64_shrw(rd, imm, rd, true, ctx);
		}

		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;

	/* JUMP off */
	case BPF_JMP | BPF_JA:
		paoff = hppa_offset(i, off, ctx);
		ret = emit_jump(paoff, false, ctx);
		if (ret)
			return ret;
		break;

	/* IF (dst COND src) JUMP off */
	case BPF_JMP | BPF_JEQ | BPF_X:
	case BPF_JMP32 | BPF_JEQ | BPF_X:
	case BPF_JMP | BPF_JGT | BPF_X:
	case BPF_JMP32 | BPF_JGT | BPF_X:
	case BPF_JMP | BPF_JLT | BPF_X:
	case BPF_JMP32 | BPF_JLT | BPF_X:
	case BPF_JMP | BPF_JGE | BPF_X:
	case BPF_JMP32 | BPF_JGE | BPF_X:
	case BPF_JMP | BPF_JLE | BPF_X:
	case BPF_JMP32 | BPF_JLE | BPF_X:
	case BPF_JMP | BPF_JNE | BPF_X:
	case BPF_JMP32 | BPF_JNE | BPF_X:
	case BPF_JMP | BPF_JSGT | BPF_X:
	case BPF_JMP32 | BPF_JSGT | BPF_X:
	case BPF_JMP | BPF_JSLT | BPF_X:
	case BPF_JMP32 | BPF_JSLT | BPF_X:
	case BPF_JMP | BPF_JSGE | BPF_X:
	case BPF_JMP32 | BPF_JSGE | BPF_X:
	case BPF_JMP | BPF_JSLE | BPF_X:
	case BPF_JMP32 | BPF_JSLE | BPF_X:
	case BPF_JMP | BPF_JSET | BPF_X:
	case BPF_JMP32 | BPF_JSET | BPF_X:
		paoff = hppa_offset(i, off, ctx);
		if (!is64) {
			s = ctx->ninsns;
			if (is_signed_bpf_cond(BPF_OP(code)))
				emit_sext_32_rd_rs(&rd, &rs, ctx);
			else
				emit_zext_32_rd_rs(&rd, &rs, ctx);
			e = ctx->ninsns;

			/* Adjust for extra insns */
			paoff -= (e - s);
		}
		if (BPF_OP(code) == BPF_JSET) {
			/* Adjust for and */
			paoff -= 1;
			emit(hppa_and(rs, rd, HPPA_REG_T1), ctx);
			emit_branch(BPF_JNE, HPPA_REG_T1, HPPA_REG_ZERO, paoff,
				    ctx);
		} else {
			emit_branch(BPF_OP(code), rd, rs, paoff, ctx);
		}
		break;

	/* IF (dst COND imm) JUMP off */
	case BPF_JMP | BPF_JEQ | BPF_K:
	case BPF_JMP32 | BPF_JEQ | BPF_K:
	case BPF_JMP | BPF_JGT | BPF_K:
	case BPF_JMP32 | BPF_JGT | BPF_K:
	case BPF_JMP | BPF_JLT | BPF_K:
	case BPF_JMP32 | BPF_JLT | BPF_K:
	case BPF_JMP | BPF_JGE | BPF_K:
	case BPF_JMP32 | BPF_JGE | BPF_K:
	case BPF_JMP | BPF_JLE | BPF_K:
	case BPF_JMP32 | BPF_JLE | BPF_K:
	case BPF_JMP | BPF_JNE | BPF_K:
	case BPF_JMP32 | BPF_JNE | BPF_K:
	case BPF_JMP | BPF_JSGT | BPF_K:
	case BPF_JMP32 | BPF_JSGT | BPF_K:
	case BPF_JMP | BPF_JSLT | BPF_K:
	case BPF_JMP32 | BPF_JSLT | BPF_K:
	case BPF_JMP | BPF_JSGE | BPF_K:
	case BPF_JMP32 | BPF_JSGE | BPF_K:
	case BPF_JMP | BPF_JSLE | BPF_K:
	case BPF_JMP32 | BPF_JSLE | BPF_K:
		paoff = hppa_offset(i, off, ctx);
		s = ctx->ninsns;
		if (imm) {
			emit_imm(HPPA_REG_T1, imm, HPPA_REG_T2, ctx);
			rs = HPPA_REG_T1;
		} else {
			rs = HPPA_REG_ZERO;
		}
		if (!is64) {
			if (is_signed_bpf_cond(BPF_OP(code)))
				emit_sext_32_rd(&rd, ctx);
			else
				emit_zext_32_rd_t1(&rd, ctx);
		}
		e = ctx->ninsns;

		/* Adjust for extra insns */
		paoff -= (e - s);
		emit_branch(BPF_OP(code), rd, rs, paoff, ctx);
		break;
	case BPF_JMP | BPF_JSET | BPF_K:
	case BPF_JMP32 | BPF_JSET | BPF_K:
		paoff = hppa_offset(i, off, ctx);
		s = ctx->ninsns;
		emit_imm(HPPA_REG_T1, imm, HPPA_REG_T2, ctx);
		emit(hppa_and(HPPA_REG_T1, rd, HPPA_REG_T1), ctx);
		/* For jset32, we should clear the upper 32 bits of t1, but
		 * sign-extension is sufficient here and saves one instruction,
		 * as t1 is used only in comparison against zero.
		 */
		if (!is64 && imm < 0)
			emit_hppa64_sext32(HPPA_REG_T1, HPPA_REG_T1, ctx);
		e = ctx->ninsns;
		paoff -= (e - s);
		emit_branch(BPF_JNE, HPPA_REG_T1, HPPA_REG_ZERO, paoff, ctx);
		break;
	/* function call */
	case BPF_JMP | BPF_CALL:
	{
		bool fixed_addr;
		u64 addr;

		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
					    &addr, &fixed_addr);
		if (ret < 0)
			return ret;

		REG_SET_SEEN_ALL(ctx);
		emit_call(addr, fixed_addr, ctx);
		break;
	}
	/* tail call */
	case BPF_JMP | BPF_TAIL_CALL:
		emit_bpf_tail_call(i, ctx);
		break;

	/* function return */
	case BPF_JMP | BPF_EXIT:
		if (i == ctx->prog->len - 1)
			break;

		paoff = epilogue_offset(ctx);
		ret = emit_jump(paoff, false, ctx);
		if (ret)
			return ret;
		break;

	/* dst = imm64 */
	case BPF_LD | BPF_IMM | BPF_DW:
	{
		struct bpf_insn insn1 = insn[1];
		u64 imm64 = (u64)insn1.imm << 32 | (u32)imm;

		if (bpf_pseudo_func(insn))
			imm64 = (uintptr_t)dereference_function_descriptor((void *)imm64);
		emit_imm(rd, imm64, HPPA_REG_T2, ctx);

		return 1;
	}

	/* LDX: dst = *(size *)(src + off) */
	case BPF_LDX | BPF_MEM | BPF_B:
	case BPF_LDX | BPF_MEM | BPF_H:
	case BPF_LDX | BPF_MEM | BPF_W:
	case BPF_LDX | BPF_MEM | BPF_DW:
	case BPF_LDX | BPF_PROBE_MEM | BPF_B:
	case BPF_LDX | BPF_PROBE_MEM | BPF_H:
	case BPF_LDX | BPF_PROBE_MEM | BPF_W:
	case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
	{
		u8 srcreg;

		/* need to calculate address since offset does not fit in 14 bits? */
		if (relative_bits_ok(off, 14))
			srcreg = rs;
		else {
			/* need to use R1 here, since addil puts result into R1 */
			srcreg = HPPA_REG_R1;
			BUG_ON(rs == HPPA_REG_R1);
			BUG_ON(rd == HPPA_REG_R1);
			emit(hppa_addil(off, rs), ctx);
			off = im11(off);
		}

		switch (BPF_SIZE(code)) {
		case BPF_B:
			emit(hppa_ldb(off, srcreg, rd), ctx);
			if (insn_is_zext(&insn[1]))
				return 1;
			break;
		case BPF_H:
			emit(hppa_ldh(off, srcreg, rd), ctx);
			if (insn_is_zext(&insn[1]))
				return 1;
			break;
		case BPF_W:
			emit(hppa_ldw(off, srcreg, rd), ctx);
			if (insn_is_zext(&insn[1]))
				return 1;
			break;
		case BPF_DW:
			if (off & 7) {
				emit(hppa_ldo(off, srcreg, HPPA_REG_R1), ctx);
				emit(hppa64_ldd_reg(HPPA_REG_ZERO, HPPA_REG_R1, rd), ctx);
			} else if (off >= -16 && off <= 15)
				emit(hppa64_ldd_im5(off, srcreg, rd), ctx);
			else
				emit(hppa64_ldd_im16(off, srcreg, rd), ctx);
			break;
		}
		break;
	}
	/* speculation barrier */
	case BPF_ST | BPF_NOSPEC:
		break;

	/* ST: *(size *)(dst + off) = imm */
	/* STX: *(size *)(dst + off) = src */
	case BPF_ST | BPF_MEM | BPF_B:
	case BPF_ST | BPF_MEM | BPF_H:
	case BPF_ST | BPF_MEM | BPF_W:
	case BPF_ST | BPF_MEM | BPF_DW:

	case BPF_STX | BPF_MEM | BPF_B:
	case BPF_STX | BPF_MEM | BPF_H:
	case BPF_STX | BPF_MEM | BPF_W:
	case BPF_STX | BPF_MEM | BPF_DW:
		if (BPF_CLASS(code) == BPF_ST) {
			emit_imm(HPPA_REG_T2, imm, HPPA_REG_T1, ctx);
			rs = HPPA_REG_T2;
		}

		emit_store(rd, rs, off, ctx, BPF_SIZE(code), BPF_MODE(code));
		break;

	case BPF_STX | BPF_ATOMIC | BPF_W:
	case BPF_STX | BPF_ATOMIC | BPF_DW:
		pr_info_once(
			"bpf-jit: not supported: atomic operation %02x ***\n",
			insn->imm);
		return -EFAULT;

	default:
		pr_err("bpf-jit: unknown opcode %02x\n", code);
		return -EINVAL;
	}

	return 0;
}

void bpf_jit_build_prologue(struct hppa_jit_context *ctx)
{
	int bpf_stack_adjust, stack_adjust, i;
	unsigned long addr;
	s8 reg;

	/*
	 * stack on hppa grows up, so if tail calls are used we need to
	 * allocate the maximum stack size
	 */
	if (REG_ALL_SEEN(ctx))
		bpf_stack_adjust = MAX_BPF_STACK;
	else
		bpf_stack_adjust = ctx->prog->aux->stack_depth;
	bpf_stack_adjust = round_up(bpf_stack_adjust, STACK_ALIGN);

	stack_adjust = FRAME_SIZE + bpf_stack_adjust;
	stack_adjust = round_up(stack_adjust, STACK_ALIGN);

	/*
	 * NOTE: We construct an Elf64_Fdesc descriptor here.
	 * The first 4 instruction words initialize and compare the TCC.
	 * Then follows the virtual address of the eBPF function,
	 * and the gp for this function.
	 *
	 * The first instruction sets the tail-call-counter (TCC) register.
	 * This instruction is skipped by tail calls.
	 * Use a temporary register instead of a caller-saved register initially.
	 */
	REG_FORCE_SEEN(ctx, HPPA_REG_TCC_IN_INIT);
	emit(hppa_ldi(MAX_TAIL_CALL_CNT, HPPA_REG_TCC_IN_INIT), ctx);

	/*
	 * Skip all initializations when called as BPF TAIL call.
	 */
	emit(hppa_ldi(MAX_TAIL_CALL_CNT, HPPA_REG_R1), ctx);
	emit(hppa_beq(HPPA_REG_TCC_IN_INIT, HPPA_REG_R1, 6 - HPPA_BRANCH_DISPLACEMENT), ctx);
	emit(hppa64_bl_long(ctx->prologue_len - 3 - HPPA_BRANCH_DISPLACEMENT), ctx);

	/* store entry address of this eBPF function */
	addr = (uintptr_t) &ctx->insns[0];
	emit(addr >> 32, ctx);
	emit(addr & 0xffffffff, ctx);

	/* store gp of this eBPF function */
	asm("copy %%r27,%0" : "=r" (addr));
	emit(addr >> 32, ctx);
	emit(addr & 0xffffffff, ctx);

	/* Set up hppa stack frame. */
	emit_hppa_copy(HPPA_REG_SP, HPPA_REG_R1, ctx);
	emit(hppa_ldo(stack_adjust, HPPA_REG_SP, HPPA_REG_SP), ctx);
	emit(hppa64_std_im5 (HPPA_REG_R1, -REG_SIZE, HPPA_REG_SP), ctx);
	emit(hppa64_std_im16(HPPA_REG_RP, -2*REG_SIZE, HPPA_REG_SP), ctx);

	/* Save callee-save registers. */
	for (i = 3; i <= 15; i++) {
		if (OPTIMIZE_HPPA && !REG_WAS_SEEN(ctx, HPPA_R(i)))
			continue;
		emit(hppa64_std_im16(HPPA_R(i), -REG_SIZE * i, HPPA_REG_SP), ctx);
	}

	/* load function parameters; load all if we use tail calls */
	#define LOAD_PARAM(arg, dst) \
		if (REG_WAS_SEEN(ctx, regmap[dst]) ||	\
		    REG_WAS_SEEN(ctx, HPPA_REG_TCC))	\
			emit_hppa_copy(arg, regmap[dst], ctx)
	LOAD_PARAM(HPPA_REG_ARG0, BPF_REG_1);
	LOAD_PARAM(HPPA_REG_ARG1, BPF_REG_2);
	LOAD_PARAM(HPPA_REG_ARG2, BPF_REG_3);
	LOAD_PARAM(HPPA_REG_ARG3, BPF_REG_4);
	LOAD_PARAM(HPPA_REG_ARG4, BPF_REG_5);
	#undef LOAD_PARAM

	REG_FORCE_SEEN(ctx, HPPA_REG_T0);
	REG_FORCE_SEEN(ctx, HPPA_REG_T1);
	REG_FORCE_SEEN(ctx, HPPA_REG_T2);

	/*
	 * Now really set the tail call counter (TCC) register.
	 */
	if (REG_WAS_SEEN(ctx, HPPA_REG_TCC))
		emit(hppa_ldi(MAX_TAIL_CALL_CNT, HPPA_REG_TCC), ctx);

	/*
	 * Save epilogue function pointer for outer TCC call chain.
	 * The main TCC call stores the final RP on stack.
	 */
	addr = (uintptr_t) &ctx->insns[ctx->epilogue_offset];
	/* skip first two instructions which jump to exit */
	addr += 2 * HPPA_INSN_SIZE;
	emit_imm(HPPA_REG_T2, addr, HPPA_REG_T1, ctx);
	emit(EXIT_PTR_STORE(HPPA_REG_T2), ctx);

	/* Set up BPF frame pointer. */
	reg = regmap[BPF_REG_FP];	/* -> HPPA_REG_FP */
	if (REG_WAS_SEEN(ctx, reg))
		emit(hppa_ldo(-FRAME_SIZE, HPPA_REG_SP, reg), ctx);
}

void bpf_jit_build_epilogue(struct hppa_jit_context *ctx)
{
	__build_epilogue(false, ctx);
}

bool bpf_jit_supports_kfunc_call(void)
{
	return true;
}