// SPDX-License-Identifier: GPL-2.0
/*
 * BPF JIT compiler for PA-RISC (32-bit)
 *
 * Copyright (c) 2023 Helge Deller <deller@gmx.de>
 *
 * The code is based on the BPF JIT compiler for RV64 by Björn Töpel and
 * the BPF JIT compiler for 32-bit ARM by Shubham Bansal and Mircea Gherzan.
 */

#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/libgcc.h>
#include "bpf_jit.h"

/*
 * Stack layout during BPF program execution (note: stack grows up):
 *
 *                     high
 *   HPPA32 sp =>  +----------+ <= HPPA32 fp
 *                 | saved sp |
 *                 | saved rp |
 *                 |   ...    | HPPA32 callee-saved registers
 *                 | curr args|
 *                 | local var|
 *                 +----------+ <= (sp - 4 * NR_SAVED_REGISTERS)
 *                 |  lo(R9)  |
 *                 |  hi(R9)  |
 *                 |  lo(FP)  | JIT scratch space for BPF registers
 *                 |  hi(FP)  |
 *                 |   ...    |
 *                 +----------+ <= (sp - 4 * NR_SAVED_REGISTERS
 *                 |          |        - 4 * BPF_JIT_SCRATCH_REGS)
 *                 |          |
 *                 |   ...    | BPF program stack
 *                 |          |
 *                 |   ...    | Function call stack
 *                 |          |
 *                 +----------+
 *                     low
 */

enum {
	/* Stack layout - these are offsets from top of JIT scratch space. */
	BPF_R8_HI,
	BPF_R8_LO,
	BPF_R9_HI,
	BPF_R9_LO,
	BPF_FP_HI,
	BPF_FP_LO,
	BPF_AX_HI,
	BPF_AX_LO,
	BPF_R0_TEMP_HI,
	BPF_R0_TEMP_LO,
	BPF_JIT_SCRATCH_REGS,
};

/* Stack slots for callee-saved registers (r3-r18) plus frame overhead (saved sp, rp, args). */
#define NR_SAVED_REGISTERS	(18 - 3 + 1 + 8)

/* Offset from fp for BPF registers stored on stack. */
#define STACK_OFFSET(k)	(- (NR_SAVED_REGISTERS + k + 1))
#define STACK_ALIGN	FRAME_SIZE
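
/*
 * Illustration (not emitted code): with NR_SAVED_REGISTERS == 24, the low
 * word of BPF R8 lives in slot STACK_OFFSET(BPF_R8_LO) == -(24 + 1 + 1) == -26,
 * so bpf_put_reg64() writes it with roughly
 *	stw	src, -26 * REG_SIZE(sp)
 * The exact slot indices follow the enum above; this only shows how negative
 * regmap entries translate into sp-relative load/store offsets.
 */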

#define EXIT_PTR_LOAD(reg)	hppa_ldw(-0x08, HPPA_REG_SP, reg)
#define EXIT_PTR_STORE(reg)	hppa_stw(reg, -0x08, HPPA_REG_SP)
#define EXIT_PTR_JUMP(reg, nop)	hppa_bv(HPPA_REG_ZERO, reg, nop)
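
/*
 * The prologue stores the address of the shared epilogue at sp - 0x08 (see
 * EXIT_PTR_STORE in bpf_jit_build_prologue()).  BPF_EXIT and the error paths
 * of the tail-call handler reload that pointer with EXIT_PTR_LOAD and branch
 * through it with EXIT_PTR_JUMP.
 */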

#define TMP_REG_1	(MAX_BPF_JIT_REG + 0)
#define TMP_REG_2	(MAX_BPF_JIT_REG + 1)
#define TMP_REG_R0	(MAX_BPF_JIT_REG + 2)

static const s8 regmap[][2] = {
	/* Return value from in-kernel function, and exit value from eBPF. */
	[BPF_REG_0] = {HPPA_REG_RET0, HPPA_REG_RET1},		/* HI/LOW */

	/* Arguments from eBPF program to in-kernel function. */
	[BPF_REG_1] = {HPPA_R(3), HPPA_R(4)},
	[BPF_REG_2] = {HPPA_R(5), HPPA_R(6)},
	[BPF_REG_3] = {HPPA_R(7), HPPA_R(8)},
	[BPF_REG_4] = {HPPA_R(9), HPPA_R(10)},
	[BPF_REG_5] = {HPPA_R(11), HPPA_R(12)},

	[BPF_REG_6] = {HPPA_R(13), HPPA_R(14)},
	[BPF_REG_7] = {HPPA_R(15), HPPA_R(16)},
	/*
	 * Callee-saved registers that in-kernel function will preserve.
	 * Stored on the stack.
	 */
	[BPF_REG_8] = {STACK_OFFSET(BPF_R8_HI), STACK_OFFSET(BPF_R8_LO)},
	[BPF_REG_9] = {STACK_OFFSET(BPF_R9_HI), STACK_OFFSET(BPF_R9_LO)},

	/* Read-only frame pointer to access BPF stack. Not needed. */
	[BPF_REG_FP] = {STACK_OFFSET(BPF_FP_HI), STACK_OFFSET(BPF_FP_LO)},

	/* Temporary register for blinding constants. Stored on the stack. */
	[BPF_REG_AX] = {STACK_OFFSET(BPF_AX_HI), STACK_OFFSET(BPF_AX_LO)},
	/*
	 * Temporary registers used by the JIT to operate on BPF registers
	 * stored on the stack (t2-t5); t0 and t1 are kept free as scratch
	 * registers for the generated code itself.
	 */
	[TMP_REG_1] = {HPPA_REG_T3, HPPA_REG_T2},
	[TMP_REG_2] = {HPPA_REG_T5, HPPA_REG_T4},

	/* temporary space for BPF_R0 during libgcc and millicode calls */
	[TMP_REG_R0] = {STACK_OFFSET(BPF_R0_TEMP_HI), STACK_OFFSET(BPF_R0_TEMP_LO)},
};
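
/*
 * Each 64-bit eBPF register maps to a {hi, lo} pair: positive entries are
 * HPPA register numbers, negative entries are stack slots (see STACK_OFFSET).
 * For example, BPF R1 lives in r3 (high word) and r4 (low word), while
 * BPF R8 lives entirely on the stack.
 */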

static s8 hi(const s8 *r)
{
	return r[0];
}

static s8 lo(const s8 *r)
{
	return r[1];
}

static void emit_hppa_copy(const s8 rs, const s8 rd, struct hppa_jit_context *ctx)
{
	REG_SET_SEEN(ctx, rd);
	if (OPTIMIZE_HPPA && (rs == rd))
		return;
	REG_SET_SEEN(ctx, rs);
	emit(hppa_copy(rs, rd), ctx);
}

static void emit_hppa_xor(const s8 r1, const s8 r2, const s8 r3, struct hppa_jit_context *ctx)
{
	REG_SET_SEEN(ctx, r1);
	REG_SET_SEEN(ctx, r2);
	REG_SET_SEEN(ctx, r3);
	if (OPTIMIZE_HPPA && (r1 == r2)) {
		emit(hppa_copy(HPPA_REG_ZERO, r3), ctx);
	} else {
		emit(hppa_xor(r1, r2, r3), ctx);
	}
}

static void emit_imm(const s8 rd, s32 imm, struct hppa_jit_context *ctx)
{
	u32 lower = im11(imm);

	REG_SET_SEEN(ctx, rd);
	if (OPTIMIZE_HPPA && relative_bits_ok(imm, 14)) {
		emit(hppa_ldi(imm, rd), ctx);
		return;
	}
	emit(hppa_ldil(imm, rd), ctx);
	if (OPTIMIZE_HPPA && (lower == 0))
		return;
	emit(hppa_ldo(lower, rd, rd), ctx);
}
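
/*
 * Example (illustrative only): a constant such as 0x12345678 does not fit in
 * 14 bits, so emit_imm() expands it to roughly
 *	ldil	L%0x12345678, rd	; upper part
 *	ldo	R%0x12345678(rd), rd	; low 11-bit remainder (im11)
 * whereas small constants accepted by relative_bits_ok(imm, 14) use a single ldi.
 */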

static void emit_imm32(const s8 *rd, s32 imm, struct hppa_jit_context *ctx)
{
	/* Emit immediate into lower bits. */
	REG_SET_SEEN(ctx, lo(rd));
	emit_imm(lo(rd), imm, ctx);

	/* Sign-extend into upper bits. */
	REG_SET_SEEN(ctx, hi(rd));
	if (imm >= 0)
		emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
	else
		emit(hppa_ldi(-1, hi(rd)), ctx);
}

static void emit_imm64(const s8 *rd, s32 imm_hi, s32 imm_lo,
		       struct hppa_jit_context *ctx)
{
	emit_imm(hi(rd), imm_hi, ctx);
	emit_imm(lo(rd), imm_lo, ctx);
}

static void __build_epilogue(bool is_tail_call, struct hppa_jit_context *ctx)
{
	const s8 *r0 = regmap[BPF_REG_0];
	int i;

	if (is_tail_call) {
		/*
		 * goto *(t0 + 4);
		 * Skips first instruction of prologue which initializes tail
		 * call counter. Assumes t0 contains address of target program,
		 * see emit_bpf_tail_call.
		 */
		emit(hppa_ldo(1 * HPPA_INSN_SIZE, HPPA_REG_T0, HPPA_REG_T0), ctx);
		emit(hppa_bv(HPPA_REG_ZERO, HPPA_REG_T0, EXEC_NEXT_INSTR), ctx);
		/* in delay slot: */
		emit(hppa_copy(HPPA_REG_TCC, HPPA_REG_TCC_IN_INIT), ctx);

		return;
	}

	/* load epilogue function pointer and jump to it. */
	/* exit point is either directly below, or the outermost TCC exit function */
	emit(EXIT_PTR_LOAD(HPPA_REG_RP), ctx);
	emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);

	/* NOTE: we are 32-bit and big-endian, so return lower 32-bit value */
	emit_hppa_copy(lo(r0), HPPA_REG_RET0, ctx);

	/* Restore callee-saved registers. */
	for (i = 3; i <= 18; i++) {
		if (OPTIMIZE_HPPA && !REG_WAS_SEEN(ctx, HPPA_R(i)))
			continue;
		emit(hppa_ldw(-REG_SIZE * (8 + (i-3)), HPPA_REG_SP, HPPA_R(i)), ctx);
	}

	/* load original return pointer (stored by the outermost TCC function) */
	emit(hppa_ldw(-0x14, HPPA_REG_SP, HPPA_REG_RP), ctx);
	emit(hppa_bv(HPPA_REG_ZERO, HPPA_REG_RP, EXEC_NEXT_INSTR), ctx);
	/* in delay slot: */
	emit(hppa_ldw(-0x04, HPPA_REG_SP, HPPA_REG_SP), ctx);
}

static bool is_stacked(s8 reg)
{
	return reg < 0;
}

static const s8 *bpf_get_reg64_offset(const s8 *reg, const s8 *tmp,
		u16 offset_sp, struct hppa_jit_context *ctx)
{
	if (is_stacked(hi(reg))) {
		emit(hppa_ldw(REG_SIZE * hi(reg) - offset_sp, HPPA_REG_SP, hi(tmp)), ctx);
		emit(hppa_ldw(REG_SIZE * lo(reg) - offset_sp, HPPA_REG_SP, lo(tmp)), ctx);
		reg = tmp;
	}
	REG_SET_SEEN(ctx, hi(reg));
	REG_SET_SEEN(ctx, lo(reg));
	return reg;
}

static const s8 *bpf_get_reg64(const s8 *reg, const s8 *tmp,
			       struct hppa_jit_context *ctx)
{
	return bpf_get_reg64_offset(reg, tmp, 0, ctx);
}

static const s8 *bpf_get_reg64_ref(const s8 *reg, const s8 *tmp,
		bool must_load, struct hppa_jit_context *ctx)
{
	if (!OPTIMIZE_HPPA)
		return bpf_get_reg64(reg, tmp, ctx);

	if (is_stacked(hi(reg))) {
		if (must_load)
			emit(hppa_ldw(REG_SIZE * hi(reg), HPPA_REG_SP, hi(tmp)), ctx);
		reg = tmp;
	}
	REG_SET_SEEN(ctx, hi(reg));
	REG_SET_SEEN(ctx, lo(reg));
	return reg;
}


static void bpf_put_reg64(const s8 *reg, const s8 *src,
			  struct hppa_jit_context *ctx)
{
	if (is_stacked(hi(reg))) {
		emit(hppa_stw(hi(src), REG_SIZE * hi(reg), HPPA_REG_SP), ctx);
		emit(hppa_stw(lo(src), REG_SIZE * lo(reg), HPPA_REG_SP), ctx);
	}
}

static void bpf_save_R0(struct hppa_jit_context *ctx)
{
	bpf_put_reg64(regmap[TMP_REG_R0], regmap[BPF_REG_0], ctx);
}

static void bpf_restore_R0(struct hppa_jit_context *ctx)
{
	bpf_get_reg64(regmap[TMP_REG_R0], regmap[BPF_REG_0], ctx);
}


static const s8 *bpf_get_reg32(const s8 *reg, const s8 *tmp,
			       struct hppa_jit_context *ctx)
{
	if (is_stacked(lo(reg))) {
		emit(hppa_ldw(REG_SIZE * lo(reg), HPPA_REG_SP, lo(tmp)), ctx);
		reg = tmp;
	}
	REG_SET_SEEN(ctx, lo(reg));
	return reg;
}

static const s8 *bpf_get_reg32_ref(const s8 *reg, const s8 *tmp,
		struct hppa_jit_context *ctx)
{
	if (!OPTIMIZE_HPPA)
		return bpf_get_reg32(reg, tmp, ctx);

	if (is_stacked(hi(reg))) {
		reg = tmp;
	}
	REG_SET_SEEN(ctx, lo(reg));
	return reg;
}

static void bpf_put_reg32(const s8 *reg, const s8 *src,
			  struct hppa_jit_context *ctx)
{
	if (is_stacked(lo(reg))) {
		REG_SET_SEEN(ctx, lo(src));
		emit(hppa_stw(lo(src), REG_SIZE * lo(reg), HPPA_REG_SP), ctx);
		if (1 && !ctx->prog->aux->verifier_zext) {
			REG_SET_SEEN(ctx, hi(reg));
			emit(hppa_stw(HPPA_REG_ZERO, REG_SIZE * hi(reg), HPPA_REG_SP), ctx);
		}
	} else if (1 && !ctx->prog->aux->verifier_zext) {
		REG_SET_SEEN(ctx, hi(reg));
		emit_hppa_copy(HPPA_REG_ZERO, hi(reg), ctx);
	}
}
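
/*
 * Typical usage pattern in the emitters below (sketch, not emitted code):
 *
 *	const s8 *rd = bpf_get_reg64(dst, regmap[TMP_REG_1], ctx);
 *	... operate on hi(rd) / lo(rd) ...
 *	bpf_put_reg64(dst, rd, ctx);
 *
 * For BPF registers that live in real HPPA registers the get/put pair only
 * marks them as seen; for stacked registers it loads them into the given
 * temporaries and writes the result back to the stack slots.
 */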

/* extern hppa millicode functions */
extern void $$mulI(void);
extern void $$divU(void);
extern void $$remU(void);

static void emit_call_millicode(void *func, const s8 arg0,
		const s8 arg1, u8 opcode, struct hppa_jit_context *ctx)
{
	u32 func_addr;

	emit_hppa_copy(arg0, HPPA_REG_ARG0, ctx);
	emit_hppa_copy(arg1, HPPA_REG_ARG1, ctx);

	/* the millicode call clobbers HPPA_REG_RET0/1, so temporarily save BPF R0 */
	if (arg0 != HPPA_REG_RET1)
		bpf_save_R0(ctx);

	func_addr = (uintptr_t) dereference_function_descriptor(func);
	emit(hppa_ldil(func_addr, HPPA_REG_R31), ctx);
	/* skip the following be_l instruction if divisor is zero. */
	if (BPF_OP(opcode) == BPF_DIV || BPF_OP(opcode) == BPF_MOD) {
		if (BPF_OP(opcode) == BPF_DIV)
			emit_hppa_copy(HPPA_REG_ZERO, HPPA_REG_RET1, ctx);
		else
			emit_hppa_copy(HPPA_REG_ARG0, HPPA_REG_RET1, ctx);
		emit(hppa_or_cond(HPPA_REG_ARG1, HPPA_REG_ZERO, 1, 0, HPPA_REG_ZERO), ctx);
	}
	/* Note: millicode functions use r31 as return pointer instead of rp */
	emit(hppa_be_l(im11(func_addr) >> 2, HPPA_REG_R31, NOP_NEXT_INSTR), ctx);
	emit(hppa_nop(), ctx); /* this nop is needed here for delay slot */

	/* Note: millicode functions return result in RET1, not RET0 */
	emit_hppa_copy(HPPA_REG_RET1, arg0, ctx);

	/* restore BPF R0 (HPPA_REG_RET0/1) from its temporary stack slot */
	if (arg0 != HPPA_REG_RET1)
		bpf_restore_R0(ctx);
}

static void emit_call_libgcc_ll(void *func, const s8 *arg0,
		const s8 *arg1, u8 opcode, struct hppa_jit_context *ctx)
{
	u32 func_addr;

	emit_hppa_copy(lo(arg0), HPPA_REG_ARG0, ctx);
	emit_hppa_copy(hi(arg0), HPPA_REG_ARG1, ctx);
	emit_hppa_copy(lo(arg1), HPPA_REG_ARG2, ctx);
	emit_hppa_copy(hi(arg1), HPPA_REG_ARG3, ctx);

	/* libgcc overwrites HPPA_REG_RET0/_RET1, so keep a copy of R0 on the stack */
	if (hi(arg0) != HPPA_REG_RET0)
		bpf_save_R0(ctx);

	/* prepare stack */
	emit(hppa_ldo(2 * FRAME_SIZE, HPPA_REG_SP, HPPA_REG_SP), ctx);

	func_addr = (uintptr_t) dereference_function_descriptor(func);
	emit(hppa_ldil(func_addr, HPPA_REG_R31), ctx);
	/* skip the following be_l instruction if the divisor is 0 (and set default result values) */
	if (BPF_OP(opcode) == BPF_DIV || BPF_OP(opcode) == BPF_MOD) {
		emit_hppa_copy(HPPA_REG_ZERO, HPPA_REG_RET0, ctx);
		if (BPF_OP(opcode) == BPF_DIV)
			emit_hppa_copy(HPPA_REG_ZERO, HPPA_REG_RET1, ctx);
		else
			emit_hppa_copy(HPPA_REG_ARG0, HPPA_REG_RET1, ctx);
		emit(hppa_or_cond(HPPA_REG_ARG2, HPPA_REG_ARG3, 1, 0, HPPA_REG_ZERO), ctx);
	}
	emit(hppa_be_l(im11(func_addr) >> 2, HPPA_REG_R31, EXEC_NEXT_INSTR), ctx);
	emit_hppa_copy(HPPA_REG_R31, HPPA_REG_RP, ctx);

	/* restore stack */
	emit(hppa_ldo(-2 * FRAME_SIZE, HPPA_REG_SP, HPPA_REG_SP), ctx);

	emit_hppa_copy(HPPA_REG_RET0, hi(arg0), ctx);
	emit_hppa_copy(HPPA_REG_RET1, lo(arg0), ctx);

	/* restore HPPA_REG_RET0/_RET1 */
	if (hi(arg0) != HPPA_REG_RET0)
		bpf_restore_R0(ctx);
}

static void emit_jump(s32 paoff, bool force_far,
			       struct hppa_jit_context *ctx)
{
	unsigned long pc, addr;

	/* Note: allocate 2 instructions for jumps if force_far is set. */
	if (relative_bits_ok(paoff - HPPA_BRANCH_DISPLACEMENT, 17)) {
		/* use BL,short branch followed by nop() */
		emit(hppa_bl(paoff - HPPA_BRANCH_DISPLACEMENT, HPPA_REG_ZERO), ctx);
		if (force_far)
			emit(hppa_nop(), ctx);
		return;
	}

	pc = (uintptr_t) &ctx->insns[ctx->ninsns];
	addr = pc + (paoff * HPPA_INSN_SIZE);
	emit(hppa_ldil(addr, HPPA_REG_R31), ctx);
	emit(hppa_be_l(im11(addr) >> 2, HPPA_REG_R31, NOP_NEXT_INSTR), ctx); // be,l,n addr(sr4,r31), %sr0, %r31
}
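
/*
 * Rough expansion (illustrative): a near jump that fits in 17 bits becomes a
 * single
 *	b,l	target, %r0
 * (padded with a nop when force_far reserved two instruction slots), while a
 * far jump materializes the absolute target address and branches externally:
 *	ldil	L%addr, %r31
 *	be,l,n	R%addr(%sr4, %r31)
 */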

static void emit_alu_i64(const s8 *dst, s32 imm,
			 struct hppa_jit_context *ctx, const u8 op)
{
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *rd;

	if (0 && op == BPF_MOV)
		rd = bpf_get_reg64_ref(dst, tmp1, false, ctx);
	else
		rd = bpf_get_reg64(dst, tmp1, ctx);

	/* dst = dst OP imm */
	switch (op) {
	case BPF_MOV:
		emit_imm32(rd, imm, ctx);
		break;
	case BPF_AND:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit(hppa_and(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
		if (imm >= 0)
			emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
		break;
	case BPF_OR:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit(hppa_or(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
		if (imm < 0)
			emit_imm(hi(rd), -1, ctx);
		break;
	case BPF_XOR:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit_hppa_xor(lo(rd), HPPA_REG_T0, lo(rd), ctx);
		if (imm < 0) {
			emit_imm(HPPA_REG_T0, -1, ctx);
			emit_hppa_xor(hi(rd), HPPA_REG_T0, hi(rd), ctx);
		}
		break;
	case BPF_LSH:
		if (imm == 0)
			break;
		if (imm > 32) {
			imm -= 32;
			emit(hppa_zdep(lo(rd), imm, imm, hi(rd)), ctx);
			emit_hppa_copy(HPPA_REG_ZERO, lo(rd), ctx);
		} else if (imm == 32) {
			emit_hppa_copy(lo(rd), hi(rd), ctx);
			emit_hppa_copy(HPPA_REG_ZERO, lo(rd), ctx);
		} else {
			emit(hppa_shd(hi(rd), lo(rd), 32 - imm, hi(rd)), ctx);
			emit(hppa_zdep(lo(rd), imm, imm, lo(rd)), ctx);
		}
		break;
	case BPF_RSH:
		if (imm == 0)
			break;
		if (imm > 32) {
			imm -= 32;
			emit(hppa_shr(hi(rd), imm, lo(rd)), ctx);
			emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
		} else if (imm == 32) {
			emit_hppa_copy(hi(rd), lo(rd), ctx);
			emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
		} else {
			emit(hppa_shrpw(hi(rd), lo(rd), imm, lo(rd)), ctx);
			emit(hppa_shr(hi(rd), imm, hi(rd)), ctx);
		}
		break;
	case BPF_ARSH:
		if (imm == 0)
			break;
		if (imm > 32) {
			imm -= 32;
			emit(hppa_extrws(hi(rd), 31 - imm, imm, lo(rd)), ctx);
			emit(hppa_extrws(hi(rd), 0, 31, hi(rd)), ctx);
		} else if (imm == 32) {
			emit_hppa_copy(hi(rd), lo(rd), ctx);
			emit(hppa_extrws(hi(rd), 0, 31, hi(rd)), ctx);
		} else {
			emit(hppa_shrpw(hi(rd), lo(rd), imm, lo(rd)), ctx);
			emit(hppa_extrws(hi(rd), 31 - imm, imm, hi(rd)), ctx);
		}
		break;
	default:
		WARN_ON(1);
	}

	bpf_put_reg64(dst, rd, ctx);
}

static void emit_alu_i32(const s8 *dst, s32 imm,
			 struct hppa_jit_context *ctx, const u8 op)
{
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *rd;

	if (op == BPF_MOV)
		rd = bpf_get_reg32_ref(dst, tmp1, ctx);
	else
		rd = bpf_get_reg32(dst, tmp1, ctx);

	/* dst = dst OP imm */
	switch (op) {
	case BPF_MOV:
		emit_imm(lo(rd), imm, ctx);
		break;
	case BPF_ADD:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit(hppa_add(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
		break;
	case BPF_SUB:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit(hppa_sub(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
		break;
	case BPF_AND:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit(hppa_and(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
		break;
	case BPF_OR:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit(hppa_or(lo(rd), HPPA_REG_T0, lo(rd)), ctx);
		break;
	case BPF_XOR:
		emit_imm(HPPA_REG_T0, imm, ctx);
		emit_hppa_xor(lo(rd), HPPA_REG_T0, lo(rd), ctx);
		break;
	case BPF_LSH:
		if (imm != 0)
			emit(hppa_zdep(lo(rd), imm, imm, lo(rd)), ctx);
		break;
	case BPF_RSH:
		if (imm != 0)
			emit(hppa_shr(lo(rd), imm, lo(rd)), ctx);
		break;
	case BPF_ARSH:
		if (imm != 0)
			emit(hppa_extrws(lo(rd), 31 - imm, imm, lo(rd)), ctx);
		break;
	default:
		WARN_ON(1);
	}

	bpf_put_reg32(dst, rd, ctx);
}

static void emit_alu_r64(const s8 *dst, const s8 *src,
			 struct hppa_jit_context *ctx, const u8 op)
{
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *tmp2 = regmap[TMP_REG_2];
	const s8 *rd;
	const s8 *rs = bpf_get_reg64(src, tmp2, ctx);

	if (op == BPF_MOV)
		rd = bpf_get_reg64_ref(dst, tmp1, false, ctx);
	else
		rd = bpf_get_reg64(dst, tmp1, ctx);

	/* dst = dst OP src */
	switch (op) {
	case BPF_MOV:
		emit_hppa_copy(lo(rs), lo(rd), ctx);
		emit_hppa_copy(hi(rs), hi(rd), ctx);
		break;
	case BPF_ADD:
		emit(hppa_add(lo(rd), lo(rs), lo(rd)), ctx);
		emit(hppa_addc(hi(rd), hi(rs), hi(rd)), ctx);
		break;
	case BPF_SUB:
		emit(hppa_sub(lo(rd), lo(rs), lo(rd)), ctx);
		emit(hppa_subb(hi(rd), hi(rs), hi(rd)), ctx);
		break;
	case BPF_AND:
		emit(hppa_and(lo(rd), lo(rs), lo(rd)), ctx);
		emit(hppa_and(hi(rd), hi(rs), hi(rd)), ctx);
		break;
	case BPF_OR:
		emit(hppa_or(lo(rd), lo(rs), lo(rd)), ctx);
		emit(hppa_or(hi(rd), hi(rs), hi(rd)), ctx);
		break;
	case BPF_XOR:
		emit_hppa_xor(lo(rd), lo(rs), lo(rd), ctx);
		emit_hppa_xor(hi(rd), hi(rs), hi(rd), ctx);
		break;
	case BPF_MUL:
		emit_call_libgcc_ll(__muldi3, rd, rs, op, ctx);
		break;
	case BPF_DIV:
		emit_call_libgcc_ll(&hppa_div64, rd, rs, op, ctx);
		break;
	case BPF_MOD:
		emit_call_libgcc_ll(&hppa_div64_rem, rd, rs, op, ctx);
		break;
	case BPF_LSH:
		emit_call_libgcc_ll(__ashldi3, rd, rs, op, ctx);
		break;
	case BPF_RSH:
		emit_call_libgcc_ll(__lshrdi3, rd, rs, op, ctx);
		break;
	case BPF_ARSH:
		emit_call_libgcc_ll(__ashrdi3, rd, rs, op, ctx);
		break;
	case BPF_NEG:
		emit(hppa_sub(HPPA_REG_ZERO, lo(rd), lo(rd)), ctx);
		emit(hppa_subb(HPPA_REG_ZERO, hi(rd), hi(rd)), ctx);
		break;
	default:
		WARN_ON(1);
	}

	bpf_put_reg64(dst, rd, ctx);
}

static void emit_alu_r32(const s8 *dst, const s8 *src,
			 struct hppa_jit_context *ctx, const u8 op)
{
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *tmp2 = regmap[TMP_REG_2];
	const s8 *rd;
	const s8 *rs = bpf_get_reg32(src, tmp2, ctx);

	if (op == BPF_MOV)
		rd = bpf_get_reg32_ref(dst, tmp1, ctx);
	else
		rd = bpf_get_reg32(dst, tmp1, ctx);

	/* dst = dst OP src */
	switch (op) {
	case BPF_MOV:
		emit_hppa_copy(lo(rs), lo(rd), ctx);
		break;
	case BPF_ADD:
		emit(hppa_add(lo(rd), lo(rs), lo(rd)), ctx);
		break;
	case BPF_SUB:
		emit(hppa_sub(lo(rd), lo(rs), lo(rd)), ctx);
		break;
	case BPF_AND:
		emit(hppa_and(lo(rd), lo(rs), lo(rd)), ctx);
		break;
	case BPF_OR:
		emit(hppa_or(lo(rd), lo(rs), lo(rd)), ctx);
		break;
	case BPF_XOR:
		emit_hppa_xor(lo(rd), lo(rs), lo(rd), ctx);
		break;
	case BPF_MUL:
		emit_call_millicode($$mulI, lo(rd), lo(rs), op, ctx);
		break;
	case BPF_DIV:
		emit_call_millicode($$divU, lo(rd), lo(rs), op, ctx);
		break;
	case BPF_MOD:
		emit_call_millicode($$remU, lo(rd), lo(rs), op, ctx);
		break;
	case BPF_LSH:
		emit(hppa_subi(0x1f, lo(rs), HPPA_REG_T0), ctx);
		emit(hppa_mtsar(HPPA_REG_T0), ctx);
		emit(hppa_depwz_sar(lo(rd), lo(rd)), ctx);
		break;
	case BPF_RSH:
		emit(hppa_mtsar(lo(rs)), ctx);
		emit(hppa_shrpw_sar(lo(rd), lo(rd)), ctx);
		break;
	case BPF_ARSH: /* sign extending arithmetic shift right */
		// emit(hppa_beq(lo(rs), HPPA_REG_ZERO, 2), ctx);
		emit(hppa_subi(0x1f, lo(rs), HPPA_REG_T0), ctx);
		emit(hppa_mtsar(HPPA_REG_T0), ctx);
		emit(hppa_extrws_sar(lo(rd), lo(rd)), ctx);
		break;
	case BPF_NEG:
		emit(hppa_sub(HPPA_REG_ZERO, lo(rd), lo(rd)), ctx);  // sub r0,rd,rd
		break;
	default:
		WARN_ON(1);
	}

	bpf_put_reg32(dst, rd, ctx);
}

static int emit_branch_r64(const s8 *src1, const s8 *src2, s32 paoff,
			   struct hppa_jit_context *ctx, const u8 op)
{
	int e, s = ctx->ninsns;
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *tmp2 = regmap[TMP_REG_2];

	const s8 *rs1 = bpf_get_reg64(src1, tmp1, ctx);
	const s8 *rs2 = bpf_get_reg64(src2, tmp2, ctx);

	/*
	 * NO_JUMP skips over the rest of the instructions and the
	 * emit_jump, meaning the BPF branch is not taken.
	 * JUMP skips directly to the emit_jump, meaning
	 * the BPF branch is taken.
	 *
	 * The fallthrough case results in the BPF branch being taken.
	 */
#define NO_JUMP(idx)	(2 + (idx) - 1)
#define JUMP(idx)	(0 + (idx) - 1)
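
	/*
	 * Worked example (BPF_JGT, sketch): the three compare-and-branch
	 * instructions emitted below behave like
	 *	if (hi(rs1) >  hi(rs2)) goto taken;	// JUMP(2)
	 *	if (hi(rs1) <  hi(rs2)) goto not_taken;	// NO_JUMP(1)
	 *	if (lo(rs1) <= lo(rs2)) goto not_taken;	// NO_JUMP(0)
	 * taken:     fall into the emit_jump() at the end of this function
	 * not_taken: skip over it
	 */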

	switch (op) {
	case BPF_JEQ:
		emit(hppa_bne(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
		emit(hppa_bne(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
		break;
	case BPF_JGT:
		emit(hppa_bgtu(hi(rs1), hi(rs2), JUMP(2)), ctx);
		emit(hppa_bltu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
		emit(hppa_bleu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
		break;
	case BPF_JLT:
		emit(hppa_bltu(hi(rs1), hi(rs2), JUMP(2)), ctx);
		emit(hppa_bgtu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
		emit(hppa_bgeu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
		break;
	case BPF_JGE:
		emit(hppa_bgtu(hi(rs1), hi(rs2), JUMP(2)), ctx);
		emit(hppa_bltu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
		emit(hppa_bltu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
		break;
	case BPF_JLE:
		emit(hppa_bltu(hi(rs1), hi(rs2), JUMP(2)), ctx);
		emit(hppa_bgtu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
		emit(hppa_bgtu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
		break;
	case BPF_JNE:
		emit(hppa_bne(hi(rs1), hi(rs2), JUMP(1)), ctx);
		emit(hppa_beq(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
		break;
	case BPF_JSGT:
		emit(hppa_bgt(hi(rs1), hi(rs2), JUMP(2)), ctx);
		emit(hppa_blt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
		emit(hppa_bleu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
		break;
	case BPF_JSLT:
		emit(hppa_blt(hi(rs1), hi(rs2), JUMP(2)), ctx);
		emit(hppa_bgt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
		emit(hppa_bgeu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
		break;
	case BPF_JSGE:
		emit(hppa_bgt(hi(rs1), hi(rs2), JUMP(2)), ctx);
		emit(hppa_blt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
		emit(hppa_bltu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
		break;
	case BPF_JSLE:
		emit(hppa_blt(hi(rs1), hi(rs2), JUMP(2)), ctx);
		emit(hppa_bgt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx);
		emit(hppa_bgtu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx);
		break;
	case BPF_JSET:
		emit(hppa_and(hi(rs1), hi(rs2), HPPA_REG_T0), ctx);
		emit(hppa_and(lo(rs1), lo(rs2), HPPA_REG_T1), ctx);
		emit(hppa_bne(HPPA_REG_T0, HPPA_REG_ZERO, JUMP(1)), ctx);
		emit(hppa_beq(HPPA_REG_T1, HPPA_REG_ZERO, NO_JUMP(0)), ctx);
		break;
	default:
		WARN_ON(1);
	}

#undef NO_JUMP
#undef JUMP

	e = ctx->ninsns;
	/* Adjust for extra insns. */
	paoff -= (e - s);
	emit_jump(paoff, true, ctx);
	return 0;
}

static int emit_bcc(u8 op, u8 rd, u8 rs, int paoff, struct hppa_jit_context *ctx)
{
	int e, s;
	bool far = false;
	int off;

	if (op == BPF_JSET) {
		/*
		 * BPF_JSET is a special case: there is no native "test bits"
		 * condition, so AND the operands into t0 and branch on the
		 * result with BPF_JNE against zero.
		 */
		emit(hppa_and(rd, rs, HPPA_REG_T0), ctx);
		paoff -= 1; /* reduce offset due to hppa_and() above */
		rd = HPPA_REG_T0;
		rs = HPPA_REG_ZERO;
		op = BPF_JNE;
	}

	s = ctx->ninsns;

	if (!relative_bits_ok(paoff - HPPA_BRANCH_DISPLACEMENT, 12)) {
		op = invert_bpf_cond(op);
		far = true;
	}

	/*
	 * For a far branch, the condition is negated and we jump over the
	 * branch itself, and the three instructions from emit_jump.
	 * For a near branch, just use paoff.
	 */
	off = far ? (HPPA_BRANCH_DISPLACEMENT - 1) : paoff - HPPA_BRANCH_DISPLACEMENT;
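
	/*
	 * Sketch of a far BPF_JEQ whose target does not fit in the 12-bit
	 * conditional-branch range: the condition is inverted and used to hop
	 * over the absolute jump emitted by emit_jump() below, e.g.
	 *	bne	rd, rs, 1f
	 *	ldil	L%target, %r31
	 *	be,l,n	R%target(%sr4, %r31)
	 * 1:
	 */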

	switch (op) {
	/* IF (dst COND src) JUMP off */
	case BPF_JEQ:
		emit(hppa_beq(rd, rs, off), ctx);
		break;
	case BPF_JGT:
		emit(hppa_bgtu(rd, rs, off), ctx);
		break;
	case BPF_JLT:
		emit(hppa_bltu(rd, rs, off), ctx);
		break;
	case BPF_JGE:
		emit(hppa_bgeu(rd, rs, off), ctx);
		break;
	case BPF_JLE:
		emit(hppa_bleu(rd, rs, off), ctx);
		break;
	case BPF_JNE:
		emit(hppa_bne(rd, rs, off), ctx);
		break;
	case BPF_JSGT:
		emit(hppa_bgt(rd, rs, off), ctx);
		break;
	case BPF_JSLT:
		emit(hppa_blt(rd, rs, off), ctx);
		break;
	case BPF_JSGE:
		emit(hppa_bge(rd, rs, off), ctx);
		break;
	case BPF_JSLE:
		emit(hppa_ble(rd, rs, off), ctx);
		break;
	default:
		WARN_ON(1);
	}

	if (far) {
		e = ctx->ninsns;
		/* Adjust for extra insns. */
		paoff -= (e - s);
		emit_jump(paoff, true, ctx);
	}
	return 0;
}

static int emit_branch_r32(const s8 *src1, const s8 *src2, s32 paoff,
			   struct hppa_jit_context *ctx, const u8 op)
{
	int e, s = ctx->ninsns;
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *tmp2 = regmap[TMP_REG_2];

	const s8 *rs1 = bpf_get_reg32(src1, tmp1, ctx);
	const s8 *rs2 = bpf_get_reg32(src2, tmp2, ctx);

	e = ctx->ninsns;
	/* Adjust for extra insns. */
	paoff -= (e - s);

	if (emit_bcc(op, lo(rs1), lo(rs2), paoff, ctx))
		return -1;

	return 0;
}

static void emit_call(bool fixed, u64 addr, struct hppa_jit_context *ctx)
{
	const s8 *tmp = regmap[TMP_REG_1];
	const s8 *r0 = regmap[BPF_REG_0];
	const s8 *reg;
	const int offset_sp = 2 * STACK_ALIGN;

	/* prepare stack */
	emit(hppa_ldo(offset_sp, HPPA_REG_SP, HPPA_REG_SP), ctx);

	/* load R1 & R2 in registers, R3-R5 to stack. */
	reg = bpf_get_reg64_offset(regmap[BPF_REG_5], tmp, offset_sp, ctx);
	emit(hppa_stw(hi(reg), -0x48, HPPA_REG_SP), ctx);
	emit(hppa_stw(lo(reg), -0x44, HPPA_REG_SP), ctx);

	reg = bpf_get_reg64_offset(regmap[BPF_REG_4], tmp, offset_sp, ctx);
	emit(hppa_stw(hi(reg), -0x40, HPPA_REG_SP), ctx);
	emit(hppa_stw(lo(reg), -0x3c, HPPA_REG_SP), ctx);

	reg = bpf_get_reg64_offset(regmap[BPF_REG_3], tmp, offset_sp, ctx);
	emit(hppa_stw(hi(reg), -0x38, HPPA_REG_SP), ctx);
	emit(hppa_stw(lo(reg), -0x34, HPPA_REG_SP), ctx);

	reg = bpf_get_reg64_offset(regmap[BPF_REG_2], tmp, offset_sp, ctx);
	emit_hppa_copy(hi(reg), HPPA_REG_ARG3, ctx);
	emit_hppa_copy(lo(reg), HPPA_REG_ARG2, ctx);

	reg = bpf_get_reg64_offset(regmap[BPF_REG_1], tmp, offset_sp, ctx);
	emit_hppa_copy(hi(reg), HPPA_REG_ARG1, ctx);
	emit_hppa_copy(lo(reg), HPPA_REG_ARG0, ctx);

	/* backup TCC */
	if (REG_WAS_SEEN(ctx, HPPA_REG_TCC))
		emit(hppa_copy(HPPA_REG_TCC, HPPA_REG_TCC_SAVED), ctx);

	/*
	 * Use ldil() to load absolute address. Don't use emit_imm as the
	 * number of emitted instructions should not depend on the value of
	 * addr.
	 */
	emit(hppa_ldil(addr, HPPA_REG_R31), ctx);
	emit(hppa_be_l(im11(addr) >> 2, HPPA_REG_R31, EXEC_NEXT_INSTR), ctx);
	/* set return address in delay slot */
	emit_hppa_copy(HPPA_REG_R31, HPPA_REG_RP, ctx);

	/* restore TCC */
	if (REG_WAS_SEEN(ctx, HPPA_REG_TCC))
		emit(hppa_copy(HPPA_REG_TCC_SAVED, HPPA_REG_TCC), ctx);

	/* restore stack */
	emit(hppa_ldo(-offset_sp, HPPA_REG_SP, HPPA_REG_SP), ctx);

	/* set return value. */
	emit_hppa_copy(HPPA_REG_RET0, hi(r0), ctx);
	emit_hppa_copy(HPPA_REG_RET1, lo(r0), ctx);
}

static int emit_bpf_tail_call(int insn, struct hppa_jit_context *ctx)
{
	/*
	 * R1 -> &ctx
	 * R2 -> &array
	 * R3 -> index
	 */
	int off;
	const s8 *arr_reg = regmap[BPF_REG_2];
	const s8 *idx_reg = regmap[BPF_REG_3];
	struct bpf_array bpfa;
	struct bpf_prog bpfp;

	/* get address of TCC main exit function for error case into rp */
	emit(EXIT_PTR_LOAD(HPPA_REG_RP), ctx);

	/* max_entries = array->map.max_entries; */
	off = offsetof(struct bpf_array, map.max_entries);
	BUILD_BUG_ON(sizeof(bpfa.map.max_entries) != 4);
	emit(hppa_ldw(off, lo(arr_reg), HPPA_REG_T1), ctx);

	/*
	 * if (index >= max_entries)
	 *   goto out;
	 */
	emit(hppa_bltu(lo(idx_reg), HPPA_REG_T1, 2 - HPPA_BRANCH_DISPLACEMENT), ctx);
	emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);

	/*
	 * if (--tcc < 0)
	 *   goto out;
	 */
	REG_FORCE_SEEN(ctx, HPPA_REG_TCC);
	emit(hppa_ldo(-1, HPPA_REG_TCC, HPPA_REG_TCC), ctx);
	emit(hppa_bge(HPPA_REG_TCC, HPPA_REG_ZERO, 2 - HPPA_BRANCH_DISPLACEMENT), ctx);
	emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);

	/*
	 * prog = array->ptrs[index];
	 * if (!prog)
	 *   goto out;
	 */
	BUILD_BUG_ON(sizeof(bpfa.ptrs[0]) != 4);
	emit(hppa_sh2add(lo(idx_reg), lo(arr_reg), HPPA_REG_T0), ctx);
	off = offsetof(struct bpf_array, ptrs);
	BUILD_BUG_ON(!relative_bits_ok(off, 11));
	emit(hppa_ldw(off, HPPA_REG_T0, HPPA_REG_T0), ctx);
	emit(hppa_bne(HPPA_REG_T0, HPPA_REG_ZERO, 2 - HPPA_BRANCH_DISPLACEMENT), ctx);
	emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);

	/*
	 * tcc = temp_tcc;
	 * goto *(prog->bpf_func + 4);
	 */
	off = offsetof(struct bpf_prog, bpf_func);
	BUILD_BUG_ON(!relative_bits_ok(off, 11));
	BUILD_BUG_ON(sizeof(bpfp.bpf_func) != 4);
	emit(hppa_ldw(off, HPPA_REG_T0, HPPA_REG_T0), ctx);
	/* Epilogue jumps to *(t0 + 4). */
	__build_epilogue(true, ctx);
	return 0;
}

static int emit_load_r64(const s8 *dst, const s8 *src, s16 off,
			 struct hppa_jit_context *ctx, const u8 size)
{
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *tmp2 = regmap[TMP_REG_2];
	const s8 *rd = bpf_get_reg64_ref(dst, tmp1, ctx->prog->aux->verifier_zext, ctx);
	const s8 *rs = bpf_get_reg64(src, tmp2, ctx);
	s8 srcreg;

	/* need to calculate address since offset does not fit in 14 bits? */
	if (relative_bits_ok(off, 14))
		srcreg = lo(rs);
	else {
		/* need to use R1 here, since addil puts result into R1 */
		srcreg = HPPA_REG_R1;
		emit(hppa_addil(off, lo(rs)), ctx);
		off = im11(off);
	}

	/* LDX: dst = *(size *)(src + off) */
	switch (size) {
	case BPF_B:
		emit(hppa_ldb(off + 0, srcreg, lo(rd)), ctx);
		if (!ctx->prog->aux->verifier_zext)
			emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
		break;
	case BPF_H:
		emit(hppa_ldh(off + 0, srcreg, lo(rd)), ctx);
		if (!ctx->prog->aux->verifier_zext)
			emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
		break;
	case BPF_W:
		emit(hppa_ldw(off + 0, srcreg, lo(rd)), ctx);
		if (!ctx->prog->aux->verifier_zext)
			emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
		break;
	case BPF_DW:
		emit(hppa_ldw(off + 0, srcreg, hi(rd)), ctx);
		emit(hppa_ldw(off + 4, srcreg, lo(rd)), ctx);
		break;
	}

	bpf_put_reg64(dst, rd, ctx);
	return 0;
}

static int emit_store_r64(const s8 *dst, const s8 *src, s16 off,
			  struct hppa_jit_context *ctx, const u8 size,
			  const u8 mode)
{
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *tmp2 = regmap[TMP_REG_2];
	const s8 *rd = bpf_get_reg64(dst, tmp1, ctx);
	const s8 *rs = bpf_get_reg64(src, tmp2, ctx);
	s8 dstreg;

	/* need to calculate address since offset does not fit in 14 bits? */
	if (relative_bits_ok(off, 14))
		dstreg = lo(rd);
	else {
		/* need to use R1 here, since addil puts result into R1 */
		dstreg = HPPA_REG_R1;
		emit(hppa_addil(off, lo(rd)), ctx);
		off = im11(off);
	}

	/* ST/STX: *(size *)(dst + off) = src */
	switch (size) {
	case BPF_B:
		emit(hppa_stb(lo(rs), off + 0, dstreg), ctx);
		break;
	case BPF_H:
		emit(hppa_sth(lo(rs), off + 0, dstreg), ctx);
		break;
	case BPF_W:
		emit(hppa_stw(lo(rs), off + 0, dstreg), ctx);
		break;
	case BPF_DW:
		emit(hppa_stw(hi(rs), off + 0, dstreg), ctx);
		emit(hppa_stw(lo(rs), off + 4, dstreg), ctx);
		break;
	}

	return 0;
}

static void emit_rev16(const s8 rd, struct hppa_jit_context *ctx)
{
	emit(hppa_extru(rd, 23, 8, HPPA_REG_T1), ctx);
	emit(hppa_depwz(rd, 23, 8, HPPA_REG_T1), ctx);
	emit(hppa_extru(HPPA_REG_T1, 31, 16, rd), ctx);
}

static void emit_rev32(const s8 rs, const s8 rd, struct hppa_jit_context *ctx)
{
	emit(hppa_shrpw(rs, rs, 16, HPPA_REG_T1), ctx);
	emit(hppa_depwz(HPPA_REG_T1, 15, 8, HPPA_REG_T1), ctx);
	emit(hppa_shrpw(rs, HPPA_REG_T1, 8, rd), ctx);
}
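
/*
 * Illustration: emit_rev32() uses the classic three-instruction PA-RISC
 * byte-swap sequence (shift-pair, deposit, shift-pair); for rs = 0x11223344
 * the net result is rd = 0x44332211.  emit_rev16() swaps the two low bytes
 * and clears the upper halfword, e.g. a low halfword of 0x1122 becomes
 * 0x00002211.
 */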

static void emit_zext64(const s8 *dst, struct hppa_jit_context *ctx)
{
	const s8 *rd;
	const s8 *tmp1 = regmap[TMP_REG_1];

	rd = bpf_get_reg64(dst, tmp1, ctx);
	emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
	bpf_put_reg64(dst, rd, ctx);
}

int bpf_jit_emit_insn(const struct bpf_insn *insn, struct hppa_jit_context *ctx,
		      bool extra_pass)
{
	bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 ||
		BPF_CLASS(insn->code) == BPF_JMP;
	int s, e, paoff, i = insn - ctx->prog->insnsi;
	u8 code = insn->code;
	s16 off = insn->off;
	s32 imm = insn->imm;

	const s8 *dst = regmap[insn->dst_reg];
	const s8 *src = regmap[insn->src_reg];
	const s8 *tmp1 = regmap[TMP_REG_1];
	const s8 *tmp2 = regmap[TMP_REG_2];

	if (0) printk("CLASS %03d  CODE %#02x ALU64:%d BPF_SIZE %#02x  "
		"BPF_CODE %#02x  src_reg %d  dst_reg %d\n",
		BPF_CLASS(code), code, (code & BPF_ALU64) ? 1:0, BPF_SIZE(code),
		BPF_OP(code), insn->src_reg, insn->dst_reg);

	switch (code) {
	/* dst = src */
	case BPF_ALU64 | BPF_MOV | BPF_X:

	case BPF_ALU64 | BPF_ADD | BPF_X:
	case BPF_ALU64 | BPF_ADD | BPF_K:

	case BPF_ALU64 | BPF_SUB | BPF_X:
	case BPF_ALU64 | BPF_SUB | BPF_K:

	case BPF_ALU64 | BPF_AND | BPF_X:
	case BPF_ALU64 | BPF_OR | BPF_X:
	case BPF_ALU64 | BPF_XOR | BPF_X:

	case BPF_ALU64 | BPF_MUL | BPF_X:
	case BPF_ALU64 | BPF_MUL | BPF_K:

	case BPF_ALU64 | BPF_DIV | BPF_X:
	case BPF_ALU64 | BPF_DIV | BPF_K:

	case BPF_ALU64 | BPF_MOD | BPF_X:
	case BPF_ALU64 | BPF_MOD | BPF_K:

	case BPF_ALU64 | BPF_LSH | BPF_X:
	case BPF_ALU64 | BPF_RSH | BPF_X:
	case BPF_ALU64 | BPF_ARSH | BPF_X:
		if (BPF_SRC(code) == BPF_K) {
			emit_imm32(tmp2, imm, ctx);
			src = tmp2;
		}
		emit_alu_r64(dst, src, ctx, BPF_OP(code));
		break;

	/* dst = -dst */
	case BPF_ALU64 | BPF_NEG:
		emit_alu_r64(dst, tmp2, ctx, BPF_OP(code));
		break;

	case BPF_ALU64 | BPF_MOV | BPF_K:
	case BPF_ALU64 | BPF_AND | BPF_K:
	case BPF_ALU64 | BPF_OR | BPF_K:
	case BPF_ALU64 | BPF_XOR | BPF_K:
	case BPF_ALU64 | BPF_LSH | BPF_K:
	case BPF_ALU64 | BPF_RSH | BPF_K:
	case BPF_ALU64 | BPF_ARSH | BPF_K:
		emit_alu_i64(dst, imm, ctx, BPF_OP(code));
		break;

	case BPF_ALU | BPF_MOV | BPF_X:
		if (imm == 1) {
			/* Special mov32 for zext. */
			emit_zext64(dst, ctx);
			break;
		}
		fallthrough;
	/* dst = dst OP src */
	case BPF_ALU | BPF_ADD | BPF_X:
	case BPF_ALU | BPF_SUB | BPF_X:
	case BPF_ALU | BPF_AND | BPF_X:
	case BPF_ALU | BPF_OR | BPF_X:
	case BPF_ALU | BPF_XOR | BPF_X:

	case BPF_ALU | BPF_MUL | BPF_X:
	case BPF_ALU | BPF_MUL | BPF_K:

	case BPF_ALU | BPF_DIV | BPF_X:
	case BPF_ALU | BPF_DIV | BPF_K:

	case BPF_ALU | BPF_MOD | BPF_X:
	case BPF_ALU | BPF_MOD | BPF_K:

	case BPF_ALU | BPF_LSH | BPF_X:
	case BPF_ALU | BPF_RSH | BPF_X:
	case BPF_ALU | BPF_ARSH | BPF_X:
		if (BPF_SRC(code) == BPF_K) {
			emit_imm32(tmp2, imm, ctx);
			src = tmp2;
		}
		emit_alu_r32(dst, src, ctx, BPF_OP(code));
		break;

	/* dst = dst OP imm */
	case BPF_ALU | BPF_MOV | BPF_K:
	case BPF_ALU | BPF_ADD | BPF_K:
	case BPF_ALU | BPF_SUB | BPF_K:
	case BPF_ALU | BPF_AND | BPF_K:
	case BPF_ALU | BPF_OR | BPF_K:
	case BPF_ALU | BPF_XOR | BPF_K:
	case BPF_ALU | BPF_LSH | BPF_K:
	case BPF_ALU | BPF_RSH | BPF_K:
	case BPF_ALU | BPF_ARSH | BPF_K:
		/*
		 * mul,div,mod are handled in the BPF_X case.
		 */
		emit_alu_i32(dst, imm, ctx, BPF_OP(code));
		break;

	/* dst = -dst */
	case BPF_ALU | BPF_NEG:
		/*
		 * src is ignored; choose tmp2 as a dummy register since it
		 * is not on the stack.
		 */
		emit_alu_r32(dst, tmp2, ctx, BPF_OP(code));
		break;

	/* dst = BSWAP##imm(dst) */
	case BPF_ALU | BPF_END | BPF_FROM_BE:
	{
		const s8 *rd = bpf_get_reg64(dst, tmp1, ctx);

		switch (imm) {
		case 16:
			/* zero-extend 16 bits into 64 bits */
			emit(hppa_extru(lo(rd), 31, 16, lo(rd)), ctx);
			fallthrough;
		case 32:
			/* zero-extend 32 bits into 64 bits */
			if (!ctx->prog->aux->verifier_zext)
				emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
			break;
		case 64:
			/* Do nothing. */
			break;
		default:
			pr_err("bpf-jit: BPF_END imm %d invalid\n", imm);
			return -1;
		}

		bpf_put_reg64(dst, rd, ctx);
		break;
	}

	case BPF_ALU | BPF_END | BPF_FROM_LE:
	{
		const s8 *rd = bpf_get_reg64(dst, tmp1, ctx);

		switch (imm) {
		case 16:
			emit_rev16(lo(rd), ctx);
			if (!ctx->prog->aux->verifier_zext)
				emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
			break;
		case 32:
			emit_rev32(lo(rd), lo(rd), ctx);
			if (!ctx->prog->aux->verifier_zext)
				emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx);
			break;
		case 64:
			/* Swap upper and lower halves, then each half. */
			emit_hppa_copy(hi(rd), HPPA_REG_T0, ctx);
			emit_rev32(lo(rd), hi(rd), ctx);
			emit_rev32(HPPA_REG_T0, lo(rd), ctx);
			break;
		default:
			pr_err("bpf-jit: BPF_END imm %d invalid\n", imm);
			return -1;
		}

		bpf_put_reg64(dst, rd, ctx);
		break;
	}
	/* JUMP off */
	case BPF_JMP | BPF_JA:
		paoff = hppa_offset(i, off, ctx);
		emit_jump(paoff, false, ctx);
		break;
	/* function call */
	case BPF_JMP | BPF_CALL:
	{
		bool fixed;
		int ret;
		u64 addr;

		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, &addr,
					    &fixed);
		if (ret < 0)
			return ret;
		emit_call(fixed, addr, ctx);
		break;
	}
	/* tail call */
	case BPF_JMP | BPF_TAIL_CALL:
		REG_SET_SEEN_ALL(ctx);
		if (emit_bpf_tail_call(i, ctx))
			return -1;
		break;
	/* IF (dst COND imm) JUMP off */
	case BPF_JMP | BPF_JEQ | BPF_X:
	case BPF_JMP | BPF_JEQ | BPF_K:
	case BPF_JMP32 | BPF_JEQ | BPF_X:
	case BPF_JMP32 | BPF_JEQ | BPF_K:

	case BPF_JMP | BPF_JNE | BPF_X:
	case BPF_JMP | BPF_JNE | BPF_K:
	case BPF_JMP32 | BPF_JNE | BPF_X:
	case BPF_JMP32 | BPF_JNE | BPF_K:

	case BPF_JMP | BPF_JLE | BPF_X:
	case BPF_JMP | BPF_JLE | BPF_K:
	case BPF_JMP32 | BPF_JLE | BPF_X:
	case BPF_JMP32 | BPF_JLE | BPF_K:

	case BPF_JMP | BPF_JLT | BPF_X:
	case BPF_JMP | BPF_JLT | BPF_K:
	case BPF_JMP32 | BPF_JLT | BPF_X:
	case BPF_JMP32 | BPF_JLT | BPF_K:

	case BPF_JMP | BPF_JGE | BPF_X:
	case BPF_JMP | BPF_JGE | BPF_K:
	case BPF_JMP32 | BPF_JGE | BPF_X:
	case BPF_JMP32 | BPF_JGE | BPF_K:

	case BPF_JMP | BPF_JGT | BPF_X:
	case BPF_JMP | BPF_JGT | BPF_K:
	case BPF_JMP32 | BPF_JGT | BPF_X:
	case BPF_JMP32 | BPF_JGT | BPF_K:

	case BPF_JMP | BPF_JSLE | BPF_X:
	case BPF_JMP | BPF_JSLE | BPF_K:
	case BPF_JMP32 | BPF_JSLE | BPF_X:
	case BPF_JMP32 | BPF_JSLE | BPF_K:

	case BPF_JMP | BPF_JSLT | BPF_X:
	case BPF_JMP | BPF_JSLT | BPF_K:
	case BPF_JMP32 | BPF_JSLT | BPF_X:
	case BPF_JMP32 | BPF_JSLT | BPF_K:

	case BPF_JMP | BPF_JSGE | BPF_X:
	case BPF_JMP | BPF_JSGE | BPF_K:
	case BPF_JMP32 | BPF_JSGE | BPF_X:
	case BPF_JMP32 | BPF_JSGE | BPF_K:

	case BPF_JMP | BPF_JSGT | BPF_X:
	case BPF_JMP | BPF_JSGT | BPF_K:
	case BPF_JMP32 | BPF_JSGT | BPF_X:
	case BPF_JMP32 | BPF_JSGT | BPF_K:

	case BPF_JMP | BPF_JSET | BPF_X:
	case BPF_JMP | BPF_JSET | BPF_K:
	case BPF_JMP32 | BPF_JSET | BPF_X:
	case BPF_JMP32 | BPF_JSET | BPF_K:
		paoff = hppa_offset(i, off, ctx);
		if (BPF_SRC(code) == BPF_K) {
			s = ctx->ninsns;
			emit_imm32(tmp2, imm, ctx);
			src = tmp2;
			e = ctx->ninsns;
			paoff -= (e - s);
		}
		if (is64)
			emit_branch_r64(dst, src, paoff, ctx, BPF_OP(code));
		else
			emit_branch_r32(dst, src, paoff, ctx, BPF_OP(code));
		break;
	/* function return */
	case BPF_JMP | BPF_EXIT:
		if (i == ctx->prog->len - 1)
			break;
		/* load epilogue function pointer and jump to it. */
		emit(EXIT_PTR_LOAD(HPPA_REG_RP), ctx);
		emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx);
		break;

	/* dst = imm64 */
	case BPF_LD | BPF_IMM | BPF_DW:
	{
		struct bpf_insn insn1 = insn[1];
		u32 upper = insn1.imm;
		u32 lower = imm;
		const s8 *rd = bpf_get_reg64_ref(dst, tmp1, false, ctx);

		if (0 && bpf_pseudo_func(insn)) {
			WARN_ON(upper); /* we are 32-bit! */
			upper = 0;
			lower = (uintptr_t) dereference_function_descriptor(lower);
		}

		emit_imm64(rd, upper, lower, ctx);
		bpf_put_reg64(dst, rd, ctx);
		return 1;
	}

	/* LDX: dst = *(size *)(src + off) */
	case BPF_LDX | BPF_MEM | BPF_B:
	case BPF_LDX | BPF_MEM | BPF_H:
	case BPF_LDX | BPF_MEM | BPF_W:
	case BPF_LDX | BPF_MEM | BPF_DW:
		if (emit_load_r64(dst, src, off, ctx, BPF_SIZE(code)))
			return -1;
		break;

	/* speculation barrier */
	case BPF_ST | BPF_NOSPEC:
		break;

	/* ST: *(size *)(dst + off) = imm */
	case BPF_ST | BPF_MEM | BPF_B:
	case BPF_ST | BPF_MEM | BPF_H:
	case BPF_ST | BPF_MEM | BPF_W:
	case BPF_ST | BPF_MEM | BPF_DW:

	case BPF_STX | BPF_MEM | BPF_B:
	case BPF_STX | BPF_MEM | BPF_H:
	case BPF_STX | BPF_MEM | BPF_W:
	case BPF_STX | BPF_MEM | BPF_DW:
		if (BPF_CLASS(code) == BPF_ST) {
			emit_imm32(tmp2, imm, ctx);
			src = tmp2;
		}

		if (emit_store_r64(dst, src, off, ctx, BPF_SIZE(code),
				   BPF_MODE(code)))
			return -1;
		break;

	case BPF_STX | BPF_ATOMIC | BPF_W:
	case BPF_STX | BPF_ATOMIC | BPF_DW:
		pr_info_once(
			"bpf-jit: not supported: atomic operation %02x ***\n",
			insn->imm);
		return -EFAULT;

	default:
		pr_err("bpf-jit: unknown opcode %02x\n", code);
		return -EINVAL;
	}

	return 0;
}

void bpf_jit_build_prologue(struct hppa_jit_context *ctx)
{
	const s8 *tmp = regmap[TMP_REG_1];
	const s8 *dst, *reg;
	int stack_adjust = 0;
	int i;
	unsigned long addr;
	int bpf_stack_adjust;

	/*
	 * stack on hppa grows up, so if tail calls are used we need to
	 * allocate the maximum stack size
	 */
	if (REG_ALL_SEEN(ctx))
		bpf_stack_adjust = MAX_BPF_STACK;
	else
		bpf_stack_adjust = ctx->prog->aux->stack_depth;
	bpf_stack_adjust = round_up(bpf_stack_adjust, STACK_ALIGN);

	/* make space for callee-saved registers. */
	stack_adjust += NR_SAVED_REGISTERS * REG_SIZE;
	/* make space for BPF registers on stack. */
	stack_adjust += BPF_JIT_SCRATCH_REGS * REG_SIZE;
	/* make space for BPF stack. */
	stack_adjust += bpf_stack_adjust;
	/* round up for stack alignment. */
	stack_adjust = round_up(stack_adjust, STACK_ALIGN);
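
	/*
	 * Example (illustrative): with no tail calls and a 64-byte BPF stack,
	 * stack_adjust is 24 * REG_SIZE (callee saves and frame overhead)
	 * + 10 * REG_SIZE (JIT scratch slots) + 64, rounded up to STACK_ALIGN.
	 * Because the PA-RISC stack grows upward, the frame is claimed below
	 * simply by adding stack_adjust to sp.
	 */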

	/*
	 * The first instruction sets the tail-call-counter (TCC) register.
	 * This instruction is skipped by tail calls.
	 * Use a temporary register instead of a caller-saved register initially.
	 */
	emit(hppa_ldi(MAX_TAIL_CALL_CNT, HPPA_REG_TCC_IN_INIT), ctx);

	/*
	 * skip all initializations when called as BPF TAIL call.
	 */
	emit(hppa_ldi(MAX_TAIL_CALL_CNT, HPPA_REG_R1), ctx);
	emit(hppa_bne(HPPA_REG_TCC_IN_INIT, HPPA_REG_R1, ctx->prologue_len - 2 - HPPA_BRANCH_DISPLACEMENT), ctx);

	/* set up hppa stack frame. */
	emit_hppa_copy(HPPA_REG_SP, HPPA_REG_R1, ctx);			// copy sp,r1 (=prev_sp)
	emit(hppa_ldo(stack_adjust, HPPA_REG_SP, HPPA_REG_SP), ctx);	// ldo stack_adjust(sp),sp (increase stack)
	emit(hppa_stw(HPPA_REG_R1, -REG_SIZE, HPPA_REG_SP), ctx);	// stw prev_sp,-0x04(sp)
	emit(hppa_stw(HPPA_REG_RP, -0x14, HPPA_REG_SP), ctx);		// stw rp,-0x14(sp)

	REG_FORCE_SEEN(ctx, HPPA_REG_T0);
	REG_FORCE_SEEN(ctx, HPPA_REG_T1);
	REG_FORCE_SEEN(ctx, HPPA_REG_T2);
	REG_FORCE_SEEN(ctx, HPPA_REG_T3);
	REG_FORCE_SEEN(ctx, HPPA_REG_T4);
	REG_FORCE_SEEN(ctx, HPPA_REG_T5);

	/* save callee-save registers. */
	for (i = 3; i <= 18; i++) {
		if (OPTIMIZE_HPPA && !REG_WAS_SEEN(ctx, HPPA_R(i)))
			continue;
		emit(hppa_stw(HPPA_R(i), -REG_SIZE * (8 + (i-3)), HPPA_REG_SP), ctx);	// stw ri,-save_area(sp)
	}

	/*
	 * now really set the tail call counter (TCC) register.
	 */
	if (REG_WAS_SEEN(ctx, HPPA_REG_TCC))
		emit(hppa_ldi(MAX_TAIL_CALL_CNT, HPPA_REG_TCC), ctx);

	/*
	 * save epilogue function pointer for outer TCC call chain.
	 * The main TCC call stores the final RP on stack.
	 */
	addr = (uintptr_t) &ctx->insns[ctx->epilogue_offset];
	/* skip first two instructions of exit function, which jump to exit */
	addr += 2 * HPPA_INSN_SIZE;
	emit(hppa_ldil(addr, HPPA_REG_T2), ctx);
	emit(hppa_ldo(im11(addr), HPPA_REG_T2, HPPA_REG_T2), ctx);
	emit(EXIT_PTR_STORE(HPPA_REG_T2), ctx);

	/* load R1 & R2 from registers, R3-R5 from stack. */
	/* use HPPA_REG_R1 which holds the old stack value */
	dst = regmap[BPF_REG_5];
	reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
	if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
		if (REG_WAS_SEEN(ctx, hi(reg)))
			emit(hppa_ldw(-0x48, HPPA_REG_R1, hi(reg)), ctx);
		if (REG_WAS_SEEN(ctx, lo(reg)))
			emit(hppa_ldw(-0x44, HPPA_REG_R1, lo(reg)), ctx);
		bpf_put_reg64(dst, tmp, ctx);
	}

	dst = regmap[BPF_REG_4];
	reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
	if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
		if (REG_WAS_SEEN(ctx, hi(reg)))
			emit(hppa_ldw(-0x40, HPPA_REG_R1, hi(reg)), ctx);
		if (REG_WAS_SEEN(ctx, lo(reg)))
			emit(hppa_ldw(-0x3c, HPPA_REG_R1, lo(reg)), ctx);
		bpf_put_reg64(dst, tmp, ctx);
	}

	dst = regmap[BPF_REG_3];
	reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
	if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
		if (REG_WAS_SEEN(ctx, hi(reg)))
			emit(hppa_ldw(-0x38, HPPA_REG_R1, hi(reg)), ctx);
		if (REG_WAS_SEEN(ctx, lo(reg)))
			emit(hppa_ldw(-0x34, HPPA_REG_R1, lo(reg)), ctx);
		bpf_put_reg64(dst, tmp, ctx);
	}

	dst = regmap[BPF_REG_2];
	reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
	if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
		if (REG_WAS_SEEN(ctx, hi(reg)))
			emit_hppa_copy(HPPA_REG_ARG3, hi(reg), ctx);
		if (REG_WAS_SEEN(ctx, lo(reg)))
			emit_hppa_copy(HPPA_REG_ARG2, lo(reg), ctx);
		bpf_put_reg64(dst, tmp, ctx);
	}

	dst = regmap[BPF_REG_1];
	reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
	if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
		if (REG_WAS_SEEN(ctx, hi(reg)))
			emit_hppa_copy(HPPA_REG_ARG1, hi(reg), ctx);
		if (REG_WAS_SEEN(ctx, lo(reg)))
			emit_hppa_copy(HPPA_REG_ARG0, lo(reg), ctx);
		bpf_put_reg64(dst, tmp, ctx);
	}

	/* Set up BPF frame pointer. */
	dst = regmap[BPF_REG_FP];
	reg = bpf_get_reg64_ref(dst, tmp, false, ctx);
	if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) {
		if (REG_WAS_SEEN(ctx, lo(reg)))
			emit(hppa_ldo(-REG_SIZE * (NR_SAVED_REGISTERS + BPF_JIT_SCRATCH_REGS),
				HPPA_REG_SP, lo(reg)), ctx);
		if (REG_WAS_SEEN(ctx, hi(reg)))
			emit_hppa_copy(HPPA_REG_ZERO, hi(reg), ctx);
		bpf_put_reg64(dst, tmp, ctx);
	}

	emit(hppa_nop(), ctx);
}

void bpf_jit_build_epilogue(struct hppa_jit_context *ctx)
{
	__build_epilogue(false, ctx);
}