// SPDX-License-Identifier: GPL-2.0-only
/*
 * BPF JIT compiler for LoongArch
 *
 * Copyright (C) 2022 Loongson Technology Corporation Limited
 */
#include "bpf_jit.h"

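/*
 * The tail call count (TCC) lives in REG_TCC ($a6). When a program makes
 * both calls and tail calls, the prologue copies it into the callee-saved
 * TCC_SAVED ($s5) so it survives across those calls.
 */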
#define REG_TCC		LOONGARCH_GPR_A6
#define TCC_SAVED	LOONGARCH_GPR_S5

#define SAVE_RA		BIT(0)
#define SAVE_TCC	BIT(1)

static const int regmap[] = {
	/* return value from in-kernel function, and exit value for eBPF program */
	[BPF_REG_0] = LOONGARCH_GPR_A5,
	/* arguments from eBPF program to in-kernel function */
	[BPF_REG_1] = LOONGARCH_GPR_A0,
	[BPF_REG_2] = LOONGARCH_GPR_A1,
	[BPF_REG_3] = LOONGARCH_GPR_A2,
	[BPF_REG_4] = LOONGARCH_GPR_A3,
	[BPF_REG_5] = LOONGARCH_GPR_A4,
	/* callee saved registers that in-kernel function will preserve */
	[BPF_REG_6] = LOONGARCH_GPR_S0,
	[BPF_REG_7] = LOONGARCH_GPR_S1,
	[BPF_REG_8] = LOONGARCH_GPR_S2,
	[BPF_REG_9] = LOONGARCH_GPR_S3,
	/* read-only frame pointer to access stack */
	[BPF_REG_FP] = LOONGARCH_GPR_S4,
	/* temporary register for blinding constants */
	[BPF_REG_AX] = LOONGARCH_GPR_T0,
};

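/*
 * SAVE_RA/SAVE_TCC record, during the first pass, whether the program makes
 * calls and/or tail calls; the prologue and the tail-call emitter key off
 * these flags.
 */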
static void mark_call(struct jit_ctx *ctx)
{
	ctx->flags |= SAVE_RA;
}

static void mark_tail_call(struct jit_ctx *ctx)
{
	ctx->flags |= SAVE_TCC;
}

static bool seen_call(struct jit_ctx *ctx)
{
	return (ctx->flags & SAVE_RA);
}

static bool seen_tail_call(struct jit_ctx *ctx)
{
	return (ctx->flags & SAVE_TCC);
}

static u8 tail_call_reg(struct jit_ctx *ctx)
{
	if (seen_call(ctx))
		return TCC_SAVED;

	return REG_TCC;
}

/*
 * eBPF prog stack layout:
 *
 *                                        high
 * original $sp ------------> +-------------------------+ <--LOONGARCH_GPR_FP
 *                            |           $ra           |
 *                            +-------------------------+
 *                            |           $fp           |
 *                            +-------------------------+
 *                            |           $s0           |
 *                            +-------------------------+
 *                            |           $s1           |
 *                            +-------------------------+
 *                            |           $s2           |
 *                            +-------------------------+
 *                            |           $s3           |
 *                            +-------------------------+
 *                            |           $s4           |
 *                            +-------------------------+
 *                            |           $s5           |
 *                            +-------------------------+ <--BPF_REG_FP
 *                            |  prog->aux->stack_depth |
 *                            |        (optional)       |
 * current $sp -------------> +-------------------------+
 *                                        low
 */
static void build_prologue(struct jit_ctx *ctx)
{
	int stack_adjust = 0, store_offset, bpf_stack_adjust;

	bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);

	/* To store ra, fp, s0, s1, s2, s3, s4 and s5. */
	stack_adjust += sizeof(long) * 8;

	stack_adjust = round_up(stack_adjust, 16);
	stack_adjust += bpf_stack_adjust;

	/*
	 * First instruction initializes the tail call count (TCC).
	 * On tail call we skip this instruction, and the TCC is
	 * passed in REG_TCC from the caller.
	 */
	emit_insn(ctx, addid, REG_TCC, LOONGARCH_GPR_ZERO, MAX_TAIL_CALL_CNT);

	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, -stack_adjust);

	store_offset = stack_adjust - sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, store_offset);

	store_offset -= sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, store_offset);

	store_offset -= sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, store_offset);

	store_offset -= sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, store_offset);

	store_offset -= sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, store_offset);

	store_offset -= sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, store_offset);

	store_offset -= sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, store_offset);

	store_offset -= sizeof(long);
	emit_insn(ctx, std, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, store_offset);

	emit_insn(ctx, addid, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, stack_adjust);

	if (bpf_stack_adjust)
		emit_insn(ctx, addid, regmap[BPF_REG_FP], LOONGARCH_GPR_SP, bpf_stack_adjust);

	/*
	 * The program contains both calls and tail calls, so REG_TCC
	 * needs to be saved across calls.
	 */
	if (seen_tail_call(ctx) && seen_call(ctx))
		move_reg(ctx, TCC_SAVED, REG_TCC);

	ctx->stack_size = stack_adjust;
}

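/*
 * Restore the callee-saved registers and tear down the stack frame set up
 * by build_prologue(), then either return to the caller (normal exit) or
 * jump into the next program for a tail call.
 */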
static void __build_epilogue(struct jit_ctx *ctx, bool is_tail_call)
{
	int stack_adjust = ctx->stack_size;
	int load_offset;

	load_offset = stack_adjust - sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_RA, LOONGARCH_GPR_SP, load_offset);

	load_offset -= sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_FP, LOONGARCH_GPR_SP, load_offset);

	load_offset -= sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_S0, LOONGARCH_GPR_SP, load_offset);

	load_offset -= sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_S1, LOONGARCH_GPR_SP, load_offset);

	load_offset -= sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_S2, LOONGARCH_GPR_SP, load_offset);

	load_offset -= sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_S3, LOONGARCH_GPR_SP, load_offset);

	load_offset -= sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_S4, LOONGARCH_GPR_SP, load_offset);

	load_offset -= sizeof(long);
	emit_insn(ctx, ldd, LOONGARCH_GPR_S5, LOONGARCH_GPR_SP, load_offset);

	emit_insn(ctx, addid, LOONGARCH_GPR_SP, LOONGARCH_GPR_SP, stack_adjust);

	if (!is_tail_call) {
		/* Set return value */
		move_reg(ctx, LOONGARCH_GPR_A0, regmap[BPF_REG_0]);
		/* Return to the caller */
		emit_insn(ctx, jirl, LOONGARCH_GPR_RA, LOONGARCH_GPR_ZERO, 0);
	} else {
		/*
		 * Call the next bpf prog, skipping its first instruction
		 * (the TCC initialization).
		 */
		emit_insn(ctx, jirl, LOONGARCH_GPR_T3, LOONGARCH_GPR_ZERO, 1);
	}
}

static void build_epilogue(struct jit_ctx *ctx)
{
	__build_epilogue(ctx, false);
}

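/*
 * Calls are emitted as a full 64-bit address load plus an indirect jump
 * (see BPF_JMP | BPF_CALL below), so kfunc calls work regardless of how
 * far the target is from the JITed image.
 */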
bool bpf_jit_supports_kfunc_call(void)
{
	return true;
}

bool bpf_jit_supports_far_kfunc_call(void)
{
	return true;
}

/* initialized on the first pass of build_body() */
static int out_offset = -1;
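/*
 * Emit the tail call sequence: bounds-check the index against
 * array->map.max_entries, decrement the tail call count, load
 * array->ptrs[index] and, if it is a valid prog, jump past its first
 * (TCC-initializing) instruction via __build_epilogue(). All three guard
 * branches target the same "out" label, whose offset must come out
 * identical on every pass.
 */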
static int emit_bpf_tail_call(struct jit_ctx *ctx)
{
	int off;
	u8 tcc = tail_call_reg(ctx);
	u8 a1 = LOONGARCH_GPR_A1;
	u8 a2 = LOONGARCH_GPR_A2;
	u8 t1 = LOONGARCH_GPR_T1;
	u8 t2 = LOONGARCH_GPR_T2;
	u8 t3 = LOONGARCH_GPR_T3;
	const int idx0 = ctx->idx;

#define cur_offset (ctx->idx - idx0)
#define jmp_offset (out_offset - (cur_offset))

	/*
	 * a0: &ctx
	 * a1: &array
	 * a2: index
	 *
	 * if (index >= array->map.max_entries)
	 *	 goto out;
	 */
	off = offsetof(struct bpf_array, map.max_entries);
	emit_insn(ctx, ldwu, t1, a1, off);
	/* bgeu $a2, $t1, jmp_offset */
	if (emit_tailcall_jmp(ctx, BPF_JGE, a2, t1, jmp_offset) < 0)
		goto toofar;

	/*
	 * if (--TCC < 0)
	 *	 goto out;
	 */
	emit_insn(ctx, addid, REG_TCC, tcc, -1);
	if (emit_tailcall_jmp(ctx, BPF_JSLT, REG_TCC, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
		goto toofar;

	/*
	 * prog = array->ptrs[index];
	 * if (!prog)
	 *	 goto out;
	 */
	emit_insn(ctx, alsld, t2, a2, a1, 2);
	off = offsetof(struct bpf_array, ptrs);
	emit_insn(ctx, ldd, t2, t2, off);
	/* beq $t2, $zero, jmp_offset */
	if (emit_tailcall_jmp(ctx, BPF_JEQ, t2, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
		goto toofar;

	/* goto *(prog->bpf_func + 4); */
	off = offsetof(struct bpf_prog, bpf_func);
	emit_insn(ctx, ldd, t3, t2, off);
	__build_epilogue(ctx, true);

	/* out: */
	if (out_offset == -1)
		out_offset = cur_offset;
	if (cur_offset != out_offset) {
		pr_err_once("tail_call out_offset = %d, expected %d!\n",
			    cur_offset, out_offset);
		return -1;
	}

	return 0;

toofar:
	pr_info_once("tail_call: jump too far\n");
	return -1;
#undef cur_offset
#undef jmp_offset
}

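/*
 * BPF_STX | BPF_ATOMIC: use the LoongArch AM* atomic memory instructions
 * for ADD/AND/OR/XOR/XCHG (with or without BPF_FETCH), and an LL/SC loop
 * for CMPXCHG. t1 holds the effective address dst + off.
 */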
static void emit_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
{
	const u8 t1 = LOONGARCH_GPR_T1;
	const u8 t2 = LOONGARCH_GPR_T2;
	const u8 t3 = LOONGARCH_GPR_T3;
	const u8 r0 = regmap[BPF_REG_0];
	const u8 src = regmap[insn->src_reg];
	const u8 dst = regmap[insn->dst_reg];
	const s16 off = insn->off;
	const s32 imm = insn->imm;
	const bool isdw = BPF_SIZE(insn->code) == BPF_DW;

	move_imm(ctx, t1, off, false);
	emit_insn(ctx, addd, t1, dst, t1);
	move_reg(ctx, t3, src);

	switch (imm) {
	/* lock *(size *)(dst + off) <op>= src */
	case BPF_ADD:
		if (isdw)
			emit_insn(ctx, amaddd, t2, t1, src);
		else
			emit_insn(ctx, amaddw, t2, t1, src);
		break;
	case BPF_AND:
		if (isdw)
			emit_insn(ctx, amandd, t2, t1, src);
		else
			emit_insn(ctx, amandw, t2, t1, src);
		break;
	case BPF_OR:
		if (isdw)
			emit_insn(ctx, amord, t2, t1, src);
		else
			emit_insn(ctx, amorw, t2, t1, src);
		break;
	case BPF_XOR:
		if (isdw)
			emit_insn(ctx, amxord, t2, t1, src);
		else
			emit_insn(ctx, amxorw, t2, t1, src);
		break;
	/* src = atomic_fetch_<op>(dst + off, src) */
	case BPF_ADD | BPF_FETCH:
		if (isdw) {
			emit_insn(ctx, amaddd, src, t1, t3);
		} else {
			emit_insn(ctx, amaddw, src, t1, t3);
			emit_zext_32(ctx, src, true);
		}
		break;
	case BPF_AND | BPF_FETCH:
		if (isdw) {
			emit_insn(ctx, amandd, src, t1, t3);
		} else {
			emit_insn(ctx, amandw, src, t1, t3);
			emit_zext_32(ctx, src, true);
		}
		break;
	case BPF_OR | BPF_FETCH:
		if (isdw) {
			emit_insn(ctx, amord, src, t1, t3);
		} else {
			emit_insn(ctx, amorw, src, t1, t3);
			emit_zext_32(ctx, src, true);
		}
		break;
	case BPF_XOR | BPF_FETCH:
		if (isdw) {
			emit_insn(ctx, amxord, src, t1, t3);
		} else {
			emit_insn(ctx, amxorw, src, t1, t3);
			emit_zext_32(ctx, src, true);
		}
		break;
	/* src = atomic_xchg(dst + off, src); */
	case BPF_XCHG:
		if (isdw) {
			emit_insn(ctx, amswapd, src, t1, t3);
		} else {
			emit_insn(ctx, amswapw, src, t1, t3);
			emit_zext_32(ctx, src, true);
		}
		break;
	/* r0 = atomic_cmpxchg(dst + off, r0, src); */
	case BPF_CMPXCHG:
		move_reg(ctx, t2, r0);
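		/*
		 * LL/SC loop: load-linked the current value into r0, bail out
		 * if it differs from the expected value saved in t2, otherwise
		 * store-conditionally write src and retry until it succeeds.
		 */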
		if (isdw) {
			emit_insn(ctx, lld, r0, t1, 0);
			emit_insn(ctx, bne, t2, r0, 4);
			move_reg(ctx, t3, src);
			emit_insn(ctx, scd, t3, t1, 0);
			emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -4);
		} else {
			emit_insn(ctx, llw, r0, t1, 0);
			emit_zext_32(ctx, t2, true);
			emit_zext_32(ctx, r0, true);
			emit_insn(ctx, bne, t2, r0, 4);
			move_reg(ctx, t3, src);
			emit_insn(ctx, scw, t3, t1, 0);
			emit_insn(ctx, beq, t3, LOONGARCH_GPR_ZERO, -6);
			emit_zext_32(ctx, r0, true);
		}
		break;
	}
}

static bool is_signed_bpf_cond(u8 cond)
{
	return cond == BPF_JSGT || cond == BPF_JSLT ||
	       cond == BPF_JSGE || cond == BPF_JSLE;
}

#define BPF_FIXUP_REG_MASK	GENMASK(31, 27)
#define BPF_FIXUP_OFFSET_MASK	GENMASK(26, 0)

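/*
 * Exception fixup handler for BPF_PROBE_MEM* loads: zero the destination
 * register and resume at the instruction following the faulting load, whose
 * address is recovered from the fixup offset.
 */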
bool ex_handler_bpf(const struct exception_table_entry *ex,
		    struct pt_regs *regs)
{
	int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
	off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);

	regs->regs[dst_reg] = 0;
	regs->csr_era = (unsigned long)&ex->fixup - offset;

	return true;
}

/* For accesses to BTF pointers, add an entry to the exception table */
static int add_exception_handler(const struct bpf_insn *insn,
				 struct jit_ctx *ctx,
				 int dst_reg)
{
	unsigned long pc;
	off_t offset;
	struct exception_table_entry *ex;

	if (!ctx->image || !ctx->prog->aux->extable)
		return 0;

	if (BPF_MODE(insn->code) != BPF_PROBE_MEM &&
	    BPF_MODE(insn->code) != BPF_PROBE_MEMSX)
		return 0;

	if (WARN_ON_ONCE(ctx->num_exentries >= ctx->prog->aux->num_exentries))
		return -EINVAL;

	ex = &ctx->prog->aux->extable[ctx->num_exentries];
	pc = (unsigned long)&ctx->image[ctx->idx - 1];

	offset = pc - (long)&ex->insn;
	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
		return -ERANGE;

	ex->insn = offset;

	/*
	 * Since the extable follows the program, the fixup offset is always
	 * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
	 * to keep things simple, and put the destination register in the upper
	 * bits. We don't need to worry about buildtime or runtime sort
	 * modifying the upper bits because the table is already sorted, and
	 * isn't part of the main exception table.
	 */
	offset = (long)&ex->fixup - (pc + LOONGARCH_INSN_SIZE);
	if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset))
		return -ERANGE;

	ex->type = EX_TYPE_BPF;
	ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) | FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);

	ctx->num_exentries++;

	return 0;
}

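/*
 * Emit the LoongArch instruction(s) for a single eBPF instruction.
 * Returns 0 on success, 1 if the instruction also consumed the following
 * slot (BPF_LD | BPF_IMM | BPF_DW), or a negative errno on failure.
 */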
static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool extra_pass)
{
	u8 tm = -1;
	u64 func_addr;
	bool func_addr_fixed, sign_extend;
	int i = insn - ctx->prog->insnsi;
	int ret, jmp_offset;
	const u8 code = insn->code;
	const u8 cond = BPF_OP(code);
	const u8 t1 = LOONGARCH_GPR_T1;
	const u8 t2 = LOONGARCH_GPR_T2;
	const u8 src = regmap[insn->src_reg];
	const u8 dst = regmap[insn->dst_reg];
	const s16 off = insn->off;
	const s32 imm = insn->imm;
	const bool is32 = BPF_CLASS(insn->code) == BPF_ALU || BPF_CLASS(insn->code) == BPF_JMP32;

	switch (code) {
	/* dst = src */
	case BPF_ALU | BPF_MOV | BPF_X:
	case BPF_ALU64 | BPF_MOV | BPF_X:
		switch (off) {
		case 0:
			move_reg(ctx, dst, src);
			emit_zext_32(ctx, dst, is32);
			break;
		case 8:
			move_reg(ctx, t1, src);
			emit_insn(ctx, extwb, dst, t1);
			emit_zext_32(ctx, dst, is32);
			break;
		case 16:
			move_reg(ctx, t1, src);
			emit_insn(ctx, extwh, dst, t1);
			emit_zext_32(ctx, dst, is32);
			break;
		case 32:
			emit_insn(ctx, addw, dst, src, LOONGARCH_GPR_ZERO);
			break;
		}
		break;

	/* dst = imm */
	case BPF_ALU | BPF_MOV | BPF_K:
	case BPF_ALU64 | BPF_MOV | BPF_K:
		move_imm(ctx, dst, imm, is32);
		break;

	/* dst = dst + src */
	case BPF_ALU | BPF_ADD | BPF_X:
	case BPF_ALU64 | BPF_ADD | BPF_X:
		emit_insn(ctx, addd, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst + imm */
	case BPF_ALU | BPF_ADD | BPF_K:
	case BPF_ALU64 | BPF_ADD | BPF_K:
		if (is_signed_imm12(imm)) {
			emit_insn(ctx, addid, dst, dst, imm);
		} else {
			move_imm(ctx, t1, imm, is32);
			emit_insn(ctx, addd, dst, dst, t1);
		}
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst - src */
	case BPF_ALU | BPF_SUB | BPF_X:
	case BPF_ALU64 | BPF_SUB | BPF_X:
		emit_insn(ctx, subd, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst - imm */
	case BPF_ALU | BPF_SUB | BPF_K:
	case BPF_ALU64 | BPF_SUB | BPF_K:
		if (is_signed_imm12(-imm)) {
			emit_insn(ctx, addid, dst, dst, -imm);
		} else {
			move_imm(ctx, t1, imm, is32);
			emit_insn(ctx, subd, dst, dst, t1);
		}
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst * src */
	case BPF_ALU | BPF_MUL | BPF_X:
	case BPF_ALU64 | BPF_MUL | BPF_X:
		emit_insn(ctx, muld, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst * imm */
	case BPF_ALU | BPF_MUL | BPF_K:
	case BPF_ALU64 | BPF_MUL | BPF_K:
		move_imm(ctx, t1, imm, is32);
		emit_insn(ctx, muld, dst, dst, t1);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst / src */
	case BPF_ALU | BPF_DIV | BPF_X:
	case BPF_ALU64 | BPF_DIV | BPF_X:
		if (!off) {
			emit_zext_32(ctx, dst, is32);
			move_reg(ctx, t1, src);
			emit_zext_32(ctx, t1, is32);
			emit_insn(ctx, divdu, dst, dst, t1);
			emit_zext_32(ctx, dst, is32);
		} else {
			emit_sext_32(ctx, dst, is32);
			move_reg(ctx, t1, src);
			emit_sext_32(ctx, t1, is32);
			emit_insn(ctx, divd, dst, dst, t1);
			emit_sext_32(ctx, dst, is32);
		}
		break;

	/* dst = dst / imm */
	case BPF_ALU | BPF_DIV | BPF_K:
	case BPF_ALU64 | BPF_DIV | BPF_K:
		if (!off) {
			move_imm(ctx, t1, imm, is32);
			emit_zext_32(ctx, dst, is32);
			emit_insn(ctx, divdu, dst, dst, t1);
			emit_zext_32(ctx, dst, is32);
		} else {
			move_imm(ctx, t1, imm, false);
			emit_sext_32(ctx, t1, is32);
			emit_sext_32(ctx, dst, is32);
			emit_insn(ctx, divd, dst, dst, t1);
			emit_sext_32(ctx, dst, is32);
		}
		break;

	/* dst = dst % src */
	case BPF_ALU | BPF_MOD | BPF_X:
	case BPF_ALU64 | BPF_MOD | BPF_X:
		if (!off) {
			emit_zext_32(ctx, dst, is32);
			move_reg(ctx, t1, src);
			emit_zext_32(ctx, t1, is32);
			emit_insn(ctx, moddu, dst, dst, t1);
			emit_zext_32(ctx, dst, is32);
		} else {
			emit_sext_32(ctx, dst, is32);
			move_reg(ctx, t1, src);
			emit_sext_32(ctx, t1, is32);
			emit_insn(ctx, modd, dst, dst, t1);
			emit_sext_32(ctx, dst, is32);
		}
		break;

	/* dst = dst % imm */
	case BPF_ALU | BPF_MOD | BPF_K:
	case BPF_ALU64 | BPF_MOD | BPF_K:
		if (!off) {
			move_imm(ctx, t1, imm, is32);
			emit_zext_32(ctx, dst, is32);
			emit_insn(ctx, moddu, dst, dst, t1);
			emit_zext_32(ctx, dst, is32);
		} else {
			move_imm(ctx, t1, imm, false);
			emit_sext_32(ctx, t1, is32);
			emit_sext_32(ctx, dst, is32);
			emit_insn(ctx, modd, dst, dst, t1);
			emit_sext_32(ctx, dst, is32);
		}
		break;

	/* dst = -dst */
	case BPF_ALU | BPF_NEG:
	case BPF_ALU64 | BPF_NEG:
		move_imm(ctx, t1, imm, is32);
		emit_insn(ctx, subd, dst, LOONGARCH_GPR_ZERO, dst);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst & src */
	case BPF_ALU | BPF_AND | BPF_X:
	case BPF_ALU64 | BPF_AND | BPF_X:
		emit_insn(ctx, and, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst & imm */
	case BPF_ALU | BPF_AND | BPF_K:
	case BPF_ALU64 | BPF_AND | BPF_K:
		if (is_unsigned_imm12(imm)) {
			emit_insn(ctx, andi, dst, dst, imm);
		} else {
			move_imm(ctx, t1, imm, is32);
			emit_insn(ctx, and, dst, dst, t1);
		}
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst | src */
	case BPF_ALU | BPF_OR | BPF_X:
	case BPF_ALU64 | BPF_OR | BPF_X:
		emit_insn(ctx, or, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst | imm */
	case BPF_ALU | BPF_OR | BPF_K:
	case BPF_ALU64 | BPF_OR | BPF_K:
		if (is_unsigned_imm12(imm)) {
			emit_insn(ctx, ori, dst, dst, imm);
		} else {
			move_imm(ctx, t1, imm, is32);
			emit_insn(ctx, or, dst, dst, t1);
		}
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst ^ src */
	case BPF_ALU | BPF_XOR | BPF_X:
	case BPF_ALU64 | BPF_XOR | BPF_X:
		emit_insn(ctx, xor, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst ^ imm */
	case BPF_ALU | BPF_XOR | BPF_K:
	case BPF_ALU64 | BPF_XOR | BPF_K:
		if (is_unsigned_imm12(imm)) {
			emit_insn(ctx, xori, dst, dst, imm);
		} else {
			move_imm(ctx, t1, imm, is32);
			emit_insn(ctx, xor, dst, dst, t1);
		}
		emit_zext_32(ctx, dst, is32);
		break;

	/* dst = dst << src (logical) */
	case BPF_ALU | BPF_LSH | BPF_X:
		emit_insn(ctx, sllw, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	case BPF_ALU64 | BPF_LSH | BPF_X:
		emit_insn(ctx, slld, dst, dst, src);
		break;

	/* dst = dst << imm (logical) */
	case BPF_ALU | BPF_LSH | BPF_K:
		emit_insn(ctx, slliw, dst, dst, imm);
		emit_zext_32(ctx, dst, is32);
		break;

	case BPF_ALU64 | BPF_LSH | BPF_K:
		emit_insn(ctx, sllid, dst, dst, imm);
		break;

	/* dst = dst >> src (logical) */
	case BPF_ALU | BPF_RSH | BPF_X:
		emit_insn(ctx, srlw, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	case BPF_ALU64 | BPF_RSH | BPF_X:
		emit_insn(ctx, srld, dst, dst, src);
		break;

	/* dst = dst >> imm (logical) */
	case BPF_ALU | BPF_RSH | BPF_K:
		emit_insn(ctx, srliw, dst, dst, imm);
		emit_zext_32(ctx, dst, is32);
		break;

	case BPF_ALU64 | BPF_RSH | BPF_K:
		emit_insn(ctx, srlid, dst, dst, imm);
		break;

	/* dst = dst >> src (arithmetic) */
	case BPF_ALU | BPF_ARSH | BPF_X:
		emit_insn(ctx, sraw, dst, dst, src);
		emit_zext_32(ctx, dst, is32);
		break;

	case BPF_ALU64 | BPF_ARSH | BPF_X:
		emit_insn(ctx, srad, dst, dst, src);
		break;

	/* dst = dst >> imm (arithmetic) */
	case BPF_ALU | BPF_ARSH | BPF_K:
		emit_insn(ctx, sraiw, dst, dst, imm);
		emit_zext_32(ctx, dst, is32);
		break;

	case BPF_ALU64 | BPF_ARSH | BPF_K:
		emit_insn(ctx, sraid, dst, dst, imm);
		break;

	/* dst = BSWAP##imm(dst) */
	case BPF_ALU | BPF_END | BPF_FROM_LE:
		switch (imm) {
		case 16:
			/* zero-extend 16 bits into 64 bits */
			emit_insn(ctx, bstrpickd, dst, dst, 15, 0);
			break;
		case 32:
			/* zero-extend 32 bits into 64 bits */
			emit_zext_32(ctx, dst, is32);
			break;
		case 64:
			/* do nothing */
			break;
		}
		break;

	case BPF_ALU | BPF_END | BPF_FROM_BE:
	case BPF_ALU64 | BPF_END | BPF_FROM_LE:
		switch (imm) {
		case 16:
			emit_insn(ctx, revb2h, dst, dst);
			/* zero-extend 16 bits into 64 bits */
			emit_insn(ctx, bstrpickd, dst, dst, 15, 0);
			break;
		case 32:
			emit_insn(ctx, revb2w, dst, dst);
			/* clear the upper 32 bits */
			emit_zext_32(ctx, dst, true);
			break;
		case 64:
			emit_insn(ctx, revbd, dst, dst);
			break;
		}
		break;

	/* PC += off if dst cond src */
	case BPF_JMP | BPF_JEQ | BPF_X:
	case BPF_JMP | BPF_JNE | BPF_X:
	case BPF_JMP | BPF_JGT | BPF_X:
	case BPF_JMP | BPF_JGE | BPF_X:
	case BPF_JMP | BPF_JLT | BPF_X:
	case BPF_JMP | BPF_JLE | BPF_X:
	case BPF_JMP | BPF_JSGT | BPF_X:
	case BPF_JMP | BPF_JSGE | BPF_X:
	case BPF_JMP | BPF_JSLT | BPF_X:
	case BPF_JMP | BPF_JSLE | BPF_X:
	case BPF_JMP32 | BPF_JEQ | BPF_X:
	case BPF_JMP32 | BPF_JNE | BPF_X:
	case BPF_JMP32 | BPF_JGT | BPF_X:
	case BPF_JMP32 | BPF_JGE | BPF_X:
	case BPF_JMP32 | BPF_JLT | BPF_X:
	case BPF_JMP32 | BPF_JLE | BPF_X:
	case BPF_JMP32 | BPF_JSGT | BPF_X:
	case BPF_JMP32 | BPF_JSGE | BPF_X:
	case BPF_JMP32 | BPF_JSLT | BPF_X:
	case BPF_JMP32 | BPF_JSLE | BPF_X:
		jmp_offset = bpf2la_offset(i, off, ctx);
		move_reg(ctx, t1, dst);
		move_reg(ctx, t2, src);
		if (is_signed_bpf_cond(BPF_OP(code))) {
			emit_sext_32(ctx, t1, is32);
			emit_sext_32(ctx, t2, is32);
		} else {
			emit_zext_32(ctx, t1, is32);
			emit_zext_32(ctx, t2, is32);
		}
		if (emit_cond_jmp(ctx, cond, t1, t2, jmp_offset) < 0)
			goto toofar;
		break;

	/* PC += off if dst cond imm */
	case BPF_JMP | BPF_JEQ | BPF_K:
	case BPF_JMP | BPF_JNE | BPF_K:
	case BPF_JMP | BPF_JGT | BPF_K:
	case BPF_JMP | BPF_JGE | BPF_K:
	case BPF_JMP | BPF_JLT | BPF_K:
	case BPF_JMP | BPF_JLE | BPF_K:
	case BPF_JMP | BPF_JSGT | BPF_K:
	case BPF_JMP | BPF_JSGE | BPF_K:
	case BPF_JMP | BPF_JSLT | BPF_K:
	case BPF_JMP | BPF_JSLE | BPF_K:
	case BPF_JMP32 | BPF_JEQ | BPF_K:
	case BPF_JMP32 | BPF_JNE | BPF_K:
	case BPF_JMP32 | BPF_JGT | BPF_K:
	case BPF_JMP32 | BPF_JGE | BPF_K:
	case BPF_JMP32 | BPF_JLT | BPF_K:
	case BPF_JMP32 | BPF_JLE | BPF_K:
	case BPF_JMP32 | BPF_JSGT | BPF_K:
	case BPF_JMP32 | BPF_JSGE | BPF_K:
	case BPF_JMP32 | BPF_JSLT | BPF_K:
	case BPF_JMP32 | BPF_JSLE | BPF_K:
		jmp_offset = bpf2la_offset(i, off, ctx);
		if (imm) {
			move_imm(ctx, t1, imm, false);
			tm = t1;
		} else {
			/* If imm is 0, simply use zero register. */
			tm = LOONGARCH_GPR_ZERO;
		}
		move_reg(ctx, t2, dst);
		if (is_signed_bpf_cond(BPF_OP(code))) {
			emit_sext_32(ctx, tm, is32);
			emit_sext_32(ctx, t2, is32);
		} else {
			emit_zext_32(ctx, tm, is32);
			emit_zext_32(ctx, t2, is32);
		}
		if (emit_cond_jmp(ctx, cond, t2, tm, jmp_offset) < 0)
			goto toofar;
		break;

	/* PC += off if dst & src */
	case BPF_JMP | BPF_JSET | BPF_X:
	case BPF_JMP32 | BPF_JSET | BPF_X:
		jmp_offset = bpf2la_offset(i, off, ctx);
		emit_insn(ctx, and, t1, dst, src);
		emit_zext_32(ctx, t1, is32);
		if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
			goto toofar;
		break;

	/* PC += off if dst & imm */
	case BPF_JMP | BPF_JSET | BPF_K:
	case BPF_JMP32 | BPF_JSET | BPF_K:
		jmp_offset = bpf2la_offset(i, off, ctx);
		move_imm(ctx, t1, imm, is32);
		emit_insn(ctx, and, t1, dst, t1);
		emit_zext_32(ctx, t1, is32);
		if (emit_cond_jmp(ctx, cond, t1, LOONGARCH_GPR_ZERO, jmp_offset) < 0)
			goto toofar;
		break;

	/* PC += off */
	case BPF_JMP | BPF_JA:
	case BPF_JMP32 | BPF_JA:
		if (BPF_CLASS(code) == BPF_JMP)
			jmp_offset = bpf2la_offset(i, off, ctx);
		else
			jmp_offset = bpf2la_offset(i, imm, ctx);
		if (emit_uncond_jmp(ctx, jmp_offset) < 0)
			goto toofar;
		break;

	/* function call */
	case BPF_JMP | BPF_CALL:
		mark_call(ctx);
		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
					    &func_addr, &func_addr_fixed);
		if (ret < 0)
			return ret;

		move_addr(ctx, t1, func_addr);
		emit_insn(ctx, jirl, t1, LOONGARCH_GPR_RA, 0);
		move_reg(ctx, regmap[BPF_REG_0], LOONGARCH_GPR_A0);
		break;

	/* tail call */
	case BPF_JMP | BPF_TAIL_CALL:
		mark_tail_call(ctx);
		if (emit_bpf_tail_call(ctx) < 0)
			return -EINVAL;
		break;

	/* function return */
	case BPF_JMP | BPF_EXIT:
		if (i == ctx->prog->len - 1)
			break;

		jmp_offset = epilogue_offset(ctx);
		if (emit_uncond_jmp(ctx, jmp_offset) < 0)
			goto toofar;
		break;

	/* dst = imm64 */
	case BPF_LD | BPF_IMM | BPF_DW:
	{
		const u64 imm64 = (u64)(insn + 1)->imm << 32 | (u32)insn->imm;

		move_imm(ctx, dst, imm64, is32);
		return 1;
	}

	/* dst = *(size *)(src + off) */
	case BPF_LDX | BPF_MEM | BPF_B:
	case BPF_LDX | BPF_MEM | BPF_H:
	case BPF_LDX | BPF_MEM | BPF_W:
	case BPF_LDX | BPF_MEM | BPF_DW:
	case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
	case BPF_LDX | BPF_PROBE_MEM | BPF_W:
	case BPF_LDX | BPF_PROBE_MEM | BPF_H:
	case BPF_LDX | BPF_PROBE_MEM | BPF_B:
	/* dst_reg = (s64)*(signed size *)(src_reg + off) */
	case BPF_LDX | BPF_MEMSX | BPF_B:
	case BPF_LDX | BPF_MEMSX | BPF_H:
	case BPF_LDX | BPF_MEMSX | BPF_W:
	case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
	case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
	case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
		sign_extend = BPF_MODE(insn->code) == BPF_MEMSX ||
			      BPF_MODE(insn->code) == BPF_PROBE_MEMSX;
		switch (BPF_SIZE(code)) {
		case BPF_B:
			if (is_signed_imm12(off)) {
				if (sign_extend)
					emit_insn(ctx, ldb, dst, src, off);
				else
					emit_insn(ctx, ldbu, dst, src, off);
			} else {
				move_imm(ctx, t1, off, is32);
				if (sign_extend)
					emit_insn(ctx, ldxb, dst, src, t1);
				else
					emit_insn(ctx, ldxbu, dst, src, t1);
			}
			break;
		case BPF_H:
			if (is_signed_imm12(off)) {
				if (sign_extend)
					emit_insn(ctx, ldh, dst, src, off);
				else
					emit_insn(ctx, ldhu, dst, src, off);
			} else {
				move_imm(ctx, t1, off, is32);
				if (sign_extend)
					emit_insn(ctx, ldxh, dst, src, t1);
				else
					emit_insn(ctx, ldxhu, dst, src, t1);
			}
			break;
		case BPF_W:
			if (is_signed_imm12(off)) {
				if (sign_extend)
					emit_insn(ctx, ldw, dst, src, off);
				else
					emit_insn(ctx, ldwu, dst, src, off);
			} else {
				move_imm(ctx, t1, off, is32);
				if (sign_extend)
					emit_insn(ctx, ldxw, dst, src, t1);
				else
					emit_insn(ctx, ldxwu, dst, src, t1);
			}
			break;
		case BPF_DW:
			move_imm(ctx, t1, off, is32);
			emit_insn(ctx, ldxd, dst, src, t1);
			break;
		}

		ret = add_exception_handler(insn, ctx, dst);
		if (ret)
			return ret;
		break;

	/* *(size *)(dst + off) = imm */
	case BPF_ST | BPF_MEM | BPF_B:
	case BPF_ST | BPF_MEM | BPF_H:
	case BPF_ST | BPF_MEM | BPF_W:
	case BPF_ST | BPF_MEM | BPF_DW:
		switch (BPF_SIZE(code)) {
		case BPF_B:
			move_imm(ctx, t1, imm, is32);
			if (is_signed_imm12(off)) {
				emit_insn(ctx, stb, t1, dst, off);
			} else {
				move_imm(ctx, t2, off, is32);
				emit_insn(ctx, stxb, t1, dst, t2);
			}
			break;
		case BPF_H:
			move_imm(ctx, t1, imm, is32);
			if (is_signed_imm12(off)) {
				emit_insn(ctx, sth, t1, dst, off);
			} else {
				move_imm(ctx, t2, off, is32);
				emit_insn(ctx, stxh, t1, dst, t2);
			}
			break;
		case BPF_W:
			move_imm(ctx, t1, imm, is32);
			if (is_signed_imm12(off)) {
				emit_insn(ctx, stw, t1, dst, off);
			} else if (is_signed_imm14(off)) {
				emit_insn(ctx, stptrw, t1, dst, off);
			} else {
				move_imm(ctx, t2, off, is32);
				emit_insn(ctx, stxw, t1, dst, t2);
			}
			break;
		case BPF_DW:
			move_imm(ctx, t1, imm, is32);
			if (is_signed_imm12(off)) {
				emit_insn(ctx, std, t1, dst, off);
			} else if (is_signed_imm14(off)) {
				emit_insn(ctx, stptrd, t1, dst, off);
			} else {
				move_imm(ctx, t2, off, is32);
				emit_insn(ctx, stxd, t1, dst, t2);
			}
			break;
		}
		break;

	/* *(size *)(dst + off) = src */
	case BPF_STX | BPF_MEM | BPF_B:
	case BPF_STX | BPF_MEM | BPF_H:
	case BPF_STX | BPF_MEM | BPF_W:
	case BPF_STX | BPF_MEM | BPF_DW:
		switch (BPF_SIZE(code)) {
		case BPF_B:
			if (is_signed_imm12(off)) {
				emit_insn(ctx, stb, src, dst, off);
			} else {
				move_imm(ctx, t1, off, is32);
				emit_insn(ctx, stxb, src, dst, t1);
			}
			break;
		case BPF_H:
			if (is_signed_imm12(off)) {
				emit_insn(ctx, sth, src, dst, off);
			} else {
				move_imm(ctx, t1, off, is32);
				emit_insn(ctx, stxh, src, dst, t1);
			}
			break;
		case BPF_W:
			if (is_signed_imm12(off)) {
				emit_insn(ctx, stw, src, dst, off);
			} else if (is_signed_imm14(off)) {
				emit_insn(ctx, stptrw, src, dst, off);
			} else {
				move_imm(ctx, t1, off, is32);
				emit_insn(ctx, stxw, src, dst, t1);
			}
			break;
		case BPF_DW:
			if (is_signed_imm12(off)) {
				emit_insn(ctx, std, src, dst, off);
			} else if (is_signed_imm14(off)) {
				emit_insn(ctx, stptrd, src, dst, off);
			} else {
				move_imm(ctx, t1, off, is32);
				emit_insn(ctx, stxd, src, dst, t1);
			}
			break;
		}
		break;

	case BPF_STX | BPF_ATOMIC | BPF_W:
	case BPF_STX | BPF_ATOMIC | BPF_DW:
		emit_atomic(insn, ctx);
		break;

	/* Speculation barrier */
	case BPF_ST | BPF_NOSPEC:
		break;

	default:
		pr_err("bpf_jit: unknown opcode %02x\n", code);
		return -EINVAL;
	}

	return 0;

toofar:
	pr_info_once("bpf_jit: opcode %02x, jump too far\n", code);
	return -E2BIG;
}

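/*
 * Walk the whole program once per JIT pass. On the image-less sizing pass
 * this records in ctx->offset[] the native instruction index at which each
 * eBPF instruction starts, so branch targets can be resolved later.
 */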
static int build_body(struct jit_ctx *ctx, bool extra_pass)
{
	int i;
	const struct bpf_prog *prog = ctx->prog;

	for (i = 0; i < prog->len; i++) {
		const struct bpf_insn *insn = &prog->insnsi[i];
		int ret;

		if (ctx->image == NULL)
			ctx->offset[i] = ctx->idx;

		ret = build_insn(insn, ctx, extra_pass);
		if (ret > 0) {
			i++;
			if (ctx->image == NULL)
				ctx->offset[i] = ctx->idx;
			continue;
		}
		if (ret)
			return ret;
	}

	if (ctx->image == NULL)
		ctx->offset[i] = ctx->idx;

	return 0;
}

/* Fill space with break instructions */
static void jit_fill_hole(void *area, unsigned int size)
{
	u32 *ptr;

	/* We are guaranteed to have aligned memory */
	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
		*ptr++ = INSN_BREAK;
}

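/*
 * Make sure every slot in the image was actually overwritten (no INSN_BREAK
 * filler remains) and that the number of emitted exception table entries
 * matches what the verifier accounted for.
 */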
static int validate_code(struct jit_ctx *ctx)
{
	int i;
	union loongarch_instruction insn;

	for (i = 0; i < ctx->idx; i++) {
		insn = ctx->image[i];
		/* Check INSN_BREAK */
		if (insn.word == INSN_BREAK)
			return -1;
	}

	if (WARN_ON_ONCE(ctx->num_exentries != ctx->prog->aux->num_exentries))
		return -1;

	return 0;
}

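/*
 * JIT entry point: run a sizing pass over the program, allocate the image
 * (plus room for the exception table), then rerun the passes to emit and
 * validate the final code. For multi-function programs the context is kept
 * in jit_data so the extra pass can resolve call addresses in place.
 */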
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
	bool tmp_blinded = false, extra_pass = false;
	u8 *image_ptr;
	int image_size, prog_size, extable_size;
	struct jit_ctx ctx;
	struct jit_data *jit_data;
	struct bpf_binary_header *header;
	struct bpf_prog *tmp, *orig_prog = prog;

	/*
	 * If BPF JIT was not enabled then we must fall back to
	 * the interpreter.
	 */
	if (!prog->jit_requested)
		return orig_prog;

	tmp = bpf_jit_blind_constants(prog);
	/*
	 * If blinding was requested and we failed during blinding,
	 * we must fall back to the interpreter. Otherwise, we save
	 * the new JITed code.
	 */
	if (IS_ERR(tmp))
		return orig_prog;

	if (tmp != prog) {
		tmp_blinded = true;
		prog = tmp;
	}

	jit_data = prog->aux->jit_data;
	if (!jit_data) {
		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
		if (!jit_data) {
			prog = orig_prog;
			goto out;
		}
		prog->aux->jit_data = jit_data;
	}
	if (jit_data->ctx.offset) {
		ctx = jit_data->ctx;
		image_ptr = jit_data->image;
		header = jit_data->header;
		extra_pass = true;
		prog_size = sizeof(u32) * ctx.idx;
		goto skip_init_ctx;
	}

	memset(&ctx, 0, sizeof(ctx));
	ctx.prog = prog;

	ctx.offset = kvcalloc(prog->len + 1, sizeof(u32), GFP_KERNEL);
	if (ctx.offset == NULL) {
		prog = orig_prog;
		goto out_offset;
	}

	/* 1. Initial fake pass to compute ctx->idx and set ctx->flags */
	build_prologue(&ctx);
	if (build_body(&ctx, extra_pass)) {
		prog = orig_prog;
		goto out_offset;
	}
	ctx.epilogue_offset = ctx.idx;
	build_epilogue(&ctx);

	extable_size = prog->aux->num_exentries * sizeof(struct exception_table_entry);

	/*
	 * Now we know the actual image size.
	 * As each LoongArch instruction is 32 bits long, we translate
	 * the number of JITed instructions into the size required to
	 * store the JITed code.
	 */
	prog_size = sizeof(u32) * ctx.idx;
	image_size = prog_size + extable_size;
	/* Now we know the size of the structure to make */
	header = bpf_jit_binary_alloc(image_size, &image_ptr,
				      sizeof(u32), jit_fill_hole);
	if (header == NULL) {
		prog = orig_prog;
		goto out_offset;
	}

	/* 2. Now, the actual pass to generate final JIT code */
	ctx.image = (union loongarch_instruction *)image_ptr;
	if (extable_size)
		prog->aux->extable = (void *)image_ptr + prog_size;

skip_init_ctx:
	ctx.idx = 0;
	ctx.num_exentries = 0;

	build_prologue(&ctx);
	if (build_body(&ctx, extra_pass)) {
		bpf_jit_binary_free(header);
		prog = orig_prog;
		goto out_offset;
	}
	build_epilogue(&ctx);

	/* 3. Extra pass to validate JITed code */
	if (validate_code(&ctx)) {
		bpf_jit_binary_free(header);
		prog = orig_prog;
		goto out_offset;
	}

	/* And we're done */
	if (bpf_jit_enable > 1)
		bpf_jit_dump(prog->len, prog_size, 2, ctx.image);

	/* Update the icache */
	flush_icache_range((unsigned long)header, (unsigned long)(ctx.image + ctx.idx));

	if (!prog->is_func || extra_pass) {
		if (extra_pass && ctx.idx != jit_data->ctx.idx) {
			pr_err_once("multi-func JIT bug %d != %d\n",
				    ctx.idx, jit_data->ctx.idx);
			bpf_jit_binary_free(header);
			prog->bpf_func = NULL;
			prog->jited = 0;
			prog->jited_len = 0;
			goto out_offset;
		}
		bpf_jit_binary_lock_ro(header);
	} else {
		jit_data->ctx = ctx;
		jit_data->image = image_ptr;
		jit_data->header = header;
	}
	prog->jited = 1;
	prog->jited_len = prog_size;
	prog->bpf_func = (void *)ctx.image;

	if (!prog->is_func || extra_pass) {
		int i;

		/* offset[prog->len] is the size of the program */
		for (i = 0; i <= prog->len; i++)
			ctx.offset[i] *= LOONGARCH_INSN_SIZE;
		bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);

out_offset:
		kvfree(ctx.offset);
		kfree(jit_data);
		prog->aux->jit_data = NULL;
	}

out:
	if (tmp_blinded)
		bpf_jit_prog_release_other(prog, prog == orig_prog ? tmp : orig_prog);

	out_offset = -1;

	return prog;
}

/* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
bool bpf_jit_supports_subprog_tailcalls(void)
{
	return true;
}
