// SPDX-License-Identifier: GPL-2.0-only
/*
 * Just-In-Time compiler for eBPF bytecode on MIPS.
 * Implementation of JIT functions for 32-bit CPUs.
 *
 * Copyright (c) 2021 Anyfi Networks AB.
 * Author: Johan Almbladh <johan.almbladh@gmail.com>
 *
 * Based on code and ideas from
 * Copyright (c) 2017 Cavium, Inc.
 * Copyright (c) 2017 Shubham Bansal <illusionist.neo@gmail.com>
 * Copyright (c) 2011 Mircea Gherzan <mgherzan@gmail.com>
 */

#include <linux/math64.h>
#include <linux/errno.h>
#include <linux/filter.h>
#include <linux/bpf.h>
#include <asm/cpu-features.h>
#include <asm/isa-rev.h>
#include <asm/uasm.h>

#include "bpf_jit_comp.h"

/* MIPS a4-a7 are not available in the o32 ABI */
#undef MIPS_R_A4
#undef MIPS_R_A5
#undef MIPS_R_A6
#undef MIPS_R_A7

/* Stack is 8-byte aligned in o32 ABI */
#define MIPS_STACK_ALIGNMENT 8

/*
 * The top 16 bytes of a stack frame are reserved for the callee in the O32
 * ABI. This corresponds to stack space for the register arguments a0-a3.
 */
#define JIT_RESERVED_STACK 16

/* Temporary 64-bit register used by JIT */
#define JIT_REG_TMP MAX_BPF_JIT_REG

/*
 * Number of prologue bytes to skip when doing a tail call.
 * Tail call count (TCC) initialization (8 bytes) always, plus
 * the a0-to-lo(R1) context argument move (4 bytes) if big endian.
 */
#ifdef __BIG_ENDIAN
#define JIT_TCALL_SKIP 12
#else
#define JIT_TCALL_SKIP 8
#endif

/* CPU registers holding the callee return value */
#define JIT_RETURN_REGS	  \
	(BIT(MIPS_R_V0) | \
	 BIT(MIPS_R_V1))

/* CPU registers holding arguments passed to the callee directly */
#define JIT_ARG_REGS      \
	(BIT(MIPS_R_A0) | \
	 BIT(MIPS_R_A1) | \
	 BIT(MIPS_R_A2) | \
	 BIT(MIPS_R_A3))

/* CPU registers holding arguments passed to the callee on the stack */
#define JIT_STACK_REGS    \
	(BIT(MIPS_R_T0) | \
	 BIT(MIPS_R_T1) | \
	 BIT(MIPS_R_T2) | \
	 BIT(MIPS_R_T3) | \
	 BIT(MIPS_R_T4) | \
	 BIT(MIPS_R_T5))

/* Caller-saved CPU registers */
#define JIT_CALLER_REGS    \
	(JIT_RETURN_REGS | \
	 JIT_ARG_REGS    | \
	 JIT_STACK_REGS)

/* Callee-saved CPU registers */
#define JIT_CALLEE_REGS   \
	(BIT(MIPS_R_S0) | \
	 BIT(MIPS_R_S1) | \
	 BIT(MIPS_R_S2) | \
	 BIT(MIPS_R_S3) | \
	 BIT(MIPS_R_S4) | \
	 BIT(MIPS_R_S5) | \
	 BIT(MIPS_R_S6) | \
	 BIT(MIPS_R_S7) | \
	 BIT(MIPS_R_GP) | \
	 BIT(MIPS_R_FP) | \
	 BIT(MIPS_R_RA))
/*
 * Mapping of 64-bit eBPF registers to 32-bit native MIPS registers.
 *
 * 1) Native register pairs are ordered according to CPU endianness, following
 *    the MIPS convention for passing 64-bit arguments and return values.
 * 2) The eBPF return value, arguments and callee-saved registers are mapped
 *    to their native MIPS equivalents.
 * 3) Since the 32 highest bits in the eBPF FP register are always zero,
 *    only one general-purpose register is actually needed for the mapping.
 *    We use the fp register for this purpose, and map the highest bits to
 *    the MIPS register r0 (zero).
 * 4) We use the MIPS gp and at registers as internal temporary registers
 *    for constant blinding. The gp register is callee-saved.
 * 5) One 64-bit temporary register is mapped for use when sign-extending
 *    immediate operands. MIPS registers t6-t9 are available to the JIT
 *    for use as temporaries when implementing complex 64-bit operations.
 *
 * With this scheme all eBPF registers are mapped to native MIPS
 * registers without having to use any stack scratch space. The direct
 * register mapping (2) simplifies the handling of function calls.
 */
static const u8 bpf2mips32[][2] = {
	/* Return value from in-kernel function, and exit value from eBPF */
	[BPF_REG_0] = {MIPS_R_V1, MIPS_R_V0},
	/* Arguments from eBPF program to in-kernel function */
	[BPF_REG_1] = {MIPS_R_A1, MIPS_R_A0},
	[BPF_REG_2] = {MIPS_R_A3, MIPS_R_A2},
	/* Remaining arguments, to be passed on the stack per O32 ABI */
	[BPF_REG_3] = {MIPS_R_T1, MIPS_R_T0},
	[BPF_REG_4] = {MIPS_R_T3, MIPS_R_T2},
	[BPF_REG_5] = {MIPS_R_T5, MIPS_R_T4},
	/* Callee-saved registers that in-kernel function will preserve */
	[BPF_REG_6] = {MIPS_R_S1, MIPS_R_S0},
	[BPF_REG_7] = {MIPS_R_S3, MIPS_R_S2},
	[BPF_REG_8] = {MIPS_R_S5, MIPS_R_S4},
	[BPF_REG_9] = {MIPS_R_S7, MIPS_R_S6},
	/* Read-only frame pointer to access the eBPF stack */
#ifdef __BIG_ENDIAN
	[BPF_REG_FP] = {MIPS_R_FP, MIPS_R_ZERO},
#else
	[BPF_REG_FP] = {MIPS_R_ZERO, MIPS_R_FP},
#endif
	/* Temporary register for blinding constants */
	[BPF_REG_AX] = {MIPS_R_GP, MIPS_R_AT},
	/* Temporary register for internal JIT use */
	[JIT_REG_TMP] = {MIPS_R_T7, MIPS_R_T6},
};

/* Get low CPU register for a 64-bit eBPF register mapping */
static inline u8 lo(const u8 reg[])
{
#ifdef __BIG_ENDIAN
	return reg[0];
#else
	return reg[1];
#endif
}

/* Get high CPU register for a 64-bit eBPF register mapping */
static inline u8 hi(const u8 reg[])
{
#ifdef __BIG_ENDIAN
	return reg[1];
#else
	return reg[0];
#endif
}

/*
 * Mark a 64-bit CPU register pair as clobbered; if it is callee-saved,
 * it must be saved/restored by the program.
 */
static void clobber_reg64(struct jit_context *ctx, const u8 reg[])
{
	clobber_reg(ctx, reg[0]);
	clobber_reg(ctx, reg[1]);
}

/* dst = imm (sign-extended) */
static void emit_mov_se_i64(struct jit_context *ctx, const u8 dst[], s32 imm)
{
	emit_mov_i(ctx, lo(dst), imm);
	if (imm < 0)
		emit(ctx, addiu, hi(dst), MIPS_R_ZERO, -1);
	else
		emit(ctx, move, hi(dst), MIPS_R_ZERO);
	clobber_reg64(ctx, dst);
}

/* Zero extension, if verifier does not do it for us */
static void emit_zext_ver(struct jit_context *ctx, const u8 dst[])
{
	if (!ctx->program->aux->verifier_zext) {
		emit(ctx, move, hi(dst), MIPS_R_ZERO);
		clobber_reg(ctx, hi(dst));
	}
}

/* Load delay slot, if ISA mandates it */
static void emit_load_delay(struct jit_context *ctx)
{
	if (!cpu_has_mips_2_3_4_5_r)
		emit(ctx, nop);
}

/* ALU immediate operation (64-bit) */
static void emit_alu_i64(struct jit_context *ctx,
			 const u8 dst[], s32 imm, u8 op)
{
	u8 src = MIPS_R_T6;

	/*
	 * ADD/SUB with all but the max negative imm can be handled by
	 * inverting the operation and the imm value, saving one insn.
	 */
	if (imm > S32_MIN && imm < 0)
		switch (op) {
		case BPF_ADD:
			op = BPF_SUB;
			imm = -imm;
			break;
		case BPF_SUB:
			op = BPF_ADD;
			imm = -imm;
			break;
		}

	/* Move immediate to temporary register */
	emit_mov_i(ctx, src, imm);

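	/*
	 * The 64-bit value lives in a 32-bit register pair, so ADD/SUB
	 * must propagate a carry/borrow from the low word into the high
	 * word. The carry out of an addu is recovered with an unsigned
	 * compare: e.g. for 0xffffffff + 1 the low result wraps to 0,
	 * so (lo(dst) < src) yields the carry 1 added to the high word.
	 */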
	switch (op) {
	/* dst = dst + imm */
	case BPF_ADD:
		emit(ctx, addu, lo(dst), lo(dst), src);
		emit(ctx, sltu, MIPS_R_T9, lo(dst), src);
		emit(ctx, addu, hi(dst), hi(dst), MIPS_R_T9);
		if (imm < 0)
			emit(ctx, addiu, hi(dst), hi(dst), -1);
		break;
	/* dst = dst - imm */
	case BPF_SUB:
		emit(ctx, sltu, MIPS_R_T9, lo(dst), src);
		emit(ctx, subu, lo(dst), lo(dst), src);
		emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9);
		if (imm < 0)
			emit(ctx, addiu, hi(dst), hi(dst), 1);
		break;
	/* dst = dst | imm */
	case BPF_OR:
		emit(ctx, or, lo(dst), lo(dst), src);
		if (imm < 0)
			emit(ctx, addiu, hi(dst), MIPS_R_ZERO, -1);
		break;
	/* dst = dst & imm */
	case BPF_AND:
		emit(ctx, and, lo(dst), lo(dst), src);
		if (imm >= 0)
			emit(ctx, move, hi(dst), MIPS_R_ZERO);
		break;
	/* dst = dst ^ imm */
	case BPF_XOR:
		emit(ctx, xor, lo(dst), lo(dst), src);
		if (imm < 0) {
			emit(ctx, subu, hi(dst), MIPS_R_ZERO, hi(dst));
			emit(ctx, addiu, hi(dst), hi(dst), -1);
		}
		break;
	}
	clobber_reg64(ctx, dst);
}

/* ALU register operation (64-bit) */
static void emit_alu_r64(struct jit_context *ctx,
			 const u8 dst[], const u8 src[], u8 op)
{
	switch (BPF_OP(op)) {
	/* dst = dst + src */
	case BPF_ADD:
		if (src == dst) {
			emit(ctx, srl, MIPS_R_T9, lo(dst), 31);
			emit(ctx, addu, lo(dst), lo(dst), lo(dst));
		} else {
			emit(ctx, addu, lo(dst), lo(dst), lo(src));
			emit(ctx, sltu, MIPS_R_T9, lo(dst), lo(src));
		}
		emit(ctx, addu, hi(dst), hi(dst), hi(src));
		emit(ctx, addu, hi(dst), hi(dst), MIPS_R_T9);
		break;
	/* dst = dst - src */
	case BPF_SUB:
		emit(ctx, sltu, MIPS_R_T9, lo(dst), lo(src));
		emit(ctx, subu, lo(dst), lo(dst), lo(src));
		emit(ctx, subu, hi(dst), hi(dst), hi(src));
		emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9);
		break;
	/* dst = dst | src */
	case BPF_OR:
		emit(ctx, or, lo(dst), lo(dst), lo(src));
		emit(ctx, or, hi(dst), hi(dst), hi(src));
		break;
	/* dst = dst & src */
	case BPF_AND:
		emit(ctx, and, lo(dst), lo(dst), lo(src));
		emit(ctx, and, hi(dst), hi(dst), hi(src));
		break;
	/* dst = dst ^ src */
	case BPF_XOR:
		emit(ctx, xor, lo(dst), lo(dst), lo(src));
		emit(ctx, xor, hi(dst), hi(dst), hi(src));
		break;
	}
	clobber_reg64(ctx, dst);
}

/* ALU negate (64-bit) */
static void emit_neg_i64(struct jit_context *ctx, const u8 dst[])
{
	emit(ctx, sltu, MIPS_R_T9, MIPS_R_ZERO, lo(dst));
	emit(ctx, subu, lo(dst), MIPS_R_ZERO, lo(dst));
	emit(ctx, subu, hi(dst), MIPS_R_ZERO, hi(dst));
	emit(ctx, subu, hi(dst), hi(dst), MIPS_R_T9);

	clobber_reg64(ctx, dst);
}

/* ALU shift immediate (64-bit) */
static void emit_shift_i64(struct jit_context *ctx,
			   const u8 dst[], u32 imm, u8 op)
{
	switch (BPF_OP(op)) {
	/* dst = dst << imm */
	case BPF_LSH:
		if (imm < 32) {
			emit(ctx, srl, MIPS_R_T9, lo(dst), 32 - imm);
			emit(ctx, sll, lo(dst), lo(dst), imm);
			emit(ctx, sll, hi(dst), hi(dst), imm);
			emit(ctx, or, hi(dst), hi(dst), MIPS_R_T9);
		} else {
			emit(ctx, sll, hi(dst), lo(dst), imm - 32);
			emit(ctx, move, lo(dst), MIPS_R_ZERO);
		}
		break;
	/* dst = dst >> imm */
	case BPF_RSH:
		if (imm < 32) {
			emit(ctx, sll, MIPS_R_T9, hi(dst), 32 - imm);
			emit(ctx, srl, lo(dst), lo(dst), imm);
			emit(ctx, srl, hi(dst), hi(dst), imm);
			emit(ctx, or, lo(dst), lo(dst), MIPS_R_T9);
		} else {
			emit(ctx, srl, lo(dst), hi(dst), imm - 32);
			emit(ctx, move, hi(dst), MIPS_R_ZERO);
		}
		break;
	/* dst = dst >> imm (arithmetic) */
	case BPF_ARSH:
		if (imm < 32) {
			emit(ctx, sll, MIPS_R_T9, hi(dst), 32 - imm);
			emit(ctx, srl, lo(dst), lo(dst), imm);
			emit(ctx, sra, hi(dst), hi(dst), imm);
			emit(ctx, or, lo(dst), lo(dst), MIPS_R_T9);
		} else {
			emit(ctx, sra, lo(dst), hi(dst), imm - 32);
			emit(ctx, sra, hi(dst), hi(dst), 31);
		}
		break;
	}
	clobber_reg64(ctx, dst);
}

/* ALU shift register (64-bit) */
static void emit_shift_r64(struct jit_context *ctx,
			   const u8 dst[], u8 src, u8 op)
{
	u8 t1 = MIPS_R_T8;
	u8 t2 = MIPS_R_T9;

	emit(ctx, andi, t1, src, 32);              /* t1 = src & 32          */
	emit(ctx, beqz, t1, 16);                   /* PC += 16 if t1 == 0    */
	emit(ctx, nor, t2, src, MIPS_R_ZERO);      /* t2 = ~src (delay slot) */
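
	/*
	 * MIPS variable shifts use only the low 5 bits of the shift
	 * amount, so shifts by 32-63 bits must take the first path in
	 * each case below. On the short-shift path, the bits crossing
	 * between the words are extracted as e.g. (dl >> 1) >> (~src & 31),
	 * which equals dl >> (32 - src) without ever shifting by a full
	 * 32 bits.
	 */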

	switch (BPF_OP(op)) {
	/* dst = dst << src */
	case BPF_LSH:
		/* Next: shift >= 32 */
		emit(ctx, sllv, hi(dst), lo(dst), src);    /* dh = dl << src */
		emit(ctx, move, lo(dst), MIPS_R_ZERO);     /* dl = 0         */
		emit(ctx, b, 20);                          /* PC += 20       */
		/* +16: shift < 32 */
		emit(ctx, srl, t1, lo(dst), 1);            /* t1 = dl >> 1   */
		emit(ctx, srlv, t1, t1, t2);               /* t1 = t1 >> t2  */
		emit(ctx, sllv, lo(dst), lo(dst), src);    /* dl = dl << src */
		emit(ctx, sllv, hi(dst), hi(dst), src);    /* dh = dh << src */
		emit(ctx, or, hi(dst), hi(dst), t1);       /* dh = dh | t1   */
		break;
	/* dst = dst >> src */
	case BPF_RSH:
		/* Next: shift >= 32 */
		emit(ctx, srlv, lo(dst), hi(dst), src);    /* dl = dh >> src */
		emit(ctx, move, hi(dst), MIPS_R_ZERO);     /* dh = 0         */
		emit(ctx, b, 20);                          /* PC += 20       */
		/* +16: shift < 32 */
		emit(ctx, sll, t1, hi(dst), 1);            /* t1 = dh << 1   */
		emit(ctx, sllv, t1, t1, t2);               /* t1 = t1 << t2  */
		emit(ctx, srlv, lo(dst), lo(dst), src);    /* dl = dl >> src */
		emit(ctx, srlv, hi(dst), hi(dst), src);    /* dh = dh >> src */
		emit(ctx, or, lo(dst), lo(dst), t1);       /* dl = dl | t1   */
		break;
	/* dst = dst >> src (arithmetic) */
	case BPF_ARSH:
		/* Next: shift >= 32 */
		emit(ctx, srav, lo(dst), hi(dst), src);   /* dl = dh >>a src */
		emit(ctx, sra, hi(dst), hi(dst), 31);     /* dh = dh >>a 31  */
		emit(ctx, b, 20);                         /* PC += 20        */
		/* +16: shift < 32 */
		emit(ctx, sll, t1, hi(dst), 1);           /* t1 = dh << 1    */
		emit(ctx, sllv, t1, t1, t2);              /* t1 = t1 << t2   */
		emit(ctx, srlv, lo(dst), lo(dst), src);   /* dl = dl >> src  */
		emit(ctx, srav, hi(dst), hi(dst), src);   /* dh = dh >>a src */
		emit(ctx, or, lo(dst), lo(dst), t1);      /* dl = dl | t1    */
		break;
	}

	/* +20: Done */
	clobber_reg64(ctx, dst);
}

/* ALU mul immediate (64x32-bit) */
static void emit_mul_i64(struct jit_context *ctx, const u8 dst[], s32 imm)
{
	u8 src = MIPS_R_T6;
	u8 tmp = MIPS_R_T9;

	switch (imm) {
	/* dst = dst * 1 is a no-op */
	case 1:
		break;
	/* dst = dst * -1 */
	case -1:
		emit_neg_i64(ctx, dst);
		break;
	case 0:
		emit_mov_r(ctx, lo(dst), MIPS_R_ZERO);
		emit_mov_r(ctx, hi(dst), MIPS_R_ZERO);
		break;
	/* Full 64x32 multiply */
	default:
		/* hi(dst) = hi(dst) * src(imm) */
		emit_mov_i(ctx, src, imm);
		if (cpu_has_mips32r1 || cpu_has_mips32r6) {
			emit(ctx, mul, hi(dst), hi(dst), src);
		} else {
			emit(ctx, multu, hi(dst), src);
			emit(ctx, mflo, hi(dst));
		}

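		/*
		 * A negative imm sign-extends to a 64-bit value whose high
		 * word is -1, which contributes -1 * lo(dst) to the high
		 * word of the product; hence the subtraction below.
		 */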
		/* hi(dst) = hi(dst) - lo(dst) */
		if (imm < 0)
			emit(ctx, subu, hi(dst), hi(dst), lo(dst));

		/* tmp = lo(dst) * src(imm) >> 32 */
		/* lo(dst) = lo(dst) * src(imm) */
		if (cpu_has_mips32r6) {
			emit(ctx, muhu, tmp, lo(dst), src);
			emit(ctx, mulu, lo(dst), lo(dst), src);
		} else {
			emit(ctx, multu, lo(dst), src);
			emit(ctx, mflo, lo(dst));
			emit(ctx, mfhi, tmp);
		}

		/* hi(dst) += tmp */
		emit(ctx, addu, hi(dst), hi(dst), tmp);
		clobber_reg64(ctx, dst);
		break;
	}
}

/* ALU mul register (64x64-bit) */
static void emit_mul_r64(struct jit_context *ctx,
			 const u8 dst[], const u8 src[])
{
	u8 acc = MIPS_R_T8;
	u8 tmp = MIPS_R_T9;

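	/*
	 * Low 64 bits of the 128-bit product:
	 * (dh*2^32 + dl) * (sh*2^32 + sl) mod 2^64 has the low word
	 * dl*sl mod 2^32 and the high word
	 * (dh*sl + dl*sh + carry) mod 2^32, where the carry is the
	 * high word of the full 64-bit product dl*sl.
	 */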
	/* acc = hi(dst) * lo(src) */
	if (cpu_has_mips32r1 || cpu_has_mips32r6) {
		emit(ctx, mul, acc, hi(dst), lo(src));
	} else {
		emit(ctx, multu, hi(dst), lo(src));
		emit(ctx, mflo, acc);
	}

	/* tmp = lo(dst) * hi(src) */
	if (cpu_has_mips32r1 || cpu_has_mips32r6) {
		emit(ctx, mul, tmp, lo(dst), hi(src));
	} else {
		emit(ctx, multu, lo(dst), hi(src));
		emit(ctx, mflo, tmp);
	}

	/* acc += tmp */
	emit(ctx, addu, acc, acc, tmp);

	/* tmp = lo(dst) * lo(src) >> 32 */
	/* lo(dst) = lo(dst) * lo(src) */
	if (cpu_has_mips32r6) {
		emit(ctx, muhu, tmp, lo(dst), lo(src));
		emit(ctx, mulu, lo(dst), lo(dst), lo(src));
	} else {
		emit(ctx, multu, lo(dst), lo(src));
		emit(ctx, mflo, lo(dst));
		emit(ctx, mfhi, tmp);
	}

	/* hi(dst) = acc + tmp */
	emit(ctx, addu, hi(dst), acc, tmp);
	clobber_reg64(ctx, dst);
}

/* Helper function for 64-bit modulo */
static u64 jit_mod64(u64 a, u64 b)
{
	u64 rem;

	div64_u64_rem(a, b, &rem);
	return rem;
}

/* ALU div/mod register (64-bit) */
static void emit_divmod_r64(struct jit_context *ctx,
			    const u8 dst[], const u8 src[], u8 op)
{
	const u8 *r0 = bpf2mips32[BPF_REG_0]; /* Mapped to v0-v1 */
	const u8 *r1 = bpf2mips32[BPF_REG_1]; /* Mapped to a0-a1 */
	const u8 *r2 = bpf2mips32[BPF_REG_2]; /* Mapped to a2-a3 */
	int exclude, k;
	u32 addr = 0;

	/* Push caller-saved registers on stack */
	push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
		  0, JIT_RESERVED_STACK);

	/* Put 64-bit arguments 1 and 2 in registers a0-a3 */
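	/*
	 * src may itself be mapped to a0-a1 (eBPF R1), so each word is
	 * staged in t9 before dst overwrites the argument registers.
	 */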
	for (k = 0; k < 2; k++) {
		emit(ctx, move, MIPS_R_T9, src[k]);
		emit(ctx, move, r1[k], dst[k]);
		emit(ctx, move, r2[k], MIPS_R_T9);
	}

	/* Emit function call */
	switch (BPF_OP(op)) {
	/* dst = dst / src */
	case BPF_DIV:
		addr = (u32)&div64_u64;
		break;
	/* dst = dst % src */
	case BPF_MOD:
		addr = (u32)&jit_mod64;
		break;
	}
	emit_mov_i(ctx, MIPS_R_T9, addr);
	emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
	emit(ctx, nop); /* Delay slot */

	/* Store the 64-bit result in dst */
	emit(ctx, move, dst[0], r0[0]);
	emit(ctx, move, dst[1], r0[1]);

	/* Restore caller-saved registers, excluding the computed result */
	exclude = BIT(lo(dst)) | BIT(hi(dst));
	pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
		 exclude, JIT_RESERVED_STACK);
	emit_load_delay(ctx);

	clobber_reg64(ctx, dst);
	clobber_reg(ctx, MIPS_R_V0);
	clobber_reg(ctx, MIPS_R_V1);
	clobber_reg(ctx, MIPS_R_RA);
}

/* Swap bytes in a register word */
static void emit_swap8_r(struct jit_context *ctx, u8 dst, u8 src, u8 mask)
{
	u8 tmp = MIPS_R_T9;

	emit(ctx, and, tmp, src, mask); /* tmp = src & 0x00ff00ff */
	emit(ctx, sll, tmp, tmp, 8);    /* tmp = tmp << 8         */
	emit(ctx, srl, dst, src, 8);    /* dst = src >> 8         */
	emit(ctx, and, dst, dst, mask); /* dst = dst & 0x00ff00ff */
	emit(ctx, or,  dst, dst, tmp);  /* dst = dst | tmp        */
}

/* Swap half words in a register word */
static void emit_swap16_r(struct jit_context *ctx, u8 dst, u8 src)
{
	u8 tmp = MIPS_R_T9;

	emit(ctx, sll, tmp, src, 16);  /* tmp = src << 16 */
	emit(ctx, srl, dst, src, 16);  /* dst = src >> 16 */
	emit(ctx, or,  dst, dst, tmp); /* dst = dst | tmp */
}

/* Swap bytes and truncate a register double word, word or half word */
static void emit_bswap_r64(struct jit_context *ctx, const u8 dst[], u32 width)
{
	u8 tmp = MIPS_R_T8;

	switch (width) {
	/* Swap bytes in a double word */
	case 64:
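		/*
		 * On r2+, bswap32 is rotr by 16 followed by wsbh (swap
		 * bytes within each half word). The two words also trade
		 * places for the 64-bit swap: tmp carries the old high
		 * word into lo(dst), and the old low word ends up byte
		 * swapped in hi(dst).
		 */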
		if (cpu_has_mips32r2 || cpu_has_mips32r6) {
			emit(ctx, rotr, tmp, hi(dst), 16);
			emit(ctx, rotr, hi(dst), lo(dst), 16);
			emit(ctx, wsbh, lo(dst), tmp);
			emit(ctx, wsbh, hi(dst), hi(dst));
		} else {
			emit_swap16_r(ctx, tmp, lo(dst));
			emit_swap16_r(ctx, lo(dst), hi(dst));
			emit(ctx, move, hi(dst), tmp);

			emit(ctx, lui, tmp, 0xff);      /* tmp = 0x00ff0000 */
			emit(ctx, ori, tmp, tmp, 0xff); /* tmp = 0x00ff00ff */
			emit_swap8_r(ctx, lo(dst), lo(dst), tmp);
			emit_swap8_r(ctx, hi(dst), hi(dst), tmp);
		}
		break;
	/* Swap bytes in a word */
	/* Swap bytes in a half word */
	case 32:
	case 16:
		emit_bswap_r(ctx, lo(dst), width);
		emit(ctx, move, hi(dst), MIPS_R_ZERO);
		break;
	}
	clobber_reg64(ctx, dst);
}

/* Truncate a register double word, word or half word */
static void emit_trunc_r64(struct jit_context *ctx, const u8 dst[], u32 width)
{
	switch (width) {
	case 64:
		break;
	/* Zero-extend a word */
	case 32:
		emit(ctx, move, hi(dst), MIPS_R_ZERO);
		clobber_reg(ctx, hi(dst));
		break;
	/* Zero-extend a half word */
	case 16:
		emit(ctx, move, hi(dst), MIPS_R_ZERO);
		emit(ctx, andi, lo(dst), lo(dst), 0xffff);
		clobber_reg64(ctx, dst);
		break;
	}
}

/* Load operation: dst = *(size*)(src + off) */
static void emit_ldx(struct jit_context *ctx,
		     const u8 dst[], u8 src, s16 off, u8 size)
{
	switch (size) {
	/* Load a byte */
	case BPF_B:
		emit(ctx, lbu, lo(dst), off, src);
		emit(ctx, move, hi(dst), MIPS_R_ZERO);
		break;
	/* Load a half word */
	case BPF_H:
		emit(ctx, lhu, lo(dst), off, src);
		emit(ctx, move, hi(dst), MIPS_R_ZERO);
		break;
	/* Load a word */
	case BPF_W:
		emit(ctx, lw, lo(dst), off, src);
		emit(ctx, move, hi(dst), MIPS_R_ZERO);
		break;
	/* Load a double word */
	case BPF_DW:
		if (dst[1] == src) {
			emit(ctx, lw, dst[0], off + 4, src);
			emit(ctx, lw, dst[1], off, src);
		} else {
			emit(ctx, lw, dst[1], off, src);
			emit(ctx, lw, dst[0], off + 4, src);
		}
		emit_load_delay(ctx);
		break;
	}
	clobber_reg64(ctx, dst);
}

/* Store operation: *(size *)(dst + off) = src */
static void emit_stx(struct jit_context *ctx,
		     const u8 dst, const u8 src[], s16 off, u8 size)
{
	switch (size) {
	/* Store a byte */
	case BPF_B:
		emit(ctx, sb, lo(src), off, dst);
		break;
	/* Store a half word */
	case BPF_H:
		emit(ctx, sh, lo(src), off, dst);
		break;
	/* Store a word */
	case BPF_W:
		emit(ctx, sw, lo(src), off, dst);
		break;
	/* Store a double word */
	case BPF_DW:
		emit(ctx, sw, src[1], off, dst);
		emit(ctx, sw, src[0], off + 4, dst);
		break;
	}
}

/* Atomic read-modify-write (32-bit, non-ll/sc fallback) */
static void emit_atomic_r32(struct jit_context *ctx,
			    u8 dst, u8 src, s16 off, u8 code)
{
	u32 exclude = 0;
	u32 addr = 0;

	/* Push caller-saved registers on stack */
	push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
		  0, JIT_RESERVED_STACK);
	/*
	 * Argument 1: dst+off if xchg, otherwise src, passed in register a0
	 * Argument 2: src if xchg, otherwise dst+off, passed in register a1
	 */
	emit(ctx, move, MIPS_R_T9, dst);
	if (code == BPF_XCHG) {
		emit(ctx, move, MIPS_R_A1, src);
		emit(ctx, addiu, MIPS_R_A0, MIPS_R_T9, off);
	} else {
		emit(ctx, move, MIPS_R_A0, src);
		emit(ctx, addiu, MIPS_R_A1, MIPS_R_T9, off);
	}

	/* Emit function call */
	switch (code) {
	case BPF_ADD:
		addr = (u32)&atomic_add;
		break;
	case BPF_ADD | BPF_FETCH:
		addr = (u32)&atomic_fetch_add;
		break;
	case BPF_SUB:
		addr = (u32)&atomic_sub;
		break;
	case BPF_SUB | BPF_FETCH:
		addr = (u32)&atomic_fetch_sub;
		break;
	case BPF_OR:
		addr = (u32)&atomic_or;
		break;
	case BPF_OR | BPF_FETCH:
		addr = (u32)&atomic_fetch_or;
		break;
	case BPF_AND:
		addr = (u32)&atomic_and;
		break;
	case BPF_AND | BPF_FETCH:
		addr = (u32)&atomic_fetch_and;
		break;
	case BPF_XOR:
		addr = (u32)&atomic_xor;
		break;
	case BPF_XOR | BPF_FETCH:
		addr = (u32)&atomic_fetch_xor;
		break;
	case BPF_XCHG:
		addr = (u32)&atomic_xchg;
		break;
	}
	emit_mov_i(ctx, MIPS_R_T9, addr);
	emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
	emit(ctx, nop); /* Delay slot */

	/* Update src register with old value, if specified */
	if (code & BPF_FETCH) {
		emit(ctx, move, src, MIPS_R_V0);
		exclude = BIT(src);
		clobber_reg(ctx, src);
	}

	/* Restore caller-saved registers, except any fetched value */
	pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
		 exclude, JIT_RESERVED_STACK);
	emit_load_delay(ctx);
	clobber_reg(ctx, MIPS_R_RA);
}

/* Helper function for 64-bit atomic exchange */
static s64 jit_xchg64(s64 a, atomic64_t *v)
{
	return atomic64_xchg(v, a);
}

/* Atomic read-modify-write (64-bit) */
static void emit_atomic_r64(struct jit_context *ctx,
			    u8 dst, const u8 src[], s16 off, u8 code)
{
	const u8 *r0 = bpf2mips32[BPF_REG_0]; /* Mapped to v0-v1 */
	const u8 *r1 = bpf2mips32[BPF_REG_1]; /* Mapped to a0-a1 */
	u32 exclude = 0;
	u32 addr = 0;

	/* Push caller-saved registers on stack */
	push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
		  0, JIT_RESERVED_STACK);
	/*
	 * Argument 1: 64-bit src, passed in registers a0-a1
	 * Argument 2: 32-bit dst+off, passed in register a2
	 */
	emit(ctx, move, MIPS_R_T9, dst);
	emit(ctx, move, r1[0], src[0]);
	emit(ctx, move, r1[1], src[1]);
	emit(ctx, addiu, MIPS_R_A2, MIPS_R_T9, off);

	/* Emit function call */
	switch (code) {
	case BPF_ADD:
		addr = (u32)&atomic64_add;
		break;
	case BPF_ADD | BPF_FETCH:
		addr = (u32)&atomic64_fetch_add;
		break;
	case BPF_SUB:
		addr = (u32)&atomic64_sub;
		break;
	case BPF_SUB | BPF_FETCH:
		addr = (u32)&atomic64_fetch_sub;
		break;
	case BPF_OR:
		addr = (u32)&atomic64_or;
		break;
	case BPF_OR | BPF_FETCH:
		addr = (u32)&atomic64_fetch_or;
		break;
	case BPF_AND:
		addr = (u32)&atomic64_and;
		break;
	case BPF_AND | BPF_FETCH:
		addr = (u32)&atomic64_fetch_and;
		break;
	case BPF_XOR:
		addr = (u32)&atomic64_xor;
		break;
	case BPF_XOR | BPF_FETCH:
		addr = (u32)&atomic64_fetch_xor;
		break;
	case BPF_XCHG:
		addr = (u32)&jit_xchg64;
		break;
	}
	emit_mov_i(ctx, MIPS_R_T9, addr);
	emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
	emit(ctx, nop); /* Delay slot */

	/* Update src register with old value, if specified */
	if (code & BPF_FETCH) {
		emit(ctx, move, lo(src), lo(r0));
		emit(ctx, move, hi(src), hi(r0));
		exclude = BIT(src[0]) | BIT(src[1]);
		clobber_reg64(ctx, src);
	}

	/* Restore caller-saved registers, except any fetched value */
	pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
		 exclude, JIT_RESERVED_STACK);
	emit_load_delay(ctx);
	clobber_reg(ctx, MIPS_R_RA);
}

/* Atomic compare-and-exchange (32-bit, non-ll/sc fallback) */
static void emit_cmpxchg_r32(struct jit_context *ctx, u8 dst, u8 src, s16 off)
{
	const u8 *r0 = bpf2mips32[BPF_REG_0];

	/* Push caller-saved registers on stack */
	push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
		  JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32));
	/*
	 * Argument 1: 32-bit dst+off, passed in register a0
	 * Argument 2: 32-bit r0, passed in register a1
	 * Argument 3: 32-bit src, passed in register a2
	 */
	emit(ctx, addiu, MIPS_R_T9, dst, off);
	emit(ctx, move, MIPS_R_T8, src);
	emit(ctx, move, MIPS_R_A1, lo(r0));
	emit(ctx, move, MIPS_R_A0, MIPS_R_T9);
	emit(ctx, move, MIPS_R_A2, MIPS_R_T8);

	/* Emit function call */
	emit_mov_i(ctx, MIPS_R_T9, (u32)&atomic_cmpxchg);
	emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
	emit(ctx, nop); /* Delay slot */

#ifdef __BIG_ENDIAN
	emit(ctx, move, lo(r0), MIPS_R_V0);
#endif
	/* Restore caller-saved registers, except the return value */
	pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
		 JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32));
	emit_load_delay(ctx);
	clobber_reg(ctx, MIPS_R_V0);
	clobber_reg(ctx, MIPS_R_V1);
	clobber_reg(ctx, MIPS_R_RA);
}

/* Atomic compare-and-exchange (64-bit) */
static void emit_cmpxchg_r64(struct jit_context *ctx,
			     u8 dst, const u8 src[], s16 off)
{
	const u8 *r0 = bpf2mips32[BPF_REG_0];
	const u8 *r2 = bpf2mips32[BPF_REG_2];

	/* Push caller-saved registers on stack */
	push_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
		  JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32));
	/*
	 * Argument 1: 32-bit dst+off, passed in register a0 (a1 unused)
	 * Argument 2: 64-bit r0, passed in registers a2-a3
	 * Argument 3: 64-bit src, passed on stack
	 */
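	/*
	 * With a0 taken and a2-a3 holding the 64-bit old value, the O32
	 * ABI places the third (64-bit) argument on the stack: push_regs()
	 * below stores the src pair starting at offset JIT_RESERVED_STACK,
	 * the first stack-argument slot seen by the callee.
	 */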
	push_regs(ctx, BIT(src[0]) | BIT(src[1]), 0, JIT_RESERVED_STACK);
	emit(ctx, addiu, MIPS_R_T9, dst, off);
	emit(ctx, move, r2[0], r0[0]);
	emit(ctx, move, r2[1], r0[1]);
	emit(ctx, move, MIPS_R_A0, MIPS_R_T9);

	/* Emit function call */
	emit_mov_i(ctx, MIPS_R_T9, (u32)&atomic64_cmpxchg);
	emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
	emit(ctx, nop); /* Delay slot */

	/* Restore caller-saved registers, except the return value */
	pop_regs(ctx, ctx->clobbered & JIT_CALLER_REGS,
		 JIT_RETURN_REGS, JIT_RESERVED_STACK + 2 * sizeof(u32));
	emit_load_delay(ctx);
	clobber_reg(ctx, MIPS_R_V0);
	clobber_reg(ctx, MIPS_R_V1);
	clobber_reg(ctx, MIPS_R_RA);
}

/*
 * Conditional movz or an emulated equivalent.
 * Note that the rs register may be modified.
 */
static void emit_movz_r(struct jit_context *ctx, u8 rd, u8 rs, u8 rt)
{
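	/*
	 * The r6 path composes rd = rt ? rd : rs without a branch:
	 * seleqz keeps rs only when rt == 0, selnez keeps rd only when
	 * rt != 0, and at most one of the two or'ed values is nonzero.
	 */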
	if (cpu_has_mips_2) {
		emit(ctx, movz, rd, rs, rt);           /* rd = rt ? rd : rs  */
	} else if (cpu_has_mips32r6) {
		if (rs != MIPS_R_ZERO)
			emit(ctx, seleqz, rs, rs, rt); /* rs = 0 if rt != 0  */
		emit(ctx, selnez, rd, rd, rt);         /* rd = 0 if rt == 0  */
		if (rs != MIPS_R_ZERO)
			emit(ctx, or, rd, rd, rs);     /* rd = rd | rs       */
	} else {
		emit(ctx, bnez, rt, 8);                /* PC += 8 if rt != 0 */
		emit(ctx, nop);                        /* +0: delay slot     */
		emit(ctx, or, rd, rs, MIPS_R_ZERO);    /* +4: rd = rs        */
	}
	clobber_reg(ctx, rd);
	clobber_reg(ctx, rs);
}

/*
 * Conditional movn or an emulated equivalent.
 * Note that the rs register may be modified.
 */
static void emit_movn_r(struct jit_context *ctx, u8 rd, u8 rs, u8 rt)
{
	if (cpu_has_mips_2) {
		emit(ctx, movn, rd, rs, rt);           /* rd = rt ? rs : rd  */
	} else if (cpu_has_mips32r6) {
		if (rs != MIPS_R_ZERO)
			emit(ctx, selnez, rs, rs, rt); /* rs = 0 if rt == 0  */
		emit(ctx, seleqz, rd, rd, rt);         /* rd = 0 if rt != 0  */
		if (rs != MIPS_R_ZERO)
			emit(ctx, or, rd, rd, rs);     /* rd = rd | rs       */
	} else {
		emit(ctx, beqz, rt, 8);                /* PC += 8 if rt == 0 */
		emit(ctx, nop);                        /* +0: delay slot     */
		emit(ctx, or, rd, rs, MIPS_R_ZERO);    /* +4: rd = rs        */
	}
	clobber_reg(ctx, rd);
	clobber_reg(ctx, rs);
}

/* Emulation of 64-bit sltiu rd, rs, imm, where imm may be S32_MAX + 1 */
static void emit_sltiu_r64(struct jit_context *ctx, u8 rd,
			   const u8 rs[], s64 imm)
{
	u8 tmp = MIPS_R_T9;

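	/*
	 * A negative imm sign-extends to a 64-bit value with the high
	 * word 0xffffffff. Then rs < imm holds iff hi(rs) < 0xffffffff,
	 * or the high words are equal and lo(rs) < lo(imm); the two
	 * conditions are combined with a bitwise or below.
	 */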
	if (imm < 0) {
		emit_mov_i(ctx, rd, imm);                 /* rd = imm        */
		emit(ctx, sltu, rd, lo(rs), rd);          /* rd = rsl < rd   */
		emit(ctx, sltiu, tmp, hi(rs), -1);        /* tmp = rsh < ~0U */
		emit(ctx, or, rd, rd, tmp);               /* rd = rd | tmp   */
	} else { /* imm >= 0 */
		if (imm > 0x7fff) {
			emit_mov_i(ctx, rd, (s32)imm);     /* rd = imm       */
			emit(ctx, sltu, rd, lo(rs), rd);   /* rd = rsl < rd  */
		} else {
			emit(ctx, sltiu, rd, lo(rs), imm); /* rd = rsl < imm */
		}
		emit_movn_r(ctx, rd, MIPS_R_ZERO, hi(rs)); /* rd = 0 if rsh  */
	}
}

/* Emulation of 64-bit sltu rd, rs, rt */
static void emit_sltu_r64(struct jit_context *ctx, u8 rd,
			  const u8 rs[], const u8 rt[])
{
	u8 tmp = MIPS_R_T9;

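	/*
	 * The low-word compare only counts when the high words are equal,
	 * i.e. when their difference is zero; otherwise the high-word
	 * compare alone decides the result.
	 */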
	emit(ctx, sltu, rd, lo(rs), lo(rt));           /* rd = rsl < rtl     */
	emit(ctx, subu, tmp, hi(rs), hi(rt));          /* tmp = rsh - rth    */
	emit_movn_r(ctx, rd, MIPS_R_ZERO, tmp);        /* rd = 0 if tmp != 0 */
	emit(ctx, sltu, tmp, hi(rs), hi(rt));          /* tmp = rsh < rth    */
	emit(ctx, or, rd, rd, tmp);                    /* rd = rd | tmp      */
}

/* Emulation of 64-bit slti rd, rs, imm, where imm may be S32_MAX + 1 */
static void emit_slti_r64(struct jit_context *ctx, u8 rd,
			  const u8 rs[], s64 imm)
{
	u8 t1 = MIPS_R_T8;
	u8 t2 = MIPS_R_T9;
	u8 cmp;

	/*
	 * if ((rs < 0) ^ (imm < 0)) t1 = imm <u rsl
	 * else                      t1 = rsl <u imm
	 */
	emit_mov_i(ctx, rd, (s32)imm);
	emit(ctx, sltu, t1, lo(rs), rd);               /* t1 = rsl <u imm   */
	emit(ctx, sltu, t2, rd, lo(rs));               /* t2 = imm <u rsl   */
	emit(ctx, srl, rd, hi(rs), 31);                /* rd = rsh >> 31    */
	if (imm < 0)
		emit_movz_r(ctx, t1, t2, rd);          /* t1 = rd ? t1 : t2 */
	else
		emit_movn_r(ctx, t1, t2, rd);          /* t1 = rd ? t2 : t1 */
	/*
	 * if ((imm < 0 && rsh != 0xffffffff) ||
	 *     (imm >= 0 && rsh != 0))
	 *      t1 = 0
	 */
	if (imm < 0) {
		emit(ctx, addiu, rd, hi(rs), 1);       /* rd = rsh + 1 */
		cmp = rd;
	} else { /* imm >= 0 */
		cmp = hi(rs);
	}
	emit_movn_r(ctx, t1, MIPS_R_ZERO, cmp);        /* t1 = 0 if cmp != 0 */

	/*
	 * if (imm < 0) rd = rsh < -1
	 * else         rd = rsh < 0
	 * rd = rd | t1
	 */
	emit(ctx, slti, rd, hi(rs), imm < 0 ? -1 : 0); /* rd = rsh < hi(imm) */
	emit(ctx, or, rd, rd, t1);                     /* rd = rd | t1       */
}

/* Emulation of 64-bit slt rd, rs, rt */
static void emit_slt_r64(struct jit_context *ctx, u8 rd,
			 const u8 rs[], const u8 rt[])
{
	u8 t1 = MIPS_R_T7;
	u8 t2 = MIPS_R_T8;
	u8 t3 = MIPS_R_T9;

	/*
	 * if ((rs < 0) ^ (rt < 0)) t1 = rtl <u rsl
	 * else                     t1 = rsl <u rtl
	 * if (rsh != rth)          t1 = 0
	 */
	emit(ctx, sltu, t1, lo(rs), lo(rt));           /* t1 = rsl <u rtl   */
	emit(ctx, sltu, t2, lo(rt), lo(rs));           /* t2 = rtl <u rsl   */
	emit(ctx, xor, t3, hi(rs), hi(rt));            /* t3 = rsh ^ rth    */
	emit(ctx, srl, rd, t3, 31);                    /* rd = t3 >> 31     */
	emit_movn_r(ctx, t1, t2, rd);                  /* t1 = rd ? t2 : t1 */
	emit_movn_r(ctx, t1, MIPS_R_ZERO, t3);         /* t1 = 0 if t3 != 0 */

	/* rd = (rsh < rth) | t1 */
	emit(ctx, slt, rd, hi(rs), hi(rt));            /* rd = rsh <s rth   */
	emit(ctx, or, rd, rd, t1);                     /* rd = rd | t1      */
}

/* Jump immediate (64-bit) */
static void emit_jmp_i64(struct jit_context *ctx,
			 const u8 dst[], s32 imm, s32 off, u8 op)
{
	u8 tmp = MIPS_R_T6;

	switch (op) {
	/* No-op, used internally for branch optimization */
	case JIT_JNOP:
		break;
	/* PC += off if dst == imm */
	/* PC += off if dst != imm */
	case BPF_JEQ:
	case BPF_JNE:
		if (imm >= -0x7fff && imm <= 0x8000) {
			emit(ctx, addiu, tmp, lo(dst), -imm);
		} else if ((u32)imm <= 0xffff) {
			emit(ctx, xori, tmp, lo(dst), imm);
		} else {       /* Register fallback */
			emit_mov_i(ctx, tmp, imm);
			emit(ctx, xor, tmp, lo(dst), tmp);
		}
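		/*
		 * tmp is now zero iff the low words match. The high word
		 * of the sign-extended imm is -1 for imm < 0 and 0
		 * otherwise, so hi(dst) + 1 respectively hi(dst) itself
		 * must also be zero for a full 64-bit match.
		 */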
		if (imm < 0) { /* Compare sign extension */
			emit(ctx, addiu, MIPS_R_T9, hi(dst), 1);
			emit(ctx, or, tmp, tmp, MIPS_R_T9);
		} else {       /* Compare zero extension */
			emit(ctx, or, tmp, tmp, hi(dst));
		}
		if (op == BPF_JEQ)
			emit(ctx, beqz, tmp, off);
		else   /* BPF_JNE */
			emit(ctx, bnez, tmp, off);
		break;
	/* PC += off if dst & imm */
	/* PC += off if (dst & imm) == 0 (not in BPF, used for long jumps) */
	case BPF_JSET:
	case JIT_JNSET:
		if ((u32)imm <= 0xffff) {
			emit(ctx, andi, tmp, lo(dst), imm);
		} else {     /* Register fallback */
			emit_mov_i(ctx, tmp, imm);
			emit(ctx, and, tmp, lo(dst), tmp);
		}
		if (imm < 0) /* Sign-extension pulls in high word */
			emit(ctx, or, tmp, tmp, hi(dst));
		if (op == BPF_JSET)
			emit(ctx, bnez, tmp, off);
		else   /* JIT_JNSET */
			emit(ctx, beqz, tmp, off);
		break;
	/* PC += off if dst > imm */
	case BPF_JGT:
		emit_sltiu_r64(ctx, tmp, dst, (s64)imm + 1);
		emit(ctx, beqz, tmp, off);
		break;
	/* PC += off if dst >= imm */
	case BPF_JGE:
		emit_sltiu_r64(ctx, tmp, dst, imm);
		emit(ctx, beqz, tmp, off);
		break;
	/* PC += off if dst < imm */
	case BPF_JLT:
		emit_sltiu_r64(ctx, tmp, dst, imm);
		emit(ctx, bnez, tmp, off);
		break;
	/* PC += off if dst <= imm */
	case BPF_JLE:
		emit_sltiu_r64(ctx, tmp, dst, (s64)imm + 1);
		emit(ctx, bnez, tmp, off);
		break;
	/* PC += off if dst > imm (signed) */
	case BPF_JSGT:
		emit_slti_r64(ctx, tmp, dst, (s64)imm + 1);
		emit(ctx, beqz, tmp, off);
		break;
	/* PC += off if dst >= imm (signed) */
	case BPF_JSGE:
		emit_slti_r64(ctx, tmp, dst, imm);
		emit(ctx, beqz, tmp, off);
		break;
	/* PC += off if dst < imm (signed) */
	case BPF_JSLT:
		emit_slti_r64(ctx, tmp, dst, imm);
		emit(ctx, bnez, tmp, off);
		break;
	/* PC += off if dst <= imm (signed) */
	case BPF_JSLE:
		emit_slti_r64(ctx, tmp, dst, (s64)imm + 1);
		emit(ctx, bnez, tmp, off);
		break;
	}
}

/* Jump register (64-bit) */
static void emit_jmp_r64(struct jit_context *ctx,
			 const u8 dst[], const u8 src[], s32 off, u8 op)
{
	u8 t1 = MIPS_R_T6;
	u8 t2 = MIPS_R_T7;

	switch (op) {
	/* No-op, used internally for branch optimization */
	case JIT_JNOP:
		break;
	/* PC += off if dst == src */
	/* PC += off if dst != src */
	case BPF_JEQ:
	case BPF_JNE:
		emit(ctx, subu, t1, lo(dst), lo(src));
		emit(ctx, subu, t2, hi(dst), hi(src));
		emit(ctx, or, t1, t1, t2);
		if (op == BPF_JEQ)
			emit(ctx, beqz, t1, off);
		else   /* BPF_JNE */
			emit(ctx, bnez, t1, off);
		break;
	/* PC += off if dst & src */
	/* PC += off if (dst & src) == 0 (not in BPF, used for long jumps) */
	case BPF_JSET:
	case JIT_JNSET:
		emit(ctx, and, t1, lo(dst), lo(src));
		emit(ctx, and, t2, hi(dst), hi(src));
		emit(ctx, or, t1, t1, t2);
		if (op == BPF_JSET)
			emit(ctx, bnez, t1, off);
		else   /* JIT_JNSET */
			emit(ctx, beqz, t1, off);
		break;
	/* PC += off if dst > src */
	case BPF_JGT:
		emit_sltu_r64(ctx, t1, src, dst);
		emit(ctx, bnez, t1, off);
		break;
	/* PC += off if dst >= src */
	case BPF_JGE:
		emit_sltu_r64(ctx, t1, dst, src);
		emit(ctx, beqz, t1, off);
		break;
	/* PC += off if dst < src */
	case BPF_JLT:
		emit_sltu_r64(ctx, t1, dst, src);
		emit(ctx, bnez, t1, off);
		break;
	/* PC += off if dst <= src */
	case BPF_JLE:
		emit_sltu_r64(ctx, t1, src, dst);
		emit(ctx, beqz, t1, off);
		break;
	/* PC += off if dst > src (signed) */
	case BPF_JSGT:
		emit_slt_r64(ctx, t1, src, dst);
		emit(ctx, bnez, t1, off);
		break;
	/* PC += off if dst >= src (signed) */
	case BPF_JSGE:
		emit_slt_r64(ctx, t1, dst, src);
		emit(ctx, beqz, t1, off);
		break;
	/* PC += off if dst < src (signed) */
	case BPF_JSLT:
		emit_slt_r64(ctx, t1, dst, src);
		emit(ctx, bnez, t1, off);
		break;
	/* PC += off if dst <= src (signed) */
	case BPF_JSLE:
		emit_slt_r64(ctx, t1, src, dst);
		emit(ctx, beqz, t1, off);
		break;
	}
}

/* Function call */
static int emit_call(struct jit_context *ctx, const struct bpf_insn *insn)
{
	bool fixed;
	u64 addr;

	/* Decode the call address */
	if (bpf_jit_get_func_addr(ctx->program, insn, false,
				  &addr, &fixed) < 0)
		return -1;
	if (!fixed)
		return -1;

	/* Push stack arguments */
	push_regs(ctx, JIT_STACK_REGS, 0, JIT_RESERVED_STACK);

	/* Emit function call */
	emit_mov_i(ctx, MIPS_R_T9, addr);
	emit(ctx, jalr, MIPS_R_RA, MIPS_R_T9);
	emit(ctx, nop); /* Delay slot */

	clobber_reg(ctx, MIPS_R_RA);
	clobber_reg(ctx, MIPS_R_V0);
	clobber_reg(ctx, MIPS_R_V1);
	return 0;
}

/* Function tail call */
static int emit_tail_call(struct jit_context *ctx)
{
	u8 ary = lo(bpf2mips32[BPF_REG_2]);
	u8 ind = lo(bpf2mips32[BPF_REG_3]);
	u8 t1 = MIPS_R_T8;
	u8 t2 = MIPS_R_T9;
	int off;

	/*
	 * Tail call:
	 * eBPF R1   - function argument (context ptr), passed in a0-a1
	 * eBPF R2   - ptr to object with array of function entry points
	 * eBPF R3   - array index of function to be called
	 * stack[sz] - remaining tail call count, initialized in prologue
	 */

	/* if (ind >= ary->map.max_entries) goto out */
	off = offsetof(struct bpf_array, map.max_entries);
	if (off > 0x7fff)
		return -1;
	emit(ctx, lw, t1, off, ary);             /* t1 = ary->map.max_entries*/
	emit_load_delay(ctx);                    /* Load delay slot          */
	emit(ctx, sltu, t1, ind, t1);            /* t1 = ind < t1            */
	emit(ctx, beqz, t1, get_offset(ctx, 1)); /* PC += off(1) if t1 == 0  */
						 /* (next insn delay slot)   */
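	/*
	 * The TCC is kept at offset 0 of the parent frame, which after
	 * our own frame allocation is found at SP + stack_size; see the
	 * TCC initialization in build_prologue().
	 */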
	/* if (TCC-- <= 0) goto out */
	emit(ctx, lw, t2, ctx->stack_size, MIPS_R_SP);  /* t2 = *(SP + size) */
	emit_load_delay(ctx);                     /* Load delay slot         */
	emit(ctx, blez, t2, get_offset(ctx, 1));  /* PC += off(1) if t2 <= 0 */
	emit(ctx, addiu, t2, t2, -1);             /* t2-- (delay slot)       */
	emit(ctx, sw, t2, ctx->stack_size, MIPS_R_SP);  /* *(SP + size) = t2 */

	/* prog = ary->ptrs[ind] */
	off = offsetof(struct bpf_array, ptrs);
	if (off > 0x7fff)
		return -1;
	emit(ctx, sll, t1, ind, 2);               /* t1 = ind << 2           */
	emit(ctx, addu, t1, t1, ary);             /* t1 += ary               */
	emit(ctx, lw, t2, off, t1);               /* t2 = *(t1 + off)        */
	emit_load_delay(ctx);                     /* Load delay slot         */

	/* if (prog == 0) goto out */
	emit(ctx, beqz, t2, get_offset(ctx, 1));  /* PC += off(1) if t2 == 0 */
	emit(ctx, nop);                           /* Delay slot              */

	/* func = prog->bpf_func + JIT_TCALL_SKIP (prologue skip offset) */
	off = offsetof(struct bpf_prog, bpf_func);
	if (off > 0x7fff)
		return -1;
	emit(ctx, lw, t1, off, t2);                /* t1 = *(t2 + off)       */
	emit_load_delay(ctx);                      /* Load delay slot        */
	emit(ctx, addiu, t1, t1, JIT_TCALL_SKIP);  /* t1 += skip (8 or 12)   */

	/* goto func */
	build_epilogue(ctx, t1);
	return 0;
}

/*
 * Stack frame layout for a JITed program (stack grows down).
 *
 * Higher address  : Caller's stack frame       :
 *                 :----------------------------:
 *                 : 64-bit eBPF args r3-r5     :
 *                 :----------------------------:
 *                 : Reserved / tail call count :
 *                 +============================+  <--- MIPS sp before call
 *                 | Callee-saved registers,    |
 *                 | including RA and FP        |
 *                 +----------------------------+  <--- eBPF FP (MIPS zero,fp)
 *                 | Local eBPF variables       |
 *                 | allocated by program       |
 *                 +----------------------------+
 *                 | Reserved for caller-saved  |
 *                 | registers                  |
 *                 +----------------------------+
 *                 | Reserved for 64-bit eBPF   |
 *                 | args r3-r5 & args passed   |
 *                 | on stack in kernel calls   |
 * Lower address   +============================+  <--- MIPS sp
 */

/* Build program prologue to set up the stack and registers */
void build_prologue(struct jit_context *ctx)
{
	const u8 *r1 = bpf2mips32[BPF_REG_1];
	const u8 *fp = bpf2mips32[BPF_REG_FP];
	int stack, saved, locals, reserved;

	/*
	 * The TCC is initialized with a single ori instruction, which can
	 * only encode a 16-bit immediate. In the unlikely event that the
	 * TCC limit is raised beyond 0xffff, fail the build rather than
	 * emit broken code: it is better to fail to compile than to
	 * degrade gracefully.
	 */
	BUILD_BUG_ON(MAX_TAIL_CALL_CNT > 0xffff);

	/*
	 * The first two instructions initialize TCC in the reserved (for us)
	 * 16-byte area in the parent's stack frame. On a tail call, the
	 * calling function jumps into the prologue after these instructions.
	 */
	emit(ctx, ori, MIPS_R_T9, MIPS_R_ZERO, MAX_TAIL_CALL_CNT);
	emit(ctx, sw, MIPS_R_T9, 0, MIPS_R_SP);

	/*
	 * Register eBPF R1 contains the 32-bit context pointer argument.
	 * A 32-bit argument is always passed in MIPS register a0, regardless
	 * of CPU endianness. Initialize R1 accordingly and zero-extend.
	 */
#ifdef __BIG_ENDIAN
	emit(ctx, move, lo(r1), MIPS_R_A0);
#endif

	/* === Entry-point for tail calls === */

	/* Zero-extend the 32-bit argument */
	emit(ctx, move, hi(r1), MIPS_R_ZERO);

	/* If the eBPF frame pointer was accessed it must be saved */
	if (ctx->accessed & BIT(BPF_REG_FP))
		clobber_reg64(ctx, fp);

	/* Compute the stack space needed for callee-saved registers */
	saved = hweight32(ctx->clobbered & JIT_CALLEE_REGS) * sizeof(u32);
	saved = ALIGN(saved, MIPS_STACK_ALIGNMENT);

	/* Stack space used by eBPF program local data */
	locals = ALIGN(ctx->program->aux->stack_depth, MIPS_STACK_ALIGNMENT);

	/*
	 * If we are emitting function calls, reserve extra stack space for
	 * caller-saved registers and function arguments passed on the stack.
	 * The required space is computed automatically during resource
	 * usage discovery (pass 1).
	 */
	reserved = ctx->stack_used;

	/* Allocate the stack frame */
	stack = ALIGN(saved + locals + reserved, MIPS_STACK_ALIGNMENT);
	emit(ctx, addiu, MIPS_R_SP, MIPS_R_SP, -stack);

	/* Store callee-saved registers on stack */
	push_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0, stack - saved);
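	/*
	 * push_regs() above stores the callee-saved registers in the top
	 * 'saved' bytes of the frame, matching the layout diagram; the
	 * eBPF frame pointer initialized below points at the base of that
	 * area, which is also the top of the program's local data.
	 */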

	/* Initialize the eBPF frame pointer if accessed */
	if (ctx->accessed & BIT(BPF_REG_FP))
		emit(ctx, addiu, lo(fp), MIPS_R_SP, stack - saved);

	ctx->saved_size = saved;
	ctx->stack_size = stack;
}

/* Build the program epilogue to restore the stack and registers */
void build_epilogue(struct jit_context *ctx, int dest_reg)
{
	/* Restore callee-saved registers from stack */
	pop_regs(ctx, ctx->clobbered & JIT_CALLEE_REGS, 0,
		 ctx->stack_size - ctx->saved_size);
	/*
	 * A 32-bit return value is always passed in MIPS register v0,
	 * but on big-endian targets the low part of R0 is mapped to v1.
	 */
#ifdef __BIG_ENDIAN
	emit(ctx, move, MIPS_R_V0, MIPS_R_V1);
#endif

	/* Jump to the return address and adjust the stack pointer */
	emit(ctx, jr, dest_reg);
	emit(ctx, addiu, MIPS_R_SP, MIPS_R_SP, ctx->stack_size);
}

/* Build one eBPF instruction */
int build_insn(const struct bpf_insn *insn, struct jit_context *ctx)
{
	const u8 *dst = bpf2mips32[insn->dst_reg];
	const u8 *src = bpf2mips32[insn->src_reg];
	const u8 *res = bpf2mips32[BPF_REG_0];
	const u8 *tmp = bpf2mips32[JIT_REG_TMP];
	u8 code = insn->code;
	s16 off = insn->off;
	s32 imm = insn->imm;
	s32 val, rel;
	u8 alu, jmp;

	switch (code) {
	/* ALU operations */
	/* dst = imm */
	case BPF_ALU | BPF_MOV | BPF_K:
		emit_mov_i(ctx, lo(dst), imm);
		emit_zext_ver(ctx, dst);
		break;
	/* dst = src */
	case BPF_ALU | BPF_MOV | BPF_X:
		if (imm == 1) {
			/* Special mov32 for zext */
			emit_mov_i(ctx, hi(dst), 0);
		} else {
			emit_mov_r(ctx, lo(dst), lo(src));
			emit_zext_ver(ctx, dst);
		}
		break;
	/* dst = -dst */
	case BPF_ALU | BPF_NEG:
		emit_alu_i(ctx, lo(dst), 0, BPF_NEG);
		emit_zext_ver(ctx, dst);
		break;
	/* dst = dst & imm */
	/* dst = dst | imm */
	/* dst = dst ^ imm */
	/* dst = dst << imm */
	/* dst = dst >> imm */
	/* dst = dst >> imm (arithmetic) */
	/* dst = dst + imm */
	/* dst = dst - imm */
	/* dst = dst * imm */
	/* dst = dst / imm */
	/* dst = dst % imm */
	case BPF_ALU | BPF_OR | BPF_K:
	case BPF_ALU | BPF_AND | BPF_K:
	case BPF_ALU | BPF_XOR | BPF_K:
	case BPF_ALU | BPF_LSH | BPF_K:
	case BPF_ALU | BPF_RSH | BPF_K:
	case BPF_ALU | BPF_ARSH | BPF_K:
	case BPF_ALU | BPF_ADD | BPF_K:
	case BPF_ALU | BPF_SUB | BPF_K:
	case BPF_ALU | BPF_MUL | BPF_K:
	case BPF_ALU | BPF_DIV | BPF_K:
	case BPF_ALU | BPF_MOD | BPF_K:
		if (!valid_alu_i(BPF_OP(code), imm)) {
			emit_mov_i(ctx, MIPS_R_T6, imm);
			emit_alu_r(ctx, lo(dst), MIPS_R_T6, BPF_OP(code));
		} else if (rewrite_alu_i(BPF_OP(code), imm, &alu, &val)) {
			emit_alu_i(ctx, lo(dst), val, alu);
		}
		emit_zext_ver(ctx, dst);
		break;
	/* dst = dst & src */
	/* dst = dst | src */
	/* dst = dst ^ src */
	/* dst = dst << src */
	/* dst = dst >> src */
	/* dst = dst >> src (arithmetic) */
	/* dst = dst + src */
	/* dst = dst - src */
	/* dst = dst * src */
	/* dst = dst / src */
	/* dst = dst % src */
	case BPF_ALU | BPF_AND | BPF_X:
	case BPF_ALU | BPF_OR | BPF_X:
	case BPF_ALU | BPF_XOR | BPF_X:
	case BPF_ALU | BPF_LSH | BPF_X:
	case BPF_ALU | BPF_RSH | BPF_X:
	case BPF_ALU | BPF_ARSH | BPF_X:
	case BPF_ALU | BPF_ADD | BPF_X:
	case BPF_ALU | BPF_SUB | BPF_X:
	case BPF_ALU | BPF_MUL | BPF_X:
	case BPF_ALU | BPF_DIV | BPF_X:
	case BPF_ALU | BPF_MOD | BPF_X:
		emit_alu_r(ctx, lo(dst), lo(src), BPF_OP(code));
		emit_zext_ver(ctx, dst);
		break;
	/* dst = imm (64-bit) */
	case BPF_ALU64 | BPF_MOV | BPF_K:
		emit_mov_se_i64(ctx, dst, imm);
		break;
	/* dst = src (64-bit) */
	case BPF_ALU64 | BPF_MOV | BPF_X:
		emit_mov_r(ctx, lo(dst), lo(src));
		emit_mov_r(ctx, hi(dst), hi(src));
		break;
	/* dst = -dst (64-bit) */
	case BPF_ALU64 | BPF_NEG:
		emit_neg_i64(ctx, dst);
		break;
	/* dst = dst & imm (64-bit) */
	case BPF_ALU64 | BPF_AND | BPF_K:
		emit_alu_i64(ctx, dst, imm, BPF_OP(code));
		break;
	/* dst = dst | imm (64-bit) */
	/* dst = dst ^ imm (64-bit) */
	/* dst = dst + imm (64-bit) */
	/* dst = dst - imm (64-bit) */
	case BPF_ALU64 | BPF_OR | BPF_K:
	case BPF_ALU64 | BPF_XOR | BPF_K:
	case BPF_ALU64 | BPF_ADD | BPF_K:
	case BPF_ALU64 | BPF_SUB | BPF_K:
		if (imm)
			emit_alu_i64(ctx, dst, imm, BPF_OP(code));
		break;
	/* dst = dst << imm (64-bit) */
	/* dst = dst >> imm (64-bit) */
	/* dst = dst >> imm (64-bit, arithmetic) */
	case BPF_ALU64 | BPF_LSH | BPF_K:
	case BPF_ALU64 | BPF_RSH | BPF_K:
	case BPF_ALU64 | BPF_ARSH | BPF_K:
		if (imm)
			emit_shift_i64(ctx, dst, imm, BPF_OP(code));
		break;
	/* dst = dst * imm (64-bit) */
	case BPF_ALU64 | BPF_MUL | BPF_K:
		emit_mul_i64(ctx, dst, imm);
		break;
	/* dst = dst / imm (64-bit) */
	/* dst = dst % imm (64-bit) */
	case BPF_ALU64 | BPF_DIV | BPF_K:
	case BPF_ALU64 | BPF_MOD | BPF_K:
		/*
		 * Sign-extend the immediate value into a temporary register,
		 * and then do the operation on this register.
		 */
		emit_mov_se_i64(ctx, tmp, imm);
		emit_divmod_r64(ctx, dst, tmp, BPF_OP(code));
		break;
	/* dst = dst & src (64-bit) */
	/* dst = dst | src (64-bit) */
	/* dst = dst ^ src (64-bit) */
	/* dst = dst + src (64-bit) */
	/* dst = dst - src (64-bit) */
	case BPF_ALU64 | BPF_AND | BPF_X:
	case BPF_ALU64 | BPF_OR | BPF_X:
	case BPF_ALU64 | BPF_XOR | BPF_X:
	case BPF_ALU64 | BPF_ADD | BPF_X:
	case BPF_ALU64 | BPF_SUB | BPF_X:
		emit_alu_r64(ctx, dst, src, BPF_OP(code));
		break;
	/* dst = dst << src (64-bit) */
	/* dst = dst >> src (64-bit) */
	/* dst = dst >> src (64-bit, arithmetic) */
	case BPF_ALU64 | BPF_LSH | BPF_X:
	case BPF_ALU64 | BPF_RSH | BPF_X:
	case BPF_ALU64 | BPF_ARSH | BPF_X:
		emit_shift_r64(ctx, dst, lo(src), BPF_OP(code));
		break;
	/* dst = dst * src (64-bit) */
	case BPF_ALU64 | BPF_MUL | BPF_X:
		emit_mul_r64(ctx, dst, src);
		break;
	/* dst = dst / src (64-bit) */
	/* dst = dst % src (64-bit) */
	case BPF_ALU64 | BPF_DIV | BPF_X:
	case BPF_ALU64 | BPF_MOD | BPF_X:
		emit_divmod_r64(ctx, dst, src, BPF_OP(code));
		break;
	/* dst = htole(dst) */
	/* dst = htobe(dst) */
	case BPF_ALU | BPF_END | BPF_FROM_LE:
	case BPF_ALU | BPF_END | BPF_FROM_BE:
		if (BPF_SRC(code) ==
#ifdef __BIG_ENDIAN
		    BPF_FROM_LE
#else
		    BPF_FROM_BE
#endif
		    )
			emit_bswap_r64(ctx, dst, imm);
		else
			emit_trunc_r64(ctx, dst, imm);
		break;
	/* dst = imm64 */
	case BPF_LD | BPF_IMM | BPF_DW:
		emit_mov_i(ctx, lo(dst), imm);
		emit_mov_i(ctx, hi(dst), insn[1].imm);
		return 1;
	/* LDX: dst = *(size *)(src + off) */
	case BPF_LDX | BPF_MEM | BPF_W:
	case BPF_LDX | BPF_MEM | BPF_H:
	case BPF_LDX | BPF_MEM | BPF_B:
	case BPF_LDX | BPF_MEM | BPF_DW:
		emit_ldx(ctx, dst, lo(src), off, BPF_SIZE(code));
		break;
	/* ST: *(size *)(dst + off) = imm */
	case BPF_ST | BPF_MEM | BPF_W:
	case BPF_ST | BPF_MEM | BPF_H:
	case BPF_ST | BPF_MEM | BPF_B:
	case BPF_ST | BPF_MEM | BPF_DW:
		switch (BPF_SIZE(code)) {
		case BPF_DW:
			/* Sign-extend immediate value into temporary reg */
			emit_mov_se_i64(ctx, tmp, imm);
			break;
		case BPF_W:
		case BPF_H:
		case BPF_B:
			emit_mov_i(ctx, lo(tmp), imm);
			break;
		}
		emit_stx(ctx, lo(dst), tmp, off, BPF_SIZE(code));
		break;
	/* STX: *(size *)(dst + off) = src */
	case BPF_STX | BPF_MEM | BPF_W:
	case BPF_STX | BPF_MEM | BPF_H:
	case BPF_STX | BPF_MEM | BPF_B:
	case BPF_STX | BPF_MEM | BPF_DW:
		emit_stx(ctx, lo(dst), src, off, BPF_SIZE(code));
		break;
	/* Speculation barrier */
	case BPF_ST | BPF_NOSPEC:
		break;
	/* Atomics */
	case BPF_STX | BPF_ATOMIC | BPF_W:
		switch (imm) {
		case BPF_ADD:
		case BPF_ADD | BPF_FETCH:
		case BPF_AND:
		case BPF_AND | BPF_FETCH:
		case BPF_OR:
		case BPF_OR | BPF_FETCH:
		case BPF_XOR:
		case BPF_XOR | BPF_FETCH:
		case BPF_XCHG:
			if (cpu_has_llsc)
				emit_atomic_r(ctx, lo(dst), lo(src), off, imm);
			else /* Non-ll/sc fallback */
				emit_atomic_r32(ctx, lo(dst), lo(src),
						off, imm);
			if (imm & BPF_FETCH)
				emit_zext_ver(ctx, src);
			break;
		case BPF_CMPXCHG:
			if (cpu_has_llsc)
				emit_cmpxchg_r(ctx, lo(dst), lo(src),
					       lo(res), off);
			else /* Non-ll/sc fallback */
				emit_cmpxchg_r32(ctx, lo(dst), lo(src), off);
			/* Result zero-extension inserted by verifier */
			break;
		default:
			goto notyet;
		}
		break;
	/* Atomics (64-bit) */
	case BPF_STX | BPF_ATOMIC | BPF_DW:
		switch (imm) {
		case BPF_ADD:
		case BPF_ADD | BPF_FETCH:
		case BPF_AND:
		case BPF_AND | BPF_FETCH:
		case BPF_OR:
		case BPF_OR | BPF_FETCH:
		case BPF_XOR:
		case BPF_XOR | BPF_FETCH:
		case BPF_XCHG:
			emit_atomic_r64(ctx, lo(dst), src, off, imm);
			break;
		case BPF_CMPXCHG:
			emit_cmpxchg_r64(ctx, lo(dst), src, off);
			break;
		default:
			goto notyet;
		}
		break;
	/* PC += off if dst == src */
	/* PC += off if dst != src */
	/* PC += off if dst & src */
	/* PC += off if dst > src */
	/* PC += off if dst >= src */
	/* PC += off if dst < src */
	/* PC += off if dst <= src */
	/* PC += off if dst > src (signed) */
	/* PC += off if dst >= src (signed) */
	/* PC += off if dst < src (signed) */
	/* PC += off if dst <= src (signed) */
	case BPF_JMP32 | BPF_JEQ | BPF_X:
	case BPF_JMP32 | BPF_JNE | BPF_X:
	case BPF_JMP32 | BPF_JSET | BPF_X:
	case BPF_JMP32 | BPF_JGT | BPF_X:
	case BPF_JMP32 | BPF_JGE | BPF_X:
	case BPF_JMP32 | BPF_JLT | BPF_X:
	case BPF_JMP32 | BPF_JLE | BPF_X:
	case BPF_JMP32 | BPF_JSGT | BPF_X:
	case BPF_JMP32 | BPF_JSGE | BPF_X:
	case BPF_JMP32 | BPF_JSLT | BPF_X:
	case BPF_JMP32 | BPF_JSLE | BPF_X:
		if (off == 0)
			break;
		setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel);
		emit_jmp_r(ctx, lo(dst), lo(src), rel, jmp);
		if (finish_jmp(ctx, jmp, off) < 0)
			goto toofar;
		break;
	/* PC += off if dst == imm */
	/* PC += off if dst != imm */
	/* PC += off if dst & imm */
	/* PC += off if dst > imm */
	/* PC += off if dst >= imm */
	/* PC += off if dst < imm */
	/* PC += off if dst <= imm */
	/* PC += off if dst > imm (signed) */
	/* PC += off if dst >= imm (signed) */
	/* PC += off if dst < imm (signed) */
	/* PC += off if dst <= imm (signed) */
	case BPF_JMP32 | BPF_JEQ | BPF_K:
	case BPF_JMP32 | BPF_JNE | BPF_K:
	case BPF_JMP32 | BPF_JSET | BPF_K:
	case BPF_JMP32 | BPF_JGT | BPF_K:
	case BPF_JMP32 | BPF_JGE | BPF_K:
	case BPF_JMP32 | BPF_JLT | BPF_K:
	case BPF_JMP32 | BPF_JLE | BPF_K:
	case BPF_JMP32 | BPF_JSGT | BPF_K:
	case BPF_JMP32 | BPF_JSGE | BPF_K:
	case BPF_JMP32 | BPF_JSLT | BPF_K:
	case BPF_JMP32 | BPF_JSLE | BPF_K:
		if (off == 0)
			break;
		setup_jmp_i(ctx, imm, 32, BPF_OP(code), off, &jmp, &rel);
		if (valid_jmp_i(jmp, imm)) {
			emit_jmp_i(ctx, lo(dst), imm, rel, jmp);
		} else {
			/* Move large immediate to register */
			emit_mov_i(ctx, MIPS_R_T6, imm);
			emit_jmp_r(ctx, lo(dst), MIPS_R_T6, rel, jmp);
		}
		if (finish_jmp(ctx, jmp, off) < 0)
			goto toofar;
		break;
	/* PC += off if dst == src */
	/* PC += off if dst != src */
	/* PC += off if dst & src */
	/* PC += off if dst > src */
	/* PC += off if dst >= src */
	/* PC += off if dst < src */
	/* PC += off if dst <= src */
	/* PC += off if dst > src (signed) */
	/* PC += off if dst >= src (signed) */
	/* PC += off if dst < src (signed) */
	/* PC += off if dst <= src (signed) */
	case BPF_JMP | BPF_JEQ | BPF_X:
	case BPF_JMP | BPF_JNE | BPF_X:
	case BPF_JMP | BPF_JSET | BPF_X:
	case BPF_JMP | BPF_JGT | BPF_X:
	case BPF_JMP | BPF_JGE | BPF_X:
	case BPF_JMP | BPF_JLT | BPF_X:
	case BPF_JMP | BPF_JLE | BPF_X:
	case BPF_JMP | BPF_JSGT | BPF_X:
	case BPF_JMP | BPF_JSGE | BPF_X:
	case BPF_JMP | BPF_JSLT | BPF_X:
	case BPF_JMP | BPF_JSLE | BPF_X:
		if (off == 0)
			break;
		setup_jmp_r(ctx, dst == src, BPF_OP(code), off, &jmp, &rel);
		emit_jmp_r64(ctx, dst, src, rel, jmp);
		if (finish_jmp(ctx, jmp, off) < 0)
			goto toofar;
		break;
	/* PC += off if dst == imm */
	/* PC += off if dst != imm */
	/* PC += off if dst & imm */
	/* PC += off if dst > imm */
	/* PC += off if dst >= imm */
	/* PC += off if dst < imm */
	/* PC += off if dst <= imm */
	/* PC += off if dst > imm (signed) */
	/* PC += off if dst >= imm (signed) */
	/* PC += off if dst < imm (signed) */
	/* PC += off if dst <= imm (signed) */
	case BPF_JMP | BPF_JEQ | BPF_K:
	case BPF_JMP | BPF_JNE | BPF_K:
	case BPF_JMP | BPF_JSET | BPF_K:
	case BPF_JMP | BPF_JGT | BPF_K:
	case BPF_JMP | BPF_JGE | BPF_K:
	case BPF_JMP | BPF_JLT | BPF_K:
	case BPF_JMP | BPF_JLE | BPF_K:
	case BPF_JMP | BPF_JSGT | BPF_K:
	case BPF_JMP | BPF_JSGE | BPF_K:
	case BPF_JMP | BPF_JSLT | BPF_K:
	case BPF_JMP | BPF_JSLE | BPF_K:
		if (off == 0)
			break;
		setup_jmp_i(ctx, imm, 64, BPF_OP(code), off, &jmp, &rel);
		emit_jmp_i64(ctx, dst, imm, rel, jmp);
		if (finish_jmp(ctx, jmp, off) < 0)
			goto toofar;
		break;
	/* PC += off */
	case BPF_JMP | BPF_JA:
		if (off == 0)
			break;
		if (emit_ja(ctx, off) < 0)
			goto toofar;
		break;
	/* Tail call */
	case BPF_JMP | BPF_TAIL_CALL:
		if (emit_tail_call(ctx) < 0)
			goto invalid;
		break;
	/* Function call */
	case BPF_JMP | BPF_CALL:
		if (emit_call(ctx, insn) < 0)
			goto invalid;
		break;
	/* Function return */
	case BPF_JMP | BPF_EXIT:
		/*
		 * Optimization: when the last instruction is EXIT, simply
		 * fall through to the epilogue.
		 */
		if (ctx->bpf_index == ctx->program->len - 1)
			break;
		if (emit_exit(ctx) < 0)
			goto toofar;
		break;

	default:
invalid:
		pr_err_once("unknown opcode %02x\n", code);
		return -EINVAL;
notyet:
		pr_info_once("*** NOT YET: opcode %02x ***\n", code);
		return -EFAULT;
toofar:
		pr_info_once("*** TOO FAR: jump at %u opcode %02x ***\n",
			     ctx->bpf_index, code);
		return -E2BIG;
	}
	return 0;
}