1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Just-In-Time compiler for eBPF filters on IA32 (32bit x86)
4 *
5 * Author: Wang YanQing (udknight@gmail.com)
6 * The code based on code and ideas from:
7 * Eric Dumazet (eric.dumazet@gmail.com)
8 * and from:
9 * Shubham Bansal <illusionist.neo@gmail.com>
10 */
11
12#include <linux/netdevice.h>
13#include <linux/filter.h>
14#include <linux/if_vlan.h>
15#include <asm/cacheflush.h>
16#include <asm/set_memory.h>
17#include <asm/nospec-branch.h>
18#include <asm/asm-prototypes.h>
19#include <linux/bpf.h>
20
21/*
22 * eBPF prog stack layout:
23 *
24 *                         high
25 * original ESP =>        +-----+
26 *                        |     | callee saved registers
27 *                        +-----+
28 *                        | ... | eBPF JIT scratch space
29 * BPF_FP,IA32_EBP  =>    +-----+
30 *                        | ... | eBPF prog stack
31 *                        +-----+
32 *                        |RSVD | JIT scratchpad
33 * current ESP =>         +-----+
34 *                        |     |
35 *                        | ... | Function call stack
36 *                        |     |
37 *                        +-----+
38 *                          low
39 *
40 * The callee saved registers:
41 *
42 *                                high
43 * original ESP =>        +------------------+ \
44 *                        |        ebp       | |
45 * current EBP =>         +------------------+ } callee saved registers
46 *                        |    ebx,esi,edi   | |
47 *                        +------------------+ /
48 *                                low
49 */
50
51static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
52{
53	if (len == 1)
54		*ptr = bytes;
55	else if (len == 2)
56		*(u16 *)ptr = bytes;
57	else {
58		*(u32 *)ptr = bytes;
59		barrier();
60	}
61	return ptr + len;
62}
63
64#define EMIT(bytes, len) \
65	do { prog = emit_code(prog, bytes, len); cnt += len; } while (0)
66
67#define EMIT1(b1)		EMIT(b1, 1)
68#define EMIT2(b1, b2)		EMIT((b1) + ((b2) << 8), 2)
69#define EMIT3(b1, b2, b3)	EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
70#define EMIT4(b1, b2, b3, b4)   \
71	EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
72
73#define EMIT1_off32(b1, off) \
74	do { EMIT1(b1); EMIT(off, 4); } while (0)
75#define EMIT2_off32(b1, b2, off) \
76	do { EMIT2(b1, b2); EMIT(off, 4); } while (0)
77#define EMIT3_off32(b1, b2, b3, off) \
78	do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
79#define EMIT4_off32(b1, b2, b3, b4, off) \
80	do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)
81
82#define jmp_label(label, jmp_insn_len) (label - cnt - jmp_insn_len)
83
84static bool is_imm8(int value)
85{
86	return value <= 127 && value >= -128;
87}
88
89static bool is_simm32(s64 value)
90{
91	return value == (s64) (s32) value;
92}
93
94#define STACK_OFFSET(k)	(k)
95#define TCALL_CNT	(MAX_BPF_JIT_REG + 0)	/* Tail Call Count */
96
97#define IA32_EAX	(0x0)
98#define IA32_EBX	(0x3)
99#define IA32_ECX	(0x1)
100#define IA32_EDX	(0x2)
101#define IA32_ESI	(0x6)
102#define IA32_EDI	(0x7)
103#define IA32_EBP	(0x5)
104#define IA32_ESP	(0x4)
105
106/*
107 * List of x86 cond jumps opcodes (. + s8)
108 * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
109 */
110#define IA32_JB  0x72
111#define IA32_JAE 0x73
112#define IA32_JE  0x74
113#define IA32_JNE 0x75
114#define IA32_JBE 0x76
115#define IA32_JA  0x77
116#define IA32_JL  0x7C
117#define IA32_JGE 0x7D
118#define IA32_JLE 0x7E
119#define IA32_JG  0x7F
120
121#define COND_JMP_OPCODE_INVALID	(0xFF)
122
123/*
124 * Map eBPF registers to IA32 32bit registers or stack scratch space.
125 *
126 * 1. All the registers, R0-R10, are mapped to scratch space on stack.
127 * 2. We need two 64 bit temp registers to do complex operations on eBPF
128 *    registers.
129 * 3. For performance reason, the BPF_REG_AX for blinding constant, is
130 *    mapped to real hardware register pair, IA32_ESI and IA32_EDI.
131 *
132 * As the eBPF registers are all 64 bit registers and IA32 has only 32 bit
133 * registers, we have to map each eBPF registers with two IA32 32 bit regs
134 * or scratch memory space and we have to build eBPF 64 bit register from those.
135 *
136 * We use IA32_EAX, IA32_EDX, IA32_ECX, IA32_EBX as temporary registers.
137 */
138static const u8 bpf2ia32[][2] = {
139	/* Return value from in-kernel function, and exit value from eBPF */
140	[BPF_REG_0] = {STACK_OFFSET(0), STACK_OFFSET(4)},
141
142	/* The arguments from eBPF program to in-kernel function */
143	/* Stored on stack scratch space */
144	[BPF_REG_1] = {STACK_OFFSET(8), STACK_OFFSET(12)},
145	[BPF_REG_2] = {STACK_OFFSET(16), STACK_OFFSET(20)},
146	[BPF_REG_3] = {STACK_OFFSET(24), STACK_OFFSET(28)},
147	[BPF_REG_4] = {STACK_OFFSET(32), STACK_OFFSET(36)},
148	[BPF_REG_5] = {STACK_OFFSET(40), STACK_OFFSET(44)},
149
150	/* Callee saved registers that in-kernel function will preserve */
151	/* Stored on stack scratch space */
152	[BPF_REG_6] = {STACK_OFFSET(48), STACK_OFFSET(52)},
153	[BPF_REG_7] = {STACK_OFFSET(56), STACK_OFFSET(60)},
154	[BPF_REG_8] = {STACK_OFFSET(64), STACK_OFFSET(68)},
155	[BPF_REG_9] = {STACK_OFFSET(72), STACK_OFFSET(76)},
156
157	/* Read only Frame Pointer to access Stack */
158	[BPF_REG_FP] = {STACK_OFFSET(80), STACK_OFFSET(84)},
159
160	/* Temporary register for blinding constants. */
161	[BPF_REG_AX] = {IA32_ESI, IA32_EDI},
162
163	/* Tail call count. Stored on stack scratch space. */
164	[TCALL_CNT] = {STACK_OFFSET(88), STACK_OFFSET(92)},
165};
166
167#define dst_lo	dst[0]
168#define dst_hi	dst[1]
169#define src_lo	src[0]
170#define src_hi	src[1]
171
172#define STACK_ALIGNMENT	8
173/*
174 * Stack space for BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4,
175 * BPF_REG_5, BPF_REG_6, BPF_REG_7, BPF_REG_8, BPF_REG_9,
176 * BPF_REG_FP, BPF_REG_AX and Tail call counts.
177 */
178#define SCRATCH_SIZE 96
179
180/* Total stack size used in JITed code */
181#define _STACK_SIZE	(stack_depth + SCRATCH_SIZE)
182
183#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
184
185/* Get the offset of eBPF REGISTERs stored on scratch space. */
186#define STACK_VAR(off) (off)
187
188/* Encode 'dst_reg' register into IA32 opcode 'byte' */
189static u8 add_1reg(u8 byte, u32 dst_reg)
190{
191	return byte + dst_reg;
192}
193
194/* Encode 'dst_reg' and 'src_reg' registers into IA32 opcode 'byte' */
195static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
196{
197	return byte + dst_reg + (src_reg << 3);
198}
199
200static void jit_fill_hole(void *area, unsigned int size)
201{
202	/* Fill whole space with int3 instructions */
203	memset(area, 0xcc, size);
204}
205
206static inline void emit_ia32_mov_i(const u8 dst, const u32 val, bool dstk,
207				   u8 **pprog)
208{
209	u8 *prog = *pprog;
210	int cnt = 0;
211
212	if (dstk) {
213		if (val == 0) {
214			/* xor eax,eax */
215			EMIT2(0x33, add_2reg(0xC0, IA32_EAX, IA32_EAX));
216			/* mov dword ptr [ebp+off],eax */
217			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
218			      STACK_VAR(dst));
219		} else {
220			EMIT3_off32(0xC7, add_1reg(0x40, IA32_EBP),
221				    STACK_VAR(dst), val);
222		}
223	} else {
224		if (val == 0)
225			EMIT2(0x33, add_2reg(0xC0, dst, dst));
226		else
227			EMIT2_off32(0xC7, add_1reg(0xC0, dst),
228				    val);
229	}
230	*pprog = prog;
231}
232
233/* dst = imm (4 bytes)*/
234static inline void emit_ia32_mov_r(const u8 dst, const u8 src, bool dstk,
235				   bool sstk, u8 **pprog)
236{
237	u8 *prog = *pprog;
238	int cnt = 0;
239	u8 sreg = sstk ? IA32_EAX : src;
240
241	if (sstk)
242		/* mov eax,dword ptr [ebp+off] */
243		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src));
244	if (dstk)
245		/* mov dword ptr [ebp+off],eax */
246		EMIT3(0x89, add_2reg(0x40, IA32_EBP, sreg), STACK_VAR(dst));
247	else
248		/* mov dst,sreg */
249		EMIT2(0x89, add_2reg(0xC0, dst, sreg));
250
251	*pprog = prog;
252}
253
254/* dst = src */
255static inline void emit_ia32_mov_r64(const bool is64, const u8 dst[],
256				     const u8 src[], bool dstk,
257				     bool sstk, u8 **pprog,
258				     const struct bpf_prog_aux *aux)
259{
260	emit_ia32_mov_r(dst_lo, src_lo, dstk, sstk, pprog);
261	if (is64)
262		/* complete 8 byte move */
263		emit_ia32_mov_r(dst_hi, src_hi, dstk, sstk, pprog);
264	else if (!aux->verifier_zext)
265		/* zero out high 4 bytes */
266		emit_ia32_mov_i(dst_hi, 0, dstk, pprog);
267}
268
269/* Sign extended move */
270static inline void emit_ia32_mov_i64(const bool is64, const u8 dst[],
271				     const u32 val, bool dstk, u8 **pprog)
272{
273	u32 hi = 0;
274
275	if (is64 && (val & (1<<31)))
276		hi = (u32)~0;
277	emit_ia32_mov_i(dst_lo, val, dstk, pprog);
278	emit_ia32_mov_i(dst_hi, hi, dstk, pprog);
279}
280
281/*
282 * ALU operation (32 bit)
283 * dst = dst * src
284 */
285static inline void emit_ia32_mul_r(const u8 dst, const u8 src, bool dstk,
286				   bool sstk, u8 **pprog)
287{
288	u8 *prog = *pprog;
289	int cnt = 0;
290	u8 sreg = sstk ? IA32_ECX : src;
291
292	if (sstk)
293		/* mov ecx,dword ptr [ebp+off] */
294		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src));
295
296	if (dstk)
297		/* mov eax,dword ptr [ebp+off] */
298		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
299	else
300		/* mov eax,dst */
301		EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX));
302
303
304	EMIT2(0xF7, add_1reg(0xE0, sreg));
305
306	if (dstk)
307		/* mov dword ptr [ebp+off],eax */
308		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
309		      STACK_VAR(dst));
310	else
311		/* mov dst,eax */
312		EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX));
313
314	*pprog = prog;
315}
316
317static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val,
318					 bool dstk, u8 **pprog,
319					 const struct bpf_prog_aux *aux)
320{
321	u8 *prog = *pprog;
322	int cnt = 0;
323	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
324	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
325
326	if (dstk && val != 64) {
327		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
328		      STACK_VAR(dst_lo));
329		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
330		      STACK_VAR(dst_hi));
331	}
332	switch (val) {
333	case 16:
334		/*
335		 * Emit 'movzwl eax,ax' to zero extend 16-bit
336		 * into 64 bit
337		 */
338		EMIT2(0x0F, 0xB7);
339		EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
340		if (!aux->verifier_zext)
341			/* xor dreg_hi,dreg_hi */
342			EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
343		break;
344	case 32:
345		if (!aux->verifier_zext)
346			/* xor dreg_hi,dreg_hi */
347			EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
348		break;
349	case 64:
350		/* nop */
351		break;
352	}
353
354	if (dstk && val != 64) {
355		/* mov dword ptr [ebp+off],dreg_lo */
356		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
357		      STACK_VAR(dst_lo));
358		/* mov dword ptr [ebp+off],dreg_hi */
359		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
360		      STACK_VAR(dst_hi));
361	}
362	*pprog = prog;
363}
364
365static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val,
366				       bool dstk, u8 **pprog,
367				       const struct bpf_prog_aux *aux)
368{
369	u8 *prog = *pprog;
370	int cnt = 0;
371	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
372	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
373
374	if (dstk) {
375		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
376		      STACK_VAR(dst_lo));
377		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
378		      STACK_VAR(dst_hi));
379	}
380	switch (val) {
381	case 16:
382		/* Emit 'ror %ax, 8' to swap lower 2 bytes */
383		EMIT1(0x66);
384		EMIT3(0xC1, add_1reg(0xC8, dreg_lo), 8);
385
386		EMIT2(0x0F, 0xB7);
387		EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
388
389		if (!aux->verifier_zext)
390			/* xor dreg_hi,dreg_hi */
391			EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
392		break;
393	case 32:
394		/* Emit 'bswap eax' to swap lower 4 bytes */
395		EMIT1(0x0F);
396		EMIT1(add_1reg(0xC8, dreg_lo));
397
398		if (!aux->verifier_zext)
399			/* xor dreg_hi,dreg_hi */
400			EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
401		break;
402	case 64:
403		/* Emit 'bswap eax' to swap lower 4 bytes */
404		EMIT1(0x0F);
405		EMIT1(add_1reg(0xC8, dreg_lo));
406
407		/* Emit 'bswap edx' to swap lower 4 bytes */
408		EMIT1(0x0F);
409		EMIT1(add_1reg(0xC8, dreg_hi));
410
411		/* mov ecx,dreg_hi */
412		EMIT2(0x89, add_2reg(0xC0, IA32_ECX, dreg_hi));
413		/* mov dreg_hi,dreg_lo */
414		EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
415		/* mov dreg_lo,ecx */
416		EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX));
417
418		break;
419	}
420	if (dstk) {
421		/* mov dword ptr [ebp+off],dreg_lo */
422		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
423		      STACK_VAR(dst_lo));
424		/* mov dword ptr [ebp+off],dreg_hi */
425		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
426		      STACK_VAR(dst_hi));
427	}
428	*pprog = prog;
429}
430
431/*
432 * ALU operation (32 bit)
433 * dst = dst (div|mod) src
434 */
435static inline void emit_ia32_div_mod_r(const u8 op, const u8 dst, const u8 src,
436				       bool dstk, bool sstk, u8 **pprog)
437{
438	u8 *prog = *pprog;
439	int cnt = 0;
440
441	if (sstk)
442		/* mov ecx,dword ptr [ebp+off] */
443		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
444		      STACK_VAR(src));
445	else if (src != IA32_ECX)
446		/* mov ecx,src */
447		EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX));
448
449	if (dstk)
450		/* mov eax,dword ptr [ebp+off] */
451		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
452		      STACK_VAR(dst));
453	else
454		/* mov eax,dst */
455		EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX));
456
457	/* xor edx,edx */
458	EMIT2(0x31, add_2reg(0xC0, IA32_EDX, IA32_EDX));
459	/* div ecx */
460	EMIT2(0xF7, add_1reg(0xF0, IA32_ECX));
461
462	if (op == BPF_MOD) {
463		if (dstk)
464			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
465			      STACK_VAR(dst));
466		else
467			EMIT2(0x89, add_2reg(0xC0, dst, IA32_EDX));
468	} else {
469		if (dstk)
470			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
471			      STACK_VAR(dst));
472		else
473			EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX));
474	}
475	*pprog = prog;
476}
477
478/*
479 * ALU operation (32 bit)
480 * dst = dst (shift) src
481 */
482static inline void emit_ia32_shift_r(const u8 op, const u8 dst, const u8 src,
483				     bool dstk, bool sstk, u8 **pprog)
484{
485	u8 *prog = *pprog;
486	int cnt = 0;
487	u8 dreg = dstk ? IA32_EAX : dst;
488	u8 b2;
489
490	if (dstk)
491		/* mov eax,dword ptr [ebp+off] */
492		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
493
494	if (sstk)
495		/* mov ecx,dword ptr [ebp+off] */
496		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src));
497	else if (src != IA32_ECX)
498		/* mov ecx,src */
499		EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX));
500
501	switch (op) {
502	case BPF_LSH:
503		b2 = 0xE0; break;
504	case BPF_RSH:
505		b2 = 0xE8; break;
506	case BPF_ARSH:
507		b2 = 0xF8; break;
508	default:
509		return;
510	}
511	EMIT2(0xD3, add_1reg(b2, dreg));
512
513	if (dstk)
514		/* mov dword ptr [ebp+off],dreg */
515		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg), STACK_VAR(dst));
516	*pprog = prog;
517}
518
519/*
520 * ALU operation (32 bit)
521 * dst = dst (op) src
522 */
523static inline void emit_ia32_alu_r(const bool is64, const bool hi, const u8 op,
524				   const u8 dst, const u8 src, bool dstk,
525				   bool sstk, u8 **pprog)
526{
527	u8 *prog = *pprog;
528	int cnt = 0;
529	u8 sreg = sstk ? IA32_EAX : src;
530	u8 dreg = dstk ? IA32_EDX : dst;
531
532	if (sstk)
533		/* mov eax,dword ptr [ebp+off] */
534		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src));
535
536	if (dstk)
537		/* mov eax,dword ptr [ebp+off] */
538		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(dst));
539
540	switch (BPF_OP(op)) {
541	/* dst = dst + src */
542	case BPF_ADD:
543		if (hi && is64)
544			EMIT2(0x11, add_2reg(0xC0, dreg, sreg));
545		else
546			EMIT2(0x01, add_2reg(0xC0, dreg, sreg));
547		break;
548	/* dst = dst - src */
549	case BPF_SUB:
550		if (hi && is64)
551			EMIT2(0x19, add_2reg(0xC0, dreg, sreg));
552		else
553			EMIT2(0x29, add_2reg(0xC0, dreg, sreg));
554		break;
555	/* dst = dst | src */
556	case BPF_OR:
557		EMIT2(0x09, add_2reg(0xC0, dreg, sreg));
558		break;
559	/* dst = dst & src */
560	case BPF_AND:
561		EMIT2(0x21, add_2reg(0xC0, dreg, sreg));
562		break;
563	/* dst = dst ^ src */
564	case BPF_XOR:
565		EMIT2(0x31, add_2reg(0xC0, dreg, sreg));
566		break;
567	}
568
569	if (dstk)
570		/* mov dword ptr [ebp+off],dreg */
571		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg),
572		      STACK_VAR(dst));
573	*pprog = prog;
574}
575
576/* ALU operation (64 bit) */
577static inline void emit_ia32_alu_r64(const bool is64, const u8 op,
578				     const u8 dst[], const u8 src[],
579				     bool dstk,  bool sstk,
580				     u8 **pprog, const struct bpf_prog_aux *aux)
581{
582	u8 *prog = *pprog;
583
584	emit_ia32_alu_r(is64, false, op, dst_lo, src_lo, dstk, sstk, &prog);
585	if (is64)
586		emit_ia32_alu_r(is64, true, op, dst_hi, src_hi, dstk, sstk,
587				&prog);
588	else if (!aux->verifier_zext)
589		emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
590	*pprog = prog;
591}
592
593/*
594 * ALU operation (32 bit)
595 * dst = dst (op) val
596 */
597static inline void emit_ia32_alu_i(const bool is64, const bool hi, const u8 op,
598				   const u8 dst, const s32 val, bool dstk,
599				   u8 **pprog)
600{
601	u8 *prog = *pprog;
602	int cnt = 0;
603	u8 dreg = dstk ? IA32_EAX : dst;
604	u8 sreg = IA32_EDX;
605
606	if (dstk)
607		/* mov eax,dword ptr [ebp+off] */
608		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
609
610	if (!is_imm8(val))
611		/* mov edx,imm32*/
612		EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EDX), val);
613
614	switch (op) {
615	/* dst = dst + val */
616	case BPF_ADD:
617		if (hi && is64) {
618			if (is_imm8(val))
619				EMIT3(0x83, add_1reg(0xD0, dreg), val);
620			else
621				EMIT2(0x11, add_2reg(0xC0, dreg, sreg));
622		} else {
623			if (is_imm8(val))
624				EMIT3(0x83, add_1reg(0xC0, dreg), val);
625			else
626				EMIT2(0x01, add_2reg(0xC0, dreg, sreg));
627		}
628		break;
629	/* dst = dst - val */
630	case BPF_SUB:
631		if (hi && is64) {
632			if (is_imm8(val))
633				EMIT3(0x83, add_1reg(0xD8, dreg), val);
634			else
635				EMIT2(0x19, add_2reg(0xC0, dreg, sreg));
636		} else {
637			if (is_imm8(val))
638				EMIT3(0x83, add_1reg(0xE8, dreg), val);
639			else
640				EMIT2(0x29, add_2reg(0xC0, dreg, sreg));
641		}
642		break;
643	/* dst = dst | val */
644	case BPF_OR:
645		if (is_imm8(val))
646			EMIT3(0x83, add_1reg(0xC8, dreg), val);
647		else
648			EMIT2(0x09, add_2reg(0xC0, dreg, sreg));
649		break;
650	/* dst = dst & val */
651	case BPF_AND:
652		if (is_imm8(val))
653			EMIT3(0x83, add_1reg(0xE0, dreg), val);
654		else
655			EMIT2(0x21, add_2reg(0xC0, dreg, sreg));
656		break;
657	/* dst = dst ^ val */
658	case BPF_XOR:
659		if (is_imm8(val))
660			EMIT3(0x83, add_1reg(0xF0, dreg), val);
661		else
662			EMIT2(0x31, add_2reg(0xC0, dreg, sreg));
663		break;
664	case BPF_NEG:
665		EMIT2(0xF7, add_1reg(0xD8, dreg));
666		break;
667	}
668
669	if (dstk)
670		/* mov dword ptr [ebp+off],dreg */
671		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg),
672		      STACK_VAR(dst));
673	*pprog = prog;
674}
675
676/* ALU operation (64 bit) */
677static inline void emit_ia32_alu_i64(const bool is64, const u8 op,
678				     const u8 dst[], const u32 val,
679				     bool dstk, u8 **pprog,
680				     const struct bpf_prog_aux *aux)
681{
682	u8 *prog = *pprog;
683	u32 hi = 0;
684
685	if (is64 && (val & (1<<31)))
686		hi = (u32)~0;
687
688	emit_ia32_alu_i(is64, false, op, dst_lo, val, dstk, &prog);
689	if (is64)
690		emit_ia32_alu_i(is64, true, op, dst_hi, hi, dstk, &prog);
691	else if (!aux->verifier_zext)
692		emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
693
694	*pprog = prog;
695}
696
697/* dst = ~dst (64 bit) */
698static inline void emit_ia32_neg64(const u8 dst[], bool dstk, u8 **pprog)
699{
700	u8 *prog = *pprog;
701	int cnt = 0;
702	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
703	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
704
705	if (dstk) {
706		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
707		      STACK_VAR(dst_lo));
708		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
709		      STACK_VAR(dst_hi));
710	}
711
712	/* neg dreg_lo */
713	EMIT2(0xF7, add_1reg(0xD8, dreg_lo));
714	/* adc dreg_hi,0x0 */
715	EMIT3(0x83, add_1reg(0xD0, dreg_hi), 0x00);
716	/* neg dreg_hi */
717	EMIT2(0xF7, add_1reg(0xD8, dreg_hi));
718
719	if (dstk) {
720		/* mov dword ptr [ebp+off],dreg_lo */
721		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
722		      STACK_VAR(dst_lo));
723		/* mov dword ptr [ebp+off],dreg_hi */
724		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
725		      STACK_VAR(dst_hi));
726	}
727	*pprog = prog;
728}
729
730/* dst = dst << src */
731static inline void emit_ia32_lsh_r64(const u8 dst[], const u8 src[],
732				     bool dstk, bool sstk, u8 **pprog)
733{
734	u8 *prog = *pprog;
735	int cnt = 0;
736	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
737	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
738
739	if (dstk) {
740		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
741		      STACK_VAR(dst_lo));
742		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
743		      STACK_VAR(dst_hi));
744	}
745
746	if (sstk)
747		/* mov ecx,dword ptr [ebp+off] */
748		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
749		      STACK_VAR(src_lo));
750	else
751		/* mov ecx,src_lo */
752		EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
753
754	/* shld dreg_hi,dreg_lo,cl */
755	EMIT3(0x0F, 0xA5, add_2reg(0xC0, dreg_hi, dreg_lo));
756	/* shl dreg_lo,cl */
757	EMIT2(0xD3, add_1reg(0xE0, dreg_lo));
758
759	/* if ecx >= 32, mov dreg_lo into dreg_hi and clear dreg_lo */
760
761	/* cmp ecx,32 */
762	EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
763	/* skip the next two instructions (4 bytes) when < 32 */
764	EMIT2(IA32_JB, 4);
765
766	/* mov dreg_hi,dreg_lo */
767	EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
768	/* xor dreg_lo,dreg_lo */
769	EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
770
771	if (dstk) {
772		/* mov dword ptr [ebp+off],dreg_lo */
773		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
774		      STACK_VAR(dst_lo));
775		/* mov dword ptr [ebp+off],dreg_hi */
776		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
777		      STACK_VAR(dst_hi));
778	}
779	/* out: */
780	*pprog = prog;
781}
782
783/* dst = dst >> src (signed)*/
784static inline void emit_ia32_arsh_r64(const u8 dst[], const u8 src[],
785				      bool dstk, bool sstk, u8 **pprog)
786{
787	u8 *prog = *pprog;
788	int cnt = 0;
789	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
790	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
791
792	if (dstk) {
793		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
794		      STACK_VAR(dst_lo));
795		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
796		      STACK_VAR(dst_hi));
797	}
798
799	if (sstk)
800		/* mov ecx,dword ptr [ebp+off] */
801		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
802		      STACK_VAR(src_lo));
803	else
804		/* mov ecx,src_lo */
805		EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
806
807	/* shrd dreg_lo,dreg_hi,cl */
808	EMIT3(0x0F, 0xAD, add_2reg(0xC0, dreg_lo, dreg_hi));
809	/* sar dreg_hi,cl */
810	EMIT2(0xD3, add_1reg(0xF8, dreg_hi));
811
812	/* if ecx >= 32, mov dreg_hi to dreg_lo and set/clear dreg_hi depending on sign */
813
814	/* cmp ecx,32 */
815	EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
816	/* skip the next two instructions (5 bytes) when < 32 */
817	EMIT2(IA32_JB, 5);
818
819	/* mov dreg_lo,dreg_hi */
820	EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
821	/* sar dreg_hi,31 */
822	EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
823
824	if (dstk) {
825		/* mov dword ptr [ebp+off],dreg_lo */
826		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
827		      STACK_VAR(dst_lo));
828		/* mov dword ptr [ebp+off],dreg_hi */
829		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
830		      STACK_VAR(dst_hi));
831	}
832	/* out: */
833	*pprog = prog;
834}
835
836/* dst = dst >> src */
837static inline void emit_ia32_rsh_r64(const u8 dst[], const u8 src[], bool dstk,
838				     bool sstk, u8 **pprog)
839{
840	u8 *prog = *pprog;
841	int cnt = 0;
842	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
843	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
844
845	if (dstk) {
846		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
847		      STACK_VAR(dst_lo));
848		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
849		      STACK_VAR(dst_hi));
850	}
851
852	if (sstk)
853		/* mov ecx,dword ptr [ebp+off] */
854		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
855		      STACK_VAR(src_lo));
856	else
857		/* mov ecx,src_lo */
858		EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));
859
860	/* shrd dreg_lo,dreg_hi,cl */
861	EMIT3(0x0F, 0xAD, add_2reg(0xC0, dreg_lo, dreg_hi));
862	/* shr dreg_hi,cl */
863	EMIT2(0xD3, add_1reg(0xE8, dreg_hi));
864
865	/* if ecx >= 32, mov dreg_hi to dreg_lo and clear dreg_hi */
866
867	/* cmp ecx,32 */
868	EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
869	/* skip the next two instructions (4 bytes) when < 32 */
870	EMIT2(IA32_JB, 4);
871
872	/* mov dreg_lo,dreg_hi */
873	EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
874	/* xor dreg_hi,dreg_hi */
875	EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
876
877	if (dstk) {
878		/* mov dword ptr [ebp+off],dreg_lo */
879		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
880		      STACK_VAR(dst_lo));
881		/* mov dword ptr [ebp+off],dreg_hi */
882		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
883		      STACK_VAR(dst_hi));
884	}
885	/* out: */
886	*pprog = prog;
887}
888
889/* dst = dst << val */
890static inline void emit_ia32_lsh_i64(const u8 dst[], const u32 val,
891				     bool dstk, u8 **pprog)
892{
893	u8 *prog = *pprog;
894	int cnt = 0;
895	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
896	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
897
898	if (dstk) {
899		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
900		      STACK_VAR(dst_lo));
901		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
902		      STACK_VAR(dst_hi));
903	}
904	/* Do LSH operation */
905	if (val < 32) {
906		/* shld dreg_hi,dreg_lo,imm8 */
907		EMIT4(0x0F, 0xA4, add_2reg(0xC0, dreg_hi, dreg_lo), val);
908		/* shl dreg_lo,imm8 */
909		EMIT3(0xC1, add_1reg(0xE0, dreg_lo), val);
910	} else if (val >= 32 && val < 64) {
911		u32 value = val - 32;
912
913		/* shl dreg_lo,imm8 */
914		EMIT3(0xC1, add_1reg(0xE0, dreg_lo), value);
915		/* mov dreg_hi,dreg_lo */
916		EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
917		/* xor dreg_lo,dreg_lo */
918		EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
919	} else {
920		/* xor dreg_lo,dreg_lo */
921		EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
922		/* xor dreg_hi,dreg_hi */
923		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
924	}
925
926	if (dstk) {
927		/* mov dword ptr [ebp+off],dreg_lo */
928		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
929		      STACK_VAR(dst_lo));
930		/* mov dword ptr [ebp+off],dreg_hi */
931		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
932		      STACK_VAR(dst_hi));
933	}
934	*pprog = prog;
935}
936
937/* dst = dst >> val */
938static inline void emit_ia32_rsh_i64(const u8 dst[], const u32 val,
939				     bool dstk, u8 **pprog)
940{
941	u8 *prog = *pprog;
942	int cnt = 0;
943	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
944	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
945
946	if (dstk) {
947		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
948		      STACK_VAR(dst_lo));
949		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
950		      STACK_VAR(dst_hi));
951	}
952
953	/* Do RSH operation */
954	if (val < 32) {
955		/* shrd dreg_lo,dreg_hi,imm8 */
956		EMIT4(0x0F, 0xAC, add_2reg(0xC0, dreg_lo, dreg_hi), val);
957		/* shr dreg_hi,imm8 */
958		EMIT3(0xC1, add_1reg(0xE8, dreg_hi), val);
959	} else if (val >= 32 && val < 64) {
960		u32 value = val - 32;
961
962		/* shr dreg_hi,imm8 */
963		EMIT3(0xC1, add_1reg(0xE8, dreg_hi), value);
964		/* mov dreg_lo,dreg_hi */
965		EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
966		/* xor dreg_hi,dreg_hi */
967		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
968	} else {
969		/* xor dreg_lo,dreg_lo */
970		EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
971		/* xor dreg_hi,dreg_hi */
972		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
973	}
974
975	if (dstk) {
976		/* mov dword ptr [ebp+off],dreg_lo */
977		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
978		      STACK_VAR(dst_lo));
979		/* mov dword ptr [ebp+off],dreg_hi */
980		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
981		      STACK_VAR(dst_hi));
982	}
983	*pprog = prog;
984}
985
986/* dst = dst >> val (signed) */
987static inline void emit_ia32_arsh_i64(const u8 dst[], const u32 val,
988				      bool dstk, u8 **pprog)
989{
990	u8 *prog = *pprog;
991	int cnt = 0;
992	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
993	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
994
995	if (dstk) {
996		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
997		      STACK_VAR(dst_lo));
998		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
999		      STACK_VAR(dst_hi));
1000	}
1001	/* Do RSH operation */
1002	if (val < 32) {
1003		/* shrd dreg_lo,dreg_hi,imm8 */
1004		EMIT4(0x0F, 0xAC, add_2reg(0xC0, dreg_lo, dreg_hi), val);
1005		/* ashr dreg_hi,imm8 */
1006		EMIT3(0xC1, add_1reg(0xF8, dreg_hi), val);
1007	} else if (val >= 32 && val < 64) {
1008		u32 value = val - 32;
1009
1010		/* ashr dreg_hi,imm8 */
1011		EMIT3(0xC1, add_1reg(0xF8, dreg_hi), value);
1012		/* mov dreg_lo,dreg_hi */
1013		EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
1014
1015		/* ashr dreg_hi,imm8 */
1016		EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
1017	} else {
1018		/* ashr dreg_hi,imm8 */
1019		EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
1020		/* mov dreg_lo,dreg_hi */
1021		EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
1022	}
1023
1024	if (dstk) {
1025		/* mov dword ptr [ebp+off],dreg_lo */
1026		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
1027		      STACK_VAR(dst_lo));
1028		/* mov dword ptr [ebp+off],dreg_hi */
1029		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
1030		      STACK_VAR(dst_hi));
1031	}
1032	*pprog = prog;
1033}
1034
1035static inline void emit_ia32_mul_r64(const u8 dst[], const u8 src[], bool dstk,
1036				     bool sstk, u8 **pprog)
1037{
1038	u8 *prog = *pprog;
1039	int cnt = 0;
1040
1041	if (dstk)
1042		/* mov eax,dword ptr [ebp+off] */
1043		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1044		      STACK_VAR(dst_hi));
1045	else
1046		/* mov eax,dst_hi */
1047		EMIT2(0x8B, add_2reg(0xC0, dst_hi, IA32_EAX));
1048
1049	if (sstk)
1050		/* mul dword ptr [ebp+off] */
1051		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo));
1052	else
1053		/* mul src_lo */
1054		EMIT2(0xF7, add_1reg(0xE0, src_lo));
1055
1056	/* mov ecx,eax */
1057	EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX));
1058
1059	if (dstk)
1060		/* mov eax,dword ptr [ebp+off] */
1061		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1062		      STACK_VAR(dst_lo));
1063	else
1064		/* mov eax,dst_lo */
1065		EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
1066
1067	if (sstk)
1068		/* mul dword ptr [ebp+off] */
1069		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_hi));
1070	else
1071		/* mul src_hi */
1072		EMIT2(0xF7, add_1reg(0xE0, src_hi));
1073
1074	/* add eax,eax */
1075	EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX));
1076
1077	if (dstk)
1078		/* mov eax,dword ptr [ebp+off] */
1079		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1080		      STACK_VAR(dst_lo));
1081	else
1082		/* mov eax,dst_lo */
1083		EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
1084
1085	if (sstk)
1086		/* mul dword ptr [ebp+off] */
1087		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo));
1088	else
1089		/* mul src_lo */
1090		EMIT2(0xF7, add_1reg(0xE0, src_lo));
1091
1092	/* add ecx,edx */
1093	EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX));
1094
1095	if (dstk) {
1096		/* mov dword ptr [ebp+off],eax */
1097		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
1098		      STACK_VAR(dst_lo));
1099		/* mov dword ptr [ebp+off],ecx */
1100		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX),
1101		      STACK_VAR(dst_hi));
1102	} else {
1103		/* mov dst_lo,eax */
1104		EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX));
1105		/* mov dst_hi,ecx */
1106		EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX));
1107	}
1108
1109	*pprog = prog;
1110}
1111
1112static inline void emit_ia32_mul_i64(const u8 dst[], const u32 val,
1113				     bool dstk, u8 **pprog)
1114{
1115	u8 *prog = *pprog;
1116	int cnt = 0;
1117	u32 hi;
1118
1119	hi = val & (1<<31) ? (u32)~0 : 0;
1120	/* movl eax,imm32 */
1121	EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val);
1122	if (dstk)
1123		/* mul dword ptr [ebp+off] */
1124		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_hi));
1125	else
1126		/* mul dst_hi */
1127		EMIT2(0xF7, add_1reg(0xE0, dst_hi));
1128
1129	/* mov ecx,eax */
1130	EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX));
1131
1132	/* movl eax,imm32 */
1133	EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), hi);
1134	if (dstk)
1135		/* mul dword ptr [ebp+off] */
1136		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo));
1137	else
1138		/* mul dst_lo */
1139		EMIT2(0xF7, add_1reg(0xE0, dst_lo));
1140	/* add ecx,eax */
1141	EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX));
1142
1143	/* movl eax,imm32 */
1144	EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val);
1145	if (dstk)
1146		/* mul dword ptr [ebp+off] */
1147		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo));
1148	else
1149		/* mul dst_lo */
1150		EMIT2(0xF7, add_1reg(0xE0, dst_lo));
1151
1152	/* add ecx,edx */
1153	EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX));
1154
1155	if (dstk) {
1156		/* mov dword ptr [ebp+off],eax */
1157		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
1158		      STACK_VAR(dst_lo));
1159		/* mov dword ptr [ebp+off],ecx */
1160		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX),
1161		      STACK_VAR(dst_hi));
1162	} else {
1163		/* mov dword ptr [ebp+off],eax */
1164		EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX));
1165		/* mov dword ptr [ebp+off],ecx */
1166		EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX));
1167	}
1168
1169	*pprog = prog;
1170}
1171
1172static int bpf_size_to_x86_bytes(int bpf_size)
1173{
1174	if (bpf_size == BPF_W)
1175		return 4;
1176	else if (bpf_size == BPF_H)
1177		return 2;
1178	else if (bpf_size == BPF_B)
1179		return 1;
1180	else if (bpf_size == BPF_DW)
1181		return 4; /* imm32 */
1182	else
1183		return 0;
1184}
1185
1186struct jit_context {
1187	int cleanup_addr; /* Epilogue code offset */
1188};
1189
1190/* Maximum number of bytes emitted while JITing one eBPF insn */
1191#define BPF_MAX_INSN_SIZE	128
1192#define BPF_INSN_SAFETY		64
1193
1194#define PROLOGUE_SIZE 35
1195
1196/*
1197 * Emit prologue code for BPF program and check its size.
1198 * bpf_tail_call helper will skip it while jumping into another program.
1199 */
1200static void emit_prologue(u8 **pprog, u32 stack_depth)
1201{
1202	u8 *prog = *pprog;
1203	int cnt = 0;
1204	const u8 *r1 = bpf2ia32[BPF_REG_1];
1205	const u8 fplo = bpf2ia32[BPF_REG_FP][0];
1206	const u8 fphi = bpf2ia32[BPF_REG_FP][1];
1207	const u8 *tcc = bpf2ia32[TCALL_CNT];
1208
1209	/* push ebp */
1210	EMIT1(0x55);
1211	/* mov ebp,esp */
1212	EMIT2(0x89, 0xE5);
1213	/* push edi */
1214	EMIT1(0x57);
1215	/* push esi */
1216	EMIT1(0x56);
1217	/* push ebx */
1218	EMIT1(0x53);
1219
1220	/* sub esp,STACK_SIZE */
1221	EMIT2_off32(0x81, 0xEC, STACK_SIZE);
1222	/* sub ebp,SCRATCH_SIZE+12*/
1223	EMIT3(0x83, add_1reg(0xE8, IA32_EBP), SCRATCH_SIZE + 12);
1224	/* xor ebx,ebx */
1225	EMIT2(0x31, add_2reg(0xC0, IA32_EBX, IA32_EBX));
1226
1227	/* Set up BPF prog stack base register */
1228	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBP), STACK_VAR(fplo));
1229	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(fphi));
1230
1231	/* Move BPF_CTX (EAX) to BPF_REG_R1 */
1232	/* mov dword ptr [ebp+off],eax */
1233	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0]));
1234	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(r1[1]));
1235
1236	/* Initialize Tail Count */
1237	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[0]));
1238	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
1239
1240	BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
1241	*pprog = prog;
1242}
1243
1244/* Emit epilogue code for BPF program */
1245static void emit_epilogue(u8 **pprog, u32 stack_depth)
1246{
1247	u8 *prog = *pprog;
1248	const u8 *r0 = bpf2ia32[BPF_REG_0];
1249	int cnt = 0;
1250
1251	/* mov eax,dword ptr [ebp+off]*/
1252	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r0[0]));
1253	/* mov edx,dword ptr [ebp+off]*/
1254	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r0[1]));
1255
1256	/* add ebp,SCRATCH_SIZE+12*/
1257	EMIT3(0x83, add_1reg(0xC0, IA32_EBP), SCRATCH_SIZE + 12);
1258
1259	/* mov ebx,dword ptr [ebp-12]*/
1260	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), -12);
1261	/* mov esi,dword ptr [ebp-8]*/
1262	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ESI), -8);
1263	/* mov edi,dword ptr [ebp-4]*/
1264	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDI), -4);
1265
1266	EMIT1(0xC9); /* leave */
1267	EMIT1(0xC3); /* ret */
1268	*pprog = prog;
1269}
1270
1271static int emit_jmp_edx(u8 **pprog, u8 *ip)
1272{
1273	u8 *prog = *pprog;
1274	int cnt = 0;
1275
1276#ifdef CONFIG_MITIGATION_RETPOLINE
1277	EMIT1_off32(0xE9, (u8 *)__x86_indirect_thunk_edx - (ip + 5));
1278#else
1279	EMIT2(0xFF, 0xE2);
1280#endif
1281	*pprog = prog;
1282
1283	return cnt;
1284}
1285
1286/*
1287 * Generate the following code:
1288 * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
1289 *   if (index >= array->map.max_entries)
1290 *     goto out;
1291 *   if (++tail_call_cnt > MAX_TAIL_CALL_CNT)
1292 *     goto out;
1293 *   prog = array->ptrs[index];
1294 *   if (prog == NULL)
1295 *     goto out;
1296 *   goto *(prog->bpf_func + prologue_size);
1297 * out:
1298 */
1299static void emit_bpf_tail_call(u8 **pprog, u8 *ip)
1300{
1301	u8 *prog = *pprog;
1302	int cnt = 0;
1303	const u8 *r1 = bpf2ia32[BPF_REG_1];
1304	const u8 *r2 = bpf2ia32[BPF_REG_2];
1305	const u8 *r3 = bpf2ia32[BPF_REG_3];
1306	const u8 *tcc = bpf2ia32[TCALL_CNT];
1307	u32 lo, hi;
1308	static int jmp_label1 = -1;
1309
1310	/*
1311	 * if (index >= array->map.max_entries)
1312	 *     goto out;
1313	 */
1314	/* mov eax,dword ptr [ebp+off] */
1315	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r2[0]));
1316	/* mov edx,dword ptr [ebp+off] */
1317	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r3[0]));
1318
1319	/* cmp dword ptr [eax+off],edx */
1320	EMIT3(0x39, add_2reg(0x40, IA32_EAX, IA32_EDX),
1321	      offsetof(struct bpf_array, map.max_entries));
1322	/* jbe out */
1323	EMIT2(IA32_JBE, jmp_label(jmp_label1, 2));
1324
1325	/*
1326	 * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT)
1327	 *     goto out;
1328	 */
1329	lo = (u32)MAX_TAIL_CALL_CNT;
1330	hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32);
1331	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0]));
1332	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
1333
1334	/* cmp edx,hi */
1335	EMIT3(0x83, add_1reg(0xF8, IA32_EBX), hi);
1336	EMIT2(IA32_JNE, 3);
1337	/* cmp ecx,lo */
1338	EMIT3(0x83, add_1reg(0xF8, IA32_ECX), lo);
1339
1340	/* jae out */
1341	EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));
1342
1343	/* add eax,0x1 */
1344	EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 0x01);
1345	/* adc ebx,0x0 */
1346	EMIT3(0x83, add_1reg(0xD0, IA32_EBX), 0x00);
1347
1348	/* mov dword ptr [ebp+off],eax */
1349	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0]));
1350	/* mov dword ptr [ebp+off],edx */
1351	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));
1352
1353	/* prog = array->ptrs[index]; */
1354	/* mov edx, [eax + edx * 4 + offsetof(...)] */
1355	EMIT3_off32(0x8B, 0x94, 0x90, offsetof(struct bpf_array, ptrs));
1356
1357	/*
1358	 * if (prog == NULL)
1359	 *     goto out;
1360	 */
1361	/* test edx,edx */
1362	EMIT2(0x85, add_2reg(0xC0, IA32_EDX, IA32_EDX));
1363	/* je out */
1364	EMIT2(IA32_JE, jmp_label(jmp_label1, 2));
1365
1366	/* goto *(prog->bpf_func + prologue_size); */
1367	/* mov edx, dword ptr [edx + 32] */
1368	EMIT3(0x8B, add_2reg(0x40, IA32_EDX, IA32_EDX),
1369	      offsetof(struct bpf_prog, bpf_func));
1370	/* add edx,prologue_size */
1371	EMIT3(0x83, add_1reg(0xC0, IA32_EDX), PROLOGUE_SIZE);
1372
1373	/* mov eax,dword ptr [ebp+off] */
1374	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0]));
1375
1376	/*
1377	 * Now we're ready to jump into next BPF program:
1378	 * eax == ctx (1st arg)
1379	 * edx == prog->bpf_func + prologue_size
1380	 */
1381	cnt += emit_jmp_edx(&prog, ip + cnt);
1382
1383	if (jmp_label1 == -1)
1384		jmp_label1 = cnt;
1385
1386	/* out: */
1387	*pprog = prog;
1388}
1389
1390/* Push the scratch stack register on top of the stack. */
1391static inline void emit_push_r64(const u8 src[], u8 **pprog)
1392{
1393	u8 *prog = *pprog;
1394	int cnt = 0;
1395
1396	/* mov ecx,dword ptr [ebp+off] */
1397	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_hi));
1398	/* push ecx */
1399	EMIT1(0x51);
1400
1401	/* mov ecx,dword ptr [ebp+off] */
1402	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_lo));
1403	/* push ecx */
1404	EMIT1(0x51);
1405
1406	*pprog = prog;
1407}
1408
1409static void emit_push_r32(const u8 src[], u8 **pprog)
1410{
1411	u8 *prog = *pprog;
1412	int cnt = 0;
1413
1414	/* mov ecx,dword ptr [ebp+off] */
1415	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_lo));
1416	/* push ecx */
1417	EMIT1(0x51);
1418
1419	*pprog = prog;
1420}
1421
1422static u8 get_cond_jmp_opcode(const u8 op, bool is_cmp_lo)
1423{
1424	u8 jmp_cond;
1425
1426	/* Convert BPF opcode to x86 */
1427	switch (op) {
1428	case BPF_JEQ:
1429		jmp_cond = IA32_JE;
1430		break;
1431	case BPF_JSET:
1432	case BPF_JNE:
1433		jmp_cond = IA32_JNE;
1434		break;
1435	case BPF_JGT:
1436		/* GT is unsigned '>', JA in x86 */
1437		jmp_cond = IA32_JA;
1438		break;
1439	case BPF_JLT:
1440		/* LT is unsigned '<', JB in x86 */
1441		jmp_cond = IA32_JB;
1442		break;
1443	case BPF_JGE:
1444		/* GE is unsigned '>=', JAE in x86 */
1445		jmp_cond = IA32_JAE;
1446		break;
1447	case BPF_JLE:
1448		/* LE is unsigned '<=', JBE in x86 */
1449		jmp_cond = IA32_JBE;
1450		break;
1451	case BPF_JSGT:
1452		if (!is_cmp_lo)
1453			/* Signed '>', GT in x86 */
1454			jmp_cond = IA32_JG;
1455		else
1456			/* GT is unsigned '>', JA in x86 */
1457			jmp_cond = IA32_JA;
1458		break;
1459	case BPF_JSLT:
1460		if (!is_cmp_lo)
1461			/* Signed '<', LT in x86 */
1462			jmp_cond = IA32_JL;
1463		else
1464			/* LT is unsigned '<', JB in x86 */
1465			jmp_cond = IA32_JB;
1466		break;
1467	case BPF_JSGE:
1468		if (!is_cmp_lo)
1469			/* Signed '>=', GE in x86 */
1470			jmp_cond = IA32_JGE;
1471		else
1472			/* GE is unsigned '>=', JAE in x86 */
1473			jmp_cond = IA32_JAE;
1474		break;
1475	case BPF_JSLE:
1476		if (!is_cmp_lo)
1477			/* Signed '<=', LE in x86 */
1478			jmp_cond = IA32_JLE;
1479		else
1480			/* LE is unsigned '<=', JBE in x86 */
1481			jmp_cond = IA32_JBE;
1482		break;
1483	default: /* to silence GCC warning */
1484		jmp_cond = COND_JMP_OPCODE_INVALID;
1485		break;
1486	}
1487
1488	return jmp_cond;
1489}
1490
1491/* i386 kernel compiles with "-mregparm=3".  From gcc document:
1492 *
1493 * ==== snippet ====
1494 * regparm (number)
1495 *	On x86-32 targets, the regparm attribute causes the compiler
1496 *	to pass arguments number one to (number) if they are of integral
1497 *	type in registers EAX, EDX, and ECX instead of on the stack.
1498 *	Functions that take a variable number of arguments continue
1499 *	to be passed all of their arguments on the stack.
1500 * ==== snippet ====
1501 *
1502 * The first three args of a function will be considered for
1503 * putting into the 32bit register EAX, EDX, and ECX.
1504 *
1505 * Two 32bit registers are used to pass a 64bit arg.
1506 *
1507 * For example,
1508 * void foo(u32 a, u32 b, u32 c, u32 d):
1509 *	u32 a: EAX
1510 *	u32 b: EDX
1511 *	u32 c: ECX
1512 *	u32 d: stack
1513 *
1514 * void foo(u64 a, u32 b, u32 c):
1515 *	u64 a: EAX (lo32) EDX (hi32)
1516 *	u32 b: ECX
1517 *	u32 c: stack
1518 *
1519 * void foo(u32 a, u64 b, u32 c):
1520 *	u32 a: EAX
1521 *	u64 b: EDX (lo32) ECX (hi32)
1522 *	u32 c: stack
1523 *
1524 * void foo(u32 a, u32 b, u64 c):
1525 *	u32 a: EAX
1526 *	u32 b: EDX
1527 *	u64 c: stack
1528 *
1529 * The return value will be stored in the EAX (and EDX for 64bit value).
1530 *
1531 * For example,
1532 * u32 foo(u32 a, u32 b, u32 c):
1533 *	return value: EAX
1534 *
1535 * u64 foo(u32 a, u32 b, u32 c):
1536 *	return value: EAX (lo32) EDX (hi32)
1537 *
1538 * Notes:
1539 *	The verifier only accepts function having integer and pointers
1540 *	as its args and return value, so it does not have
1541 *	struct-by-value.
1542 *
1543 * emit_kfunc_call() finds out the btf_func_model by calling
1544 * bpf_jit_find_kfunc_model().  A btf_func_model
1545 * has the details about the number of args, size of each arg,
1546 * and the size of the return value.
1547 *
1548 * It first decides how many args can be passed by EAX, EDX, and ECX.
1549 * That will decide what args should be pushed to the stack:
1550 * [first_stack_regno, last_stack_regno] are the bpf regnos
1551 * that should be pushed to the stack.
1552 *
1553 * It will first push all args to the stack because the push
1554 * will need to use ECX.  Then, it moves
1555 * [BPF_REG_1, first_stack_regno) to EAX, EDX, and ECX.
1556 *
1557 * When emitting a call (0xE8), it needs to figure out
1558 * the jmp_offset relative to the jit-insn address immediately
1559 * following the call (0xE8) instruction.  At this point, it knows
1560 * the end of the jit-insn address after completely translated the
1561 * current (BPF_JMP | BPF_CALL) bpf-insn.  It is passed as "end_addr"
1562 * to the emit_kfunc_call().  Thus, it can learn the "immediate-follow-call"
1563 * address by figuring out how many jit-insn is generated between
1564 * the call (0xE8) and the end_addr:
1565 *	- 0-1 jit-insn (3 bytes each) to restore the esp pointer if there
1566 *	  is arg pushed to the stack.
1567 *	- 0-2 jit-insns (3 bytes each) to handle the return value.
1568 */
1569static int emit_kfunc_call(const struct bpf_prog *bpf_prog, u8 *end_addr,
1570			   const struct bpf_insn *insn, u8 **pprog)
1571{
1572	const u8 arg_regs[] = { IA32_EAX, IA32_EDX, IA32_ECX };
1573	int i, cnt = 0, first_stack_regno, last_stack_regno;
1574	int free_arg_regs = ARRAY_SIZE(arg_regs);
1575	const struct btf_func_model *fm;
1576	int bytes_in_stack = 0;
1577	const u8 *cur_arg_reg;
1578	u8 *prog = *pprog;
1579	s64 jmp_offset;
1580
1581	fm = bpf_jit_find_kfunc_model(bpf_prog, insn);
1582	if (!fm)
1583		return -EINVAL;
1584
1585	first_stack_regno = BPF_REG_1;
1586	for (i = 0; i < fm->nr_args; i++) {
1587		int regs_needed = fm->arg_size[i] > sizeof(u32) ? 2 : 1;
1588
1589		if (regs_needed > free_arg_regs)
1590			break;
1591
1592		free_arg_regs -= regs_needed;
1593		first_stack_regno++;
1594	}
1595
1596	/* Push the args to the stack */
1597	last_stack_regno = BPF_REG_0 + fm->nr_args;
1598	for (i = last_stack_regno; i >= first_stack_regno; i--) {
1599		if (fm->arg_size[i - 1] > sizeof(u32)) {
1600			emit_push_r64(bpf2ia32[i], &prog);
1601			bytes_in_stack += 8;
1602		} else {
1603			emit_push_r32(bpf2ia32[i], &prog);
1604			bytes_in_stack += 4;
1605		}
1606	}
1607
1608	cur_arg_reg = &arg_regs[0];
1609	for (i = BPF_REG_1; i < first_stack_regno; i++) {
1610		/* mov e[adc]x,dword ptr [ebp+off] */
1611		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, *cur_arg_reg++),
1612		      STACK_VAR(bpf2ia32[i][0]));
1613		if (fm->arg_size[i - 1] > sizeof(u32))
1614			/* mov e[adc]x,dword ptr [ebp+off] */
1615			EMIT3(0x8B, add_2reg(0x40, IA32_EBP, *cur_arg_reg++),
1616			      STACK_VAR(bpf2ia32[i][1]));
1617	}
1618
1619	if (bytes_in_stack)
1620		/* add esp,"bytes_in_stack" */
1621		end_addr -= 3;
1622
1623	/* mov dword ptr [ebp+off],edx */
1624	if (fm->ret_size > sizeof(u32))
1625		end_addr -= 3;
1626
1627	/* mov dword ptr [ebp+off],eax */
1628	if (fm->ret_size)
1629		end_addr -= 3;
1630
1631	jmp_offset = (u8 *)__bpf_call_base + insn->imm - end_addr;
1632	if (!is_simm32(jmp_offset)) {
1633		pr_err("unsupported BPF kernel function jmp_offset:%lld\n",
1634		       jmp_offset);
1635		return -EINVAL;
1636	}
1637
1638	EMIT1_off32(0xE8, jmp_offset);
1639
1640	if (fm->ret_size)
1641		/* mov dword ptr [ebp+off],eax */
1642		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
1643		      STACK_VAR(bpf2ia32[BPF_REG_0][0]));
1644
1645	if (fm->ret_size > sizeof(u32))
1646		/* mov dword ptr [ebp+off],edx */
1647		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
1648		      STACK_VAR(bpf2ia32[BPF_REG_0][1]));
1649
1650	if (bytes_in_stack)
1651		/* add esp,"bytes_in_stack" */
1652		EMIT3(0x83, add_1reg(0xC0, IA32_ESP), bytes_in_stack);
1653
1654	*pprog = prog;
1655
1656	return 0;
1657}
1658
1659static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
1660		  int oldproglen, struct jit_context *ctx)
1661{
1662	struct bpf_insn *insn = bpf_prog->insnsi;
1663	int insn_cnt = bpf_prog->len;
1664	bool seen_exit = false;
1665	u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
1666	int i, cnt = 0;
1667	int proglen = 0;
1668	u8 *prog = temp;
1669
1670	emit_prologue(&prog, bpf_prog->aux->stack_depth);
1671
1672	for (i = 0; i < insn_cnt; i++, insn++) {
1673		const s32 imm32 = insn->imm;
1674		const bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
1675		const bool dstk = insn->dst_reg != BPF_REG_AX;
1676		const bool sstk = insn->src_reg != BPF_REG_AX;
1677		const u8 code = insn->code;
1678		const u8 *dst = bpf2ia32[insn->dst_reg];
1679		const u8 *src = bpf2ia32[insn->src_reg];
1680		const u8 *r0 = bpf2ia32[BPF_REG_0];
1681		s64 jmp_offset;
1682		u8 jmp_cond;
1683		int ilen;
1684		u8 *func;
1685
1686		switch (code) {
1687		/* ALU operations */
1688		/* dst = src */
1689		case BPF_ALU | BPF_MOV | BPF_K:
1690		case BPF_ALU | BPF_MOV | BPF_X:
1691		case BPF_ALU64 | BPF_MOV | BPF_K:
1692		case BPF_ALU64 | BPF_MOV | BPF_X:
1693			switch (BPF_SRC(code)) {
1694			case BPF_X:
1695				if (imm32 == 1) {
1696					/* Special mov32 for zext. */
1697					emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1698					break;
1699				}
1700				emit_ia32_mov_r64(is64, dst, src, dstk, sstk,
1701						  &prog, bpf_prog->aux);
1702				break;
1703			case BPF_K:
1704				/* Sign-extend immediate value to dst reg */
1705				emit_ia32_mov_i64(is64, dst, imm32,
1706						  dstk, &prog);
1707				break;
1708			}
1709			break;
1710		/* dst = dst + src/imm */
1711		/* dst = dst - src/imm */
1712		/* dst = dst | src/imm */
1713		/* dst = dst & src/imm */
1714		/* dst = dst ^ src/imm */
1715		/* dst = dst * src/imm */
1716		/* dst = dst << src */
1717		/* dst = dst >> src */
1718		case BPF_ALU | BPF_ADD | BPF_K:
1719		case BPF_ALU | BPF_ADD | BPF_X:
1720		case BPF_ALU | BPF_SUB | BPF_K:
1721		case BPF_ALU | BPF_SUB | BPF_X:
1722		case BPF_ALU | BPF_OR | BPF_K:
1723		case BPF_ALU | BPF_OR | BPF_X:
1724		case BPF_ALU | BPF_AND | BPF_K:
1725		case BPF_ALU | BPF_AND | BPF_X:
1726		case BPF_ALU | BPF_XOR | BPF_K:
1727		case BPF_ALU | BPF_XOR | BPF_X:
1728		case BPF_ALU64 | BPF_ADD | BPF_K:
1729		case BPF_ALU64 | BPF_ADD | BPF_X:
1730		case BPF_ALU64 | BPF_SUB | BPF_K:
1731		case BPF_ALU64 | BPF_SUB | BPF_X:
1732		case BPF_ALU64 | BPF_OR | BPF_K:
1733		case BPF_ALU64 | BPF_OR | BPF_X:
1734		case BPF_ALU64 | BPF_AND | BPF_K:
1735		case BPF_ALU64 | BPF_AND | BPF_X:
1736		case BPF_ALU64 | BPF_XOR | BPF_K:
1737		case BPF_ALU64 | BPF_XOR | BPF_X:
1738			switch (BPF_SRC(code)) {
1739			case BPF_X:
1740				emit_ia32_alu_r64(is64, BPF_OP(code), dst,
1741						  src, dstk, sstk, &prog,
1742						  bpf_prog->aux);
1743				break;
1744			case BPF_K:
1745				emit_ia32_alu_i64(is64, BPF_OP(code), dst,
1746						  imm32, dstk, &prog,
1747						  bpf_prog->aux);
1748				break;
1749			}
1750			break;
1751		case BPF_ALU | BPF_MUL | BPF_K:
1752		case BPF_ALU | BPF_MUL | BPF_X:
1753			switch (BPF_SRC(code)) {
1754			case BPF_X:
1755				emit_ia32_mul_r(dst_lo, src_lo, dstk,
1756						sstk, &prog);
1757				break;
1758			case BPF_K:
1759				/* mov ecx,imm32*/
1760				EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
1761					    imm32);
1762				emit_ia32_mul_r(dst_lo, IA32_ECX, dstk,
1763						false, &prog);
1764				break;
1765			}
1766			if (!bpf_prog->aux->verifier_zext)
1767				emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1768			break;
1769		case BPF_ALU | BPF_LSH | BPF_X:
1770		case BPF_ALU | BPF_RSH | BPF_X:
1771		case BPF_ALU | BPF_ARSH | BPF_K:
1772		case BPF_ALU | BPF_ARSH | BPF_X:
1773			switch (BPF_SRC(code)) {
1774			case BPF_X:
1775				emit_ia32_shift_r(BPF_OP(code), dst_lo, src_lo,
1776						  dstk, sstk, &prog);
1777				break;
1778			case BPF_K:
1779				/* mov ecx,imm32*/
1780				EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
1781					    imm32);
1782				emit_ia32_shift_r(BPF_OP(code), dst_lo,
1783						  IA32_ECX, dstk, false,
1784						  &prog);
1785				break;
1786			}
1787			if (!bpf_prog->aux->verifier_zext)
1788				emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1789			break;
1790		/* dst = dst / src(imm) */
1791		/* dst = dst % src(imm) */
1792		case BPF_ALU | BPF_DIV | BPF_K:
1793		case BPF_ALU | BPF_DIV | BPF_X:
1794		case BPF_ALU | BPF_MOD | BPF_K:
1795		case BPF_ALU | BPF_MOD | BPF_X:
1796			switch (BPF_SRC(code)) {
1797			case BPF_X:
1798				emit_ia32_div_mod_r(BPF_OP(code), dst_lo,
1799						    src_lo, dstk, sstk, &prog);
1800				break;
1801			case BPF_K:
1802				/* mov ecx,imm32*/
1803				EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
1804					    imm32);
1805				emit_ia32_div_mod_r(BPF_OP(code), dst_lo,
1806						    IA32_ECX, dstk, false,
1807						    &prog);
1808				break;
1809			}
1810			if (!bpf_prog->aux->verifier_zext)
1811				emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1812			break;
1813		case BPF_ALU64 | BPF_DIV | BPF_K:
1814		case BPF_ALU64 | BPF_DIV | BPF_X:
1815		case BPF_ALU64 | BPF_MOD | BPF_K:
1816		case BPF_ALU64 | BPF_MOD | BPF_X:
1817			goto notyet;
1818		/* dst = dst >> imm */
1819		/* dst = dst << imm */
1820		case BPF_ALU | BPF_RSH | BPF_K:
1821		case BPF_ALU | BPF_LSH | BPF_K:
1822			if (unlikely(imm32 > 31))
1823				return -EINVAL;
1824			/* mov ecx,imm32*/
1825			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
1826			emit_ia32_shift_r(BPF_OP(code), dst_lo, IA32_ECX, dstk,
1827					  false, &prog);
1828			if (!bpf_prog->aux->verifier_zext)
1829				emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1830			break;
1831		/* dst = dst << imm */
1832		case BPF_ALU64 | BPF_LSH | BPF_K:
1833			if (unlikely(imm32 > 63))
1834				return -EINVAL;
1835			emit_ia32_lsh_i64(dst, imm32, dstk, &prog);
1836			break;
1837		/* dst = dst >> imm */
1838		case BPF_ALU64 | BPF_RSH | BPF_K:
1839			if (unlikely(imm32 > 63))
1840				return -EINVAL;
1841			emit_ia32_rsh_i64(dst, imm32, dstk, &prog);
1842			break;
1843		/* dst = dst << src */
1844		case BPF_ALU64 | BPF_LSH | BPF_X:
1845			emit_ia32_lsh_r64(dst, src, dstk, sstk, &prog);
1846			break;
1847		/* dst = dst >> src */
1848		case BPF_ALU64 | BPF_RSH | BPF_X:
1849			emit_ia32_rsh_r64(dst, src, dstk, sstk, &prog);
1850			break;
1851		/* dst = dst >> src (signed) */
1852		case BPF_ALU64 | BPF_ARSH | BPF_X:
1853			emit_ia32_arsh_r64(dst, src, dstk, sstk, &prog);
1854			break;
1855		/* dst = dst >> imm (signed) */
1856		case BPF_ALU64 | BPF_ARSH | BPF_K:
1857			if (unlikely(imm32 > 63))
1858				return -EINVAL;
1859			emit_ia32_arsh_i64(dst, imm32, dstk, &prog);
1860			break;
1861		/* dst = ~dst */
1862		case BPF_ALU | BPF_NEG:
1863			emit_ia32_alu_i(is64, false, BPF_OP(code),
1864					dst_lo, 0, dstk, &prog);
1865			if (!bpf_prog->aux->verifier_zext)
1866				emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1867			break;
1868		/* dst = ~dst (64 bit) */
1869		case BPF_ALU64 | BPF_NEG:
1870			emit_ia32_neg64(dst, dstk, &prog);
1871			break;
1872		/* dst = dst * src/imm */
1873		case BPF_ALU64 | BPF_MUL | BPF_X:
1874		case BPF_ALU64 | BPF_MUL | BPF_K:
1875			switch (BPF_SRC(code)) {
1876			case BPF_X:
1877				emit_ia32_mul_r64(dst, src, dstk, sstk, &prog);
1878				break;
1879			case BPF_K:
1880				emit_ia32_mul_i64(dst, imm32, dstk, &prog);
1881				break;
1882			}
1883			break;
1884		/* dst = htole(dst) */
1885		case BPF_ALU | BPF_END | BPF_FROM_LE:
1886			emit_ia32_to_le_r64(dst, imm32, dstk, &prog,
1887					    bpf_prog->aux);
1888			break;
1889		/* dst = htobe(dst) */
1890		case BPF_ALU | BPF_END | BPF_FROM_BE:
1891			emit_ia32_to_be_r64(dst, imm32, dstk, &prog,
1892					    bpf_prog->aux);
1893			break;
1894		/* dst = imm64 */
1895		case BPF_LD | BPF_IMM | BPF_DW: {
1896			s32 hi, lo = imm32;
1897
1898			hi = insn[1].imm;
1899			emit_ia32_mov_i(dst_lo, lo, dstk, &prog);
1900			emit_ia32_mov_i(dst_hi, hi, dstk, &prog);
1901			insn++;
1902			i++;
1903			break;
1904		}
1905		/* speculation barrier */
1906		case BPF_ST | BPF_NOSPEC:
1907			if (boot_cpu_has(X86_FEATURE_XMM2))
1908				/* Emit 'lfence' */
1909				EMIT3(0x0F, 0xAE, 0xE8);
1910			break;
1911		/* ST: *(u8*)(dst_reg + off) = imm */
1912		case BPF_ST | BPF_MEM | BPF_H:
1913		case BPF_ST | BPF_MEM | BPF_B:
1914		case BPF_ST | BPF_MEM | BPF_W:
1915		case BPF_ST | BPF_MEM | BPF_DW:
1916			if (dstk)
1917				/* mov eax,dword ptr [ebp+off] */
1918				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1919				      STACK_VAR(dst_lo));
1920			else
1921				/* mov eax,dst_lo */
1922				EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
1923
1924			switch (BPF_SIZE(code)) {
1925			case BPF_B:
1926				EMIT(0xC6, 1); break;
1927			case BPF_H:
1928				EMIT2(0x66, 0xC7); break;
1929			case BPF_W:
1930			case BPF_DW:
1931				EMIT(0xC7, 1); break;
1932			}
1933
1934			if (is_imm8(insn->off))
1935				EMIT2(add_1reg(0x40, IA32_EAX), insn->off);
1936			else
1937				EMIT1_off32(add_1reg(0x80, IA32_EAX),
1938					    insn->off);
1939			EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(code)));
1940
1941			if (BPF_SIZE(code) == BPF_DW) {
1942				u32 hi;
1943
1944				hi = imm32 & (1<<31) ? (u32)~0 : 0;
1945				EMIT2_off32(0xC7, add_1reg(0x80, IA32_EAX),
1946					    insn->off + 4);
1947				EMIT(hi, 4);
1948			}
1949			break;
1950
1951		/* STX: *(u8*)(dst_reg + off) = src_reg */
1952		case BPF_STX | BPF_MEM | BPF_B:
1953		case BPF_STX | BPF_MEM | BPF_H:
1954		case BPF_STX | BPF_MEM | BPF_W:
1955		case BPF_STX | BPF_MEM | BPF_DW:
1956			if (dstk)
1957				/* mov eax,dword ptr [ebp+off] */
1958				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1959				      STACK_VAR(dst_lo));
1960			else
1961				/* mov eax,dst_lo */
1962				EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
1963
1964			if (sstk)
1965				/* mov edx,dword ptr [ebp+off] */
1966				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
1967				      STACK_VAR(src_lo));
1968			else
1969				/* mov edx,src_lo */
1970				EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EDX));
1971
1972			switch (BPF_SIZE(code)) {
1973			case BPF_B:
1974				EMIT(0x88, 1); break;
1975			case BPF_H:
1976				EMIT2(0x66, 0x89); break;
1977			case BPF_W:
1978			case BPF_DW:
1979				EMIT(0x89, 1); break;
1980			}
1981
1982			if (is_imm8(insn->off))
1983				EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX),
1984				      insn->off);
1985			else
1986				EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX),
1987					    insn->off);
1988
1989			if (BPF_SIZE(code) == BPF_DW) {
1990				if (sstk)
1991					/* mov edi,dword ptr [ebp+off] */
1992					EMIT3(0x8B, add_2reg(0x40, IA32_EBP,
1993							     IA32_EDX),
1994					      STACK_VAR(src_hi));
1995				else
1996					/* mov edi,src_hi */
1997					EMIT2(0x8B, add_2reg(0xC0, src_hi,
1998							     IA32_EDX));
1999				EMIT1(0x89);
2000				if (is_imm8(insn->off + 4)) {
2001					EMIT2(add_2reg(0x40, IA32_EAX,
2002						       IA32_EDX),
2003					      insn->off + 4);
2004				} else {
2005					EMIT1(add_2reg(0x80, IA32_EAX,
2006						       IA32_EDX));
2007					EMIT(insn->off + 4, 4);
2008				}
2009			}
2010			break;
2011
2012		/* LDX: dst_reg = *(u8*)(src_reg + off) */
2013		case BPF_LDX | BPF_MEM | BPF_B:
2014		case BPF_LDX | BPF_MEM | BPF_H:
2015		case BPF_LDX | BPF_MEM | BPF_W:
2016		case BPF_LDX | BPF_MEM | BPF_DW:
2017			if (sstk)
2018				/* mov eax,dword ptr [ebp+off] */
2019				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2020				      STACK_VAR(src_lo));
2021			else
2022				/* mov eax,dword ptr [ebp+off] */
2023				EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EAX));
2024
2025			switch (BPF_SIZE(code)) {
2026			case BPF_B:
2027				EMIT2(0x0F, 0xB6); break;
2028			case BPF_H:
2029				EMIT2(0x0F, 0xB7); break;
2030			case BPF_W:
2031			case BPF_DW:
2032				EMIT(0x8B, 1); break;
2033			}
2034
2035			if (is_imm8(insn->off))
2036				EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX),
2037				      insn->off);
2038			else
2039				EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX),
2040					    insn->off);
2041
2042			if (dstk)
2043				/* mov dword ptr [ebp+off],edx */
2044				EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
2045				      STACK_VAR(dst_lo));
2046			else
2047				/* mov dst_lo,edx */
2048				EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EDX));
2049			switch (BPF_SIZE(code)) {
2050			case BPF_B:
2051			case BPF_H:
2052			case BPF_W:
2053				if (bpf_prog->aux->verifier_zext)
2054					break;
2055				if (dstk) {
2056					EMIT3(0xC7, add_1reg(0x40, IA32_EBP),
2057					      STACK_VAR(dst_hi));
2058					EMIT(0x0, 4);
2059				} else {
2060					/* xor dst_hi,dst_hi */
2061					EMIT2(0x33,
2062					      add_2reg(0xC0, dst_hi, dst_hi));
2063				}
2064				break;
2065			case BPF_DW:
2066				EMIT2_off32(0x8B,
2067					    add_2reg(0x80, IA32_EAX, IA32_EDX),
2068					    insn->off + 4);
2069				if (dstk)
2070					EMIT3(0x89,
2071					      add_2reg(0x40, IA32_EBP,
2072						       IA32_EDX),
2073					      STACK_VAR(dst_hi));
2074				else
2075					EMIT2(0x89,
2076					      add_2reg(0xC0, dst_hi, IA32_EDX));
2077				break;
2078			default:
2079				break;
2080			}
2081			break;
2082		/* call */
2083		case BPF_JMP | BPF_CALL:
2084		{
2085			const u8 *r1 = bpf2ia32[BPF_REG_1];
2086			const u8 *r2 = bpf2ia32[BPF_REG_2];
2087			const u8 *r3 = bpf2ia32[BPF_REG_3];
2088			const u8 *r4 = bpf2ia32[BPF_REG_4];
2089			const u8 *r5 = bpf2ia32[BPF_REG_5];
2090
2091			if (insn->src_reg == BPF_PSEUDO_CALL)
2092				goto notyet;
2093
2094			if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
2095				int err;
2096
2097				err = emit_kfunc_call(bpf_prog,
2098						      image + addrs[i],
2099						      insn, &prog);
2100
2101				if (err)
2102					return err;
2103				break;
2104			}
2105
2106			func = (u8 *) __bpf_call_base + imm32;
2107			jmp_offset = func - (image + addrs[i]);
2108
2109			if (!imm32 || !is_simm32(jmp_offset)) {
2110				pr_err("unsupported BPF func %d addr %p image %p\n",
2111				       imm32, func, image);
2112				return -EINVAL;
2113			}
2114
2115			/* mov eax,dword ptr [ebp+off] */
2116			EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2117			      STACK_VAR(r1[0]));
2118			/* mov edx,dword ptr [ebp+off] */
2119			EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
2120			      STACK_VAR(r1[1]));
2121
2122			emit_push_r64(r5, &prog);
2123			emit_push_r64(r4, &prog);
2124			emit_push_r64(r3, &prog);
2125			emit_push_r64(r2, &prog);
2126
2127			EMIT1_off32(0xE8, jmp_offset + 9);
2128
2129			/* mov dword ptr [ebp+off],eax */
2130			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
2131			      STACK_VAR(r0[0]));
2132			/* mov dword ptr [ebp+off],edx */
2133			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
2134			      STACK_VAR(r0[1]));
2135
2136			/* add esp,32 */
2137			EMIT3(0x83, add_1reg(0xC0, IA32_ESP), 32);
2138			break;
2139		}
2140		case BPF_JMP | BPF_TAIL_CALL:
2141			emit_bpf_tail_call(&prog, image + addrs[i - 1]);
2142			break;
2143
2144		/* cond jump */
2145		case BPF_JMP | BPF_JEQ | BPF_X:
2146		case BPF_JMP | BPF_JNE | BPF_X:
2147		case BPF_JMP | BPF_JGT | BPF_X:
2148		case BPF_JMP | BPF_JLT | BPF_X:
2149		case BPF_JMP | BPF_JGE | BPF_X:
2150		case BPF_JMP | BPF_JLE | BPF_X:
2151		case BPF_JMP32 | BPF_JEQ | BPF_X:
2152		case BPF_JMP32 | BPF_JNE | BPF_X:
2153		case BPF_JMP32 | BPF_JGT | BPF_X:
2154		case BPF_JMP32 | BPF_JLT | BPF_X:
2155		case BPF_JMP32 | BPF_JGE | BPF_X:
2156		case BPF_JMP32 | BPF_JLE | BPF_X:
2157		case BPF_JMP32 | BPF_JSGT | BPF_X:
2158		case BPF_JMP32 | BPF_JSLE | BPF_X:
2159		case BPF_JMP32 | BPF_JSLT | BPF_X:
2160		case BPF_JMP32 | BPF_JSGE | BPF_X: {
2161			bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
2162			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2163			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2164			u8 sreg_lo = sstk ? IA32_ECX : src_lo;
2165			u8 sreg_hi = sstk ? IA32_EBX : src_hi;
2166
2167			if (dstk) {
2168				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2169				      STACK_VAR(dst_lo));
2170				if (is_jmp64)
2171					EMIT3(0x8B,
2172					      add_2reg(0x40, IA32_EBP,
2173						       IA32_EDX),
2174					      STACK_VAR(dst_hi));
2175			}
2176
2177			if (sstk) {
2178				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
2179				      STACK_VAR(src_lo));
2180				if (is_jmp64)
2181					EMIT3(0x8B,
2182					      add_2reg(0x40, IA32_EBP,
2183						       IA32_EBX),
2184					      STACK_VAR(src_hi));
2185			}
2186
2187			if (is_jmp64) {
2188				/* cmp dreg_hi,sreg_hi */
2189				EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
2190				EMIT2(IA32_JNE, 2);
2191			}
2192			/* cmp dreg_lo,sreg_lo */
2193			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
2194			goto emit_cond_jmp;
2195		}
2196		case BPF_JMP | BPF_JSGT | BPF_X:
2197		case BPF_JMP | BPF_JSLE | BPF_X:
2198		case BPF_JMP | BPF_JSLT | BPF_X:
2199		case BPF_JMP | BPF_JSGE | BPF_X: {
2200			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2201			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2202			u8 sreg_lo = sstk ? IA32_ECX : src_lo;
2203			u8 sreg_hi = sstk ? IA32_EBX : src_hi;
2204
2205			if (dstk) {
2206				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2207				      STACK_VAR(dst_lo));
2208				EMIT3(0x8B,
2209				      add_2reg(0x40, IA32_EBP,
2210					       IA32_EDX),
2211				      STACK_VAR(dst_hi));
2212			}
2213
2214			if (sstk) {
2215				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
2216				      STACK_VAR(src_lo));
2217				EMIT3(0x8B,
2218				      add_2reg(0x40, IA32_EBP,
2219					       IA32_EBX),
2220				      STACK_VAR(src_hi));
2221			}
2222
2223			/* cmp dreg_hi,sreg_hi */
2224			EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
2225			EMIT2(IA32_JNE, 10);
2226			/* cmp dreg_lo,sreg_lo */
2227			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
2228			goto emit_cond_jmp_signed;
2229		}
2230		case BPF_JMP | BPF_JSET | BPF_X:
2231		case BPF_JMP32 | BPF_JSET | BPF_X: {
2232			bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
2233			u8 dreg_lo = IA32_EAX;
2234			u8 dreg_hi = IA32_EDX;
2235			u8 sreg_lo = sstk ? IA32_ECX : src_lo;
2236			u8 sreg_hi = sstk ? IA32_EBX : src_hi;
2237
2238			if (dstk) {
2239				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2240				      STACK_VAR(dst_lo));
2241				if (is_jmp64)
2242					EMIT3(0x8B,
2243					      add_2reg(0x40, IA32_EBP,
2244						       IA32_EDX),
2245					      STACK_VAR(dst_hi));
2246			} else {
2247				/* mov dreg_lo,dst_lo */
2248				EMIT2(0x89, add_2reg(0xC0, dreg_lo, dst_lo));
2249				if (is_jmp64)
2250					/* mov dreg_hi,dst_hi */
2251					EMIT2(0x89,
2252					      add_2reg(0xC0, dreg_hi, dst_hi));
2253			}
2254
2255			if (sstk) {
2256				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
2257				      STACK_VAR(src_lo));
2258				if (is_jmp64)
2259					EMIT3(0x8B,
2260					      add_2reg(0x40, IA32_EBP,
2261						       IA32_EBX),
2262					      STACK_VAR(src_hi));
2263			}
2264			/* and dreg_lo,sreg_lo */
2265			EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
2266			if (is_jmp64) {
2267				/* and dreg_hi,sreg_hi */
2268				EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
2269				/* or dreg_lo,dreg_hi */
2270				EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
2271			}
2272			goto emit_cond_jmp;
2273		}
2274		case BPF_JMP | BPF_JSET | BPF_K:
2275		case BPF_JMP32 | BPF_JSET | BPF_K: {
2276			bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
2277			u8 dreg_lo = IA32_EAX;
2278			u8 dreg_hi = IA32_EDX;
2279			u8 sreg_lo = IA32_ECX;
2280			u8 sreg_hi = IA32_EBX;
2281			u32 hi;
2282
2283			if (dstk) {
2284				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2285				      STACK_VAR(dst_lo));
2286				if (is_jmp64)
2287					EMIT3(0x8B,
2288					      add_2reg(0x40, IA32_EBP,
2289						       IA32_EDX),
2290					      STACK_VAR(dst_hi));
2291			} else {
2292				/* mov dreg_lo,dst_lo */
2293				EMIT2(0x89, add_2reg(0xC0, dreg_lo, dst_lo));
2294				if (is_jmp64)
2295					/* mov dreg_hi,dst_hi */
2296					EMIT2(0x89,
2297					      add_2reg(0xC0, dreg_hi, dst_hi));
2298			}
2299
2300			/* mov ecx,imm32 */
2301			EMIT2_off32(0xC7, add_1reg(0xC0, sreg_lo), imm32);
2302
2303			/* and dreg_lo,sreg_lo */
2304			EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
2305			if (is_jmp64) {
2306				hi = imm32 & (1 << 31) ? (u32)~0 : 0;
2307				/* mov ebx,imm32 */
2308				EMIT2_off32(0xC7, add_1reg(0xC0, sreg_hi), hi);
2309				/* and dreg_hi,sreg_hi */
2310				EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
2311				/* or dreg_lo,dreg_hi */
2312				EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
2313			}
2314			goto emit_cond_jmp;
2315		}
2316		case BPF_JMP | BPF_JEQ | BPF_K:
2317		case BPF_JMP | BPF_JNE | BPF_K:
2318		case BPF_JMP | BPF_JGT | BPF_K:
2319		case BPF_JMP | BPF_JLT | BPF_K:
2320		case BPF_JMP | BPF_JGE | BPF_K:
2321		case BPF_JMP | BPF_JLE | BPF_K:
2322		case BPF_JMP32 | BPF_JEQ | BPF_K:
2323		case BPF_JMP32 | BPF_JNE | BPF_K:
2324		case BPF_JMP32 | BPF_JGT | BPF_K:
2325		case BPF_JMP32 | BPF_JLT | BPF_K:
2326		case BPF_JMP32 | BPF_JGE | BPF_K:
2327		case BPF_JMP32 | BPF_JLE | BPF_K:
2328		case BPF_JMP32 | BPF_JSGT | BPF_K:
2329		case BPF_JMP32 | BPF_JSLE | BPF_K:
2330		case BPF_JMP32 | BPF_JSLT | BPF_K:
2331		case BPF_JMP32 | BPF_JSGE | BPF_K: {
2332			bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
2333			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2334			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2335			u8 sreg_lo = IA32_ECX;
2336			u8 sreg_hi = IA32_EBX;
2337			u32 hi;
2338
2339			if (dstk) {
2340				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2341				      STACK_VAR(dst_lo));
2342				if (is_jmp64)
2343					EMIT3(0x8B,
2344					      add_2reg(0x40, IA32_EBP,
2345						       IA32_EDX),
2346					      STACK_VAR(dst_hi));
2347			}
2348
2349			/* mov ecx,imm32 */
2350			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
2351			if (is_jmp64) {
2352				hi = imm32 & (1 << 31) ? (u32)~0 : 0;
2353				/* mov ebx,imm32 */
2354				EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
2355				/* cmp dreg_hi,sreg_hi */
2356				EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
2357				EMIT2(IA32_JNE, 2);
2358			}
2359			/* cmp dreg_lo,sreg_lo */
2360			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
2361
2362emit_cond_jmp:		jmp_cond = get_cond_jmp_opcode(BPF_OP(code), false);
2363			if (jmp_cond == COND_JMP_OPCODE_INVALID)
2364				return -EFAULT;
2365			jmp_offset = addrs[i + insn->off] - addrs[i];
2366			if (is_imm8(jmp_offset)) {
2367				EMIT2(jmp_cond, jmp_offset);
2368			} else if (is_simm32(jmp_offset)) {
2369				EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
2370			} else {
2371				pr_err("cond_jmp gen bug %llx\n", jmp_offset);
2372				return -EFAULT;
2373			}
2374			break;
2375		}
2376		case BPF_JMP | BPF_JSGT | BPF_K:
2377		case BPF_JMP | BPF_JSLE | BPF_K:
2378		case BPF_JMP | BPF_JSLT | BPF_K:
2379		case BPF_JMP | BPF_JSGE | BPF_K: {
2380			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2381			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2382			u8 sreg_lo = IA32_ECX;
2383			u8 sreg_hi = IA32_EBX;
2384			u32 hi;
2385
2386			if (dstk) {
2387				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2388				      STACK_VAR(dst_lo));
2389				EMIT3(0x8B,
2390				      add_2reg(0x40, IA32_EBP,
2391					       IA32_EDX),
2392				      STACK_VAR(dst_hi));
2393			}
2394
2395			/* mov ecx,imm32 */
2396			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
2397			hi = imm32 & (1 << 31) ? (u32)~0 : 0;
2398			/* mov ebx,imm32 */
2399			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
2400			/* cmp dreg_hi,sreg_hi */
2401			EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
2402			EMIT2(IA32_JNE, 10);
2403			/* cmp dreg_lo,sreg_lo */
2404			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
2405
2406			/*
2407			 * For simplicity of branch offset computation,
2408			 * let's use fixed jump coding here.
2409			 */
2410emit_cond_jmp_signed:	/* Check the condition for low 32-bit comparison */
2411			jmp_cond = get_cond_jmp_opcode(BPF_OP(code), true);
2412			if (jmp_cond == COND_JMP_OPCODE_INVALID)
2413				return -EFAULT;
2414			jmp_offset = addrs[i + insn->off] - addrs[i] + 8;
2415			if (is_simm32(jmp_offset)) {
2416				EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
2417			} else {
2418				pr_err("cond_jmp gen bug %llx\n", jmp_offset);
2419				return -EFAULT;
2420			}
2421			EMIT2(0xEB, 6);
2422
2423			/* Check the condition for high 32-bit comparison */
2424			jmp_cond = get_cond_jmp_opcode(BPF_OP(code), false);
2425			if (jmp_cond == COND_JMP_OPCODE_INVALID)
2426				return -EFAULT;
2427			jmp_offset = addrs[i + insn->off] - addrs[i];
2428			if (is_simm32(jmp_offset)) {
2429				EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
2430			} else {
2431				pr_err("cond_jmp gen bug %llx\n", jmp_offset);
2432				return -EFAULT;
2433			}
2434			break;
2435		}
2436		case BPF_JMP | BPF_JA:
2437			if (insn->off == -1)
2438				/* -1 jmp instructions will always jump
2439				 * backwards two bytes. Explicitly handling
2440				 * this case avoids wasting too many passes
2441				 * when there are long sequences of replaced
2442				 * dead code.
2443				 */
2444				jmp_offset = -2;
2445			else
2446				jmp_offset = addrs[i + insn->off] - addrs[i];
2447
2448			if (!jmp_offset)
2449				/* Optimize out nop jumps */
2450				break;
2451emit_jmp:
2452			if (is_imm8(jmp_offset)) {
2453				EMIT2(0xEB, jmp_offset);
2454			} else if (is_simm32(jmp_offset)) {
2455				EMIT1_off32(0xE9, jmp_offset);
2456			} else {
2457				pr_err("jmp gen bug %llx\n", jmp_offset);
2458				return -EFAULT;
2459			}
2460			break;
2461		case BPF_STX | BPF_ATOMIC | BPF_W:
2462		case BPF_STX | BPF_ATOMIC | BPF_DW:
2463			goto notyet;
2464		case BPF_JMP | BPF_EXIT:
2465			if (seen_exit) {
2466				jmp_offset = ctx->cleanup_addr - addrs[i];
2467				goto emit_jmp;
2468			}
2469			seen_exit = true;
2470			/* Update cleanup_addr */
2471			ctx->cleanup_addr = proglen;
2472			emit_epilogue(&prog, bpf_prog->aux->stack_depth);
2473			break;
2474notyet:
2475			pr_info_once("*** NOT YET: opcode %02x ***\n", code);
2476			return -EFAULT;
2477		default:
2478			/*
2479			 * This error will be seen if new instruction was added
2480			 * to interpreter, but not to JIT or if there is junk in
2481			 * bpf_prog
2482			 */
2483			pr_err("bpf_jit: unknown opcode %02x\n", code);
2484			return -EINVAL;
2485		}
2486
2487		ilen = prog - temp;
2488		if (ilen > BPF_MAX_INSN_SIZE) {
2489			pr_err("bpf_jit: fatal insn size error\n");
2490			return -EFAULT;
2491		}
2492
2493		if (image) {
2494			/*
2495			 * When populating the image, assert that:
2496			 *
2497			 *  i) We do not write beyond the allocated space, and
2498			 * ii) addrs[i] did not change from the prior run, in order
2499			 *     to validate assumptions made for computing branch
2500			 *     displacements.
2501			 */
2502			if (unlikely(proglen + ilen > oldproglen ||
2503				     proglen + ilen != addrs[i])) {
2504				pr_err("bpf_jit: fatal error\n");
2505				return -EFAULT;
2506			}
2507			memcpy(image + proglen, temp, ilen);
2508		}
2509		proglen += ilen;
2510		addrs[i] = proglen;
2511		prog = temp;
2512	}
2513	return proglen;
2514}
2515
2516bool bpf_jit_needs_zext(void)
2517{
2518	return true;
2519}
2520
2521struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
2522{
2523	struct bpf_binary_header *header = NULL;
2524	struct bpf_prog *tmp, *orig_prog = prog;
2525	int proglen, oldproglen = 0;
2526	struct jit_context ctx = {};
2527	bool tmp_blinded = false;
2528	u8 *image = NULL;
2529	int *addrs;
2530	int pass;
2531	int i;
2532
2533	if (!prog->jit_requested)
2534		return orig_prog;
2535
2536	tmp = bpf_jit_blind_constants(prog);
2537	/*
2538	 * If blinding was requested and we failed during blinding,
2539	 * we must fall back to the interpreter.
2540	 */
2541	if (IS_ERR(tmp))
2542		return orig_prog;
2543	if (tmp != prog) {
2544		tmp_blinded = true;
2545		prog = tmp;
2546	}
2547
2548	addrs = kmalloc_array(prog->len, sizeof(*addrs), GFP_KERNEL);
2549	if (!addrs) {
2550		prog = orig_prog;
2551		goto out;
2552	}
2553
2554	/*
2555	 * Before first pass, make a rough estimation of addrs[]
2556	 * each BPF instruction is translated to less than 64 bytes
2557	 */
2558	for (proglen = 0, i = 0; i < prog->len; i++) {
2559		proglen += 64;
2560		addrs[i] = proglen;
2561	}
2562	ctx.cleanup_addr = proglen;
2563
2564	/*
2565	 * JITed image shrinks with every pass and the loop iterates
2566	 * until the image stops shrinking. Very large BPF programs
2567	 * may converge on the last pass. In such case do one more
2568	 * pass to emit the final image.
2569	 */
2570	for (pass = 0; pass < 20 || image; pass++) {
2571		proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
2572		if (proglen <= 0) {
2573out_image:
2574			image = NULL;
2575			if (header)
2576				bpf_jit_binary_free(header);
2577			prog = orig_prog;
2578			goto out_addrs;
2579		}
2580		if (image) {
2581			if (proglen != oldproglen) {
2582				pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
2583				       proglen, oldproglen);
2584				goto out_image;
2585			}
2586			break;
2587		}
2588		if (proglen == oldproglen) {
2589			header = bpf_jit_binary_alloc(proglen, &image,
2590						      1, jit_fill_hole);
2591			if (!header) {
2592				prog = orig_prog;
2593				goto out_addrs;
2594			}
2595		}
2596		oldproglen = proglen;
2597		cond_resched();
2598	}
2599
2600	if (bpf_jit_enable > 1)
2601		bpf_jit_dump(prog->len, proglen, pass + 1, image);
2602
2603	if (image) {
2604		bpf_jit_binary_lock_ro(header);
2605		prog->bpf_func = (void *)image;
2606		prog->jited = 1;
2607		prog->jited_len = proglen;
2608	} else {
2609		prog = orig_prog;
2610	}
2611
2612out_addrs:
2613	kfree(addrs);
2614out:
2615	if (tmp_blinded)
2616		bpf_jit_prog_release_other(prog, prog == orig_prog ?
2617					   tmp : orig_prog);
2618	return prog;
2619}
2620
2621bool bpf_jit_supports_kfunc_call(void)
2622{
2623	return true;
2624}
2625