1#!/bin/awk -f
2# SPDX-License-Identifier: GPL-2.0
3# gen-insn-attr-x86.awk: Instruction attribute table generator
4# Written by Masami Hiramatsu <mhiramat@redhat.com>
5#
6# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c
7
# Sanity check of the awk implementation that is running this script.
# Returns "" when the interpreter passes the check, otherwise a message
# describing what is wrong with it.
function check_awk_implement() {
	# Formatting zero as hexadecimal has to produce exactly "0".
	if (sprintf("%x", 0) == "0")
		return ""
	return "Your awk has a printf-format problem."
}
14
# Reset the per-table working state: the current table name, the current
# escape/group/AVX ids, and the opcode table being built together with its
# three last-prefix variant tables.
function clear_vars() {
	tname = ""
	aid = -1 # AVX id
	gid = -1 # group id
	eid = -1 # escape id
	delete lptable3
	delete lptable2
	delete lptable1
	delete table
}
26
BEGIN {
	# Refuse to run on an awk that fails the implementation check.
	awkchecked = check_awk_implement()
	if (awkchecked != "") {
		print "Error: " awkchecked > "/dev/stderr"
		print "Please try to use gawk." > "/dev/stderr"
		exit 1
	}

	# Banner at the top of the generated file
	print "/* x86 opcode map generated from x86-opcode-map.txt */"
	print "/* Do not change this code. */\n"

	# ggid/geid: next group/escape id to hand out.
	# gaid: one past the highest AVX id seen so far.
	# etable/gtable/atable: names of the emitted tables, filled at EndTable.
	gaid = 0
	geid = 1
	ggid = 1
	delete atable
	delete gtable
	delete etable

	# Patterns classifying the fields of an opcode line
	group_expr = "^Grp[0-9A-Za-z]+"
	sep_expr = "^\\|$"
	ext_expr = "^\\("
	opnd_expr = "^[A-Za-z/]"

	# Operand patterns and opcode/superscript markers
	modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
	imm_expr = "^[IJAOL][a-z]"
	rex_expr = "^REX(\\.[XRWB]+)*"
	force64_expr = "\\([df]64\\)"
	fpu_expr = "^ESC" # TODO

	# Immediate operand name -> attribute expression
	imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
	imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)"
	imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)"
	imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)"
	imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
	imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
	imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
	imm_flag["Ob"] = "INAT_MOFFSET"
	imm_flag["Ov"] = "INAT_MOFFSET"

	# Last-prefix superscripts: which of them select lptable1/2/3, and
	# the plain (66|F2|F3) form that keeps an entry out of the base table.
	# max_lprefix is the number of variant slots per table (base + 3).
	max_lprefix = 4
	lprefix_expr = "\\((66|F2|F3)\\)"
	lprefix1_expr = "\\((66|!F3)\\)"
	lprefix2_expr = "\\(F3\\)"
	lprefix3_expr = "\\((F2|!F3|66&F2)\\)"

	# All opcodes with (ev) superscript supports *only* EVEX prefix
	evexonly_expr = "\\(ev\\)"
	# All opcodes with (v) superscript supports *only* VEX prefix
	vexonly_expr = "\\(v\\)"
	# All opcodes starting with lower-case 'v', 'k' or with (v1) superscript
	# accepts VEX prefix
	vexok_expr = "\\(v1\\)"
	vexok_opcode_expr = "^[vk].*"

	# Prefix opcodes, marked with (Prefix), and their attribute names
	prefix_expr = "\\(Prefix\\)"
	prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
	prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
	prefix_num["REPNE"] = "INAT_PFX_REPNE"
	prefix_num["XACQUIRE"] = "INAT_PFX_REPNE"
	prefix_num["REP/REPE"] = "INAT_PFX_REPE"
	prefix_num["XRELEASE"] = "INAT_PFX_REPE"
	prefix_num["LOCK"] = "INAT_PFX_LOCK"
	prefix_num["SEG=CS"] = "INAT_PFX_CS"
	prefix_num["SEG=DS"] = "INAT_PFX_DS"
	prefix_num["SEG=ES"] = "INAT_PFX_ES"
	prefix_num["SEG=FS"] = "INAT_PFX_FS"
	prefix_num["SEG=GS"] = "INAT_PFX_GS"
	prefix_num["SEG=SS"] = "INAT_PFX_SS"
	prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
	prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
	prefix_num["EVEX"] = "INAT_PFX_EVEX"

	# Start with clean per-table state
	clear_vars()
}
105
# Report a fatal problem in the opcode map on stderr and terminate with
# exit status 1.
# msg: description of the problem; the current input record number (NR)
#      is included in the report.
function semantic_error(msg) {
	print "Semantic error at " NR ": " msg > "/dev/stderr"
	# "exit" inside a main rule still runs the END rule.  Marking
	# awkchecked non-empty makes the END rule's existing guard stop at
	# once instead of appending the lookup arrays to the broken output.
	awkchecked = "semantic error"
	exit 1
}
110
# Print a diagnostic message while working on this script.
# msg: text shown after the "DEBUG: " tag
# The message goes to stderr, like semantic_error(), because stdout is the
# generated C source (see the usage line at the top of this file).
function debug(msg) {
	print "DEBUG: " msg > "/dev/stderr"
}
114
# Count the elements stored in an array.
# arr: array to inspect
# Returns the number of indices present in arr (0 for an empty array).
function array_size(arr,   key,total) {
	total = 0
	for (key in arr)
		total += 1
	return total
}
121
# "Table:" line: copy it into the output as a C comment.  A table name that
# is still set means the previous table never reached its EndTable line.
/^Table:/ {
	print "/* " $0 " */"
	if (length(tname) > 0)
		semantic_error("Hit Table: before EndTable:.");
}
127
# "Referrer:" line: names the escape opcode that leads to the table being
# described.  A line without a name (NF == 1) leaves eid and tname alone.
/^Referrer:/ {
	if (NF != 1) {
		# escape opcode table: the name is every remaining field
		# joined without separators, exactly as it is recorded when
		# the "escape" opcode line is parsed.
		ref = ""
		for (i = 2; i <= NF; i++)
			ref = ref $i
		# An unknown referrer would silently give an empty escape id
		# and the bogus name "inat_escape_table_0"; reject it the same
		# way GrpTable: rejects an unknown group.
		if (!(ref in escape))
			semantic_error("No escape: " ref)
		eid = escape[ref]
		tname = sprintf("inat_escape_table_%d", eid)
	}
}
138
# "AVXcode:" line: optional AVX id of the table being described.  It is also
# the point where the table name falls back to the primary table when
# neither an escape id nor an AVX id has been set.
/^AVXcode:/ {
	if (NF > 1) {
		# AVX/escape opcode table
		aid = $2
		# keep gaid one past the highest AVX id seen
		if (aid >= gaid)
			gaid = aid + 1
		# AVX only opcode table: no Referrer: has named it yet
		if (tname == "")
			tname = sprintf("inat_avx_table_%d", aid)
	}
	# primary opcode table
	if (eid == -1 && aid == -1)
		tname = "inat_primary_table"
}
151
# "GrpTable:" line: start of a group table.  The group must already have been
# assigned an id by an opcode line that referenced it.
/^GrpTable:/ {
	print "/* " $0 " */"
	if ($2 in group) {
		gid = group[$2]
		tname = "inat_group_table_" gid
	} else
		semantic_error("No group: " $2 )
}
159
# Print one attribute table as a C array definition with designated
# initializers.
# tbl:  array mapping formatted indices to attribute expressions
# name: C declarator, i.e. identifier plus "[SIZE]"
# fmt:  sprintf format that turns a slot number into a tbl index
# n:    number of slots to scan (0 .. n-1)
# Slots that are absent or empty are not printed.
# i and id are locals: the loop must not clobber the global "i" used by the
# rules of this script.
function print_table(tbl,name,fmt,n,    i,id)
{
	print "const insn_attr_t " name " = {"
	for (i = 0; i < n; i++) {
		id = sprintf(fmt, i)
		# "in" keeps the probe from creating empty elements in tbl
		if ((id in tbl) && tbl[id])
			print "	[" id "] = " tbl[id] ","
	}
	print "};"
}
170
# Print one variant of the table that is being closed and record its C name
# in the matching lookup array.  Does nothing when the variant is empty.
# tbl:    variant contents (table, lptable1, lptable2 or lptable3)
# suffix: text appended to tname to form the C identifier ("" or "_<n>")
# lp:     last-prefix slot the variant is recorded under
function emit_table_variant(tbl,suffix,lp,    cname)
{
	if (array_size(tbl) == 0)
		return
	cname = tname suffix
	if (gid != -1) {
		# group table: slots 0x0 .. 0x7
		print_table(tbl, cname "[INAT_GROUP_TABLE_SIZE]",
			    "0x%x", 8)
		gtable[gid,lp] = cname
	} else {
		# primary/escaped table: slots 0x00 .. 0xff
		print_table(tbl, cname "[INAT_OPCODE_TABLE_SIZE]",
			    "0x%02x", 256)
		etable[eid,lp] = cname
		if (aid >= 0)
			atable[aid,lp] = cname
	}
}

# "EndTable" line: print the base table and its last-prefix variants, in
# that order, then reset the working state for the next table.
/^EndTable/ {
	emit_table_variant(table, "", 0)
	emit_table_variant(lptable1, "_1", 1)
	emit_table_variant(lptable2, "_2", 2)
	emit_table_variant(lptable3, "_3", 3)
	print ""
	clear_vars()
}
228
# Combine two attribute-flag expressions with " | ".
# old, new: flag expressions; either may be empty
# Returns "old | new" when both are set, otherwise whichever one is set
# (new when neither is).
function add_flags(old,new) {
	if (!old)
		return new
	if (!new)
		return old
	return old " | " new
}
237
# Convert the operand list of one opcode into attribute flags.
# count: number of operands
# opnd:  operand names, indexed 1 .. count
# Returns the immediate attribute (if any) combined with INAT_MODRM (if any
# operand matches modrm_expr); empty when neither applies.
# An immediate operand missing from imm_flag, or a second immediate other
# than "Ib", is a semantic error.
function convert_operands(count,opnd,       k,op,immattr,modattr)
{
	immattr = ""
	modattr = ""
	for (k = 1; k <= count; k++) {
		op = opnd[k]
		if (op ~ imm_expr) {
			if (!imm_flag[op])
				semantic_error("Unknown imm opnd: " op)
			if (!immattr) {
				# first immediate operand
				immattr = imm_flag[op]
				continue
			}
			# second immediate operand
			if (op != "Ib")
				semantic_error("Second IMM error")
			immattr = add_flags(immattr, "INAT_SCNDIMM")
			continue
		}
		if (op ~ modrm_expr)
			modattr = "INAT_MODRM"
	}
	return add_flags(immattr, modattr)
}
259
# Opcode line: "<hex index>: <opcode> [operands] [(ext)...] [| <opcode> ...]".
# Either records an escape opcode, or converts every opcode alternative on
# the line into attribute flags and stores them in table[] and/or the
# last-prefix tables lptable1..lptable3 under the index "0x<hex index>".
/^[0-9a-f]+:/ {
	# NOTE(review): a matching line is ignored when it is the very first
	# input record; the reason is not evident from this script.
	if (NR == 1)
		next
	# get index: the text before the first ":" of field 1, prefixed "0x"
	idx = "0x" substr($1, 1, index($1,":") - 1)
	if (idx in table)
		semantic_error("Redefine " idx " in " tname)

	# check if escaped opcode: "<idx>: escape # <name...>"
	if ("escape" == $2) {
		if ($3 != "#")
			semantic_error("No escaped name")
		# the escape name is fields 4..NF joined without separators
		ref = ""
		for (i = 4; i <= NF; i++)
			ref = ref $i
		if (ref in escape)
			semantic_error("Redefine escape (" ref ")")
		# hand out the next escape id
		escape[ref] = geid
		geid++
		table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")"
		next
	}

	# set to "INAT_VARIANT" once any alternative lands in a lptable
	variant = null
	# convert each alternative; i walks over the fields of the line
	i = 2
	while (i <= NF) {
		opcode = $(i++)
		delete opnds
		ext = null
		flags = null
		opnd = null
		# parse one opcode: optional operand list, optional "(...)"
		# extension, then a "|" separator
		if (match($i, opnd_expr)) {
			opnd = $i
			count = split($(i++), opnds, ",")
			flags = convert_operands(count, opnds)
		}
		if (match($i, ext_expr))
			ext = $(i++)
		# a separator is consumed; any other field is an error unless
		# it is the last field of the line
		if (match($i, sep_expr))
			i++
		else if (i < NF)
			semantic_error($i " is not a separator")

		# check if group opcode; a group seen for the first time gets
		# the next group id
		if (match(opcode, group_expr)) {
			if (!(opcode in group)) {
				group[opcode] = ggid
				ggid++
			}
			flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")")
		}
		# check force(or default) 64bit
		if (match(ext, force64_expr))
			flags = add_flags(flags, "INAT_FORCE64")

		# check REX prefix
		if (match(opcode, rex_expr))
			flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)")

		# check coprocessor escape : TODO
		if (match(opcode, fpu_expr))
			flags = add_flags(flags, "INAT_MODRM")

		# check VEX codes; the first matching case wins
		if (match(ext, evexonly_expr))
			flags = add_flags(flags, "INAT_VEXOK | INAT_EVEXONLY")
		else if (match(ext, vexonly_expr))
			flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
		else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
			flags = add_flags(flags, "INAT_VEXOK")

		# check prefixes: the opcode name must be a key of prefix_num
		if (match(ext, prefix_expr)) {
			if (!prefix_num[opcode])
				semantic_error("Unknown prefix: " opcode)
			flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")")
		}
		# no attributes for this alternative: nothing to store
		if (length(flags) == 0)
			continue
		# check if last prefix: store the flags in every lptable whose
		# pattern matches the extension
		if (match(ext, lprefix1_expr)) {
			lptable1[idx] = add_flags(lptable1[idx],flags)
			variant = "INAT_VARIANT"
		}
		if (match(ext, lprefix2_expr)) {
			lptable2[idx] = add_flags(lptable2[idx],flags)
			variant = "INAT_VARIANT"
		}
		if (match(ext, lprefix3_expr)) {
			lptable3[idx] = add_flags(lptable3[idx],flags)
			variant = "INAT_VARIANT"
		}
		# without a plain (66), (F2) or (F3) marker the flags also go
		# into the base table
		if (!match(ext, lprefix_expr)){
			table[idx] = add_flags(table[idx],flags)
		}
	}
	# mark the base entry when last-prefix variants exist for this index
	if (variant)
		table[idx] = add_flags(table[idx],variant)
}
361
# Print "[row][lp] = name," for every filled slot of a table-name lookup
# array.
# map:  lookup array indexed by [row,lp]
# rows: number of rows to scan (0 .. rows-1)
function print_map_initializers(map,rows,    r,lp)
{
	for (r = 0; r < rows; r++) {
		for (lp = 0; lp < max_lprefix; lp++) {
			if (map[r,lp])
				print "	["r"]["lp"] = "map[r,lp]","
		}
	}
}

# Print "<cname>[row][lp] = name;" for every filled slot of a table-name
# lookup array.
# map:   lookup array indexed by [row,lp]
# cname: name of the C array being assigned to
# rows:  number of rows to scan (0 .. rows-1)
function print_map_assignments(map,cname,rows,    r,lp)
{
	for (r = 0; r < rows; r++) {
		for (lp = 0; lp < max_lprefix; lp++) {
			if (map[r,lp])
				print "\t" cname "["r"]["lp"] = "map[r,lp]";"
		}
	}
}

# After all input: print the escape/group/AVX lookup arrays, once as
# constant initializers and once, under __BOOT_COMPRESSED, as plain arrays
# filled in by inat_init_tables().
END {
	# an error has already been reported; emit nothing further
	if (awkchecked != "")
		exit 1

	print "#ifndef __BOOT_COMPRESSED\n"

	# escape opcode map's array
	print "/* Escape opcode map array */"
	print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \
	      "[INAT_LSTPFX_MAX + 1] = {"
	print_map_initializers(etable, geid)
	print "};\n"

	# group opcode map's array
	print "/* Group opcode map array */"
	print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]"\
	      "[INAT_LSTPFX_MAX + 1] = {"
	print_map_initializers(gtable, ggid)
	print "};\n"

	# AVX opcode map's array
	print "/* AVX opcode map array */"
	print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]"\
	      "[INAT_LSTPFX_MAX + 1] = {"
	print_map_initializers(atable, gaid)
	print "};\n"

	print "#else /* !__BOOT_COMPRESSED */\n"

	# declarations of the three arrays
	print "/* Escape opcode map array */"
	print "static const insn_attr_t *inat_escape_tables[INAT_ESC_MAX + 1]" \
	      "[INAT_LSTPFX_MAX + 1];"
	print ""

	print "/* Group opcode map array */"
	print "static const insn_attr_t *inat_group_tables[INAT_GRP_MAX + 1]"\
	      "[INAT_LSTPFX_MAX + 1];"
	print ""

	print "/* AVX opcode map array */"
	print "static const insn_attr_t *inat_avx_tables[X86_VEX_M_MAX + 1]"\
	      "[INAT_LSTPFX_MAX + 1];"
	print ""

	# initializer function that fills them in
	print "static void inat_init_tables(void)"
	print "{"

	print "\t/* Print Escape opcode map array */"
	print_map_assignments(etable, "inat_escape_tables", geid)
	print ""

	print "\t/* Print Group opcode map array */"
	print_map_assignments(gtable, "inat_group_tables", ggid)
	print ""

	print "\t/* Print AVX opcode map array */"
	print_map_assignments(atable, "inat_avx_tables", gaid)

	print "}"
	print "#endif"
}
441
442