#!/bin/awk -f
# gen-insn-attr-x86.awk: Instruction attribute table generator
# Written by Masami Hiramatsu <mhiramat@redhat.com>
#
# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c
#
# Input is read section by section.  A section is introduced by "Table:" or
# "GrpTable:" (optionally followed by "Referrer:" and "AVXcode:" lines),
# contains opcode lines of the form
#	<hex>: <mnemonic> [operands] [(ext)...] [| <mnemonic> ...]
# and is closed by "EndTable".  Each section is printed as one C array per
# last-prefix variant; the END rule prints the arrays that index them.

# Awk implementation sanity check
# Returns "" when sprintf("%x") behaves as expected, otherwise an error text.
function check_awk_implement() {
	if (sprintf("%x", 0) != "0")
		return "Your awk has a printf-format problem."
	return ""
}

# Clear working vars
# Resets the per-section state: the four attribute tables, the ids of the
# section being parsed (-1 means "not set") and the current table name.
function clear_vars() {
	delete table
	delete lptable2
	delete lptable1
	delete lptable3
	eid = -1 # escape id
	gid = -1 # group id
	aid = -1 # AVX id
	tname = ""
}

BEGIN {
	# Implementation error checking
	awkchecked = check_awk_implement()
	if (awkchecked != "") {
		print "Error: " awkchecked > "/dev/stderr"
		print "Please try to use gawk." > "/dev/stderr"
		exit 1
	}

	# Setup generating tables
	print "/* x86 opcode map generated from x86-opcode-map.txt */"
	print "/* Do not change this code. */\n"
	ggid = 1	# next group id to hand out
	geid = 1	# next escape id to hand out
	gaid = 0	# one past the highest AVX id seen so far
	delete etable
	delete gtable
	delete atable

	# Token classifiers used while parsing an opcode line
	opnd_expr = "^[A-Za-z/]"
	ext_expr = "^\\("
	sep_expr = "^\\|$"
	group_expr = "^Grp[0-9A-Za-z]+"

	# Immediate-like operands and the attribute text emitted for each
	imm_expr = "^[IJAO][a-z]"
	imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
	imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
	imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)"
	imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)"
	imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)"
	imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
	imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
	imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
	imm_flag["Ob"] = "INAT_MOFFSET"
	imm_flag["Ov"] = "INAT_MOFFSET"

	modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
	force64_expr = "\\([df]64\\)"
	rex_expr = "^REX(\\.[XRWB]+)*"
	fpu_expr = "^ESC" # TODO

	# Last-prefix markers.  Variant 0 is "no last prefix", so every table
	# has max_lprefix (4) possible variants: 0, (66), (F3), (F2).
	lprefix1_expr = "\\(66\\)"
	lprefix2_expr = "\\(F3\\)"
	lprefix3_expr = "\\(F2\\)"
	max_lprefix = 4

	vexok_expr = "\\(VEX\\)"
	vexonly_expr = "\\(oVEX\\)"

	# Mnemonics marked "(Prefix)" and the prefix id emitted for each
	prefix_expr = "\\(Prefix\\)"
	prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
	prefix_num["REPNE"] = "INAT_PFX_REPNE"
	prefix_num["REP/REPE"] = "INAT_PFX_REPE"
	prefix_num["LOCK"] = "INAT_PFX_LOCK"
	prefix_num["SEG=CS"] = "INAT_PFX_CS"
	prefix_num["SEG=DS"] = "INAT_PFX_DS"
	prefix_num["SEG=ES"] = "INAT_PFX_ES"
	prefix_num["SEG=FS"] = "INAT_PFX_FS"
	prefix_num["SEG=GS"] = "INAT_PFX_GS"
	prefix_num["SEG=SS"] = "INAT_PFX_SS"
	prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
	prefix_num["2bytes-VEX"] = "INAT_PFX_VEX2"
	prefix_num["3bytes-VEX"] = "INAT_PFX_VEX3"

	clear_vars()
}

# Report a problem in the input at the current record and stop reading.
function semantic_error(msg) {
	print "Semantic error at " NR ": " msg > "/dev/stderr"
	exit 1
}

# Print a debugging message on standard output.
function debug(msg) {
	print "DEBUG: " msg
}

# Return the number of elements in arr (i and c are locals).
function array_size(arr,   i,c) {
	c = 0
	for (i in arr)
		c++
	return c
}

# "Table:" opens a section; the previous one must have been closed.
/^Table:/ {
	print "/* " $0 " */"
	if (tname != "")
		semantic_error("Hit Table: before EndTable:.");
}

# "Referrer: <name>" marks the section as the table reached through the
# escape opcode that was registered earlier under <name>.
/^Referrer:/ {
	if (NF != 1) {
		# escape opcode table
		ref = ""
		for (i = 2; i <= NF; i++)
			ref = ref $i
		# An unregistered name would silently become table 0 and
		# never be indexed; reject it like an unknown group.
		if (!(ref in escape))
			semantic_error("No escape: " ref)
		eid = escape[ref]
		tname = sprintf("inat_escape_table_%d", eid)
	}
}

# "AVXcode: [n]" optionally attaches an AVX id to the section.  A section
# with neither an escape id nor an AVX id is the primary opcode table.
/^AVXcode:/ {
	if (NF != 1) {
		# AVX/escape opcode table
		aid = $2
		if (gaid <= aid)
			gaid = aid + 1
		if (tname == "")	# AVX only opcode table
			tname = sprintf("inat_avx_table_%d", $2)
	}
	if (aid == -1 && eid == -1)	# primary opcode table
		tname = "inat_primary_table"
}

# "GrpTable: <GrpN>" opens the section for a group that an earlier opcode
# line has already referenced.
/^GrpTable:/ {
	print "/* " $0 " */"
	if (!($2 in group))
		semantic_error("No group: " $2 )
	gid = group[$2]
	tname = "inat_group_table_" gid
}

# Print tbl as a C array definition.
#   tbl  - attribute strings keyed by formatted index
#   name - array declarator, including its "[SIZE]" suffix
#   fmt  - sprintf format that turns 0..n-1 into a key of tbl
#   n    - number of indexes to scan
# Only non-empty entries are printed.  i and id are locals so that the
# caller's global loop counter is left alone.
function print_table(tbl,name,fmt,n,   i,id)
{
	print "const insn_attr_t " name " = {"
	for (i = 0; i < n; i++) {
		id = sprintf(fmt, i)
		if (tbl[id])
			print " [" id "] = " tbl[id] ","
	}
	print "};"
}

# "EndTable" closes the section: print every non-empty variant of it,
# remember the printed names for the END rule, then reset the state.
/^EndTable/ {
	if (gid != -1) {
		# print group tables
		if (array_size(table) != 0) {
			print_table(table, tname "[INAT_GROUP_TABLE_SIZE]",
				    "0x%x", 8)
			gtable[gid,0] = tname
		}
		if (array_size(lptable1) != 0) {
			print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]",
				    "0x%x", 8)
			gtable[gid,1] = tname "_1"
		}
		if (array_size(lptable2) != 0) {
			print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]",
				    "0x%x", 8)
			gtable[gid,2] = tname "_2"
		}
		if (array_size(lptable3) != 0) {
			print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]",
				    "0x%x", 8)
			gtable[gid,3] = tname "_3"
		}
	} else {
		# print primary/escaped tables
		if (array_size(table) != 0) {
			print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]",
				    "0x%02x", 256)
			etable[eid,0] = tname
			if (aid >= 0)
				atable[aid,0] = tname
		}
		if (array_size(lptable1) != 0) {
			print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]",
				    "0x%02x", 256)
			etable[eid,1] = tname "_1"
			if (aid >= 0)
				atable[aid,1] = tname "_1"
		}
		if (array_size(lptable2) != 0) {
			print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]",
				    "0x%02x", 256)
			etable[eid,2] = tname "_2"
			if (aid >= 0)
				atable[aid,2] = tname "_2"
		}
		if (array_size(lptable3) != 0) {
			print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]",
				    "0x%02x", 256)
			etable[eid,3] = tname "_3"
			if (aid >= 0)
				atable[aid,3] = tname "_3"
		}
	}
	print ""
	clear_vars()
}

# Join two attribute strings with " | ", skipping whichever is empty.
function add_flags(old,new) {
	if (old && new)
		return old " | " new
	else if (old)
		return old
	else
		return new
}

# convert operands to flags.
#   count - number of entries in opnd
#   opnd  - operand names, indexed 1..count
# Returns the immediate attribute (if any) combined with INAT_MODRM (if any
# operand matches modrm_expr).  A second immediate is accepted only when it
# is "Ib".  i, j, imm and mod are locals.
function convert_operands(count,opnd,       i,j,imm,mod)
{
	imm = null
	mod = null
	for (j = 1; j <= count; j++) {
		i = opnd[j]
		if (match(i, imm_expr) == 1) {
			if (!imm_flag[i])
				semantic_error("Unknown imm opnd: " i)
			if (imm) {
				if (i != "Ib")
					semantic_error("Second IMM error")
				imm = add_flags(imm, "INAT_SCNDIMM")
			} else
				imm = imm_flag[i]
		} else if (match(i, modrm_expr))
			mod = "INAT_MODRM"
	}
	return add_flags(imm, mod)
}

# Opcode line: "<hex>: ...".  The colon is matched literally; "\:" is not a
# defined regexp escape and makes gawk print a warning.
/^[0-9a-f]+:/ {
	# The first input record is never parsed as an opcode line.
	if (NR == 1)
		next
	# get index
	idx = "0x" substr($1, 1, index($1,":") - 1)
	if (idx in table)
		semantic_error("Redefine " idx " in " tname)

	# check if escaped opcode
	if ("escape" == $2) {
		if ($3 != "#")
			semantic_error("No escaped name")
		# The escape name is the rest of the line with blanks removed,
		# built the same way as in the "Referrer:" rule.
		ref = ""
		for (i = 4; i <= NF; i++)
			ref = ref $i
		if (ref in escape)
			semantic_error("Redefine escape (" ref ")")
		escape[ref] = geid
		geid++
		table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")"
		next
	}

	variant = null
	# converts
	i = 2
	while (i <= NF) {
		opcode = $(i++)
		delete opnds
		ext = null
		flags = null
		opnd = null
		# parse one opcode
		if (match($i, opnd_expr)) {
			opnd = $i
			count = split($(i++), opnds, ",")
			flags = convert_operands(count, opnds)
		}
		if (match($i, ext_expr))
			ext = $(i++)
		if (match($i, sep_expr))
			i++
		else if (i < NF)
			semantic_error($i " is not a separator")

		# check if group opcode
		if (match(opcode, group_expr)) {
			# first reference to a group allocates its id
			if (!(opcode in group)) {
				group[opcode] = ggid
				ggid++
			}
			flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")")
		}
		# check force(or default) 64bit
		if (match(ext, force64_expr))
			flags = add_flags(flags, "INAT_FORCE64")

		# check REX prefix
		if (match(opcode, rex_expr))
			flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)")

		# check coprocessor escape : TODO
		if (match(opcode, fpu_expr))
			flags = add_flags(flags, "INAT_MODRM")

		# check VEX only code
		if (match(ext, vexonly_expr))
			flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")

		# check VEX-capable code
		if (match(ext, vexok_expr))
			flags = add_flags(flags, "INAT_VEXOK")

		# check prefixes
		if (match(ext, prefix_expr)) {
			if (!prefix_num[opcode])
				semantic_error("Unknown prefix: " opcode)
			flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")")
		}
		# nothing to record for this alternative
		if (length(flags) == 0)
			continue
		# check if last prefix
		if (match(ext, lprefix1_expr)) {
			lptable1[idx] = add_flags(lptable1[idx],flags)
			variant = "INAT_VARIANT"
		} else if (match(ext, lprefix2_expr)) {
			lptable2[idx] = add_flags(lptable2[idx],flags)
			variant = "INAT_VARIANT"
		} else if (match(ext, lprefix3_expr)) {
			lptable3[idx] = add_flags(lptable3[idx],flags)
			variant = "INAT_VARIANT"
		} else {
			table[idx] = add_flags(table[idx],flags)
		}
	}
	# Mark the base entry when any last-prefix variant exists for it.
	if (variant)
		table[idx] = add_flags(table[idx],variant)
}

# Print the arrays that map (id, last-prefix variant) to the tables printed
# above.  Nothing is printed when the awk sanity check failed in BEGIN.
# NOTE(review): "const insn_attr_t const *" repeats the qualifier on the
# pointee; "* const" may have been intended.  The text is kept as is here -
# confirm against the code that consumes these arrays before changing it.
END {
	if (awkchecked != "")
		exit 1
	# print escape opcode map's array
	print "/* Escape opcode map array */"
	print "const insn_attr_t const *inat_escape_tables[INAT_ESC_MAX + 1]" \
	      "[INAT_LSTPFX_MAX + 1] = {"
	for (i = 0; i < geid; i++)
		for (j = 0; j < max_lprefix; j++)
			if (etable[i,j])
				print " ["i"]["j"] = "etable[i,j]","
	print "};\n"
	# print group opcode map's array
	print "/* Group opcode map array */"
	print "const insn_attr_t const *inat_group_tables[INAT_GRP_MAX + 1]"\
	      "[INAT_LSTPFX_MAX + 1] = {"
	for (i = 0; i < ggid; i++)
		for (j = 0; j < max_lprefix; j++)
			if (gtable[i,j])
				print " ["i"]["j"] = "gtable[i,j]","
	print "};\n"
	# print AVX opcode map's array
	print "/* AVX opcode map array */"
	print "const insn_attr_t const *inat_avx_tables[X86_VEX_M_MAX + 1]"\
	      "[INAT_LSTPFX_MAX + 1] = {"
	for (i = 0; i < gaid; i++)
		for (j = 0; j < max_lprefix; j++)
			if (atable[i,j])
				print " ["i"]["j"] = "atable[i,j]","
	print "};"
}