'''
Regular expression based single instruction parsing utility.
Appropriated from previous work for interfacing with Chronos.
Ideally, fix me by using a proper parser and/or the arm model.
The regexes are directly copied from Bernard's quoll, particularly machine_arm.py
This file used to be a part of a static execution engine used by Quoll.
'''

import re


class FatalError(Exception):
    '''Raised by decode_instruction when a mnemonic cannot be recognised.'''


# The following constants culminate in valid_instruction_re, a regex search
# pattern which is used to decipher an instruction mnemonic into the base
# instruction and all the possible modifiers that can apply to it.

# These instructions can have the s suffix to set condition codes.
# They are separated to avoid ambiguity from things like "bls".
valid_arith_instructions = (
    # 2 operands:
    'mov', 'mvn',
    'movw',
    'movt',
    'clz',
    'rrx',
    # 3 operands:
    'add', 'adc', 'sub', 'sbc', 'rsb', 'rsc',
    'and', 'orr', 'bic', 'eor',
    'lsl', 'lsr', 'asr', 'ror',
    'mul',
    'smulbb', 'smultb',
    # 4 operands:
    'mla', 'umull', 'umlal', 'smlabb', 'smull', 'smlal',
    'ubfx', 'sbfx', 'bfi', 'bfc',
)

# These instructions cannot have the s suffix.
valid_other_instructions = (
    'push', 'pop',
    'cmp', 'cmn', 'tst', 'teq', 'uxtb', 'uxtab', 'sxtb', 'uxth', 'sxth',
    'str', 'strb', 'strh', 'strd', 'ldr', 'ldrb', 'ldrh', 'ldrd',
    'ldrsh', 'ldrsb',
    'ldrex', 'strex',
    'strt', 'strbt', 'ldrt', 'ldrbt',
    '(?:ldm|stm|srs|rfe|dmb)(?P<dirflags>[di][ba])?',
    'b', 'bl', 'blx', 'bx',
    'mcr', 'mrc', 'mcrr',
    'msr', 'mrs',
    'cps(?P<cpsflags>i[de])?',
    'nop',
    'isb',
    'dsb',
    'swp',
    'vmrs', 'vmsr', 'vstmia', 'vldmia',
    'svc',
)

# The empty string stands for "no condition suffix".
valid_conditions = (
    '', 'ne', 'eq',
    'cs', 'hs',
    'cc', 'lo',
    'mi', 'pl', 'vs', 'vc', 'hi', 'ls', 'ge', 'lt', 'gt', 'le',
)

valid_instruction_re = re.compile(
    r'''^(?:
        (?P<instruction1>%(arith_instructions)s)
        (?P<setcc>s?)
        (?P<cond1>%(conditions)s) |
        (?P<instruction2>%(other_instructions)s)
        (?P<cond2>%(conditions)s)
    )$''' % {
        'arith_instructions': '|'.join(valid_arith_instructions),
        'other_instructions': '|'.join(valid_other_instructions),
        'conditions': '|'.join(valid_conditions)
    }, re.X)

#
# The following regexes take the arguments of a specific instruction (whose
# form we already know), and extract all the relevant arguments and operands
# from the instruction.

all_registers = (
    'r0', 'r1', 'r2', 'r3', 'r4', 'r5', 'r6', 'r7',
    'r8', 'r9', 'r10', 'r11', 'r12', 'sp', 'lr', 'pc',
    'cc',
    #'mode',
)
aliases = {
    'sl': 'r10',
    'fp': 'r11',
    'ip': 'r12',
    'r13': 'sp',
    'r14': 'lr',
    'r15': 'pc',
}

# list(aliases) yields the alias names on both Python 2 and Python 3
# (dict.keys() is a view on Python 3 and cannot be added to a list).
any_register = '|'.join(list(all_registers) + list(aliases))

ldrstr_args_re = re.compile(
    r'''(?:(?:%(any_register)s),\s*)?
        (?P<target_reg>%(any_register)s),\s*
        \[
            (?P<base_addr_reg>%(any_register)s)\s*
            (?:,\s*
                (?:
                    \#(?P<incr_val>-?[0-9]+) |
                    (?P<incr_reg>%(any_register)s)\s*
                    (?:,\s*
                        (?P<shift_method>lsl|lsr|asr|ror|rrx)\s+
                        \#(?P<shift_amount>[0-9]+)
                    )?
                )
            )?
        \]
        (?:
            (?P<writeback> !) |
            ,\s* (?P<writeback_incr_reg>%(any_register)s) |
            ,\s* \#(?P<writeback_incr_amount>-?[0-9]+)
        )?\s*(;.*)?
        $''' % {'any_register': any_register},
    re.X)

operand2 = r'''(?:
        \#(?P<op2_val>-?[0-9]+) |
        (?:
            (?P<op2_reg>%(any_register)s
            )
            (?:,\s*
                (?P<shift_method>lsl|lsr|asr|ror|rrx)\s+
                (?:
                    \#(?P<shift_amount>[0-9]+) |
                    (?P<shift_by_reg>%(any_register)s)
                )
            )?
        )
    )'''

# Optional trailing "; comment" followed by end of string.  Kept as a raw
# string so that "\s" reaches the regex engine untouched.
_trailing_comment = r'(\s*;.*)?$'

onereg_and_operand2_re = re.compile(
    (r'''(?P<target_reg>%(any_register)s),\s*''' + operand2
     + _trailing_comment) % {'any_register': any_register},
    re.X)

tworegs_and_operand2_re = re.compile(
    (r'''(?P<target_reg>%(any_register)s),\s*
         (?P<source_reg>%(any_register)s),\s*''' + operand2
     + _trailing_comment) % {'any_register': any_register},
    re.X)


# Just used for decoding for us.
class ARMInstruction:
    '''Base class holding one decoded instruction.

    The constructor only stores the fields handed to it; subclasses fill in
    input_registers / output_registers from the argument string in decode().
    '''

    def __init__(self, addr, value, disassembly,
                 mnemonic, condition, dirflags, cpsflags, setcc, args):
        self.addr = addr
        self.value = value
        self.disassembly = disassembly

        # Populate member fields with data.
        self.mnemonic = mnemonic
        self.condition = condition
        self.dirflags = dirflags
        self.cpsflags = cpsflags
        self.setcc = setcc
        self.args = args
        self.is_loop_cond = False

        self.output_registers = []
        self.input_registers = []

        # An "s" suffix means the condition codes are written as well.
        if self.setcc:
            self.output_registers.append('cc')

        # decode must be overridden by child classes to work with the
        # specific instructions; it is invoked by decode_instruction().

    def decode(self):
        '''Parse self.args; must be provided by every subclass.'''
        raise NotImplementedError

    def _record_operand2(self, args):
        '''Append the flexible second operand (register or #immediate)
        found in the match dict *args* to input_registers.'''
        if args['op2_reg'] is not None:
            self.input_registers.append(args['op2_reg'])
        if args['op2_val'] is not None:
            self.input_registers.append('#' + args['op2_val'])

    def _record_shift(self, args):
        '''Store shift details from the match dict *args* on the instance.

        shift_reg / shift_val / shift_mode are only created when the
        corresponding group matched, exactly as before, so hasattr()
        checks by users keep working.
        '''
        for group, attr in (('shift_by_reg', 'shift_reg'),
                            ('shift_amount', 'shift_val'),
                            ('shift_method', 'shift_mode')):
            value = args.get(group)
            if value is not None:
                setattr(self, attr, value)


class LoadStoreInstruction(ARMInstruction):
    '''ARM ldr/str[bh] instruction.'''

    def decode(self):
        g = ldrstr_args_re.match(self.args)
        assert g is not None
        args = g.groupdict()

        # tmp_mnemonic is a scratch copy reduced step by step to
        # "ldr"/"str" plus an optional size suffix, purely for validation.
        tmp_mnemonic = self.mnemonic
        if tmp_mnemonic[-2:] == 'ex':
            # ldrex/strex: drop the "ex".
            tmp_mnemonic = tmp_mnemonic[:3] + tmp_mnemonic[5:]

        if self.mnemonic[-1] == 't':
            # ldrbt/strbt: drop the "t" suffix.  The scratch copy has to be
            # trimmed too, otherwise the length checks below see the
            # five-letter form and reject it.
            self.mnemonic = self.mnemonic[:-1]
            tmp_mnemonic = tmp_mnemonic[:-1]

        if len(tmp_mnemonic) == 5:
            # Sign-extending load (ldrsb/ldrsh): fudge the mnemonic down to
            # the plain sized form.
            assert tmp_mnemonic[-2] == 's'
            tmp_mnemonic = tmp_mnemonic[:3] + tmp_mnemonic[-1:]

        if len(tmp_mnemonic) == 4:
            # Byte or halfword access.
            assert tmp_mnemonic[-1] in ('b', 'h')
        else:
            # Word access.
            assert len(tmp_mnemonic) == 3

        assert tmp_mnemonic.startswith(('ldr', 'str'))

        # Record input and output registers.  The target register is
        # recorded as an output for stores as well as loads.
        self.output_registers.append(args['target_reg'])
        self.input_registers.append(args['base_addr_reg'])
        if args['incr_reg']:
            self.input_registers.append(args['incr_reg'])
        if args['incr_val']:
            self.input_registers.append('#' + args['incr_val'])
        if args['writeback_incr_reg']:
            self.input_registers.append(args['writeback_incr_reg'])
        if args['writeback_incr_amount']:
            self.input_registers.append('#' + args['writeback_incr_amount'])
        self._record_shift(args)


class LoadStoreMultipleInstruction(ARMInstruction):
    '''ARM ldm*/stm* instruction.'''

    def decode(self):
        # Strip everything after the ";", if any.
        self.args = self.args.split(';', 1)[0]
        addr_reg, reg_list = [x.strip() for x in self.args.split(',', 1)]
        if addr_reg[-1] == '!':
            # Base register write-back is recorded as a pseudo register.
            self.output_registers.append('writeback')
            addr_reg = addr_reg.rstrip('!')

        if reg_list[-1] == '^':
            # Saving/copying user-mode registers.
            # TODO: We didn't think of that! It doesn't matter too much
            # hopefully. But we should at least warn the user.
            reg_list = reg_list.rstrip('^')
        assert reg_list[0] == '{'
        assert reg_list[-1] == '}'

        rw_regs = [x.strip() for x in reg_list.strip('{}').split(',')]

        if self.dirflags is not None:
            direction, when = self.dirflags[0], self.dirflags[1]
            if direction == 'd':
                # Decrementing forms list the registers in reverse order.
                rw_regs.reverse()
            else:
                assert direction == 'i', \
                    "Invalid direction flag (%s). Wanted i or d." % (
                        direction)
            assert when in ('a', 'b'), \
                "Invalid after/before flag (%s). Wanted a or b." % (when)

        if self.mnemonic == 'ldm':
            self.output_registers.extend(rw_regs)
            self.input_registers.append(addr_reg)
        elif self.mnemonic == 'stm':
            self.input_registers.extend(rw_regs)
            self.output_registers.append(addr_reg)
        else:
            assert False, "Not an ldm/stm"


class PushPopInstruction(LoadStoreMultipleInstruction):
    '''push/pop, decoded by rewriting them as stmdb/ldmia on sp!.'''

    def decode(self):
        # Translate us into a ldm/stm instruction.
        if self.mnemonic == 'push':
            self.mnemonic = 'stm'
            self.dirflags = 'db'
        elif self.mnemonic == 'pop':
            self.mnemonic = 'ldm'
            self.dirflags = 'ia'
        else:
            assert False, "Expected a push/pop to be a push or pop! Not %s" % (
                self.mnemonic)
        self.args = 'sp!, %s' % (self.args)

        LoadStoreMultipleInstruction.decode(self)


class ArithmeticInstruction(ARMInstruction):
    '''ARM arithmetic instruction with 3 arguments and the result is stored
    in the first argument.'''

    def decode(self):
        g = tworegs_and_operand2_re.match(self.args)
        assert g is not None, "Failed to match op2: %s" % self.args
        args = g.groupdict()

        # Record input and output registers.
        self.output_registers.append(args['target_reg'])
        self.input_registers.append(args['source_reg'])
        self._record_operand2(args)
        self._record_shift(args)


class RotateRighteXtendInstruction(ArithmeticInstruction):
    '''rrx - two arguments only.'''

    def decode(self):
        g = onereg_and_operand2_re.match(self.args)
        assert g is not None, "Failed to match op2: %s" % self.args
        args = g.groupdict()

        # Record input and output registers.
        self.output_registers.append(args['target_reg'])
        self._record_operand2(args)
        self._record_shift(args)


class MoveInstruction(ARMInstruction):
    '''ARM move instruction with 2 arguments and the result is stored
    in the first argument.'''

    def decode(self):
        g = onereg_and_operand2_re.match(self.args)
        assert g is not None
        args = g.groupdict()

        # Record input and output registers.  Shift details are not
        # recorded for moves.
        self.output_registers.append(args['target_reg'])
        self._record_operand2(args)


class HalfMoveInstruction(ARMInstruction):
    '''ARM halfmove instruction (movt/movw).'''

    def decode(self):
        assert self.mnemonic in ('movt', 'movw')

        dst_reg, imm = [x.strip() for x in self.args.split(',')]
        assert imm[0] == '#'

        # Record input and output registers.
        self.output_registers.append(dst_reg)
        self.input_registers.append(imm)
        if self.mnemonic[-1] == 't':
            # movt preserves the lower 16 bits of the destination, so the
            # destination is an input as well.
            self.input_registers.append(dst_reg)

        # The value itself is not used; the conversion only checks that the
        # immediate is a decimal integer (ValueError otherwise).
        int(imm[1:])


class CompareInstruction(ARMInstruction):
    '''ARM comparison instruction with 2 arguments; the only output is the
    condition codes.'''

    def decode(self):
        g = onereg_and_operand2_re.match(self.args)
        assert g is not None
        args = g.groupdict()

        # Record input and output registers.
        self.output_registers.append('cc')
        # "target_reg" is a misnomer here: it is the first compared operand.
        self.input_registers.append(args['target_reg'])
        self._record_operand2(args)
        self._record_shift(args)


class BranchInstruction(ARMInstruction):
    '''Nothing we(felix) need from this, just a dummy'''

    def decode(self):
        return


class IndirectBranchInstruction(ARMInstruction):
    '''bx/blx: the whole argument string is taken as the register read,
    and pc is recorded as written.'''

    def decode(self):
        self.input_registers.append(self.args)
        self.output_registers.append('pc')


class ReturnFromExceptionInstruction(ARMInstruction):
    '''Implement rfe.'''

    def decode(self):
        pass


class NopInstruction(ARMInstruction):
    '''Implement the ARM nop instruction.'''

    def decode(self):
        # We do nothing!
        pass


class UnhandledInstruction(NopInstruction):
    '''Treat unhandled instructions like a nop, but report them.'''

    def decode(self):
        NopInstruction.decode(self)
        # Single parenthesised argument: valid on both Python 2 and 3.
        print('Unhandled instruction "%s" at %#x' % (self.mnemonic, self.addr))


class MRCInstruction(ARMInstruction):
    '''Provide a dummy implementation of the ARM mrc instruction.'''

    def decode(self):
        # Effectively a nop; exactly six comma separated operands are
        # expected and only the third (the ARM register) is kept.
        cp, op2, reg, cp0, cp1, op1 = [x.strip() for x in self.args.split(',')]
        self.reg = reg

        self.output_registers.append(reg)


class MCRInstruction(ARMInstruction):
    '''Provide a dummy implementation of the ARM mcr instruction.'''

    def decode(self):
        # Effectively a nop; exactly six comma separated operands are
        # expected and only the third (the ARM register) is kept.
        cp, op2, reg, cp0, cp1, op1 = [x.strip() for x in self.args.split(',')]
        self.reg = reg

        self.input_registers.append(reg)


class BitFieldExtractInstruction(ARMInstruction):
    '''Implement ARM's ubfx/sbfx instruction.'''

    def decode(self):
        assert self.mnemonic in ('ubfx', 'sbfx')

        dst_reg, src_reg, start_bit, bit_length = [
            x.strip() for x in self.args.split(',')]
        assert start_bit[0] == '#'
        assert bit_length[0] == '#'
        # The values are not used; the conversions only check that both
        # immediates are decimal integers (ValueError otherwise).
        int(start_bit[1:])
        int(bit_length[1:])

        # Record input and output registers.
        self.output_registers.append(dst_reg)
        self.input_registers.append(src_reg)


class SignExtendInstruction(ARMInstruction):
    '''Implement ARM's [us]xt[bh] instruction.'''

    def decode(self):
        assert self.mnemonic in ('uxtb', 'sxtb', 'uxth', 'sxth')

        dst_reg, src_reg = [x.strip() for x in self.args.split(',')]

        # Record input and output registers.
        self.output_registers.append(dst_reg)
        self.input_registers.append(src_reg)


mnemonic_groups_to_class_map = {
    ('ldr', 'str',
     'ldrb', 'ldrsb', 'strb',
     'ldrh', 'ldrsh', 'strh',
     'ldrex', 'strex',
     'ldrbt', 'strbt'): LoadStoreInstruction,
    ('ldm', 'stm'): LoadStoreMultipleInstruction,
    ('push', 'pop'): PushPopInstruction,
    ('add', 'adc', 'sub', 'sbc', 'rsb', 'rsc',
     'and', 'orr', 'bic', 'eor',
     'lsl', 'lsr', 'asr', 'ror',
     'mul'): ArithmeticInstruction,
    ('rrx',): RotateRighteXtendInstruction,
    ('mov', 'mvn'): MoveInstruction,
    ('movt', 'movw'): HalfMoveInstruction,
    ('nop',): NopInstruction,
    ('cmp', 'cmn', 'tst', 'teq'): CompareInstruction,
    ('b', 'bl'): BranchInstruction,
    ('bx', 'blx'): IndirectBranchInstruction,
    ('rfe',): ReturnFromExceptionInstruction,
    ('mrc',): MRCInstruction,
    ('mcr',): MCRInstruction,
    ('ubfx', 'sbfx'): BitFieldExtractInstruction,
    ('uxtb', 'sxtb', 'uxth', 'sxth'): SignExtendInstruction,
    #('bfi',): BitFieldInsertInstruction,
    #('bfc',): BitFieldClearInstruction,

    # Instructions that we can just treat as nops for now (FIXME)
    ('cps', 'mcrr', 'isb', 'dsb'): NopInstruction,

    # Don't bother simulating VFP
    ('vmrs', 'vmsr', 'vstmia', 'vldmia'): NopInstruction,

    # FIXME
    ('swp', 'svc'): NopInstruction,
}

# Convert above into mnemonic -> class map.
mnemonic_to_class_map = {
    m: c
    for ms, c in mnemonic_groups_to_class_map.items()
    for m in ms
}


def decode_instruction(addr, value, decoding):
    '''Decode one line of disassembly into an ARMInstruction object.

    addr      -- address of the instruction (used in messages, stored as-is).
    value     -- stored on the result unchanged.
    decoding  -- disassembly text: "<mnemonic> [<operands>]".

    Returns an instance of the ARMInstruction subclass registered for the
    base mnemonic (UnhandledInstruction when none is registered) after its
    decode() method has run.

    Raises FatalError when the mnemonic is not recognised at all.  Operand
    strings that the selected decoder cannot parse surface as the
    AssertionError / ValueError raised by that decoder.
    '''
    decoding = decoding.strip()
    bits = decoding.split(None, 1)
    if len(bits) == 1:
        # No operands at all; the historical placeholder is an empty list.
        instruction, args = bits[0], []
    else:
        instruction, args = bits

    g = valid_instruction_re.match(instruction)
    if g is None:
        raise FatalError("Unknown instruction %s at address %#x" % (
            instruction, addr))

    # Extract relevant data from re match groups; exactly one of the two
    # alternatives (arithmetic / other) took part in the match.
    instruction = g.group('instruction1')
    if instruction is None:
        instruction = g.group('instruction2')

    condition = g.group('cond1')
    if condition is None:
        condition = g.group('cond2')

    dirflags = g.group('dirflags')
    cpsflags = g.group('cpsflags')
    setcc = g.group('setcc') == 's'

    # Trim trailing "ia/fd/etc..." suffixes so only the base mnemonic is
    # left for the class lookup.
    if dirflags is not None:
        instruction = instruction[:-len(dirflags)]
    if cpsflags is not None:
        instruction = instruction[:-len(cpsflags)]

    cls = mnemonic_to_class_map.get(instruction, UnhandledInstruction)

    arm_inst = cls(addr, value, decoding,
                   instruction, condition, dirflags, cpsflags, setcc, args)
    arm_inst.decode()
    return arm_inst