1import re
2
3'''
4Regular expression based single instruction parsing utility.
Appropriated from previous work for interfacing with Chronos.
6Ideally, fix me by using a proper parser and/or the arm model.
7The regexes are directly copied from Bernard's quoll, particularly machine_arm.py
This file used to be a part of a static execution engine used by quoll.
9'''
10
11'''
The following constants culminate in valid_instruction_re, a regex search pattern used to split an instruction mnemonic into the base instruction and all the
possible modifiers that can apply to it.
14'''
# Mnemonics that accept the optional "s" suffix (set condition codes).
# They are kept apart from the remaining mnemonics so that something like
# "bls" is never read as "bl" followed by "s".
valid_arith_instructions = (
    # Two-operand forms.
    ('mov', 'mvn', 'movw', 'movt', 'clz', 'rrx')
    # Three-operand forms.
    + ('add', 'adc', 'sub', 'sbc', 'rsb', 'rsc')
    + ('and', 'orr', 'bic', 'eor')
    + ('lsl', 'lsr', 'asr', 'ror')
    + ('mul', 'smulbb', 'smultb')
    # Four-operand forms.
    + ('mla', 'umull', 'umlal', 'smlabb', 'smull', 'smlal')
    + ('ubfx', 'sbfx', 'bfi', 'bfc')
)
35
# Mnemonics that never take the "s" suffix.  Two entries are regex fragments
# rather than plain names: they capture the optional "dirflags" and
# "cpsflags" suffixes as named groups.
valid_other_instructions = (
    # Stack helpers.
    ('push', 'pop')
    # Comparisons and byte/halfword extension.
    + ('cmp', 'cmn', 'tst', 'teq')
    + ('uxtb', 'uxtab', 'sxtb', 'uxth', 'sxth')
    # Single-register stores and loads.
    + ('str', 'strb', 'strh', 'strd')
    + ('ldr', 'ldrb', 'ldrh', 'ldrd')
    + ('ldrsh', 'ldrsb')
    + ('ldrex', 'strex')
    + ('strt', 'strbt', 'ldrt', 'ldrbt')
    # Mnemonics that may carry a two-letter [di][ba] suffix.
    + ('(?:ldm|stm|srs|rfe|dmb)(?P<dirflags>[di][ba])?',)
    # Branches.
    + ('b', 'bl', 'blx', 'bx')
    # Coprocessor and status-register access.
    + ('mcr', 'mrc', 'mcrr')
    + ('msr', 'mrs')
    + ('cps(?P<cpsflags>i[de])?',)
    # Miscellaneous.
    + ('nop', 'isb', 'dsb', 'swp')
    + ('vmrs', 'vmsr', 'vstmia', 'vldmia')
    + ('svc',)
)
56
# Condition suffixes accepted after a mnemonic; the leading empty string
# covers an instruction written without any condition.
valid_conditions = ('',) + tuple(
    'ne eq cs hs cc lo mi pl vs vc hi ls ge lt gt le'.split())
63
# Splits a full mnemonic into its parts.  An arithmetic mnemonic fills the
# groups instruction1 / setcc / cond1; any other mnemonic fills
# instruction2 / cond2 (plus dirflags or cpsflags when that suffix is
# present).  Groups belonging to the branch that did not match are None.
valid_instruction_re = re.compile(
    r'''^(?:
            (?P<instruction1>%(arith_instructions)s)
            (?P<setcc>s?)
            (?P<cond1>%(conditions)s) |
            (?P<instruction2>%(other_instructions)s)
            (?P<cond2>%(conditions)s)
        )$''' % dict(
            arith_instructions='|'.join(valid_arith_instructions),
            other_instructions='|'.join(valid_other_instructions),
            conditions='|'.join(valid_conditions),
        ),
    flags=re.VERBOSE)
76
77#
78# The following regexes take the arguments of a specific instruction (whose
79# form we already know), and extract all the relevant arguments and operands
80# from the instruction.
81
# Canonical register names accepted in operand lists.  'cc' is the
# pseudo-register the decoders below use for the condition codes.
all_registers = (
    'r0', 'r1',  'r2',  'r3',  'r4',  'r5',  'r6',  'r7',
    'r8', 'r9', 'r10', 'r11', 'r12',  'sp',  'lr', 'pc',
    'cc',
    #'mode',
)
# Alternative spellings, mapped to the canonical name they stand for.
aliases = {
    'sl': 'r10',
    'fp': 'r11',
    'ip': 'r12',
    'r13':'sp',
    'r14':'lr',
    'r15':'pc',
}

# Regex alternation of every accepted register spelling.  It is always
# substituted inside a group, so it carries no parentheses of its own.
# list(aliases) is used instead of aliases.keys() because a dict view cannot
# be concatenated to a list on Python 3; on Python 2 the result is unchanged.
any_register = '|'.join(list(all_registers) + list(aliases))

# Operands of a single-register load/store:
#   [<reg>,] <target_reg>, [<base_addr_reg>{, #imm | , <reg>{, <shift> #n}}]
# optionally followed by "!" or a post-index register/immediate, and by a
# trailing "; comment".
ldrstr_args_re = re.compile(
    r'''(?:(?:%(any_register)s),\s*)?
        (?P<target_reg>%(any_register)s),\s*
        \[
        (?P<base_addr_reg>%(any_register)s)\s*
        (?:,\s*
            (?:
                \#(?P<incr_val>-?[0-9]+) |
                (?P<incr_reg>%(any_register)s)\s*
                (?:,\s*
                    (?P<shift_method>lsl|lsr|asr|ror|rrx)\s+
                    \#(?P<shift_amount>[0-9]+)
                )?
            )
        )?
    \]
    (?:
        (?P<writeback> !) |
        ,\s* (?P<writeback_incr_reg>%(any_register)s) |
        ,\s* \#(?P<writeback_incr_amount>-?[0-9]+)
    )?\s*(;.*)?
    $''' % {'any_register' : any_register},
    re.X)

# Flexible second operand: either "#imm" or a register that may be shifted
# by an immediate or by another register.
operand2 = r'''(?:
            \#(?P<op2_val>-?[0-9]+) |
            (?:
                (?P<op2_reg>%(any_register)s
                )
                (?:,\s*
                    (?P<shift_method>lsl|lsr|asr|ror|rrx)\s+
                    (?:
                        \#(?P<shift_amount>[0-9]+) |
                        (?P<shift_by_reg>%(any_register)s)
                    )
                )?
            )
        )'''

# "<target_reg>, <operand2>" with an optional trailing "; comment".
# The tail is a raw string so that "\s" is not an invalid string escape.
onereg_and_operand2_re = re.compile(
    (r'''(?P<target_reg>%(any_register)s),\s*''' + operand2 + r'(\s*;.*)?$') % {
            'any_register' : any_register},
    re.X)

# "<target_reg>, <source_reg>, <operand2>" with an optional trailing
# "; comment".
tworegs_and_operand2_re = re.compile(
    (r'''(?P<target_reg>%(any_register)s),\s*
        (?P<source_reg>%(any_register)s),\s*''' + operand2 + r'(\s*;.*)?$') % {
            'any_register' : any_register},
    re.X)
147
148
149
150#just used for decoding for us
class ARMInstruction:
    '''Base record for one disassembled ARM instruction.

    The constructor only stores what it is given and prepares the
    input_registers / output_registers lists; subclasses fill those lists
    in decode().
    '''
    def __init__(self, addr, value, disassembly,
            mnemonic, condition, dirflags, cpsflags, setcc, args):
        # Where the instruction came from.
        self.addr, self.value, self.disassembly = addr, value, disassembly

        # The pieces the mnemonic was split into, plus the raw operands.
        self.mnemonic, self.condition = mnemonic, condition
        self.dirflags, self.cpsflags = dirflags, cpsflags
        self.setcc = setcc
        self.args = args
        self.is_loop_cond = False

        # An instruction that sets the condition codes writes 'cc'.
        self.input_registers = []
        self.output_registers = ['cc'] if self.setcc else []

        # decode() is deliberately not invoked here; it is left to the
        # caller once the object exists.

    def decode(self):
        '''Parse self.args; every concrete subclass must override this.'''
        raise NotImplementedError()
180
181
class LoadStoreInstruction(ARMInstruction):
    '''ARM single-register load/store (ldr/str with b, h, sb, sh, ex and
    bt variants).

    decode() validates the mnemonic shape and records the registers and
    immediates named in the operands:

    * output_registers gets the transfer register (target_reg);
    * input_registers gets the base register, then any index register or
      '#'-prefixed immediate offset, then any post-index register or
      immediate.

    shift_reg / shift_val / shift_mode are set as attributes only when the
    corresponding group matched.

    Raises AssertionError when the operands do not match ldrstr_args_re or
    the mnemonic is not a recognised ldr/str form.
    '''
    def decode(self):
        g = ldrstr_args_re.match(self.args)
        assert g is not None
        args = g.groupdict()

        # Reduce the mnemonic to "ldr"/"str" plus an optional size letter so
        # that its shape can be checked.
        tmp_mnemonic = self.mnemonic
        if tmp_mnemonic[-2:] == 'ex':
            tmp_mnemonic = tmp_mnemonic[:3] + tmp_mnemonic[5:]

        # The trailing "t" of ldrbt/strbt is dropped from the stored
        # mnemonic.  It has to be dropped from the working copy as well,
        # otherwise "ldrbt" reaches the five-letter check below and fails
        # the assertion meant for the sign-extending forms.
        if self.mnemonic.endswith('t'):
            self.mnemonic = self.mnemonic[:-1]
            tmp_mnemonic = tmp_mnemonic[:-1]

        if len(tmp_mnemonic) == 5:
            # Sign-extending form (ldrsb/ldrsh): fold away the "s".
            assert tmp_mnemonic[-2] == 's'
            tmp_mnemonic = tmp_mnemonic[:3] + tmp_mnemonic[-1:]

        if len(tmp_mnemonic) == 4:
            # Byte or halfword access.
            assert tmp_mnemonic[-1] in ('b', 'h')
        else:
            # Plain word access.
            assert len(tmp_mnemonic) == 3

        assert tmp_mnemonic.startswith(('ldr', 'str'))

        # Record input and output registers.
        # NOTE: the transfer register is recorded as an output for stores as
        # well as loads; this block makes no load/store distinction, and the
        # base register is never recorded as an output even with writeback.
        self.output_registers.append(args['target_reg'])
        self.input_registers.append(args['base_addr_reg'])
        if args['incr_reg']:
            self.input_registers.append(args['incr_reg'])
        if args['incr_val']:
            self.input_registers.append('#' + args['incr_val'])
        if args['writeback_incr_reg']:
            self.input_registers.append(args['writeback_incr_reg'])
        if args['writeback_incr_amount']:
            self.input_registers.append('#' + args['writeback_incr_amount'])

        # .get() is used because not every group name exists in
        # ldrstr_args_re (it has no shift_by_reg group).
        if args.get('shift_by_reg') is not None:
            self.shift_reg = args['shift_by_reg']
        if args.get('shift_amount') is not None:
            self.shift_val = args['shift_amount']
        if args.get('shift_method') is not None:
            self.shift_mode = args['shift_method']
253
254
class LoadStoreMultipleInstruction(ARMInstruction):
    '''ARM ldm*/stm* instruction.

    decode() expects self.args to look like "<base>[!], {reg, reg, ...}[^]"
    with an optional trailing "; comment", which is removed from self.args.

    * A "!" on the base register adds the marker 'writeback' to
      output_registers.
    * ldm: listed registers are outputs, the base register is an input.
    * stm: listed registers are inputs, the base register is recorded as
      an output.
    * With a decrementing direction flag the register list is reversed
      before it is recorded.

    Raises AssertionError for a malformed register list, an invalid
    direction flag, or a mnemonic other than ldm/stm; ValueError when the
    operands contain no comma.
    '''
    def decode(self):
        # Strip everything after the ';', if any.
        self.args = self.args.split(';', 1)[0]
        addr_reg, reg_list = [x.strip() for x in self.args.split(',', 1)]

        if addr_reg.endswith('!'):
            self.output_registers.append('writeback')
            addr_reg = addr_reg.rstrip('!')

        if reg_list.endswith('^'):
            # Saving/copying user-mode registers.
            # TODO: not modelled; we should at least warn the user.
            reg_list = reg_list.rstrip('^')
        assert reg_list.startswith('{')
        assert reg_list.endswith('}')

        rw_regs = [x.strip() for x in reg_list.strip('{}').split(',')]

        if self.dirflags is not None:
            direction = self.dirflags[0]
            if direction == 'd':
                # Decrementing transfers are recorded in reverse order.
                rw_regs.reverse()
            else:
                assert direction == 'i', \
                    "Invalid direction flag (%s). Wanted i or d." % (
                        direction)
            timing = self.dirflags[1]
            # Report the offending flag itself (index 1, not the direction
            # flag at index 0).
            assert timing in ('a', 'b'), \
                "Invalid after/before flag (%s). Wanted a or b." % (
                    timing)

        if self.mnemonic == 'ldm':
            self.output_registers.extend(rw_regs)
            self.input_registers.append(addr_reg)
        elif self.mnemonic == 'stm':
            self.input_registers.extend(rw_regs)
            self.output_registers.append(addr_reg)
        else:
            assert False, "Not an ldm/stm"
312
class PushPopInstruction(LoadStoreMultipleInstruction):
    '''push/pop, decoded by rewriting them as the equivalent stm/ldm on
    "sp!" and delegating to LoadStoreMultipleInstruction.'''
    def decode(self):
        # mnemonic -> (replacement mnemonic, direction flags)
        equivalents = {
            'push': ('stm', 'db'),
            'pop': ('ldm', 'ia'),
        }
        rewrite = equivalents.get(self.mnemonic)
        if rewrite is None:
            assert False, "Expected a push/pop to be a push or pop! Not %s" % (
                self.mnemonic)
        else:
            self.mnemonic, self.dirflags = rewrite
            self.args = 'sp!, %s' % (self.args)

        LoadStoreMultipleInstruction.decode(self)
329
class ArithmeticInstruction(ARMInstruction):
    '''Three-operand ARM arithmetic instruction: "op <dst>, <src>, <operand2>".

    The destination is recorded as an output; the source register and
    operand2 (a register, or an immediate kept with its '#') as inputs.
    '''
    def decode(self):
        match = tworegs_and_operand2_re.match(self.args)
        assert match is not None, "Failed to match op2: %s" % self.args
        fields = match.groupdict()

        # Destination first, then the operands that are read.
        self.output_registers.append(fields['target_reg'])
        self.input_registers.append(fields['source_reg'])
        op2_reg, op2_val = fields['op2_reg'], fields['op2_val']
        if op2_reg is not None:
            self.input_registers.append(op2_reg)
        if op2_val is not None:
            self.input_registers.append('#' + op2_val)

        # Shift details become attributes only when they were present.
        for group, attribute in (('shift_by_reg', 'shift_reg'),
                                 ('shift_amount', 'shift_val'),
                                 ('shift_method', 'shift_mode')):
            found = fields.get(group)
            if found is not None:
                setattr(self, attribute, found)
353
class RotateRighteXtendInstruction(ArithmeticInstruction):
    '''rrx: like an arithmetic instruction but with only a destination and
    operand2.'''

    def decode(self):
        match = onereg_and_operand2_re.match(self.args)
        assert match is not None, "Failed to match op2: %s" % self.args
        fields = match.groupdict()

        # Destination is written; operand2 is read.
        self.output_registers.append(fields['target_reg'])
        op2_reg, op2_val = fields['op2_reg'], fields['op2_val']
        if op2_reg is not None:
            self.input_registers.append(op2_reg)
        if op2_val is not None:
            self.input_registers.append('#' + op2_val)

        # Shift details become attributes only when they were present.
        for group, attribute in (('shift_by_reg', 'shift_reg'),
                                 ('shift_amount', 'shift_val'),
                                 ('shift_method', 'shift_mode')):
            found = fields.get(group)
            if found is not None:
                setattr(self, attribute, found)
376
377
class MoveInstruction(ARMInstruction):
    '''Two-operand ARM move: "op <dst>, <operand2>".

    The destination is recorded as an output and operand2 (register, or
    immediate kept with its '#') as an input.  Shift details are not
    recorded by this class.
    '''
    def decode(self):
        match = onereg_and_operand2_re.match(self.args)
        assert match is not None
        fields = match.groupdict()

        self.output_registers.append(fields['target_reg'])
        op2_reg, op2_val = fields['op2_reg'], fields['op2_val']
        if op2_reg is not None:
            self.input_registers.append(op2_reg)
        if op2_val is not None:
            self.input_registers.append('#' + op2_val)
392
class HalfMoveInstruction(ARMInstruction):
    '''ARM halfmove instruction (movt/movw): "op <dst>, #<imm>".

    The destination is recorded as an output and the immediate (with its
    '#') as an input.  movt also records the destination as an input,
    because it keeps the lower 16 bits of the register.

    A trailing "; comment" in the operands is ignored, as in the other
    decoders.

    Raises AssertionError for a wrong mnemonic or an immediate without '#',
    and ValueError when the operand count is not two or the immediate is
    not a decimal integer.
    '''
    def decode(self):
        assert self.mnemonic in ('movt', 'movw')
        top_half = self.mnemonic[-1] == 't'

        # Drop a trailing "; ..." comment before splitting; otherwise it
        # ends up inside the immediate and the integer check below fails.
        operands = self.args.split(';', 1)[0]
        dst_reg, imm = [x.strip() for x in operands.split(',')]
        assert imm.startswith('#')
        # Validation only: the numeric value itself is not used here.
        int(imm[1:])

        # Record input and output registers.
        self.output_registers.append(dst_reg)
        self.input_registers.append(imm)
        if top_half:
            # We preserve the lower 16 bits of this.
            self.input_registers.append(dst_reg)
410
class CompareInstruction(ARMInstruction):
    '''Two-operand ARM comparison: "op <reg>, <operand2>".

    Nothing but the condition codes is written, so 'cc' is the only output;
    both operands are recorded as inputs.
    '''
    def decode(self):
        match = onereg_and_operand2_re.match(self.args)
        assert match is not None
        fields = match.groupdict()

        self.output_registers.append('cc')
        # The regex group is called "target_reg", but here it is read.
        self.input_registers.append(fields['target_reg'])
        op2_reg, op2_val = fields['op2_reg'], fields['op2_val']
        if op2_reg is not None:
            self.input_registers.append(op2_reg)
        if op2_val is not None:
            self.input_registers.append('#' + op2_val)

        # Shift details become attributes only when they were present.
        for group, attribute in (('shift_by_reg', 'shift_reg'),
                                 ('shift_amount', 'shift_val'),
                                 ('shift_method', 'shift_mode')):
            found = fields.get(group)
            if found is not None:
                setattr(self, attribute, found)
434
class BranchInstruction(ARMInstruction):
    '''Placeholder for branches: decoding records no registers.'''
    def decode(self):
        pass
439
440
class IndirectBranchInstruction(ARMInstruction):
    '''Branch through a register: the operand text is recorded as an input
    and 'pc' as the output.'''
    def decode(self):
        self.input_registers.append(self.args)
        self.output_registers.append('pc')
446
class ReturnFromExceptionInstruction(ARMInstruction):
    '''rfe: accepted, but decoding records nothing.'''
    def decode(self):
        return
451
class NopInstruction(ARMInstruction):
    '''The ARM nop instruction: decoding has nothing to record.'''
    def decode(self):
        return
457
class UnhandledInstruction(NopInstruction):
    '''Fallback for mnemonics without a dedicated decoder.

    The instruction is treated like a nop and a notice with its mnemonic
    and address is written to standard output.
    '''
    def decode(self):
        NopInstruction.decode(self)
        # Single parenthesised argument: prints the same text whether print
        # is a statement (Python 2) or a function (Python 3).
        print('Unhandled instruction "%s" at %#x' % (self.mnemonic, self.addr))
463
class MRCInstruction(ARMInstruction):
    '''Dummy decoder for the ARM mrc instruction.

    Requires exactly six comma-separated operands; the third one is stored
    as self.reg and recorded as an output.
    '''
    def decode(self):
        # Effectively a nop: only the third operand is of interest.
        operands = [piece.strip() for piece in self.args.split(',')]
        _cp, _op2, reg, _cp0, _cp1, _op1 = operands

        self.reg = reg
        self.output_registers.append(reg)
472
473
class MCRInstruction(ARMInstruction):
    '''Dummy decoder for the ARM mcr instruction.

    Requires exactly six comma-separated operands; the third one is stored
    as self.reg and recorded as an input.
    '''
    def decode(self):
        # Effectively a nop: only the third operand is of interest.
        operands = [piece.strip() for piece in self.args.split(',')]
        _cp, _op2, reg, _cp0, _cp1, _op1 = operands

        self.reg = reg
        self.input_registers.append(reg)
482
483
class BitFieldExtractInstruction(ARMInstruction):
    '''ARM ubfx/sbfx: "op <dst>, <src>, #<start>, #<length>".

    The destination is recorded as an output and the source as an input.
    Both immediates must carry a '#' and parse as integers, but their
    values are not recorded.
    '''

    def decode(self):
        assert self.mnemonic in ('ubfx', 'sbfx')
        # Computed for completeness; nothing below consumes it.
        is_signed = (self.mnemonic[0] == 's')

        pieces = [piece.strip() for piece in self.args.split(',')]
        dst_reg, src_reg, start_bit, bit_length = pieces
        assert start_bit[0] == '#'
        assert bit_length[0] == '#'
        # Integer conversion doubles as validation of the immediates.
        start_bit = int(start_bit[1:])
        bit_length = int(bit_length[1:])

        self.output_registers.append(dst_reg)
        self.input_registers.append(src_reg)
500
class SignExtendInstruction(ARMInstruction):
    '''ARM [us]xt[bh]: "op <dst>, <src>".

    Requires exactly two comma-separated operands; the first is recorded
    as an output and the second as an input.
    '''
    def decode(self):
        assert self.mnemonic in ('uxtb', 'sxtb', 'uxth', 'sxth')

        operands = [piece.strip() for piece in self.args.split(',')]
        dst_reg, src_reg = operands

        self.output_registers.append(dst_reg)
        self.input_registers.append(src_reg)
513
# Groups of base mnemonics and the decoder class that handles each group.
# Mnemonics missing from this table are not decoded by any of these classes.
mnemonic_groups_to_class_map = {
    ('ldr', 'str',
     'ldrb', 'ldrsb', 'strb',
     'ldrh', 'ldrsh', 'strh',
     'ldrex', 'strex',
     'ldrbt', 'strbt'): LoadStoreInstruction,
    ('ldm', 'stm'): LoadStoreMultipleInstruction,
    ('push', 'pop'): PushPopInstruction,
    ('add', 'adc', 'sub', 'sbc', 'rsb', 'rsc',
     'and', 'orr', 'bic', 'eor',
     'lsl', 'lsr', 'asr', 'ror',
     'mul'): ArithmeticInstruction,
    ('rrx',): RotateRighteXtendInstruction,
    ('mov', 'mvn'): MoveInstruction,
    ('movt', 'movw'): HalfMoveInstruction,
    ('nop',): NopInstruction,
    ('cmp', 'cmn', 'tst', 'teq'): CompareInstruction,
    ('b', 'bl'): BranchInstruction,
    ('bx', 'blx'): IndirectBranchInstruction,
    ('rfe',): ReturnFromExceptionInstruction,
    ('mrc',): MRCInstruction,
    ('mcr',): MCRInstruction,
    ('ubfx', 'sbfx'): BitFieldExtractInstruction,
    ('uxtb', 'sxtb', 'uxth', 'sxth'): SignExtendInstruction,
    #('bfi',): BitFieldInsertInstruction,
    #('bfc',): BitFieldClearInstruction,

    # Instructions that we can just treat as nops for now (FIXME)
    ('cps', 'mcrr', 'isb', 'dsb'): NopInstruction,

    # Don't bother simulating VFP
    ('vmrs', 'vmsr', 'vstmia', 'vldmia'): NopInstruction,

    # FIXME
    ('swp', 'svc'): NopInstruction,
}

# Flatten the table above into a mnemonic -> class map.  items() is used
# rather than iteritems() so the module also loads on Python 3; the
# resulting dict is the same on Python 2.
mnemonic_to_class_map = dict(
    (m, c)
    for ms, c in mnemonic_groups_to_class_map.items()
    for m in ms)
555
class FatalError(Exception):
    '''Raised by decode_instruction when a mnemonic cannot be recognised.'''


def decode_instruction(addr, value, decoding):
    '''Decode one line of disassembly into an ARMInstruction subclass.

    addr      -- address of the instruction; formatted with %#x in
                 messages, so it must be an integer.
    value     -- stored unchanged on the returned object.
    decoding  -- disassembly text: a mnemonic, optionally followed by
                 whitespace and the operands.

    The mnemonic is split into base instruction, condition, "s" flag and
    direction/cps flags by valid_instruction_re.  The decoder class is then
    looked up in mnemonic_to_class_map, with UnhandledInstruction as the
    fallback, instantiated, and its decode() is called.

    Returns the decoded instruction object.  Its args attribute is the
    operand string, or an empty list when the line has no operands.

    Raises FatalError when the text is empty or the mnemonic does not match
    valid_instruction_re.  Errors raised by the chosen decode() propagate.
    '''
    decoding = decoding.strip()
    bits = decoding.split(None, 1)
    if not bits:
        raise FatalError("Empty instruction at address %#x" % (addr,))
    if len(bits) == 1:
        instruction, args = bits[0], []
    else:
        instruction, args = bits

    g = valid_instruction_re.match(instruction)
    if g is None:
        raise FatalError("Unknown instruction %s at address %#x" % (instruction, addr))

    # Only one branch of the regex matched; take the instruction and
    # condition from whichever branch that was.
    instruction = g.group('instruction1')
    if instruction is None:
        instruction = g.group('instruction2')

    condition = g.group('cond1')
    if condition is None:
        condition = g.group('cond2')

    dirflags = g.group('dirflags')
    cpsflags = g.group('cpsflags')
    setcc = g.group('setcc') == 's'

    # The matched instruction text still includes any direction/cps suffix;
    # trim it to get the base mnemonic used as the lookup key.
    if dirflags is not None:
        instruction = instruction[:-len(dirflags)]
    if cpsflags is not None:
        instruction = instruction[:-len(cpsflags)]

    cls = mnemonic_to_class_map.get(instruction, UnhandledInstruction)

    arm_inst = cls(addr, value, decoding,
        instruction, condition, dirflags, cpsflags, setcc, args)
    arm_inst.decode()
    return arm_inst
603