ARMDisassembler.cpp revision 206274
1206124Srdivacky//===- ARMDisassembler.cpp - Disassembler for ARM/Thumb ISA -----*- C++ -*-===// 2206124Srdivacky// 3206124Srdivacky// The LLVM Compiler Infrastructure 4206124Srdivacky// 5206124Srdivacky// This file is distributed under the University of Illinois Open Source 6206124Srdivacky// License. See LICENSE.TXT for details. 7206124Srdivacky// 8206124Srdivacky//===----------------------------------------------------------------------===// 9206124Srdivacky// 10206124Srdivacky// This file is part of the ARM Disassembler. 11206124Srdivacky// It contains code to implement the public interfaces of ARMDisassembler and 12206124Srdivacky// ThumbDisassembler, both of which are instances of MCDisassembler. 13206124Srdivacky// 14206124Srdivacky//===----------------------------------------------------------------------===// 15206124Srdivacky 16206124Srdivacky#define DEBUG_TYPE "arm-disassembler" 17206124Srdivacky 18206124Srdivacky#include "ARMDisassembler.h" 19206124Srdivacky#include "ARMDisassemblerCore.h" 20206124Srdivacky 21206124Srdivacky#include "llvm/MC/MCInst.h" 22206124Srdivacky#include "llvm/Target/TargetRegistry.h" 23206124Srdivacky#include "llvm/Support/Debug.h" 24206124Srdivacky#include "llvm/Support/MemoryObject.h" 25206124Srdivacky#include "llvm/Support/ErrorHandling.h" 26206124Srdivacky#include "llvm/Support/raw_ostream.h" 27206124Srdivacky 28206124Srdivacky/// ARMGenDecoderTables.inc - ARMDecoderTables.inc is tblgen'ed from 29206124Srdivacky/// ARMDecoderEmitter.cpp TableGen backend. It contains: 30206124Srdivacky/// 31206124Srdivacky/// o Mappings from opcode to ARM/Thumb instruction format 32206124Srdivacky/// 33206124Srdivacky/// o static uint16_t decodeInstruction(uint32_t insn) - the decoding function 34206124Srdivacky/// for an ARM instruction. 35206124Srdivacky/// 36206124Srdivacky/// o static uint16_t decodeThumbInstruction(field_t insn) - the decoding 37206124Srdivacky/// function for a Thumb instruction. 38206124Srdivacky/// 39206124Srdivacky#include "../ARMGenDecoderTables.inc" 40206124Srdivacky 41206124Srdivackynamespace llvm { 42206124Srdivacky 43206124Srdivacky/// showBitVector - Use the raw_ostream to log a diagnostic message describing 44206124Srdivacky/// the inidividual bits of the instruction. 45206124Srdivacky/// 46206124Srdivackystatic inline void showBitVector(raw_ostream &os, const uint32_t &insn) { 47206124Srdivacky // Split the bit position markers into more than one lines to fit 80 columns. 48206124Srdivacky os << " 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 15 14 13 12 11" 49206124Srdivacky << " 10 9 8 7 6 5 4 3 2 1 0 \n"; 50206124Srdivacky os << "---------------------------------------------------------------" 51206124Srdivacky << "----------------------------------\n"; 52206124Srdivacky os << '|'; 53206124Srdivacky for (unsigned i = 32; i != 0; --i) { 54206124Srdivacky if (insn >> (i - 1) & 0x01) 55206124Srdivacky os << " 1"; 56206124Srdivacky else 57206124Srdivacky os << " 0"; 58206124Srdivacky os << (i%4 == 1 ? '|' : ':'); 59206124Srdivacky } 60206124Srdivacky os << '\n'; 61206124Srdivacky // Split the bit position markers into more than one lines to fit 80 columns. 62206124Srdivacky os << "---------------------------------------------------------------" 63206124Srdivacky << "----------------------------------\n"; 64206124Srdivacky os << '\n'; 65206124Srdivacky} 66206124Srdivacky 67206124Srdivacky/// decodeARMInstruction is a decorator function which tries special cases of 68206124Srdivacky/// instruction matching before calling the auto-generated decoder function. 69206124Srdivackystatic unsigned decodeARMInstruction(uint32_t &insn) { 70206124Srdivacky if (slice(insn, 31, 28) == 15) 71206124Srdivacky goto AutoGenedDecoder; 72206124Srdivacky 73206124Srdivacky // Special case processing, if any, goes here.... 74206124Srdivacky 75206124Srdivacky // LLVM combines the offset mode of A8.6.197 & A8.6.198 into STRB. 76206124Srdivacky // The insufficient encoding information of the combined instruction confuses 77206124Srdivacky // the decoder wrt BFC/BFI. Therefore, we try to recover here. 78206124Srdivacky // For BFC, Inst{27-21} = 0b0111110 & Inst{6-0} = 0b0011111. 79206124Srdivacky // For BFI, Inst{27-21} = 0b0111110 & Inst{6-4} = 0b001 & Inst{3-0} =! 0b1111. 80206124Srdivacky if (slice(insn, 27, 21) == 0x3e && slice(insn, 6, 4) == 1) { 81206124Srdivacky if (slice(insn, 3, 0) == 15) 82206124Srdivacky return ARM::BFC; 83206124Srdivacky else 84206124Srdivacky return ARM::BFI; 85206124Srdivacky } 86206124Srdivacky 87206124Srdivacky // Ditto for ADDSrs, which is a super-instruction for A8.6.7 & A8.6.8. 88206124Srdivacky // As a result, the decoder fails to decode UMULL properly. 89206124Srdivacky if (slice(insn, 27, 21) == 0x04 && slice(insn, 7, 4) == 9) { 90206124Srdivacky return ARM::UMULL; 91206124Srdivacky } 92206124Srdivacky 93206124Srdivacky // Ditto for STR_PRE, which is a super-instruction for A8.6.194 & A8.6.195. 94206124Srdivacky // As a result, the decoder fails to decode SBFX properly. 95206124Srdivacky if (slice(insn, 27, 21) == 0x3d && slice(insn, 6, 4) == 5) 96206124Srdivacky return ARM::SBFX; 97206124Srdivacky 98206124Srdivacky // And STRB_PRE, which is a super-instruction for A8.6.197 & A8.6.198. 99206124Srdivacky // As a result, the decoder fails to decode UBFX properly. 100206124Srdivacky if (slice(insn, 27, 21) == 0x3f && slice(insn, 6, 4) == 5) 101206124Srdivacky return ARM::UBFX; 102206124Srdivacky 103206124Srdivacky // Ditto for STRT, which is a super-instruction for A8.6.210 Encoding A1 & A2. 104206124Srdivacky // As a result, the decoder fails to deocode SSAT properly. 105206124Srdivacky if (slice(insn, 27, 21) == 0x35 && slice(insn, 5, 4) == 1) 106206124Srdivacky return slice(insn, 6, 6) == 0 ? ARM::SSATlsl : ARM::SSATasr; 107206124Srdivacky 108206124Srdivacky // Ditto for RSCrs, which is a super-instruction for A8.6.146 & A8.6.147. 109206124Srdivacky // As a result, the decoder fails to decode STRHT/LDRHT/LDRSHT/LDRSBT. 110206124Srdivacky if (slice(insn, 27, 24) == 0) { 111206124Srdivacky switch (slice(insn, 21, 20)) { 112206124Srdivacky case 2: 113206124Srdivacky switch (slice(insn, 7, 4)) { 114206124Srdivacky case 11: 115206124Srdivacky return ARM::STRHT; 116206124Srdivacky default: 117206124Srdivacky break; // fallthrough 118206124Srdivacky } 119206124Srdivacky break; 120206124Srdivacky case 3: 121206124Srdivacky switch (slice(insn, 7, 4)) { 122206124Srdivacky case 11: 123206124Srdivacky return ARM::LDRHT; 124206124Srdivacky case 13: 125206124Srdivacky return ARM::LDRSBT; 126206124Srdivacky case 15: 127206124Srdivacky return ARM::LDRSHT; 128206124Srdivacky default: 129206124Srdivacky break; // fallthrough 130206124Srdivacky } 131206124Srdivacky break; 132206124Srdivacky default: 133206124Srdivacky break; // fallthrough 134206124Srdivacky } 135206124Srdivacky } 136206124Srdivacky 137206124Srdivacky // Ditto for SBCrs, which is a super-instruction for A8.6.152 & A8.6.153. 138206124Srdivacky // As a result, the decoder fails to decode STRH_Post/LDRD_POST/STRD_POST 139206124Srdivacky // properly. 140206124Srdivacky if (slice(insn, 27, 25) == 0 && slice(insn, 20, 20) == 0) { 141206124Srdivacky unsigned PW = slice(insn, 24, 24) << 1 | slice(insn, 21, 21); 142206124Srdivacky switch (slice(insn, 7, 4)) { 143206124Srdivacky case 11: 144206124Srdivacky switch (PW) { 145206124Srdivacky case 2: // Offset 146206124Srdivacky return ARM::STRH; 147206124Srdivacky case 3: // Pre-indexed 148206124Srdivacky return ARM::STRH_PRE; 149206124Srdivacky case 0: // Post-indexed 150206124Srdivacky return ARM::STRH_POST; 151206124Srdivacky default: 152206124Srdivacky break; // fallthrough 153206124Srdivacky } 154206124Srdivacky break; 155206124Srdivacky case 13: 156206124Srdivacky switch (PW) { 157206124Srdivacky case 2: // Offset 158206124Srdivacky return ARM::LDRD; 159206124Srdivacky case 3: // Pre-indexed 160206124Srdivacky return ARM::LDRD_PRE; 161206124Srdivacky case 0: // Post-indexed 162206124Srdivacky return ARM::LDRD_POST; 163206124Srdivacky default: 164206124Srdivacky break; // fallthrough 165206124Srdivacky } 166206124Srdivacky break; 167206124Srdivacky case 15: 168206124Srdivacky switch (PW) { 169206124Srdivacky case 2: // Offset 170206124Srdivacky return ARM::STRD; 171206124Srdivacky case 3: // Pre-indexed 172206124Srdivacky return ARM::STRD_PRE; 173206124Srdivacky case 0: // Post-indexed 174206124Srdivacky return ARM::STRD_POST; 175206124Srdivacky default: 176206124Srdivacky break; // fallthrough 177206124Srdivacky } 178206124Srdivacky break; 179206124Srdivacky default: 180206124Srdivacky break; // fallthrough 181206124Srdivacky } 182206124Srdivacky } 183206124Srdivacky 184206124Srdivacky // Ditto for SBCSSrs, which is a super-instruction for A8.6.152 & A8.6.153. 185206124Srdivacky // As a result, the decoder fails to decode LDRH_POST/LDRSB_POST/LDRSH_POST 186206124Srdivacky // properly. 187206124Srdivacky if (slice(insn, 27, 25) == 0 && slice(insn, 20, 20) == 1) { 188206124Srdivacky unsigned PW = slice(insn, 24, 24) << 1 | slice(insn, 21, 21); 189206124Srdivacky switch (slice(insn, 7, 4)) { 190206124Srdivacky case 11: 191206124Srdivacky switch (PW) { 192206124Srdivacky case 2: // Offset 193206124Srdivacky return ARM::LDRH; 194206124Srdivacky case 3: // Pre-indexed 195206124Srdivacky return ARM::LDRH_PRE; 196206124Srdivacky case 0: // Post-indexed 197206124Srdivacky return ARM::LDRH_POST; 198206124Srdivacky default: 199206124Srdivacky break; // fallthrough 200206124Srdivacky } 201206124Srdivacky break; 202206124Srdivacky case 13: 203206124Srdivacky switch (PW) { 204206124Srdivacky case 2: // Offset 205206124Srdivacky return ARM::LDRSB; 206206124Srdivacky case 3: // Pre-indexed 207206124Srdivacky return ARM::LDRSB_PRE; 208206124Srdivacky case 0: // Post-indexed 209206124Srdivacky return ARM::LDRSB_POST; 210206124Srdivacky default: 211206124Srdivacky break; // fallthrough 212206124Srdivacky } 213206124Srdivacky break; 214206124Srdivacky case 15: 215206124Srdivacky switch (PW) { 216206124Srdivacky case 2: // Offset 217206124Srdivacky return ARM::LDRSH; 218206124Srdivacky case 3: // Pre-indexed 219206124Srdivacky return ARM::LDRSH_PRE; 220206124Srdivacky case 0: // Post-indexed 221206124Srdivacky return ARM::LDRSH_POST; 222206124Srdivacky default: 223206124Srdivacky break; // fallthrough 224206124Srdivacky } 225206124Srdivacky break; 226206124Srdivacky default: 227206124Srdivacky break; // fallthrough 228206124Srdivacky } 229206124Srdivacky } 230206124Srdivacky 231206124SrdivackyAutoGenedDecoder: 232206124Srdivacky // Calling the auto-generated decoder function. 233206124Srdivacky return decodeInstruction(insn); 234206124Srdivacky} 235206124Srdivacky 236206124Srdivacky// Helper function for special case handling of LDR (literal) and friends. 237206124Srdivacky// See, for example, A6.3.7 Load word: Table A6-18 Load word. 238206124Srdivacky// See A8.6.57 T3, T4 & A8.6.60 T2 and friends for why we morphed the opcode 239206124Srdivacky// before returning it. 240206124Srdivackystatic unsigned T2Morph2LoadLiteral(unsigned Opcode) { 241206124Srdivacky switch (Opcode) { 242206124Srdivacky default: 243206124Srdivacky return Opcode; // Return unmorphed opcode. 244206124Srdivacky 245206124Srdivacky case ARM::t2LDRDi8: 246206124Srdivacky return ARM::t2LDRDpci; 247206124Srdivacky 248206124Srdivacky case ARM::t2LDR_POST: case ARM::t2LDR_PRE: 249206124Srdivacky case ARM::t2LDRi12: case ARM::t2LDRi8: 250206124Srdivacky case ARM::t2LDRs: 251206124Srdivacky return ARM::t2LDRpci; 252206124Srdivacky 253206124Srdivacky case ARM::t2LDRB_POST: case ARM::t2LDRB_PRE: 254206124Srdivacky case ARM::t2LDRBi12: case ARM::t2LDRBi8: 255206124Srdivacky case ARM::t2LDRBs: 256206124Srdivacky return ARM::t2LDRBpci; 257206124Srdivacky 258206124Srdivacky case ARM::t2LDRH_POST: case ARM::t2LDRH_PRE: 259206124Srdivacky case ARM::t2LDRHi12: case ARM::t2LDRHi8: 260206124Srdivacky case ARM::t2LDRHs: 261206124Srdivacky return ARM::t2LDRHpci; 262206124Srdivacky 263206124Srdivacky case ARM::t2LDRSB_POST: case ARM::t2LDRSB_PRE: 264206124Srdivacky case ARM::t2LDRSBi12: case ARM::t2LDRSBi8: 265206124Srdivacky case ARM::t2LDRSBs: 266206124Srdivacky return ARM::t2LDRSBpci; 267206124Srdivacky 268206124Srdivacky case ARM::t2LDRSH_POST: case ARM::t2LDRSH_PRE: 269206124Srdivacky case ARM::t2LDRSHi12: case ARM::t2LDRSHi8: 270206124Srdivacky case ARM::t2LDRSHs: 271206124Srdivacky return ARM::t2LDRSHpci; 272206124Srdivacky } 273206124Srdivacky} 274206124Srdivacky 275206124Srdivacky/// decodeThumbSideEffect is a decorator function which can potentially twiddle 276206124Srdivacky/// the instruction or morph the returned opcode under Thumb2. 277206124Srdivacky/// 278206124Srdivacky/// First it checks whether the insn is a NEON or VFP instr; if true, bit 279206124Srdivacky/// twiddling could be performed on insn to turn it into an ARM NEON/VFP 280206124Srdivacky/// equivalent instruction and decodeInstruction is called with the transformed 281206124Srdivacky/// insn. 282206124Srdivacky/// 283206124Srdivacky/// Next, there is special handling for Load byte/halfword/word instruction by 284206124Srdivacky/// checking whether Rn=0b1111 and call T2Morph2LoadLiteral() on the decoded 285206124Srdivacky/// Thumb2 instruction. See comments below for further details. 286206124Srdivacky/// 287206124Srdivacky/// Finally, one last check is made to see whether the insn is a NEON/VFP and 288206124Srdivacky/// decodeInstruction(insn) is invoked on the original insn. 289206124Srdivacky/// 290206124Srdivacky/// Otherwise, decodeThumbInstruction is called with the original insn. 291206124Srdivackystatic unsigned decodeThumbSideEffect(bool IsThumb2, uint32_t &insn) { 292206124Srdivacky if (IsThumb2) { 293206124Srdivacky uint16_t op1 = slice(insn, 28, 27); 294206124Srdivacky uint16_t op2 = slice(insn, 26, 20); 295206124Srdivacky 296206124Srdivacky // A6.3 32-bit Thumb instruction encoding 297206124Srdivacky // Table A6-9 32-bit Thumb instruction encoding 298206124Srdivacky 299206124Srdivacky // The coprocessor instructions of interest are transformed to their ARM 300206124Srdivacky // equivalents. 301206124Srdivacky 302206124Srdivacky // --------- Transform Begin Marker --------- 303206124Srdivacky if ((op1 == 1 || op1 == 3) && slice(op2, 6, 4) == 7) { 304206124Srdivacky // A7.4 Advanced SIMD data-processing instructions 305206124Srdivacky // U bit of Thumb corresponds to Inst{24} of ARM. 306206124Srdivacky uint16_t U = slice(op1, 1, 1); 307206124Srdivacky 308206124Srdivacky // Inst{28-24} of ARM = {1,0,0,1,U}; 309206124Srdivacky uint16_t bits28_24 = 9 << 1 | U; 310206124Srdivacky DEBUG(showBitVector(errs(), insn)); 311206124Srdivacky setSlice(insn, 28, 24, bits28_24); 312206124Srdivacky return decodeInstruction(insn); 313206124Srdivacky } 314206124Srdivacky 315206124Srdivacky if (op1 == 3 && slice(op2, 6, 4) == 1 && slice(op2, 0, 0) == 0) { 316206124Srdivacky // A7.7 Advanced SIMD element or structure load/store instructions 317206124Srdivacky // Inst{27-24} of Thumb = 0b1001 318206124Srdivacky // Inst{27-24} of ARM = 0b0100 319206124Srdivacky DEBUG(showBitVector(errs(), insn)); 320206124Srdivacky setSlice(insn, 27, 24, 4); 321206124Srdivacky return decodeInstruction(insn); 322206124Srdivacky } 323206124Srdivacky // --------- Transform End Marker --------- 324206124Srdivacky 325206124Srdivacky // See, for example, A6.3.7 Load word: Table A6-18 Load word. 326206124Srdivacky // See A8.6.57 T3, T4 & A8.6.60 T2 and friends for why we morphed the opcode 327206124Srdivacky // before returning it to our caller. 328206124Srdivacky if (op1 == 3 && slice(op2, 6, 5) == 0 && slice(op2, 0, 0) == 1 329206124Srdivacky && slice(insn, 19, 16) == 15) 330206124Srdivacky return T2Morph2LoadLiteral(decodeThumbInstruction(insn)); 331206124Srdivacky 332206124Srdivacky // One last check for NEON/VFP instructions. 333206124Srdivacky if ((op1 == 1 || op1 == 3) && slice(op2, 6, 6) == 1) 334206124Srdivacky return decodeInstruction(insn); 335206124Srdivacky 336206124Srdivacky // Fall through. 337206124Srdivacky } 338206124Srdivacky 339206124Srdivacky return decodeThumbInstruction(insn); 340206124Srdivacky} 341206124Srdivacky 342206124Srdivackystatic inline bool Thumb2PreloadOpcodeNoPCI(unsigned Opcode) { 343206124Srdivacky switch (Opcode) { 344206124Srdivacky default: 345206124Srdivacky return false; 346206124Srdivacky case ARM::t2PLDi12: case ARM::t2PLDi8: 347206124Srdivacky case ARM::t2PLDr: case ARM::t2PLDs: 348206124Srdivacky case ARM::t2PLDWi12: case ARM::t2PLDWi8: 349206124Srdivacky case ARM::t2PLDWr: case ARM::t2PLDWs: 350206124Srdivacky case ARM::t2PLIi12: case ARM::t2PLIi8: 351206124Srdivacky case ARM::t2PLIr: case ARM::t2PLIs: 352206124Srdivacky return true; 353206124Srdivacky } 354206124Srdivacky} 355206124Srdivacky 356206124Srdivackystatic inline unsigned T2Morph2Preload2PCI(unsigned Opcode) { 357206124Srdivacky switch (Opcode) { 358206124Srdivacky default: 359206124Srdivacky return 0; 360206124Srdivacky case ARM::t2PLDi12: case ARM::t2PLDi8: 361206124Srdivacky case ARM::t2PLDr: case ARM::t2PLDs: 362206124Srdivacky return ARM::t2PLDpci; 363206124Srdivacky case ARM::t2PLDWi12: case ARM::t2PLDWi8: 364206124Srdivacky case ARM::t2PLDWr: case ARM::t2PLDWs: 365206124Srdivacky return ARM::t2PLDWpci; 366206124Srdivacky case ARM::t2PLIi12: case ARM::t2PLIi8: 367206124Srdivacky case ARM::t2PLIr: case ARM::t2PLIs: 368206124Srdivacky return ARM::t2PLIpci; 369206124Srdivacky } 370206124Srdivacky} 371206124Srdivacky 372206124Srdivacky// 373206124Srdivacky// Public interface for the disassembler 374206124Srdivacky// 375206124Srdivacky 376206124Srdivackybool ARMDisassembler::getInstruction(MCInst &MI, 377206124Srdivacky uint64_t &Size, 378206124Srdivacky const MemoryObject &Region, 379206124Srdivacky uint64_t Address, 380206124Srdivacky raw_ostream &os) const { 381206124Srdivacky // The machine instruction. 382206124Srdivacky uint32_t insn; 383206274Srdivacky uint8_t bytes[4]; 384206124Srdivacky 385206124Srdivacky // We want to read exactly 4 bytes of data. 386206274Srdivacky if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) 387206124Srdivacky return false; 388206124Srdivacky 389206274Srdivacky // Encoded as a small-endian 32-bit word in the stream. 390206274Srdivacky insn = (bytes[3] << 24) | 391206274Srdivacky (bytes[2] << 16) | 392206274Srdivacky (bytes[1] << 8) | 393206274Srdivacky (bytes[0] << 0); 394206274Srdivacky 395206124Srdivacky unsigned Opcode = decodeARMInstruction(insn); 396206124Srdivacky ARMFormat Format = ARMFormats[Opcode]; 397206124Srdivacky Size = 4; 398206124Srdivacky 399206124Srdivacky DEBUG({ 400206124Srdivacky errs() << "Opcode=" << Opcode << " Name=" << ARMUtils::OpcodeName(Opcode) 401206124Srdivacky << " Format=" << stringForARMFormat(Format) << '(' << (int)Format 402206124Srdivacky << ")\n"; 403206124Srdivacky showBitVector(errs(), insn); 404206124Srdivacky }); 405206124Srdivacky 406206124Srdivacky ARMBasicMCBuilder *Builder = CreateMCBuilder(Opcode, Format); 407206124Srdivacky 408206124Srdivacky if (!Builder) 409206124Srdivacky return false; 410206124Srdivacky 411206124Srdivacky if (!Builder->Build(MI, insn)) 412206124Srdivacky return false; 413206124Srdivacky 414206124Srdivacky delete Builder; 415206124Srdivacky 416206124Srdivacky return true; 417206124Srdivacky} 418206124Srdivacky 419206124Srdivackybool ThumbDisassembler::getInstruction(MCInst &MI, 420206124Srdivacky uint64_t &Size, 421206124Srdivacky const MemoryObject &Region, 422206124Srdivacky uint64_t Address, 423206124Srdivacky raw_ostream &os) const { 424206274Srdivacky // The Thumb instruction stream is a sequence of halhwords. 425206274Srdivacky 426206274Srdivacky // This represents the first halfword as well as the machine instruction 427206274Srdivacky // passed to decodeThumbInstruction(). For 16-bit Thumb instruction, the top 428206274Srdivacky // halfword of insn is 0x00 0x00; otherwise, the first halfword is moved to 429206274Srdivacky // the top half followed by the second halfword. 430206124Srdivacky uint32_t insn = 0; 431206274Srdivacky // Possible second halfword. 432206274Srdivacky uint16_t insn1 = 0; 433206124Srdivacky 434206124Srdivacky // A6.1 Thumb instruction set encoding 435206124Srdivacky // 436206124Srdivacky // If bits [15:11] of the halfword being decoded take any of the following 437206124Srdivacky // values, the halfword is the first halfword of a 32-bit instruction: 438206124Srdivacky // o 0b11101 439206124Srdivacky // o 0b11110 440206124Srdivacky // o 0b11111. 441206124Srdivacky // 442206124Srdivacky // Otherwise, the halfword is a 16-bit instruction. 443206124Srdivacky 444206124Srdivacky // Read 2 bytes of data first. 445206274Srdivacky uint8_t bytes[2]; 446206274Srdivacky if (Region.readBytes(Address, 2, (uint8_t*)bytes, NULL) == -1) 447206124Srdivacky return false; 448206124Srdivacky 449206274Srdivacky // Encoded as a small-endian 16-bit halfword in the stream. 450206274Srdivacky insn = (bytes[1] << 8) | bytes[0]; 451206124Srdivacky unsigned bits15_11 = slice(insn, 15, 11); 452206124Srdivacky bool IsThumb2 = false; 453206124Srdivacky 454206124Srdivacky // 32-bit instructions if the bits [15:11] of the halfword matches 455206124Srdivacky // { 0b11101 /* 0x1D */, 0b11110 /* 0x1E */, ob11111 /* 0x1F */ }. 456206124Srdivacky if (bits15_11 == 0x1D || bits15_11 == 0x1E || bits15_11 == 0x1F) { 457206124Srdivacky IsThumb2 = true; 458206274Srdivacky if (Region.readBytes(Address + 2, 2, (uint8_t*)bytes, NULL) == -1) 459206124Srdivacky return false; 460206274Srdivacky // Encoded as a small-endian 16-bit halfword in the stream. 461206274Srdivacky insn1 = (bytes[1] << 8) | bytes[0]; 462206124Srdivacky insn = (insn << 16 | insn1); 463206124Srdivacky } 464206124Srdivacky 465206124Srdivacky // The insn could potentially be bit-twiddled in order to be decoded as an ARM 466206124Srdivacky // NEON/VFP opcode. In such case, the modified insn is later disassembled as 467206124Srdivacky // an ARM NEON/VFP instruction. 468206124Srdivacky // 469206124Srdivacky // This is a short term solution for lack of encoding bits specified for the 470206124Srdivacky // Thumb2 NEON/VFP instructions. The long term solution could be adding some 471206124Srdivacky // infrastructure to have each instruction support more than one encodings. 472206124Srdivacky // Which encoding is used would be based on which subtarget the compiler/ 473206124Srdivacky // disassembler is working with at the time. This would allow the sharing of 474206124Srdivacky // the NEON patterns between ARM and Thumb2, as well as potential greater 475206124Srdivacky // sharing between the regular ARM instructions and the 32-bit wide Thumb2 476206124Srdivacky // instructions as well. 477206124Srdivacky unsigned Opcode = decodeThumbSideEffect(IsThumb2, insn); 478206124Srdivacky 479206124Srdivacky // A8.6.117/119/120/121. 480206124Srdivacky // PLD/PLDW/PLI instructions with Rn==15 is transformed to the pci variant. 481206124Srdivacky if (Thumb2PreloadOpcodeNoPCI(Opcode) && slice(insn, 19, 16) == 15) 482206124Srdivacky Opcode = T2Morph2Preload2PCI(Opcode); 483206124Srdivacky 484206124Srdivacky ARMFormat Format = ARMFormats[Opcode]; 485206124Srdivacky Size = IsThumb2 ? 4 : 2; 486206124Srdivacky 487206124Srdivacky DEBUG({ 488206124Srdivacky errs() << "Opcode=" << Opcode << " Name=" << ARMUtils::OpcodeName(Opcode) 489206124Srdivacky << " Format=" << stringForARMFormat(Format) << '(' << (int)Format 490206124Srdivacky << ")\n"; 491206124Srdivacky showBitVector(errs(), insn); 492206124Srdivacky }); 493206124Srdivacky 494206124Srdivacky ARMBasicMCBuilder *Builder = CreateMCBuilder(Opcode, Format); 495206124Srdivacky Builder->setSession(const_cast<Session *>(&SO)); 496206124Srdivacky 497206124Srdivacky if (!Builder) 498206124Srdivacky return false; 499206124Srdivacky 500206124Srdivacky if (!Builder->Build(MI, insn)) 501206124Srdivacky return false; 502206124Srdivacky 503206124Srdivacky delete Builder; 504206124Srdivacky 505206124Srdivacky return true; 506206124Srdivacky} 507206124Srdivacky 508206124Srdivacky// A8.6.50 509206124Srdivackystatic unsigned short CountITSize(unsigned ITMask) { 510206124Srdivacky // First count the trailing zeros of the IT mask. 511206124Srdivacky unsigned TZ = CountTrailingZeros_32(ITMask); 512206124Srdivacky assert(TZ <= 3 && "Encoding error"); 513206124Srdivacky return (4 - TZ); 514206124Srdivacky} 515206124Srdivacky 516206124Srdivacky/// Init ITState. 517206124Srdivackyvoid Session::InitIT(unsigned short bits7_0) { 518206124Srdivacky ITCounter = CountITSize(slice(bits7_0, 3, 0)); 519206124Srdivacky ITState = bits7_0; 520206124Srdivacky} 521206124Srdivacky 522206124Srdivacky/// Update ITState if necessary. 523206124Srdivackyvoid Session::UpdateIT() { 524206124Srdivacky assert(ITCounter); 525206124Srdivacky --ITCounter; 526206124Srdivacky if (ITCounter == 0) 527206124Srdivacky ITState = 0; 528206124Srdivacky else { 529206124Srdivacky unsigned short NewITState4_0 = slice(ITState, 4, 0) << 1; 530206124Srdivacky setSlice(ITState, 4, 0, NewITState4_0); 531206124Srdivacky } 532206124Srdivacky} 533206124Srdivacky 534206124Srdivackystatic MCDisassembler *createARMDisassembler(const Target &T) { 535206124Srdivacky return new ARMDisassembler; 536206124Srdivacky} 537206124Srdivacky 538206124Srdivackystatic MCDisassembler *createThumbDisassembler(const Target &T) { 539206124Srdivacky return new ThumbDisassembler; 540206124Srdivacky} 541206124Srdivacky 542206124Srdivackyextern "C" void LLVMInitializeARMDisassembler() { 543206124Srdivacky // Register the disassembler. 544206124Srdivacky TargetRegistry::RegisterMCDisassembler(TheARMTarget, 545206124Srdivacky createARMDisassembler); 546206124Srdivacky TargetRegistry::RegisterMCDisassembler(TheThumbTarget, 547206124Srdivacky createThumbDisassembler); 548206124Srdivacky} 549206124Srdivacky 550206124Srdivacky} // namespace llvm 551