1277323Sdim//===-- MCExternalSymbolizer.cpp - External symbolizer --------------------===//
2277323Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6277323Sdim//
7277323Sdim//===----------------------------------------------------------------------===//
8277323Sdim
9309124Sdim#include "llvm/MC/MCDisassembler/MCExternalSymbolizer.h"
10277323Sdim#include "llvm/MC/MCContext.h"
11277323Sdim#include "llvm/MC/MCExpr.h"
12277323Sdim#include "llvm/MC/MCInst.h"
13277323Sdim#include "llvm/Support/raw_ostream.h"
14277323Sdim#include <cstring>
15277323Sdim
16277323Sdimusing namespace llvm;
17277323Sdim
// Forward declaration only: this file refers to Triple solely by reference
// (as a parameter of createMCSymbolizer), so the full definition is not needed.
namespace llvm {

class Triple;

} // end namespace llvm
21288943Sdim
22277323Sdim// This function tries to add a symbolic operand in place of the immediate
23277323Sdim// Value in the MCInst. The immediate Value has had any PC adjustment made by
24277323Sdim// the caller. If the instruction is a branch instruction then IsBranch is true,
25277323Sdim// else false. If the getOpInfo() function was set as part of the
26277323Sdim// setupForSymbolicDisassembly() call then that function is called to get any
27277323Sdim// symbolic information at the Address for this instruction. If that returns
28277323Sdim// non-zero then the symbolic information it returns is used to create an MCExpr
29277323Sdim// and that is added as an operand to the MCInst. If getOpInfo() returns zero
30277323Sdim// and IsBranch is true then a symbol look up for Value is done and if a symbol
31277323Sdim// is found an MCExpr is created with that, else an MCExpr with Value is
32277323Sdim// created. This function returns true if it adds an operand to the MCInst and
33277323Sdim// false otherwise.
34277323Sdimbool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI,
35277323Sdim                                                    raw_ostream &cStream,
36277323Sdim                                                    int64_t Value,
37277323Sdim                                                    uint64_t Address,
38277323Sdim                                                    bool IsBranch,
39277323Sdim                                                    uint64_t Offset,
40277323Sdim                                                    uint64_t InstSize) {
41277323Sdim  struct LLVMOpInfo1 SymbolicOp;
42277323Sdim  std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
43277323Sdim  SymbolicOp.Value = Value;
44277323Sdim
45277323Sdim  if (!GetOpInfo ||
46277323Sdim      !GetOpInfo(DisInfo, Address, Offset, InstSize, 1, &SymbolicOp)) {
47277323Sdim    // Clear SymbolicOp.Value from above and also all other fields.
48277323Sdim    std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
49277323Sdim
50277323Sdim    // At this point, GetOpInfo() did not find any relocation information about
51277323Sdim    // this operand and we are left to use the SymbolLookUp() call back to guess
52277323Sdim    // if the Value is the address of a symbol.  In the case this is a branch
53277323Sdim    // that always makes sense to guess.  But in the case of an immediate it is
54277323Sdim    // a bit more questionable if it is an address of a symbol or some other
55277323Sdim    // reference.  So if the immediate Value comes from a width of 1 byte,
56277323Sdim    // InstSize, we will not guess it is an address of a symbol.  Because in
57277323Sdim    // object files assembled starting at address 0 this usually leads to
58277323Sdim    // incorrect symbolication.
59277323Sdim    if (!SymbolLookUp || (InstSize == 1 && !IsBranch))
60277323Sdim      return false;
61277323Sdim
62277323Sdim    uint64_t ReferenceType;
63277323Sdim    if (IsBranch)
64277323Sdim       ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
65277323Sdim    else
66277323Sdim       ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
67277323Sdim    const char *ReferenceName;
68277323Sdim    const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address,
69277323Sdim                                    &ReferenceName);
70277323Sdim    if (Name) {
71277323Sdim      SymbolicOp.AddSymbol.Name = Name;
72277323Sdim      SymbolicOp.AddSymbol.Present = true;
73277323Sdim      // If Name is a C++ symbol name put the human readable name in a comment.
74277323Sdim      if(ReferenceType == LLVMDisassembler_ReferenceType_DeMangled_Name)
75277323Sdim        cStream << ReferenceName;
76277323Sdim    }
77277323Sdim    // For branches always create an MCExpr so it gets printed as hex address.
78277323Sdim    else if (IsBranch) {
79277323Sdim      SymbolicOp.Value = Value;
80277323Sdim    }
81277323Sdim    if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub)
82277323Sdim      cStream << "symbol stub for: " << ReferenceName;
83277323Sdim    else if(ReferenceType == LLVMDisassembler_ReferenceType_Out_Objc_Message)
84277323Sdim      cStream << "Objc message: " << ReferenceName;
85277323Sdim    if (!Name && !IsBranch)
86277323Sdim      return false;
87277323Sdim  }
88277323Sdim
89277323Sdim  const MCExpr *Add = nullptr;
90277323Sdim  if (SymbolicOp.AddSymbol.Present) {
91277323Sdim    if (SymbolicOp.AddSymbol.Name) {
92277323Sdim      StringRef Name(SymbolicOp.AddSymbol.Name);
93288943Sdim      MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
94288943Sdim      Add = MCSymbolRefExpr::create(Sym, Ctx);
95277323Sdim    } else {
96288943Sdim      Add = MCConstantExpr::create((int)SymbolicOp.AddSymbol.Value, Ctx);
97277323Sdim    }
98277323Sdim  }
99277323Sdim
100277323Sdim  const MCExpr *Sub = nullptr;
101277323Sdim  if (SymbolicOp.SubtractSymbol.Present) {
102277323Sdim      if (SymbolicOp.SubtractSymbol.Name) {
103277323Sdim      StringRef Name(SymbolicOp.SubtractSymbol.Name);
104288943Sdim      MCSymbol *Sym = Ctx.getOrCreateSymbol(Name);
105288943Sdim      Sub = MCSymbolRefExpr::create(Sym, Ctx);
106277323Sdim    } else {
107288943Sdim      Sub = MCConstantExpr::create((int)SymbolicOp.SubtractSymbol.Value, Ctx);
108277323Sdim    }
109277323Sdim  }
110277323Sdim
111277323Sdim  const MCExpr *Off = nullptr;
112277323Sdim  if (SymbolicOp.Value != 0)
113288943Sdim    Off = MCConstantExpr::create(SymbolicOp.Value, Ctx);
114277323Sdim
115277323Sdim  const MCExpr *Expr;
116277323Sdim  if (Sub) {
117277323Sdim    const MCExpr *LHS;
118277323Sdim    if (Add)
119288943Sdim      LHS = MCBinaryExpr::createSub(Add, Sub, Ctx);
120277323Sdim    else
121288943Sdim      LHS = MCUnaryExpr::createMinus(Sub, Ctx);
122277323Sdim    if (Off)
123288943Sdim      Expr = MCBinaryExpr::createAdd(LHS, Off, Ctx);
124277323Sdim    else
125277323Sdim      Expr = LHS;
126277323Sdim  } else if (Add) {
127277323Sdim    if (Off)
128288943Sdim      Expr = MCBinaryExpr::createAdd(Add, Off, Ctx);
129277323Sdim    else
130277323Sdim      Expr = Add;
131277323Sdim  } else {
132277323Sdim    if (Off)
133277323Sdim      Expr = Off;
134277323Sdim    else
135288943Sdim      Expr = MCConstantExpr::create(0, Ctx);
136277323Sdim  }
137277323Sdim
138277323Sdim  Expr = RelInfo->createExprForCAPIVariantKind(Expr, SymbolicOp.VariantKind);
139277323Sdim  if (!Expr)
140277323Sdim    return false;
141277323Sdim
142288943Sdim  MI.addOperand(MCOperand::createExpr(Expr));
143277323Sdim  return true;
144277323Sdim}
145277323Sdim
146277323Sdim// This function tries to add a comment as to what is being referenced by a load
147277323Sdim// instruction with the base register that is the Pc.  These can often be values
148277323Sdim// in a literal pool near the Address of the instruction. The Address of the
149277323Sdim// instruction and its immediate Value are used as a possible literal pool entry.
150277323Sdim// The SymbolLookUp call back will return the name of a symbol referenced by the
151277323Sdim// literal pool's entry if the referenced address is that of a symbol. Or it
152277323Sdim// will return a pointer to a literal 'C' string if the referenced address of
153277323Sdim// the literal pool's entry is an address into a section with C string literals.
154277323Sdim// Or if the reference is to an Objective-C data structure it will return a
155277323Sdim// specific reference type for it and a string.
156277323Sdimvoid MCExternalSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
157277323Sdim                                                           int64_t Value,
158277323Sdim                                                           uint64_t Address) {
159277323Sdim  if (SymbolLookUp) {
160277323Sdim    uint64_t ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load;
161277323Sdim    const char *ReferenceName;
162277323Sdim    (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName);
163277323Sdim    if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr)
164277323Sdim      cStream << "literal pool symbol address: " << ReferenceName;
165277323Sdim    else if(ReferenceType ==
166277323Sdim            LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) {
167277323Sdim      cStream << "literal pool for: \"";
168277323Sdim      cStream.write_escaped(ReferenceName);
169277323Sdim      cStream << "\"";
170277323Sdim    }
171277323Sdim    else if(ReferenceType ==
172277323Sdim            LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref)
173277323Sdim      cStream << "Objc cfstring ref: @\"" << ReferenceName << "\"";
174277323Sdim    else if(ReferenceType ==
175277323Sdim            LLVMDisassembler_ReferenceType_Out_Objc_Message)
176277323Sdim      cStream << "Objc message: " << ReferenceName;
177277323Sdim    else if(ReferenceType ==
178277323Sdim            LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref)
179277323Sdim      cStream << "Objc message ref: " << ReferenceName;
180277323Sdim    else if(ReferenceType ==
181277323Sdim            LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref)
182277323Sdim      cStream << "Objc selector ref: " << ReferenceName;
183277323Sdim    else if(ReferenceType ==
184277323Sdim            LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref)
185277323Sdim      cStream << "Objc class ref: " << ReferenceName;
186277323Sdim  }
187277323Sdim}
188277323Sdim
189277323Sdimnamespace llvm {
190288943SdimMCSymbolizer *createMCSymbolizer(const Triple &TT, LLVMOpInfoCallback GetOpInfo,
191277323Sdim                                 LLVMSymbolLookupCallback SymbolLookUp,
192288943Sdim                                 void *DisInfo, MCContext *Ctx,
193288943Sdim                                 std::unique_ptr<MCRelocationInfo> &&RelInfo) {
194277323Sdim  assert(Ctx && "No MCContext given for symbolic disassembly");
195277323Sdim
196288943Sdim  return new MCExternalSymbolizer(*Ctx, std::move(RelInfo), GetOpInfo,
197288943Sdim                                  SymbolLookUp, DisInfo);
198277323Sdim}
199277323Sdim}
200