1//===-- ClangExpressionSourceCode.cpp ---------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "ClangExpressionSourceCode.h"
10
11#include "clang/Basic/CharInfo.h"
12#include "clang/Basic/SourceManager.h"
13#include "clang/Lex/Lexer.h"
14#include "llvm/ADT/StringRef.h"
15
16#include "Plugins/ExpressionParser/Clang/ClangModulesDeclVendor.h"
17#include "Plugins/ExpressionParser/Clang/ClangPersistentVariables.h"
18#include "lldb/Symbol/Block.h"
19#include "lldb/Symbol/CompileUnit.h"
20#include "lldb/Symbol/DebugMacros.h"
21#include "lldb/Symbol/TypeSystem.h"
22#include "lldb/Symbol/VariableList.h"
23#include "lldb/Target/ExecutionContext.h"
24#include "lldb/Target/Language.h"
25#include "lldb/Target/Platform.h"
26#include "lldb/Target/StackFrame.h"
27#include "lldb/Target/Target.h"
28#include "lldb/Utility/StreamString.h"
29
30using namespace lldb_private;
31
32#define PREFIX_NAME "<lldb wrapper prefix>"
33
34const llvm::StringRef ClangExpressionSourceCode::g_prefix_file_name = PREFIX_NAME;
35
36const char *ClangExpressionSourceCode::g_expression_prefix =
37"#line 1 \"" PREFIX_NAME R"("
38#ifndef offsetof
39#define offsetof(t, d) __builtin_offsetof(t, d)
40#endif
41#ifndef NULL
42#define NULL (__null)
43#endif
44#ifndef Nil
45#define Nil (__null)
46#endif
47#ifndef nil
48#define nil (__null)
49#endif
50#ifndef YES
51#define YES ((BOOL)1)
52#endif
53#ifndef NO
54#define NO ((BOOL)0)
55#endif
56typedef __INT8_TYPE__ int8_t;
57typedef __UINT8_TYPE__ uint8_t;
58typedef __INT16_TYPE__ int16_t;
59typedef __UINT16_TYPE__ uint16_t;
60typedef __INT32_TYPE__ int32_t;
61typedef __UINT32_TYPE__ uint32_t;
62typedef __INT64_TYPE__ int64_t;
63typedef __UINT64_TYPE__ uint64_t;
64typedef __INTPTR_TYPE__ intptr_t;
65typedef __UINTPTR_TYPE__ uintptr_t;
66typedef __SIZE_TYPE__ size_t;
67typedef __PTRDIFF_TYPE__ ptrdiff_t;
68typedef unsigned short unichar;
69extern "C"
70{
71    int printf(const char * __restrict, ...);
72}
73)";
74
75namespace {
76
77class AddMacroState {
78  enum State {
79    CURRENT_FILE_NOT_YET_PUSHED,
80    CURRENT_FILE_PUSHED,
81    CURRENT_FILE_POPPED
82  };
83
84public:
85  AddMacroState(const FileSpec &current_file, const uint32_t current_file_line)
86      : m_state(CURRENT_FILE_NOT_YET_PUSHED), m_current_file(current_file),
87        m_current_file_line(current_file_line) {}
88
89  void StartFile(const FileSpec &file) {
90    m_file_stack.push_back(file);
91    if (file == m_current_file)
92      m_state = CURRENT_FILE_PUSHED;
93  }
94
95  void EndFile() {
96    if (m_file_stack.size() == 0)
97      return;
98
99    FileSpec old_top = m_file_stack.back();
100    m_file_stack.pop_back();
101    if (old_top == m_current_file)
102      m_state = CURRENT_FILE_POPPED;
103  }
104
105  // An entry is valid if it occurs before the current line in the current
106  // file.
107  bool IsValidEntry(uint32_t line) {
108    switch (m_state) {
109    case CURRENT_FILE_NOT_YET_PUSHED:
110      return true;
111    case CURRENT_FILE_PUSHED:
112      // If we are in file included in the current file, the entry should be
113      // added.
114      if (m_file_stack.back() != m_current_file)
115        return true;
116
117      return line < m_current_file_line;
118    default:
119      return false;
120    }
121  }
122
123private:
124  std::vector<FileSpec> m_file_stack;
125  State m_state;
126  FileSpec m_current_file;
127  uint32_t m_current_file_line;
128};
129
130} // anonymous namespace
131
132static void AddMacros(const DebugMacros *dm, CompileUnit *comp_unit,
133                      AddMacroState &state, StreamString &stream) {
134  if (dm == nullptr)
135    return;
136
137  for (size_t i = 0; i < dm->GetNumMacroEntries(); i++) {
138    const DebugMacroEntry &entry = dm->GetMacroEntryAtIndex(i);
139    uint32_t line;
140
141    switch (entry.GetType()) {
142    case DebugMacroEntry::DEFINE:
143      if (state.IsValidEntry(entry.GetLineNumber()))
144        stream.Printf("#define %s\n", entry.GetMacroString().AsCString());
145      else
146        return;
147      break;
148    case DebugMacroEntry::UNDEF:
149      if (state.IsValidEntry(entry.GetLineNumber()))
150        stream.Printf("#undef %s\n", entry.GetMacroString().AsCString());
151      else
152        return;
153      break;
154    case DebugMacroEntry::START_FILE:
155      line = entry.GetLineNumber();
156      if (state.IsValidEntry(line))
157        state.StartFile(entry.GetFileSpec(comp_unit));
158      else
159        return;
160      break;
161    case DebugMacroEntry::END_FILE:
162      state.EndFile();
163      break;
164    case DebugMacroEntry::INDIRECT:
165      AddMacros(entry.GetIndirectDebugMacros(), comp_unit, state, stream);
166      break;
167    default:
168      // This is an unknown/invalid entry. Ignore.
169      break;
170    }
171  }
172}
173
174lldb_private::ClangExpressionSourceCode::ClangExpressionSourceCode(
175    llvm::StringRef filename, llvm::StringRef name, llvm::StringRef prefix,
176    llvm::StringRef body, Wrapping wrap)
177    : ExpressionSourceCode(name, prefix, body, wrap) {
178  // Use #line markers to pretend that we have a single-line source file
179  // containing only the user expression. This will hide our wrapper code
180  // from the user when we render diagnostics with Clang.
181  m_start_marker = "#line 1 \"" + filename.str() + "\"\n";
182  m_end_marker = "\n;\n#line 1 \"<lldb wrapper suffix>\"\n";
183}
184
185namespace {
186/// Allows checking if a token is contained in a given expression.
187class TokenVerifier {
188  /// The tokens we found in the expression.
189  llvm::StringSet<> m_tokens;
190
191public:
192  TokenVerifier(std::string body);
193  /// Returns true iff the given expression body contained a token with the
194  /// given content.
195  bool hasToken(llvm::StringRef token) const {
196    return m_tokens.find(token) != m_tokens.end();
197  }
198};
199} // namespace
200
201TokenVerifier::TokenVerifier(std::string body) {
202  using namespace clang;
203
204  // We only care about tokens and not their original source locations. If we
205  // move the whole expression to only be in one line we can simplify the
206  // following code that extracts the token contents.
207  std::replace(body.begin(), body.end(), '\n', ' ');
208  std::replace(body.begin(), body.end(), '\r', ' ');
209
210  FileSystemOptions file_opts;
211  FileManager file_mgr(file_opts,
212                       FileSystem::Instance().GetVirtualFileSystem());
213
214  // Let's build the actual source code Clang needs and setup some utility
215  // objects.
216  llvm::IntrusiveRefCntPtr<DiagnosticIDs> diag_ids(new DiagnosticIDs());
217  llvm::IntrusiveRefCntPtr<DiagnosticOptions> diags_opts(
218      new DiagnosticOptions());
219  DiagnosticsEngine diags(diag_ids, diags_opts);
220  clang::SourceManager SM(diags, file_mgr);
221  auto buf = llvm::MemoryBuffer::getMemBuffer(body);
222
223  FileID FID = SM.createFileID(clang::SourceManager::Unowned, buf.get());
224
225  // Let's just enable the latest ObjC and C++ which should get most tokens
226  // right.
227  LangOptions Opts;
228  Opts.ObjC = true;
229  Opts.DollarIdents = true;
230  Opts.CPlusPlus17 = true;
231  Opts.LineComment = true;
232
233  Lexer lex(FID, buf.get(), SM, Opts);
234
235  Token token;
236  bool exit = false;
237  while (!exit) {
238    // Returns true if this is the last token we get from the lexer.
239    exit = lex.LexFromRawLexer(token);
240
241    // Extract the column number which we need to extract the token content.
242    // Our expression is just one line, so we don't need to handle any line
243    // numbers here.
244    bool invalid = false;
245    unsigned start = SM.getSpellingColumnNumber(token.getLocation(), &invalid);
246    if (invalid)
247      continue;
248    // Column numbers start at 1, but indexes in our string start at 0.
249    --start;
250
251    // Annotations don't have a length, so let's skip them.
252    if (token.isAnnotation())
253      continue;
254
255    // Extract the token string from our source code and store it.
256    std::string token_str = body.substr(start, token.getLength());
257    if (token_str.empty())
258      continue;
259    m_tokens.insert(token_str);
260  }
261}
262
263static void AddLocalVariableDecls(const lldb::VariableListSP &var_list_sp,
264                                  StreamString &stream,
265                                  const std::string &expr,
266                                  lldb::LanguageType wrapping_language) {
267  TokenVerifier tokens(expr);
268
269  for (size_t i = 0; i < var_list_sp->GetSize(); i++) {
270    lldb::VariableSP var_sp = var_list_sp->GetVariableAtIndex(i);
271
272    ConstString var_name = var_sp->GetName();
273
274
275    // We can check for .block_descriptor w/o checking for langauge since this
276    // is not a valid identifier in either C or C++.
277    if (!var_name || var_name == ".block_descriptor")
278      continue;
279
280    if (!expr.empty() && !tokens.hasToken(var_name.GetStringRef()))
281      continue;
282
283    if ((var_name == "self" || var_name == "_cmd") &&
284        (wrapping_language == lldb::eLanguageTypeObjC ||
285         wrapping_language == lldb::eLanguageTypeObjC_plus_plus))
286      continue;
287
288    if (var_name == "this" &&
289        wrapping_language == lldb::eLanguageTypeC_plus_plus)
290      continue;
291
292    stream.Printf("using $__lldb_local_vars::%s;\n", var_name.AsCString());
293  }
294}
295
296bool ClangExpressionSourceCode::GetText(
297    std::string &text, lldb::LanguageType wrapping_language, bool static_method,
298    ExecutionContext &exe_ctx, bool add_locals, bool force_add_all_locals,
299    llvm::ArrayRef<std::string> modules) const {
300  const char *target_specific_defines = "typedef signed char BOOL;\n";
301  std::string module_macros;
302
303  Target *target = exe_ctx.GetTargetPtr();
304  if (target) {
305    if (target->GetArchitecture().GetMachine() == llvm::Triple::aarch64 ||
306        target->GetArchitecture().GetMachine() == llvm::Triple::aarch64_32) {
307      target_specific_defines = "typedef bool BOOL;\n";
308    }
309    if (target->GetArchitecture().GetMachine() == llvm::Triple::x86_64) {
310      if (lldb::PlatformSP platform_sp = target->GetPlatform()) {
311        static ConstString g_platform_ios_simulator("ios-simulator");
312        if (platform_sp->GetPluginName() == g_platform_ios_simulator) {
313          target_specific_defines = "typedef bool BOOL;\n";
314        }
315      }
316    }
317
318    ClangModulesDeclVendor *decl_vendor = target->GetClangModulesDeclVendor();
319    auto *persistent_vars = llvm::cast<ClangPersistentVariables>(
320        target->GetPersistentExpressionStateForLanguage(lldb::eLanguageTypeC));
321    if (decl_vendor && persistent_vars) {
322      const ClangModulesDeclVendor::ModuleVector &hand_imported_modules =
323          persistent_vars->GetHandLoadedClangModules();
324      ClangModulesDeclVendor::ModuleVector modules_for_macros;
325
326      for (ClangModulesDeclVendor::ModuleID module : hand_imported_modules) {
327        modules_for_macros.push_back(module);
328      }
329
330      if (target->GetEnableAutoImportClangModules()) {
331        if (StackFrame *frame = exe_ctx.GetFramePtr()) {
332          if (Block *block = frame->GetFrameBlock()) {
333            SymbolContext sc;
334
335            block->CalculateSymbolContext(&sc);
336
337            if (sc.comp_unit) {
338              StreamString error_stream;
339
340              decl_vendor->AddModulesForCompileUnit(
341                  *sc.comp_unit, modules_for_macros, error_stream);
342            }
343          }
344        }
345      }
346
347      decl_vendor->ForEachMacro(
348          modules_for_macros,
349          [&module_macros](const std::string &expansion) -> bool {
350            module_macros.append(expansion);
351            module_macros.append("\n");
352            return false;
353          });
354    }
355  }
356
357  StreamString debug_macros_stream;
358  StreamString lldb_local_var_decls;
359  if (StackFrame *frame = exe_ctx.GetFramePtr()) {
360    const SymbolContext &sc = frame->GetSymbolContext(
361        lldb::eSymbolContextCompUnit | lldb::eSymbolContextLineEntry);
362
363    if (sc.comp_unit && sc.line_entry.IsValid()) {
364      DebugMacros *dm = sc.comp_unit->GetDebugMacros();
365      if (dm) {
366        AddMacroState state(sc.line_entry.file, sc.line_entry.line);
367        AddMacros(dm, sc.comp_unit, state, debug_macros_stream);
368      }
369    }
370
371    if (add_locals)
372      if (target->GetInjectLocalVariables(&exe_ctx)) {
373        lldb::VariableListSP var_list_sp =
374            frame->GetInScopeVariableList(false, true);
375        AddLocalVariableDecls(var_list_sp, lldb_local_var_decls,
376                              force_add_all_locals ? "" : m_body,
377                              wrapping_language);
378      }
379  }
380
381  if (m_wrap) {
382    switch (wrapping_language) {
383    default:
384      return false;
385    case lldb::eLanguageTypeC:
386    case lldb::eLanguageTypeC_plus_plus:
387    case lldb::eLanguageTypeObjC:
388      break;
389    }
390
391    // Generate a list of @import statements that will import the specified
392    // module into our expression.
393    std::string module_imports;
394    for (const std::string &module : modules) {
395      module_imports.append("@import ");
396      module_imports.append(module);
397      module_imports.append(";\n");
398    }
399
400    StreamString wrap_stream;
401
402    wrap_stream.Printf("%s\n%s\n%s\n%s\n%s\n", module_macros.c_str(),
403                       debug_macros_stream.GetData(), g_expression_prefix,
404                       target_specific_defines, m_prefix.c_str());
405
406    // First construct a tagged form of the user expression so we can find it
407    // later:
408    std::string tagged_body;
409    switch (wrapping_language) {
410    default:
411      tagged_body = m_body;
412      break;
413    case lldb::eLanguageTypeC:
414    case lldb::eLanguageTypeC_plus_plus:
415    case lldb::eLanguageTypeObjC:
416      tagged_body.append(m_start_marker);
417      tagged_body.append(m_body);
418      tagged_body.append(m_end_marker);
419      break;
420    }
421    switch (wrapping_language) {
422    default:
423      break;
424    case lldb::eLanguageTypeC:
425      wrap_stream.Printf("%s"
426                         "void                           \n"
427                         "%s(void *$__lldb_arg)          \n"
428                         "{                              \n"
429                         "    %s;                        \n"
430                         "%s"
431                         "}                              \n",
432                         module_imports.c_str(), m_name.c_str(),
433                         lldb_local_var_decls.GetData(), tagged_body.c_str());
434      break;
435    case lldb::eLanguageTypeC_plus_plus:
436      wrap_stream.Printf("%s"
437                         "void                                   \n"
438                         "$__lldb_class::%s(void *$__lldb_arg)   \n"
439                         "{                                      \n"
440                         "    %s;                                \n"
441                         "%s"
442                         "}                                      \n",
443                         module_imports.c_str(), m_name.c_str(),
444                         lldb_local_var_decls.GetData(), tagged_body.c_str());
445      break;
446    case lldb::eLanguageTypeObjC:
447      if (static_method) {
448        wrap_stream.Printf(
449            "%s"
450            "@interface $__lldb_objc_class ($__lldb_category)        \n"
451            "+(void)%s:(void *)$__lldb_arg;                          \n"
452            "@end                                                    \n"
453            "@implementation $__lldb_objc_class ($__lldb_category)   \n"
454            "+(void)%s:(void *)$__lldb_arg                           \n"
455            "{                                                       \n"
456            "    %s;                                                 \n"
457            "%s"
458            "}                                                       \n"
459            "@end                                                    \n",
460            module_imports.c_str(), m_name.c_str(), m_name.c_str(),
461            lldb_local_var_decls.GetData(), tagged_body.c_str());
462      } else {
463        wrap_stream.Printf(
464            "%s"
465            "@interface $__lldb_objc_class ($__lldb_category)       \n"
466            "-(void)%s:(void *)$__lldb_arg;                         \n"
467            "@end                                                   \n"
468            "@implementation $__lldb_objc_class ($__lldb_category)  \n"
469            "-(void)%s:(void *)$__lldb_arg                          \n"
470            "{                                                      \n"
471            "    %s;                                                \n"
472            "%s"
473            "}                                                      \n"
474            "@end                                                   \n",
475            module_imports.c_str(), m_name.c_str(), m_name.c_str(),
476            lldb_local_var_decls.GetData(), tagged_body.c_str());
477      }
478      break;
479    }
480
481    text = wrap_stream.GetString();
482  } else {
483    text.append(m_body);
484  }
485
486  return true;
487}
488
489bool ClangExpressionSourceCode::GetOriginalBodyBounds(
490    std::string transformed_text, lldb::LanguageType wrapping_language,
491    size_t &start_loc, size_t &end_loc) {
492  switch (wrapping_language) {
493  default:
494    return false;
495  case lldb::eLanguageTypeC:
496  case lldb::eLanguageTypeC_plus_plus:
497  case lldb::eLanguageTypeObjC:
498    break;
499  }
500
501  start_loc = transformed_text.find(m_start_marker);
502  if (start_loc == std::string::npos)
503    return false;
504  start_loc += m_start_marker.size();
505  end_loc = transformed_text.find(m_end_marker);
506  return end_loc != std::string::npos;
507}
508