Args.h revision 355940
1//===-- Args.h --------------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLDB_UTILITY_ARGS_H
10#define LLDB_UTILITY_ARGS_H
11
#include "lldb/Utility/Environment.h"
#include "lldb/lldb-private-types.h"
#include "lldb/lldb-types.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string>
#include <utility>
#include <vector>
20
21namespace lldb_private {
22
23/// \class Args Args.h "lldb/Utility/Args.h"
24/// A command line argument class.
25///
26/// The Args class is designed to be fed a command line. The command line is
27/// copied into an internal buffer and then split up into arguments. Arguments
28/// are space delimited if there are no quotes (single, double, or backtick
29/// quotes) surrounding the argument. Spaces can be escaped using a \
30/// character to avoid having to surround an argument that contains a space
31/// with quotes.
32class Args {
33public:
34  struct ArgEntry {
35  private:
36    friend class Args;
37    std::unique_ptr<char[]> ptr;
38
39    char *data() { return ptr.get(); }
40
41  public:
42    ArgEntry() = default;
43    ArgEntry(llvm::StringRef str, char quote);
44
45    llvm::StringRef ref;
46    char quote;
47    const char *c_str() const { return ptr.get(); }
48
49    /// Returns true if this argument was quoted in any way.
50    bool IsQuoted() const { return quote != '\0'; }
51  };
52
53  /// Construct with an option command string.
54  ///
55  /// \param[in] command
56  ///     A NULL terminated command that will be copied and split up
57  ///     into arguments.
58  ///
59  /// \see Args::SetCommandString(llvm::StringRef)
60  Args(llvm::StringRef command = llvm::StringRef());
61
62  Args(const Args &rhs);
63  explicit Args(const StringList &list);
64
65  Args &operator=(const Args &rhs);
66
67  /// Destructor.
68  ~Args();
69
70  explicit Args(const Environment &env) : Args() {
71    SetArguments(const_cast<const char **>(env.getEnvp().get()));
72  }
73
74  explicit operator Environment() const { return GetConstArgumentVector(); }
75
76  /// Dump all entries to the stream \a s using label \a label_name.
77  ///
78  /// If label_name is nullptr, the dump operation is skipped.
79  ///
80  /// \param[in] s
81  ///     The stream to which to dump all arguments in the argument
82  ///     vector.
83  /// \param[in] label_name
84  ///     The label_name to use as the label printed for each
85  ///     entry of the args like so:
86  ///       {label_name}[{index}]={value}
87  void Dump(Stream &s, const char *label_name = "argv") const;
88
89  /// Sets the command string contained by this object.
90  ///
91  /// The command string will be copied and split up into arguments that can
92  /// be accessed via the accessor functions.
93  ///
94  /// \param[in] command
95  ///     A command StringRef that will be copied and split up
96  ///     into arguments.
97  ///
98  /// \see Args::GetArgumentCount() const
99  /// \see Args::GetArgumentAtIndex (size_t) const @see
100  /// Args::GetArgumentVector () \see Args::Shift () \see Args::Unshift (const
101  /// char *)
102  void SetCommandString(llvm::StringRef command);
103
104  bool GetCommandString(std::string &command) const;
105
106  bool GetQuotedCommandString(std::string &command) const;
107
108  /// Gets the number of arguments left in this command object.
109  ///
110  /// \return
111  ///     The number or arguments in this object.
112  size_t GetArgumentCount() const;
113  bool empty() const { return GetArgumentCount() == 0; }
114
115  /// Gets the NULL terminated C string argument pointer for the argument at
116  /// index \a idx.
117  ///
118  /// \return
119  ///     The NULL terminated C string argument pointer if \a idx is a
120  ///     valid argument index, NULL otherwise.
121  const char *GetArgumentAtIndex(size_t idx) const;
122
123  llvm::ArrayRef<ArgEntry> entries() const { return m_entries; }
124  char GetArgumentQuoteCharAtIndex(size_t idx) const;
125
126  using const_iterator = std::vector<ArgEntry>::const_iterator;
127
128  const_iterator begin() const { return m_entries.begin(); }
129  const_iterator end() const { return m_entries.end(); }
130
131  size_t size() const { return GetArgumentCount(); }
132  const ArgEntry &operator[](size_t n) const { return m_entries[n]; }
133
134  /// Gets the argument vector.
135  ///
136  /// The value returned by this function can be used by any function that
137  /// takes and vector. The return value is just like \a argv in the standard
138  /// C entry point function:
139  ///     \code
140  ///         int main (int argc, const char **argv);
141  ///     \endcode
142  ///
143  /// \return
144  ///     An array of NULL terminated C string argument pointers that
145  ///     also has a terminating NULL C string pointer
146  char **GetArgumentVector();
147
148  /// Gets the argument vector.
149  ///
150  /// The value returned by this function can be used by any function that
151  /// takes and vector. The return value is just like \a argv in the standard
152  /// C entry point function:
153  ///     \code
154  ///         int main (int argc, const char **argv);
155  ///     \endcode
156  ///
157  /// \return
158  ///     An array of NULL terminate C string argument pointers that
159  ///     also has a terminating NULL C string pointer
160  const char **GetConstArgumentVector() const;
161
162  /// Gets the argument as an ArrayRef. Note that the return value does *not*
163  /// have a nullptr const char * at the end, as the size of the list is
164  /// embedded in the ArrayRef object.
165  llvm::ArrayRef<const char *> GetArgumentArrayRef() const {
166    return llvm::makeArrayRef(m_argv).drop_back();
167  }
168
169  /// Appends a new argument to the end of the list argument list.
170  ///
171  /// \param[in] arg_cstr
172  ///     The new argument as a NULL terminated C string.
173  ///
174  /// \param[in] quote_char
175  ///     If the argument was originally quoted, put in the quote char here.
176  void AppendArgument(llvm::StringRef arg_str, char quote_char = '\0');
177
178  void AppendArguments(const Args &rhs);
179
180  void AppendArguments(const char **argv);
181
182  /// Insert the argument value at index \a idx to \a arg_cstr.
183  ///
184  /// \param[in] idx
185  ///     The index of where to insert the argument.
186  ///
187  /// \param[in] arg_cstr
188  ///     The new argument as a NULL terminated C string.
189  ///
190  /// \param[in] quote_char
191  ///     If the argument was originally quoted, put in the quote char here.
192  ///
193  /// \return
194  ///     The NULL terminated C string of the copy of \a arg_cstr.
195  void InsertArgumentAtIndex(size_t idx, llvm::StringRef arg_str,
196                             char quote_char = '\0');
197
198  /// Replaces the argument value at index \a idx to \a arg_cstr if \a idx is
199  /// a valid argument index.
200  ///
201  /// \param[in] idx
202  ///     The index of the argument that will have its value replaced.
203  ///
204  /// \param[in] arg_cstr
205  ///     The new argument as a NULL terminated C string.
206  ///
207  /// \param[in] quote_char
208  ///     If the argument was originally quoted, put in the quote char here.
209  void ReplaceArgumentAtIndex(size_t idx, llvm::StringRef arg_str,
210                              char quote_char = '\0');
211
212  /// Deletes the argument value at index
213  /// if \a idx is a valid argument index.
214  ///
215  /// \param[in] idx
216  ///     The index of the argument that will have its value replaced.
217  ///
218  void DeleteArgumentAtIndex(size_t idx);
219
220  /// Sets the argument vector value, optionally copying all arguments into an
221  /// internal buffer.
222  ///
223  /// Sets the arguments to match those found in \a argv. All argument strings
224  /// will be copied into an internal buffers.
225  //
226  //  FIXME: Handle the quote character somehow.
227  void SetArguments(size_t argc, const char **argv);
228
229  void SetArguments(const char **argv);
230
231  /// Shifts the first argument C string value of the array off the argument
232  /// array.
233  ///
234  /// The string value will be freed, so a copy of the string should be made
235  /// by calling Args::GetArgumentAtIndex (size_t) const first and copying the
236  /// returned value before calling Args::Shift().
237  ///
238  /// \see Args::GetArgumentAtIndex (size_t) const
239  void Shift();
240
241  /// Inserts a class owned copy of \a arg_cstr at the beginning of the
242  /// argument vector.
243  ///
244  /// A copy \a arg_cstr will be made.
245  ///
246  /// \param[in] arg_cstr
247  ///     The argument to push on the front of the argument stack.
248  ///
249  /// \param[in] quote_char
250  ///     If the argument was originally quoted, put in the quote char here.
251  void Unshift(llvm::StringRef arg_str, char quote_char = '\0');
252
253  // Clear the arguments.
254  //
255  // For re-setting or blanking out the list of arguments.
256  void Clear();
257
258  static const char *StripSpaces(std::string &s, bool leading = true,
259                                 bool trailing = true,
260                                 bool return_null_if_empty = true);
261
262  static bool UInt64ValueIsValidForByteSize(uint64_t uval64,
263                                            size_t total_byte_size) {
264    if (total_byte_size > 8)
265      return false;
266
267    if (total_byte_size == 8)
268      return true;
269
270    const uint64_t max = (static_cast<uint64_t>(1)
271                          << static_cast<uint64_t>(total_byte_size * 8)) -
272                         1;
273    return uval64 <= max;
274  }
275
276  static bool SInt64ValueIsValidForByteSize(int64_t sval64,
277                                            size_t total_byte_size) {
278    if (total_byte_size > 8)
279      return false;
280
281    if (total_byte_size == 8)
282      return true;
283
284    const int64_t max = (static_cast<int64_t>(1)
285                         << static_cast<uint64_t>(total_byte_size * 8 - 1)) -
286                        1;
287    const int64_t min = ~(max);
288    return min <= sval64 && sval64 <= max;
289  }
290
291  static lldb::Encoding
292  StringToEncoding(llvm::StringRef s,
293                   lldb::Encoding fail_value = lldb::eEncodingInvalid);
294
295  static uint32_t StringToGenericRegister(llvm::StringRef s);
296
297  static const char *GetShellSafeArgument(const FileSpec &shell,
298                                          const char *unsafe_arg,
299                                          std::string &safe_arg);
300
301  // EncodeEscapeSequences will change the textual representation of common
302  // escape sequences like "\n" (two characters) into a single '\n'. It does
303  // this for all of the supported escaped sequences and for the \0ooo (octal)
304  // and \xXX (hex). The resulting "dst" string will contain the character
305  // versions of all supported escape sequences. The common supported escape
306  // sequences are: "\a", "\b", "\f", "\n", "\r", "\t", "\v", "\'", "\"", "\\".
307
308  static void EncodeEscapeSequences(const char *src, std::string &dst);
309
310  // ExpandEscapeSequences will change a string of possibly non-printable
311  // characters and expand them into text. So '\n' will turn into two
312  // characters like "\n" which is suitable for human reading. When a character
313  // is not printable and isn't one of the common in escape sequences listed in
314  // the help for EncodeEscapeSequences, then it will be encoded as octal.
315  // Printable characters are left alone.
316  static void ExpandEscapedCharacters(const char *src, std::string &dst);
317
318  static std::string EscapeLLDBCommandArgument(const std::string &arg,
319                                               char quote_char);
320
321private:
322  std::vector<ArgEntry> m_entries;
323  std::vector<char *> m_argv;
324};
325
326/// \class OptionsWithRaw Args.h "lldb/Utility/Args.h"
327/// A pair of an option list with a 'raw' string as a suffix.
328///
329/// This class works similar to Args, but handles the case where we have a
330/// trailing string that shouldn't be interpreted as a list of arguments but
331/// preserved as is. It is also only useful for handling command line options
332/// (e.g. '-foo bar -i0') that start with a dash.
333///
334/// The leading option list is optional. If the first non-space character
335/// in the string starts with a dash, and the string contains an argument
336/// that is an unquoted double dash (' -- '), then everything up to the double
337/// dash is parsed as a list of arguments. Everything after the double dash
338/// is interpreted as the raw suffix string. Note that the space behind the
339/// double dash is not part of the raw suffix.
340///
341/// All strings not matching the above format as considered to be just a raw
342/// string without any options.
343///
344/// \see Args
345class OptionsWithRaw {
346public:
347  /// Parse the given string as a list of optional arguments with a raw suffix.
348  ///
349  /// See the class description for a description of the input format.
350  ///
351  /// \param[in] argument_string
352  ///     The string that should be parsed.
353  explicit OptionsWithRaw(llvm::StringRef argument_string);
354
355  /// Returns true if there are any arguments before the raw suffix.
356  bool HasArgs() const { return m_has_args; }
357
358  /// Returns the list of arguments.
359  ///
360  /// You can only call this method if HasArgs returns true.
361  Args &GetArgs() {
362    assert(m_has_args);
363    return m_args;
364  }
365
366  /// Returns the list of arguments.
367  ///
368  /// You can only call this method if HasArgs returns true.
369  const Args &GetArgs() const {
370    assert(m_has_args);
371    return m_args;
372  }
373
374  /// Returns the part of the input string that was used for parsing the
375  /// argument list. This string also includes the double dash that is used
376  /// for separating the argument list from the suffix.
377  ///
378  /// You can only call this method if HasArgs returns true.
379  llvm::StringRef GetArgStringWithDelimiter() const {
380    assert(m_has_args);
381    return m_arg_string_with_delimiter;
382  }
383
384  /// Returns the part of the input string that was used for parsing the
385  /// argument list.
386  ///
387  /// You can only call this method if HasArgs returns true.
388  llvm::StringRef GetArgString() const {
389    assert(m_has_args);
390    return m_arg_string;
391  }
392
393  /// Returns the raw suffix part of the parsed string.
394  const std::string &GetRawPart() const { return m_suffix; }
395
396private:
397  void SetFromString(llvm::StringRef arg_string);
398
399  /// Keeps track if we have parsed and stored any arguments.
400  bool m_has_args = false;
401  Args m_args;
402  llvm::StringRef m_arg_string;
403  llvm::StringRef m_arg_string_with_delimiter;
404
405  // FIXME: This should be a StringRef, but some of the calling code expect a
406  // C string here so only a real std::string is possible.
407  std::string m_suffix;
408};
409
410} // namespace lldb_private
411
412#endif // LLDB_UTILITY_ARGS_H
413