1336823Sdim//===-- Args.h --------------------------------------------------*- C++ -*-===//
2336823Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6336823Sdim//
7336823Sdim//===----------------------------------------------------------------------===//
8336823Sdim
9336823Sdim#ifndef LLDB_UTILITY_ARGS_H
10336823Sdim#define LLDB_UTILITY_ARGS_H
11336823Sdim
12336823Sdim#include "lldb/Utility/Environment.h"
13336823Sdim#include "lldb/lldb-private-types.h"
14336823Sdim#include "lldb/lldb-types.h"
15336823Sdim#include "llvm/ADT/ArrayRef.h"
16336823Sdim#include "llvm/ADT/StringRef.h"
17336823Sdim#include <string>
18336823Sdim#include <utility>
19336823Sdim#include <vector>
20336823Sdim
21336823Sdimnamespace lldb_private {
22336823Sdim
23353358Sdim/// \class Args Args.h "lldb/Utility/Args.h"
24336823Sdim/// A command line argument class.
25336823Sdim///
26336823Sdim/// The Args class is designed to be fed a command line. The command line is
27336823Sdim/// copied into an internal buffer and then split up into arguments. Arguments
28336823Sdim/// are space delimited if there are no quotes (single, double, or backtick
29336823Sdim/// quotes) surrounding the argument. Spaces can be escaped using a \
30336823Sdim/// character to avoid having to surround an argument that contains a space
31336823Sdim/// with quotes.
32336823Sdimclass Args {
33336823Sdimpublic:
34336823Sdim  struct ArgEntry {
35336823Sdim  private:
36336823Sdim    friend class Args;
37336823Sdim    std::unique_ptr<char[]> ptr;
38360784Sdim    char quote;
39336823Sdim
40336823Sdim    char *data() { return ptr.get(); }
41336823Sdim
42336823Sdim  public:
43336823Sdim    ArgEntry() = default;
44336823Sdim    ArgEntry(llvm::StringRef str, char quote);
45336823Sdim
46360784Sdim    llvm::StringRef ref() const { return c_str(); }
47336823Sdim    const char *c_str() const { return ptr.get(); }
48336823Sdim
49336823Sdim    /// Returns true if this argument was quoted in any way.
50336823Sdim    bool IsQuoted() const { return quote != '\0'; }
51360784Sdim    char GetQuoteChar() const { return quote; }
52336823Sdim  };
53336823Sdim
54336823Sdim  /// Construct with an option command string.
55336823Sdim  ///
56353358Sdim  /// \param[in] command
57336823Sdim  ///     A NULL terminated command that will be copied and split up
58336823Sdim  ///     into arguments.
59336823Sdim  ///
60353358Sdim  /// \see Args::SetCommandString(llvm::StringRef)
61336823Sdim  Args(llvm::StringRef command = llvm::StringRef());
62336823Sdim
63336823Sdim  Args(const Args &rhs);
64336823Sdim  explicit Args(const StringList &list);
65336823Sdim
66336823Sdim  Args &operator=(const Args &rhs);
67336823Sdim
68336823Sdim  /// Destructor.
69336823Sdim  ~Args();
70336823Sdim
71336823Sdim  explicit Args(const Environment &env) : Args() {
72336823Sdim    SetArguments(const_cast<const char **>(env.getEnvp().get()));
73336823Sdim  }
74336823Sdim
75336823Sdim  explicit operator Environment() const { return GetConstArgumentVector(); }
76336823Sdim
77336823Sdim  /// Dump all entries to the stream \a s using label \a label_name.
78336823Sdim  ///
79336823Sdim  /// If label_name is nullptr, the dump operation is skipped.
80336823Sdim  ///
81353358Sdim  /// \param[in] s
82336823Sdim  ///     The stream to which to dump all arguments in the argument
83336823Sdim  ///     vector.
84353358Sdim  /// \param[in] label_name
85336823Sdim  ///     The label_name to use as the label printed for each
86336823Sdim  ///     entry of the args like so:
87336823Sdim  ///       {label_name}[{index}]={value}
88336823Sdim  void Dump(Stream &s, const char *label_name = "argv") const;
89336823Sdim
90336823Sdim  /// Sets the command string contained by this object.
91336823Sdim  ///
92336823Sdim  /// The command string will be copied and split up into arguments that can
93336823Sdim  /// be accessed via the accessor functions.
94336823Sdim  ///
95353358Sdim  /// \param[in] command
96336823Sdim  ///     A command StringRef that will be copied and split up
97336823Sdim  ///     into arguments.
98336823Sdim  ///
99353358Sdim  /// \see Args::GetArgumentCount() const
100353358Sdim  /// \see Args::GetArgumentAtIndex (size_t) const @see
101353358Sdim  /// Args::GetArgumentVector () \see Args::Shift () \see Args::Unshift (const
102336823Sdim  /// char *)
103336823Sdim  void SetCommandString(llvm::StringRef command);
104336823Sdim
105336823Sdim  bool GetCommandString(std::string &command) const;
106336823Sdim
107336823Sdim  bool GetQuotedCommandString(std::string &command) const;
108336823Sdim
109336823Sdim  /// Gets the number of arguments left in this command object.
110336823Sdim  ///
111353358Sdim  /// \return
112336823Sdim  ///     The number or arguments in this object.
113336823Sdim  size_t GetArgumentCount() const;
114336823Sdim  bool empty() const { return GetArgumentCount() == 0; }
115336823Sdim
116336823Sdim  /// Gets the NULL terminated C string argument pointer for the argument at
117336823Sdim  /// index \a idx.
118336823Sdim  ///
119353358Sdim  /// \return
120336823Sdim  ///     The NULL terminated C string argument pointer if \a idx is a
121336823Sdim  ///     valid argument index, NULL otherwise.
122336823Sdim  const char *GetArgumentAtIndex(size_t idx) const;
123336823Sdim
124336823Sdim  llvm::ArrayRef<ArgEntry> entries() const { return m_entries; }
125336823Sdim
126336823Sdim  using const_iterator = std::vector<ArgEntry>::const_iterator;
127336823Sdim
128336823Sdim  const_iterator begin() const { return m_entries.begin(); }
129336823Sdim  const_iterator end() const { return m_entries.end(); }
130336823Sdim
131336823Sdim  size_t size() const { return GetArgumentCount(); }
132336823Sdim  const ArgEntry &operator[](size_t n) const { return m_entries[n]; }
133336823Sdim
134336823Sdim  /// Gets the argument vector.
135336823Sdim  ///
136336823Sdim  /// The value returned by this function can be used by any function that
137336823Sdim  /// takes and vector. The return value is just like \a argv in the standard
138336823Sdim  /// C entry point function:
139336823Sdim  ///     \code
140336823Sdim  ///         int main (int argc, const char **argv);
141336823Sdim  ///     \endcode
142336823Sdim  ///
143353358Sdim  /// \return
144336823Sdim  ///     An array of NULL terminated C string argument pointers that
145336823Sdim  ///     also has a terminating NULL C string pointer
146336823Sdim  char **GetArgumentVector();
147336823Sdim
148336823Sdim  /// Gets the argument vector.
149336823Sdim  ///
150336823Sdim  /// The value returned by this function can be used by any function that
151336823Sdim  /// takes and vector. The return value is just like \a argv in the standard
152336823Sdim  /// C entry point function:
153336823Sdim  ///     \code
154336823Sdim  ///         int main (int argc, const char **argv);
155336823Sdim  ///     \endcode
156336823Sdim  ///
157353358Sdim  /// \return
158336823Sdim  ///     An array of NULL terminate C string argument pointers that
159336823Sdim  ///     also has a terminating NULL C string pointer
160336823Sdim  const char **GetConstArgumentVector() const;
161336823Sdim
162336823Sdim  /// Gets the argument as an ArrayRef. Note that the return value does *not*
163336823Sdim  /// have a nullptr const char * at the end, as the size of the list is
164336823Sdim  /// embedded in the ArrayRef object.
165336823Sdim  llvm::ArrayRef<const char *> GetArgumentArrayRef() const {
166336823Sdim    return llvm::makeArrayRef(m_argv).drop_back();
167336823Sdim  }
168336823Sdim
169336823Sdim  /// Appends a new argument to the end of the list argument list.
170336823Sdim  ///
171360784Sdim  /// \param[in] arg_str
172360784Sdim  ///     The new argument.
173336823Sdim  ///
174353358Sdim  /// \param[in] quote_char
175336823Sdim  ///     If the argument was originally quoted, put in the quote char here.
176336823Sdim  void AppendArgument(llvm::StringRef arg_str, char quote_char = '\0');
177336823Sdim
178336823Sdim  void AppendArguments(const Args &rhs);
179336823Sdim
180336823Sdim  void AppendArguments(const char **argv);
181336823Sdim
182360784Sdim  /// Insert the argument value at index \a idx to \a arg_str.
183336823Sdim  ///
184353358Sdim  /// \param[in] idx
185336823Sdim  ///     The index of where to insert the argument.
186336823Sdim  ///
187360784Sdim  /// \param[in] arg_str
188360784Sdim  ///     The new argument.
189336823Sdim  ///
190353358Sdim  /// \param[in] quote_char
191336823Sdim  ///     If the argument was originally quoted, put in the quote char here.
192336823Sdim  void InsertArgumentAtIndex(size_t idx, llvm::StringRef arg_str,
193336823Sdim                             char quote_char = '\0');
194336823Sdim
195360784Sdim  /// Replaces the argument value at index \a idx to \a arg_str if \a idx is
196336823Sdim  /// a valid argument index.
197336823Sdim  ///
198353358Sdim  /// \param[in] idx
199336823Sdim  ///     The index of the argument that will have its value replaced.
200336823Sdim  ///
201360784Sdim  /// \param[in] arg_str
202360784Sdim  ///     The new argument.
203336823Sdim  ///
204353358Sdim  /// \param[in] quote_char
205336823Sdim  ///     If the argument was originally quoted, put in the quote char here.
206336823Sdim  void ReplaceArgumentAtIndex(size_t idx, llvm::StringRef arg_str,
207336823Sdim                              char quote_char = '\0');
208336823Sdim
209336823Sdim  /// Deletes the argument value at index
210336823Sdim  /// if \a idx is a valid argument index.
211336823Sdim  ///
212353358Sdim  /// \param[in] idx
213336823Sdim  ///     The index of the argument that will have its value replaced.
214336823Sdim  ///
215336823Sdim  void DeleteArgumentAtIndex(size_t idx);
216336823Sdim
217336823Sdim  /// Sets the argument vector value, optionally copying all arguments into an
218336823Sdim  /// internal buffer.
219336823Sdim  ///
220336823Sdim  /// Sets the arguments to match those found in \a argv. All argument strings
221336823Sdim  /// will be copied into an internal buffers.
222336823Sdim  //
223336823Sdim  //  FIXME: Handle the quote character somehow.
224336823Sdim  void SetArguments(size_t argc, const char **argv);
225336823Sdim
226336823Sdim  void SetArguments(const char **argv);
227336823Sdim
228336823Sdim  /// Shifts the first argument C string value of the array off the argument
229336823Sdim  /// array.
230336823Sdim  ///
231336823Sdim  /// The string value will be freed, so a copy of the string should be made
232336823Sdim  /// by calling Args::GetArgumentAtIndex (size_t) const first and copying the
233336823Sdim  /// returned value before calling Args::Shift().
234336823Sdim  ///
235353358Sdim  /// \see Args::GetArgumentAtIndex (size_t) const
236336823Sdim  void Shift();
237336823Sdim
238360784Sdim  /// Inserts a class owned copy of \a arg_str at the beginning of the
239336823Sdim  /// argument vector.
240336823Sdim  ///
241360784Sdim  /// A copy \a arg_str will be made.
242336823Sdim  ///
243360784Sdim  /// \param[in] arg_str
244336823Sdim  ///     The argument to push on the front of the argument stack.
245336823Sdim  ///
246353358Sdim  /// \param[in] quote_char
247336823Sdim  ///     If the argument was originally quoted, put in the quote char here.
248336823Sdim  void Unshift(llvm::StringRef arg_str, char quote_char = '\0');
249336823Sdim
250336823Sdim  // Clear the arguments.
251336823Sdim  //
252336823Sdim  // For re-setting or blanking out the list of arguments.
253336823Sdim  void Clear();
254336823Sdim
255336823Sdim  static lldb::Encoding
256336823Sdim  StringToEncoding(llvm::StringRef s,
257336823Sdim                   lldb::Encoding fail_value = lldb::eEncodingInvalid);
258336823Sdim
259336823Sdim  static uint32_t StringToGenericRegister(llvm::StringRef s);
260336823Sdim
261336823Sdim  static const char *GetShellSafeArgument(const FileSpec &shell,
262336823Sdim                                          const char *unsafe_arg,
263336823Sdim                                          std::string &safe_arg);
264336823Sdim
265336823Sdim  // EncodeEscapeSequences will change the textual representation of common
266336823Sdim  // escape sequences like "\n" (two characters) into a single '\n'. It does
267336823Sdim  // this for all of the supported escaped sequences and for the \0ooo (octal)
268336823Sdim  // and \xXX (hex). The resulting "dst" string will contain the character
269336823Sdim  // versions of all supported escape sequences. The common supported escape
270336823Sdim  // sequences are: "\a", "\b", "\f", "\n", "\r", "\t", "\v", "\'", "\"", "\\".
271336823Sdim
272336823Sdim  static void EncodeEscapeSequences(const char *src, std::string &dst);
273336823Sdim
274336823Sdim  // ExpandEscapeSequences will change a string of possibly non-printable
275336823Sdim  // characters and expand them into text. So '\n' will turn into two
276336823Sdim  // characters like "\n" which is suitable for human reading. When a character
277336823Sdim  // is not printable and isn't one of the common in escape sequences listed in
278336823Sdim  // the help for EncodeEscapeSequences, then it will be encoded as octal.
279336823Sdim  // Printable characters are left alone.
280336823Sdim  static void ExpandEscapedCharacters(const char *src, std::string &dst);
281336823Sdim
282336823Sdim  static std::string EscapeLLDBCommandArgument(const std::string &arg,
283336823Sdim                                               char quote_char);
284336823Sdim
285336823Sdimprivate:
286336823Sdim  std::vector<ArgEntry> m_entries;
287336823Sdim  std::vector<char *> m_argv;
288336823Sdim};
289336823Sdim
290353358Sdim/// \class OptionsWithRaw Args.h "lldb/Utility/Args.h"
291336823Sdim/// A pair of an option list with a 'raw' string as a suffix.
292336823Sdim///
/// This class works similarly to Args, but handles the case where we have a
294336823Sdim/// trailing string that shouldn't be interpreted as a list of arguments but
295336823Sdim/// preserved as is. It is also only useful for handling command line options
296336823Sdim/// (e.g. '-foo bar -i0') that start with a dash.
297336823Sdim///
298336823Sdim/// The leading option list is optional. If the first non-space character
299336823Sdim/// in the string starts with a dash, and the string contains an argument
300336823Sdim/// that is an unquoted double dash (' -- '), then everything up to the double
301336823Sdim/// dash is parsed as a list of arguments. Everything after the double dash
302336823Sdim/// is interpreted as the raw suffix string. Note that the space behind the
303336823Sdim/// double dash is not part of the raw suffix.
304336823Sdim///
/// All strings not matching the above format are considered to be just a raw
/// string without any options.
307336823Sdim///
308353358Sdim/// \see Args
309336823Sdimclass OptionsWithRaw {
310336823Sdimpublic:
311336823Sdim  /// Parse the given string as a list of optional arguments with a raw suffix.
312336823Sdim  ///
313336823Sdim  /// See the class description for a description of the input format.
314336823Sdim  ///
315353358Sdim  /// \param[in] argument_string
316336823Sdim  ///     The string that should be parsed.
317336823Sdim  explicit OptionsWithRaw(llvm::StringRef argument_string);
318336823Sdim
319336823Sdim  /// Returns true if there are any arguments before the raw suffix.
320336823Sdim  bool HasArgs() const { return m_has_args; }
321336823Sdim
322336823Sdim  /// Returns the list of arguments.
323336823Sdim  ///
324336823Sdim  /// You can only call this method if HasArgs returns true.
325336823Sdim  Args &GetArgs() {
326336823Sdim    assert(m_has_args);
327336823Sdim    return m_args;
328336823Sdim  }
329336823Sdim
330336823Sdim  /// Returns the list of arguments.
331336823Sdim  ///
332336823Sdim  /// You can only call this method if HasArgs returns true.
333336823Sdim  const Args &GetArgs() const {
334336823Sdim    assert(m_has_args);
335336823Sdim    return m_args;
336336823Sdim  }
337336823Sdim
338336823Sdim  /// Returns the part of the input string that was used for parsing the
339336823Sdim  /// argument list. This string also includes the double dash that is used
340336823Sdim  /// for separating the argument list from the suffix.
341336823Sdim  ///
342336823Sdim  /// You can only call this method if HasArgs returns true.
343336823Sdim  llvm::StringRef GetArgStringWithDelimiter() const {
344336823Sdim    assert(m_has_args);
345336823Sdim    return m_arg_string_with_delimiter;
346336823Sdim  }
347336823Sdim
348336823Sdim  /// Returns the part of the input string that was used for parsing the
349336823Sdim  /// argument list.
350336823Sdim  ///
351336823Sdim  /// You can only call this method if HasArgs returns true.
352336823Sdim  llvm::StringRef GetArgString() const {
353336823Sdim    assert(m_has_args);
354336823Sdim    return m_arg_string;
355336823Sdim  }
356336823Sdim
357336823Sdim  /// Returns the raw suffix part of the parsed string.
358336823Sdim  const std::string &GetRawPart() const { return m_suffix; }
359336823Sdim
360336823Sdimprivate:
361336823Sdim  void SetFromString(llvm::StringRef arg_string);
362336823Sdim
363336823Sdim  /// Keeps track if we have parsed and stored any arguments.
364336823Sdim  bool m_has_args = false;
365336823Sdim  Args m_args;
366336823Sdim  llvm::StringRef m_arg_string;
367336823Sdim  llvm::StringRef m_arg_string_with_delimiter;
368336823Sdim
369336823Sdim  // FIXME: This should be a StringRef, but some of the calling code expect a
370336823Sdim  // C string here so only a real std::string is possible.
371336823Sdim  std::string m_suffix;
372336823Sdim};
373336823Sdim
374336823Sdim} // namespace lldb_private
375336823Sdim
376336823Sdim#endif // LLDB_UTILITY_ARGS_H
377