1##
2# == Manipulates strings like the UNIX Bourne shell
3#
4# This module manipulates strings according to the word parsing rules
5# of the UNIX Bourne shell.
6#
7# The shellwords() function was originally a port of shellwords.pl,
8# but modified to conform to POSIX / SUSv3 (IEEE Std 1003.1-2001 [1]).
9#
10# === Usage
11#
12# You can use shellwords to parse a string into a Bourne shell friendly Array.
13#
14#   require 'shellwords'
15#
16#   argv = Shellwords.split('three blind "mice"')
17#   argv #=> ["three", "blind", "mice"]
18#
19# Once you've required Shellwords, you can use the #split alias
20# String#shellsplit.
21#
22#   argv = "see how they run".shellsplit
23#   argv #=> ["see", "how", "they", "run"]
24#
25# Be careful you don't leave a quote unmatched.
26#
27#   argv = "they all ran after the farmer's wife".shellsplit
28#        #=> ArgumentError: Unmatched double quote: ...
29#
# In this case, you might want to use Shellwords.escape, or its alias
# String#shellescape.
32#
33# This method will escape the String for you to safely use with a Bourne shell.
34#
35#   argv = Shellwords.escape("special's.txt")
#   argv #=> "special\\'s.txt"
37#   system("cat " + argv)
38#
39# Shellwords also comes with a core extension for Array, Array#shelljoin.
40#
41#   argv = %w{ls -lta lib}
42#   system(argv.shelljoin)
43#
# You can use this method to create an escaped string out of an array of tokens
# separated by a space. In the example above we used the %w literal shortcut
# for Array.new.
47#
48# === Authors
49# * Wakou Aoyama
50# * Akinori MUSHA <knu@iDaemons.org>
51#
52# === Contact
53# * Akinori MUSHA <knu@iDaemons.org> (current maintainer)
54#
55# === Resources
56#
57# 1: {IEEE Std 1003.1-2004}[http://pubs.opengroup.org/onlinepubs/009695399/toc.htm]
58
module Shellwords
  # Splits a string into an array of tokens in the same way the UNIX
  # Bourne shell does.
  #
  #   argv = Shellwords.split('here are "two words"')
  #   argv #=> ["here", "are", "two words"]
  #
  # Inside double quotes a backslash acts as an escape character only when
  # it precedes <tt>$</tt>, <tt>`</tt>, <tt>"</tt>, a backslash or a
  # newline (POSIX Shell Command Language, 2.2.3 Double-Quotes); before any
  # other character the backslash is kept literally.
  #
  #   argv = Shellwords.split('"a\tb" "say \"hi\""')
  #   argv #=> ["a\\tb", "say \"hi\""]
  #
  # Outside quotes a backslash makes the following character literal and is
  # itself dropped (a backslash immediately followed by a newline is left
  # untouched).  Nothing is unescaped inside single quotes.
  #
  # Raises ArgumentError if +line+ contains a quote that is never closed.
  # The message reads "Unmatched double quote" for single quotes as well.
  #
  # String#shellsplit is a shortcut for this function.
  #
  #   argv = 'here are "two words"'.shellsplit
  #   argv #=> ["here", "are", "two words"]
  def shellsplit(line)
    words = []
    field = ''
    # Every scan step skips leading whitespace and then consumes exactly one
    # piece of a field: a bare word, a single-quoted run, a double-quoted
    # run, a backslash escape, or -- when none of those fit -- one stray
    # non-space character (+garbage+), which can only be an unclosed quote.
    # +sep+ is non-nil when the piece is followed by whitespace or the end
    # of the input, i.e. when the current field is complete.
    line.scan(/\G\s*(?>([^\s\\\'\"]+)|'([^\']*)'|"((?:[^\"\\]|\\.)*)"|(\\.?)|(\S))(\s|\z)?/m) do |word, sq, dq, esc, garbage, sep|
      raise ArgumentError, "Unmatched double quote: #{line.inspect}" if garbage

      # Double-quoted text only unescapes the characters POSIX lists as
      # special there; an unquoted escape unescapes any character.
      field << (word || sq || (dq && dq.gsub(/\\([$`"\\\n])/, '\\1')) || esc.gsub(/\\(.)/, '\\1'))

      if sep
        words << field
        field = ''
      end
    end
    words
  end

  alias shellwords shellsplit

  module_function :shellsplit, :shellwords

  class << self
    alias split shellsplit
  end

  # Escapes a string so that it can be safely used in a Bourne shell
  # command line.  +str+ can be a non-string object that responds to
  # +to_s+.
  #
  # Note that the resulting string should be used unquoted; it is not
  # intended for use inside double quotes or single quotes.
  #
  #   argv = Shellwords.escape("It's better to give than to receive")
  #   argv #=> "It\\'s\\ better\\ to\\ give\\ than\\ to\\ receive"
  #
  # String#shellescape is a shorthand for this function.
  #
  #   argv = "It's better to give than to receive".shellescape
  #   argv #=> "It\\'s\\ better\\ to\\ give\\ than\\ to\\ receive"
  #
  #   # Search files in lib for method definitions
  #   pattern = "^[ \t]*def "
  #   open("| grep -Ern #{pattern.shellescape} lib") { |grep|
  #     grep.each_line { |line|
  #       file, lineno, matched_line = line.split(':', 3)
  #       # ...
  #     }
  #   }
  #
  # It is the caller's responsibility to encode the string in the right
  # encoding for the shell environment where this string is used.
  #
  # Multibyte characters are treated as multibyte characters, not bytes.
  #
  # Returns an empty quoted String (<tt>''</tt>) if +str+ has a length of
  # zero.  The argument itself is never modified; a new String is returned.
  def shellescape(str)
    str = str.to_s

    # An empty argument will be skipped, so return empty quotes.
    return "''" if str.empty?

    # Work on a copy so the caller's string is left untouched.
    str = str.dup

    # Backslash-escape every character outside the known-safe set.
    # Treat multibyte characters as is.  It is caller's responsibility
    # to encode the string in the right encoding for the shell
    # environment.
    str.gsub!(/([^A-Za-z0-9_\-.,:\/@\n])/, "\\\\\\1")

    # A LF cannot be escaped with a backslash because a backslash + LF
    # combo is regarded as line continuation and simply ignored, so each
    # LF is wrapped in single quotes instead.
    str.gsub!(/\n/, "'\n'")

    str
  end

  module_function :shellescape

  class << self
    alias escape shellescape
  end

  # Builds a command line string from an argument list, +array+.
  #
  # All elements are joined into a single string with fields separated by a
  # space, where each element is escaped for Bourne shell and stringified using
  # +to_s+.
  #
  #   ary = ["There's", "a", "time", "and", "place", "for", "everything"]
  #   argv = Shellwords.join(ary)
  #   argv #=> "There\\'s a time and place for everything"
  #
  # Array#shelljoin is a shortcut for this function.
  #
  #   ary = ["Don't", "rock", "the", "boat"]
  #   argv = ary.shelljoin
  #   argv #=> "Don\\'t rock the boat"
  #
  # You can also mix non-string objects in the elements as allowed in Array#join.
  #
  #   output = `#{['ps', '-p', $$].shelljoin}`
  #
  # An empty +array+ yields an empty String.
  def shelljoin(array)
    array.map { |arg| shellescape(arg) }.join(' ')
  end

  module_function :shelljoin

  class << self
    alias join shelljoin
  end
end
179
class String
  # call-seq:
  #   str.shellsplit => array
  #
  # Breaks the receiver into an array of tokens following the word
  # parsing rules of the UNIX Bourne shell.
  #
  # Delegates to Shellwords.shellsplit; see that method for details.
  def shellsplit
    Shellwords.shellsplit(self)
  end

  # call-seq:
  #   str.shellescape => string
  #
  # Returns a copy of the receiver escaped so that it can be used
  # safely on a Bourne shell command line.
  #
  # Delegates to Shellwords.shellescape; see that method for details.
  def shellescape
    Shellwords.shellescape(self)
  end
end
203
class Array
  # call-seq:
  #   array.shelljoin => string
  #
  # Turns the receiver, treated as an argument list, into a single
  # command line string: every element is escaped for the Bourne shell
  # and the results are joined with single spaces.
  #
  # Delegates to Shellwords.shelljoin; see that method for details.
  def shelljoin
    Shellwords.shelljoin(self)
  end
end
216