##
# == Manipulates strings like the UNIX Bourne shell
#
# This module manipulates strings according to the word parsing rules
# of the UNIX Bourne shell.
#
# The shellwords() function was originally a port of shellwords.pl,
# but modified to conform to POSIX / SUSv3 (IEEE Std 1003.1-2001 [1]).
#
# === Usage
#
# You can use Shellwords to parse a string into a Bourne shell friendly Array.
#
#   require 'shellwords'
#
#   argv = Shellwords.split('three blind "mice"')
#   argv #=> ["three", "blind", "mice"]
#
# Once you've required Shellwords, you can use the #split alias
# String#shellsplit.
#
#   argv = "see how they run".shellsplit
#   argv #=> ["see", "how", "they", "run"]
#
# Be careful you don't leave a quote unmatched.
#
#   argv = "they all ran after the farmer's wife".shellsplit
#        #=> ArgumentError: Unmatched quote: ...
#
# In this case, you might want to use Shellwords.escape, or its alias
# String#shellescape.
#
# This method will escape the String for you to safely use with a Bourne shell.
#
#   argv = Shellwords.escape("special's.txt")
#   argv #=> "special\\'s.txt"
#   system("cat " + argv)
#
# Shellwords also comes with a core extension for Array, Array#shelljoin.
# You can use this method to create an escaped string out of an array of
# tokens separated by a space. In this example we use the literal shortcut
# for Array.new.
#
#   argv = %w{ls -lta lib}
#   system(argv.shelljoin)
#
# === Authors
# * Wakou Aoyama
# * Akinori MUSHA <knu@iDaemons.org>
#
# === Contact
# * Akinori MUSHA <knu@iDaemons.org> (current maintainer)
#
# === Resources
#
# 1: {IEEE Std 1003.1-2004}[http://pubs.opengroup.org/onlinepubs/009695399/toc.htm]

module Shellwords
  # Splits a string into an array of tokens in the same way the UNIX
  # Bourne shell does.
  #
  #   argv = Shellwords.split('here are "two words"')
  #   argv #=> ["here", "are", "two words"]
  #
  # Inside double quotes a backslash is an escape character only when it
  # precedes one of <tt>$ ` " \\</tt> or a newline; before any other
  # character it is kept literally. Outside quotes a backslash escapes
  # the character that follows it.
  #
  # Raises ArgumentError if +line+ contains an unmatched single or
  # double quote.
  #
  # String#shellsplit is a shortcut for this function.
  #
  #   argv = 'here are "two words"'.shellsplit
  #   argv #=> ["here", "are", "two words"]
  def shellsplit(line)
    words = []
    # Use an explicitly mutable buffer so appending works even when
    # string literals are frozen.
    field = ''.dup
    line.scan(/\G\s*(?>([^\s\\\'\"]+)|'([^\']*)'|"((?:[^\"\\]|\\.)*)"|(\\.?)|(\S))(\s|\z)?/m) do |word, sq, dq, esc, garbage, sep|
      # +garbage+ only matches when a quote character could not be paired
      # with a closing quote of the same kind.
      raise ArgumentError, "Unmatched quote: #{line.inspect}" if garbage

      # POSIX 2.2.3 Double-Quotes: the backslash retains its special
      # meaning only when followed by $ ` " \ or <newline>.
      field << (word || sq || (dq && dq.gsub(/\\([$`"\\\n])/, '\\1')) || esc.gsub(/\\(.)/, '\\1'))

      # A separator (whitespace or end of input) terminates the current
      # word; otherwise adjacent pieces are concatenated into one word.
      if sep
        words << field
        field = ''.dup
      end
    end
    words
  end

  alias shellwords shellsplit

  module_function :shellsplit, :shellwords

  class << self
    alias split shellsplit
  end

  # Escapes a string so that it can be safely used in a Bourne shell
  # command line.  +str+ can be a non-string object that responds to
  # +to_s+.
  #
  # Note that the resulting string should be used unquoted and is not
  # intended for use in double quotes nor in single quotes.
  #
  #   argv = Shellwords.escape("It's better to give than to receive")
  #   argv #=> "It\\'s\\ better\\ to\\ give\\ than\\ to\\ receive"
  #
  # String#shellescape is a shorthand for this function.
  #
  #   argv = "It's better to give than to receive".shellescape
  #   argv #=> "It\\'s\\ better\\ to\\ give\\ than\\ to\\ receive"
  #
  #   # Search files in lib for method definitions
  #   pattern = "^[ \t]*def "
  #   open("| grep -Ern #{pattern.shellescape} lib") { |grep|
  #     grep.each_line { |line|
  #       file, lineno, matched_line = line.split(':', 3)
  #       # ...
  #     }
  #   }
  #
  # It is the caller's responsibility to encode the string in the right
  # encoding for the shell environment where this string is used.
  #
  # Multibyte characters are treated as multibyte characters, not bytes.
  #
  # Returns an empty quoted String if +str+ has a length of zero.
  # The argument itself is never modified.
  def shellescape(str)
    str = str.to_s

    # An empty argument will be skipped, so return empty quotes.
    return "''" if str.empty?

    # Backslash-escape everything outside the safe set.  Multibyte
    # characters are treated as is; it is the caller's responsibility to
    # encode the string in the right encoding for the shell environment.
    #
    # A LF cannot be escaped with a backslash because a backslash + LF
    # combo is regarded as line continuation and simply ignored, so it
    # is wrapped in single quotes instead.
    str.gsub(/[^A-Za-z0-9_\-.,:\/@\n]/, "\\\\\\&").gsub(/\n/, "'\n'")
  end

  module_function :shellescape

  class << self
    alias escape shellescape
  end

  # Builds a command line string from an argument list, +array+.
  #
  # All elements are joined into a single string with fields separated by a
  # space, where each element is escaped for Bourne shell and stringified using
  # +to_s+.
  #
  #   ary = ["There's", "a", "time", "and", "place", "for", "everything"]
  #   argv = Shellwords.join(ary)
  #   argv #=> "There\\'s a time and place for everything"
  #
  # Array#shelljoin is a shortcut for this function.
  #
  #   ary = ["Don't", "rock", "the", "boat"]
  #   argv = ary.shelljoin
  #   argv #=> "Don\\'t rock the boat"
  #
  # You can also mix non-string objects in the elements as allowed in Array#join.
  #
  #   output = `#{['ps', '-p', $$].shelljoin}`
  #
  def shelljoin(array)
    array.map { |arg| shellescape(arg) }.join(' ')
  end

  module_function :shelljoin

  class << self
    alias join shelljoin
  end
end

class String
  # call-seq:
  #   str.shellsplit => array
  #
  # Splits +str+ into an array of tokens in the same way the UNIX
  # Bourne shell does.
  #
  # See Shellwords.shellsplit for details.
  def shellsplit
    Shellwords.split(self)
  end

  # call-seq:
  #   str.shellescape => string
  #
  # Escapes +str+ so that it can be safely used in a Bourne shell
  # command line.
  #
  # See Shellwords.shellescape for details.
  def shellescape
    Shellwords.escape(self)
  end
end

class Array
  # call-seq:
  #   array.shelljoin => string
  #
  # Builds a command line string from an argument list +array+ joining
  # all elements escaped for Bourne shell and separated by a space.
  #
  # See Shellwords.shelljoin for details.
  def shelljoin
    Shellwords.join(self)
  end
end