# coding: utf-8

##
# For RDoc::Text#to_html

require 'strscan'

##
# For RDoc::Text#snippet

begin
  gem 'json'
rescue Gem::LoadError
  # No json gem could be activated; the require below loads whichever json
  # is otherwise available.
end

require 'json'

##
# Methods for manipulating comment text

module RDoc::Text

  ##
  # Maps markup formats to classes that can parse them.  If the format is
  # unknown, "rdoc" format is used.

  MARKUP_FORMAT = {
    'markdown' => RDoc::Markdown,
    'rdoc'     => RDoc::Markup,
    'rd'       => RDoc::RD,
    'tomdoc'   => RDoc::TomDoc,
  }

  MARKUP_FORMAT.default = RDoc::Markup

  ##
  # Maps an encoding to a Hash of characters properly transcoded for that
  # encoding.  Entries are computed on first access and cached in the Hash.
  #
  # Only defined when the Encoding constant exists; otherwise this constant
  # is +nil+.
  #
  # See also encode_fallback.

  TO_HTML_CHARACTERS = Hash.new do |h, encoding|
    h[encoding] = {
      :close_dquote => encode_fallback('”', encoding, '"'),
      :close_squote => encode_fallback('’', encoding, '\''),
      :copyright    => encode_fallback('©', encoding, '(c)'),
      :ellipsis     => encode_fallback('…', encoding, '...'),
      :em_dash      => encode_fallback('—', encoding, '---'),
      :en_dash      => encode_fallback('–', encoding, '--'),
      :open_dquote  => encode_fallback('“', encoding, '"'),
      :open_squote  => encode_fallback('‘', encoding, '\''),
      :trademark    => encode_fallback('®', encoding, '(r)'),
    }
  end if Object.const_defined? :Encoding

  ##
  # Transcodes +character+ to +encoding+ with a +fallback+ character.
  #
  # +fallback+ is used in place of +character+ when +encoding+ cannot
  # represent it.

  def self.encode_fallback character, encoding, fallback
    character.encode(encoding, :fallback => { character => fallback },
                     :undef => :replace, :replace => fallback)
  end

  ##
  # Expands tab characters in +text+ to spaces, padding each tab out to the
  # next multiple-of-eight column of its line.
  #
  # A tab the line-anchored pattern cannot reach (for example one following
  # a carriage return on a short line) is left as-is.

  def expand_tabs text
    expanded = []

    text.each_line do |line|
      # Each pass expands the first remaining tab of the line.  gsub! returns
      # nil once nothing was substituted, which ends the loop even when an
      # unmatchable tab is still present (looping "until the line has no
      # tabs" would spin forever on such input).
      nil while line.gsub!(/^((?:.{8})*?)([^\t\r\n]{0,7})\t/) do
        r = "#{$1}#{$2}#{' ' * (8 - $2.size)}"
        r.force_encoding text.encoding if Object.const_defined? :Encoding
        r
      end

      expanded << line
    end

    expanded.join
  end

  ##
  # Flush +text+ left based on the shortest line
  #
  # The smallest offset of the first non-whitespace character across all
  # lines is computed and at most that many leading spaces are removed from
  # every line.

  def flush_left text
    indent = 9999

    text.each_line do |line|
      line_indent = line =~ /\S/ || 9999
      indent = line_indent if indent > line_indent
    end

    empty = ''
    empty.force_encoding text.encoding if Object.const_defined? :Encoding

    text.gsub(/^ {0,#{indent}}/, empty)
  end

  ##
  # Convert a string in markup format into HTML.
  #
  # Requires the including class to implement #formatter

  def markup text
    parse(text).accept formatter
  end

  ##
  # Strips /* */ comment markers and leading hashes, expands tabs, flushes
  # +text+ to the left and finally strips leading and trailing newlines.
  #
  # Empty +text+ is returned unchanged.

  def normalize_comment text
    return text if text.empty?

    text = strip_stars    text
    text = strip_hashes   text
    text = expand_tabs    text
    text = flush_left     text
    text = strip_newlines text
    text
  end

  ##
  # Normalizes +text+ then builds a RDoc::Markup::Document from it
  #
  # A RDoc::Markup::Document is returned as-is and a RDoc::Comment is asked
  # to parse itself.  +format+ selects the parser through MARKUP_FORMAT.

  def parse text, format = 'rdoc'
    return text if RDoc::Markup::Document === text
    return text.parse if RDoc::Comment === text

    text = normalize_comment text # TODO remove, should not be necessary

    return RDoc::Markup::Document.new if text =~ /\A\n*\z/

    MARKUP_FORMAT[format].parse text
  end

  ##
  # The first +limit+ characters of +text+ as HTML

  def snippet text, limit = 100
    document = parse text

    RDoc::Markup::ToHtmlSnippet.new(limit).convert document
  end

  ##
  # Strips leading # characters from +text+
  #
  # +text+ is returned unchanged if any line starts (after optional
  # whitespace) with something other than a #.  Otherwise each leading run
  # of # is replaced by the same number of spaces and whitespace-only lines
  # are emptied.

  def strip_hashes text
    return text if text =~ /^(?>\s*)[^\#]/

    empty = ''
    empty.force_encoding text.encoding if Object.const_defined? :Encoding

    text.gsub(/^\s*(#+)/) { $1.tr '#', ' ' }.gsub(/^\s+$/, empty)
  end

  ##
  # Strips leading and trailing \n characters from +text+

  def strip_newlines text
    text.gsub(/\A\n*(.*?)\n*\z/m) do $1 end # block preserves String encoding
  end

  ##
  # Strips /* */ style comments
  #
  # +text+ without a /* ... */ pair is returned unchanged.  Otherwise any
  # "Document-method: name" directive is removed, the comment markers and
  # the leading * of each line are replaced by spaces of the same width and
  # whitespace-only lines are emptied.

  def strip_stars text
    return text unless text =~ %r%/\*.*\*/%m

    encoding = text.encoding if Object.const_defined? :Encoding

    text = text.gsub %r%Document-method:\s+[\w:.#=!?]+%, ''

    space = ' '
    space.force_encoding encoding if encoding

    text.sub!  %r%/\*+%       do space * $&.length end
    text.sub!  %r%\*+/%       do space * $&.length end
    text.gsub! %r%^[ \t]*\*%m do space * $&.length end

    empty = ''
    empty.force_encoding encoding if encoding
    text.gsub(/^\s+$/, empty)
  end

  ##
  # Converts dashes, ellipsis, quotes (including the &quot; entity),
  # copyright and registered trademark symbols in +text+ to properly encoded
  # characters.
  #
  # The contents of <tt> and <code> elements and other HTML tags are copied
  # through without typographic conversion.  A backslash followed by a
  # non-space character emits that character without the backslash.

  def to_html text
    if Object.const_defined? :Encoding then
      html = ''.encode text.encoding

      encoded = RDoc::Text::TO_HTML_CHARACTERS[text.encoding]
    else
      html = ''
      encoded = {
        :close_dquote => '”',
        :close_squote => '’',
        :copyright    => '©',
        :ellipsis     => '…',
        :em_dash      => '—',
        :en_dash      => '–',
        :open_dquote  => '“',
        :open_squote  => '‘',
        :trademark    => '®',
      }
    end

    s = StringScanner.new text
    insquotes = false
    indquotes = false
    after_word = nil

    until s.eos? do
      case
      when s.scan(/<(tt|code)>.*?<\/\1>/) then # skip contents of tt
        html << s.matched.gsub('\\\\', '\\')
      when s.scan(/<(tt|code)>.*?/) then
        warn "mismatched <#{s[1]}> tag" # TODO signal file/line
        html << s.matched
      when s.scan(/<[^>]+\/?\s*>/) then # skip HTML tags
        html << s.matched
      when s.scan(/\\(\S)/) then # unhandled suppressed crossref
        html << s[1]
        after_word = nil
      when s.scan(/\.\.\.(\.?)/) then
        # a fourth dot is kept as a literal period ahead of the ellipsis
        html << s[1] << encoded[:ellipsis]
        after_word = nil
      when s.scan(/\(c\)/) then
        html << encoded[:copyright]
        after_word = nil
      when s.scan(/\(r\)/) then
        html << encoded[:trademark]
        after_word = nil
      when s.scan(/---/) then
        html << encoded[:em_dash]
        after_word = nil
      when s.scan(/--/) then
        html << encoded[:en_dash]
        after_word = nil
      when s.scan(/&quot;|"/) then
        # alternate between opening and closing double quotes
        html << encoded[indquotes ? :close_dquote : :open_dquote]
        indquotes = !indquotes
        after_word = nil
      when s.scan(/``/) then # backtick double quote
        html << encoded[:open_dquote]
        after_word = nil
      when s.scan(/''/) then # tick double quote
        html << encoded[:close_dquote]
        after_word = nil
      when s.scan(/'/) then # single quote
        if insquotes
          html << encoded[:close_squote]
          insquotes = false
        elsif after_word
          # Mary's dog, my parents' house: do not start paired quotes
          html << encoded[:close_squote]
        else
          html << encoded[:open_squote]
          insquotes = true
        end

        after_word = nil
      else # advance to the next potentially significant character
        match = s.scan(/.+?(?=[<\\.("'`&-])/) #" (rebalances editor quoting)

        if match then
          html << match
          after_word = match =~ /\w$/
        else
          # nothing significant remains; copy the rest through and stop
          html << s.rest
          break
        end
      end
    end

    html
  end

  ##
  # Wraps +txt+ to +line_len+
  #
  # Lines are broken at spaces.  A word longer than +line_len+ is not split;
  # the line is instead extended to the next space (or the end of +txt+).
  # Spaces at a break are dropped and the result is stripped.

  def wrap(txt, line_len = 76)
    res = []
    sp = 0
    ep = txt.length

    while sp < ep
      # scan back for a space
      stop = sp + line_len - 1
      if stop >= ep
        stop = ep
      else
        while stop > sp && txt[stop] != ?\s
          stop -= 1
        end
        if stop <= sp
          # no space inside the window: scan forward for the next one
          stop = sp + line_len
          while stop < ep && txt[stop] != ?\s
            stop += 1
          end
        end
      end
      res << txt[sp...stop] << "\n"
      sp = stop
      sp += 1 while sp < ep && txt[sp] == ?\s
    end

    res.join.strip
  end

end