# coding: UTF-8
# :markup: markdown

##
#--
# This set of literals is for ruby 1.9 regular expressions and gives full
# unicode support.
#
# Unlike peg-markdown, this set of literals recognizes Unicode alphanumeric
# characters, newlines and spaces.
class RDoc::Markdown::Literals
  # :stopdoc:

  # This is distinct from setup_parser so that a standalone parser
  # can redefine #initialize and still have access to the proper
  # parser setup code.
  def initialize(str, debug=false)
    setup_parser(str, debug)
  end

  # Prepares for parsing +str+.  If you define a custom initialize you must
  # call this method before #parse.
  #
  # +debug+ is accepted for interface compatibility; this method does not
  # read it.
  def setup_parser(str, debug=false)
    @string = str
    @pos = 0
    # One memo table per rule key, created on first access; each table maps
    # a starting position to a MemoEntry.
    @memoizations = Hash.new { |h, k| h[k] = {} }
    @result = nil
    @failed_rule = nil
    @failing_rule_offset = -1

    setup_foreign_grammar
  end

  attr_reader :string
  attr_reader :failing_rule_offset
  attr_accessor :result, :pos

  # Returns the 1-based column of character offset +target+ within its line.
  #
  # The column is the distance from the last "\n" strictly before +target+;
  # when there is none, +target+ is on the first line and the column is
  # <tt>target + 1</tt>.
  def current_column(target=pos)
    # Only look backwards when there is something before target: a negative
    # start index would make String#rindex search from the END of the string.
    prev_newline = target > 0 ? string.rindex("\n", target - 1) : nil
    return target - prev_newline if prev_newline

    target + 1
  end

  # Returns the 1-based number of the line containing character offset
  # +target+, i.e. one more than the number of "\n" characters before it.
  #
  # An offset equal to the string size (end of input) is still reported;
  # offsets past the end of the string yield -1.  Negative offsets are
  # treated as offset 0.
  def current_line(target=pos)
    return -1 if target > string.size

    preceding = string[0, target < 0 ? 0 : target]
    preceding.count("\n") + 1
  end

  # Returns the parsed string split into an Array of lines, each keeping its
  # line terminator.
  def lines
    string.each_line.to_a
  end

  # Returns the text of 1-based line +line_no+, or an empty string when there
  # is no such line (empty input, end of input after a trailing newline, or
  # the -1 sentinel from #current_line).
  def line_text(line_no)
    return "" if line_no < 1

    lines[line_no - 1].to_s
  end
  private :line_text

  # Returns the text between character offset +start+ and the current
  # position (exclusive).
  def get_text(start)
    @string[start..@pos-1]
  end

  # Describes the current position for debugging: the offset, up to ten
  # characters before it and up to ten characters after it.
  def show_pos
    width = 10
    if @pos < width
      "#{@pos} (\"#{@string[0,@pos]}\" @ \"#{@string[@pos,width]}\")"
    else
      "#{@pos} (\"... #{@string[@pos - width, width]}\" @ \"#{@string[@pos,width]}\")"
    end
  end

  # Returns a one-line description of the furthest failure: its line and
  # column plus the failed rule (with its rendered grammar when the rule is
  # registered in Rules under a Symbol).
  def failure_info
    l = current_line @failing_rule_offset
    c = current_column @failing_rule_offset

    if @failed_rule.kind_of? Symbol
      info = self.class::Rules[@failed_rule]
      "line #{l}, column #{c}: failed rule '#{info.name}' = '#{info.rendered}'"
    else
      "line #{l}, column #{c}: failed rule '#{@failed_rule}'"
    end
  end

  # Returns the failing line followed by a second line holding a caret under
  # the failing column.
  def failure_caret
    l = current_line @failing_rule_offset
    c = current_column @failing_rule_offset

    # Drop the line terminator so the caret sits directly under the text, and
    # never ask for a negative amount of padding.
    padding = c > 1 ? c - 1 : 0
    "#{line_text(l).chomp}\n#{' ' * padding}^"
  end

  # Returns the character at the furthest failure position.
  def failure_character
    l = current_line @failing_rule_offset
    c = current_column @failing_rule_offset
    line_text(l)[c-1, 1]
  end

  # Returns a compact "@line:column failed rule ..., got '...'" description
  # of the furthest failure.
  def failure_oneline
    l = current_line @failing_rule_offset
    c = current_column @failing_rule_offset

    char = line_text(l)[c-1, 1]

    if @failed_rule.kind_of? Symbol
      info = self.class::Rules[@failed_rule]
      "@#{l}:#{c} failed rule '#{info.name}', got '#{char}'"
    else
      "@#{l}:#{c} failed rule '#{@failed_rule}', got '#{char}'"
    end
  end

  # Raised by #raise_error to report a parse failure.
  class ParseError < RuntimeError
  end

  # Raises ParseError with the #failure_oneline message.
  def raise_error
    raise ParseError, failure_oneline
  end

  # Writes a multi-line report of the furthest failure to +io+: position,
  # failed rule, offending character and the source line with a caret.
  def show_error(io=STDOUT)
    error_pos = @failing_rule_offset
    line_no = current_line(error_pos)
    col_no = current_column(error_pos)

    io.puts "On line #{line_no}, column #{col_no}:"

    if @failed_rule.kind_of? Symbol
      info = self.class::Rules[@failed_rule]
      io.puts "Failed to match '#{info.rendered}' (rule '#{info.name}')"
    else
      io.puts "Failed to match rule '#{@failed_rule}'"
    end

    io.puts "Got: #{string[error_pos,1].inspect}"
    io.puts "=> #{line_text(line_no)}"
    # The line is prefixed with "=> " (3 characters) and columns are 1-based,
    # so the caret belongs at index 3 + (col_no - 1).
    io.print(" " * (col_no + 2))
    io.puts "^"
  end

  # Records +name+ as the failed rule, but only when the current position is
  # beyond the furthest failure recorded so far.
  def set_failed_rule(name)
    if @pos > @failing_rule_offset
      @failed_rule = name
      @failing_rule_offset = @pos
    end
  end

  attr_reader :failed_rule

  # Consumes +str+ if the input continues with it at the current position.
  # Returns +str+ on success, nil otherwise.
  def match_string(str)
    len = str.size
    return nil unless @string[pos, len] == str

    @pos += len
    str
  end

  # Matches +reg+ against the remaining input and advances past the match.
  # Returns true on success, nil otherwise.  The rules in this class anchor
  # their patterns with \A so only a match at the current position counts.
  def scan(reg)
    if m = reg.match(@string[@pos..-1])
      @pos += m.end(0)
      return true
    end

    nil
  end

  # Returns the byte at the current position and advances by one, or nil at
  # the end of the input.
  #
  # NOTE(review): @pos is compared against String#size (characters) but used
  # as a byte index by String#getbyte; the two only agree for single-byte
  # content.  Left as-is; confirm before relying on it for multibyte input.
  def get_byte
    return nil if @pos >= @string.size

    byte = @string.getbyte @pos
    @pos += 1
    byte
  end

  # Parses the input starting from the grammar rule named +rule+ (hyphens in
  # the name are mapped to "_hyphen_"), or from the root rule when +rule+ is
  # nil.
  def parse(rule=nil)
    # We invoke the rules indirectly via apply
    # instead of by just calling them as methods because
    # if the rules use left recursion, apply needs to
    # manage that.
    return apply(:_root) unless rule

    method = rule.gsub("-", "_hyphen_")
    apply :"_#{method}"
  end

  # Memoized outcome of applying one rule at one position.
  class MemoEntry
    def initialize(ans, pos)
      @ans = ans
      @pos = pos
      @result = nil
      @set = false       # false while the rule is still being evaluated
      @left_rec = false  # set when the rule re-enters itself at this position
    end

    attr_reader :ans, :pos, :result, :set
    attr_accessor :left_rec

    # Stores a completed outcome and clears the left-recursion marker.
    def move!(ans, pos, result)
      @ans = ans
      @pos = pos
      @result = result
      @set = true
      @left_rec = false
    end
  end

  # Runs +rule+ of this parser on behalf of +other+, using +other+'s string
  # and position.  On success +other+'s position and result are updated; on
  # failure the failure is recorded on +other+.  This parser's own string and
  # position are always restored afterwards.
  def external_invoke(other, rule, *args)
    old_pos = @pos
    old_string = @string

    @pos = other.pos
    @string = other.string

    begin
      if val = __send__(rule, *args)
        other.pos = @pos
        other.result = @result
      else
        other.set_failed_rule "#{self.class}##{rule}"
      end
      val
    ensure
      @pos = old_pos
      @string = old_string
    end
  end

  # Applies +rule+ with +args+, memoizing per (rule, args) and position.
  def apply_with_args(rule, *args)
    apply_memoized([rule, args], rule, args)
  end

  # Applies the argument-less +rule+, memoizing per rule and position.
  def apply(rule)
    apply_memoized(rule, rule, nil)
  end

  # Shared body of #apply and #apply_with_args: looks up or creates the memo
  # entry stored under +memo_key+ for the current position and evaluates
  # +rule+ (with +args+ when given), growing left recursion when detected.
  def apply_memoized(memo_key, rule, args)
    memos = @memoizations[memo_key]

    if entry = memos[@pos]
      @pos = entry.pos
      unless entry.set
        # The rule is being re-entered at the same position before it
        # finished: flag left recursion and fail this inner attempt.
        entry.left_rec = true
        return nil
      end

      @result = entry.result
      return entry.ans
    end

    entry = MemoEntry.new(nil, @pos)
    memos[@pos] = entry
    start_pos = @pos

    ans = args ? __send__(rule, *args) : __send__(rule)

    left_recursive = entry.left_rec

    entry.move! ans, @pos, @result

    # Don't bother trying to grow the left recursion
    # if it's failing straight away (thus there is no seed)
    return grow_lr(rule, args, start_pos, entry) if ans && left_recursive

    ans
  end
  private :apply_memoized

  # Repeatedly re-evaluates a left-recursive +rule+ from +start_pos+, storing
  # each attempt in +m+ while it consumes more input than the previous one.
  # Returns the stored answer, or nil if a re-evaluation fails.
  def grow_lr(rule, args, start_pos, m)
    while true
      @pos = start_pos
      @result = m.result

      if args
        ans = __send__ rule, *args
      else
        ans = __send__ rule
      end
      return nil unless ans

      # Stop as soon as an attempt does not get further than the stored one.
      break if @pos <= m.pos

      m.move! ans, @pos, @result
    end

    @result = m.result
    @pos = m.pos
    return m.ans
  end

  # Name and rendered grammar source of a rule, used in failure messages.
  class RuleInfo
    def initialize(name, rendered)
      @name = name
      @rendered = rendered
    end

    attr_reader :name, :rendered
  end

  # Builds a RuleInfo for the Rules table.
  def self.rule_info(name, rendered)
    RuleInfo.new(name, rendered)
  end


  # :startdoc:
  # :stopdoc:
  def setup_foreign_grammar; end

  # Alphanumeric = /\p{Word}/
  def _Alphanumeric
    _tmp = scan(/\A(?-mix:\p{Word})/)
    set_failed_rule :_Alphanumeric unless _tmp
    return _tmp
  end

  # AlphanumericAscii = /[A-Za-z0-9]/
  def _AlphanumericAscii
    _tmp = scan(/\A(?-mix:[A-Za-z0-9])/)
    set_failed_rule :_AlphanumericAscii unless _tmp
    return _tmp
  end

  # BOM = "\uFEFF"
  def _BOM
    # Match the byte-order-mark character U+FEFF itself, not the five ASCII
    # characters "uFEFF".
    _tmp = match_string("\uFEFF")
    set_failed_rule :_BOM unless _tmp
    return _tmp
  end

  # Newline = /\n|\r\n?|\p{Zl}|\p{Zp}/
  def _Newline
    _tmp = scan(/\A(?-mix:\n|\r\n?|\p{Zl}|\p{Zp})/)
    set_failed_rule :_Newline unless _tmp
    return _tmp
  end

  # NonAlphanumeric = /\p{^Word}/
  def _NonAlphanumeric
    _tmp = scan(/\A(?-mix:\p{^Word})/)
    set_failed_rule :_NonAlphanumeric unless _tmp
    return _tmp
  end

  # Spacechar = /\t|\p{Zs}/
  def _Spacechar
    _tmp = scan(/\A(?-mix:\t|\p{Zs})/)
    set_failed_rule :_Spacechar unless _tmp
    return _tmp
  end

  Rules = {}
  Rules[:_Alphanumeric] = rule_info("Alphanumeric", "/\\p{Word}/")
  Rules[:_AlphanumericAscii] = rule_info("AlphanumericAscii", "/[A-Za-z0-9]/")
  Rules[:_BOM] = rule_info("BOM", "\"\\uFEFF\"")
  Rules[:_Newline] = rule_info("Newline", "/\\n|\\r\\n?|\\p{Zl}|\\p{Zp}/")
  Rules[:_NonAlphanumeric] = rule_info("NonAlphanumeric", "/\\p{^Word}/")
  Rules[:_Spacechar] = rule_info("Spacechar", "/\\t|\\p{Zs}/")
  # :startdoc:
end