require 'rexml/namespace'
require 'rexml/xmltokens'
require 'rexml/parseexception'

module REXML
  module Parsers
    # You don't want to use this class.  Really.  Use XPath, which is a wrapper
    # for this class.  Believe me.  You don't want to poke around in here.
    # There is strange, dark magic at work in this code.  Beware.  Go back!  Go
    # back while you still can!
    #
    # For those who stay: this is a hand-written recursive-descent parser for
    # XPath expressions.  #parse turns an XPath string into a flat Array of
    # symbols and values (for example "a/b" becomes
    # [:child, :qname, "", "a", :child, :qname, "", "b"]); nested arrays are
    # used for predicates, function arguments and binary operators.  The
    # capitalised private methods each implement one grammar production: they
    # take the remaining input and an output Array, append what they recognise
    # to the Array, and return the input that is left over.
    class XPathParser
      include XMLTokens
      # A single- or double-quoted string literal.  The body of a
      # single-quoted literal is capture 1, of a double-quoted one capture 2.
      LITERAL    = /^'([^']*)'|^"([^"]*)"/u

      # Stores the namespace mapping on this parser and also publishes it to
      # Functions::namespace_context.
      def namespaces=( namespaces )
        Functions::namespace_context = namespaces
        @namespaces = namespaces
      end

      # Parses the XPath string +path+ and returns the parsed representation
      # as an Array.  The argument is not modified.
      #
      # Raises ParseException if a bracket or parenthesis group is not closed.
      def parse path
        path = path.dup
        path.gsub!(/([\(\[])\s+/, '\1') # Strip ignorable spaces
        path.gsub!( /\s+([\]\)])/, '\1')
        parsed = []
        OrExpr(path, parsed)
        parsed
      end

      # Parses +path+ as the body of a single predicate (the text that would
      # appear between '[' and ']') and returns the parsed representation.
      def predicate path
        parsed = []
        Predicate( "[#{path}]", parsed )
        parsed
      end

      # Renders a path in abbreviated XPath syntax.  +path+ may be an XPath
      # String (it is parsed first) or an already parsed Array.
      #
      # NOTE: a parsed Array passed in is consumed; elements are shifted off
      # it while the string is built.
      def abbreviate( path )
        path = path.kind_of?(String) ? parse( path ) : path
        string = ""
        document = false
        while path.size > 0
          op = path.shift
          case op
          when :node
            # Contributes nothing to the abbreviated form.
          when :attribute
            string << "/" if string.size > 0
            string << "@"
          when :child
            string << "/" if string.size > 0
          when :descendant_or_self
            string << "/"
          when :self
            string << "."
          when :parent
            string << ".."
          when :any
            string << "*"
          when :text
            string << "text()"
          when :following, :following_sibling,
                :ancestor, :ancestor_or_self, :descendant,
                :namespace, :preceding, :preceding_sibling
            # Axes without an abbreviation are written out in full.
            string << "/" unless string.size == 0
            string << op.to_s.tr("_", "-")
            string << "::"
          when :qname
            prefix = path.shift
            name = path.shift
            string << prefix+":" if prefix.size > 0
            string << name
          when :predicate
            string << '['
            string << predicate_to_string( path.shift ) {|x| abbreviate( x ) }
            string << ']'
          when :document
            document = true
          when :function
            string << path.shift
            string << "( "
            string << predicate_to_string( path.shift[0] ) {|x| abbreviate( x )}
            string << " )"
          when :literal
            string << %Q{ "#{path.shift}" }
          else
            string << "/" unless string.size == 0
            string << "UNKNOWN("
            string << op.inspect
            string << ")"
          end
        end
        string = "/"+string if document
        return string
      end

      # Renders a path in unabbreviated XPath syntax, with every axis spelled
      # out (for example "a/b" becomes "child::a/child::b").  +path+ may be an
      # XPath String or an already parsed Array.
      #
      # NOTE: a parsed Array passed in is consumed; elements are shifted off
      # it while the string is built.
      def expand( path )
        path = path.kind_of?(String) ? parse( path ) : path
        string = ""
        document = false
        while path.size > 0
          op = path.shift
          case op
          when :node
            string << "node()"
          when :attribute, :child, :following, :following_sibling,
                :ancestor, :ancestor_or_self, :descendant, :descendant_or_self,
                :namespace, :preceding, :preceding_sibling, :self, :parent
            string << "/" unless string.size == 0
            string << op.to_s.tr("_", "-")
            string << "::"
          when :any
            string << "*"
          when :qname
            prefix = path.shift
            name = path.shift
            string << prefix+":" if prefix.size > 0
            string << name
          when :predicate
            string << '['
            string << predicate_to_string( path.shift ) { |x| expand(x) }
            string << ']'
          when :document
            document = true
          else
            string << "/" unless string.size == 0
            string << "UNKNOWN("
            string << op.inspect
            string << ")"
          end
        end
        string = "/"+string if document
        return string
      end

      # Renders a parsed predicate expression as a String.  Binary operators,
      # function calls and literals are handled here; anything else is handed
      # to the given block, which must return the String form of that
      # sub-path.  Runs of spaces in the result are squeezed to one space.
      #
      # NOTE: +path+ is consumed; elements are shifted off it.
      def predicate_to_string( path, &block )
        string = ""
        case path[0]
        when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union
          op = path.shift
          # Operators whose symbol name differs from their XPath spelling;
          # the others (and, or, div, mod, ...) are emitted via to_s below.
          case op
          when :eq
            op = "="
          when :lt
            op = "<"
          when :gt
            op = ">"
          when :lteq
            op = "<="
          when :gteq
            op = ">="
          when :neq
            op = "!="
          when :union
            op = "|"
          end
          left = predicate_to_string( path.shift, &block )
          right = predicate_to_string( path.shift, &block )
          string << " "
          string << left
          string << " "
          string << op.to_s
          string << " "
          string << right
          string << " "
        when :function
          path.shift
          name = path.shift
          string << name
          string << "( "
          string << predicate_to_string( path.shift, &block )
          string << " )"
        when :literal
          path.shift
          string << " "
          string << path.shift.inspect
          string << " "
        else
          string << " "
          string << yield( path )
          string << " "
        end
        return string.squeeze(" ")
      end

      private
      #LocationPath
      #  | RelativeLocationPath
      #  | '/' RelativeLocationPath?
      #  | '//' RelativeLocationPath
      #
      # Appends the parsed form of +path+ to +parsed+ and returns the
      # unconsumed remainder, which is always a String (empty when the whole
      # input was consumed, e.g. for a bare "/").
      def LocationPath path, parsed
        path = path.strip
        if path[0] == ?/
          parsed << :document
          if path[1] == ?/
            parsed << :descendant_or_self
            parsed << :node
            path = path[2..-1]
          else
            path = path[1..-1]
          end
        end
        return path if path.empty?
        RelativeLocationPath( path, parsed )
      end

      #RelativeLocationPath
      #  |                                                    Step
      #    | (AXIS_NAME '::' | '@' | '')                     AxisSpecifier
      #      NodeTest
      #        Predicate
      #    | '.' | '..'                                      AbbreviatedStep
      #  |  RelativeLocationPath '/' Step
      #  | RelativeLocationPath '//' Step
      AXIS = /^(ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self|following|following-sibling|namespace|parent|preceding|preceding-sibling|self)::/

      # Parses a sequence of steps separated by '/' or '//', appending to
      # +parsed+.  Returns the remaining input, starting at the first
      # character that cannot continue the path.
      def RelativeLocationPath path, parsed
        while path.size > 0
          # (axis or @ or <child::>) nodetest predicate  >
          # OR                                          >  / Step
          # (. or ..)                                    >
          if path[0] == ?.
            if path[1] == ?.
              parsed << :parent
              parsed << :node
              path = path[2..-1]
            else
              parsed << :self
              parsed << :node
              path = path[1..-1]
            end
          else
            if path[0] == ?@
              parsed << :attribute
              path = path[1..-1]
              # Goto Nodetest
            elsif path =~ AXIS
              parsed << $1.tr('-','_').intern
              path = $'
              # Goto Nodetest
            else
              # No explicit axis means the child axis.
              parsed << :child
            end

            n = []
            path = NodeTest( path, n)

            if path[0] == ?[
              path = Predicate( path, n )
            end

            parsed.concat(n)
          end

          if path.size > 0
            if path[0] == ?/
              if path[1] == ?/
                parsed << :descendant_or_self
                parsed << :node
                path = path[2..-1]
              else
                path = path[1..-1]
              end
            else
              return path
            end
          end
        end
        return path
      end

      # Returns a 1-1 map of the nodeset
      # The contents of the resulting array are either:
      #   true/false, if a positive match
      #   String, if a name match
      #NodeTest
      #  | ('*' | NCNAME ':' '*' | QNAME)                NameTest
      #  | NODE_TYPE '(' ')'                              NodeType
      #  | PI '(' LITERAL ')'                            PI
      #    | '[' expr ']'                                Predicate
      NCNAMETEST= /^(#{NCNAME_STR}):\*/u
      QNAME     = Namespace::NAMESPLIT
      NODE_TYPE = /^(comment|text|node)\(\s*\)/m
      PI        = /^processing-instruction\(/

      # Parses one node test at the start of +path+, appends it to +parsed+
      # and returns the remaining input.  If nothing matches, +path+ is
      # returned unchanged and nothing is appended.
      #
      # Raises ParseException for a malformed processing-instruction() test.
      def NodeTest path, parsed
        case path
        when /^\*/
          path = $'
          parsed << :any
        when NODE_TYPE
          type = $1
          path = $'
          parsed << type.tr('-', '_').intern
        when PI
          path = $'
          literal = nil
          if path =~ /^\s*\)/
            # No target literal: just consume the closing parenthesis so that
            # any following predicate or step is still parsed.
            path = $'
          else
            unless path =~ LITERAL
              raise ParseException.new("Expected a literal or ')' in processing instruction")
            end
            # LITERAL captures single-quoted text in $1, double-quoted in $2.
            literal = $1 || $2
            path = $'
            raise ParseException.new("Missing ')' after processing instruction") if path[0] != ?)
            path = path[1..-1]
          end
          parsed << :processing_instruction
          parsed << (literal || '')
        when NCNAMETEST
          prefix = $1
          path = $'
          parsed << :namespace
          parsed << prefix
        when QNAME
          prefix = $1
          name = $2
          path = $'
          prefix = "" unless prefix
          parsed << :qname
          parsed << prefix
          parsed << name
        end
        return path
      end

      # Parses one or more consecutive '[...]' groups at the start of +path+.
      # For each group, :predicate followed by an Array holding the parsed
      # expression is appended to +parsed+.  Returns the remaining input, or
      # nil if +path+ does not start with '['.
      #
      # Raises ParseException if a '[' is never closed.
      def Predicate path, parsed
        return nil unless path[0] == ?[
        predicates = []
        while path[0] == ?[
          path, expr = get_group(path)
          # Drop the surrounding brackets.
          predicates << expr[1..-2]
        end
        predicates.each{ |pred|
          preds = []
          parsed << :predicate
          parsed << preds
          OrExpr(pred, preds)
        }
        path
      end

      # The following return arrays of true/false, a 1-1 mapping of the
      # supplied nodeset, except for axe(), which returns a filtered
      # nodeset
      #
      # Each of the binary-operator productions below works the same way: it
      # parses its left operand into a scratch Array +n+; for every operator
      # that follows it rewraps +n+ as [operator, left, right] and parses the
      # right operand into the last slot.  Finally +n+ either replaces
      # +parsed+ (when +parsed+ is still empty) or is appended to it as a
      # nested Array.  The unconsumed input is returned.

      #| OrExpr S 'or' S AndExpr
      #| AndExpr
      def OrExpr path, parsed
        n = []
        rest = AndExpr( path, n )
        if rest != path
          while rest =~ /^\s*( or )/
            n = [ :or, n, [] ]
            rest = AndExpr( $', n[-1] )
          end
        end
        if parsed.size == 0 and n.size != 0
          parsed.replace(n)
        elsif n.size > 0
          parsed << n
        end
        rest
      end

      #| AndExpr S 'and' S EqualityExpr
      #| EqualityExpr
      def AndExpr path, parsed
        n = []
        rest = EqualityExpr( path, n )
        if rest != path
          while rest =~ /^\s*( and )/
            n = [ :and, n, [] ]
            rest = EqualityExpr( $', n[-1] )
          end
        end
        if parsed.size == 0 and n.size != 0
          parsed.replace(n)
        elsif n.size > 0
          parsed << n
        end
        rest
      end

      #| EqualityExpr ('=' | '!=') RelationalExpr
      #| RelationalExpr
      def EqualityExpr path, parsed
        n = []
        rest = RelationalExpr( path, n )
        if rest != path
          while rest =~ /^\s*(!?=)\s*/
            if $1[0] == ?!
              n = [ :neq, n, [] ]
            else
              n = [ :eq, n, [] ]
            end
            rest = RelationalExpr( $', n[-1] )
          end
        end
        if parsed.size == 0 and n.size != 0
          parsed.replace(n)
        elsif n.size > 0
          parsed << n
        end
        rest
      end

      #| RelationalExpr ('<' | '>' | '<=' | '>=') AdditiveExpr
      #| AdditiveExpr
      def RelationalExpr path, parsed
        n = []
        rest = AdditiveExpr( path, n )
        if rest != path
          while rest =~ /^\s*([<>]=?)\s*/
            # Builds one of :lt, :gt, :lteq, :gteq from the matched operator.
            sym = ($1[0] == ?< ? "lt" : "gt")
            sym += "eq" if $1[-1] == ?=
            n = [ sym.intern, n, [] ]
            rest = AdditiveExpr( $', n[-1] )
          end
        end
        if parsed.size == 0 and n.size != 0
          parsed.replace(n)
        elsif n.size > 0
          parsed << n
        end
        rest
      end

      #| AdditiveExpr ('+' | S '-') MultiplicativeExpr
      #| MultiplicativeExpr
      def AdditiveExpr path, parsed
        n = []
        rest = MultiplicativeExpr( path, n )
        if rest != path
          # A binary minus must be preceded by a space; '-' is also a legal
          # name character, so "a-b" is a name rather than a subtraction.
          while rest =~ /^\s*(\+| -)\s*/
            if $1[0] == ?+
              n = [ :plus, n, [] ]
            else
              n = [ :minus, n, [] ]
            end
            rest = MultiplicativeExpr( $', n[-1] )
          end
        end
        if parsed.size == 0 and n.size != 0
          parsed.replace(n)
        elsif n.size > 0
          parsed << n
        end
        rest
      end

      #| MultiplicativeExpr ('*' | S ('div' | 'mod') S) UnaryExpr
      #| UnaryExpr
      def MultiplicativeExpr path, parsed
        n = []
        rest = UnaryExpr( path, n )
        if rest != path
          while rest =~ /^\s*(\*| div | mod )\s*/
            if $1[0] == ?*
              n = [ :mult, n, [] ]
            elsif $1.include?( "div" )
              n = [ :div, n, [] ]
            else
              n = [ :mod, n, [] ]
            end
            rest = UnaryExpr( $', n[-1] )
          end
        end
        if parsed.size == 0 and n.size != 0
          parsed.replace(n)
        elsif n.size > 0
          parsed << n
        end
        rest
      end

      #| '-' UnaryExpr
      #| UnionExpr
      #
      # Consumes any run of leading '-' signs; an odd number of them appends
      # :neg to +parsed+ before the operand.
      def UnaryExpr path, parsed
        path =~ /^(\-*)/
        path = $'
        if $1 and (($1.size % 2) != 0)
          mult = -1
        else
          mult = 1
        end
        parsed << :neg if mult < 0

        n = []
        path = UnionExpr( path, n )
        parsed.concat( n )
        path
      end

      #| UnionExpr '|' PathExpr
      #| PathExpr
      def UnionExpr path, parsed
        n = []
        rest = PathExpr( path, n )
        if rest != path
          while rest =~ /^\s*(\|)\s*/
            n = [ :union, n, [] ]
            rest = PathExpr( $', n[-1] )
          end
        end
        if parsed.size == 0 and n.size != 0
          parsed.replace( n )
        elsif n.size > 0
          parsed << n
        end
        rest
      end

      #| LocationPath
      #| FilterExpr ('/' | '//') RelativeLocationPath
      def PathExpr path, parsed
        path =~ /^\s*/
        path = $'
        n = []
        rest = FilterExpr( path, n )
        if rest != path
          if rest and rest[0] == ?/
            # A filter expression followed by further steps.  Consume the
            # separator here so RelativeLocationPath starts on a real step,
            # and make sure the result is actually added to +parsed+.
            if rest[1] == ?/
              n << :descendant_or_self
              n << :node
              rest = rest[2..-1]
            else
              rest = rest[1..-1]
            end
            rest = RelativeLocationPath(rest, n)
            parsed.concat(n)
            return rest
          end
        end
        rest = LocationPath(rest, n) if rest =~ /\A[\/\.\@\[\w*]/
        parsed.concat(n)
        return rest
      end

      #| FilterExpr Predicate
      #| PrimaryExpr
      def FilterExpr path, parsed
        n = []
        path = PrimaryExpr( path, n )
        path = Predicate(path, n) if path and path[0] == ?[
        parsed.concat(n)
        path
      end

      #| VARIABLE_REFERENCE
      #| '(' expr ')'
      #| LITERAL
      #| NUMBER
      #| FunctionCall
      VARIABLE_REFERENCE  = /^\$(#{NAME_STR})/u
      NUMBER              = /^(\d*\.?\d+)/
      # Names that look like a function call but are really node tests.  The
      # alternation is grouped so that the anchors apply to every alternative;
      # otherwise any function name merely containing "text" would match.
      NT                  = /\A(?:comment|text|processing-instruction|node)\z/

      # Parses a primary expression at the start of +path+, appends it to
      # +parsed+ and returns the remaining input.  Returns +path+ unchanged
      # when it does not start with a primary expression (including when it
      # starts with a node-type test such as "text(").
      def PrimaryExpr path, parsed
        case path
        when VARIABLE_REFERENCE
          varname = $1
          path = $'
          parsed << :variable
          parsed << varname
          #arry << @variables[ varname ]
        when /^(\w[-\w]*)(?:\()/
          fname = $1
          tmp = $'
          # Node-type tests are handled by NodeTest, not as function calls.
          return path if fname =~ NT
          path = tmp
          parsed << :function
          parsed << fname
          path = FunctionCall(path, parsed)
        when NUMBER
          varname = $1
          path = $'
          parsed << :literal
          parsed << (varname.include?('.') ? varname.to_f : varname.to_i)
        when LITERAL
          varname = $1.nil? ? $2 : $1
          path = $'
          parsed << :literal
          parsed << varname
        when /^\(/                                               #/
          path, contents = get_group(path)
          contents = contents[1..-2]
          n = []
          OrExpr( contents, n )
          parsed.concat(n)
        end
        path
      end

      #| FUNCTION_NAME '(' ( expr ( ',' expr )* )? ')'
      #
      # +rest+ is the input just after the opening parenthesis.  Appends an
      # Array holding one parsed Array per argument to +parsed+ and returns
      # the input following the closing parenthesis.
      def FunctionCall rest, parsed
        path, arguments = parse_args(rest)
        argset = []
        for argument in arguments
          args = []
          OrExpr( argument, args )
          argset << args
        end
        parsed << argset
        path
      end

      # get_group( '[foo]bar' ) -> ['bar', '[foo]']
      #
      # Splits off the balanced group that starts at the first character of
      # +string+.  The closing delimiter is ')' when the string starts with
      # '(' and ']' otherwise.
      #
      # Raises ParseException if the group is never closed.
      def get_group string
        ind = 0
        depth = 0
        st = string[0,1]
        en = (st == "(" ? ")" : "]")
        begin
          case string[ind,1]
          when st
            depth += 1
          when en
            depth -= 1
          end
          ind += 1
        end while depth > 0 and ind < string.length
        unless depth == 0
          raise ParseException.new("Unbalanced '#{st}' in XPath expression: #{string}")
        end
        [string[ind..-1], string[0..ind-1]]
      end

      # Splits a function argument list.  +string+ is the input just after
      # the opening parenthesis.  Returns [rest, arguments] where +arguments+
      # holds the stripped text of each top-level, comma-separated argument
      # (empty arguments are skipped) and +rest+ is the input following the
      # matching ')'.  Commas and parentheses inside quotes or nested
      # parentheses do not split arguments.
      #
      # Raises ParseException if the closing parenthesis is missing.
      def parse_args( string )
        original = string
        arguments = []
        ind = 0
        inquot = false
        inapos = false
        # The opening parenthesis has already been consumed by the caller.
        depth = 1
        begin
          case string[ind]
          when ?"
            inquot = !inquot unless inapos
          when ?'
            inapos = !inapos unless inquot
          else
            unless inquot or inapos
              case string[ind]
              when ?(
                depth += 1
              when ?)
                depth -= 1
                if depth == 0
                  s = string[0,ind].strip
                  arguments << s unless s == ""
                  string = string[ind+1..-1]
                end
              when ?,
                if depth == 1
                  s = string[0,ind].strip
                  arguments << s unless s == ""
                  string = string[ind+1..-1]
                  # Restart scanning at the beginning of the shortened string
                  # (ind is incremented to 0 just below).
                  ind = -1
                end
              end
            end
          end
          ind += 1
        end while depth > 0 and ind < string.length
        unless depth == 0
          raise ParseException.new("Missing ')' in function call arguments: #{original}")
        end
        [string,arguments]
      end
    end
  end
end