1require 'rexml/namespace'
2require 'rexml/xmltokens'
3
4module REXML
5  module Parsers
6    # You don't want to use this class.  Really.  Use XPath, which is a wrapper
7    # for this class.  Believe me.  You don't want to poke around in here.
8    # There is strange, dark magic at work in this code.  Beware.  Go back!  Go
9    # back while you still can!
10    class XPathParser
11      include XMLTokens
12      LITERAL    = /^'([^']*)'|^"([^"]*)"/u
13
14      def namespaces=( namespaces )
15        Functions::namespace_context = namespaces
16        @namespaces = namespaces
17      end
18
19      def parse path
20        path = path.dup
21        path.gsub!(/([\(\[])\s+/, '\1') # Strip ignorable spaces
22        path.gsub!( /\s+([\]\)])/, '\1')
23        parsed = []
24        path = OrExpr(path, parsed)
25        parsed
26      end
27
28      def predicate path
29        parsed = []
30        Predicate( "[#{path}]", parsed )
31        parsed
32      end
33
34      def abbreviate( path )
35        path = path.kind_of?(String) ? parse( path ) : path
36        string = ""
37        document = false
38        while path.size > 0
39          op = path.shift
40          case op
41          when :node
42          when :attribute
43            string << "/" if string.size > 0
44            string << "@"
45          when :child
46            string << "/" if string.size > 0
47          when :descendant_or_self
48            string << "/"
49          when :self
50            string << "."
51          when :parent
52            string << ".."
53          when :any
54            string << "*"
55          when :text
56            string << "text()"
57          when :following, :following_sibling,
58                :ancestor, :ancestor_or_self, :descendant,
59                :namespace, :preceding, :preceding_sibling
60            string << "/" unless string.size == 0
61            string << op.to_s.tr("_", "-")
62            string << "::"
63          when :qname
64            prefix = path.shift
65            name = path.shift
66            string << prefix+":" if prefix.size > 0
67            string << name
68          when :predicate
69            string << '['
70            string << predicate_to_string( path.shift ) {|x| abbreviate( x ) }
71            string << ']'
72          when :document
73            document = true
74          when :function
75            string << path.shift
76            string << "( "
77            string << predicate_to_string( path.shift[0] ) {|x| abbreviate( x )}
78            string << " )"
79          when :literal
80            string << %Q{ "#{path.shift}" }
81          else
82            string << "/" unless string.size == 0
83            string << "UNKNOWN("
84            string << op.inspect
85            string << ")"
86          end
87        end
88        string = "/"+string if document
89        return string
90      end
91
92      def expand( path )
93        path = path.kind_of?(String) ? parse( path ) : path
94        string = ""
95        document = false
96        while path.size > 0
97          op = path.shift
98          case op
99          when :node
100            string << "node()"
101          when :attribute, :child, :following, :following_sibling,
102                :ancestor, :ancestor_or_self, :descendant, :descendant_or_self,
103                :namespace, :preceding, :preceding_sibling, :self, :parent
104            string << "/" unless string.size == 0
105            string << op.to_s.tr("_", "-")
106            string << "::"
107          when :any
108            string << "*"
109          when :qname
110            prefix = path.shift
111            name = path.shift
112            string << prefix+":" if prefix.size > 0
113            string << name
114          when :predicate
115            string << '['
116            string << predicate_to_string( path.shift ) { |x| expand(x) }
117            string << ']'
118          when :document
119            document = true
120          else
121            string << "/" unless string.size == 0
122            string << "UNKNOWN("
123            string << op.inspect
124            string << ")"
125          end
126        end
127        string = "/"+string if document
128        return string
129      end
130
131      def predicate_to_string( path, &block )
132        string = ""
133        case path[0]
134        when :and, :or, :mult, :plus, :minus, :neq, :eq, :lt, :gt, :lteq, :gteq, :div, :mod, :union
135          op = path.shift
136          case op
137          when :eq
138            op = "="
139          when :lt
140            op = "<"
141          when :gt
142            op = ">"
143          when :lteq
144            op = "<="
145          when :gteq
146            op = ">="
147          when :neq
148            op = "!="
149          when :union
150            op = "|"
151          end
152          left = predicate_to_string( path.shift, &block )
153          right = predicate_to_string( path.shift, &block )
154          string << " "
155          string << left
156          string << " "
157          string << op.to_s
158          string << " "
159          string << right
160          string << " "
161        when :function
162          path.shift
163          name = path.shift
164          string << name
165          string << "( "
166          string << predicate_to_string( path.shift, &block )
167          string << " )"
168        when :literal
169          path.shift
170          string << " "
171          string << path.shift.inspect
172          string << " "
173        else
174          string << " "
175          string << yield( path )
176          string << " "
177        end
178        return string.squeeze(" ")
179      end
180
181      private
182      #LocationPath
183      #  | RelativeLocationPath
184      #  | '/' RelativeLocationPath?
185      #  | '//' RelativeLocationPath
186      def LocationPath path, parsed
187        #puts "LocationPath '#{path}'"
188        path = path.strip
189        if path[0] == ?/
190          parsed << :document
191          if path[1] == ?/
192            parsed << :descendant_or_self
193            parsed << :node
194            path = path[2..-1]
195          else
196            path = path[1..-1]
197          end
198        end
199        #puts parsed.inspect
200        return RelativeLocationPath( path, parsed ) if path.size > 0
201      end
202
203      #RelativeLocationPath
204      #  |                                                    Step
205      #    | (AXIS_NAME '::' | '@' | '')                     AxisSpecifier
206      #      NodeTest
207      #        Predicate
208      #    | '.' | '..'                                      AbbreviatedStep
209      #  |  RelativeLocationPath '/' Step
210      #  | RelativeLocationPath '//' Step
211      AXIS = /^(ancestor|ancestor-or-self|attribute|child|descendant|descendant-or-self|following|following-sibling|namespace|parent|preceding|preceding-sibling|self)::/
212      def RelativeLocationPath path, parsed
213        #puts "RelativeLocationPath #{path}"
214        while path.size > 0
215          # (axis or @ or <child::>) nodetest predicate  >
216          # OR                                          >  / Step
217          # (. or ..)                                    >
218          if path[0] == ?.
219            if path[1] == ?.
220              parsed << :parent
221              parsed << :node
222              path = path[2..-1]
223            else
224              parsed << :self
225              parsed << :node
226              path = path[1..-1]
227            end
228          else
229            if path[0] == ?@
230              #puts "ATTRIBUTE"
231              parsed << :attribute
232              path = path[1..-1]
233              # Goto Nodetest
234            elsif path =~ AXIS
235              parsed << $1.tr('-','_').intern
236              path = $'
237              # Goto Nodetest
238            else
239              parsed << :child
240            end
241
242            #puts "NODETESTING '#{path}'"
243            n = []
244            path = NodeTest( path, n)
245            #puts "NODETEST RETURNED '#{path}'"
246
247            if path[0] == ?[
248              path = Predicate( path, n )
249            end
250
251            parsed.concat(n)
252          end
253
254          if path.size > 0
255            if path[0] == ?/
256              if path[1] == ?/
257                parsed << :descendant_or_self
258                parsed << :node
259                path = path[2..-1]
260              else
261                path = path[1..-1]
262              end
263            else
264              return path
265            end
266          end
267        end
268        return path
269      end
270
271      # Returns a 1-1 map of the nodeset
272      # The contents of the resulting array are either:
273      #   true/false, if a positive match
274      #   String, if a name match
275      #NodeTest
276      #  | ('*' | NCNAME ':' '*' | QNAME)                NameTest
277      #  | NODE_TYPE '(' ')'                              NodeType
278      #  | PI '(' LITERAL ')'                            PI
279      #    | '[' expr ']'                                Predicate
280      NCNAMETEST= /^(#{NCNAME_STR}):\*/u
281      QNAME     = Namespace::NAMESPLIT
282      NODE_TYPE  = /^(comment|text|node)\(\s*\)/m
283      PI        = /^processing-instruction\(/
284      def NodeTest path, parsed
285        #puts "NodeTest with #{path}"
286        case path
287        when /^\*/
288          path = $'
289          parsed << :any
290        when NODE_TYPE
291          type = $1
292          path = $'
293          parsed << type.tr('-', '_').intern
294        when PI
295          path = $'
296          literal = nil
297          if path !~ /^\s*\)/
298            path =~ LITERAL
299            literal = $1
300            path = $'
301            raise ParseException.new("Missing ')' after processing instruction") if path[0] != ?)
302            path = path[1..-1]
303          end
304          parsed << :processing_instruction
305          parsed << (literal || '')
306        when NCNAMETEST
307          #puts "NCNAMETEST"
308          prefix = $1
309          path = $'
310          parsed << :namespace
311          parsed << prefix
312        when QNAME
313          #puts "QNAME"
314          prefix = $1
315          name = $2
316          path = $'
317          prefix = "" unless prefix
318          parsed << :qname
319          parsed << prefix
320          parsed << name
321        end
322        return path
323      end
324
325      # Filters the supplied nodeset on the predicate(s)
326      def Predicate path, parsed
327        #puts "PREDICATE with #{path}"
328        return nil unless path[0] == ?[
329        predicates = []
330        while path[0] == ?[
331          path, expr = get_group(path)
332          predicates << expr[1..-2] if expr
333        end
334        #puts "PREDICATES = #{predicates.inspect}"
335        predicates.each{ |pred|
336          #puts "ORING #{pred}"
337          preds = []
338          parsed << :predicate
339          parsed << preds
340          OrExpr(pred, preds)
341        }
342        #puts "PREDICATES = #{predicates.inspect}"
343        path
344      end
345
346      # The following return arrays of true/false, a 1-1 mapping of the
347      # supplied nodeset, except for axe(), which returns a filtered
348      # nodeset
349
350      #| OrExpr S 'or' S AndExpr
351      #| AndExpr
352      def OrExpr path, parsed
353        #puts "OR >>> #{path}"
354        n = []
355        rest = AndExpr( path, n )
356        #puts "OR <<< #{rest}"
357        if rest != path
358          while rest =~ /^\s*( or )/
359            n = [ :or, n, [] ]
360            rest = AndExpr( $', n[-1] )
361          end
362        end
363        if parsed.size == 0 and n.size != 0
364          parsed.replace(n)
365        elsif n.size > 0
366          parsed << n
367        end
368        rest
369      end
370
371      #| AndExpr S 'and' S EqualityExpr
372      #| EqualityExpr
373      def AndExpr path, parsed
374        #puts "AND >>> #{path}"
375        n = []
376        rest = EqualityExpr( path, n )
377        #puts "AND <<< #{rest}"
378        if rest != path
379          while rest =~ /^\s*( and )/
380            n = [ :and, n, [] ]
381            #puts "AND >>> #{rest}"
382            rest = EqualityExpr( $', n[-1] )
383            #puts "AND <<< #{rest}"
384          end
385        end
386        if parsed.size == 0 and n.size != 0
387          parsed.replace(n)
388        elsif n.size > 0
389          parsed << n
390        end
391        rest
392      end
393
394      #| EqualityExpr ('=' | '!=')  RelationalExpr
395      #| RelationalExpr
396      def EqualityExpr path, parsed
397        #puts "EQUALITY >>> #{path}"
398        n = []
399        rest = RelationalExpr( path, n )
400        #puts "EQUALITY <<< #{rest}"
401        if rest != path
402          while rest =~ /^\s*(!?=)\s*/
403            if $1[0] == ?!
404              n = [ :neq, n, [] ]
405            else
406              n = [ :eq, n, [] ]
407            end
408            rest = RelationalExpr( $', n[-1] )
409          end
410        end
411        if parsed.size == 0 and n.size != 0
412          parsed.replace(n)
413        elsif n.size > 0
414          parsed << n
415        end
416        rest
417      end
418
419      #| RelationalExpr ('<' | '>' | '<=' | '>=') AdditiveExpr
420      #| AdditiveExpr
421      def RelationalExpr path, parsed
422        #puts "RELATION >>> #{path}"
423        n = []
424        rest = AdditiveExpr( path, n )
425        #puts "RELATION <<< #{rest}"
426        if rest != path
427          while rest =~ /^\s*([<>]=?)\s*/
428            if $1[0] == ?<
429              sym = "lt"
430            else
431              sym = "gt"
432            end
433            sym << "eq" if $1[-1] == ?=
434            n = [ sym.intern, n, [] ]
435            rest = AdditiveExpr( $', n[-1] )
436          end
437        end
438        if parsed.size == 0 and n.size != 0
439          parsed.replace(n)
440        elsif n.size > 0
441          parsed << n
442        end
443        rest
444      end
445
446      #| AdditiveExpr ('+' | S '-') MultiplicativeExpr
447      #| MultiplicativeExpr
448      def AdditiveExpr path, parsed
449        #puts "ADDITIVE >>> #{path}"
450        n = []
451        rest = MultiplicativeExpr( path, n )
452        #puts "ADDITIVE <<< #{rest}"
453        if rest != path
454          while rest =~ /^\s*(\+| -)\s*/
455            if $1[0] == ?+
456              n = [ :plus, n, [] ]
457            else
458              n = [ :minus, n, [] ]
459            end
460            rest = MultiplicativeExpr( $', n[-1] )
461          end
462        end
463        if parsed.size == 0 and n.size != 0
464          parsed.replace(n)
465        elsif n.size > 0
466          parsed << n
467        end
468        rest
469      end
470
471      #| MultiplicativeExpr ('*' | S ('div' | 'mod') S) UnaryExpr
472      #| UnaryExpr
473      def MultiplicativeExpr path, parsed
474        #puts "MULT >>> #{path}"
475        n = []
476        rest = UnaryExpr( path, n )
477        #puts "MULT <<< #{rest}"
478        if rest != path
479          while rest =~ /^\s*(\*| div | mod )\s*/
480            if $1[0] == ?*
481              n = [ :mult, n, [] ]
482            elsif $1.include?( "div" )
483              n = [ :div, n, [] ]
484            else
485              n = [ :mod, n, [] ]
486            end
487            rest = UnaryExpr( $', n[-1] )
488          end
489        end
490        if parsed.size == 0 and n.size != 0
491          parsed.replace(n)
492        elsif n.size > 0
493          parsed << n
494        end
495        rest
496      end
497
498      #| '-' UnaryExpr
499      #| UnionExpr
500      def UnaryExpr path, parsed
501        path =~ /^(\-*)/
502        path = $'
503        if $1 and (($1.size % 2) != 0)
504          mult = -1
505        else
506          mult = 1
507        end
508        parsed << :neg if mult < 0
509
510        #puts "UNARY >>> #{path}"
511        n = []
512        path = UnionExpr( path, n )
513        #puts "UNARY <<< #{path}"
514        parsed.concat( n )
515        path
516      end
517
518      #| UnionExpr '|' PathExpr
519      #| PathExpr
520      def UnionExpr path, parsed
521        #puts "UNION >>> #{path}"
522        n = []
523        rest = PathExpr( path, n )
524        #puts "UNION <<< #{rest}"
525        if rest != path
526          while rest =~ /^\s*(\|)\s*/
527            n = [ :union, n, [] ]
528            rest = PathExpr( $', n[-1] )
529          end
530        end
531        if parsed.size == 0 and n.size != 0
532          parsed.replace( n )
533        elsif n.size > 0
534          parsed << n
535        end
536        rest
537      end
538
539      #| LocationPath
540      #| FilterExpr ('/' | '//') RelativeLocationPath
541      def PathExpr path, parsed
542        path =~ /^\s*/
543        path = $'
544        #puts "PATH >>> #{path}"
545        n = []
546        rest = FilterExpr( path, n )
547        #puts "PATH <<< '#{rest}'"
548        if rest != path
549          if rest and rest[0] == ?/
550            return RelativeLocationPath(rest, n)
551          end
552        end
553        #puts "BEFORE WITH '#{rest}'"
554        rest = LocationPath(rest, n) if rest =~ /\A[\/\.\@\[\w*]/
555        parsed.concat(n)
556        return rest
557      end
558
559      #| FilterExpr Predicate
560      #| PrimaryExpr
561      def FilterExpr path, parsed
562        #puts "FILTER >>> #{path}"
563        n = []
564        path = PrimaryExpr( path, n )
565        #puts "FILTER <<< #{path}"
566        path = Predicate(path, n) if path and path[0] == ?[
567        #puts "FILTER <<< #{path}"
568        parsed.concat(n)
569        path
570      end
571
572      #| VARIABLE_REFERENCE
573      #| '(' expr ')'
574      #| LITERAL
575      #| NUMBER
576      #| FunctionCall
577      VARIABLE_REFERENCE  = /^\$(#{NAME_STR})/u
578      NUMBER              = /^(\d*\.?\d+)/
579      NT        = /^comment|text|processing-instruction|node$/
580      def PrimaryExpr path, parsed
581        case path
582        when VARIABLE_REFERENCE
583          varname = $1
584          path = $'
585          parsed << :variable
586          parsed << varname
587          #arry << @variables[ varname ]
588        when /^(\w[-\w]*)(?:\()/
589          #puts "PrimaryExpr :: Function >>> #$1 -- '#$''"
590          fname = $1
591          tmp = $'
592          #puts "#{fname} =~ #{NT.inspect}"
593          return path if fname =~ NT
594          path = tmp
595          parsed << :function
596          parsed << fname
597          path = FunctionCall(path, parsed)
598        when NUMBER
599          #puts "LITERAL or NUMBER: #$1"
600          varname = $1.nil? ? $2 : $1
601          path = $'
602          parsed << :literal
603          parsed << (varname.include?('.') ? varname.to_f : varname.to_i)
604        when LITERAL
605          #puts "LITERAL or NUMBER: #$1"
606          varname = $1.nil? ? $2 : $1
607          path = $'
608          parsed << :literal
609          parsed << varname
610        when /^\(/                                               #/
611          path, contents = get_group(path)
612          contents = contents[1..-2]
613          n = []
614          OrExpr( contents, n )
615          parsed.concat(n)
616        end
617        path
618      end
619
620      #| FUNCTION_NAME '(' ( expr ( ',' expr )* )? ')'
621      def FunctionCall rest, parsed
622        path, arguments = parse_args(rest)
623        argset = []
624        for argument in arguments
625          args = []
626          OrExpr( argument, args )
627          argset << args
628        end
629        parsed << argset
630        path
631      end
632
633      # get_group( '[foo]bar' ) -> ['bar', '[foo]']
634      def get_group string
635        ind = 0
636        depth = 0
637        st = string[0,1]
638        en = (st == "(" ? ")" : "]")
639        begin
640          case string[ind,1]
641          when st
642            depth += 1
643          when en
644            depth -= 1
645          end
646          ind += 1
647        end while depth > 0 and ind < string.length
648        return nil unless depth==0
649        [string[ind..-1], string[0..ind-1]]
650      end
651
652      def parse_args( string )
653        arguments = []
654        ind = 0
655        inquot = false
656        inapos = false
657        depth = 1
658        begin
659          case string[ind]
660          when ?"
661            inquot = !inquot unless inapos
662          when ?'
663            inapos = !inapos unless inquot
664          else
665            unless inquot or inapos
666              case string[ind]
667              when ?(
668                depth += 1
669                if depth == 1
670                  string = string[1..-1]
671                  ind -= 1
672                end
673              when ?)
674                depth -= 1
675                if depth == 0
676                  s = string[0,ind].strip
677                  arguments << s unless s == ""
678                  string = string[ind+1..-1]
679                end
680              when ?,
681                if depth == 1
682                  s = string[0,ind].strip
683                  arguments << s unless s == ""
684                  string = string[ind+1..-1]
685                  ind = -1
686                end
687              end
688            end
689          end
690          ind += 1
691        end while depth > 0 and ind < string.length
692        return nil unless depth==0
693        [string,arguments]
694      end
695    end
696  end
697end
698