1# Copyright (C) 2001, 2002, 2003 by Michael Neumann (mneumann@ntecs.de)
2#
3# $Id: parser.rb 37688 2012-11-16 16:55:29Z marcandre $
4#
5
6
7require "date"
8require "xmlrpc/base64"
9require "xmlrpc/datetime"
10
11
module NQXML
  # Adds DOM-style accessors to NQXML::Node (presumably the node class of
  # the nqxml library, which is reopened here) so that the XML-RPC tree
  # parser can walk an NQXML tree through +childNodes+, +nodeType+,
  # +nodeValue+ and friends.
  class Node

    # Deletes +node+ from the child list (Array#delete semantics).
    def removeChild(node)
      @children.delete(node)
    end

    # The child list itself (not a copy).
    def childNodes
      @children
    end

    # +true+ when the node has at least one child.
    def hasChildNodes
      !@children.empty?
    end

    # The child stored at position +index+.
    def [](index)
      @children[index]
    end

    # Classifies the wrapped entity by its exact class:
    # NQXML::Text gives +:TEXT+, NQXML::Comment gives +:COMMENT+,
    # NQXML::Tag gives +:ELEMENT+ and anything else gives +:ELSE+.
    def nodeType
      return :TEXT    if @entity.instance_of?(NQXML::Text)
      return :COMMENT if @entity.instance_of?(NQXML::Comment)
      return :ELEMENT if @entity.instance_of?(NQXML::Tag)

      :ELSE
    end

    # Text of the wrapped entity.
    #
    # TODO: raise an error when the entity is of the wrong type
    def nodeValue
      @entity.text
    end

    # Name of the wrapped entity.
    #
    # TODO: raise an error when the entity is of the wrong type
    def nodeName
      @entity.name
    end
  end # class Node
end # module NQXML
47
48module XMLRPC # :nodoc:
49
50  # Raised when the remote procedure returns a fault-structure, which has two
51  # accessor-methods +faultCode+ an Integer, and +faultString+ a String.
52  class FaultException < StandardError
53    attr_reader :faultCode, :faultString
54
55    # Creates a new XMLRPC::FaultException instance.
56    #
57    # +faultString+ is passed to StandardError as the +msg+ of the Exception.
58    def initialize(faultCode, faultString)
59      @faultCode   = faultCode
60      @faultString = faultString
61      super(@faultString)
62    end
63
64    # The +faultCode+ and +faultString+ of the exception in a Hash.
65    def to_h
66      {"faultCode" => @faultCode, "faultString" => @faultString}
67    end
68  end
69
70  # Helper class used to convert types.
71  module Convert
72
73    # Converts a String to an Integer
74    #
75    # See also String.to_i
76    def self.int(str)
77      str.to_i
78    end
79
80    # Converts a String to +true+ or +false+
81    #
82    # Raises an exception if +str+ is not +0+ or +1+
83    def self.boolean(str)
84      case str
85      when "0" then false
86      when "1" then true
87      else
88        raise "RPC-value of type boolean is wrong"
89      end
90    end
91
92    # Converts a String to a Float
93    #
94    # See also String.to_f
95    def self.double(str)
96      str.to_f
97    end
98
99    # Converts a the given +str+ to a +dateTime.iso8601+ formatted date.
100    #
101    # Raises an exception if the String isn't in +dateTime.iso8601+ format.
102    #
103    # See also, XMLRPC::DateTime
104    def self.dateTime(str)
105      case str
106      when /^(-?\d\d\d\d)-?(\d\d)-?(\d\d)T(\d\d):(\d\d):(\d\d)(?:Z|([+-])(\d\d):?(\d\d))?$/
107        a = [$1, $2, $3, $4, $5, $6].collect{|i| i.to_i}
108        if $7
109          ofs = $8.to_i*3600 + $9.to_i*60
110          ofs = -ofs if $7=='+'
111          utc = Time.utc(*a) + ofs
112          a = [ utc.year, utc.month, utc.day, utc.hour, utc.min, utc.sec ]
113        end
114        XMLRPC::DateTime.new(*a)
115      when /^(-?\d\d)-?(\d\d)-?(\d\d)T(\d\d):(\d\d):(\d\d)(Z|([+-]\d\d):(\d\d))?$/
116        a = [$1, $2, $3, $4, $5, $6].collect{|i| i.to_i}
117        if a[0] < 70
118          a[0] += 2000
119        else
120          a[0] += 1900
121        end
122        if $7
123          ofs = $8.to_i*3600 + $9.to_i*60
124          ofs = -ofs if $7=='+'
125          utc = Time.utc(*a) + ofs
126          a = [ utc.year, utc.month, utc.day, utc.hour, utc.min, utc.sec ]
127        end
128        XMLRPC::DateTime.new(*a)
129      else
130        raise "wrong dateTime.iso8601 format " + str
131      end
132    end
133
134    # Decodes the given +str+ using XMLRPC::Base64.decode
135    def self.base64(str)
136      XMLRPC::Base64.decode(str)
137    end
138
139    # Converts the given +hash+ to a marshalled object.
140    #
141    # Returns the given +hash+ if an exception occurs.
142    def self.struct(hash)
143      # convert to marshalled object
144      klass = hash["___class___"]
145      if klass.nil? or Config::ENABLE_MARSHALLING == false
146        hash
147      else
148        begin
149          mod = Module
150          klass.split("::").each {|const| mod = mod.const_get(const.strip)}
151
152          obj = mod.allocate
153
154          hash.delete "___class___"
155          hash.each {|key, value|
156            obj.instance_variable_set("@#{ key }", value) if key =~ /^([a-zA-Z_]\w*)$/
157          }
158          obj
159        rescue
160          hash
161        end
162      end
163    end
164
165    # Converts the given +hash+ to an XMLRPC::FaultException object by passing
166    # the +faultCode+ and +faultString+ attributes of the Hash to
167    # XMLRPC::FaultException.new
168    #
169    # Raises an Exception if the given +hash+ doesn't meet the requirements.
170    # Those requirements being:
171    # * 2 keys
172    # * <code>'faultCode'</code> key is an Integer
173    # * <code>'faultString'</code> key is a String
174    def self.fault(hash)
175      if hash.kind_of? Hash and hash.size == 2 and
176        hash.has_key? "faultCode" and hash.has_key? "faultString" and
177        hash["faultCode"].kind_of? Integer and hash["faultString"].kind_of? String
178
179        XMLRPC::FaultException.new(hash["faultCode"], hash["faultString"])
180      else
181        raise "wrong fault-structure: #{hash.inspect}"
182      end
183    end
184
185  end # module Convert
186
187  # Parser for XML-RPC call and response
188  module XMLParser
189
190    class AbstractTreeParser
191
192      def parseMethodResponse(str)
193        methodResponse_document(createCleanedTree(str))
194      end
195
196      def parseMethodCall(str)
197        methodCall_document(createCleanedTree(str))
198      end
199
200      private
201
202      # Removes all whitespaces but in the tags i4, i8, int, boolean....
203      # and all comments
204      def removeWhitespacesAndComments(node)
205        remove = []
206        childs = node.childNodes.to_a
207        childs.each do |nd|
208          case _nodeType(nd)
209          when :TEXT
210            # TODO: add nil?
211            unless %w(i4 i8 int boolean string double dateTime.iso8601 base64).include? node.nodeName
212
213               if node.nodeName == "value"
214                 if not node.childNodes.to_a.detect {|n| _nodeType(n) == :ELEMENT}.nil?
215                   remove << nd if nd.nodeValue.strip == ""
216                 end
217               else
218                 remove << nd if nd.nodeValue.strip == ""
219               end
220            end
221          when :COMMENT
222            remove << nd
223          else
224            removeWhitespacesAndComments(nd)
225          end
226        end
227
228        remove.each { |i| node.removeChild(i) }
229      end
230
231
232      def nodeMustBe(node, name)
233        cmp = case name
234        when Array
235          name.include?(node.nodeName)
236        when String
237          name == node.nodeName
238        else
239          raise "error"
240        end
241
242        if not cmp then
243          raise "wrong xml-rpc (name)"
244        end
245
246        node
247      end
248
249      # Returns, when successfully the only child-node
250      def hasOnlyOneChild(node, name=nil)
251        if node.childNodes.to_a.size != 1
252          raise "wrong xml-rpc (size)"
253        end
254        if name != nil then
255          nodeMustBe(node.firstChild, name)
256        end
257      end
258
259
260      def assert(b)
261        if not b then
262          raise "assert-fail"
263        end
264      end
265
266      # The node `node` has empty string or string
267      def text_zero_one(node)
268        nodes = node.childNodes.to_a.size
269
270        if nodes == 1
271          text(node.firstChild)
272        elsif nodes == 0
273          ""
274        else
275          raise "wrong xml-rpc (size)"
276        end
277      end
278
279
280      def integer(node)
281        #TODO: check string for float because to_i returnsa
282        #      0 when wrong string
283         nodeMustBe(node, %w(i4 i8 int))
284        hasOnlyOneChild(node)
285
286        Convert.int(text(node.firstChild))
287      end
288
289      def boolean(node)
290        nodeMustBe(node, "boolean")
291        hasOnlyOneChild(node)
292
293        Convert.boolean(text(node.firstChild))
294      end
295
296      def v_nil(node)
297        nodeMustBe(node, "nil")
298        assert( node.childNodes.to_a.size == 0 )
299        nil
300      end
301
302      def string(node)
303        nodeMustBe(node, "string")
304        text_zero_one(node)
305      end
306
307      def double(node)
308        #TODO: check string for float because to_f returnsa
309        #      0.0 when wrong string
310        nodeMustBe(node, "double")
311        hasOnlyOneChild(node)
312
313        Convert.double(text(node.firstChild))
314      end
315
316      def dateTime(node)
317        nodeMustBe(node, "dateTime.iso8601")
318        hasOnlyOneChild(node)
319
320        Convert.dateTime( text(node.firstChild) )
321      end
322
323      def base64(node)
324        nodeMustBe(node, "base64")
325        #hasOnlyOneChild(node)
326
327        Convert.base64(text_zero_one(node))
328      end
329
330      def member(node)
331        nodeMustBe(node, "member")
332        assert( node.childNodes.to_a.size == 2 )
333
334        [ name(node[0]), value(node[1]) ]
335      end
336
337      def name(node)
338        nodeMustBe(node, "name")
339        #hasOnlyOneChild(node)
340        text_zero_one(node)
341      end
342
343      def array(node)
344        nodeMustBe(node, "array")
345        hasOnlyOneChild(node, "data")
346        data(node.firstChild)
347      end
348
349      def data(node)
350        nodeMustBe(node, "data")
351
352        node.childNodes.to_a.collect do |val|
353          value(val)
354        end
355      end
356
357      def param(node)
358        nodeMustBe(node, "param")
359        hasOnlyOneChild(node, "value")
360        value(node.firstChild)
361      end
362
363      def methodResponse(node)
364        nodeMustBe(node, "methodResponse")
365        hasOnlyOneChild(node, %w(params fault))
366        child = node.firstChild
367
368        case child.nodeName
369        when "params"
370          [ true, params(child,false) ]
371        when "fault"
372          [ false, fault(child) ]
373        else
374          raise "unexpected error"
375        end
376
377      end
378
379      def methodName(node)
380        nodeMustBe(node, "methodName")
381        hasOnlyOneChild(node)
382        text(node.firstChild)
383      end
384
385      def params(node, call=true)
386        nodeMustBe(node, "params")
387
388        if call
389          node.childNodes.to_a.collect do |n|
390            param(n)
391          end
392        else # response (only one param)
393          hasOnlyOneChild(node)
394          param(node.firstChild)
395        end
396      end
397
398      def fault(node)
399        nodeMustBe(node, "fault")
400        hasOnlyOneChild(node, "value")
401        f = value(node.firstChild)
402        Convert.fault(f)
403      end
404
405
406
407      # _nodeType is defined in the subclass
408      def text(node)
409        assert( _nodeType(node) == :TEXT )
410        assert( node.hasChildNodes == false )
411        assert( node.nodeValue != nil )
412
413        node.nodeValue.to_s
414      end
415
416      def struct(node)
417        nodeMustBe(node, "struct")
418
419        hash = {}
420        node.childNodes.to_a.each do |me|
421          n, v = member(me)
422          hash[n] = v
423        end
424
425        Convert.struct(hash)
426      end
427
428
429      def value(node)
430        nodeMustBe(node, "value")
431        nodes = node.childNodes.to_a.size
432        if nodes == 0
433          return ""
434        elsif nodes > 1
435          raise "wrong xml-rpc (size)"
436        end
437
438        child = node.firstChild
439
440        case _nodeType(child)
441        when :TEXT
442          text_zero_one(node)
443        when :ELEMENT
444          case child.nodeName
445          when "i4", "i8", "int"  then integer(child)
446          when "boolean"          then boolean(child)
447          when "string"           then string(child)
448          when "double"           then double(child)
449          when "dateTime.iso8601" then dateTime(child)
450          when "base64"           then base64(child)
451          when "struct"           then struct(child)
452          when "array"            then array(child)
453          when "nil"
454            if Config::ENABLE_NIL_PARSER
455              v_nil(child)
456            else
457              raise "wrong/unknown XML-RPC type 'nil'"
458            end
459          else
460            raise "wrong/unknown XML-RPC type"
461          end
462        else
463          raise "wrong type of node"
464        end
465
466      end
467
468      def methodCall(node)
469        nodeMustBe(node, "methodCall")
470        assert( (1..2).include?( node.childNodes.to_a.size ) )
471        name = methodName(node[0])
472
473        if node.childNodes.to_a.size == 2 then
474          pa = params(node[1])
475        else # no parameters given
476          pa = []
477        end
478        [name, pa]
479      end
480
481    end # module TreeParserMixin
482
483    class AbstractStreamParser
484      def parseMethodResponse(str)
485        parser = @parser_class.new
486        parser.parse(str)
487        raise "No valid method response!" if parser.method_name != nil
488        if parser.fault != nil
489          # is a fault structure
490          [false, parser.fault]
491        else
492          # is a normal return value
493          raise "Missing return value!" if parser.params.size == 0
494          raise "Too many return values. Only one allowed!" if parser.params.size > 1
495          [true, parser.params[0]]
496        end
497      end
498
499      def parseMethodCall(str)
500        parser = @parser_class.new
501        parser.parse(str)
502        raise "No valid method call - missing method name!" if parser.method_name.nil?
503        [parser.method_name, parser.params]
504      end
505    end
506
507    module StreamParserMixin
508      attr_reader :params
509      attr_reader :method_name
510      attr_reader :fault
511
512      def initialize(*a)
513        super(*a)
514        @params = []
515        @values = []
516        @val_stack = []
517
518        @names = []
519        @name = []
520
521        @structs = []
522        @struct = {}
523
524        @method_name = nil
525        @fault = nil
526
527        @data = nil
528      end
529
530      def startElement(name, attrs=[])
531        @data = nil
532        case name
533        when "value"
534          @value = nil
535        when "nil"
536          raise "wrong/unknown XML-RPC type 'nil'" unless Config::ENABLE_NIL_PARSER
537          @value = :nil
538        when "array"
539          @val_stack << @values
540          @values = []
541        when "struct"
542          @names << @name
543          @name = []
544
545          @structs << @struct
546          @struct = {}
547        end
548      end
549
550      def endElement(name)
551        @data ||= ""
552        case name
553        when "string"
554          @value = @data
555        when "i4", "i8", "int"
556          @value = Convert.int(@data)
557        when "boolean"
558          @value = Convert.boolean(@data)
559        when "double"
560          @value = Convert.double(@data)
561        when "dateTime.iso8601"
562          @value = Convert.dateTime(@data)
563        when "base64"
564          @value = Convert.base64(@data)
565        when "value"
566          @value = @data if @value.nil?
567          @values << (@value == :nil ? nil : @value)
568        when "array"
569          @value = @values
570          @values = @val_stack.pop
571        when "struct"
572          @value = Convert.struct(@struct)
573
574          @name = @names.pop
575          @struct = @structs.pop
576        when "name"
577          @name[0] = @data
578        when "member"
579          @struct[@name[0]] = @values.pop
580
581        when "param"
582          @params << @values[0]
583          @values = []
584
585        when "fault"
586          @fault = Convert.fault(@values[0])
587
588        when "methodName"
589          @method_name = @data
590        end
591
592        @data = nil
593      end
594
595      def character(data)
596        if @data
597          @data << data
598        else
599          @data = data
600        end
601      end
602
603    end # module StreamParserMixin
604
605    class XMLStreamParser < AbstractStreamParser
606      def initialize
607        require "xmlparser"
608        @parser_class = Class.new(::XMLParser) {
609          include StreamParserMixin
610        }
611      end
612    end # class XMLStreamParser
613
614    class NQXMLStreamParser < AbstractStreamParser
615      def initialize
616        require "nqxml/streamingparser"
617        @parser_class = XMLRPCParser
618      end
619
620      class XMLRPCParser
621        include StreamParserMixin
622
623        def parse(str)
624          parser = NQXML::StreamingParser.new(str)
625          parser.each do |ele|
626            case ele
627            when NQXML::Text
628              @data = ele.text
629              #character(ele.text)
630            when NQXML::Tag
631              if ele.isTagEnd
632                endElement(ele.name)
633              else
634                startElement(ele.name, ele.attrs)
635              end
636            end
637          end # do
638        end # method parse
639      end # class XMLRPCParser
640
641    end # class NQXMLStreamParser
642
643    class XMLTreeParser < AbstractTreeParser
644
645      def initialize
646        require "xmltreebuilder"
647
648        # The new XMLParser library (0.6.2+) uses a slightly different DOM implementation.
649        # The following code removes the differences between both versions.
650        if defined? XML::DOM::Builder
651          return if defined? XML::DOM::Node::DOCUMENT # code below has been already executed
652          klass = XML::DOM::Node
653          klass.const_set(:DOCUMENT, klass::DOCUMENT_NODE)
654          klass.const_set(:TEXT, klass::TEXT_NODE)
655          klass.const_set(:COMMENT, klass::COMMENT_NODE)
656          klass.const_set(:ELEMENT, klass::ELEMENT_NODE)
657        end
658      end
659
660      private
661
662      def _nodeType(node)
663        tp = node.nodeType
664        if tp == XML::SimpleTree::Node::TEXT then :TEXT
665        elsif tp == XML::SimpleTree::Node::COMMENT then :COMMENT
666        elsif tp == XML::SimpleTree::Node::ELEMENT then :ELEMENT
667        else :ELSE
668        end
669      end
670
671
672      def methodResponse_document(node)
673        assert( node.nodeType == XML::SimpleTree::Node::DOCUMENT )
674        hasOnlyOneChild(node, "methodResponse")
675
676        methodResponse(node.firstChild)
677      end
678
679      def methodCall_document(node)
680        assert( node.nodeType == XML::SimpleTree::Node::DOCUMENT )
681        hasOnlyOneChild(node, "methodCall")
682
683        methodCall(node.firstChild)
684      end
685
686      def createCleanedTree(str)
687        doc = XML::SimpleTreeBuilder.new.parse(str)
688        doc.documentElement.normalize
689        removeWhitespacesAndComments(doc)
690        doc
691      end
692
693    end # class XMLParser
694
695    class NQXMLTreeParser < AbstractTreeParser
696
697      def initialize
698        require "nqxml/treeparser"
699      end
700
701      private
702
703      def _nodeType(node)
704        node.nodeType
705      end
706
707      def methodResponse_document(node)
708        methodResponse(node)
709      end
710
711      def methodCall_document(node)
712        methodCall(node)
713      end
714
715      def createCleanedTree(str)
716        doc = ::NQXML::TreeParser.new(str).document.rootNode
717        removeWhitespacesAndComments(doc)
718        doc
719      end
720
721    end # class NQXMLTreeParser
722
723    class REXMLStreamParser < AbstractStreamParser
724      def initialize
725        require "rexml/document"
726        @parser_class = StreamListener
727      end
728
729      class StreamListener
730        include StreamParserMixin
731
732        alias :tag_start :startElement
733        alias :tag_end :endElement
734        alias :text :character
735        alias :cdata :character
736
737        def method_missing(*a)
738          # ignore
739        end
740
741        def parse(str)
742          REXML::Document.parse_stream(str, self)
743        end
744      end
745
746    end
747
748    class XMLScanStreamParser < AbstractStreamParser
749      def initialize
750        require "xmlscan/parser"
751        @parser_class = XMLScanParser
752      end
753
754      class XMLScanParser
755        include StreamParserMixin
756
757        Entities = {
758          "lt"   => "<",
759          "gt"   => ">",
760          "amp"  => "&",
761          "quot" => '"',
762          "apos" => "'"
763        }
764
765        def parse(str)
766          parser  = XMLScan::XMLParser.new(self)
767          parser.parse(str)
768        end
769
770        alias :on_stag :startElement
771        alias :on_etag :endElement
772
773        def on_stag_end(name); end
774
775        def on_stag_end_empty(name)
776          startElement(name)
777          endElement(name)
778        end
779
780        def on_chardata(str)
781          character(str)
782        end
783
784        def on_cdata(str)
785          character(str)
786        end
787
788        def on_entityref(ent)
789          str = Entities[ent]
790          if str
791            character(str)
792          else
793            raise "unknown entity"
794          end
795        end
796
797        def on_charref(code)
798          character(code.chr)
799        end
800
801        def on_charref_hex(code)
802          character(code.chr)
803        end
804
805        def method_missing(*a)
806        end
807
808        # TODO: call/implement?
809        # valid_name?
810        # valid_chardata?
811        # valid_char?
812        # parse_error
813
814      end
815    end
816
817    XMLParser   = XMLTreeParser
818    NQXMLParser = NQXMLTreeParser
819
820    Classes = [XMLStreamParser, XMLTreeParser,
821               NQXMLStreamParser, NQXMLTreeParser,
822               REXMLStreamParser, XMLScanStreamParser]
823
824    # yields an instance of each installed parser
825    def self.each_installed_parser
826      XMLRPC::XMLParser::Classes.each do |klass|
827        begin
828          yield klass.new
829        rescue LoadError
830        end
831      end
832    end
833
834  end # module XMLParser
835
836
837end # module XMLRPC
838
839