1require 'forwardable'
2
3require 'rexml/parseexception'
4require 'rexml/parsers/baseparser'
5require 'rexml/xmltokens'
6
7module REXML
8  module Parsers
9    # = Using the Pull Parser
10    # <em>This API is experimental, and subject to change.</em>
11    #  parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
12    #  while parser.has_next?
    #    res = parser.pull
    #    puts res[1]['att'] if res.start_element? and res[0] == 'b'
15    #  end
16    # See the PullEvent class for information on the content of the results.
17    # The data is identical to the arguments passed for the various events to
18    # the StreamListener API.
19    #
20    # Notice that:
21    #  parser = PullParser.new( "<a>BAD DOCUMENT" )
22    #  while parser.has_next?
    #    res = parser.pull
24    #    raise res[1] if res.error?
25    #  end
26    #
27    # Nat Price gave me some good ideas for the API.
28    class PullParser
29      include XMLTokens
30      extend Forwardable
31
32      def_delegators( :@parser, :has_next? )
33      def_delegators( :@parser, :entity )
34      def_delegators( :@parser, :empty? )
35      def_delegators( :@parser, :source )
36
37      def initialize stream
38        @entities = {}
39        @listeners = nil
40        @parser = BaseParser.new( stream )
41        @my_stack = []
42      end
43
44      def add_listener( listener )
45        @listeners = [] unless @listeners
46        @listeners << listener
47      end
48
49      def each
50        while has_next?
51          yield self.pull
52        end
53      end
54
55      def peek depth=0
56        if @my_stack.length <= depth
57          (depth - @my_stack.length + 1).times {
58            e = PullEvent.new(@parser.pull)
59            @my_stack.push(e)
60          }
61        end
62        @my_stack[depth]
63      end
64
65      def pull
66        return @my_stack.shift if @my_stack.length > 0
67
68        event = @parser.pull
69        case event[0]
70        when :entitydecl
71          @entities[ event[1] ] =
72            event[2] unless event[2] =~ /PUBLIC|SYSTEM/
73        when :text
74          unnormalized = @parser.unnormalize( event[1], @entities )
75          event << unnormalized
76        end
77        PullEvent.new( event )
78      end
79
80      def unshift token
81        @my_stack.unshift token
82      end
83    end
84
    # A parsing event.  The contents of the event are accessed as an +Array+,
86    # and the type is given either by the ...? methods, or by accessing the
87    # +type+ accessor.  The contents of this object vary from event to event,
88    # but are identical to the arguments passed to +StreamListener+s for each
89    # event.
90    class PullEvent
91      # The type of this event.  Will be one of :tag_start, :tag_end, :text,
92      # :processing_instruction, :comment, :doctype, :attlistdecl, :entitydecl,
93      # :notationdecl, :entity, :cdata, :xmldecl, or :error.
94      def initialize(arg)
95        @contents = arg
96      end
97
98      def []( start, endd=nil)
99        if start.kind_of? Range
100          @contents.slice( start.begin+1 .. start.end )
101        elsif start.kind_of? Numeric
102          if endd.nil?
103            @contents.slice( start+1 )
104          else
105            @contents.slice( start+1, endd )
106          end
107        else
108          raise "Illegal argument #{start.inspect} (#{start.class})"
109        end
110      end
111
112      def event_type
113        @contents[0]
114      end
115
116      # Content: [ String tag_name, Hash attributes ]
117      def start_element?
118        @contents[0] == :start_element
119      end
120
121      # Content: [ String tag_name ]
122      def end_element?
123        @contents[0] == :end_element
124      end
125
126      # Content: [ String raw_text, String unnormalized_text ]
127      def text?
128        @contents[0] == :text
129      end
130
131      # Content: [ String text ]
132      def instruction?
133        @contents[0] == :processing_instruction
134      end
135
136      # Content: [ String text ]
137      def comment?
138        @contents[0] == :comment
139      end
140
141      # Content: [ String name, String pub_sys, String long_name, String uri ]
142      def doctype?
143        @contents[0] == :start_doctype
144      end
145
146      # Content: [ String text ]
147      def attlistdecl?
148        @contents[0] == :attlistdecl
149      end
150
151      # Content: [ String text ]
152      def elementdecl?
153        @contents[0] == :elementdecl
154      end
155
156      # Due to the wonders of DTDs, an entity declaration can be just about
157      # anything.  There's no way to normalize it; you'll have to interpret the
158      # content yourself.  However, the following is true:
159      #
160      # * If the entity declaration is an internal entity:
161      #   [ String name, String value ]
162      # Content: [ String text ]
163      def entitydecl?
164        @contents[0] == :entitydecl
165      end
166
167      # Content: [ String text ]
168      def notationdecl?
169        @contents[0] == :notationdecl
170      end
171
172      # Content: [ String text ]
173      def entity?
174        @contents[0] == :entity
175      end
176
177      # Content: [ String text ]
178      def cdata?
179        @contents[0] == :cdata
180      end
181
182      # Content: [ String version, String encoding, String standalone ]
183      def xmldecl?
184        @contents[0] == :xmldecl
185      end
186
187      def error?
188        @contents[0] == :error
189      end
190
191      def inspect
192        @contents[0].to_s + ": " + @contents[1..-1].inspect
193      end
194    end
195  end
196end
197