1require 'rexml/parsers/baseparser'
2require 'rexml/parseexception'
3require 'rexml/namespace'
4require 'rexml/text'
5
6module REXML
7  module Parsers
8    # SAX2Parser
9    class SAX2Parser
10      def initialize source
11        @parser = BaseParser.new(source)
12        @listeners = []
13        @procs = []
14        @namespace_stack = []
15        @has_listeners = false
16        @tag_stack = []
17        @entities = {}
18      end
19
20      def source
21        @parser.source
22      end
23
24      def add_listener( listener )
25        @parser.add_listener( listener )
26      end
27
28      # Listen arguments:
29      #
30      # Symbol, Array, Block
31      #         Listen to Symbol events on Array elements
32      # Symbol, Block
33      #   Listen to Symbol events
34      # Array, Listener
35      #         Listen to all events on Array elements
36      # Array, Block
37      #         Listen to :start_element events on Array elements
38      # Listener
39      #         Listen to All events
40      #
41      # Symbol can be one of: :start_element, :end_element,
42      # :start_prefix_mapping, :end_prefix_mapping, :characters,
43      # :processing_instruction, :doctype, :attlistdecl, :elementdecl,
44      # :entitydecl, :notationdecl, :cdata, :xmldecl, :comment
45      #
46      # There is an additional symbol that can be listened for: :progress.
47      # This will be called for every event generated, passing in the current
48      # stream position.
49      #
50      # Array contains regular expressions or strings which will be matched
51      # against fully qualified element names.
52      #
53      # Listener must implement the methods in SAX2Listener
54      #
55      # Block will be passed the same arguments as a SAX2Listener method would
56      # be, where the method name is the same as the matched Symbol.
57      # See the SAX2Listener for more information.
58      def listen( *args, &blok )
59        if args[0].kind_of? Symbol
60          if args.size == 2
61            args[1].each { |match| @procs << [args[0], match, blok] }
62          else
63            add( [args[0], nil, blok] )
64          end
65        elsif args[0].kind_of? Array
66          if args.size == 2
67            args[0].each { |match| add( [nil, match, args[1]] ) }
68          else
69            args[0].each { |match| add( [ :start_element, match, blok ] ) }
70          end
71        else
72          add([nil, nil, args[0]])
73        end
74      end
75
76      def deafen( listener=nil, &blok )
77        if listener
78          @listeners.delete_if {|item| item[-1] == listener }
79          @has_listeners = false if @listeners.size == 0
80        else
81          @procs.delete_if {|item| item[-1] == blok }
82        end
83      end
84
85      def parse
86        @procs.each { |sym,match,block| block.call if sym == :start_document }
87        @listeners.each { |sym,match,block|
88          block.start_document if sym == :start_document or sym.nil?
89        }
90        context = []
91        while true
92          event = @parser.pull
93          case event[0]
94          when :end_document
95            handle( :end_document )
96            break
97          when :start_doctype
98            handle( :doctype, *event[1..-1])
99          when :end_doctype
100            context = context[1]
101          when :start_element
102            @tag_stack.push(event[1])
103            # find the observers for namespaces
104            procs = get_procs( :start_prefix_mapping, event[1] )
105            listeners = get_listeners( :start_prefix_mapping, event[1] )
106            if procs or listeners
107              # break out the namespace declarations
108              # The attributes live in event[2]
109              event[2].each {|n, v| event[2][n] = @parser.normalize(v)}
110              nsdecl = event[2].find_all { |n, value| n =~ /^xmlns(:|$)/ }
111              nsdecl.collect! { |n, value| [ n[6..-1], value ] }
112              @namespace_stack.push({})
113              nsdecl.each do |n,v|
114                @namespace_stack[-1][n] = v
115                # notify observers of namespaces
116                procs.each { |ob| ob.call( n, v ) } if procs
117                listeners.each { |ob| ob.start_prefix_mapping(n, v) } if listeners
118              end
119            end
120            event[1] =~ Namespace::NAMESPLIT
121            prefix = $1
122            local = $2
123            uri = get_namespace(prefix)
124            # find the observers for start_element
125            procs = get_procs( :start_element, event[1] )
126            listeners = get_listeners( :start_element, event[1] )
127            # notify observers
128            procs.each { |ob| ob.call( uri, local, event[1], event[2] ) } if procs
129            listeners.each { |ob|
130              ob.start_element( uri, local, event[1], event[2] )
131            } if listeners
132          when :end_element
133            @tag_stack.pop
134            event[1] =~ Namespace::NAMESPLIT
135            prefix = $1
136            local = $2
137            uri = get_namespace(prefix)
138            # find the observers for start_element
139            procs = get_procs( :end_element, event[1] )
140            listeners = get_listeners( :end_element, event[1] )
141            # notify observers
142            procs.each { |ob| ob.call( uri, local, event[1] ) } if procs
143            listeners.each { |ob|
144              ob.end_element( uri, local, event[1] )
145            } if listeners
146
147            namespace_mapping = @namespace_stack.pop
148            # find the observers for namespaces
149            procs = get_procs( :end_prefix_mapping, event[1] )
150            listeners = get_listeners( :end_prefix_mapping, event[1] )
151            if procs or listeners
152              namespace_mapping.each do |ns_prefix, ns_uri|
153                # notify observers of namespaces
154                procs.each { |ob| ob.call( ns_prefix ) } if procs
155                listeners.each { |ob| ob.end_prefix_mapping(ns_prefix) } if listeners
156              end
157            end
158          when :text
159            #normalized = @parser.normalize( event[1] )
160            #handle( :characters, normalized )
161            copy = event[1].clone
162
163            esub = proc { |match|
164              if @entities.has_key?($1)
165                @entities[$1].gsub(Text::REFERENCE, &esub)
166              else
167                match
168              end
169            }
170
171            copy.gsub!( Text::REFERENCE, &esub )
172            copy.gsub!( Text::NUMERICENTITY ) {|m|
173              m=$1
174              m = "0#{m}" if m[0] == ?x
175              [Integer(m)].pack('U*')
176            }
177            handle( :characters, copy )
178          when :entitydecl
179            @entities[ event[1] ] = event[2] if event.size == 3
180            handle( *event )
181          when :processing_instruction, :comment, :attlistdecl,
182            :elementdecl, :cdata, :notationdecl, :xmldecl
183            handle( *event )
184          end
185          handle( :progress, @parser.position )
186        end
187      end
188
189      private
190      def handle( symbol, *arguments )
191        tag = @tag_stack[-1]
192        procs = get_procs( symbol, tag )
193        listeners = get_listeners( symbol, tag )
194        # notify observers
195        procs.each { |ob| ob.call( *arguments ) } if procs
196        listeners.each { |l|
197          l.send( symbol.to_s, *arguments )
198        } if listeners
199      end
200
201      # The following methods are duplicates, but it is faster than using
202      # a helper
203      def get_procs( symbol, name )
204        return nil if @procs.size == 0
205        @procs.find_all do |sym, match, block|
206          #puts sym.inspect+"=="+symbol.inspect+ "\t"+match.inspect+"=="+name.inspect+ "\t"+( (sym.nil? or symbol == sym) and ((name.nil? and match.nil?) or match.nil? or ( (name == match) or (match.kind_of? Regexp and name =~ match)))).to_s
207          (
208            (sym.nil? or symbol == sym) and
209            ((name.nil? and match.nil?) or match.nil? or (
210              (name == match) or
211              (match.kind_of? Regexp and name =~ match)
212              )
213            )
214          )
215        end.collect{|x| x[-1]}
216      end
217      def get_listeners( symbol, name )
218        return nil if @listeners.size == 0
219        @listeners.find_all do |sym, match, block|
220          (
221            (sym.nil? or symbol == sym) and
222            ((name.nil? and match.nil?) or match.nil? or (
223              (name == match) or
224              (match.kind_of? Regexp and name =~ match)
225              )
226            )
227          )
228        end.collect{|x| x[-1]}
229      end
230
231      def add( pair )
232        if pair[-1].respond_to? :call
233          @procs << pair unless @procs.include? pair
234        else
235          @listeners << pair unless @listeners.include? pair
236          @has_listeners = true
237        end
238      end
239
240      def get_namespace( prefix )
241        uris = (@namespace_stack.find_all { |ns| not ns[prefix].nil? }) ||
242          (@namespace_stack.find { |ns| not ns[nil].nil? })
243        uris[-1][prefix] unless uris.nil? or 0 == uris.size
244      end
245    end
246  end
247end
248