require 'rexml/parsers/baseparser'
require 'rexml/parseexception'
require 'rexml/namespace'
require 'rexml/text'

module REXML
  module Parsers
    # SAX2Parser
    #
    # Pulls events from a BaseParser and dispatches them to registered
    # observers. Observers are stored as [symbol, match, observer] triples:
    # callables (anything responding to +call+) live in @procs, every other
    # object lives in @listeners and has the method named after the event
    # invoked on it.
    class SAX2Parser
      # source is handed unchanged to BaseParser.new.
      def initialize( source )
        @parser = BaseParser.new(source)
        @listeners = []        # [symbol, match, listener] triples
        @procs = []            # [symbol, match, callable] triples
        @namespace_stack = []  # one { prefix => uri } hash per open element
        @has_listeners = false
        @tag_stack = []        # qualified names of the currently open elements
        @entities = {}         # entity name => replacement text (from :entitydecl)
      end

      # The source of the underlying BaseParser.
      def source
        @parser.source
      end

      # Forwards +listener+ to the underlying BaseParser's add_listener.
      def add_listener( listener )
        @parser.add_listener( listener )
      end

      # Listen arguments:
      #
      # Symbol, Array, Block
      #   Listen to Symbol events on Array elements
      # Symbol, Block
      #   Listen to Symbol events
      # Array, Listener
      #   Listen to all events on Array elements
      # Array, Block
      #   Listen to :start_element events on Array elements
      # Listener
      #   Listen to All events
      #
      # Symbol can be one of: :start_element, :end_element,
      # :start_prefix_mapping, :end_prefix_mapping, :characters,
      # :processing_instruction, :doctype, :attlistdecl, :elementdecl,
      # :entitydecl, :notationdecl, :cdata, :xmldecl, :comment
      #
      # There is an additional symbol that can be listened for: :progress.
      # This will be called for every event generated, passing in the current
      # stream position.
      #
      # Array contains regular expressions or strings which will be matched
      # against fully qualified element names.
      #
      # Listener must implement the methods in SAX2Listener
      #
      # Block will be passed the same arguments as a SAX2Listener method would
      # be, where the method name is the same as the matched Symbol.
      # See the SAX2Listener for more information.
      def listen( *args, &blok )
        case args[0]
        when Symbol
          if args.size == 2
            # Appended directly (not through #add), so identical
            # registrations are not de-duplicated on this path.
            args[1].each { |match| @procs << [args[0], match, blok] }
          else
            add( [args[0], nil, blok] )
          end
        when Array
          if args.size == 2
            args[0].each { |match| add( [nil, match, args[1]] ) }
          else
            args[0].each { |match| add( [ :start_element, match, blok ] ) }
          end
        else
          add( [nil, nil, args[0]] )
        end
      end

      # Removes every registration of +listener+; when no listener is given,
      # removes every registration of the supplied block instead.
      def deafen( listener=nil, &blok )
        if listener
          @listeners.delete_if { |item| item[-1] == listener }
          @has_listeners = false if @listeners.size == 0
        else
          @procs.delete_if { |item| item[-1] == blok }
        end
      end

      # Announces :start_document, then pulls events from the underlying
      # parser and dispatches each one until :end_document has been handled.
      # A :progress event carrying the parser position follows every event
      # except :end_document.
      def parse
        @procs.each { |sym, _match, block| block.call if sym == :start_document }
        @listeners.each do |sym, _match, listener|
          listener.start_document if sym == :start_document or sym.nil?
        end
        # Deliberately `while true` rather than `loop`: `loop` would silently
        # swallow a StopIteration raised by an observer.
        while true
          event = @parser.pull
          case event[0]
          when :end_document
            handle( :end_document )
            break
          when :start_doctype
            handle( :doctype, *event[1..-1] )
          when :end_doctype
            # Nothing is dispatched for the end of the doctype itself.
          when :start_element
            dispatch_start_element( event )
          when :end_element
            dispatch_end_element( event )
          when :text
            handle( :characters, expand_text( event[1] ) )
          when :entitydecl
            # Only three-element events (name + replacement text) are recorded.
            @entities[ event[1] ] = event[2] if event.size == 3
            handle( *event )
          when :processing_instruction, :comment, :attlistdecl,
               :elementdecl, :cdata, :notationdecl, :xmldecl
            handle( *event )
          end
          handle( :progress, @parser.position )
        end
      end

      private

      # Handles a :start_element event: records the open tag, reports the
      # namespace declarations found among the attributes, then reports the
      # element itself with its resolved namespace URI.
      def dispatch_start_element( event )
        qname = event[1]
        attributes = event[2]
        @tag_stack.push( qname )
        # find the observers for namespaces
        procs = get_procs( :start_prefix_mapping, qname )
        listeners = get_listeners( :start_prefix_mapping, qname )
        if procs or listeners
          # Only existing keys are reassigned, which is safe while iterating.
          attributes.each { |attr, value| attributes[attr] = @parser.normalize(value) }
          # break out the namespace declarations
          declarations = attributes.find_all { |attr, _value| attr =~ /^xmlns(:|$)/ }
          # "xmlns:foo"[6..-1] is "foo"; "xmlns"[6..-1] is nil, so the
          # default namespace is keyed by nil.
          declarations.collect! { |attr, value| [ attr[6..-1], value ] }
          @namespace_stack.push({})
          declarations.each do |ns_prefix, ns_uri|
            @namespace_stack[-1][ns_prefix] = ns_uri
            # notify observers of namespaces
            procs.each { |ob| ob.call( ns_prefix, ns_uri ) } if procs
            listeners.each { |ob| ob.start_prefix_mapping( ns_prefix, ns_uri ) } if listeners
          end
        end
        qname =~ Namespace::NAMESPLIT
        prefix = $1
        local = $2
        uri = get_namespace( prefix )
        # find the observers for start_element
        procs = get_procs( :start_element, qname )
        listeners = get_listeners( :start_element, qname )
        # notify observers
        procs.each { |ob| ob.call( uri, local, qname, attributes ) } if procs
        listeners.each { |ob| ob.start_element( uri, local, qname, attributes ) } if listeners
      end

      # Handles an :end_element event: reports the element end, then pops the
      # element's namespace scope and reports the end of each mapping in it.
      def dispatch_end_element( event )
        qname = event[1]
        @tag_stack.pop
        qname =~ Namespace::NAMESPLIT
        prefix = $1
        local = $2
        # Resolved before the pop so the element's own declarations apply.
        uri = get_namespace( prefix )
        # find the observers for end_element
        procs = get_procs( :end_element, qname )
        listeners = get_listeners( :end_element, qname )
        # notify observers
        procs.each { |ob| ob.call( uri, local, qname ) } if procs
        listeners.each { |ob| ob.end_element( uri, local, qname ) } if listeners

        namespace_mapping = @namespace_stack.pop
        # find the observers for namespaces
        procs = get_procs( :end_prefix_mapping, qname )
        listeners = get_listeners( :end_prefix_mapping, qname )
        if procs or listeners
          namespace_mapping.each do |ns_prefix, _ns_uri|
            # notify observers of namespaces
            procs.each { |ob| ob.call( ns_prefix ) } if procs
            listeners.each { |ob| ob.end_prefix_mapping( ns_prefix ) } if listeners
          end
        end
      end

      # Returns a new string built from +raw+ with references to entities
      # recorded in @entities expanded and numeric character references
      # replaced by the character they denote. Any other reference is left
      # untouched.
      def expand_text( raw )
        text = expand_entities( raw )
        text.gsub!( Text::NUMERICENTITY ) do
          code = $1
          code = "0#{code}" if code[0] == ?x   # "x41" -> "0x41" for Integer()
          [Integer(code)].pack('U*')
        end
        text
      end

      # Recursively substitutes references to entities recorded in @entities.
      # +expanding+ lists the entities currently being expanded; a reference
      # back to one of them is left verbatim instead of recursing forever
      # (a self-referential declaration previously ended in SystemStackError).
      def expand_entities( text, expanding = [] )
        text.gsub( Text::REFERENCE ) do |reference|
          name = $1   # nil for character references
          if @entities.has_key?( name ) and not expanding.include?( name )
            expand_entities( @entities[name], expanding + [name] )
          else
            reference
          end
        end
      end

      # Dispatches +symbol+ with +arguments+ to every observer registered for
      # it, matching element filters against the innermost open tag.
      def handle( symbol, *arguments )
        tag = @tag_stack[-1]
        procs = get_procs( symbol, tag )
        listeners = get_listeners( symbol, tag )
        # notify observers
        procs.each { |ob| ob.call( *arguments ) } if procs
        listeners.each { |l| l.send( symbol.to_s, *arguments ) } if listeners
      end

      # The following methods are duplicates, but it is faster than using
      # a helper

      # Callables registered for +symbol+ (or for every event) whose filter is
      # absent, equals +name+, or is a Regexp matching +name+. Returns nil
      # when no callables are registered at all.
      def get_procs( symbol, name )
        return nil if @procs.size == 0
        @procs.find_all do |sym, match, _block|
          (sym.nil? || symbol == sym) &&
            (match.nil? || name == match ||
              (match.kind_of?(Regexp) && name =~ match))
        end.collect { |entry| entry[-1] }
      end

      # Listener objects selected by the same rules as #get_procs. Returns nil
      # when no listeners are registered at all.
      def get_listeners( symbol, name )
        return nil if @listeners.size == 0
        @listeners.find_all do |sym, match, _listener|
          (sym.nil? || symbol == sym) &&
            (match.nil? || name == match ||
              (match.kind_of?(Regexp) && name =~ match))
        end.collect { |entry| entry[-1] }
      end

      # Stores a [symbol, match, observer] triple, skipping exact duplicates.
      # Observers responding to +call+ go to @procs, all others to @listeners.
      def add( pair )
        if pair[-1].respond_to? :call
          @procs << pair unless @procs.include? pair
        else
          @listeners << pair unless @listeners.include? pair
          @has_listeners = true
        end
      end

      # URI bound to +prefix+ (nil means the default namespace) in the
      # innermost open element that declares it, or nil if none does.
      def get_namespace( prefix )
        @namespace_stack.reverse_each do |mapping|
          uri = mapping[prefix]
          return uri unless uri.nil?
        end
        nil
      end
    end
  end
end