require 'forwardable'

require 'rexml/parseexception'
require 'rexml/parsers/baseparser'
require 'rexml/xmltokens'

module REXML
  module Parsers
    # = Using the Pull Parser
    # <em>This API is experimental, and subject to change.</em>
    #  parser = PullParser.new( "<a>text<b att='val'/>txet</a>" )
    #  while parser.has_next?
    #    res = parser.pull
    #    puts res[1]['att'] if res.start_element? and res[0] == 'b'
    #  end
    # See the PullEvent class for information on the content of the results.
    # The data is identical to the arguments passed for the various events to
    # the StreamListener API.
    #
    # Notice that:
    #  parser = PullParser.new( "<a>BAD DOCUMENT" )
    #  while parser.has_next?
    #    res = parser.pull
    #    raise res[1] if res.error?
    #  end
    #
    # Nat Price gave me some good ideas for the API.
    class PullParser
      include XMLTokens
      extend Forwardable

      def_delegators( :@parser, :entity )
      def_delegators( :@parser, :source )

      # Creates a pull parser reading from +stream+, which is handed
      # unchanged to BaseParser.new.
      def initialize(stream)
        @entities = {}
        @listeners = nil
        @parser = BaseParser.new( stream )
        # Events that were looked at with #peek or pushed back with #unshift
        # and have not been consumed by #pull yet.
        @my_stack = []
      end

      # Registers +listener+. The listener list is only stored here; nothing
      # in this class notifies it.
      def add_listener( listener )
        @listeners = [] unless @listeners
        @listeners << listener
      end

      # Returns true when another event can be pulled, either from the
      # buffer filled by #peek / #unshift or from the underlying parser.
      def has_next?
        !@my_stack.empty? || @parser.has_next?
      end

      # Returns true when no buffered event is pending and the underlying
      # parser reports itself as empty.
      def empty?
        @my_stack.empty? && @parser.empty?
      end

      # Yields every remaining event, in order, as a PullEvent.
      # Returns an Enumerator when called without a block.
      def each
        return enum_for( :each ) unless block_given?

        yield pull while has_next?
      end

      # Returns the event +depth+ positions ahead (0 is the event the next
      # #pull will return) without consuming it. Events read ahead are
      # buffered and go through the same processing as in #pull, so a peeked
      # text event already carries its unnormalized text.
      def peek(depth = 0)
        @my_stack.push( next_parser_event ) while @my_stack.length <= depth
        @my_stack[depth]
      end

      # Consumes and returns the next event as a PullEvent. Buffered events
      # (from #peek or #unshift) are returned first.
      def pull
        return @my_stack.shift unless @my_stack.empty?

        next_parser_event
      end

      # Pushes +token+ back so that it is the next thing #pull returns.
      def unshift(token)
        @my_stack.unshift token
      end

      private

      # Reads one raw event from the underlying parser, records internal
      # entity declarations, appends the unnormalized text to text events,
      # and wraps the result in a PullEvent.
      def next_parser_event
        event = @parser.pull
        case event[0]
        when :entitydecl
          # Declarations mentioning PUBLIC or SYSTEM are not stored as
          # replacement values.
          @entities[ event[1] ] = event[2] unless event[2] =~ /PUBLIC|SYSTEM/
        when :text
          event << @parser.unnormalize( event[1], @entities )
        end
        PullEvent.new( event )
      end
    end

    # A parsing event. The contents of the event are accessed as an +Array+,
    # and the type is given either by the ...? methods, or by calling
    # +event_type+. The contents of this object vary from event to event,
    # but are identical to the arguments passed to +StreamListener+s for each
    # event.
    class PullEvent
      # Wraps +arg+, an Array whose first element is the event type and whose
      # remaining elements are the event's arguments.
      def initialize(arg)
        @contents = arg
      end

      # Array-style access to the event's arguments; the event type itself
      # is not part of the indexed view, so index 0 is the first argument.
      #
      #  event[1]      # second argument
      #  event[0, 2]   # two arguments starting at the first
      #  event[0..1]   # first and second argument
      #
      # Indices behave as in Array#slice on the argument list: negative
      # values count back from the last argument, and out-of-range requests
      # return +nil+. Raises a RuntimeError if +start+ is neither a Range
      # nor a Numeric.
      def []( start, endd=nil)
        unless start.kind_of?( Range ) || start.kind_of?( Numeric )
          raise "Illegal argument #{start.inspect} (#{start.class})"
        end

        # Index a view without the leading type, so that ranges, lengths and
        # negative offsets all refer to arguments only.
        arguments = @contents.drop( 1 )
        if start.kind_of?( Range ) || endd.nil?
          arguments.slice( start )
        else
          arguments.slice( start, endd )
        end
      end

      # The type of this event. The predicates below recognize
      # :start_element, :end_element, :text, :processing_instruction,
      # :comment, :start_doctype, :attlistdecl, :elementdecl, :entitydecl,
      # :notationdecl, :entity, :cdata, :xmldecl, and :error.
      def event_type
        @contents[0]
      end

      # Content: [ String tag_name, Hash attributes ]
      def start_element?
        event_type == :start_element
      end

      # Content: [ String tag_name ]
      def end_element?
        event_type == :end_element
      end

      # Content: [ String raw_text, String unnormalized_text ]
      def text?
        event_type == :text
      end

      # Content: [ String text ]
      def instruction?
        event_type == :processing_instruction
      end

      # Content: [ String text ]
      def comment?
        event_type == :comment
      end

      # Content: [ String name, String pub_sys, String long_name, String uri ]
      def doctype?
        event_type == :start_doctype
      end

      # Content: [ String text ]
      def attlistdecl?
        event_type == :attlistdecl
      end

      # Content: [ String text ]
      def elementdecl?
        event_type == :elementdecl
      end

      # Due to the wonders of DTDs, an entity declaration can be just about
      # anything. There's no way to normalize it; you'll have to interpret the
      # content yourself. However, the following is true:
      #
      # * If the entity declaration is an internal entity:
      #   [ String name, String value ]
      #
      # Otherwise the content is: [ String text ]
      def entitydecl?
        event_type == :entitydecl
      end

      # Content: [ String text ]
      def notationdecl?
        event_type == :notationdecl
      end

      # Content: [ String text ]
      def entity?
        event_type == :entity
      end

      # Content: [ String text ]
      def cdata?
        event_type == :cdata
      end

      # Content: [ String version, String encoding, String standalone ]
      def xmldecl?
        event_type == :xmldecl
      end

      # True when the event's type is :error.
      def error?
        event_type == :error
      end

      # Renders the event as "type: [arguments]".
      def inspect
        "#{@contents[0]}: #{@contents[1..-1].inspect}"
      end
    end
  end
end