1# $Id: Reader.pm,v 1.1.1.1 2004/05/20 17:59:56 jpetri Exp $ 2 3package XML::SAX::PurePerl::Reader; 4 5use strict; 6use XML::SAX::PurePerl::Reader::URI; 7use XML::SAX::PurePerl::Productions qw( $SingleChar $Letter $NameChar ); 8use Exporter (); 9 10use vars qw(@ISA @EXPORT_OK); 11@ISA = qw(Exporter); 12@EXPORT_OK = qw( 13 EOF 14 BUFFER 15 INTERNAL_BUFFER 16 LINE 17 COLUMN 18 CURRENT 19 ENCODING 20); 21 22use constant EOF => 0; 23use constant BUFFER => 1; 24use constant INTERNAL_BUFFER => 2; 25use constant LINE => 3; 26use constant COLUMN => 4; 27use constant MATCHED => 5; 28use constant CURRENT => 6; 29use constant CONSUMED => 7; 30use constant ENCODING => 8; 31use constant SYSTEM_ID => 9; 32use constant PUBLIC_ID => 10; 33 34require XML::SAX::PurePerl::Reader::Stream; 35require XML::SAX::PurePerl::Reader::String; 36 37if ($] >= 5.007002) { 38 require XML::SAX::PurePerl::Reader::UnicodeExt; 39} 40else { 41 require XML::SAX::PurePerl::Reader::NoUnicodeExt; 42} 43 44sub new { 45 my $class = shift; 46 my $thing = shift; 47 48 # try to figure if this $thing is a handle of some sort 49 if (ref($thing) && UNIVERSAL::isa($thing, 'IO::Handle')) { 50 return XML::SAX::PurePerl::Reader::Stream->new($thing)->init; 51 } 52 my $ioref; 53 if (tied($thing)) { 54 my $class = ref($thing); 55 no strict 'refs'; 56 $ioref = $thing if defined &{"${class}::TIEHANDLE"}; 57 } 58 else { 59 eval { 60 $ioref = *{$thing}{IO}; 61 }; 62 undef $@; 63 } 64 if ($ioref) { 65 return XML::SAX::PurePerl::Reader::Stream->new($thing)->init; 66 } 67 68 if ($thing =~ /</) { 69 # assume it's a string 70 return XML::SAX::PurePerl::Reader::String->new($thing)->init; 71 } 72 73 # assume it is a uri 74 return XML::SAX::PurePerl::Reader::URI->new($thing)->init; 75} 76 77sub init { 78 my $self = shift; 79 $self->[LINE] = 1; 80 $self->[COLUMN] = 1; 81 $self->nextchar; 82 return $self; 83} 84 85sub match { 86 my $self = shift; 87 if ($self->match_nocheck(@_)) { 88 if ($self->[MATCHED] =~ $SingleChar) { 89 return 1; 90 } 91 throw XML::SAX::Exception::Parse ( 92 Message => "Not a valid XML character: '&#x". 93 sprintf("%X", ord($self->[MATCHED])). 94 ";'" 95 ); 96 } 97 return 0; 98} 99 100sub match_char { 101 my $self = shift; 102 103 if (defined($self->[CURRENT]) && $self->[CURRENT] eq $_[0]) { 104 $self->[MATCHED] = $_[0]; 105 $self->nextchar; 106 return 1; 107 } 108 $self->[MATCHED] = ''; 109 return 0; 110} 111 112sub match_re { 113 my $self = shift; 114 115 if ($self->[CURRENT] =~ $_[0]) { 116 $self->[MATCHED] = $self->[CURRENT]; 117 $self->nextchar; 118 return 1; 119 } 120 $self->[MATCHED] = ''; 121 return 0; 122} 123 124sub match_not { 125 my $self = shift; 126 127 my $current = $self->[CURRENT]; 128 return 0 unless defined $current; 129 130 for my $m (@_) { 131 if ($current eq $m) { 132 $self->[MATCHED] = ''; 133 return 0; 134 } 135 } 136 $self->[MATCHED] = $current; 137 $self->nextchar; 138 return 1; 139} 140 141my %hist; 142END { 143 foreach my $k (sort { $hist{$a} <=> $hist{$b} } keys %hist ) { 144 my $x = $k; 145 $k =~ s/^(.{80})(.{3}).*/$1\.\.\./s; 146 # warn("$k called $hist{$x} times\n"); 147 } 148} 149 150sub match_nonext { 151 my $self = shift; 152 153 my $current = $self->[CURRENT]; 154 return 0 unless defined $current; 155 156 foreach my $m (@_) { 157 # $hist{$m}++; 158 if (my $ref = ref($m)) { 159 if ($ref eq 'Regexp' && $current =~ $m) { 160 $self->[MATCHED] = $current; 161 return 1; 162 } 163 } 164 elsif ($current eq $m) { 165 $self->[MATCHED] = $current; 166 return 1; 167 } 168 } 169 $self->[MATCHED] = ''; 170 return 0; 171} 172 173sub match_nocheck { 174 my $self = shift; 175 176 if ($self->match_nonext(@_)) { 177 $self->nextchar; 178 179 return 1; 180 } 181 return 0; 182} 183 184sub matched { 185 my $self = shift; 186 return $self->[MATCHED]; 187} 188 189my $unpack_type = ($] >= 5.007002) ? 'U*' : 'C*'; 190 191sub match_string { 192 my $self = shift; 193 my ($str) = @_; 194 my $matched = ''; 195# for my $char (map { chr } unpack($unpack_type, $str)) { 196 for my $char (split //, $str) { 197 if ($self->match_char($char)) { 198 $matched .= $self->[MATCHED]; 199 } 200 else { 201 $self->buffer($matched); 202 return 0; 203 } 204 } 205 return 1; 206} 207 208# avoids split 209sub match_sequence { 210 my $self = shift; 211 my $matched = ''; 212 for my $char (@_) { 213 if ($self->match_char($char)) { 214 $matched .= $self->[MATCHED]; 215 } 216 else { 217 $self->buffer($matched); 218 return 0; 219 } 220 } 221 return 1; 222} 223 224sub consume_name { 225 my $self = shift; 226 227 my $current = $self->[CURRENT]; 228 return unless defined $current; # perhaps die here instead? 229 230 my $name; 231 if ($current eq '_') { 232 $name = '_'; 233 } 234 elsif ($current eq ':') { 235 $name = ':'; 236 } 237 else { 238 $self->consume($Letter) || 239 throw XML::SAX::Exception::Parse ( 240 Message => "Name contains invalid start character: '&#x". 241 sprintf("%X", ord($self->[CURRENT])). 242 ";'", reader => $self ); 243 $name = $self->[CONSUMED]; 244 } 245 246 $self->consume($NameChar); 247 $name .= $self->[CONSUMED]; 248 return $name; 249} 250 251sub consume { 252 my $self = shift; 253 254 my $consumed = ''; 255 256 while(!$self->eof && $self->match_re(@_)) { 257 $consumed .= $self->[MATCHED]; 258 } 259 return length($self->[CONSUMED] = $consumed); 260} 261 262 263 264sub consume_not { 265 my $self = shift; 266 267 my $consumed = ''; 268 269 while(!$self->[EOF] && $self->match_not(@_)) { 270 $consumed .= $self->[MATCHED]; 271 } 272 return length($self->[CONSUMED] = $consumed); 273} 274 275sub consumed { 276 my $self = shift; 277 return $self->[CONSUMED]; 278} 279 280sub current { 281 my $self = shift; 282 return $self->[CURRENT]; 283} 284 285sub buffer { 286 my $self = shift; 287 # warn("buffering: '$_[0]' + '$self->[CURRENT]' + '$self->[BUFFER]'\n"); 288 local $^W; 289 my $current = $self->[CURRENT]; 290 if ($] >= 5.006 && $] < 5.007) { 291 $current = pack("C0A*", $current); 292 } 293 $self->[BUFFER] = $_[0] . $current . $self->[BUFFER]; 294 $self->[COLUMN] -= length($_[0]); 295 $self->nextchar; 296} 297 298sub public_id { 299 my ($self, $value) = @_; 300 if (defined $value) { 301 return $self->[PUBLIC_ID] = $value; 302 } 303 return $self->[PUBLIC_ID]; 304} 305 306sub system_id { 307 my ($self, $value) = @_; 308 if (defined $value) { 309 return $self->[SYSTEM_ID] = $value; 310 } 311 return $self->[SYSTEM_ID]; 312} 313 314sub line { 315 shift->[LINE]; 316} 317 318sub column { 319 shift->[COLUMN]; 320} 321 322sub get_encoding { 323 my $self = shift; 324 return $self->[ENCODING]; 325} 326 327sub eof { 328 return shift->[EOF]; 329} 330 3311; 332 333__END__ 334 335=head1 NAME 336 337XML::Parser::PurePerl::Reader - Abstract Reader factory class 338 339=cut 340