1# $Id: Reader.pm,v 1.1.1.1 2004/05/20 17:59:56 jpetri Exp $
2
3package XML::SAX::PurePerl::Reader;
4
5use strict;
6use XML::SAX::PurePerl::Reader::URI;
7use XML::SAX::PurePerl::Productions qw( $SingleChar $Letter $NameChar );
8use Exporter ();
9
10use vars qw(@ISA @EXPORT_OK);
11@ISA = qw(Exporter);
12@EXPORT_OK = qw(
13    EOF
14    BUFFER
15    INTERNAL_BUFFER
16    LINE
17    COLUMN
18    CURRENT
19    ENCODING
20);
21
22use constant EOF => 0;
23use constant BUFFER => 1;
24use constant INTERNAL_BUFFER => 2;
25use constant LINE => 3;
26use constant COLUMN => 4;
27use constant MATCHED => 5;
28use constant CURRENT => 6;
29use constant CONSUMED => 7;
30use constant ENCODING => 8;
31use constant SYSTEM_ID => 9;
32use constant PUBLIC_ID => 10;
33
34require XML::SAX::PurePerl::Reader::Stream;
35require XML::SAX::PurePerl::Reader::String;
36
37if ($] >= 5.007002) {
38    require XML::SAX::PurePerl::Reader::UnicodeExt;
39}
40else {
41    require XML::SAX::PurePerl::Reader::NoUnicodeExt;
42}
43
44sub new {
45    my $class = shift;
46    my $thing = shift;
47
48    # try to figure if this $thing is a handle of some sort
49    if (ref($thing) && UNIVERSAL::isa($thing, 'IO::Handle')) {
50        return XML::SAX::PurePerl::Reader::Stream->new($thing)->init;
51    }
52    my $ioref;
53    if (tied($thing)) {
54        my $class = ref($thing);
55        no strict 'refs';
56        $ioref = $thing if defined &{"${class}::TIEHANDLE"};
57    }
58    else {
59        eval {
60            $ioref = *{$thing}{IO};
61        };
62        undef $@;
63    }
64    if ($ioref) {
65        return XML::SAX::PurePerl::Reader::Stream->new($thing)->init;
66    }
67
68    if ($thing =~ /</) {
69        # assume it's a string
70        return XML::SAX::PurePerl::Reader::String->new($thing)->init;
71    }
72
73    # assume it is a uri
74    return XML::SAX::PurePerl::Reader::URI->new($thing)->init;
75}
76
77sub init {
78    my $self = shift;
79    $self->[LINE] = 1;
80    $self->[COLUMN] = 1;
81    $self->nextchar;
82    return $self;
83}
84
85sub match {
86    my $self = shift;
87    if ($self->match_nocheck(@_)) {
88        if ($self->[MATCHED] =~ $SingleChar) {
89            return 1;
90        }
91        throw XML::SAX::Exception::Parse (
92            Message => "Not a valid XML character: '&#x".
93                        sprintf("%X", ord($self->[MATCHED])).
94                        ";'"
95        );
96    }
97    return 0;
98}
99
100sub match_char {
101    my $self = shift;
102
103    if (defined($self->[CURRENT]) && $self->[CURRENT] eq $_[0]) {
104        $self->[MATCHED] = $_[0];
105        $self->nextchar;
106        return 1;
107    }
108    $self->[MATCHED] = '';
109    return 0;
110}
111
112sub match_re {
113    my $self = shift;
114
115    if ($self->[CURRENT] =~ $_[0]) {
116        $self->[MATCHED] = $self->[CURRENT];
117        $self->nextchar;
118        return 1;
119    }
120    $self->[MATCHED] = '';
121    return 0;
122}
123
124sub match_not {
125    my $self = shift;
126
127    my $current = $self->[CURRENT];
128    return 0 unless defined $current;
129
130    for my $m (@_) {
131        if ($current eq $m) {
132            $self->[MATCHED] = '';
133            return 0;
134        }
135    }
136    $self->[MATCHED] = $current;
137    $self->nextchar;
138    return 1;
139}
140
141my %hist;
142END {
143    foreach my $k (sort { $hist{$a} <=> $hist{$b} } keys %hist ) {
144        my $x = $k;
145        $k =~ s/^(.{80})(.{3}).*/$1\.\.\./s;
146        # warn("$k called $hist{$x} times\n");
147    }
148}
149
150sub match_nonext {
151    my $self = shift;
152
153    my $current = $self->[CURRENT];
154    return 0 unless defined $current;
155
156    foreach my $m (@_) {
157        # $hist{$m}++;
158        if (my $ref = ref($m)) {
159            if ($ref eq 'Regexp' && $current =~ $m) {
160                $self->[MATCHED] = $current;
161                return 1;
162            }
163        }
164        elsif ($current eq $m) {
165            $self->[MATCHED] = $current;
166            return 1;
167        }
168    }
169    $self->[MATCHED] = '';
170    return 0;
171}
172
173sub match_nocheck {
174    my $self = shift;
175
176    if ($self->match_nonext(@_)) {
177        $self->nextchar;
178
179        return 1;
180    }
181    return 0;
182}
183
184sub matched {
185    my $self = shift;
186    return $self->[MATCHED];
187}
188
189my $unpack_type = ($] >= 5.007002) ? 'U*' : 'C*';
190
191sub match_string {
192    my $self = shift;
193    my ($str) = @_;
194    my $matched = '';
195#    for my $char (map { chr } unpack($unpack_type, $str)) {
196    for my $char (split //, $str) {
197        if ($self->match_char($char)) {
198            $matched .= $self->[MATCHED];
199        }
200        else {
201            $self->buffer($matched);
202            return 0;
203        }
204    }
205    return 1;
206}
207
208# avoids split
209sub match_sequence {
210    my $self = shift;
211    my $matched = '';
212    for my $char (@_) {
213        if ($self->match_char($char)) {
214            $matched .= $self->[MATCHED];
215        }
216        else {
217            $self->buffer($matched);
218            return 0;
219        }
220    }
221    return 1;
222}
223
224sub consume_name {
225    my $self = shift;
226
227    my $current = $self->[CURRENT];
228    return unless defined $current; # perhaps die here instead?
229
230    my $name;
231    if ($current eq '_') {
232        $name = '_';
233    }
234    elsif ($current eq ':') {
235        $name = ':';
236    }
237    else {
238        $self->consume($Letter) ||
239                throw XML::SAX::Exception::Parse (
240                    Message => "Name contains invalid start character: '&#x".
241                                sprintf("%X", ord($self->[CURRENT])).
242                                ";'", reader => $self );
243        $name = $self->[CONSUMED];
244    }
245
246    $self->consume($NameChar);
247    $name .= $self->[CONSUMED];
248    return $name;
249}
250
251sub consume {
252    my $self = shift;
253
254    my $consumed = '';
255
256    while(!$self->eof && $self->match_re(@_)) {
257        $consumed .= $self->[MATCHED];
258    }
259    return length($self->[CONSUMED] = $consumed);
260}
261
262
263
264sub consume_not {
265    my $self = shift;
266
267    my $consumed = '';
268
269    while(!$self->[EOF] && $self->match_not(@_)) {
270        $consumed .= $self->[MATCHED];
271    }
272    return length($self->[CONSUMED] = $consumed);
273}
274
275sub consumed {
276    my $self = shift;
277    return $self->[CONSUMED];
278}
279
280sub current {
281    my $self = shift;
282    return $self->[CURRENT];
283}
284
285sub buffer {
286    my $self = shift;
287    # warn("buffering: '$_[0]' + '$self->[CURRENT]' + '$self->[BUFFER]'\n");
288    local $^W;
289    my $current = $self->[CURRENT];
290    if ($] >= 5.006 && $] < 5.007) {
291        $current = pack("C0A*", $current);
292    }
293    $self->[BUFFER] = $_[0] . $current . $self->[BUFFER];
294    $self->[COLUMN] -= length($_[0]);
295    $self->nextchar;
296}
297
298sub public_id {
299    my ($self, $value) = @_;
300    if (defined $value) {
301        return $self->[PUBLIC_ID] = $value;
302    }
303    return $self->[PUBLIC_ID];
304}
305
306sub system_id {
307    my ($self, $value) = @_;
308    if (defined $value) {
309        return $self->[SYSTEM_ID] = $value;
310    }
311    return $self->[SYSTEM_ID];
312}
313
314sub line {
315    shift->[LINE];
316}
317
318sub column {
319    shift->[COLUMN];
320}
321
322sub get_encoding {
323    my $self = shift;
324    return $self->[ENCODING];
325}
326
327sub eof {
328    return shift->[EOF];
329}
330
3311;
332
333__END__
334
335=head1 NAME
336
337XML::Parser::PurePerl::Reader - Abstract Reader factory class
338
339=cut
340