• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /macosx-10.10/CPANInternal-159.1/DateTime-Format-Builder-0.80/lib/DateTime/Format/Builder/
1package DateTime::Format::Builder::Parser;
2use strict;
3use vars qw( $VERSION );
4use Carp qw( croak );
5use Params::Validate qw(
6    validate SCALAR CODEREF UNDEF ARRAYREF
7);
8use Scalar::Util qw( weaken );
9
10=head1 NAME
11
12DateTime::Format::Builder::Parser - Parser creation
13
14=head1 SYNOPSIS
15
16    my $class = 'DateTime::Format::Builder::Parser';
17    my $parser = $class->create_single_parser( %specs );
18
19=head1 DESCRIPTION
20
21This is a utility class for L<DateTime::Format::Builder> that
22handles creation of parsers. It is to here that C<Builder> delegates
23most of its responsibilities.
24
25=cut
26
27$VERSION = '0.77';
28
29=head1 CONSTRUCTORS
30
31=cut
32
33sub on_fail
34{
35    my ($self, $input, $parent) = @_;
36    my $maker = $self->maker;
37    if ( $maker and $maker->can( 'on_fail' ) ) {
38        $maker->on_fail( $input );
39    } else {
40        croak __PACKAGE__.": Invalid date format: $input";
41    }
42}
43
44sub no_parser
45{
46    croak "No parser set for this parser object.";
47}
48
49sub new
50{
51    my $class = shift;
52    $class = ref($class)||$class;
53    my $i = 0;
54    my $self = bless {
55        on_fail => \&on_fail,
56        parser => \&no_parser,
57    }, $class;
58
59    return $self;
60}
61
62sub maker { $_[0]->{maker} }
63
64sub set_maker
65{
66    my $self  = shift;
67    my $maker = shift;
68
69    $self->{maker} = $maker;
70    weaken $self->{maker}
71        if ref $self->{maker};
72
73    return $self;
74}
75
76sub fail
77{
78    my ($self, $parent, $input) = @_;
79    $self->{on_fail}->( $self, $input, $parent );
80}
81
82sub parse
83{
84    my ( $self, $parent, $input, @args ) = @_;
85    my $r = $self->{parser}->( $parent, $input, @args );
86    $self->fail( $parent, $input ) unless defined $r;
87    $r;
88}
89
90sub set_parser
91{
92    my ($self, $parser) = @_;
93    $self->{parser} = $parser;
94    $self;
95}
96
97sub set_fail
98{
99    my ($self, $fail) = @_;
100    $self->{on_fail} = $fail;
101    $self;
102}
103
104=head1 METHODS
105
106There are two sorts of methods in this class. Those used by
107parser implementations and those used by C<Builder>. It is
108generally unlikely the user will want to use any of them.
109
110They are presented, grouped according to use.
111
112=head2 Parameter Handling (implementations)
113
114These methods allow implementations to have validation of
115their arguments in a standard manner and due to C<Parser>'s
116impelementation, these methods also allow C<Parser> to
117determine which implementation to use.
118
119=cut
120
121my @callbacks = qw( on_match on_fail postprocess preprocess );
122
123{
124
125=head3 Common parameters
126
127These parameters appear for all parser implementations.
128These are primarily documented in
129L<the main docs|DateTime::Format::Builder/"SINGLE SPECIFICATIONS">.
130
131=over 4
132
133=item *
134
135B<on_match>
136
137=item *
138
139B<on_fail>
140
141=item *
142
143B<postprocess>
144
145=item *
146
147B<preprocess>
148
149=item *
150
151B<label>
152
153=item *
154
155B<length> may be a number or an arrayref of numbers
156indicating the length of the input. This lets us optimise in
157the case of static length input. If supplying an arrayref of
158numbers, please keep the number of numbers to a minimum.
159
160=back
161
162=cut
163
164    my %params = (
165	common => {
166	    length	=> {
167		type      => SCALAR|ARRAYREF,
168		optional  => 1,
169		callbacks => {
170		    'is an int' => sub { ref $_[0] ? 1 : $_[0] !~ /\D/ },
171		    'not empty' => sub { ref $_[0] ? @{$_[0]} >= 1 : 1 },
172		}
173	    },
174
175	    # Stuff used by callbacks
176	    label	=> { type => SCALAR,	optional => 1 },
177	    ( map { $_ => { type => CODEREF|ARRAYREF, optional => 1 } } @callbacks ),
178	},
179    );
180
181=head3 params
182
183    my $params = $self->params();
184    validate( @_, $params );
185
186Returns declared parameters and C<common> parameters in a hashref
187suitable for handing to L<Params::Validate>'s C<validate> function.
188
189=cut
190
191    sub params
192    {
193	my $self = shift;
194	my $caller = ref $self || $self;
195	return { map { %$_ } @params{ $caller, 'common' } }
196    }
197
198=head3 params_all
199
200    my $all_params = $self->params_all();
201
202Returns a hash of all the valid options. Not recommended
203for general use.
204
205=cut
206
207    my $all_params;
208    sub params_all
209    {
210	return $all_params if defined $all_params;
211	my %all_params = map { %$_ } values %params;
212	$_->{optional} = 1 for values %all_params;
213	$all_params = \%all_params;
214    }
215
216=head3 valid_params
217
218    __PACKAGE__->valid_params( %params );
219
220Arguments are as per L<Params::Validate>'s C<validate> function.
221This method is used to declare what your valid arguments are in
222a parser specification.
223
224=cut
225
226    my %inverse;
227    sub valid_params
228    {
229	my $self = shift;
230	my $from = (caller)[0];
231	my %args = @_;
232	$params{ $from } = \%args;
233	for (keys %args)
234	{
235	    # %inverse contains keys matching all the
236	    # possible params; values are the class if and
237	    # only if that class is the only one that uses
238	    # the given param.
239	    $inverse{$_} = exists $inverse{$_} ? undef : $from;
240	}
241	undef $all_params;
242	1;
243    }
244
245=head3 whose_params
246
247    my $class = whose_params( $key );
248
249Internal function which merely returns to which class a
250parameter is unique. If not unique, returns C<undef>.
251
252=cut
253
254    sub whose_params
255    {
256	my $param = shift;
257	return $inverse{$param};
258    }
259}
260
261=head2 Organising and Creating Parsers
262
263=head3 create_single_parser
264
265This takes a single specification and returns a coderef that
266is a parser that suits that specification. This is the end
267of the line for all the parser creation methods. It
268delegates no further.
269
270If a coderef is specified, then that coderef is immediately
271returned (it is assumed to be appropriate).
272
273The single specification (if not a coderef) can be either a
274hashref or a hash. The keys and values must be as per the
275L<specification|/"SINGLE SPECIFICATIONS">.
276
277It is here that any arrays of callbacks are unified. It is
278also here that any parser implementations are used. With
279the spec that's given, the keys are looked at and whichever
280module is the first to have a unique key in the spec is the
281one to whom the spec is given.
282
283B<Note>: please declare a C<valid_params> argument with an
284uppercase letter. For example, if you're writing
285C<DateTime::Format::Builder::Parser::Fnord>, declare a
286parameter called C<Fnord>. Similarly, C<DTFBP::Strptime>
287should have C<Strptime> and C<DTFBP::Regex> should have
288C<Regex>. These latter two don't for backwards compatibility
289reasons.
290
291The returned parser will return either a C<DateTime> object
292or C<undef>.
293
294=cut
295
296sub create_single_object
297{
298    my ( $self ) = shift;
299    my $obj = $self->new;
300    my $parser = $self->create_single_parser( @_ );
301
302    $obj->set_parser( $parser );
303}
304
305sub create_single_parser
306{
307    my $class = shift;
308    return $_[0] if ref $_[0] eq 'CODE'; # already code
309    @_ = %{ $_[0] } if ref $_[0] eq 'HASH'; # turn hashref into hash
310    # ordinary boring sort
311    my %args = validate( @_, params_all() );
312
313    # Determine variables for ease of reference.
314    for (@callbacks)
315    {
316	$args{$_} = $class->merge_callbacks( $args{$_} ) if $args{$_};
317    }
318
319    # Determine parser class
320    my $from;
321    for ( keys %args )
322    {
323	$from = whose_params( $_ );
324	next if (not defined $from) or ($from eq 'common');
325	last;
326    }
327    croak "Could not identify a parsing module to use." unless $from;
328
329    # Find and call parser creation method
330    my $method = $from->can( "create_parser" )
331	or croak "Can't create a $_ parser (no appropriate create_parser method)";
332    my @args = %args;
333    %args = validate( @args, $from->params() );
334    $from->$method( %args );
335}
336
337=head3 merge_callbacks
338
339Produce either undef or a single coderef from either undef,
340an empty array, a single coderef or an array of coderefs
341
342=cut
343
344sub merge_callbacks
345{
346    my $self = shift;
347
348    return unless @_; # No arguments
349    return unless $_[0]; # Irrelevant argument
350    my @callbacks = @_;
351    if (@_ == 1)
352    {
353	return $_[0] if ref $_[0] eq 'CODE';
354	@callbacks = @{ $_[0] } if ref $_[0] eq 'ARRAY';
355    }
356    return unless @callbacks;
357
358    for (@callbacks)
359    {
360	croak "All callbacks must be coderefs!" unless ref $_ eq 'CODE';
361    }
362
363    return sub {
364	my $rv;
365	my %args = @_;
366	for my $cb (@callbacks)
367	{
368	    $rv = $cb->( %args );
369	    return $rv unless $rv;
370	    # Ugh. Symbiotic. All but postprocessor return the date.
371	    $args{input} = $rv unless $args{parsed};
372	}
373	$rv;
374    };
375}
376
377=head2 create_multiple_parsers
378
379Given the options block (as made from C<create_parser()>)
380and a list of single parser specifications, this returns a
381coderef that returns either the resultant C<DateTime> object
382or C<undef>.
383
384It first sorts the specifications using C<sort_parsers()>
385and then creates the function based on what that returned.
386
387=cut
388
389sub create_multiple_parsers
390{
391    my $class = shift;
392    my ($options, @specs) = @_;
393
394    my $obj = $class->new;
395
396    # Organise the specs, and transform them into parsers.
397    my ($lengths, $others) = $class->sort_parsers( $options, \@specs );
398
399    # Merge callbacks if any.
400    for ( 'preprocess' ) {
401	$options->{$_} = $class->merge_callbacks(
402	    $options->{$_}
403	) if $options->{$_};
404    }
405    # Custom fail method?
406    $obj->set_fail( $options->{on_fail} ) if exists $options->{on_fail};
407    # Who's our maker?
408    $obj->set_maker( $options->{maker} ) if exists $options->{maker};
409
410    # We don't want to save the whole options hash as a closure, since
411    # that can cause a circular reference when $options->{maker} is
412    # set.
413    my $preprocess = $options->{preprocess};
414
415    # These are the innards of a multi-parser.
416    my $parser = sub {
417	my ($self, $date, @args) = @_;
418	return unless defined $date;
419
420	# Parameters common to the callbacks. Pre-prepared.
421	my %param = (
422	    self => $self,
423	    ( @args ? (args => \@args) : () ),
424	);
425
426	my %p;
427	# Preprocess and potentially fill %p
428	if ($preprocess)
429	{
430	    $date = $preprocess->(
431		input => $date, parsed => \%p, %param
432	    );
433	}
434
435	# Find length parser
436	if (%$lengths)
437	{
438	    my $length = length $date;
439	    my $parser = $lengths->{$length};
440	    if ($parser)
441	    {
442		# Found one, call it with _copy_ of %p
443		my $dt = $parser->( $self, $date, { %p }, @args );
444		return $dt if defined $dt;
445	    }
446	}
447	# Or calls all others, with _copy_ of %p
448	for my $parser (@$others)
449	{
450	    my $dt = $parser->( $self, $date, { %p }, @args );
451	    return $dt if defined $dt;
452	}
453	# Failed, return undef.
454	return;
455    };
456    $obj->set_parser( $parser );
457}
458
459=head2 sort_parsers
460
461This takes the list of specifications and sorts them while
462turning the specifications into parsers. It returns two
463values: the first is a hashref containing all the length
464based parsers. The second is an array containing all the
465other parsers.
466
467If any of the specs are not code or hash references, then it
468will call C<croak()>.
469
470Code references are put directly into the 'other' array. Any
471hash references without I<length> keys are run through
472C<create_single_parser()> and the resultant parser is placed
473in the 'other' array.
474
475Hash references B<with> I<length> keys are run through
476C<create_single_parser()>, but the resultant parser is used
477as the value in the length hashref with the length being the
478key. If two or more parsers have the same I<length>
479specified then an error is thrown.
480
481=cut
482
483sub sort_parsers
484{
485    my $class = shift;
486    my ($options, $specs) = @_;
487    my (%lengths, @others);
488
489    for my $spec (@$specs)
490    {
491	# Put coderefs straight into the 'other' heap.
492	if (ref $spec eq 'CODE')
493	{
494	    push @others, $spec;
495	}
496	# Specifications...
497	elsif (ref $spec eq 'HASH')
498	{
499	    if (exists $spec->{length})
500	    {
501		my $code = $class->create_single_parser( %$spec );
502		my @lengths = ref $spec->{length}
503		    ? @{ $spec->{length} }
504		    : ( $spec->{length} );
505		for my $length ( @lengths )
506		{
507		    push @{ $lengths{$length} }, $code;
508		}
509	    }
510	    else
511	    {
512		push @others, $class->create_single_parser( %$spec );
513	    }
514	}
515	# Something else
516	else
517	{
518	    croak "Invalid specification in list.";
519	}
520    }
521
522    while (my ($length, $parsers) = each %lengths)
523    {
524	$lengths{$length} = $class->chain_parsers( $parsers );
525    }
526
527    return ( \%lengths, \@others );
528}
529
530sub chain_parsers
531{
532    my ($self, $parsers) = @_;
533    return $parsers->[0] if @$parsers == 1;
534    return sub {
535	my $self = shift;
536	for my $parser (@$parsers)
537	{
538	    my $rv = $self->$parser( @_ );
539	    return $rv if defined $rv;
540	}
541	return undef;
542    };
543}
544
545=head2 create_parser
546
547C<create_class()> is mostly a wrapper around
548C<create_parser()> that does loops and stuff and calls
549C<create_parser()> to create the actual parsers.
550
551C<create_parser()> takes the parser specifications (be they
552single specifications or multiple specifications) and
553returns an anonymous coderef that is suitable for use as a
554method. The coderef will call C<croak()> in the event of
555being unable to parse the single string it expects as input.
556
557The simplest input is that of a single specification,
558presented just as a plain hash, not a hashref. This is
559passed directly to C<create_single_parser()> with the return
560value from that being wrapped in a function that lets it
561C<croak()> on failure, with that wrapper being returned.
562
563If the first argument to C<create_parser()> is an arrayref,
564then that is taken to be an options block (as per the
565multiple parser specification documented earlier).
566
567Any further arguments should be either hashrefs or coderefs.
568If the first argument after the optional arrayref is not a
569hashref or coderef then that argument and all remaining
570arguments are passed off to C<create_single_parser()>
571directly. If the first argument is a hashref or coderef,
572then it and the remaining arguments are passed to
573C<create_multiple_parsers()>.
574
575The resultant coderef from calling either of the creation
576methods is then wrapped in a function that calls C<croak()>
577in event of failure or the C<DateTime> object in event of
578success.
579
580=cut
581
582sub create_parser
583{
584    my $class = shift;
585    if (not ref $_[0])
586    {
587	# Simple case of single specification as a hash
588	return $class->create_single_object( @_ )
589    }
590
591    # Let's see if we were given an options block
592    my %options;
593    while ( ref $_[0] eq 'ARRAY' )
594    {
595	my $options = shift;
596	%options = ( %options, @$options );
597    }
598
599    # Now, can we create a multi-parser out of the remaining arguments?
600    if (ref $_[0] eq 'HASH' or ref $_[0] eq 'CODE')
601    {
602	return $class->create_multiple_parsers( \%options, @_ );
603    }
604    else
605    {
606	# If it wasn't a HASH or CODE, then it was (ideally)
607        # a list of pairs describing a single specification.
608        return $class->create_multiple_parsers( \%options, { @_ } );
609    }
610}
611
612=head1 FINDING IMPLEMENTATIONS
613
614C<Parser> automatically loads any parser classes in C<@INC>.
615
616To be loaded automatically, you must be a
617C<DateTime::Format::Builder::Parser::XXX> module.
618
619To be invisible, and not loaded, start your class with a lower class
620letter. These are ignored.
621
622=cut
623
624# Find all our workers
625{
626    use Class::Factory::Util;
627
628    foreach my $worker ( __PACKAGE__->subclasses )
629    {
630        eval "use DateTime::Format::Builder::Parser::$worker;";
631        die $@ if $@;
632    }
633}
634
6351;
636
637__END__
638
639=head1 WRITING A PARSER IMPLEMENTATION
640
641=head2 Naming your parser
642
643Create a module and name it in the form
644C<DateTime::Format::Builder::Parser::XXX>
645where I<XXX> is whatever you like,
646so long as it doesn't start with a
647lower case letter.
648
649Alternatively, call it something completely different
650if you don't mind the users explicitly loading your module.
651
652I'd recommend keeping within the C<DateTime::Format::Builder>
653namespace though --- at the time of writing I've not given
654thought to what non-auto loaded ones should be called. Any
655ideas, please email me.
656
657=head2 Declaring specification arguments
658
659Call C<<DateTime::Format::Builder::Parser->valid_params()>> with
660C<Params::Validate> style arguments. For example:
661
662   DateTime::Format::Builder::Parser->valid_params(
663       params => { type => ARRAYREF },
664       Regex  => { type => SCALARREF, callbacks => {
665          'is a regex' => sub { ref(shift) eq 'Regexp' }
666       }}
667   );
668
669Start one of the key names with a capital letter. Ideally that key
670should match the I<XXX> from earlier. This will be used to help
671identify which module a parser specification should be given to.
672
673The key names I<on_match>, I<on_fail>, I<postprocess>, I<preprocess>,
674I<label> and I<length> are predefined. You are recommended to make use
675of them. You may ignore I<length> as C<sort_parsers> takes care of that.
676
677=head2 Define create_parser
678
679A class method of the name C<create_parser> that does the following:
680
681Its arguments are as for a normal method (i.e. class as first argument).
682The other arguments are the result from a call to C<Params::Validate>
683according to your specification (the C<valid_params> earlier), i.e. a
684hash of argument name and value.
685
686The return value should be a coderef that takes a date string as its
687first argument and returns either a C<DateTime> object or C<undef>.
688
689=head2 Callbacks
690
691It is preferred that you support some callbacks to your parsers.
692In particular, C<preprocess>, C<on_match>, C<on_fail> and
693C<postprocess>. See the L<main Builder|DateTime::Format::Builder>
694docs for the appropriate placing of calls to the callbacks.
695
696=head1 SUPPORT
697
698Support for this module is provided via the datetime@perl.org email
699list. See http://lists.perl.org/ for more details.
700
701Alternatively, log them via the CPAN RT system via the web or email:
702
703    http://perl.dellah.org/rt/dtbuilder
704    bug-datetime-format-builder@rt.cpan.org
705
706This makes it much easier for me to track things and thus means
707your problem is less likely to be neglected.
708
709=head1 THANKS
710
711See L<DateTime::Format::Builder>.
712
713=head1 LICENCE AND COPYRIGHT
714
715Copyright E<copy> Iain Truskett, 2003. All rights reserved.
716
717This library is free software; you can redistribute it and/or modify
718it under the same terms as Perl itself, either Perl version 5.000 or,
719at your option, any later version of Perl 5 you may have available.
720
721The full text of the licences can be found in the F<Artistic> and
722F<COPYING> files included with this module, or in L<perlartistic> and
723L<perlgpl> as supplied with Perl 5.8.1 and later.
724
725=head1 AUTHOR
726
727Iain Truskett <spoon@cpan.org>
728
729=head1 SEE ALSO
730
731C<datetime@perl.org> mailing list.
732
733http://datetime.perl.org/
734
735L<perl>, L<DateTime>, L<DateTime::Format::Builder>.
736
737L<Params::Validate>.
738
739L<DateTime::Format::Builder::Parser::generic>,
740L<DateTime::Format::Builder::Parser::Dispatch>,
741L<DateTime::Format::Builder::Parser::Quick>,
742L<DateTime::Format::Builder::Parser::Regex>,
743L<DateTime::Format::Builder::Parser::Strptime>.
744
745=cut
746
747