1package DateTime::Format::Builder::Parser; 2use strict; 3use vars qw( $VERSION ); 4use Carp qw( croak ); 5use Params::Validate qw( 6 validate SCALAR CODEREF UNDEF ARRAYREF 7); 8use Scalar::Util qw( weaken ); 9 10=head1 NAME 11 12DateTime::Format::Builder::Parser - Parser creation 13 14=head1 SYNOPSIS 15 16 my $class = 'DateTime::Format::Builder::Parser'; 17 my $parser = $class->create_single_parser( %specs ); 18 19=head1 DESCRIPTION 20 21This is a utility class for L<DateTime::Format::Builder> that 22handles creation of parsers. It is to here that C<Builder> delegates 23most of its responsibilities. 24 25=cut 26 27$VERSION = '0.77'; 28 29=head1 CONSTRUCTORS 30 31=cut 32 33sub on_fail 34{ 35 my ($self, $input, $parent) = @_; 36 my $maker = $self->maker; 37 if ( $maker and $maker->can( 'on_fail' ) ) { 38 $maker->on_fail( $input ); 39 } else { 40 croak __PACKAGE__.": Invalid date format: $input"; 41 } 42} 43 44sub no_parser 45{ 46 croak "No parser set for this parser object."; 47} 48 49sub new 50{ 51 my $class = shift; 52 $class = ref($class)||$class; 53 my $i = 0; 54 my $self = bless { 55 on_fail => \&on_fail, 56 parser => \&no_parser, 57 }, $class; 58 59 return $self; 60} 61 62sub maker { $_[0]->{maker} } 63 64sub set_maker 65{ 66 my $self = shift; 67 my $maker = shift; 68 69 $self->{maker} = $maker; 70 weaken $self->{maker} 71 if ref $self->{maker}; 72 73 return $self; 74} 75 76sub fail 77{ 78 my ($self, $parent, $input) = @_; 79 $self->{on_fail}->( $self, $input, $parent ); 80} 81 82sub parse 83{ 84 my ( $self, $parent, $input, @args ) = @_; 85 my $r = $self->{parser}->( $parent, $input, @args ); 86 $self->fail( $parent, $input ) unless defined $r; 87 $r; 88} 89 90sub set_parser 91{ 92 my ($self, $parser) = @_; 93 $self->{parser} = $parser; 94 $self; 95} 96 97sub set_fail 98{ 99 my ($self, $fail) = @_; 100 $self->{on_fail} = $fail; 101 $self; 102} 103 104=head1 METHODS 105 106There are two sorts of methods in this class. Those used by 107parser implementations and those used by C<Builder>. It is 108generally unlikely the user will want to use any of them. 109 110They are presented, grouped according to use. 111 112=head2 Parameter Handling (implementations) 113 114These methods allow implementations to have validation of 115their arguments in a standard manner and due to C<Parser>'s 116impelementation, these methods also allow C<Parser> to 117determine which implementation to use. 118 119=cut 120 121my @callbacks = qw( on_match on_fail postprocess preprocess ); 122 123{ 124 125=head3 Common parameters 126 127These parameters appear for all parser implementations. 128These are primarily documented in 129L<the main docs|DateTime::Format::Builder/"SINGLE SPECIFICATIONS">. 130 131=over 4 132 133=item * 134 135B<on_match> 136 137=item * 138 139B<on_fail> 140 141=item * 142 143B<postprocess> 144 145=item * 146 147B<preprocess> 148 149=item * 150 151B<label> 152 153=item * 154 155B<length> may be a number or an arrayref of numbers 156indicating the length of the input. This lets us optimise in 157the case of static length input. If supplying an arrayref of 158numbers, please keep the number of numbers to a minimum. 159 160=back 161 162=cut 163 164 my %params = ( 165 common => { 166 length => { 167 type => SCALAR|ARRAYREF, 168 optional => 1, 169 callbacks => { 170 'is an int' => sub { ref $_[0] ? 1 : $_[0] !~ /\D/ }, 171 'not empty' => sub { ref $_[0] ? @{$_[0]} >= 1 : 1 }, 172 } 173 }, 174 175 # Stuff used by callbacks 176 label => { type => SCALAR, optional => 1 }, 177 ( map { $_ => { type => CODEREF|ARRAYREF, optional => 1 } } @callbacks ), 178 }, 179 ); 180 181=head3 params 182 183 my $params = $self->params(); 184 validate( @_, $params ); 185 186Returns declared parameters and C<common> parameters in a hashref 187suitable for handing to L<Params::Validate>'s C<validate> function. 188 189=cut 190 191 sub params 192 { 193 my $self = shift; 194 my $caller = ref $self || $self; 195 return { map { %$_ } @params{ $caller, 'common' } } 196 } 197 198=head3 params_all 199 200 my $all_params = $self->params_all(); 201 202Returns a hash of all the valid options. Not recommended 203for general use. 204 205=cut 206 207 my $all_params; 208 sub params_all 209 { 210 return $all_params if defined $all_params; 211 my %all_params = map { %$_ } values %params; 212 $_->{optional} = 1 for values %all_params; 213 $all_params = \%all_params; 214 } 215 216=head3 valid_params 217 218 __PACKAGE__->valid_params( %params ); 219 220Arguments are as per L<Params::Validate>'s C<validate> function. 221This method is used to declare what your valid arguments are in 222a parser specification. 223 224=cut 225 226 my %inverse; 227 sub valid_params 228 { 229 my $self = shift; 230 my $from = (caller)[0]; 231 my %args = @_; 232 $params{ $from } = \%args; 233 for (keys %args) 234 { 235 # %inverse contains keys matching all the 236 # possible params; values are the class if and 237 # only if that class is the only one that uses 238 # the given param. 239 $inverse{$_} = exists $inverse{$_} ? undef : $from; 240 } 241 undef $all_params; 242 1; 243 } 244 245=head3 whose_params 246 247 my $class = whose_params( $key ); 248 249Internal function which merely returns to which class a 250parameter is unique. If not unique, returns C<undef>. 251 252=cut 253 254 sub whose_params 255 { 256 my $param = shift; 257 return $inverse{$param}; 258 } 259} 260 261=head2 Organising and Creating Parsers 262 263=head3 create_single_parser 264 265This takes a single specification and returns a coderef that 266is a parser that suits that specification. This is the end 267of the line for all the parser creation methods. It 268delegates no further. 269 270If a coderef is specified, then that coderef is immediately 271returned (it is assumed to be appropriate). 272 273The single specification (if not a coderef) can be either a 274hashref or a hash. The keys and values must be as per the 275L<specification|/"SINGLE SPECIFICATIONS">. 276 277It is here that any arrays of callbacks are unified. It is 278also here that any parser implementations are used. With 279the spec that's given, the keys are looked at and whichever 280module is the first to have a unique key in the spec is the 281one to whom the spec is given. 282 283B<Note>: please declare a C<valid_params> argument with an 284uppercase letter. For example, if you're writing 285C<DateTime::Format::Builder::Parser::Fnord>, declare a 286parameter called C<Fnord>. Similarly, C<DTFBP::Strptime> 287should have C<Strptime> and C<DTFBP::Regex> should have 288C<Regex>. These latter two don't for backwards compatibility 289reasons. 290 291The returned parser will return either a C<DateTime> object 292or C<undef>. 293 294=cut 295 296sub create_single_object 297{ 298 my ( $self ) = shift; 299 my $obj = $self->new; 300 my $parser = $self->create_single_parser( @_ ); 301 302 $obj->set_parser( $parser ); 303} 304 305sub create_single_parser 306{ 307 my $class = shift; 308 return $_[0] if ref $_[0] eq 'CODE'; # already code 309 @_ = %{ $_[0] } if ref $_[0] eq 'HASH'; # turn hashref into hash 310 # ordinary boring sort 311 my %args = validate( @_, params_all() ); 312 313 # Determine variables for ease of reference. 314 for (@callbacks) 315 { 316 $args{$_} = $class->merge_callbacks( $args{$_} ) if $args{$_}; 317 } 318 319 # Determine parser class 320 my $from; 321 for ( keys %args ) 322 { 323 $from = whose_params( $_ ); 324 next if (not defined $from) or ($from eq 'common'); 325 last; 326 } 327 croak "Could not identify a parsing module to use." unless $from; 328 329 # Find and call parser creation method 330 my $method = $from->can( "create_parser" ) 331 or croak "Can't create a $_ parser (no appropriate create_parser method)"; 332 my @args = %args; 333 %args = validate( @args, $from->params() ); 334 $from->$method( %args ); 335} 336 337=head3 merge_callbacks 338 339Produce either undef or a single coderef from either undef, 340an empty array, a single coderef or an array of coderefs 341 342=cut 343 344sub merge_callbacks 345{ 346 my $self = shift; 347 348 return unless @_; # No arguments 349 return unless $_[0]; # Irrelevant argument 350 my @callbacks = @_; 351 if (@_ == 1) 352 { 353 return $_[0] if ref $_[0] eq 'CODE'; 354 @callbacks = @{ $_[0] } if ref $_[0] eq 'ARRAY'; 355 } 356 return unless @callbacks; 357 358 for (@callbacks) 359 { 360 croak "All callbacks must be coderefs!" unless ref $_ eq 'CODE'; 361 } 362 363 return sub { 364 my $rv; 365 my %args = @_; 366 for my $cb (@callbacks) 367 { 368 $rv = $cb->( %args ); 369 return $rv unless $rv; 370 # Ugh. Symbiotic. All but postprocessor return the date. 371 $args{input} = $rv unless $args{parsed}; 372 } 373 $rv; 374 }; 375} 376 377=head2 create_multiple_parsers 378 379Given the options block (as made from C<create_parser()>) 380and a list of single parser specifications, this returns a 381coderef that returns either the resultant C<DateTime> object 382or C<undef>. 383 384It first sorts the specifications using C<sort_parsers()> 385and then creates the function based on what that returned. 386 387=cut 388 389sub create_multiple_parsers 390{ 391 my $class = shift; 392 my ($options, @specs) = @_; 393 394 my $obj = $class->new; 395 396 # Organise the specs, and transform them into parsers. 397 my ($lengths, $others) = $class->sort_parsers( $options, \@specs ); 398 399 # Merge callbacks if any. 400 for ( 'preprocess' ) { 401 $options->{$_} = $class->merge_callbacks( 402 $options->{$_} 403 ) if $options->{$_}; 404 } 405 # Custom fail method? 406 $obj->set_fail( $options->{on_fail} ) if exists $options->{on_fail}; 407 # Who's our maker? 408 $obj->set_maker( $options->{maker} ) if exists $options->{maker}; 409 410 # We don't want to save the whole options hash as a closure, since 411 # that can cause a circular reference when $options->{maker} is 412 # set. 413 my $preprocess = $options->{preprocess}; 414 415 # These are the innards of a multi-parser. 416 my $parser = sub { 417 my ($self, $date, @args) = @_; 418 return unless defined $date; 419 420 # Parameters common to the callbacks. Pre-prepared. 421 my %param = ( 422 self => $self, 423 ( @args ? (args => \@args) : () ), 424 ); 425 426 my %p; 427 # Preprocess and potentially fill %p 428 if ($preprocess) 429 { 430 $date = $preprocess->( 431 input => $date, parsed => \%p, %param 432 ); 433 } 434 435 # Find length parser 436 if (%$lengths) 437 { 438 my $length = length $date; 439 my $parser = $lengths->{$length}; 440 if ($parser) 441 { 442 # Found one, call it with _copy_ of %p 443 my $dt = $parser->( $self, $date, { %p }, @args ); 444 return $dt if defined $dt; 445 } 446 } 447 # Or calls all others, with _copy_ of %p 448 for my $parser (@$others) 449 { 450 my $dt = $parser->( $self, $date, { %p }, @args ); 451 return $dt if defined $dt; 452 } 453 # Failed, return undef. 454 return; 455 }; 456 $obj->set_parser( $parser ); 457} 458 459=head2 sort_parsers 460 461This takes the list of specifications and sorts them while 462turning the specifications into parsers. It returns two 463values: the first is a hashref containing all the length 464based parsers. The second is an array containing all the 465other parsers. 466 467If any of the specs are not code or hash references, then it 468will call C<croak()>. 469 470Code references are put directly into the 'other' array. Any 471hash references without I<length> keys are run through 472C<create_single_parser()> and the resultant parser is placed 473in the 'other' array. 474 475Hash references B<with> I<length> keys are run through 476C<create_single_parser()>, but the resultant parser is used 477as the value in the length hashref with the length being the 478key. If two or more parsers have the same I<length> 479specified then an error is thrown. 480 481=cut 482 483sub sort_parsers 484{ 485 my $class = shift; 486 my ($options, $specs) = @_; 487 my (%lengths, @others); 488 489 for my $spec (@$specs) 490 { 491 # Put coderefs straight into the 'other' heap. 492 if (ref $spec eq 'CODE') 493 { 494 push @others, $spec; 495 } 496 # Specifications... 497 elsif (ref $spec eq 'HASH') 498 { 499 if (exists $spec->{length}) 500 { 501 my $code = $class->create_single_parser( %$spec ); 502 my @lengths = ref $spec->{length} 503 ? @{ $spec->{length} } 504 : ( $spec->{length} ); 505 for my $length ( @lengths ) 506 { 507 push @{ $lengths{$length} }, $code; 508 } 509 } 510 else 511 { 512 push @others, $class->create_single_parser( %$spec ); 513 } 514 } 515 # Something else 516 else 517 { 518 croak "Invalid specification in list."; 519 } 520 } 521 522 while (my ($length, $parsers) = each %lengths) 523 { 524 $lengths{$length} = $class->chain_parsers( $parsers ); 525 } 526 527 return ( \%lengths, \@others ); 528} 529 530sub chain_parsers 531{ 532 my ($self, $parsers) = @_; 533 return $parsers->[0] if @$parsers == 1; 534 return sub { 535 my $self = shift; 536 for my $parser (@$parsers) 537 { 538 my $rv = $self->$parser( @_ ); 539 return $rv if defined $rv; 540 } 541 return undef; 542 }; 543} 544 545=head2 create_parser 546 547C<create_class()> is mostly a wrapper around 548C<create_parser()> that does loops and stuff and calls 549C<create_parser()> to create the actual parsers. 550 551C<create_parser()> takes the parser specifications (be they 552single specifications or multiple specifications) and 553returns an anonymous coderef that is suitable for use as a 554method. The coderef will call C<croak()> in the event of 555being unable to parse the single string it expects as input. 556 557The simplest input is that of a single specification, 558presented just as a plain hash, not a hashref. This is 559passed directly to C<create_single_parser()> with the return 560value from that being wrapped in a function that lets it 561C<croak()> on failure, with that wrapper being returned. 562 563If the first argument to C<create_parser()> is an arrayref, 564then that is taken to be an options block (as per the 565multiple parser specification documented earlier). 566 567Any further arguments should be either hashrefs or coderefs. 568If the first argument after the optional arrayref is not a 569hashref or coderef then that argument and all remaining 570arguments are passed off to C<create_single_parser()> 571directly. If the first argument is a hashref or coderef, 572then it and the remaining arguments are passed to 573C<create_multiple_parsers()>. 574 575The resultant coderef from calling either of the creation 576methods is then wrapped in a function that calls C<croak()> 577in event of failure or the C<DateTime> object in event of 578success. 579 580=cut 581 582sub create_parser 583{ 584 my $class = shift; 585 if (not ref $_[0]) 586 { 587 # Simple case of single specification as a hash 588 return $class->create_single_object( @_ ) 589 } 590 591 # Let's see if we were given an options block 592 my %options; 593 while ( ref $_[0] eq 'ARRAY' ) 594 { 595 my $options = shift; 596 %options = ( %options, @$options ); 597 } 598 599 # Now, can we create a multi-parser out of the remaining arguments? 600 if (ref $_[0] eq 'HASH' or ref $_[0] eq 'CODE') 601 { 602 return $class->create_multiple_parsers( \%options, @_ ); 603 } 604 else 605 { 606 # If it wasn't a HASH or CODE, then it was (ideally) 607 # a list of pairs describing a single specification. 608 return $class->create_multiple_parsers( \%options, { @_ } ); 609 } 610} 611 612=head1 FINDING IMPLEMENTATIONS 613 614C<Parser> automatically loads any parser classes in C<@INC>. 615 616To be loaded automatically, you must be a 617C<DateTime::Format::Builder::Parser::XXX> module. 618 619To be invisible, and not loaded, start your class with a lower class 620letter. These are ignored. 621 622=cut 623 624# Find all our workers 625{ 626 use Class::Factory::Util; 627 628 foreach my $worker ( __PACKAGE__->subclasses ) 629 { 630 eval "use DateTime::Format::Builder::Parser::$worker;"; 631 die $@ if $@; 632 } 633} 634 6351; 636 637__END__ 638 639=head1 WRITING A PARSER IMPLEMENTATION 640 641=head2 Naming your parser 642 643Create a module and name it in the form 644C<DateTime::Format::Builder::Parser::XXX> 645where I<XXX> is whatever you like, 646so long as it doesn't start with a 647lower case letter. 648 649Alternatively, call it something completely different 650if you don't mind the users explicitly loading your module. 651 652I'd recommend keeping within the C<DateTime::Format::Builder> 653namespace though --- at the time of writing I've not given 654thought to what non-auto loaded ones should be called. Any 655ideas, please email me. 656 657=head2 Declaring specification arguments 658 659Call C<<DateTime::Format::Builder::Parser->valid_params()>> with 660C<Params::Validate> style arguments. For example: 661 662 DateTime::Format::Builder::Parser->valid_params( 663 params => { type => ARRAYREF }, 664 Regex => { type => SCALARREF, callbacks => { 665 'is a regex' => sub { ref(shift) eq 'Regexp' } 666 }} 667 ); 668 669Start one of the key names with a capital letter. Ideally that key 670should match the I<XXX> from earlier. This will be used to help 671identify which module a parser specification should be given to. 672 673The key names I<on_match>, I<on_fail>, I<postprocess>, I<preprocess>, 674I<label> and I<length> are predefined. You are recommended to make use 675of them. You may ignore I<length> as C<sort_parsers> takes care of that. 676 677=head2 Define create_parser 678 679A class method of the name C<create_parser> that does the following: 680 681Its arguments are as for a normal method (i.e. class as first argument). 682The other arguments are the result from a call to C<Params::Validate> 683according to your specification (the C<valid_params> earlier), i.e. a 684hash of argument name and value. 685 686The return value should be a coderef that takes a date string as its 687first argument and returns either a C<DateTime> object or C<undef>. 688 689=head2 Callbacks 690 691It is preferred that you support some callbacks to your parsers. 692In particular, C<preprocess>, C<on_match>, C<on_fail> and 693C<postprocess>. See the L<main Builder|DateTime::Format::Builder> 694docs for the appropriate placing of calls to the callbacks. 695 696=head1 SUPPORT 697 698Support for this module is provided via the datetime@perl.org email 699list. See http://lists.perl.org/ for more details. 700 701Alternatively, log them via the CPAN RT system via the web or email: 702 703 http://perl.dellah.org/rt/dtbuilder 704 bug-datetime-format-builder@rt.cpan.org 705 706This makes it much easier for me to track things and thus means 707your problem is less likely to be neglected. 708 709=head1 THANKS 710 711See L<DateTime::Format::Builder>. 712 713=head1 LICENCE AND COPYRIGHT 714 715Copyright E<copy> Iain Truskett, 2003. All rights reserved. 716 717This library is free software; you can redistribute it and/or modify 718it under the same terms as Perl itself, either Perl version 5.000 or, 719at your option, any later version of Perl 5 you may have available. 720 721The full text of the licences can be found in the F<Artistic> and 722F<COPYING> files included with this module, or in L<perlartistic> and 723L<perlgpl> as supplied with Perl 5.8.1 and later. 724 725=head1 AUTHOR 726 727Iain Truskett <spoon@cpan.org> 728 729=head1 SEE ALSO 730 731C<datetime@perl.org> mailing list. 732 733http://datetime.perl.org/ 734 735L<perl>, L<DateTime>, L<DateTime::Format::Builder>. 736 737L<Params::Validate>. 738 739L<DateTime::Format::Builder::Parser::generic>, 740L<DateTime::Format::Builder::Parser::Dispatch>, 741L<DateTime::Format::Builder::Parser::Quick>, 742L<DateTime::Format::Builder::Parser::Regex>, 743L<DateTime::Format::Builder::Parser::Strptime>. 744 745=cut 746 747