1package DateTime::Format::Builder::Parser; 2{ 3 $DateTime::Format::Builder::Parser::VERSION = '0.81'; 4} 5use strict; 6use warnings; 7use Carp qw( croak ); 8use Params::Validate qw( 9 validate SCALAR CODEREF UNDEF ARRAYREF 10); 11use Scalar::Util qw( weaken ); 12 13 14 15 16sub on_fail { 17 my ( $self, $input, $parent ) = @_; 18 my $maker = $self->maker; 19 if ( $maker and $maker->can('on_fail') ) { 20 $maker->on_fail($input); 21 } 22 else { 23 croak __PACKAGE__ . ": Invalid date format: $input"; 24 } 25} 26 27sub no_parser { 28 croak "No parser set for this parser object."; 29} 30 31sub new { 32 my $class = shift; 33 $class = ref($class) || $class; 34 my $i = 0; 35 my $self = bless { 36 on_fail => \&on_fail, 37 parser => \&no_parser, 38 }, $class; 39 40 return $self; 41} 42 43sub maker { $_[0]->{maker} } 44 45sub set_maker { 46 my $self = shift; 47 my $maker = shift; 48 49 $self->{maker} = $maker; 50 weaken $self->{maker} 51 if ref $self->{maker}; 52 53 return $self; 54} 55 56sub fail { 57 my ( $self, $parent, $input ) = @_; 58 $self->{on_fail}->( $self, $input, $parent ); 59} 60 61sub parse { 62 my ( $self, $parent, $input, @args ) = @_; 63 my $r = $self->{parser}->( $parent, $input, @args ); 64 $self->fail( $parent, $input ) unless defined $r; 65 $r; 66} 67 68sub set_parser { 69 my ( $self, $parser ) = @_; 70 $self->{parser} = $parser; 71 $self; 72} 73 74sub set_fail { 75 my ( $self, $fail ) = @_; 76 $self->{on_fail} = $fail; 77 $self; 78} 79 80 81my @callbacks = qw( on_match on_fail postprocess preprocess ); 82 83{ 84 85 86 my %params = ( 87 common => { 88 length => { 89 type => SCALAR | ARRAYREF, 90 optional => 1, 91 callbacks => { 92 'is an int' => sub { ref $_[0] ? 1 : $_[0] !~ /\D/ }, 93 'not empty' => sub { ref $_[0] ? @{ $_[0] } >= 1 : 1 }, 94 } 95 }, 96 97 # Stuff used by callbacks 98 label => { type => SCALAR, optional => 1 }, 99 ( 100 map { $_ => { type => CODEREF | ARRAYREF, optional => 1 } } 101 @callbacks 102 ), 103 }, 104 ); 105 106 107 sub params { 108 my $self = shift; 109 my $caller = ref $self || $self; 110 return { map { %$_ } @params{ $caller, 'common' } }; 111 } 112 113 114 my $all_params; 115 116 sub params_all { 117 return $all_params if defined $all_params; 118 my %all_params = map { %$_ } values %params; 119 $_->{optional} = 1 for values %all_params; 120 $all_params = \%all_params; 121 } 122 123 124 my %inverse; 125 126 sub valid_params { 127 my $self = shift; 128 my $from = (caller)[0]; 129 my %args = @_; 130 $params{$from} = \%args; 131 for ( keys %args ) { 132 133 # %inverse contains keys matching all the 134 # possible params; values are the class if and 135 # only if that class is the only one that uses 136 # the given param. 137 $inverse{$_} = exists $inverse{$_} ? undef : $from; 138 } 139 undef $all_params; 140 1; 141 } 142 143 144 sub whose_params { 145 my $param = shift; 146 return $inverse{$param}; 147 } 148} 149 150 151sub create_single_object { 152 my ($self) = shift; 153 my $obj = $self->new; 154 my $parser = $self->create_single_parser(@_); 155 156 $obj->set_parser($parser); 157} 158 159sub create_single_parser { 160 my $class = shift; 161 return $_[0] if ref $_[0] eq 'CODE'; # already code 162 @_ = %{ $_[0] } if ref $_[0] eq 'HASH'; # turn hashref into hash 163 # ordinary boring sort 164 my %args = validate( @_, params_all() ); 165 166 # Determine variables for ease of reference. 167 for (@callbacks) { 168 $args{$_} = $class->merge_callbacks( $args{$_} ) if $args{$_}; 169 } 170 171 # Determine parser class 172 my $from; 173 for ( keys %args ) { 174 $from = whose_params($_); 175 next if ( not defined $from ) or ( $from eq 'common' ); 176 last; 177 } 178 croak "Could not identify a parsing module to use." unless $from; 179 180 # Find and call parser creation method 181 my $method = $from->can("create_parser") 182 or croak 183 "Can't create a $_ parser (no appropriate create_parser method)"; 184 my @args = %args; 185 %args = validate( @args, $from->params() ); 186 $from->$method(%args); 187} 188 189 190sub merge_callbacks { 191 my $self = shift; 192 193 return unless @_; # No arguments 194 return unless $_[0]; # Irrelevant argument 195 my @callbacks = @_; 196 if ( @_ == 1 ) { 197 return $_[0] if ref $_[0] eq 'CODE'; 198 @callbacks = @{ $_[0] } if ref $_[0] eq 'ARRAY'; 199 } 200 return unless @callbacks; 201 202 for (@callbacks) { 203 croak "All callbacks must be coderefs!" unless ref $_ eq 'CODE'; 204 } 205 206 return sub { 207 my $rv; 208 my %args = @_; 209 for my $cb (@callbacks) { 210 $rv = $cb->(%args); 211 return $rv unless $rv; 212 213 # Ugh. Symbiotic. All but postprocessor return the date. 214 $args{input} = $rv unless $args{parsed}; 215 } 216 $rv; 217 }; 218} 219 220 221sub create_multiple_parsers { 222 my $class = shift; 223 my ( $options, @specs ) = @_; 224 225 my $obj = $class->new; 226 227 # Organise the specs, and transform them into parsers. 228 my ( $lengths, $others ) = $class->sort_parsers( $options, \@specs ); 229 230 # Merge callbacks if any. 231 for ('preprocess') { 232 $options->{$_} = $class->merge_callbacks( $options->{$_} ) 233 if $options->{$_}; 234 } 235 236 # Custom fail method? 237 $obj->set_fail( $options->{on_fail} ) if exists $options->{on_fail}; 238 239 # Who's our maker? 240 $obj->set_maker( $options->{maker} ) if exists $options->{maker}; 241 242 # We don't want to save the whole options hash as a closure, since 243 # that can cause a circular reference when $options->{maker} is 244 # set. 245 my $preprocess = $options->{preprocess}; 246 247 # These are the innards of a multi-parser. 248 my $parser = sub { 249 my ( $self, $date, @args ) = @_; 250 return unless defined $date; 251 252 # Parameters common to the callbacks. Pre-prepared. 253 my %param = ( 254 self => $self, 255 ( @args ? ( args => \@args ) : () ), 256 ); 257 258 my %p; 259 260 # Preprocess and potentially fill %p 261 if ($preprocess) { 262 $date = $preprocess->( input => $date, parsed => \%p, %param ); 263 } 264 265 # Find length parser 266 if (%$lengths) { 267 my $length = length $date; 268 my $parser = $lengths->{$length}; 269 if ($parser) { 270 271 # Found one, call it with _copy_ of %p 272 my $dt = $parser->( $self, $date, {%p}, @args ); 273 return $dt if defined $dt; 274 } 275 } 276 277 # Or calls all others, with _copy_ of %p 278 for my $parser (@$others) { 279 my $dt = $parser->( $self, $date, {%p}, @args ); 280 return $dt if defined $dt; 281 } 282 283 # Failed, return undef. 284 return; 285 }; 286 $obj->set_parser($parser); 287} 288 289 290sub sort_parsers { 291 my $class = shift; 292 my ( $options, $specs ) = @_; 293 my ( %lengths, @others ); 294 295 for my $spec (@$specs) { 296 297 # Put coderefs straight into the 'other' heap. 298 if ( ref $spec eq 'CODE' ) { 299 push @others, $spec; 300 } 301 302 # Specifications... 303 elsif ( ref $spec eq 'HASH' ) { 304 if ( exists $spec->{length} ) { 305 my $code = $class->create_single_parser(%$spec); 306 my @lengths 307 = ref $spec->{length} 308 ? @{ $spec->{length} } 309 : ( $spec->{length} ); 310 for my $length (@lengths) { 311 push @{ $lengths{$length} }, $code; 312 } 313 } 314 else { 315 push @others, $class->create_single_parser(%$spec); 316 } 317 } 318 319 # Something else 320 else { 321 croak "Invalid specification in list."; 322 } 323 } 324 325 while ( my ( $length, $parsers ) = each %lengths ) { 326 $lengths{$length} = $class->chain_parsers($parsers); 327 } 328 329 return ( \%lengths, \@others ); 330} 331 332sub chain_parsers { 333 my ( $self, $parsers ) = @_; 334 return $parsers->[0] if @$parsers == 1; 335 return sub { 336 my $self = shift; 337 for my $parser (@$parsers) { 338 my $rv = $self->$parser(@_); 339 return $rv if defined $rv; 340 } 341 return undef; 342 }; 343} 344 345 346sub create_parser { 347 my $class = shift; 348 if ( not ref $_[0] ) { 349 350 # Simple case of single specification as a hash 351 return $class->create_single_object(@_); 352 } 353 354 # Let's see if we were given an options block 355 my %options; 356 while ( ref $_[0] eq 'ARRAY' ) { 357 my $options = shift; 358 %options = ( %options, @$options ); 359 } 360 361 # Now, can we create a multi-parser out of the remaining arguments? 362 if ( ref $_[0] eq 'HASH' or ref $_[0] eq 'CODE' ) { 363 return $class->create_multiple_parsers( \%options, @_ ); 364 } 365 else { 366 # If it wasn't a HASH or CODE, then it was (ideally) 367 # a list of pairs describing a single specification. 368 return $class->create_multiple_parsers( \%options, {@_} ); 369 } 370} 371 372 373# Find all our workers 374{ 375 use Class::Factory::Util 1.6; 376 377 foreach my $worker ( __PACKAGE__->subclasses ) { 378 eval "use DateTime::Format::Builder::Parser::$worker;"; 379 die $@ if $@; 380 } 381} 382 3831; 384 385# ABSTRACT: Parser creation 386 387__END__ 388 389=pod 390 391=head1 NAME 392 393DateTime::Format::Builder::Parser - Parser creation 394 395=head1 VERSION 396 397version 0.81 398 399=head1 SYNOPSIS 400 401 my $class = 'DateTime::Format::Builder::Parser'; 402 my $parser = $class->create_single_parser( %specs ); 403 404=head1 DESCRIPTION 405 406This is a utility class for L<DateTime::Format::Builder> that 407handles creation of parsers. It is to here that C<Builder> delegates 408most of its responsibilities. 409 410=head1 CONSTRUCTORS 411 412=head1 METHODS 413 414There are two sorts of methods in this class. Those used by 415parser implementations and those used by C<Builder>. It is 416generally unlikely the user will want to use any of them. 417 418They are presented, grouped according to use. 419 420=head2 Parameter Handling (implementations) 421 422These methods allow implementations to have validation of 423their arguments in a standard manner and due to C<Parser>'s 424impelementation, these methods also allow C<Parser> to 425determine which implementation to use. 426 427=head3 Common parameters 428 429These parameters appear for all parser implementations. 430These are primarily documented in 431L<DateTime::Format::Builder>. 432 433=over 4 434 435=item * 436 437B<on_match> 438 439=item * 440 441B<on_fail> 442 443=item * 444 445B<postprocess> 446 447=item * 448 449B<preprocess> 450 451=item * 452 453B<label> 454 455=item * 456 457B<length> may be a number or an arrayref of numbers 458indicating the length of the input. This lets us optimise in 459the case of static length input. If supplying an arrayref of 460numbers, please keep the number of numbers to a minimum. 461 462=back 463 464=head3 params 465 466 my $params = $self->params(); 467 validate( @_, $params ); 468 469Returns declared parameters and C<common> parameters in a hashref 470suitable for handing to L<Params::Validate>'s C<validate> function. 471 472=head3 params_all 473 474 my $all_params = $self->params_all(); 475 476Returns a hash of all the valid options. Not recommended 477for general use. 478 479=head3 valid_params 480 481 __PACKAGE__->valid_params( %params ); 482 483Arguments are as per L<Params::Validate>'s C<validate> function. 484This method is used to declare what your valid arguments are in 485a parser specification. 486 487=head3 whose_params 488 489 my $class = whose_params( $key ); 490 491Internal function which merely returns to which class a 492parameter is unique. If not unique, returns C<undef>. 493 494=head2 Organising and Creating Parsers 495 496=head3 create_single_parser 497 498This takes a single specification and returns a coderef that 499is a parser that suits that specification. This is the end 500of the line for all the parser creation methods. It 501delegates no further. 502 503If a coderef is specified, then that coderef is immediately 504returned (it is assumed to be appropriate). 505 506The single specification (if not a coderef) can be either a 507hashref or a hash. The keys and values must be as per the 508specification. 509 510It is here that any arrays of callbacks are unified. It is 511also here that any parser implementations are used. With 512the spec that's given, the keys are looked at and whichever 513module is the first to have a unique key in the spec is the 514one to whom the spec is given. 515 516B<Note>: please declare a C<valid_params> argument with an 517uppercase letter. For example, if you're writing 518C<DateTime::Format::Builder::Parser::Fnord>, declare a 519parameter called C<Fnord>. Similarly, C<DTFBP::Strptime> 520should have C<Strptime> and C<DTFBP::Regex> should have 521C<Regex>. These latter two don't for backwards compatibility 522reasons. 523 524The returned parser will return either a C<DateTime> object 525or C<undef>. 526 527=head3 merge_callbacks 528 529Produce either undef or a single coderef from either undef, 530an empty array, a single coderef or an array of coderefs 531 532=head2 create_multiple_parsers 533 534Given the options block (as made from C<create_parser()>) 535and a list of single parser specifications, this returns a 536coderef that returns either the resultant C<DateTime> object 537or C<undef>. 538 539It first sorts the specifications using C<sort_parsers()> 540and then creates the function based on what that returned. 541 542=head2 sort_parsers 543 544This takes the list of specifications and sorts them while 545turning the specifications into parsers. It returns two 546values: the first is a hashref containing all the length 547based parsers. The second is an array containing all the 548other parsers. 549 550If any of the specs are not code or hash references, then it 551will call C<croak()>. 552 553Code references are put directly into the 'other' array. Any 554hash references without I<length> keys are run through 555C<create_single_parser()> and the resultant parser is placed 556in the 'other' array. 557 558Hash references B<with> I<length> keys are run through 559C<create_single_parser()>, but the resultant parser is used 560as the value in the length hashref with the length being the 561key. If two or more parsers have the same I<length> 562specified then an error is thrown. 563 564=head2 create_parser 565 566C<create_class()> is mostly a wrapper around 567C<create_parser()> that does loops and stuff and calls 568C<create_parser()> to create the actual parsers. 569 570C<create_parser()> takes the parser specifications (be they 571single specifications or multiple specifications) and 572returns an anonymous coderef that is suitable for use as a 573method. The coderef will call C<croak()> in the event of 574being unable to parse the single string it expects as input. 575 576The simplest input is that of a single specification, 577presented just as a plain hash, not a hashref. This is 578passed directly to C<create_single_parser()> with the return 579value from that being wrapped in a function that lets it 580C<croak()> on failure, with that wrapper being returned. 581 582If the first argument to C<create_parser()> is an arrayref, 583then that is taken to be an options block (as per the 584multiple parser specification documented earlier). 585 586Any further arguments should be either hashrefs or coderefs. 587If the first argument after the optional arrayref is not a 588hashref or coderef then that argument and all remaining 589arguments are passed off to C<create_single_parser()> 590directly. If the first argument is a hashref or coderef, 591then it and the remaining arguments are passed to 592C<create_multiple_parsers()>. 593 594The resultant coderef from calling either of the creation 595methods is then wrapped in a function that calls C<croak()> 596in event of failure or the C<DateTime> object in event of 597success. 598 599=head1 FINDING IMPLEMENTATIONS 600 601C<Parser> automatically loads any parser classes in C<@INC>. 602 603To be loaded automatically, you must be a 604C<DateTime::Format::Builder::Parser::XXX> module. 605 606To be invisible, and not loaded, start your class with a lower class 607letter. These are ignored. 608 609=head1 WRITING A PARSER IMPLEMENTATION 610 611=head2 Naming your parser 612 613Create a module and name it in the form 614C<DateTime::Format::Builder::Parser::XXX> 615where I<XXX> is whatever you like, 616so long as it doesn't start with a 617lower case letter. 618 619Alternatively, call it something completely different 620if you don't mind the users explicitly loading your module. 621 622I'd recommend keeping within the C<DateTime::Format::Builder> 623namespace though --- at the time of writing I've not given 624thought to what non-auto loaded ones should be called. Any 625ideas, please email me. 626 627=head2 Declaring specification arguments 628 629Call C<<DateTime::Format::Builder::Parser->valid_params()>> with 630C<Params::Validate> style arguments. For example: 631 632 DateTime::Format::Builder::Parser->valid_params( 633 params => { type => ARRAYREF }, 634 Regex => { type => SCALARREF, callbacks => { 635 'is a regex' => sub { ref(shift) eq 'Regexp' } 636 }} 637 ); 638 639Start one of the key names with a capital letter. Ideally that key 640should match the I<XXX> from earlier. This will be used to help 641identify which module a parser specification should be given to. 642 643The key names I<on_match>, I<on_fail>, I<postprocess>, I<preprocess>, 644I<label> and I<length> are predefined. You are recommended to make use 645of them. You may ignore I<length> as C<sort_parsers> takes care of that. 646 647=head2 Define create_parser 648 649A class method of the name C<create_parser> that does the following: 650 651Its arguments are as for a normal method (i.e. class as first argument). 652The other arguments are the result from a call to C<Params::Validate> 653according to your specification (the C<valid_params> earlier), i.e. a 654hash of argument name and value. 655 656The return value should be a coderef that takes a date string as its 657first argument and returns either a C<DateTime> object or C<undef>. 658 659=head2 Callbacks 660 661It is preferred that you support some callbacks to your parsers. 662In particular, C<preprocess>, C<on_match>, C<on_fail> and 663C<postprocess>. See the L<main Builder|DateTime::Format::Builder> 664docs for the appropriate placing of calls to the callbacks. 665 666=head1 SUPPORT 667 668See L<DateTime::Format::Builder> for details. 669 670=head1 SEE ALSO 671 672C<datetime@perl.org> mailing list. 673 674http://datetime.perl.org/ 675 676L<perl>, L<DateTime>, L<DateTime::Format::Builder>. 677 678L<Params::Validate>. 679 680L<DateTime::Format::Builder::Parser::generic>, 681L<DateTime::Format::Builder::Parser::Dispatch>, 682L<DateTime::Format::Builder::Parser::Quick>, 683L<DateTime::Format::Builder::Parser::Regex>, 684L<DateTime::Format::Builder::Parser::Strptime>. 685 686=head1 AUTHORS 687 688=over 4 689 690=item * 691 692Dave Rolsky <autarch@urth.org> 693 694=item * 695 696Iain Truskett 697 698=back 699 700=head1 COPYRIGHT AND LICENSE 701 702This software is Copyright (c) 2013 by Dave Rolsky. 703 704This is free software, licensed under: 705 706 The Artistic License 2.0 (GPL Compatible) 707 708=cut 709