#!/usr/bin/perl -w
#
#
# Regenerate (overwriting only if changed):
#
#    pod/perldebguts.pod
#    regnodes.h
#
# from information stored in
#
#    regcomp.sym
#    op_reg_common.h
#    regexp.h
#    regcomp.h
#
# pod/perldebguts.pod is not completely regenerated. Only the table of
# regexp nodes is replaced; other parts remain unchanged.
#
# Accepts the standard regen_lib -q and -v args.
#
# This script is normally invoked from regen.pl.
#
# F<regcomp.sym> defines the opcodes and states used in the regex
# engine; it also includes documentation on the opcodes. This script
# parses those definitions out and turns them into typedefs, defines,
# and data structures, and maybe even code which the regex engine can
# use to operate.
#
# F<regexp.h>, F<op_reg_common.h> and F<regcomp.h> contain defines
# C<RXf_xxx> and C<PREGf_xxx> that are used in flags in our code. These
# defines are parsed out and data structures are created to allow the
# debug mode of the regex engine to show things such as which flags were
# set during compilation. In some cases we transform the C code in the
# header files into perl code which we execute to C<eval()> the contents.
# For instance in a situation like this:
#
#   #define RXf_X 0x1 /* the X mode */
#   #define RXf_Y 0x2 /* the Y mode */
#   #define RXf_Z (RXf_X|RXf_Y) /* the Z mode */
#
# this script might end up eval()ing something like C<0x1> and then
# C<0x2> and then C<(0x1|0x2)> the results of which it then might use in
# constructing a data structure, or pod in perldebguts, or a comment in
# C<regnodes.h>. It also would separate out the "X", "Y", and "Z" and
# use them, and would also use the data in the line comment if present.
#
# If you compile a regex under perl -Mre=Debug,ALL you can see much
# of the content that this file generates and parses out of its input
# files.
BEGIN {
    # Get function prototypes
    require './regen/regen_lib.pl';
    require './regen/HeaderParser.pm';
}

use strict;

# NOTE I don't think anyone actually knows what all of these properties mean,
# and I suspect some of them are outright unused. This is a first attempt to
# clean up the generation so maybe one day we can move to something more self
# documenting. (One might argue that an array of hashes of properties would
# be easier to use.)
#
# Why we use the term regnode and nodes, and not say, opcodes, I am not sure.

# General thoughts:
# 1. We use a single continuum to represent both opcodes and states,
#    and in regexec.c we switch on the combined set.
# 2. Opcodes have more information associated to them, states are simpler,
#    basically just an identifier/number that can be used to switch within
#    the state machine.
# 3. Some opcodes are order dependent.
# 4. Output files often use "tricks" to reduce diff effects. Some of what
#    we do below is more clumsy looking than it could be because of this.

# Op/state properties:
#
# Property     In    Descr
# ----------------------------------------------------------------------------
# name         Both  Name of op/state
# id           Both  integer value for this opcode/state
# optype       Both  Either 'op' or 'state'
# line_num     Both  line number in the input file for this item.
# type         Op    Type of node (aka regnode_kind)
# code         Op    Meta about the node, used to detect variable length nodes
# suffix       Op    which regnode struct this uses, so if this is '1', it
#                    uses 'struct regnode_1'
# flags        Op    S for simple; V for varies
# longj        Op    Boolean as to if this node is a longjump
# comment      Both  Comment about node, if any. Placed in perldebguts
#                    as its description
# pod_comment  Both  Special comments for pod output (preceding lines in def)
#                    Such lines begin with '#*'

# Global State
my @all;       # all opcodes/state
my %all;       # hash of all opcode/state names

my @ops;       # array of just opcodes
my @states;    # array of just states

my $longest_name_length = 0;    # track lengths of names for nicer reports
my (%type_alias);               # map the type (??)

# Render a node as text for use in a fatal error message. Data::Dumper is
# loaded on demand here because nothing else in this file is guaranteed to
# have loaded it by the time a node is registered.
sub _dump_node {
    my ($node) = @_;
    require Data::Dumper;
    return Data::Dumper::Dumper($node);
}

# Register a newly constructed node into our state tables.
#
# $node is a hash reference with at least 'name' and 'optype' ('op' or
# 'state'); 'line_num' and 'longj' are used when present.
#
# Ensures that we have no name collisions (on name anyway), appends the node
# to @all and to @ops or @states, and issues the "id" for the node (its index
# in @all). Dies on a duplicate name, a missing or unknown optype, or a
# 'longj' value other than 0/1. All validation happens before any global
# table is modified.
sub register_node {
    my ($node) = @_;

    if ( $all{ $node->{name} } ) {
        die "Duplicate item '$node->{name}' in regcomp.sym line $node->{line_num} "
            . "previously defined on line $all{ $node->{name} }{line_num}\n";
    }
    if ( !$node->{optype} ) {
        die "must have an optype in node ", _dump_node($node);
    }
    if ( $node->{optype} ne "op" and $node->{optype} ne "state" ) {
        die "Unknown optype '$node->{optype}' in ", _dump_node($node);
    }

    # An empty or "0" longj is false and therefore accepted; anything else
    # must be exactly "1". String matching avoids a numeric-comparison
    # warning on garbage input.
    if ( $node->{longj} && $node->{longj} !~ /\A[01]\z/ ) {
        die "longj field must be in [01] if present in ", _dump_node($node);
    }

    if ( $node->{optype} eq "op" ) {
        push @ops, $node;
    }
    else {
        push @states, $node;
    }

    $node->{id} = 0 + @all;
    push @all, $node;
    $all{ $node->{name} } = $node;

    return;
}

# Parse and add an opcode definition to the global state.
# What an opcode definition looks like is given in regcomp.sym.
#
# Not every opcode definition has all of the components. We should maybe make
# this nicer/easier to read in the future. Also note that the definitions are
# tab sensitive.

# Special comments for an entry precede it, and begin with '#*' and are placed
# in the generated pod file just before the entry.

# Parse one opcode definition line and register the resulting node.
#
#   $text        - the definition line, trailing whitespace already removed
#   $line_num    - line number of $text in the input file
#   $pod_comment - accumulated '#*' comment text preceding the line, if any
#
# Dies if the line does not have the "NAME desc ; comment" shape.
sub parse_opcode_def {
    my ( $text, $line_num, $pod_comment ) = @_;
    my $node = {
        line_num    => $line_num,
        pod_comment => $pod_comment,
        optype      => "op",
    };

    # first split the line into three, the initial NAME, a middle part
    # that we call "desc" which contains various (not well documented) things,
    # and a comment section.
    # The match is made against the argument, not the caller's $_.
    @{$node}{qw(name desc comment)} = $text =~ /^(\S+)\s+([^\t]+?)\s*;\s*(.*)/
        or die "Failed to match $text";

    # the content of the "desc" field from the first step is extracted here:
    @{$node}{qw(type code suffix flags longj)} = split /[,\s]\s*/, $node->{desc};

    # Not every definition supplies every component; default them to "".
    $node->{$_} //= "" for qw(type code suffix flags longj);

    register_node($node);    # has to be before the type_alias code below

    if ( !$all{ $node->{type} } and !$type_alias{ $node->{type} } ) {

        #warn "Regop type '$node->{type}' from regcomp.sym line $line_num"
        #    ." is not an existing regop, and will be aliased to $node->{name}\n"
        #    if -t STDERR;
        $type_alias{ $node->{type} } = $node->{name};
    }

    $longest_name_length = length $node->{name}
        if length $node->{name} > $longest_name_length;

    return;
}

# parse out a state definition and add the resulting data
# into the global state. may create multiple new states from
# a single definition (this is part of the point).
# Format for states:
# REGOP \t typelist [ \t typelist]
# typelist= namelist
#         = namelist:FAIL
#         = name:count
# Eg:
# WHILEM A_pre,A_min,A_max,B_min,B_max:FAIL
# BRANCH next:FAIL
# CURLYM A,B:FAIL
#
# The CURLYM definition would create the states:
# CURLYM_A, CURLYM_A_fail, CURLYM_B, CURLYM_B_fail
#
# $pod_comment is accepted for symmetry with parse_opcode_def() but is not
# used for states.
sub parse_state_def {
    my ( $text, $line_num, $pod_comment ) = @_;
    my ( $type, @lists ) = split /\s+/, $text;
    die "No list? $type" if !@lists;

    foreach my $list (@lists) {
        my ( $names, $special ) = split /:/, $list, 2;
        $special ||= "";

        # Work out which suffixes every name in this list expands to.
        my @suffix;
        if ( !$special ) {
            @suffix = ("");
        }
        elsif ( $special =~ /\A\d+\z/ ) {

            # name:count - only a pure number is a count
            @suffix = ( 1 .. $special );
        }
        elsif ( $special eq 'FAIL' ) {
            @suffix = ( "", "_fail" );
        }
        else {
            die "unknown :type ':$special'";
        }

        foreach my $name ( split /,/, $names ) {
            my $real =
                $name eq 'resume'
                ? "resume_$type"
                : "${type}_$name";

            foreach my $suffix (@suffix) {
                register_node(
                    {
                        name     => "$real$suffix",
                        optype   => "state",
                        type     => $type || "",
                        comment  => "state for $type",
                        line_num => $line_num,
                    }
                );
            }
        }
    }

    return;
}

# Build the C text for one flag class of opcodes.
#
#   $flag    - the 'flags' value to select on ('V' or 'S')
#   $varname - stem used in the generated C identifiers
#   $comment - optional text emitted before the generated code
#
# Returns $comment followed by a REGNODE_<VARNAME>() macro, the deprecated
# list of matching opcode names, and a bitmap with one bit per opcode id.
sub process_flags {
    my ( $flag, $varname, $comment ) = @_;
    $comment = '' unless defined $comment;

    my @selected;
    my $bitmap = '';
    for my $node (@ops) {
        my $set = $node->{flags} && $node->{flags} eq $flag ? 1 : 0;

        # Whilst I could do this with vec, I'd prefer to do longhand the arithmetic
        # ops in the C code.
        my $byte_idx = $node->{id} >> 3;
        my $current  = do {
            no warnings;    # the byte may not exist yet
            ord substr $bitmap, $byte_idx;
        };
        substr( $bitmap, $byte_idx, 1 ) =
            chr( $current | ( $set << ( $node->{id} & 7 ) ) );

        push @selected, $node->{name} if $set;
    }
    my $out_string = join ', ', @selected, 0;
    $out_string =~ s/(.{1,70},) /$1\n /g;

    my $out_mask = join ', ', map { sprintf "0x%02X", ord $_ } split '', $bitmap;

    return $comment . <<"EOP";
#define REGNODE_\U$varname\E(node) (PL_${varname}_bitmask[(node) >> 3] & (1 << ((node) & 7)))

#ifndef DOINIT
EXTCONST U8 PL_${varname}\[] __attribute__deprecated__;
#else
EXTCONST U8 PL_${varname}\[] __attribute__deprecated__ = {
 $out_string
};
#endif /* DOINIT */

#ifndef DOINIT
EXTCONST U8 PL_${varname}_bitmask[];
#else
EXTCONST U8 PL_${varname}_bitmask[] = {
 $out_mask
};
#endif /* DOINIT */
EOP
}

# Print to $out two U32 bitmasks describing the opcodes of type EXACT:
# which ones fold, and which ones require a UTF-8 target. Bit N of each mask
# corresponds to the opcode whose id is (id of EXACT) + N.
sub print_process_EXACTish {
    my ($out) = @_;

    # Creates some bitmaps for EXACTish nodes.

    my @folded;
    my @req8;

    my $base;
    for my $node (@ops) {
        next unless $node->{type} eq 'EXACT';
        my $name = $node->{name};
        $base = $node->{id} if $name eq 'EXACT';

        # Indexes are relative to EXACT, so it has to come first.
        die "EXACT must be the first regnode of type EXACT in regcomp.sym"
            . " (saw '$name' first)"
            unless defined $base;

        my $index = $node->{id} - $base;

        # This depends entirely on naming conventions in regcomp.sym
        $folded[$index] = $name =~ /^EXACTF/ || 0;
        $req8[$index]   = $name =~ /8/       || 0;
    }

    die "Can't cope with > 32 EXACTish nodes" if @folded > 32;

    my $exactf = sprintf "%X", oct( "0b" . join "", reverse @folded );
    my $req8   = sprintf "%X", oct( "0b" . join "", reverse @req8 );
    print $out <<EOP;

/* Is 'op', known to be of type EXACT, folding? */
#define isEXACTFish(op) (__ASSERT_(REGNODE_TYPE(op) == EXACT) (PL_EXACTFish_bitmask & (1U << (op - EXACT))))

/* Do only UTF-8 target strings match 'op', known to be of type EXACT? */
#define isEXACT_REQ8(op) (__ASSERT_(REGNODE_TYPE(op) == EXACT) (PL_EXACT_REQ8_bitmask & (1U << (op - EXACT))))

#ifndef DOINIT
EXTCONST U32 PL_EXACTFish_bitmask;
EXTCONST U32 PL_EXACT_REQ8_bitmask;
#else
EXTCONST U32 PL_EXACTFish_bitmask = 0x$exactf;
EXTCONST U32 PL_EXACT_REQ8_bitmask = 0x$req8;
#endif /* DOINIT */
EOP
}

# Read the definitions file $file and register every opcode and state in it.
#
# Lines before a line consisting only of dashes are opcode definitions;
# lines after it are state definitions. '#*' comments and blank lines are
# accumulated and handed to the next definition as its pod comment.
# Dies if the file cannot be opened or defines more than 256 items.
sub read_definition {
    my ($file) = @_;
    my $seen_sep    = "";
    my $pod_comment = "";
    open my $in_fh, "<", $file
        or die "Failed to open '$file' for reading: $!";
    while ( my $line = <$in_fh> ) {

        # Special pod comments
        if ( $line =~ /^#\* ?(.*)/s ) {
            $pod_comment .= "# $1";
        }

        # Truly blank lines possibly surrounding pod comments
        elsif ( $line =~ /^\s*$/ ) {
            $pod_comment .= "\n";
        }

        next if $line =~ /\A\s*#/ || $line =~ /\A\s*\z/;

        $line =~ s/\s*\z//;
        if ( $line =~ /^-+\s*$/ ) {
            $seen_sep = 1;
            next;
        }

        if ($seen_sep) {
            parse_state_def( $line, $., $pod_comment );
        }
        else {
            parse_opcode_def( $line, $., $pod_comment );
        }
        $pod_comment = "";
    }
    close $in_fh;
    die "Too many regexp/state opcodes! Maximum is 256, but there are ", 0 + @all,
        " in file!"
        if @all > 256;

    return;
}

# use fixed width to keep the diffs between regcomp.pl recompiles
# as small as possible.
my ( $base_name_width, $rwidth, $twidth ) = ( 22, 12, 9 );

# Print to $out the #defines for REGNODE_MAX and REGMATCH_STATE_MAX, the
# with_t_UTF8ness()/with_tp_UTF8ness() macros, and then one group of
# #defines per opcode (plus its type alias, if any) and per state.
# Dies if any name is wider than $base_name_width.
sub print_state_defs {
    my ($out) = @_;
    printf $out <<EOP,
/* Regops and State definitions */

#define %*s\t%d
#define %*s\t%d

EOP
        -$base_name_width, REGNODE_MAX        => $#ops,
        -$base_name_width, REGMATCH_STATE_MAX => $#all;

    my %rev_type_alias = reverse %type_alias;

    my $max_name_width = 0;
    for my $node ( @ops, @states ) {
        my $len = length $node->{name};
        $max_name_width = $len if $max_name_width < $len;
    }

    die "Do a white-space only commit to increase \$base_name_width to"
        . " $max_name_width; then re-run" if $base_name_width < $max_name_width;

    print $out <<EOT;
/* -- For regexec.c to switch on target being utf8 (t8) or not (tb, b='byte'); */
#define with_t_UTF8ness(op, t_utf8) (((op) << 1) + (cBOOL(t_utf8)))
/* -- same, but also with pattern (p8, pb) -- */
#define with_tp_UTF8ness(op, t_utf8, p_utf8) \\
\t\t(((op) << 2) + (cBOOL(t_utf8) << 1) + cBOOL(p_utf8))

/* The #defines below give both the basic regnode and the expanded version for
 switching on utf8ness */
EOT

    for my $node (@ops) {
        print_state_def_line( $out, $node->{name}, $node->{id}, $node->{comment} );
        if ( defined( my $alias = $rev_type_alias{ $node->{name} } ) ) {
            print_state_def_line( $out, $alias, $node->{id}, $node->{comment} );
        }
    }

    print $out "\t/* ------------ States ------------- */\n";
    for my $node (@states) {
        print_state_def_line( $out, $node->{name}, $node->{id}, $node->{comment} );
    }

    return;
}

# Print to $fh the group of #defines for one op/state: the base define with
# its (possibly wrapped) comment, the two '_tb'/'_t8' variants numbered from
# $id * 2, and the four '_tX_pX' variants numbered from $id * 4, followed by
# a blank line.
sub print_state_def_line
{
    my ( $fh, $name, $id, $comment ) = @_;

    # The sub-names are like '_tb' or '_tb_p8' = max 6 chars wide
    my $name_col_width = $base_name_width + 6;
    my $base_id_width  = 3;    # Max is '255' or 3 cols
    my $mid_id_width   = 3;    # Max is '511' or 3 cols
    my $full_id_width  = 3;    # Max is '1023' but not close to using the 4th

    my $line = "#define " . $name;
    $line .= " " x ( $name_col_width - length($name) );

    $line .= sprintf "%*s", $base_id_width, $id;
    $line .= " " x $mid_id_width;
    $line .= " " x ( $full_id_width + 2 );

    $line .= "/* ";
    my $hanging = length $line;    # Indent any subsequent line to this pos
    $line .= sprintf "0x%02x", $id;

    my $columns = 78;

    # From the documentation: 'In fact, every resulting line will have length
    # of no more than "$columns - 1"'
    $line = wrap( $columns + 1, "", " " x $hanging, "$line $comment" );
    chomp $line;                   # wrap always adds a trailing \n
    $line =~ s/ \s+ $ //x;         # trim, just in case.

    # The comment may have wrapped. Find the final \n and measure the length
    # to the end. If it is short enough, just append the ' */' to the line.
    # If it is too close to the end of the space available, add an extra line
    # that consists solely of blanks and the ' */'
    if ( length($line) - rindex( $line, "\n" ) - 1 <= $columns - 3 ) {
        $line .= " */\n";
    }
    else {
        $line .= "\n" . " " x ( $hanging - 3 ) . "*/\n";
    }

    print $fh $line;

    # And add the 2 subsidiary #defines used when switching on
    # with_t_UTF8ness()
    my $with_id_t = $id * 2;
    for my $with (qw(tb t8)) {
        my $with_name = "${name}_$with";
        print $fh "#define ", $with_name;
        print $fh " " x ( $name_col_width - length($with_name) + $base_id_width );
        printf $fh "%*s", $mid_id_width, $with_id_t;
        print $fh " " x $full_id_width;
        printf $fh " /*";
        print $fh " " x ( 4 + 2 );    # 4 is width of 0xHH that the base entry uses
        printf $fh "0x%03x */\n", $with_id_t;

        $with_id_t++;
    }

    # Finally add the 4 subsidiary #defines used when switching on
    # with_tp_UTF8ness()
    my $with_id_tp = $id * 4;
    for my $with (qw(tb_pb tb_p8 t8_pb t8_p8)) {
        my $with_name = "${name}_$with";
        print $fh "#define ", $with_name;
        print $fh " " x ( $name_col_width - length($with_name) + $base_id_width + $mid_id_width );
        printf $fh "%*s", $full_id_width, $with_id_tp;
        printf $fh " /*";
        print $fh " " x ( 4 + 2 );    # 4 is width of 0xHH that the base entry uses
        printf $fh "0x%03x */\n", $with_id_tp;

        $with_id_tp++;
    }

    print $fh "\n";    # Blank line separates groups for clarity

    return;
}

# Print to $out one typedef per opcode, sorted by opcode name, mapping
# 'tregnode_<NAME>' to the regnode struct selected by the opcode's suffix.
# Also records the typedef name in each node's 'typedef' field, which
# print_regnode_info() reads later.
sub print_typedefs {
    my ($out) = @_;
    print $out <<EOP;

/* typedefs for regex nodes - one typedef per node type */

EOP

    # Column width: the longest struct name, rounded up to a multiple of 5
    # with some slack.
    my $len = 0;
    foreach my $node (@ops) {
        if ( $node->{suffix} and $len < length( $node->{suffix} ) ) {
            $len = length $node->{suffix};
        }
    }
    $len += length "struct regnode_";
    $len = ( int( $len / 5 ) + 2 ) * 5;
    my $prefix = "tregnode";

    foreach my $node ( sort { $a->{name} cmp $b->{name} } @ops ) {
        my $struct_name = "struct regnode";
        if ( my $suffix = $node->{suffix} ) {
            $struct_name .= "_$suffix";
        }
        $node->{typedef} = $prefix . "_" . $node->{name};
        printf $out "typedef %*s %s;\n", -$len, $struct_name, $node->{typedef};
    }
    print $out <<EOP;

/* end typedefs */

EOP

    return;
}

# Print to $out the PL_regnode_info[] table: one initializer per op/state
# giving its type, argument length, whether that length varies, and whether
# it is a long jump. The computed values are also stored back into each node.
#
# NOTE: relies on print_typedefs() having already set $node->{typedef} for
# every opcode that has a suffix.
sub print_regnode_info {
    my ($out) = @_;
    print $out <<EOP;

/* PL_regnode_info[] - Opcode/state names in string form, for debugging */

#ifndef DOINIT
EXTCONST struct regnode_meta PL_regnode_info[];
#else
EXTCONST struct regnode_meta PL_regnode_info[] = {
EOP
    my @fields = qw(type arg_len arg_len_varies off_by_arg);
    foreach my $node_idx ( 0 .. $#all ) {
        my $node = $all[$node_idx];

        $node->{arg_len} =
            $node->{suffix} ? "EXTRA_SIZE($node->{typedef})" : 0;
        $node->{arg_len_varies} =
            ( $node->{code} and $node->{code} =~ /str/ ) ? 1 : 0;
        $node->{off_by_arg} = $node->{longj} || 0;

        print $out " {\n";
        print $out " /* #$node_idx $node->{optype} $node->{name} */\n";
        foreach my $f_idx ( 0 .. $#fields ) {
            my $field = $fields[$f_idx];
            printf $out " .%s = %s", $field, $node->{$field} // 0;
            print $out $f_idx == $#fields ? "\n" : ",\n";
        }
        print $out " }";
        print $out $node_idx == $#all ? "\n" : ",\n";
    }

    print $out <<EOP;
};
#endif /* DOINIT */

EOP

    return;
}

# Print to $out the PL_regnode_name[] table: the name of every op and state
# as a C string, in id order, with a separator comment where the states
# begin.
sub print_regnode_name {
    my ($out) = @_;
    print $out <<EOP;

/* PL_regnode_name[] - Opcode/state names in string form, for debugging */

#ifndef DOINIT
EXTCONST char * PL_regnode_name[];
#else
EXTCONST char * const PL_regnode_name[] = {
EOP

    my $ofs = 0;
    my $sym = "";
    foreach my $node (@all) {
        printf $out "\t%*s\t/* ${sym}%#04x */\n",
            -3 - $base_name_width, qq("$node->{name}",), $node->{id} - $ofs;

        # After the last opcode, number the states relative to REGNODE_MAX.
        if ( $node->{id} == $#ops and @ops != @all ) {
            print $out "\t/* ------------ States ------------- */\n";
            $ofs = $#ops;
            $sym = 'REGNODE_MAX +';
        }
    }

    print $out <<EOP;
};
#endif /* DOINIT */

EOP

    return;
}

# Print to $out the PL_reg_extflags_name[] table: a name for each of the 32
# bits of the extflags word, derived from the RXf_ #defines in
# op_reg_common.h and regexp.h. Dies if two differently-named flags claim
# the same bit.
sub print_reg_extflags_name {
    my ($out) = @_;
    print $out <<EOP;
/* PL_reg_extflags_name[] - Opcode/state names in string form, for debugging */

#ifndef DOINIT
EXTCONST char * PL_reg_extflags_name[];
#else
EXTCONST char * const PL_reg_extflags_name[] = {
EOP

    my %rxfv;
    my %definitions;    # Remember what the symbol definitions are
    my $val = 0;
    my %reverse;
    my $REG_EXTFLAGS_NAME_SIZE = 0;
    my $hp = HeaderParser->new();
    foreach my $file ( "op_reg_common.h", "regexp.h" ) {
        $hp->read_file($file);
        foreach my $line_info ( @{ $hp->lines } ) {
            next unless $line_info->{type} eq "content"
                and $line_info->{sub_type} eq "#define";
            my $line = $line_info->{line};
            $line =~ s/\s*\\\n\s*/ /g;

            # Keep the untouched text of the define for error messages.
            ( my $orig = $line ) =~ s/\s+\z//;

            # optional leading '_'. Return symbol in $1, and strip it from
            # comment of line. Currently doesn't handle comments running onto
            # next line
            next unless $line =~ s/^ \# \s* define \s+ ( _? RXf_ \w+ ) \s+ //xi;
            my $define = $1;
            chomp($line);
            $line =~ s{ /\* .*? \*/ }{ }x;    # Replace comments by a blank

            # Replace any prior defined symbols by their values
            foreach my $key ( keys %definitions ) {
                $line =~ s/\b$key\b/$definitions{$key}/g;
            }

            # Remove the U suffix from unsigned int literals
            $line =~ s/\b([0-9]+)U\b/$1/g;

            # Get numeric definition. The text comes from perl's own header
            # files. A define that does not evaluate as Perl yields undef
            # here; that is deliberately not treated as fatal.
            my $newval = eval $line;

            $definitions{$define} = $newval;

            next unless $line =~ /<</;    # Bit defines use left shift
            if ( $val & $newval ) {
                my @names = ( $define, $reverse{$newval} );
                s/PMf_// for @names;
                if ( $names[0] ne $names[1] ) {
                    die sprintf
                        "ERROR: both $define and $reverse{$newval} use 0x%08X (%s:%s)",
                        $newval, $orig, $line;
                }
                next;
            }
            $val |= $newval;
            $rxfv{$define}    = $newval;
            $reverse{$newval} = $define;
        }
    }
    my %vrxf = reverse %rxfv;
    printf $out "\t/* Bits in extflags defined: %s */\n", unpack 'B*', pack 'N',
        $val;
    my %multibits;
    for my $bit ( 0 .. 31 ) {
        my $power_of_2 = 2**$bit;
        my $n          = $vrxf{$power_of_2};
        my $extra      = "";
        if ( !$n ) {

            # Here, there was no name that matched exactly the bit. It could be
            # either that it is unused, or the name matches multiple bits.
            if ( !( $val & $power_of_2 ) ) {
                $n = "UNUSED_BIT_$bit";
            }
            else {

                # Here, must be because it matches multiple bits. Look through
                # all possibilities until find one that matches this one. Use
                # that name, and all the bits it matches. The names are
                # visited in sorted order so the output is reproducible.
                foreach my $name ( sort keys %rxfv ) {
                    next unless $rxfv{$name} & $power_of_2;
                    $n     = $name . ( $multibits{$name}++ );
                    $extra = sprintf qq{ : "%s" - 0x%08x}, $name,
                        $rxfv{$name}
                        if $power_of_2 != $rxfv{$name};
                    last;
                }
            }
        }
        s/\bRXf_(PMf_)?// for $n, $extra;
        printf $out qq(\t%-20s/* 0x%08x%s */\n), qq("$n",), $power_of_2, $extra;
        $REG_EXTFLAGS_NAME_SIZE++;
    }

    print $out <<EOP;
};
#endif /* DOINIT */

#ifdef DEBUGGING
# define REG_EXTFLAGS_NAME_SIZE $REG_EXTFLAGS_NAME_SIZE
#endif
EOP

    return;
}

# Print to $out the PL_reg_intflags_name[] table: a name for each PREGf_ bit
# defined in regcomp.h, with '*UNUSED*' entries for gaps. Dies if a PREGf_
# define has more than one bit set or if two defines use the same bit.
sub print_reg_intflags_name {
    my ($out) = @_;
    print $out <<EOP;

/* PL_reg_intflags_name[] - Opcode/state names in string form, for debugging */

#ifndef DOINIT
EXTCONST char * PL_reg_intflags_name[];
#else
EXTCONST char * const PL_reg_intflags_name[] = {
EOP

    my $REG_INTFLAGS_NAME_SIZE = 0;
    my $hp = HeaderParser->new();
    foreach my $file ("regcomp.h") {
        $hp->read_file($file);
        my @bit_tuples;
        foreach my $line_info ( @{ $hp->lines } ) {
            next unless $line_info->{type} eq "content"
                and $line_info->{sub_type} eq "#define";
            my $line = $line_info->{line};
            $line =~ s/\s*\\\n\s*/ /g;

            # Pick out the full symbol, its abbreviation (the part after
            # 'PREGf_'), the hex value, and the trailing comment if any.
            next unless
                $line =~ m/^ \# \s* define \s+ ( PREGf_ ( \w+ ) ) \s+ 0x([0-9a-f]+)(?:\s*\/\*(.*)\*\/)?/xi;
            my ( $define, $abbr, $hex, $comment ) = ( $1, $2, $3, $4 );

            my $val = hex($hex);
            my $bin = sprintf "%b", $val;
            if ( $bin =~ /1.*?1/ ) { die "Not expecting multiple bits in PREGf" }
            my $bit = length($bin) - 1;
            $comment = $comment ? " - $comment" : "";
            if ( $bit_tuples[$bit] ) {
                die "Duplicate PREGf bit '$bit': $define $val ($hex)";
            }
            $bit_tuples[$bit] = [ $bit, $val, $abbr, $define, $comment ];
        }
        foreach my $i ( 0 .. $#bit_tuples ) {
            my $bit_tuple = $bit_tuples[$i]
                || [ $i, 1 << $i, "", "", "*UNUSED*" ];
            my ( $bit, $val, $abbr, $define, $comment ) = @$bit_tuple;
            printf $out qq(\t%-30s/* (1<<%2d) - 0x%08x - %s%s */\n),
                qq("$abbr",), $bit, $val, $define, $comment;
        }
        $REG_INTFLAGS_NAME_SIZE = 0 + @bit_tuples;
    }

    print $out <<EOP;
};
#endif /* DOINIT */

EOP
    print $out <<EOQ;
#ifdef DEBUGGING
# define REG_INTFLAGS_NAME_SIZE $REG_INTFLAGS_NAME_SIZE
#endif

EOQ

    return;
}

# Print to $out the generated code for the 'V' (varies) and 'S' (simple)
# opcode flag classes.
sub print_process_flags {
    my ($out) = @_;

    print $out process_flags( 'V', 'varies', <<'EOC');
/* The following have no fixed length. U8 so we can do strchr() on it. */
EOC

    print $out process_flags( 'S', 'simple', <<'EOC');

/* The following always have a length of 1. U8 we can do strchr() on it. */
/* (Note that length 1 means "one character" under UTF8, not "one octet".) */
EOC

    return;
}

# Rewrite pod/perldebguts.pod, replacing only the table of regexp nodes that
# lies between the '=for regcomp.pl begin' and '=for regcomp.pl end' markers.
# Everything outside the markers is copied through unchanged.
sub do_perldebguts {
    my $guts = open_new( 'pod/perldebguts.pod', '>' );

    # $node and $code are captured by the format compiled below, so the loop
    # further down must assign to these very variables.
    my $node;
    my $code;
    my $name_fmt  = '<' x ( $longest_name_length - 1 );
    my $descr_fmt = '<' x ( 58 - $longest_name_length );
    eval <<EOD or die $@;
format GuTS =
 ^*~~
 \$node->{pod_comment}
 ^$name_fmt ^<<<<<<<<< ^$descr_fmt~~
 \$node->{name}, \$code, defined \$node->{comment} ? \$node->{comment} : ''
.
1;
EOD

    my $old_fh = select($guts);
    $~ = "GuTS";

    open my $oldguts, '<', 'pod/perldebguts.pod'
        or die "$0 cannot open pod/perldebguts.pod for reading: $!";

    # Copy everything up to and including the begin marker.
    while (<$oldguts>) {
        print;
        last if /=for regcomp.pl begin/;
    }

    print <<'END_OF_DESCR';

 # TYPE arg-description [regnode-struct-suffix] [longjump-len] DESCRIPTION
END_OF_DESCR
    for my $n (@ops) {
        $node = $n;
        $code = "$node->{code} " . ( $node->{suffix} || "" );
        $code .= " $node->{longj}" if $node->{longj};
        if ( $node->{pod_comment} ||= "" ) {

            # Trim multiple blanks
            $node->{pod_comment} =~ s/^\n\n+/\n/;
            $node->{pod_comment} =~ s/\n\n+$/\n\n/;
        }
        write;
    }
    print "\n";

    # Skip the old table; the loop stops with the end marker in $_.
    while (<$oldguts>) {
        last if /=for regcomp.pl end/;
    }

    # Print the end marker, then the remainder of the file.
    do { print } while <$oldguts>;    #win32 can't unlink an open FH
    close $oldguts or die "Error closing pod/perldebguts.pod: $!";
    select $old_fh;
    close_and_rename($guts);

    return;
}

# Main: parse the definitions, then generate regnodes.h and refresh the
# node table in pod/perldebguts.pod.
my $confine_to_core = 'defined(PERL_CORE) || defined(PERL_EXT_RE_BUILD)';
read_definition("regcomp.sym");

# With DUMP set in the environment, show the parsed nodes and stop without
# generating anything.
if ( $ENV{DUMP} ) {
    require Data::Dumper;
    print Data::Dumper::Dumper( \@all );
    exit(1);
}
my $out = open_new( 'regnodes.h', '>',
    {
        by   => 'regen/regcomp.pl',
        from => [ 'regcomp.sym', 'op_reg_common.h', 'regexp.h' ],
    },
);
print $out "#if $confine_to_core\n\n";

# print_typedefs() must run before print_regnode_info(), which uses the
# typedef names it records.
print_typedefs($out);
print_state_defs($out);

print_regnode_name($out);
print_regnode_info($out);

print_reg_extflags_name($out);
print_reg_intflags_name($out);
print_process_flags($out);
print_process_EXACTish($out);
print $out "\n#endif /* $confine_to_core */\n";
read_only_bottom_close_and_rename($out);

do_perldebguts();