1#! /bin/sh 2# -*- tcl -*- \ 3exec tclsh "$0" ${1+"$@"} 4 5# @@ Meta Begin 6# Application page 1.0 7# Meta platform tcl 8# Meta summary Tool for general text transformation 9# Meta description While the name is an allusion to parser 10# Meta description generation, the modular plugin-based 11# Meta description nature of this application allows for 12# Meta description any type of text transformation which 13# Meta description can be put into a plugin. Still, the 14# Meta description plugins coming with Tcllib all deal 15# Meta description with parser generation. 16# Meta category Processing text files 17# Meta subject {parser generation} {text transformation} 18# Meta require page::pluginmgr 19# Meta require logger 20# Meta require struct::matrix 21# Meta author Andreas Kupries 22# Meta license BSD 23# @@ Meta End 24 25package provide page 1.0 26 27lappend auto_path [file join [file dirname [file dirname [file normalize [info script]]]] modules] 28 29#lappend auto_path [file join [file dirname [info script]] .. modules] 30#source [file join [file dirname [info script]] .. modules struct tree.tcl] 31 32# /= 33# $Id: page,v 1.2 2007/03/28 17:49:44 andreas_kupries Exp $ 34# \= 35# 36# PAGE - PArser GEnerator | GTT - General Text Transformation 37# ==== = ================ + === = =========================== 38# 39# Use cases 40# --------- 41# 42# (1) Read a grammar specification and write out code implementing a 43# parser for that grammar. 44# 45# (2) As (1), and additionally allow the user to select between a 46# number of different backends for writing the results. 47# Different forms for the same parser, pretty printing the 48# grammar, different parser types (LL vs LR vs ...). Etc. 49# 50# (3) As (1) and/or (2), and additionally allow the user to select 51# the frontend, i.e. the part reading the grammar. This allows 52# the use of different input grammars for the specification of 53# grammars, i.e. PEG, Yacc, Tyacc, Coco, etc. 54# 55# Note: For grammars it may be possible to write a unifying 56# frontend whose reader grammar is able to recognize many 57# different grammar formats without requiring the user to 58# specify which format the supplied input is in. 59# 60# (4) As (1) and/or (2), and/or (3), and additionally allow the user 61# to select the transformations to execute on the data provided 62# by the frontend before it is given to the backend. At this 63# point the parser generator has transformed into a general tool 64# for the reading, transformation, and writing of any type of 65# structured information. 66# 67# Note: For the use cases from (1) to (3) the representations returned 68# by the frontend, and taken by the backend have to be fully 69# specified to ensure that all the parts are working together. 70# For the use case (4) it becomes the responsibility of the user 71# of the tool to specify frontend, backed, and transformations 72# which work properly together. 73 74# Command syntax 75# -------------- 76# 77# Ad 1) page ?-rd peg|hb|ser? ?-gen tpcp|hb|ser|tree|peg|me|null? ?-min no|reach|use|all? [input|"-" [output|"-"]] 78# 79# The tool reads the grammar from the specified inputfile, 80# transforms it as needed and then writes the resulting parser 81# to the outputfile. Usage of "-" for the input signals that the 82# grammar should be read from stdin. Analoguously usage of "-" 83# for the output signals that the results should be written to 84# stdout. 85# 86# Unspecified parts of the command line default to "-". 87# 88# Ad 2) Not specified yet. 89# Ad 3) S.a. 90# Ad 4) S.a. 91 92# ### ### ### ######### ######### ######### 93## Requisites 94 95package require page::pluginmgr ; # Management of the PAGE plugins. 96package require logger ; # Logging subsystem for debugging. 97package require struct::matrix ; # Matrices. For statistics report 98 99# ### ### ### ######### ######### ######### 100## Internal data and status 101 102namespace eval ::page { 103 # Path to where the output goes to. The name of a file, or "-" for 104 # stdout. 105 106 variable output "" 107 108 # Path to where the input comes from. The name of a file, or "-" 109 # for stdin. 110 111 variable input "" 112 113 # Boolean flag. Input processing is timed. 114 115 variable timed 0 116 117 # Boolean flag. Input processing has progressbar. 118 119 variable progress 0 120 121 # Reader plugin and options. 122 123 variable rd {} 124 125 # List of transforms and their options. 126 127 variable tr {} 128 129 # Writer plugin an options. 130 131 variable wr {} 132 133 # ### ### ### ######### ######### ######### 134 135 # Statistics. 136 # The number of characters read from the input. 137 138 variable nread 0 139 140 # Progress 141 # Counter for when to print progress notification. 142 143 variable ncount 0 144 variable ndelta 100 145 146 # Collected statistical output. A matrix object, for proper 147 # columnar formatting when generating the report. And the last 148 # non-empty string in the first column, to prevent repetition. 149 150 variable statistics {} 151 variable slast {} 152 153 # ### ### ### ######### ######### ######### 154} 155 156# ### ### ### ######### ######### ######### 157## External data and status 158 159# This tool does not use external files to save and load status 160# information. It has no history. If history is required, or data 161# beyond the regular input see use cases (2-4). These may allow the 162# specification of options specific to the selected frontend, backend, 163# and transformations. 164 165# ### ### ### ######### ######### ######### 166## Option processing. 167## Validate command line. 168## Full command line syntax. 169## 170# page [input|"-" [output|"-"]] 171## 172 173proc ::page::ProcessCmdline {} { 174 global argv 175 176 variable output 177 variable input 178 179 set logging 0 180 set n [ProcessArguments] 181 182 # No options at all => Default -c peg. 183 184 if {!$n} { 185 set argv [linsert $argv 0 -c peg] 186 ProcessArguments 187 } 188 189 # Additional validation, and extraction of the non-option 190 # arguments. 191 192 if {[llength $argv] > 2} Usage 193 194 set input [lindex $argv 0] 195 set output [lindex $argv 1] 196 197 # Final validation across the whole configuration. 198 199 if {$input eq ""} { 200 set input - 201 } elseif {$input ne "-"} { 202 CheckInputFile $input {Input file} 203 } 204 205 if {$output eq ""} { 206 set output - 207 } elseif {$output ne "-"} { 208 CheckTheOutput 209 } 210 211 CheckReader 212 CheckWriter 213 CheckTransforms 214 215 if {$logging} { 216 pluginmgr::log [::logger::init page] 217 } else { 218 pluginmgr::log {} 219 } 220 return 221} 222 223proc ::page::ProcessArguments {} { 224 global argv 225 upvar 1 logging logging 226 227 variable rd {} 228 variable tr {} 229 variable wr {} 230 variable timed 0 231 variable progress 0 232 233 # Process the options, perform basic validation. 234 235 set type {} 236 set name {} 237 set options {} 238 set mode {} 239 set nextmode {} 240 241 set noptions 0 242 243 while {[llength $argv]} { 244 #puts ([join $argv ") ("]) 245 246 set opt [lindex $argv 0] 247 if {![string match "-*" $opt]} { 248 # End of options reached. 249 break 250 } 251 incr noptions 252 Shift 253 switch -exact -- $opt { 254 --help - -h - -? {Usage} 255 --version - -V {Version} 256 257 -v - --verbose - --log {set logging 1} 258 -q - --quiet - --nolog {set logging 0} 259 260 -P {set progress 1} 261 -T {set timed 1} 262 263 -D { 264 # Activate logging in the safe base for better debugging. 265 ::safe::setLogCmd {puts stderr} 266 } 267 268 -r - -rd - --reader { 269 Complete 270 set type rd 271 set name [Shift] 272 set options {} 273 } 274 -w - -wr - --writer { 275 Complete 276 set type wr 277 set name [Shift] 278 set options {} 279 } 280 -t - -tr - --transform { 281 Complete 282 set type tr 283 set name [Shift] 284 if {$mode eq ""} {set mode tail} 285 set options {} 286 } 287 -c - --config { 288 set configfile [Shift] 289 if {($configfile eq "") || [catch { 290 set newargv [pluginmgr::configuration \ 291 $configfile] 292 } msg]} { 293 set msg [string map { 294 {Unable to locate} 295 {Unable to locate configuration}} $msg] 296 297 ArgError "Bad argument \"$configfile\".\n\t$msg" 298 } 299 300 if {[llength $newargv]} { 301 if {![llength $argv]} { 302 set argv $newargv 303 } else { 304 # linsert argv 0 {expanded}newargv 305 # -------------- 306 # linsert options 0 (linsert argv 0) 307 308 set argv [eval [linsert $newargv 0 linsert $argv 0]] 309 #set argv [linsert $argv 0 {expand}$options] 310 } 311 } 312 } 313 -p - --prepend {set nextmode head} 314 -a - --append {set nextmode tail} 315 316 --reset {Complete ; set tr {}} 317 318 default { 319 # All unknown options go into the 320 # configuration of the last plugin 321 # defined (-r, -w, -t) 322 lappend options $opt [Shift] 323 } 324 } 325 } 326 327 Complete 328 return $noptions 329} 330 331proc ::page::Shift {} { 332 upvar 1 argv argv 333 if {![llength $argv]} {return {}} 334 set first [lindex $argv 0] 335 set argv [lrange $argv 1 end] 336 return $first 337} 338 339proc ::page::Complete {} { 340 upvar 1 type type name name options options mode mode \ 341 nextmode nextmode rd rd wr wr tr tr 342 343 #puts "$type $name ($options) \[$mode/$nextmode\]" 344 345 set currentmode $mode 346 if {$nextmode ne $mode} { 347 set mode $nextmode 348 } 349 350 if {$type eq ""} return 351 352 switch -exact -- $type { 353 rd {set rd [list $name $options]} 354 wr {set wr [list $name $options]} 355 tr { 356 if {$currentmode eq "tail"} { 357 lappend tr [list $name $options] 358 } else { 359 set tr [linsert $tr 0 [list $name $options]] 360 } 361 } 362 } 363 return 364} 365 366# ### ### ### ######### ######### ######### 367## Option processing. 368## Helpers: Generation of error messages. 369## I. General usage/help message. 370## II. Specific messages. 371# 372# Both write their messages to stderr and then 373# exit the application with status 1. 374## 375 376proc ::page::Usage {} { 377 global argv0 378 puts stderr "Expected $argv0 ?options? ?inputpath|- ?outputpath|-??" 379 380 puts stderr " --help, -h, -? This help" 381 puts stderr " --version, -V, Version information" 382 puts stderr " -v, --verbose, --log Activate logging in all loaded plugins" 383 puts stderr " -q, --quiet, --nolog Disable logging in all loaded plugins" 384 puts stderr " -P Activate progress feedback" 385 puts stderr " -T Activate collection of timings" 386 puts stderr " -r reader Specify input plugin" 387 puts stderr " -rd, --reader See above" 388 puts stderr " -w writer Specify output plugin" 389 puts stderr " -wr, --writer See above" 390 puts stderr " -t transform Specify processing plugin" 391 puts stderr " -tr, --transform See above" 392 puts stderr " -p, --prepend Place processing at front" 393 puts stderr " -a, --append Place processing at end" 394 puts stderr " --reset Clear list of transforms" 395 puts stderr " -c file Read configuration file" 396 puts stderr " --configuration See above." 397 puts stderr " " 398 399 # --log, --nolog, -v, --verbose, -q, --quiet 400 401 exit 1 402} 403 404proc ::page::Version {} { 405 puts stderr {$Id: page,v 1.2 2007/03/28 17:49:44 andreas_kupries Exp $} 406 exit 1 407} 408 409proc ::page::ArgError {text} { 410 global argv0 411 puts stderr "$argv0: $text" 412 exit 1 413} 414 415proc in {list item} { 416 expr {([lsearch -exact $list $item] >= 0)} 417} 418 419# ### ### ### ######### ######### ######### 420## Check existence and permissions of an input/output file 421 422proc ::page::CheckReader {} { 423 variable rd 424 425 if {![llength $rd]} { 426 ArgError "Input processing module is missing" 427 } 428 429 foreach {name options} $rd break 430 431 if {[catch { 432 set po [pluginmgr::reader $name] 433 } msg]} { 434 set msg [string map { 435 {Unable to locate} 436 {Unable to locate reader}} $msg] 437 438 ArgError "Bad argument \"$name\".\n\t$msg" 439 } 440 441 set opt {} 442 foreach {k v} $options { 443 if {![in $po $k]} { 444 ArgError "Input plugin $name: Bad option $k" 445 } 446 lappend opt $k $v 447 } 448 449 pluginmgr::rconfigure $opt 450 return 451} 452 453proc ::page::CheckWriter {} { 454 variable wr 455 456 if {![llength $wr]} { 457 ArgError "Output module is missing" 458 } 459 460 foreach {name options} $wr break 461 462 if {[catch { 463 set po [pluginmgr::writer $name] 464 } msg]} { 465 set msg [string map { 466 {Unable to locate} 467 {Unable to locate writer}} $msg] 468 469 ArgError "Bad argument \"$name\".\n\t$msg" 470 } 471 472 set opt {} 473 foreach {k v} $options { 474 if {![in $po $k]} { 475 ArgError "Output plugin $name: Bad option $k" 476 } 477 lappend opt $k $v 478 } 479 480 pluginmgr::wconfigure $opt 481 return 482} 483 484proc ::page::CheckTransforms {} { 485 variable tr 486 487 set idlist {} 488 foreach t $tr { 489 foreach {name options} $t break 490 491 if {[catch { 492 foreach {id po} \ 493 [pluginmgr::transform $name] \ 494 break 495 } msg]} { 496 set msg [string map { 497 {Unable to locate} 498 {Unable to locate transformation}} $msg] 499 500 ArgError "Bad argument \"$name\".\n\t$msg" 501 } 502 503 set opt {} 504 foreach {k v} $options { 505 if {![in $po $k]} { 506 ArgError "Processing plugin $name: Bad option $k" 507 } 508 lappend opt $k $v 509 } 510 511 pluginmgr::tconfigure $id $opt 512 lappend idlist $id 513 } 514 515 set tr $idlist 516 return 517} 518 519proc ::page::CheckInputFile {f label} { 520 if {![file exists $f]} { 521 ArgError "Unable to find $label \"$f\"" 522 } elseif {![file isfile $f]} { 523 ArgError "$label \"$f\" is not a file" 524 } elseif {![file readable $f]} { 525 ArgError "$label \"$f\" not readable (permission denied)" 526 } 527 return 528} 529 530proc ::page::CheckTheOutput {} { 531 variable output 532 533 set base [file dirname $output] 534 if {$base eq ""} {set base [pwd]} 535 536 if {![file exists $output]} { 537 if {![file exists $base]} { 538 ArgError "Output base path \"$base\" not found" 539 } 540 if {![file writable $base]} { 541 ArgError "Output base path \"$base\" not writable (permission denied)" 542 } 543 } elseif {![file writable $output]} { 544 ArgError "Output path \"$output\" not writable (permission denied)" 545 } elseif {![file isfile $output]} { 546 ArgError "Output path \"$output\" is not a file" 547 } 548 549 return 550} 551 552# ### ### ### ######### ######### ######### 553## Commands implementing the main functionality. 554 555proc ::page::Read {} { 556 variable input 557 variable progress 558 variable timed 559 variable nread 560 561 set label \[[pluginmgr::rlabel]\] 562 set msg "" 563 append msg $label " " 564 565 if {$input eq "-"} { 566 append msg {Reading grammar from stdin} 567 set chan stdin 568 } else { 569 append msg {Reading grammar from file "} $input {"} 570 set chan [open $input r] 571 } 572 573 pluginmgr::report info $msg 574 575 if {!$timed && !$progress} { 576 # Regular run 577 set data [pluginmgr::read \ 578 [list read $chan] [list eof $chan]] 579 580 } elseif {$timed && $progress} { 581 # Timed, with feedback 582 if {[pluginmgr::rtimeable]} { 583 pluginmgr::rtime 584 set data [pluginmgr::read \ 585 [list ::page::ReadPT $chan] [list eof $chan] \ 586 ::page::ReadComplete] 587 set usec [pluginmgr::rgettime] 588 } else { 589 set usec [lindex [time { 590 set data [pluginmgr::read \ 591 [list ::page::ReadPT $chan] [list eof $chan] \ 592 ::page::ReadComplete] 593 }] 0] ; # {} 594 } 595 } elseif {$timed} { 596 # Timed only 597 if {[pluginmgr::rtimeable]} { 598 pluginmgr::rtime 599 set data [pluginmgr::read \ 600 [list ::page::ReadT $chan] [list eof $chan]] 601 set usec [pluginmgr::rgettime] 602 } else { 603 set usec [lindex [time { 604 set data [pluginmgr::read \ 605 [list ::page::ReadT $chan] [list eof $chan]] 606 }] 0] ; # {} 607 } 608 } else { 609 # Feedback only ... 610 set data [pluginmgr::read \ 611 [list ::page::ReadPT $chan] [list eof $chan] \ 612 ::page::ReadComplete] 613 } 614 615 if {$input ne "-"} { 616 close $chan 617 } 618 619 if {$timed} { 620 Statistics $label "Characters:" $nread 621 Statistics $label "Seconds:" [expr {double($usec)/1000000}] 622 Statistics $label "Char/Seconds:" [expr {1000000*double($nread)/$usec}] 623 Statistics $label "Microseconds:" $usec 624 Statistics $label "Microsec/Char:" [expr {$usec/double($nread)}] 625 } elseif {$progress} { 626 pluginmgr::report info " Read $nread [expr {$nread == 1 ? "character" : "characters"}]" 627 } 628 return $data 629} 630 631proc ::page::Transform {data} { 632 variable timed 633 variable tr 634 635 if {$data eq ""} {return $data} 636 637 if 0 { 638 pluginmgr::report info ---------------------------- 639 foreach tid $tr { 640 set label "\[[pluginmgr::tlabel $tid]\]" 641 pluginmgr::report info $label 642 } 643 pluginmgr::report info ---------------------------- 644 } 645 646 #puts /($data)/ 647 648 foreach tid $tr { 649 set label "\[[pluginmgr::tlabel $tid]\]" 650 651 pluginmgr::report info $label 652 653 if {!$timed} { 654 set data [pluginmgr::transform_do $tid $data] 655 } else { 656 if {[pluginmgr::ttimeable $tid]} { 657 pluginmgr::ttime $tid 658 set data [pluginmgr::transform_do $tid $data] 659 set usec [pluginmgr::tgettime $tid] 660 } else { 661 set usec [lindex [time { 662 set data [pluginmgr::transform_do $tid $data] 663 }] 0]; #{} 664 } 665 Statistics $label Seconds: [expr {double($usec)/1000000}] 666 } 667 } 668 return $data 669} 670 671proc ::page::Write {data} { 672 variable timed 673 variable output 674 675 if {$data eq ""} {return $data} 676 677 set label \[[pluginmgr::wlabel]\] 678 set msg "" 679 append msg $label " " 680 681 if {$output eq "-"} { 682 append msg {Writing to stdout} 683 set chan stdout 684 } else { 685 append msg {Writing to file "} $output {"} 686 set chan [open $output w] 687 } 688 689 pluginmgr::report info $msg 690 691 if {!$timed} { 692 pluginmgr::write $chan $data 693 } else { 694 if {[pluginmgr::wtimeable]} { 695 pluginmgr::wtime 696 pluginmgr::write $chan $data 697 set usec [pluginmgr::wgettime] 698 } else { 699 set usec [lindex [time { 700 pluginmgr::write $chan $data 701 }] 0]; #{} 702 } 703 Statistics $label Seconds: [expr {double($usec)/1000000}] 704 } 705 706 if {$output ne "-"} { 707 close $chan 708 } 709 return 710} 711 712proc ::page::StatisticsBegin {} { 713 variable timed 714 variable statistics 715 if {!$timed} return 716 717 set statistics [struct::matrix ::page::STAT] 718 719 Statistics _Statistics_________ 720 return 721} 722 723proc ::page::Statistics {module args} { 724 variable statistics 725 variable slast 726 727 set n [expr {1+[llength $args]}] 728 729 if {[$statistics columns] < $n} { 730 $statistics add columns [expr { 731 $n - [$statistics columns] 732 }] ; # {} 733 } 734 735 if {$module eq $slast} { 736 set prefix "" 737 } else { 738 set prefix $module 739 set slast $module 740 } 741 742 $statistics add row [linsert $args 0 $prefix] 743 return 744} 745 746proc ::page::StatisticsComplete {} { 747 variable timed 748 variable statistics 749 if {!$timed} return 750 751 pluginmgr::report info "" 752 foreach line [split [$statistics \ 753 format 2string] \n] { 754 pluginmgr::report info $line 755 } 756 return 757} 758 759# ### ### ### ######### ######### ######### 760## Helper commands. 761 762proc ::page::ReadPT {chan {n {}}} { 763 variable nread 764 variable ncount 765 variable ndelta 766 767 if {$n eq ""} { 768 set data [read $chan] 769 } else { 770 set data [read $chan $n] 771 } 772 773 set n [string length $data] 774 incr nread $n 775 776 while {$ncount < $nread} { 777 puts -nonewline stderr . 778 flush stderr 779 incr ncount $ndelta 780 } 781 782 return $data 783} 784 785proc ::page::ReadComplete {} { 786 puts stderr "" 787 flush stderr 788 return 789} 790 791proc ::page::ReadT {chan {n {}}} { 792 variable nread 793 794 if {$n eq ""} { 795 set data [read $chan] 796 } else { 797 set data [read $chan $n] 798 } 799 800 set n [string length $data] 801 incr nread $n 802 803 return $data 804} 805 806# ### ### ### ######### ######### ######### 807## Invoking the functionality. 808 809if {[catch { 810 ::page::ProcessCmdline 811 ::page::StatisticsBegin 812 ::page::Write [::page::Transform [::page::Read]] 813 ::page::StatisticsComplete 814} msg]} { 815 puts $::errorInfo 816 #::page::ArgError $msg 817} 818 819# ### ### ### ######### ######### ######### 820exit 821