1#! /bin/sh
2# -*- tcl -*- \
3exec tclsh "$0" ${1+"$@"}
4
5# @@ Meta Begin
6# Application page 1.0
7# Meta platform     tcl
8# Meta summary      Tool for general text transformation
9# Meta description  While the name is an allusion to parser
10# Meta description  generation, the modular plugin-based
11# Meta description  nature of this application allows for
12# Meta description  any type of text transformation which
13# Meta description  can be put into a plugin. Still, the
14# Meta description  plugins coming with Tcllib all deal
15# Meta description  with parser generation.
16# Meta category     Processing text files
17# Meta subject      {parser generation} {text transformation}
18# Meta require      page::pluginmgr
19# Meta require      logger
20# Meta require      struct::matrix
21# Meta author       Andreas Kupries
22# Meta license      BSD
23# @@ Meta End
24
25package provide page 1.0
26
27lappend auto_path [file join [file dirname [file dirname [file normalize [info script]]]] modules]
28
29#lappend auto_path [file join [file dirname [info script]] .. modules]
30#source [file join [file dirname [info script]] .. modules struct tree.tcl]
31
32# /=
33#  $Id: page,v 1.2 2007/03/28 17:49:44 andreas_kupries Exp $
34# \=
35#
36# PAGE - PArser GEnerator | GTT - General Text Transformation
37# ==== = ================ + === = ===========================
38#
39# Use cases
40# ---------
41#
42# (1)	Read a grammar specification and write out code implementing a
43#	parser for that grammar.
44#
45# (2)	As (1), and additionally allow the user to select between a
46#	number of different backends for writing the results.
47#	Different forms for the same parser, pretty printing the
48#	grammar, different parser types (LL vs LR vs ...). Etc.
49#
50# (3)	As (1) and/or (2), and additionally allow the user to select
51#	the frontend, i.e. the part reading the grammar. This allows
52#	the use of different input grammars for the specification of
53#	grammars, i.e. PEG, Yacc, Tyacc, Coco, etc.
54#
55#	Note: For grammars it may be possible to write a unifying
56#	frontend whose reader grammar is able to recognize many
57#	different grammar formats without requiring the user to
58#	specify which format the supplied input is in.
59#
60# (4)	As (1) and/or (2), and/or (3), and additionally allow the user
61#	to select the transformations to execute on the data provided
62#	by the frontend before it is given to the backend. At this
63#	point the parser generator has transformed into a general tool
64#	for the reading, transformation, and writing of any type of
65#	structured information.
66#
67# Note:	For the use cases from (1) to (3) the representations returned
68#	by the frontend, and taken by the backend have to be fully
69#	specified to ensure that all the parts are working together.
70#	For the use case (4) it becomes the responsibility of the user
71#	of the tool to specify frontend, backed, and transformations
72#	which work properly together.
73
74# Command syntax
75# --------------
76# 
77# Ad 1)	page ?-rd peg|hb|ser? ?-gen tpcp|hb|ser|tree|peg|me|null? ?-min no|reach|use|all? [input|"-" [output|"-"]]
78#
79#	The tool reads the grammar from the specified inputfile,
80#	transforms it as needed and then writes the resulting parser
81#	to the outputfile. Usage of "-" for the input signals that the
82#	grammar should be read from stdin. Analoguously usage of "-"
83#	for the output signals that the results should be written to
84#	stdout.
85#
86#	Unspecified parts of the command line default to "-".
87#
88# Ad 2)	Not specified yet.
89# Ad 3) S.a.
90# Ad 4) S.a.
91
92# ### ### ### ######### ######### #########
93## Requisites
94
95package require page::pluginmgr ; # Management of the PAGE plugins.
96package require logger          ; # Logging subsystem for debugging.
97package require struct::matrix  ; # Matrices. For statistics report
98
99# ### ### ### ######### ######### #########
100## Internal data and status
101
102namespace eval ::page {
103    # Path to where the output goes to. The name of a file, or "-" for
104    # stdout.
105
106    variable  output ""
107
108    # Path to where the input comes from. The name of a file, or "-"
109    # for stdin.
110
111    variable  input  ""
112
113    # Boolean flag. Input processing is timed.
114
115    variable timed 0
116
117    # Boolean flag. Input processing has progressbar.
118
119    variable progress 0
120
121    # Reader plugin and options.
122
123    variable rd {}
124
125    # List of transforms and their options.
126
127    variable tr {}
128
129    # Writer plugin an options.
130
131    variable wr {}
132
133    # ### ### ### ######### ######### #########
134
135    # Statistics.
136    # The number of characters read from the input.
137
138    variable nread 0
139
140    # Progress
141    # Counter for when to print progress notification.
142
143    variable ncount 0
144    variable ndelta 100
145
146    # Collected statistical output. A matrix object, for proper
147    # columnar formatting when generating the report. And the last
148    # non-empty string in the first column, to prevent repetition.
149
150    variable statistics {}
151    variable slast      {}
152
153    # ### ### ### ######### ######### #########
154}
155
156# ### ### ### ######### ######### #########
157## External data and status
158
159# This tool does not use external files to save and load status
160# information. It has no history. If history is required, or data
161# beyond the regular input see use cases (2-4). These may allow the
162# specification of options specific to the selected frontend, backend,
163# and transformations.
164
165# ### ### ### ######### ######### #########
166## Option processing.
167## Validate command line.
168## Full command line syntax.
169##
170# page [input|"-" [output|"-"]]
171##
172
173proc ::page::ProcessCmdline {} {
174    global argv
175
176    variable output
177    variable input
178
179    set logging 0
180    set n [ProcessArguments]
181
182    # No options at all => Default -c peg.
183
184    if {!$n} {
185	set argv [linsert $argv 0 -c peg]
186	ProcessArguments
187    }
188
189    # Additional validation, and extraction of the non-option
190    # arguments.
191
192    if {[llength $argv] > 2} Usage
193
194    set input  [lindex $argv 0]
195    set output [lindex $argv 1]
196
197    # Final validation across the whole configuration.
198
199    if {$input eq ""} {
200	set input -
201    } elseif {$input ne "-"} {
202	CheckInputFile $input {Input file}
203    }
204
205    if {$output eq ""} {
206	set output -
207    } elseif {$output ne "-"} {
208	CheckTheOutput
209    }
210
211    CheckReader
212    CheckWriter
213    CheckTransforms
214
215    if {$logging} {   
216	pluginmgr::log [::logger::init page]
217    } else {
218	pluginmgr::log {}
219    }
220    return
221}
222
223proc ::page::ProcessArguments {} {
224    global argv
225    upvar 1 logging logging
226
227    variable rd       {}
228    variable tr       {}
229    variable wr       {}
230    variable timed    0
231    variable progress 0
232
233    # Process the options, perform basic validation.
234
235    set type     {}
236    set name     {}
237    set options  {}
238    set mode     {}
239    set nextmode {}
240
241    set noptions 0
242
243    while {[llength $argv]} {
244	#puts ([join $argv ") ("])
245
246	set opt [lindex $argv 0]
247	if {![string match "-*" $opt]} {
248	    # End of options reached.
249	    break
250	}
251	incr noptions
252	Shift
253	switch -exact -- $opt {
254	    --help - -h - -? {Usage}
255	    --version - -V   {Version}
256
257	    -v - --verbose - --log   {set logging 1}
258	    -q - --quiet   - --nolog {set logging 0}
259
260	    -P {set progress 1}
261	    -T {set timed    1}
262
263	    -D {
264		# Activate logging in the safe base for better debugging.
265		::safe::setLogCmd {puts stderr}
266	    }
267
268	    -r - -rd - --reader {
269		Complete
270		set type    rd
271		set name    [Shift]
272		set options {}
273	    }
274	    -w - -wr - --writer {
275		Complete
276		set type    wr
277		set name    [Shift]
278		set options {}
279	    }
280	    -t - -tr - --transform {
281 		Complete
282		set type    tr
283		set name    [Shift]
284		if {$mode eq ""} {set mode tail}
285		set options {}
286	    }
287	    -c - --config {
288		set configfile [Shift]
289		if {($configfile eq "") || [catch {
290		    set newargv [pluginmgr::configuration \
291			    $configfile]
292		} msg]} {
293		    set msg [string map {
294			{Unable to locate}
295			{Unable to locate configuration}} $msg]
296
297		    ArgError "Bad argument \"$configfile\".\n\t$msg"
298		}
299
300		if {[llength $newargv]} {
301		    if {![llength $argv]} {
302			set argv $newargv
303		    } else {
304			# linsert argv 0 {expanded}newargv
305			# --------------
306			#        linsert options 0 (linsert argv 0)
307
308			set argv [eval [linsert $newargv 0 linsert $argv 0]]
309			#set argv [linsert $argv 0 {expand}$options]
310		    }
311		}
312	    }
313	    -p - --prepend {set nextmode head}
314	    -a - --append  {set nextmode tail}
315
316	    --reset        {Complete ; set tr {}}
317
318	    default {
319		# All unknown options go into the
320		# configuration of the last plugin
321		# defined (-r, -w, -t)
322		lappend options $opt [Shift]
323	    }
324	}
325    }
326
327    Complete
328    return $noptions
329}
330
331proc ::page::Shift {} {
332    upvar 1 argv argv
333    if {![llength $argv]} {return {}}
334    set first [lindex $argv 0]
335    set argv [lrange $argv 1 end]
336    return $first
337}
338
339proc ::page::Complete {} {
340    upvar 1 type type name name options options mode mode \
341	    nextmode nextmode rd rd wr wr tr tr
342
343    #puts "$type $name ($options) \[$mode/$nextmode\]"
344
345    set currentmode $mode
346    if {$nextmode ne $mode} {
347	set mode $nextmode
348    }
349
350    if {$type eq ""} return
351
352    switch -exact -- $type {
353	rd {set rd [list $name $options]}
354	wr {set wr [list $name $options]}
355	tr {
356	    if {$currentmode eq "tail"} {
357		lappend tr [list $name $options]
358	    } else {
359		set tr [linsert $tr 0  [list $name $options]]
360	    }
361	}
362    }
363    return
364}
365
366# ### ### ### ######### ######### #########
367## Option processing.
368## Helpers: Generation of error messages.
369## I.  General usage/help message.
370## II. Specific messages.
371#
372# Both write their messages to stderr and then
373# exit the application with status 1.
374##
375
376proc ::page::Usage {} {
377    global argv0
378    puts stderr "Expected $argv0 ?options? ?inputpath|- ?outputpath|-??"
379
380    puts stderr "    --help, -h, -?        This help"
381    puts stderr "    --version, -V,        Version information"
382    puts stderr "    -v, --verbose, --log  Activate logging in all loaded plugins"
383    puts stderr "    -q, --quiet, --nolog  Disable logging in all loaded plugins"
384    puts stderr "    -P                    Activate progress feedback"
385    puts stderr "    -T                    Activate collection of timings"
386    puts stderr "    -r reader             Specify input plugin"
387    puts stderr "    -rd, --reader         See above"
388    puts stderr "    -w writer             Specify output plugin"
389    puts stderr "    -wr, --writer         See above"
390    puts stderr "    -t transform          Specify processing plugin"
391    puts stderr "    -tr, --transform      See above"
392    puts stderr "    -p, --prepend         Place processing at front"
393    puts stderr "    -a, --append          Place processing at end"
394    puts stderr "    --reset               Clear list of transforms"
395    puts stderr "    -c file               Read configuration file"
396    puts stderr "    --configuration       See above."
397    puts stderr "    "
398
399    # --log, --nolog, -v, --verbose, -q, --quiet
400
401    exit 1
402}
403
404proc ::page::Version {} {
405    puts stderr {$Id: page,v 1.2 2007/03/28 17:49:44 andreas_kupries Exp $}
406    exit 1
407}
408
409proc ::page::ArgError {text} {
410    global argv0
411    puts stderr "$argv0: $text"
412    exit 1
413}
414
415proc in {list item} {
416    expr {([lsearch -exact $list $item] >= 0)}
417}
418
419# ### ### ### ######### ######### #########
420## Check existence and permissions of an input/output file
421
422proc ::page::CheckReader {} {
423    variable rd
424
425    if {![llength $rd]} {
426	ArgError "Input processing module is missing"
427    }
428
429    foreach {name options} $rd break
430
431    if {[catch {
432	set po [pluginmgr::reader $name]
433    } msg]} {
434	set msg [string map {
435	    {Unable to locate}
436	    {Unable to locate reader}} $msg]
437
438	ArgError "Bad argument \"$name\".\n\t$msg"
439    }
440
441    set opt {}
442    foreach {k v} $options {
443	if {![in $po $k]} {
444	    ArgError "Input plugin $name: Bad option $k"
445	}
446	lappend opt $k $v
447    }
448
449    pluginmgr::rconfigure $opt
450    return
451}
452
453proc ::page::CheckWriter {} {
454    variable wr
455
456    if {![llength $wr]} {
457	ArgError "Output module is missing"
458    }
459
460    foreach {name options} $wr break
461
462    if {[catch {
463	set po [pluginmgr::writer $name]
464    } msg]} {
465	set msg [string map {
466	    {Unable to locate}
467	    {Unable to locate writer}} $msg]
468
469	ArgError "Bad argument \"$name\".\n\t$msg"
470    }
471
472    set opt {}
473    foreach {k v} $options {
474	if {![in $po $k]} {
475	    ArgError "Output plugin $name: Bad option $k"
476	}
477	lappend opt $k $v
478    }
479
480    pluginmgr::wconfigure $opt
481    return
482}
483
484proc ::page::CheckTransforms {} {
485    variable tr
486
487    set idlist {}
488    foreach t $tr {
489	foreach {name options} $t break
490
491	if {[catch {
492	    foreach {id po} \
493		    [pluginmgr::transform $name] \
494		    break
495	} msg]} {
496	    set msg [string map {
497		{Unable to locate}
498		{Unable to locate transformation}} $msg]
499
500	    ArgError "Bad argument \"$name\".\n\t$msg"
501	}
502
503	set opt {}
504	foreach {k v} $options {
505	    if {![in $po $k]} {
506		ArgError "Processing plugin $name: Bad option $k"
507	    }
508	    lappend opt $k $v
509	}
510
511	pluginmgr::tconfigure $id $opt
512	lappend idlist $id
513    }
514
515    set tr $idlist
516    return
517}
518
519proc ::page::CheckInputFile {f label} {
520    if {![file exists $f]} {
521	ArgError "Unable to find $label \"$f\""
522    } elseif {![file isfile $f]} {
523	ArgError "$label \"$f\" is not a file"
524    } elseif {![file readable $f]} {
525	ArgError "$label \"$f\" not readable (permission denied)"
526    }
527    return
528}
529
530proc ::page::CheckTheOutput {} {
531    variable output
532
533    set base [file dirname $output]
534    if {$base eq ""} {set base [pwd]}
535
536    if {![file exists $output]} {
537	if {![file exists $base]} {
538	    ArgError "Output base path \"$base\" not found"
539	}
540	if {![file writable $base]} {
541	    ArgError "Output base path \"$base\" not writable (permission denied)"
542	}
543    } elseif {![file writable $output]} {
544	ArgError "Output path \"$output\" not writable (permission denied)"
545    } elseif {![file isfile $output]} {
546	ArgError "Output path \"$output\" is not a file"
547    }
548
549    return
550}
551
552# ### ### ### ######### ######### #########
553## Commands implementing the main functionality.
554
555proc ::page::Read {} {
556    variable input
557    variable progress
558    variable timed
559    variable nread
560
561    set    label \[[pluginmgr::rlabel]\]
562    set    msg ""
563    append msg $label  " "
564
565    if {$input eq "-"} {
566	append msg {Reading grammar from stdin}
567	set chan stdin
568    } else {
569	append msg {Reading grammar from file "} $input {"}
570	set chan [open $input r]
571    }
572
573    pluginmgr::report info $msg
574
575    if {!$timed && !$progress} {
576	# Regular run
577	set data [pluginmgr::read \
578		[list read $chan] [list eof $chan]]
579
580    } elseif {$timed && $progress} {
581	# Timed, with feedback
582	if {[pluginmgr::rtimeable]} {
583	    pluginmgr::rtime
584	    set data [pluginmgr::read \
585		    [list ::page::ReadPT $chan] [list eof $chan] \
586		    ::page::ReadComplete]
587	    set usec [pluginmgr::rgettime]
588	} else {
589	    set usec [lindex [time {
590		set data [pluginmgr::read \
591			[list ::page::ReadPT $chan] [list eof $chan] \
592			::page::ReadComplete]
593	    }] 0] ; # {}
594	}
595    } elseif {$timed} {
596	# Timed only
597	if {[pluginmgr::rtimeable]} {
598	    pluginmgr::rtime
599	    set data [pluginmgr::read \
600		    [list ::page::ReadT $chan] [list eof $chan]]
601	    set usec [pluginmgr::rgettime]
602	} else {
603	    set usec [lindex [time {
604		set data [pluginmgr::read \
605			[list ::page::ReadT $chan] [list eof $chan]]
606	    }] 0] ; # {}
607	}
608    } else {
609	# Feedback only ...
610	set data [pluginmgr::read \
611		[list ::page::ReadPT $chan] [list eof $chan] \
612		::page::ReadComplete]
613    }
614
615    if {$input ne "-"} {
616	close $chan
617    }
618
619    if {$timed} {
620	Statistics $label "Characters:"    $nread
621	Statistics $label "Seconds:"       [expr {double($usec)/1000000}]
622	Statistics $label "Char/Seconds:"  [expr {1000000*double($nread)/$usec}]
623	Statistics $label "Microseconds:"  $usec
624	Statistics $label "Microsec/Char:" [expr {$usec/double($nread)}]
625    } elseif {$progress} {
626	pluginmgr::report info "  Read $nread [expr {$nread == 1 ? "character" : "characters"}]"
627    }
628    return $data
629}
630
631proc ::page::Transform {data} {
632    variable timed
633    variable tr
634
635    if {$data eq ""} {return $data}
636
637    if 0 {
638	pluginmgr::report info ----------------------------
639	foreach tid $tr {
640	    set label "\[[pluginmgr::tlabel $tid]\]"
641	    pluginmgr::report info $label
642	}
643	pluginmgr::report info ----------------------------
644    }
645
646    #puts /($data)/
647
648    foreach tid $tr {
649	set label "\[[pluginmgr::tlabel $tid]\]"
650
651	pluginmgr::report info $label
652
653	if {!$timed} {
654	    set data [pluginmgr::transform_do $tid $data]
655	} else {
656	    if {[pluginmgr::ttimeable $tid]} {
657		pluginmgr::ttime $tid
658		set data [pluginmgr::transform_do $tid $data]
659		set usec [pluginmgr::tgettime $tid]
660	    } else {
661		set usec [lindex [time {
662		    set data [pluginmgr::transform_do $tid $data]
663		}] 0]; #{}
664	    }
665	    Statistics $label Seconds: [expr {double($usec)/1000000}]
666	}
667    }
668    return $data
669}
670
671proc ::page::Write {data} {
672    variable timed
673    variable output
674
675    if {$data eq ""} {return $data}
676
677    set    label \[[pluginmgr::wlabel]\]
678    set    msg   ""
679    append msg   $label " "
680
681    if {$output eq "-"} {
682	append msg {Writing to stdout}
683	set chan stdout
684    } else {
685	append msg {Writing to file "} $output {"}
686	set chan [open $output w]
687    }
688
689    pluginmgr::report info $msg
690
691    if {!$timed} {
692	pluginmgr::write $chan $data
693    } else {
694	if {[pluginmgr::wtimeable]} {
695	    pluginmgr::wtime
696	    pluginmgr::write $chan $data
697	    set usec [pluginmgr::wgettime]
698	} else {
699	    set usec [lindex [time {
700		pluginmgr::write $chan $data
701	    }] 0]; #{}
702	}
703	Statistics $label Seconds: [expr {double($usec)/1000000}]
704    }
705
706    if {$output ne "-"} {
707	close $chan
708    }
709    return
710}
711
712proc ::page::StatisticsBegin {} {
713    variable timed
714    variable statistics
715    if {!$timed} return
716
717    set statistics [struct::matrix ::page::STAT]
718
719    Statistics _Statistics_________
720    return
721}
722
723proc ::page::Statistics {module args} {
724    variable statistics
725    variable slast
726
727    set n [expr {1+[llength $args]}]
728
729    if {[$statistics columns] < $n} {
730	$statistics add columns [expr {
731	    $n - [$statistics columns]
732	}] ; # {}
733    }
734
735    if {$module eq $slast} {
736	set prefix ""
737    } else {
738	set prefix $module
739	set slast  $module
740    }
741
742    $statistics add row [linsert $args 0 $prefix]
743    return
744}
745
746proc ::page::StatisticsComplete {} {
747    variable timed
748    variable statistics
749    if {!$timed} return
750
751    pluginmgr::report info ""
752    foreach line [split [$statistics \
753	    format 2string] \n] {
754	pluginmgr::report info $line
755    }
756    return
757}
758
759# ### ### ### ######### ######### #########
760## Helper commands.
761
762proc ::page::ReadPT {chan {n {}}} {
763    variable nread
764    variable ncount
765    variable ndelta
766
767    if {$n eq ""} {
768	set data [read $chan]
769    } else {
770	set data [read $chan $n]
771    }
772
773    set  n [string length $data]
774    incr nread $n
775
776    while {$ncount < $nread} {
777	puts -nonewline stderr .
778	flush stderr
779	incr ncount $ndelta
780    }
781
782    return $data
783}
784
785proc ::page::ReadComplete {} {
786    puts  stderr ""
787    flush stderr
788    return
789}
790
791proc ::page::ReadT {chan {n {}}} {
792    variable nread
793
794    if {$n eq ""} {
795	set data [read $chan]
796    } else {
797	set data [read $chan $n]
798    }
799
800    set  n [string length $data]
801    incr nread $n
802
803    return $data
804}
805
806# ### ### ### ######### ######### #########
807## Invoking the functionality.
808
809if {[catch {
810    ::page::ProcessCmdline
811    ::page::StatisticsBegin
812    ::page::Write [::page::Transform [::page::Read]]
813    ::page::StatisticsComplete
814} msg]} {
815    puts $::errorInfo
816    #::page::ArgError $msg
817}
818
819# ### ### ### ######### ######### #########
820exit
821