1# -*- tcl -*-
2#
3# Copyright (c) 2005 by Andreas Kupries <andreas_kupries@users.sourceforge.net>
4# Parser Generator / Backend - PEG as ... PEG
5
6# ### ### ### ######### ######### #########
7## Dumping the input grammar. But not as Tcl or other code. In PEG
8## format again, pretty printing.
9
10# ### ### ### ######### ######### #########
11## Requisites
12
13package require textutil
14
15namespace eval ::page::gen::peg::canon {}
16
17# ### ### ### ######### ######### #########
18## API
19
20proc ::page::gen::peg::canon {t chan} {
21
22    # Generate data for inherited attributes
23    # used during synthesis.
24    canon::Setup $t
25
26    # Synthesize all text fragments we need.
27    canon::Synth $t
28
29    # And write the grammar text.
30    puts $chan [$t get root TEXT]
31    return
32}
33
34# ### ### ### ######### ######### #########
35## Internal. Helpers
36
37proc ::page::gen::peg::canon::Setup {t} {
38    # Phase 1: Top-down, inherited attributes:
39    #
40    # - Max length of nonterminal symbols defined by the grammar.
41    #
42    # - Indentation put on all rules to get enough space for
43    #   definition attributes.
44
45    set       max   -1
46    array set modes {}
47
48    foreach {sym def} [$t get root definitions] {
49	set l [string length $sym]
50	if {$l > $max} {set max $l}
51
52	set mode [string index [$t get $def mode] 0]
53	set modes($mode) .
54    }
55    set modeset [join [lsort [array names modes]] ""]
56    set mlen    [AttrFieldLength $modeset]
57    set heading [expr {$max + $mlen + 4}]
58    # The constant 4 is for ' <- ', see
59    # SynthNode/Nonterminal
60
61    # Save the computed information for access by the definitions and
62    # other operators.
63
64    $t set root SYM_FIELDLEN $max
65    $t set root ATT_FIELDLEN $mlen
66    $t set root ATT_BASE     $modeset
67    $t set root HEADLEN      $heading
68    return
69}
70
71proc ::page::gen::peg::canon::Synth {t} {
72    # Phase 2: Bottom-up, synthesized attributes
73    #
74    # - Text block per node, length and height.
75
76    $t walk root -order post -type dfs n {
77	SynthNode $t $n
78    }
79    return
80}
81
82proc ::page::gen::peg::canon::SynthNode {t n} {
83    if {$n eq "root"} {
84	set code Root
85    } elseif {[$t keyexists $n symbol]} {
86	set code Nonterminal
87    } elseif {[$t keyexists $n op]} {
88	set code [$t get $n op]
89    } else {
90	return -code error "PANIC. Bad node $n, cannot classify"
91    }
92
93    #puts stderr "SynthNode/$code $t $n"
94
95    SynthNode/$code $t $n
96
97    #SHOW [$t get $n TEXT] 1 0
98    #catch {puts stderr "\t.[$t get $n W]x[$t get $n H]"}
99    return
100}
101
102proc ::page::gen::peg::canon::SynthNode/Root {t n} {
103    # Root is the grammar itself.
104
105    # Get the data we need from our children, which are start
106    # expression and nonterminal definitions.
107
108    set gname  [$t get root name]
109    set gstart [$t get root start]
110    if {$gstart ne ""} {
111	set stext  [$t get $gstart TEXT]
112    } else {
113	puts stderr "No start expression."
114	set stext ""
115    }
116    set rules  {}
117    foreach {sym def} [$t get root definitions] {
118	lappend rules [list $sym [$t get $def TEXT]]
119    }
120
121    # Combine them into a text for the whole grammar.
122
123    set intro  "PEG $gname \("
124    set ispace [::textutil::blank [string length $intro]]
125
126    set    out ""
127    append out "# -*- text -*-" \n
128    append out "## Parsing Expression Grammar '$gname'." \n
129    append out "## Layouted by the PG backend 'PEGwriter'." \n
130    append out \n
131    append out $intro[::textutil::indent $stext $ispace 1]\)
132    append out \n
133    append out \n
134
135    foreach e [lsort -dict -index 0 $rules] {
136	foreach {sym text} $e break
137	append out $text \n
138	append out \n
139    }
140
141    append out "END\;" \n
142
143    $t set root TEXT $out
144    return
145}
146
147proc ::page::gen::peg::canon::SynthNode/Nonterminal {t n} {
148    # This is the root of a definition. We now
149    # have to combine the text block for the
150    # expression with nonterminal and attribute
151    # data.
152
153    variable ms
154
155    set abase [$t get root ATT_BASE]
156    set sfl   [$t get root SYM_FIELDLEN]
157    set mode  [$t get $n mode]
158    set sym   [$t get $n symbol]
159    set etext [$t get [lindex [$t children $n] 0] TEXT]
160
161    set    out ""
162    append out $ms($abase,$mode)
163    append out $sym
164    append out [::textutil::blank [expr {$sfl - [string length $sym]}]]
165    append out " <- "
166
167    set ispace [::textutil::blank [string length $out]]
168
169    append out [::textutil::indent $etext $ispace 1]
170    append out " ;"
171
172    $t set $n TEXT $out
173    return
174}
175
176proc ::page::gen::peg::canon::SynthNode/t {t n} {
177    # Terminal node. Primitive layout.
178    # Put the char into single or double quotes.
179
180    set ch [$t get $n char]
181    if {$ch eq "'"} {set q "\""} else {set q '}
182
183    set text $q$ch$q
184
185    SetBlock $t $n $text
186    return
187}
188
189proc ::page::gen::peg::canon::SynthNode/n {t n} {
190    # Nonterminal node. Primitive layout. Text is the name of smybol
191    # itself.
192
193    SetBlock $t $n [$t get $n sym]
194    return
195}
196
197proc ::page::gen::peg::canon::SynthNode/.. {t n} {
198    # Range is [x-y]
199    set b [$t get $n begin]
200    set e [$t get $n end]
201    SetBlock $t $n "\[${b}-${e}\]"
202    return
203}
204
205proc ::page::gen::peg::canon::SynthNode/alnum   {t n} {SetBlock $t $n <alnum>}
206proc ::page::gen::peg::canon::SynthNode/alpha   {t n} {SetBlock $t $n <alpha>}
207proc ::page::gen::peg::canon::SynthNode/dot     {t n} {SetBlock $t $n .}
208proc ::page::gen::peg::canon::SynthNode/epsilon {t n} {SetBlock $t $n ""}
209
210proc ::page::gen::peg::canon::SynthNode/? {t n} {SynthSuffix $t $n ?}
211proc ::page::gen::peg::canon::SynthNode/* {t n} {SynthSuffix $t $n *}
212proc ::page::gen::peg::canon::SynthNode/+ {t n} {SynthSuffix $t $n +}
213
214proc ::page::gen::peg::canon::SynthNode/! {t n} {SynthPrefix $t $n !}
215proc ::page::gen::peg::canon::SynthNode/& {t n} {SynthPrefix $t $n &}
216
217proc ::page::gen::peg::canon::SynthSuffix {t n op} {
218
219    set sub   [lindex [$t children $n] 0]
220    set sop   [$t get $sub op]
221    set etext [$t get $sub TEXT]
222
223    WrapParens $op $sop etext
224    SetBlock $t $n $etext$op
225    return
226}
227
228proc ::page::gen::peg::canon::SynthPrefix {t n op} {
229
230    set sub   [lindex [$t children $n] 0]
231    set sop   [$t get $sub op]
232    set etext [$t get $sub TEXT]
233
234    WrapParens $op $sop etext
235    SetBlock $t $n $op$etext
236    return
237}
238
239proc ::page::gen::peg::canon::SynthNode/x {t n} {
240    variable llen
241
242    # Space given to us for an expression.
243    set lend [expr {$llen - [$t get root HEADLEN]}]
244
245    set clist [$t children $n]
246    if {[llength $clist] == 1} {
247	# Implicit cutting out of chains.
248
249	CopyBlock $t $n [lindex $clist 0]
250
251	#puts stderr <<implicit>>
252	return
253    }
254
255    set out ""
256
257    # We are not tracking the total width of the block, but only the
258    # width of the current line, as that is where we may have to
259    # wrap. The height however is the total height.
260
261    #puts stderr <<$clist>>
262    #puts stderr \t___________________________________
263
264    set w 0
265    set h 0
266    foreach c $clist {
267	set sop   [$t get $c op]
268	set sub   [$t get $c TEXT]
269	set sw    [$t get $c W]
270	set slw   [$t get $c Wlast]
271	set sh    [$t get $c H]
272
273	#puts stderr \t<$sop/$sw/$slw/$sh>___________________________________
274	#SHOW $sub $slw $sh
275
276	if {[Paren x $sop]} {
277	    set sub "([::textutil::indent $sub " " 1])"
278	    incr slw 2
279	    incr sw  2
280
281	    #puts stderr /paren/
282	    #SHOW $sub $slw $sh
283	}
284
285	# Empty buffer ... Put element, and extend dimensions
286
287	#puts stderr \t.=============================
288	#SHOW $out $w $h
289
290	if {$w == 0} {
291	    #puts stderr /init
292	    append out $sub
293	    set w $slw
294	    set h $sh
295	} elseif {($w + $sw + 1) > $lend} {
296	    #puts stderr /wrap/[expr {($w + $sw + 1)}]/$lend
297	    # To large, wrap into next line.
298	    append out \n $sub
299	    incr h $sh
300	    set  w $slw
301	} else {
302	    # We have still space to put the block in. Either by
303	    # simply appending, or by indenting a multiline block
304	    # properly so that its parts stay aligned with each other.
305	    if {$sh == 1} {
306		#puts stderr /add/line
307		append out " " $sub
308		incr w ; incr w $slw
309	    } else {
310		append out " "  ; incr w
311		#puts stderr /add/block/$w
312		append out [::textutil::indent $sub [::textutil::blank $w] 1]
313		incr w $slw
314		incr h $sh ; incr h -1
315	    }
316	}
317
318	#puts stderr \t.~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
319	#SHOW $out $w $h
320    }
321
322    SetBlock $t $n $out
323    return
324}
325
326proc ::page::gen::peg::canon::SynthNode// {t n} {
327    # We take all branches and put them together, nicely aligned under
328    # each other.
329
330    set clist [$t children $n]
331    if {[llength $clist] == 1} {
332	# Implicit cutting out of chains.
333
334	CopyBlock $t $n [lindex $clist 0]
335	return
336    }
337
338    set out ""
339    foreach c $clist {
340	set sop   [$t get $c op]
341	set sub   [$t get $c TEXT]
342	WrapParens / $sop sub
343	append out "/ [::textutil::indent $sub "  " 1]" \n
344    }
345
346    SetBlock $t $n " [string range $out 1 end]"
347    return
348}
349
350proc ::page::gen::peg::canon::WrapParens {op sop tvar} {
351    if {[Paren $op $sop]} {
352	upvar 1 $tvar text
353	set text "([::textutil::indent $text " " 1])"
354    }
355}
356
357proc ::page::gen::peg::canon::Paren {op sop} {
358    # sop is nested under op.
359    # Parens are required if sop has a lower priority than op.
360
361    return [expr {[Priority $sop] < [Priority $op]}]
362}
363
364proc ::page::gen::peg::canon::Priority {op} {
365    switch -exact -- $op {
366	t        -
367	n        -
368	..       -
369	alnum    -
370	alpha    -
371	dot      -
372	epsilon  {return 4}
373	? -
374	* -
375	+        {return 3}
376	! -
377	&        {return 2}
378	x        {return 1}
379	/        {return 0}
380    }
381    return -code error "Internal error, bad operator \"$op\""
382}
383
384proc ::page::gen::peg::canon::CopyBlock {t n src} {
385    $t set $n TEXT  [$t get $src TEXT]
386    $t set $n W     [$t get $src W]
387    $t set $n Wlast [$t get $src Wlast]
388    $t set $n H     [$t get $src H]
389    return
390}
391
392proc ::page::gen::peg::canon::SetBlock {t n text} {
393    set text   [string trimright $text]
394    set lines  [split $text \n]
395    set height [llength $lines]
396
397    if {$height > 1} {
398	set max -1
399	set ntext {}
400
401	foreach line $lines {
402	    set line [string trimright $line]
403	    set l [string length $line]
404	    if {$l > $max} {set max $l}
405	    lappend ntext $line
406	    set wlast $l
407	}
408	set text  [join $ntext \n]
409	set width $max
410    } else {
411	set width [string length $text]
412	set wlast $width
413    }
414
415    $t set $n TEXT $text
416    $t set $n W     $width
417    $t set $n Wlast $wlast
418    $t set $n H     $height
419    return
420}
421
422proc ::page::gen::peg::canon::AttrFieldLength {modeset} {
423    variable ms
424    return  $ms($modeset,*)
425}
426
427if {0} {
428    proc ::page::gen::peg::canon::SHOW {text w h} {
429	set wl $w ; incr wl -1
430	puts stderr "\t/$h"
431	puts stderr "[textutil::indent $text \t|]"
432	puts stderr "\t\\[string repeat "-" $wl]^ ($w)"
433	return
434    }
435}
436
437# ### ### ### ######### ######### #########
438## Internal. Strings.
439
440namespace eval ::page::gen::peg::canon {
441    variable llen 80
442    variable ms ; array set ms {
443	dlmv,discard {void:  }
444	dlmv,leaf    {leaf:  }
445	dlmv,match   {match: }
446	dlmv,value   {       }
447	dlmv,*       7
448
449	dlm,discard  {void:  }		dlv,discard  {void: }
450	dlm,leaf     {leaf:  }		dlv,leaf     {leaf: }
451	dlm,match    {match: }		dlv,value    {      }
452	dlm,*        7			dlv,*        6
453
454	dmv,discard  {void:  }		lmv,leaf     {leaf:  }
455	dmv,match    {match: }		lmv,match    {match: }
456	dmv,value    {       }		lmv,value    {       }
457	dmv,*        7			lmv,*        7
458
459	dl,discard   {void: }		dm,discard   {void:  }
460	dl,leaf      {leaf: }		dm,match     {match: }
461	dl,*         6			dm,*         7
462
463	lm,leaf      {leaf:  }		dv,discard   {void: }
464	lm,match     {match: }		dv,value     {      }
465	lm,*         7			dv,*         6
466
467	lv,leaf      {leaf: }		mv,match     {match: }
468	lv,value     {      }		mv,value     {       }
469	lv,*         6			mv,*         7
470
471	d,discard    {void: }		d,*       6
472	l,leaf       {leaf: }		l,*       6
473	m,match      {match: }		m,*       7
474	v,value      {}			v,*       0
475    }
476}
477
478# ### ### ### ######### ######### #########
479## Ready
480
481package provide page::gen::peg::canon 0.1
482