1# peg_to_peg.tcl --
2#
3#	Conversion from PEG to PEG (Human readable text).
4#
5# Copyright (c) 2009 Andreas Kupries <andreas_kupries@sourceforge.net>
6#
7# See the file "license.terms" for information on usage and redistribution
8# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
9#
10# RCS: @(#) $Id: pt_peg_to_peg.tcl,v 1.1 2010/03/26 05:07:24 andreas_kupries Exp $
11
12# This package takes the canonical serialization of a parsing
13# expression grammar and produces text in PEG format, a form of text
14# which specifies a PEG in a human readable, yet formal manner,
15# similar too, but not identical to EBNF.
16
17# ### ### ### ######### ######### #########
18## Requisites
19
20package  require Tcl 8.5
21package  require pt::peg  ; # Verification that the input
22				       # is proper.
23package  require pt::pe              ; # Walking an expression.
24package  require pt::pe::op          ; # Flatten & fuse.
25package  require text::write         ; # Text generation support
26package  require textutil::adjust
27package  require struct::list
28
29# ### ### ### ######### ######### #########
30##
31
32namespace eval ::pt::peg::to::peg {
33    namespace export \
34	reset configure convert
35
36    namespace ensemble create
37}
38
39# ### ### ### ######### ######### #########
40## API.
41
42proc ::pt::peg::to::peg::reset {} {
43    variable template @code@
44    variable name     a_pe_grammar
45    variable file     unknown
46    variable user     unknown
47    variable fused    1
48    return
49}
50
51proc ::pt::peg::to::peg::configure {args} {
52    variable template
53    variable name
54    variable file
55    variable user
56    variable fused
57
58    if {[llength $args] == 0} {
59	return [list \
60		    -file     $file \
61		    -fused    $fused \
62		    -name     $name \
63		    -template $template \
64		    -user     $user]
65    } elseif {[llength $args] == 1} {
66	lassign $args option
67	set variable [string range $option 1 end]
68	if {[info exists $variable]} {
69	    return [set $variable]
70	} else {
71	    return -code error "Expected one of -file, -fused, -name, -template, or -user, got \"$option\""
72	}
73    } elseif {[llength $args] % 2 == 0} {
74	foreach {option value} $args {
75	    set variable [string range $option 1 end]
76	    if {![info exists $variable]} {
77		return -code error "Expected one of -file, -fused, -name, -template, or -user, got \"$option\""
78	    }
79	}
80	foreach {option value} $args {
81	    set variable [string range $option 1 end]
82	    switch -exact -- $variable {
83		template {
84		    if {$value eq {}} {
85			return -code error "Expected template, got the empty string"
86		    }
87		}
88		fused {
89		    if {![::string is boolean -strict $value]} {
90			return -code error "Expected boolean, got \"$value\""
91		    }
92		}
93		name -
94		file -
95		user { }
96	    }
97	    set $variable $value
98	}
99    } else {
100	return -code error {wrong#args, expected option value ...}
101    }
102}
103
104proc ::pt::peg::to::peg::convert {serial} {
105    variable template
106    variable name
107    variable file
108    variable user
109
110    ::pt::peg verify-as-canonical $serial
111
112    # Unpack the serialization, known as canonical
113    array set peg $serial
114    array set peg $peg(pt::grammar::peg)
115    unset     peg(pt::grammar::peg)
116
117    # Determine the field sizes for nonterminal symbol names and
118    # semantic modes.
119
120    set smax [text::write maxlen [dict keys $peg(rules)]]
121    set mmax [ModeSize                      $peg(rules)]
122
123    # Assemble the output, various pieces
124    text::write reset
125    Header $peg(start)
126    Rules  $peg(rules) $mmax $smax
127    Trailer
128
129    # At last retrieve the fully assembled result and integrate with
130    # the chosen template.
131    return [string map \
132		[list \
133		     @user@   $user \
134		     @format@ PEG   \
135		     @file@   $file \
136		     @name@   $name \
137		     @code@   [text::write get]] $template]
138
139    # ### ### ### ######### ######### #########
140}
141
142# ### ### ### ######### ######### #########
143## Internals
144
145proc ::pt::peg::to::peg::Header {startexpression} {
146    variable name
147
148    text::write field  PEG
149    text::write field  $name
150    text::write field  ([Expression $startexpression])
151    text::write /line
152    return
153}
154
155proc ::pt::peg::to::peg::Rules {rules mmax smax} {
156    if {[llength $rules]} { text::write /line }
157
158    foreach {symbol def} $rules {
159	lassign $def _ is _ mode
160	set mode  [expr {($mode eq "value")
161			 ? ""
162			 : "${mode}:"}]
163
164	text::write fieldl $mmax $mode
165	text::write fieldl $smax $symbol
166	text::write field        "<-"
167	text::write field        [Expression $is]
168	text::write field        ";"
169	text::write /line
170    }
171
172    if {[llength $rules]} { text::write /line }
173    return
174}
175
176proc ::pt::peg::to::peg::Trailer {} {
177    text::write field  {END;}
178    text::write /line
179    return
180}
181
182# ### ### ### ######### ######### #########
183
184proc ::pt::peg::to::peg::Expression {pe} {
185    variable fused
186
187    if {$fused} {
188	# First flatten for a maximum amount of adjacent terminals and
189	# ranges, then fuse these into strings and classes, then
190	# flatten again, eliminating all sequences and choices fully
191	# subsumed by the new elements.
192
193	set pe [pt::pe::op flatten \
194		    [pt::pe::op fusechars \
195			 [pt::pe::op flatten \
196			      $pe]]]
197    }
198
199    return [lindex [pt::pe bottomup \
200			[namespace current]::Convert \
201			$pe] 0]
202}
203
204proc ::pt::peg::to::peg::Convert {pe operator arguments} {
205    # For the inner nodes the each of arguments are a pair of
206    # generated text, and the sub-expression it came from, in this
207    # order.
208
209    switch -exact -- $operator {
210	alpha - alnum - ascii - digit - graph - lower - print -
211	punct - space - upper - wordchar - xdigit - ddigit {
212	    # Special forms ...
213	    return [list <$operator> $pe]
214	}
215	dot {
216	    # Special form ...
217	    return [list "." $pe]
218	}
219	epsilon {
220	    # Special form ...
221	    return [list "" $pe]
222	}
223	t {
224	    # Character ...
225	    lassign $arguments char
226	    return [list "'[Char ${char}]'" $pe]
227	}
228	.. {
229	    # Range of characters ... Show as character class.
230	    # Note: Canonical input means that an expression like
231	    # {.. X X} cannot occur, and can be ignored.
232
233	    lassign $arguments chstart chend
234	    return [list "\[[Char ${chstart}]-[Char $chend]\]" $pe]
235	}
236	n {
237	    # Nonterminal symbol
238	    lassign $arguments symbol
239	    return [list $symbol $pe]
240	}
241	? - * - + {
242	    # Suffix operators (Option, Kleene Closure, Positive KC) ...
243	    lassign $arguments child
244	    lassign $child text def
245	    lassign $def coperator
246	    return [list [MayParens $operator $coperator $text]$operator $pe]
247	}
248	& -
249	! {
250	    # Prefix operators (And/Not Lookahead) ...
251	    lassign $arguments child
252	    lassign $child text def
253	    lassign $def coperator
254	    return [list $operator[MayParens $operator $coperator $text] $pe]
255	}
256	x {
257	    # Sequences ...
258	    # TODO :: merge adjacent chars into strings ...  also, cut
259	    # x out if only one child
260
261	    set t {}
262	    set x {}
263	    foreach a $arguments {
264		lassign $a text def
265		lassign $def coperator
266		lappend t [MayParens $operator $coperator $text]
267		lappend x $def
268	    }
269	    return [list [join $t { }] [list x {*}$x]]
270	}
271	/ {
272	    # Choices ...
273	    # TODO :: merge adjacent chars and ranges into classes ...
274	    # also, cut / out if only one child
275
276	    set t {}
277	    set x {}
278	    foreach a $arguments {
279		lassign $a text def
280		lassign $def coperator
281		lappend t [MayParens $operator $coperator $text]
282		lappend x $def
283	    }
284	    return [list [join $t { / }] [list / {*}$x]]
285	}
286	str {
287	    return [list \
288			'[join [struct::list map $arguments \
289				    [namespace current]::Char] {}]' \
290			$pe]
291	}
292	cl {
293	    return [list \
294			\[[join [struct::list map $arguments \
295				     [namespace current]::Range] {}]\] \
296			$pe]
297	}
298    }
299}
300
301proc ::pt::peg::to::peg::Range {range} {
302    # See also pt::peg::to::tclparam
303
304    # Use string ops here to distinguish terminals and ranges. The
305    # input can be a single char, not a list, and further the char may
306    # not be a proper list. Example: double-apostroph.
307    if {[string length $range] > 1} {
308	lassign $range s e
309	return [Char $s]-[Char $e]
310    } else {
311	return [Char $range]
312    }
313}
314
315proc ::pt::peg::to::peg::Char {ch} {
316    # Encode a character, handle special cases.  We cannot use package
317    # char, as that is geared towards character encoding for Tcl code.
318
319    switch -exact -- $ch {
320	"\n" { return "\\n"  }
321	"\r" { return "\\r"  }
322	"\t" { return "\\t"  }
323	"\\" { return "\\\\" }
324	"\"" { return "\\\"" }
325	"'"  { return "\\'"  }
326	"\]" { return "\\\]" }
327	"\[" { return "\\\[" }
328    }
329
330    scan $ch %c chcode
331
332    # Control characters: Octal
333    if {[::string is control -strict $ch]} {
334	return \\[format %o $chcode]
335    }
336
337    # Beyond 7-bit ASCII: Unicode
338
339    if {$chcode > 127} {
340	return \\u[format %04x $chcode]
341    }
342
343    # Regular character: Is its own representation.
344
345    return $ch
346
347}
348
349proc ::pt::peg::to::peg::MayParens {op cop text} {
350    if {![NeedParens $op $cop]} { return $text }
351    return "([::textutil::adjust::indent $text " " 1])"
352}
353
354proc ::pt::peg::to::peg::NeedParens {op cop} {
355    variable priority
356    # c(hild)op is nested under op.
357    # Parens are required if cop has a lower priority than op.
358
359    return [expr {$priority($cop) < $priority($op)}]
360}
361
362# ### ### ### ######### ######### #########
363
364proc ::pt::peg::to::peg::ModeSize {rules} {
365    set modes {}
366    foreach {symbol def} $rules {
367	lassign $def _ is _ mode
368	if {$mode eq "value"} continue ; # These are not shown in the
369					 # text representation, as
370					 # they are the implicit
371					 # default for it.
372	lappend modes ${mode}:
373    }
374    return [text::write maxlen [lsort -uniq $modes]]
375}
376
377# ### ### ### ######### ######### #########
378## Configuration
379
380namespace eval ::pt::peg::to::peg {
381
382    variable template @code@       ; # A string. Specifies how to
383				     # embed the generated code into a
384				     # larger frame- work (the
385				     # template).
386    variable name     a_pe_grammar ; # String. Name of the grammar.
387    variable file     unknown      ; # String. Name of the file or
388				     # other entity the grammar came
389				     # from.
390    variable user     unknown      ; # String. Name of the user on
391				     # which behalf the conversion has
392				     # been invoked.
393    variable fused    1            ; # Boolean flag. If true character
394				     # sequences and choices are fused
395				     # into strings and classes.
396
397    variable  priority
398    array set priority {
399	/ 0  t       4  ascii 4  upper    4
400	x 1  n       4  digit 4  wordchar 4
401	& 2  ..      4  graph 4  xdigit   4
402	! 2  dot     4  lower 4  ddigit   4
403	+ 3  epsilon 4  print 4  str      4
404	* 3  alnum   4  punct 4  cl       4
405	? 3  alpha   4  space 4
406    }
407}
408
409# ### ### ### ######### ######### #########
410## Ready
411
412package provide pt::peg::to::peg 1
413return
414