1# pt_peg_from_peg.tcl --
2#
3#	Conversion from PEG (Human readable text) to PEG.
4#
5# Copyright (c) 2009 Andreas Kupries <andreas_kupries@sourceforge.net>
6#
7# See the file "license.terms" for information on usage and redistribution
8# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
9#
10# RCS: @(#) $Id: pt_peg_from_peg.tcl,v 1.1 2010/03/26 05:07:24 andreas_kupries Exp $
11
12# This package takes text for a human-readable PEG and produces the
13# canonical serialization of a parsing expression grammar.
14
15# TODO :: APIs for reading from arbitrary channel.
16
17# ### ### ### ######### ######### #########
18## Requisites
19
20package require Tcl 8.5
21package require pt::peg  ; # Verification that the input is proper.
22#package require pt::peg::interp
23#package require pt::peg::container::peg
24package require pt::parse::peg
25package require pt::ast
26package require pt::pe
27package require pt::pe::op
28
29# ### ### ### ######### ######### #########
30##
31
32namespace eval ::pt::peg::from::peg {
33    namespace export   convert convert-file
34    namespace ensemble create
35}
36
37# ### ### ### ######### ######### #########
38## API.
39
40proc ::pt::peg::from::peg::convert {text} {
41    # Initialize data for the pseudo-channel
42    variable input $text
43    variable loc   0
44    variable max   [expr { [string length $text] - 1 }]
45
46    return [Convert]
47}
48
49proc ::pt::peg::from::peg::convert-file {path} {
50    # Initialize data for the pseudo-channel
51    variable input [fileutil::cat $path]
52    variable loc   0
53    variable max   [expr { [string length $input] - 1 }]
54
55    return [Convert]
56}
57
58# ### ### ### ######### ######### #########
59
60proc ::pt::peg::from::peg::Convert {} {
61    # Create the runtime ...
62    set c [chan create read pt::peg::from::peg::CHAN] ; # pseudo-channel for input
63
64    #set g [pt::peg::container::peg %AUTO]             ; # load peg grammar
65    #set i [pt::peg::interp         %AUTO% $g]         ; # grammar interpreter / parser
66    #$g destroy
67    set i [pt::parse::peg]
68
69    # Parse input.
70    set fail [catch {
71	set ast [$i parse $c]
72    } msg]
73    if {$fail} {
74	set ei $::errorInfo
75	set ec $::errorCode
76    }
77
78    $i destroy
79    close $c
80
81    if {$fail} {
82	variable input {}
83	return -code error -errorinfo $ei -errorcode $ec $msg
84    }
85
86    # Now convert the AST to the grammar serial.
87    set serial [pt::ast bottomup \
88		    pt::peg::from::peg::GEN \
89		    $ast]
90
91    variable input {}
92    return $serial
93
94    # ### ### ### ######### ######### #########
95}
96
97# ### ### ### ######### ######### #########
98## Internals - Pseudo channel to couple the in-memory text with the
99## RDE.
100
101namespace eval ::pt::peg::from::peg::CHAN {
102    namespace export   initialize finalize read watch
103    namespace ensemble create
104}
105
106proc pt::peg::from::peg::CHAN::initialize {c mode} {
107    return {initialize finalize watch read}
108}
109
110proc pt::peg::from::peg::CHAN::finalize {c}        {}
111proc pt::peg::from::peg::CHAN::watch    {c events} {}
112
113proc pt::peg::from::peg::CHAN::read {c n} {
114    # Note: Should have binary string of the input, to properly handle
115    # encodings ...
116    variable ::pt::peg::from::peg::input
117    variable ::pt::peg::from::peg::loc
118    variable ::pt::peg::from::peg::max
119
120    if {$loc >= $max} { return {} }
121
122    set end [expr {$loc + $n - 1}]
123    set res [string range $input $loc $end]
124
125    incr loc $n
126
127    return $res
128}
129
130# ### ### ### ######### ######### #########
131## Internals - Bottom up walk converting AST to PEG serialization.
132## Pseudo-ensemble
133
134namespace eval ::pt::peg::from::peg::GEN {}
135
136proc pt::peg::from::peg::GEN {ast} {
137    # The reason for not being an ensemble, an additional param
138    # (8.6+ can code that as ensemble).
139    return [namespace eval GEN $ast]
140}
141
142proc pt::peg::from::peg::GEN::ALNUM {s e} {
143    return [pt::pe alnum]
144}
145
146proc pt::peg::from::peg::GEN::ALPHA {s e} {
147    return [pt::pe alpha]
148}
149
150proc pt::peg::from::peg::GEN::AND {s e} {
151    return [pt::pe ahead [pt::pe dot]] ; # -> Prefix
152}
153
154proc pt::peg::from::peg::GEN::ASCII {s e} {
155    return [pt::pe ascii]
156}
157
158proc pt::peg::from::peg::GEN::Attribute {s e args} {
159    return [lindex $args 0] ; # -> Definition
160}
161
162proc pt::peg::from::peg::GEN::Char {s e args} {
163    return [lindex $args 0]
164}
165
166proc pt::peg::from::peg::GEN::CharOctalFull {s e} {
167    variable ::pt::peg::from::peg::input
168    return [pt::pe terminal [char unquote [string range $input $s $e]]]
169}
170
171proc pt::peg::from::peg::GEN::CharOctalPart {s e} {
172    variable ::pt::peg::from::peg::input
173    return [pt::pe terminal [char unquote [string range $input $s $e]]]
174}
175
176proc pt::peg::from::peg::GEN::CharSpecial {s e} {
177    variable ::pt::peg::from::peg::input
178    return [pt::pe terminal [char unquote [string range $input $s $e]]]
179}
180
181proc pt::peg::from::peg::GEN::CharUnescaped {s e} {
182    variable ::pt::peg::from::peg::input
183    return [pt::pe terminal [string range $input $s $e]]
184}
185
186proc pt::peg::from::peg::GEN::CharUnicode {s e} {
187    variable ::pt::peg::from::peg::input
188    return [pt::pe terminal [char unquote [string range $input $s $e]]]
189}
190
191proc pt::peg::from::peg::GEN::Class {s e args} {
192    if {[llength $args] == 1} { ; # integrated pe::op flatten
193	return [lindex $args 0]
194    } else {
195	return [pt::pe choice {*}$args] ; # <- Chars and Ranges
196    }
197}
198
199proc pt::peg::from::peg::GEN::CONTROL {s e} {
200    return [pt::pe ddigit]
201}
202
203proc pt::peg::from::peg::GEN::DDIGIT {s e} {
204    return [pt::pe ddigit]
205}
206
207proc pt::peg::from::peg::GEN::Definition {s e args} {
208    # args = list/2 (symbol pe)      | <-           Ident(ifier) Expression
209    # args = list/3 (mode symbol pe) | <- Attribute Ident(ifier) Expression
210    if {[llength $args] == 3} {
211	lassign $args mode sym pe
212    } else {
213	lassign $args sym pe
214	set mode value
215    }
216    # sym = list/2 ('n' name)
217    return [list [lindex $sym 1] $mode [pt::pe::op flatten $pe]]
218}
219
220proc pt::peg::from::peg::GEN::DIGIT {s e} {
221    return [pt::pe digit]
222}
223
224proc pt::peg::from::peg::GEN::DOT {s e} {
225    return [pt::pe dot]
226}
227
228proc pt::peg::from::peg::GEN::Expression {s e args} {
229    if {[llength $args] == 1} { ; # integrated pe::op flatten
230	return [lindex $args 0]
231    } else {
232	return [pt::pe choice {*}$args] ; # <- Primary
233    }
234}
235
236proc pt::peg::from::peg::GEN::Grammar {s e args} {
237    # args = list (start, list/3(symbol, mode, rule)...) <- Header Definition*
238    array set symbols {}
239    set rules {}
240    foreach def [lsort -index 0 -dict [lassign $args startexpr]] {
241	lassign $def sym mode rhs
242	if {[info exists symbol($sym)]} {
243	    return -code error "Double declaration of symolb '$sym'"
244	}
245	set symbols($sym) .
246	lappend rules $sym [list is $rhs mode $mode]
247    }
248    # Full grammar
249    return [list pt::grammar::peg [list rules $rules start $startexpr]]
250}
251
252proc pt::peg::from::peg::GEN::GRAPH {s e} {
253    return [pt::pe graph]
254}
255
256proc pt::peg::from::peg::GEN::Header {s e args} {
257    # args = list/2 (list/2 ('n', name), pe) <- Ident(ifier) StartExpr
258    return [lindex $args 1] ; # StartExpr passes through
259}
260
261proc pt::peg::from::peg::GEN::Ident {s e} {
262    variable ::pt::peg::from::peg::input
263    return [pt::pe nonterminal [string range $input $s $e]]
264}
265
266proc pt::peg::from::peg::GEN::Identifier {s e args} {
267    return [lindex $args 0] ; # <- Ident, passes through
268}
269
270proc pt::peg::from::peg::GEN::LEAF {s e} {
271    return leaf
272}
273
274proc pt::peg::from::peg::GEN::LOWER {s e} {
275    return [pt::pe lower]
276}
277
278proc pt::peg::from::peg::GEN::Literal {s e args} {
279    if {[llength $args] == 1} { ; # integrated pe::op flatten
280	return [lindex $args 0]
281    } else {
282	return [pt::pe sequence {*}$args] ; # Series of chars -> Primary
283    }
284}
285
286proc pt::peg::from::peg::GEN::NOT {s e} {
287    return [pt::pe notahead [pt::pe dot]] ; # -> Prefix (dot is placeholder)
288}
289
290proc pt::peg::from::peg::GEN::PLUS {s e} {
291    return [pt::pe repeat1 [pt::pe dot]] ; # -> Suffix (dot is placeholder)
292}
293
294proc pt::peg::from::peg::GEN::Primary {s e args} {
295    return [lindex $args 0] ; # -> Expression, pass through
296}
297
298proc pt::peg::from::peg::GEN::Prefix {s e args} {
299    # args = list/1 (pe)            | <- AND/NOT, Expression
300    # args = list/2 (pe/prefix, pe) | <- Expression
301    if {[llength $args] == 2} {
302	# Prefix operator present ... Replace its child (dot,
303	# placeholder) with our second, the actual expression.
304	return [lreplace [lindex $args 0] 1 1 [lindex $args 1]]
305    } else {
306	# Pass the sub-expression
307	return [lindex $args 0]
308    }
309}
310
311proc pt::peg::from::peg::GEN::PRINTABLE {s e} {
312    return [pt::pe printable]
313}
314
315proc pt::peg::from::peg::GEN::PUNCT {s e} {
316    return [pt::pe punct]
317}
318
319proc pt::peg::from::peg::GEN::QUESTION {s e} {
320    return [pt::pe optional [pt::pe dot]] ; # -> Suffix (dot is placeholder)
321}
322
323proc pt::peg::from::peg::GEN::Range {s e args} {
324    # args = list/1 (pe/t)       | <- Char (pass through)
325    # args = list/2 (pe/t, pe/t) | <- Char, Char
326    if {[llength $args] == 2} {
327	# Convert two terminals to range
328	return [pt::pe range [lindex $args 0 1] [lindex $args 1 1]]
329    } else {
330	# Pass the char ...
331	return [lindex $args 0]
332    }
333}
334
335proc pt::peg::from::peg::GEN::Sequence {s e args} {
336    if {[llength $args] == 1} { ; # integrated pe::op flatten
337	return [lindex $args 0]
338    } else {
339	return [pt::pe sequence {*}$args] ; # <- Prefix+
340    }
341}
342
343proc pt::peg::from::peg::GEN::SPACE {s e} {
344    return [pt::pe space]
345}
346
347proc pt::peg::from::peg::GEN::STAR {s e} {
348    return [pt::pe repeat0 [pt::pe dot]] ; # -> Suffix (dot is placeholder)
349}
350
351proc pt::peg::from::peg::GEN::StartExpr {s e args} {
352    # args = list/1 (pe) | <- Expression, -> Header
353    return [pt::pe::op flatten [lindex $args 0]]
354}
355proc pt::peg::from::peg::GEN::Suffix {s e args} {
356    # args = list/1 (pe)            | <- Expression
357    # args = list/2 (pe, pe/suffix) | <- Expression */+/?
358    if {[llength $args] == 2} {
359	# Suffix operator present ... Replace its child (dot,
360	# placeholder) with our first, the actual expression.
361	return [lreplace [lindex $args 1] 1 1 [lindex $args 0]]
362    } else {
363	# Pass the sub-expression
364	return [lindex $args 0]
365    }
366}
367
368proc pt::peg::from::peg::GEN::UPPER {s e} {
369    return [pt::pe upper]
370}
371
372proc pt::peg::from::peg::GEN::VOID {s e} {
373    return void
374}
375
376proc pt::peg::from::peg::GEN::WORDCHAR {s e} {
377    return [pt::pe wordchar]
378}
379
380proc pt::peg::from::peg::GEN::XDIGIT {s e} {
381    return [pt::pe xdigit]
382}
383
384# ### ### ### ######### ######### #########
385## Ready
386
387package provide pt::peg::from::peg 1
388return
389