1# -*- tcl -*-
2# Copyright (c) 2009 Andreas Kupries <andreas_kupries@sourceforge.net>
3
4# Verification of serialized parsing expressions, conversion
5# between such and other data structures, and their construction.
6
7# # ## ### ##### ######## ############# #####################
8## Requirements
9
10package require Tcl 8.5              ; # Required runtime.
11package require char                 ; # Character quoting utilities.
12
13# # ## ### ##### ######## ############# #####################
14##
15
16namespace eval ::pt::pe {
17    namespace export \
18	verify verify-as-canonical canonicalize \
19	bottomup topdown print equal \
20	\
21	epsilon dot alnum alpha ascii digit graph lower printable \
22	punct space upper wordchar xdigit ddigit \
23	nonterminal optional repeat0 repeat1 ahead notahead \
24	choice sequence \
25	terminal range
26
27    namespace ensemble create
28}
29
30# # ## ### ##### ######## #############
31## Public API
32
33# Check that the proposed serialization of a keyword index is
34# indeed such.
35
36proc ::pt::pe::verify {serial {canonvar {}}} {
37    variable ourprefix
38    variable ourempty
39    #puts "V <$serial> /[llength [info level 0]] / [info level 0]"
40
41    if {[llength $serial] == 0} {
42	return -code error $ourprefix$ourempty
43    }
44
45    if {$canonvar ne {}} {
46	upvar 1 $canonvar iscanonical
47	set iscanonical [string equal $serial [list {*}$serial]]
48    }
49
50    topdown [list [namespace current]::Verify] $serial
51    return
52}
53
54proc ::pt::pe::verify-as-canonical {serial} {
55    verify $serial iscanonical
56    if {!$iscanonical} {
57	variable ourprefix
58	variable ourimpure
59	return -code error $ourprefix$ourimpure
60    }
61    return
62}
63
64proc ::pt::pe::Verify {pe op arguments} {
65    variable ourprefix
66    variable ourbadop
67    variable ourarity
68    variable ourwrongargs
69    variable ourempty
70
71    #puts "VE <$pe /$op /$arguments>"
72    if {[llength $pe] == 0} {
73	return -code error $ourprefix$ourempty
74    }
75
76    if {![info exists ourarity($op)]} {
77	return -code error $ourprefix[format $ourbadop $op]
78    }
79
80    lassign $ourarity($op) min max
81
82    set n [llength $arguments]
83    if {($n < $min) || (($max >= 0) && ($n > $max))} {
84	return -code error $ourprefix[format $ourwrongargs $op]
85    }
86
87    upvar 1 iscanonical iscanonical
88    if {
89	[info exists iscanonical] &&
90	(($pe ne [list {*}$pe]) ||
91	 ($op eq "..") && ([lindex $arguments 0] eq [lindex $arguments 1]))
92    } {
93	# Reject coding with superfluous whitespace, and the use of
94	# {.. x x} as coding for {t x} as non-canonical.
95
96	set iscanonical 0
97    }
98    return
99}
100
101# # ## ### ##### ######## #############
102
103proc ::pt::pe::canonicalize {serial} {
104    verify $serial iscanonical
105    if {$iscanonical} { return $serial }
106    return [bottomup [list [namespace current]::Canonicalize] $serial]
107}
108
109proc ::pt::pe::Canonicalize {pe op arguments} {
110    # The input is mostly already pulled apart into its elements. Now
111    # we construct a pure list out of them, and if necessary, convert
112    # a {.. x x} expression into the canonical {t x} representation.
113
114    if {($op eq ".." ) &&
115	([lindex $arguments 0] eq [lindex $arguments 1])} {
116	return [list t [lindex $arguments 0]]
117    }
118    return [list $op {*}$arguments]
119}
120
121# # ## ### ##### ######## #############
122
123# Converts a parsing expression serialization into a human readable
124# string for test results. It assumes that the serialization is at
125# least structurally sound.
126
127proc ::pt::pe::print {serial} {
128    return [join [bottomup [list [namespace current]::Print] $serial] \n]
129}
130
131proc ::pt::pe::Print {pe op arguments} {
132    switch -exact -- $op {
133	epsilon - alpha - alnum - ascii - digit - graph - lower - print - \
134	    punct - space - upper - wordchar - xdigit - ddigit - dot {
135		return [list <$op>]
136	    }
137	str { return [list "\"[join [char quote comment {*}$arguments] {}]\""] }
138	cl  { return [list "\[[join [char quote comment {*}$arguments] {}]\]"] }
139	n   { return [list "([lindex $arguments 0])"] }
140	t   { return [list "'[char quote comment [lindex $arguments 0]]'"] }
141	..  {
142	    lassign $arguments ca ce
143	    return [list "range ([char quote comment $ca] .. [char quote comment $ce])"]
144	}
145    }
146    # The arguments are already processed for printing
147
148    set out {}
149    lappend out $op
150    foreach a $arguments {
151	foreach line $a {
152	    lappend out "    $line"
153	}
154    }
155    return $out
156}
157
158# # ## ### ##### ######## #############
159
160proc ::pt::pe::equal {seriala serialb} {
161    return [string equal \
162		[canonicalize $seriala] \
163		[canonicalize $serialb]]
164}
165
166# # ## ### ##### ######## #############
167
168proc ::pt::pe::bottomup {cmdprefix pe} {
169    Bottomup 2 $cmdprefix $pe
170}
171
172proc ::pt::pe::Bottomup {level cmdprefix pe} {
173    set op [lindex $pe 0]
174    set ar [lrange $pe 1 end]
175
176    switch -exact -- $op {
177	& - ! - * - + - ? - x - / {
178	    set clevel $level
179	    incr clevel
180	    set nar {}
181	    foreach a $ar {
182		lappend nar [Bottomup $clevel $cmdprefix $a]
183	    }
184	    set ar $nar
185	    set pe [list $op {*}$nar]
186	}
187	default {}
188    }
189
190    return [uplevel $level [list {*}$cmdprefix $pe $op $ar]]
191}
192
193proc ::pt::pe::topdown {cmdprefix pe} {
194    Topdown 2 $cmdprefix $pe
195    return
196}
197
198proc ::pt::pe::Topdown {level cmdprefix pe} {
199    set op [lindex $pe 0]
200    set ar [lrange $pe 1 end]
201
202    uplevel $level [list {*}$cmdprefix $pe $op $ar]
203
204    switch -exact -- $op {
205	& - ! - * - + - ? - x - / {
206	    incr level
207	    foreach a $ar {
208		Topdown $level $cmdprefix $a
209	    }
210	}
211	default {}
212    }
213    return
214}
215
216# # ## ### ##### ######## #############
217
218proc ::pt::pe::epsilon   {} { return epsilon  }
219proc ::pt::pe::dot       {} { return dot      }
220proc ::pt::pe::alnum     {} { return alnum    }
221proc ::pt::pe::alpha     {} { return alpha    }
222proc ::pt::pe::ascii     {} { return ascii    }
223proc ::pt::pe::digit     {} { return digit    }
224proc ::pt::pe::graph     {} { return graph    }
225proc ::pt::pe::lower     {} { return lower    }
226proc ::pt::pe::printable {} { return print    }
227proc ::pt::pe::punct     {} { return punct    }
228proc ::pt::pe::space     {} { return space    }
229proc ::pt::pe::upper     {} { return upper    }
230proc ::pt::pe::wordchar  {} { return wordchar }
231proc ::pt::pe::xdigit    {} { return xdigit   }
232proc ::pt::pe::ddigit    {} { return ddigit   }
233
234proc ::pt::pe::nonterminal {nt} { list n $nt }
235proc ::pt::pe::optional    {pe} { list ? $pe }
236proc ::pt::pe::repeat0     {pe} { list * $pe }
237proc ::pt::pe::repeat1     {pe} { list + $pe }
238proc ::pt::pe::ahead       {pe} { list & $pe }
239proc ::pt::pe::notahead    {pe} { list ! $pe }
240
241proc ::pt::pe::choice   {pe args} { linsert $args 0 / $pe }
242proc ::pt::pe::sequence {pe args} { linsert $args 0 x $pe }
243
244proc ::pt::pe::terminal {t}     { list t $t }
245proc ::pt::pe::range    {ta tb} {
246    if {$ta eq $tb} {
247	list t $ta
248    } else {
249	list .. $ta $tb
250    }
251}
252
253namespace eval ::pt::pe {
254    # # ## ### ##### ######## #############
255    ## Strings for error messages.
256
257    variable ourprefix    "error in serialization:"
258    variable ourempty     " got empty string"
259    variable ourwrongargs " wrong#args for \"%s\""
260    variable ourbadop     " invalid operator \"%s\""
261    variable ourimpure    " has irrelevant whitespace or (.. X X)"
262
263    # # ## ### ##### ######## #############
264    ## operator arities
265
266    variable  ourarity
267    array set ourarity {
268	epsilon  {0 0}
269	alpha    {0 0}
270	alnum    {0 0}
271	ascii    {0 0}
272	digit    {0 0}
273	graph    {0 0}
274	lower    {0 0}
275	print    {0 0}
276	punct    {0 0}
277	space    {0 0}
278	upper    {0 0}
279	wordchar {0 0}
280	xdigit   {0 0}
281	ddigit   {0 0}
282	dot      {0 0}
283	..       {2 2}
284	n        {1 1}
285	t        {1 1}
286	&        {1 1}
287	!        {1 1}
288	*        {1 1}
289	+        {1 1}
290	?        {1 1}
291	x        {1 -1}
292	/        {1 -1}
293    }
294
295    ##
296    # # ## ### ##### ######## #############
297}
298
299# # ## ### ##### ######## ############# #####################
300## Ready
301
302package provide pt::pe 1
303return
304