1# -*- tcl -*-
2# Copyright (c) 2009 Andreas Kupries <andreas_kupries@sourceforge.net>
3
4# Verification of serialized PEGs, and conversion between
5# serializations and other data structures.
6
7# # ## ### ##### ######## ############# #####################
8## Requirements
9
10package require Tcl 8.5                 ; # Required runtime.
11package require pt::pe
12
13# # ## ### ##### ######## ############# #####################
14##
15
16namespace eval ::pt::peg {
17    namespace export \
18	verify verify-as-canonical canonicalize print merge equal
19    namespace ensemble create
20}
21
22# # ## ### ##### ######## #############
23## Public API
24
25# Check that the proposed serialization of a keyword index is
26# indeed such.
27
28proc ::pt::peg::verify {serial {canonvar {}}} {
29    variable ourprefix
30    variable ourshort
31    variable ourtag
32    variable ourcbadlen
33    variable ourmiss
34    variable ourbadpe
35    variable ourcode
36
37    # Basic syntax: Length and outer type code
38    if {[llength $serial] != 2} {
39	return -code error $ourprefix$ourshort
40    }
41
42    lassign $serial tag contents
43
44    if {$tag ne $ourcode} {
45	return -code error $ourprefix[format $ourtag $tag]
46    }
47
48    # contents = dict (rules, start -> ...)
49
50    if {[llength $contents] != 4} {
51	return -code error $ourprefix$ourcbadlen
52    }
53
54    # Unpack the contents, then check that all necessary keys are
55    # present. Together with the length check we can then also be
56    # sure that no other key is present either.
57    array set peg $contents
58    foreach k {rules start} {
59	if {[info exists peg($k)]} continue
60	return -code error $ourprefix[format $ourmiss $k]
61    }
62
63    if {[catch {
64	pt::pe verify $peg(start) canon
65    } msg]} {
66	return -code error \
67	    [string map \
68		 [list \
69		      {error in serialization:} \
70		      $ourprefix[format $ourbadpe start]] \
71		 $msg]
72    }
73
74    if {$canonvar eq {}} {
75	VerifyRules $peg(rules)
76    } else {
77	upvar 1 $canonvar iscanonical
78	set iscanonical $canon
79
80	VerifyRules $peg(rules) iscanonical
81
82	# Quick exit if the inner structure was already
83	# non-canonical.
84	if {!$iscanonical} return
85
86	# Now various checks if the keys and identifiers are
87	# properly sorted to make this a canonical serialization.
88
89	lassign $contents a _ b _
90	if {[list $a $b] ne {rules start}} {
91	    set iscanonical 0
92	}
93
94	if {$serial ne [list {*}$serial]} {
95	    set iscanonical 0
96	}
97
98	if {$contents ne [list {*}$contents]} {
99	    set iscanonical 0
100	}
101    }
102
103    # Everything checked out.
104    return
105}
106
107proc ::pt::peg::verify-as-canonical {serial} {
108    verify $serial iscanonical
109    if {!$iscanonical} {
110	variable ourprefix
111	variable ourdupsort
112	return -code error $ourprefix$ourdupsort
113    }
114    return
115}
116
117proc ::pt::peg::canonicalize {serial} {
118    variable ourcode
119
120    verify $serial iscanonical
121    if {$iscanonical} { return $serial }
122
123    # Unpack the serialization.
124    array set peg $serial
125    array set peg $peg($ourcode)
126    unset     peg($ourcode)
127
128    # Construct result, inside out
129    set rules {}
130    array set r $peg(rules)
131    foreach symbol [lsort -dict [array names r]] {
132	array set sd $r($symbol)
133	lappend rules \
134	    $symbol [list \
135			 is   [pt::pe \
136				   canonicalize $sd(is)] \
137			 mode $sd(mode)]
138	unset sd
139    }
140
141    set serial [list $ourcode \
142		    [list \
143			 rules  $rules \
144			 start  [pt::pe \
145				     canonicalize $peg(start)]]]
146    return $serial
147}
148
149# Converts a PEG serialization into a human readable string for
150# test results. It assumes that the serialization is at least
151# structurally sound.
152
153proc ::pt::peg::print {serial} {
154    variable ourcode
155
156    # Unpack the serialization.
157    array set peg $serial
158    array set peg $peg($ourcode)
159    unset     peg($ourcode)
160    # Print
161    set lines {}
162    lappend lines $ourcode
163    lappend lines "    start := [join [split [pt::pe print $peg(start)] \n] "\n             "]"
164    lappend lines {    rules}
165    foreach {symbol value} $peg(rules) {
166	array set sd $value
167	# keys :: is, mode
168	lappend lines "        $symbol :: <$sd(mode)> :="
169	lappend lines "            [join [split [pt::pe print $sd(is)] \n] "\n            "]"
170	unset sd
171    }
172    return [join $lines \n]
173}
174
175# # ## ### ##### ######## #############
176
177proc ::pt::peg::merge {seriala serialb} {
178    variable ourcode
179
180    verify $seriala
181    verify $serialb
182
183    array set pega $seriala
184    array set pega $pega($ourcode)
185    unset     pega($ourcode)
186
187    array set pegb $serialb
188    array set pegb $pegb($ourcode)
189    unset     pegb($ourcode)
190
191    array set ra $pega(rules)
192    array set rb $pegb(rules)
193
194    foreach symbol [array names rb] {
195	if {![info exists ra($symbol)]} {
196	    # No conflict possible, copy over
197	    set ra($symbol) $rb($symbol)
198	} else {
199	    # unpack definitions, check for conflicts
200	    array set sda $ra($symbol)
201	    array set sdb $rb($symbol)
202
203	    if {$sda(mode) ne $sdb(mode)} {
204		return -code "Merge error for nonterminal \"$symbol\", semantic mode mismatch"
205	    }
206
207	    # Merge parsing expressions, if not identical ...
208	    if {![pt::pe equal \
209		      $sda(is) \
210		      $sdb(is)]} {
211		set sda(is) [pt::pe choice \
212				 $sda(is) \
213				 $sdb(is)]
214		set ra($symbol) [array get sda]
215	    }
216
217	    unset sda
218	    unset sdb
219	}
220    }
221
222    # Construct result, inside out
223
224    set rules {}
225    foreach symbol [lsort -dict [array names ra]] {
226	array set sd $ra($symbol)
227	lappend rules \
228	    $symbol [list \
229			 is   $sd(is) \
230			 mode $sd(mode)]
231	unset sd
232    }
233
234    if {![pt::pe equal \
235	      $pega(start) \
236	      $pegb(start)]} {
237	set start [pt::pe choice \
238		       $pega(start) \
239		       $pegb(start)]
240    } else {
241	set start $pega(start)
242    }
243
244    set serial [list $ourcode \
245		    [list \
246			 rules  $rules \
247			 start  $start]]
248    return $serial
249
250}
251
252# # ## ### ##### ######## #############
253
254proc ::pt::peg::equal {seriala serialb} {
255    # syntactical (intensional) grammar equality.
256    string equal \
257	[canonicalize $seriala] \
258	[canonicalize $serialb]
259}
260
261# # ## ### ##### ######## #############
262
263
264proc ::pt::peg::VerifyRules {rules {canonvar {}}} {
265    variable ourprefix
266    variable ourrbadlen
267    variable oursdup
268    variable oursempty
269    variable oursbadlen
270    variable oursmiss
271    variable ourbadpe
272    variable ourbadmode
273    variable ourmode
274
275    if {$canonvar ne {}} {
276	upvar 1 $canonvar iscanonical
277    }
278
279    if {[llength $rules] % 2 == 1} {
280	return -code error $ourprefix$ourrbadlen
281    }
282
283    if {$rules ne [list {*}$rules]} {
284	set iscanonical 0
285    }
286
287    array set r $rules
288
289    if {([array size r]*2) < [llength $rules]} {
290	return -code error $ourprefix$oursdup
291    }
292
293    foreach symbol [array names r] {
294	if {$symbol eq {}} {
295	    return -code error $ourprefix$oursempty
296	}
297
298	set def $r($symbol)
299
300	if {[llength $def] != 4} {
301	    return -code error $ourprefix[format $oursbadlen $symbol]
302	}
303
304	if {$def ne [list {*}$def]} {
305	    set iscanonical 0
306	}
307
308	array set sd $def
309	foreach k {is mode} {
310	    if {[info exists sd($k)]} continue
311	    return -code error $ourprefix[format $oursmiss $symbol $k]
312	}
313
314	if {[catch {
315	    pt::pe verify $sd(is) canon
316	} msg]} {
317	    return -code error \
318		[string map \
319		     [list \
320			  {error in serialization:} \
321			  $ourprefix[format $ourbadpe ($symbol)]] \
322		     $msg]
323	}
324
325	if {![info exists ourmode($sd(mode))]} {
326	    return -code error $ourprefix[format $ourbadmode $symbol $sd(mode)]
327	}
328
329	# Now various checks if the keys and identifiers are
330	# properly sorted to make this a canonical serialization.
331
332	if {!$canon} {
333	    set iscanonical 0
334	    continue
335	}
336
337	lassign $def a _ b _
338	if {[list $a $b] ne {is mode}} {
339	    set iscanonical 0
340	}
341    }
342    return
343}
344
345namespace eval ::pt::peg {
346    # # ## ### ##### ######## #############
347
348    variable ourcode      pt::grammar::peg
349    variable ourprefix    {error in serialization:}
350    #                                                                                  # Test cases (grammar-peg-structure-)
351    variable ourshort     { dictionary too short, expected exactly one key}      ; #
352    variable ourtag       { bad type tag "%s"}                                   ; #
353    variable ourcbadlen   { dictionary of bad length, expected exactly two keys} ; #
354    variable ourmiss      { missing expected key "%s"}                           ; #
355    variable oursmiss     { symbol "%s", missing expected key "%s"}                           ; #
356    variable ourbadpe     { bad %s parsing expression:}                      ; #
357    variable ourbadmode   { symbol "%s", bad nonterminal mode "%s"}                           ; #
358    variable ourrbadlen   { rule dictionary of bad length, not a dictionary}     ; #
359    variable oursempty    { expected symbol name, got empty string}
360    variable oursbadlen   { symbol dictionary for "%s" of bad length, expected exactly two keys} ; #
361    variable oursdup      { duplicate nonterminal keywords}                                  ; #
362    # Message for non-canonical serialization when expecting canonical form
363    variable ourdupsort   { duplicate and/or unsorted keywords and/or irrelevant whitespace}                ; #
364
365    variable  ourmode
366    array set ourmode {
367	value .
368	leaf  .
369	void  .
370    }
371
372    ##
373    # # ## ### ##### ######## #############
374}
375
376# # ## ### ##### ######## ############# #####################
377## Ready
378
379package provide pt::peg 1
380return
381