1# -*- tcl -*-
2#
3# Copyright (c) 2005 by Andreas Kupries <andreas_kupries@users.sourceforge.net>
4# Parser Generator / Transformation - Normalize PEG AST for later.
5
6# This package assumes to be used from within a PAGE plugin. It uses
7# the API commands listed below. These are identical across the major
8# types of PAGE plugins, allowing this package to be used in reader,
9# transform, and writer plugins. It cannot be used in a configuration
10# plugin, and this makes no sense either.
11#
12# To ensure that our assumption is ok we require the relevant pseudo
13# package setup by the PAGE plugin management code.
14#
15# -----------------+--
16# page_info        | Reporting to the user.
17# page_warning     |
18# page_error       |
19# -----------------+--
20# page_log_error   | Reporting of internals.
21# page_log_warning |
22# page_log_info    |
23# -----------------+--
24
25# ### ### ### ######### ######### #########
26## Requisites
27
28# @mdgen NODEP: page::plugin
29
30package require page::plugin ; # S.a. pseudo-package.
31package require treeql
32package require page::util::quote
33package require page::util::peg
34
35namespace eval ::page::util::norm::lemon {
36    # Get the peg char de/encoder commands.
37    # (unquote, quote'tcl)
38
39    namespace import ::page::util::quote::*
40    namespace import ::page::util::peg::*
41}
42
43# ### ### ### ######### ######### #########
44## API
45
46proc ::page::util::norm::lemon {t} {
47    set q [treeql q -tree $t]
48
49    page_info {[Lemon Normalization]}
50
51    # Retrieve grammar name out of one directive.
52    # Or from LHS of first rule.
53
54    page_log_info ..Startsymbol
55
56    set start {}
57
58    $q query tree \
59	    withatt type nonterminal \
60	    withatt detail StartSymbol \
61	    descendants \
62	    withatt type terminal \
63	    over n {
64
65	lemon::TokReduce $t $n detail
66	set start [$t get $n detail]
67
68	page_info "  StartSymbol: $start"
69    }
70
71    $q query tree \
72	    withatt type   nonterminal \
73	    withatt detail Name \
74	    descendants \
75	    withatt type terminal \
76	    over n {
77
78	lemon::TokReduce $t $n detail
79	set name [$t get $n detail]
80
81	page_info "  Name:        $name"
82
83	$t set root name $name
84    }
85
86    page_log_info ..Drop        ; lemon::Drop        $q $t
87    page_log_info ..Terminals   ; lemon::Terminals   $q $t
88    page_log_info ..Definitions ; lemon::Definitions $q $t
89    page_log_info ..Rules       ; lemon::Rules       $q $t start
90    page_log_info ..Epsilon     ; lemon::ElimEpsilon $q $t
91    page_log_info ..Autoclass   ; lemon::AutoClassId $q $t
92    page_log_info ..Chains
93
94    # Find and cut operator chains, very restricted. Cut only chains
95    # of x- and /-operators. The other operators have only one child
96    # by definition and are thus not chains.
97
98    #set q [treeql q -tree $t]
99    # q query tree over n
100    foreach n [$t children -all root] {
101	if {[$t keyexists $n symbol]}        continue
102	if {[llength [$t children $n]] != 1} continue
103
104	set op [$t get $n op]
105	if {($op ne "/") && ($op ne "x")} continue
106	$t cut $n
107    }
108
109    page_log_info ..Flatten
110
111    lemon::flatten $q $t
112
113    # Analysis: Left recursion, and where.
114    # Manual: Definitions for terminals.
115    #         Definitions for space, comments.
116    #         Integration of this into the grammar.
117
118    # Sentinel for PE algorithms.
119    $t set root symbol <StartExpression>
120
121    if {$start eq ""} {
122	page_error "  Startsymbol missing"
123    } else {
124	set s [$t insert root end]
125	$t set $s op  n
126	$t set $s sym $start
127	$t set root start $s
128
129	array set def [$t get root definitions]
130
131	if {![info exists def($start)]} {
132	    page_error "  Startsymbol is undefined"
133	    $t set $s def ""
134	} else {
135	    $t set $s def $def($start)
136	}
137	unset def
138    }
139
140    $q destroy
141
142    page_log_info Ok
143    return
144}
145
146# ### ### ### ######### ######### #########
147## Documentation
148#
149## See doc_normalize.txt for the specification of the publicly visible
150## attributes.
151##
152## Internal attributes
153## - DATA - Transient storage for terminal data.
154
155# ### ### ### ######### ######### #########
156## Internal. Helpers
157
158proc ::page::util::norm::lemon::Drop {q t} {
159    # Simple normalization.
160    # All lemon specific data is dropped completely.
161
162    foreach drop {
163	Directive Codeblock Label Precedence
164    } {
165	$q query tree withatt type nonterminal \
166	    withatt detail $drop over n {
167		$t delete $n
168	    }
169    }
170
171    # Some nodes can be dropped, but not their children.
172
173    $q query tree withatt type nonterminal \
174	withatt detail Statement over n {
175	    $t cut $n
176	}
177
178    # Cut the ALL and LemonGrammar nodes, direct access, no search
179    # needed.
180
181    $t cut [lindex [$t children root] 0]
182    $t cut [lindex [$t children root] 0]
183
184    return
185}
186
187proc ::page::util::norm::lemon::Terminals {q t} {
188    # The data for all terminals is stored in their grandparental
189    # nodes. We get rid of both terminals and their parents.
190
191    $q query tree withatt type terminal over n {
192	set p  [$t parent $n]
193	set gp [$t parent $p]
194
195	CopyLocation $t $n $gp
196	AttrCopy     $t $n detail $gp DATA
197	TokReduce    $t           $gp DATA
198	$t delete $p
199    }
200
201    # We can now drop the type attribute, as all the remaining nodes
202    # (which have it) will contain the value 'nonterminal'.
203
204    $q query tree hasatt type over n {
205	$t unset $n type
206    }
207    return
208}
209
210proc ::page::util::norm::lemon::Definitions {q t} {
211    # Convert 'Definition' into the sequences they are.
212    # Sequences of length one will be flattened later.
213    # Empty sequences (Length zero) are epsilon.
214    # Epsilon will be later converted to ? of the
215    # whole choice they are part of.
216
217    $q query tree withatt detail Definition over n {
218	$t unset $n detail
219
220	if {[$t children $n] < 1} {
221	    $t set $n op epsilon
222	} else {
223	    $t set $n op x
224	}
225    }
226    return
227}
228
229proc ::page::util::norm::lemon::Rules {q t sv} {
230    upvar $sv start
231    # We move nonterminal hint information from nodes into attributes,
232    # and delete the now irrelevant nodes.
233
234    # Like with the global metadata we move definition specific
235    # information out of nodes into attributes, get rid of the
236    # superfluous nodes, and tag the definition roots with marker
237    # attributes.
238
239    array set defs {}
240    $q query tree withatt detail Rule over n {
241	set first [Child $t $n 0]
242
243	set sym   [$t get $first DATA]
244	$t set $n symbol $sym
245	$t set $n label  $sym
246	$t set $n users  {}
247	$t set $n mode value
248
249	if {$start eq ""} {
250	    page_info "  StartSymbol: $sym"
251	    set start $sym
252	}
253
254	# We get the left extend of the definition from the terminal
255	# for the symbol it defines.
256
257	MergeLocations $t $first [Rightmost $t $n] $n
258	$t unset $n detail
259
260	lappend defs($sym) $n
261	$t cut $first
262    }
263
264    set d {}
265    foreach sym [array names defs] {
266	set nodes $defs($sym)
267	if {[llength $nodes] == 1} {
268	    lappend d $sym [lindex $nodes 0]
269	} else {
270	    # Merge multi-node definition together, under a choice.
271
272	    set r [$t insert root end]
273	    set c [$t insert $r end]
274
275	    $t set $r symbol $sym
276	    $t set $r label  $sym
277	    $t set $r users  {}
278	    $t set $r mode value
279	    $t set $c op     /
280
281	    foreach n $nodes {
282		set seq [lindex [$t children $n] 0]
283		$t move $c end $seq
284		$t delete $n
285	    }
286
287	    lappend d $sym $r
288	}
289    }
290
291    # We remember a mapping from nonterminal names to their defining
292    # nodes in the root as well, for quick reference later, when we
293    # build nonterminal usage references
294
295    $t set root definitions $d
296    return
297}
298
299proc ::page::util::norm::lemon::Rightmost {t n} {
300    # Determine the rightmost leaf under the specified node.
301
302    if {[$t isleaf $n]} {return $n}
303    return [Rightmost $t [lindex [$t children $n] end]]
304}
305
306proc ::page::util::norm::lemon::ElimEpsilon {q t} {
307    # We convert choices with an epsilon in them into
308    # optional choices without an epsilon branch.
309
310    $q query tree withatt op epsilon over n {
311	set choice [$t parent $n]
312
313	# Move branches into the epsilon, which becomes the new
314	# choice. And the choice becomes an option.
315	foreach c [$t children $choice] {
316	    if {$c eq $n} continue
317	    $t move $n end $c
318	}
319	$t set $n      op /
320	$t set $choice op ?
321    }
322    return
323}
324
325proc ::page::util::norm::lemon::AutoClassId {q t} {
326
327    array set defs [$t get root definitions]
328    array set use {}
329
330    $q query tree \
331	    withatt op x \
332	    children \
333	    hasatt DATA \
334	    over n {
335	# All identifiers are nonterminals, and for the
336	# undefined ones we create rules which define
337	# them as terminal sequences.
338
339	set sym  [$t get $n DATA]
340	$t unset $n DATA
341
342	$t set $n op  n
343	$t set $n sym $sym
344
345	if {![info exists defs($sym)]} {
346	    set defs($sym) [NewTerminal $t $sym]
347	}
348	$t set $n def $defs($sym)
349
350	lappend use($sym) $n
351	$t unset $n detail
352    }
353
354    $t set root definitions [array get defs]
355
356    foreach sym [array names use] {
357	$t set $defs($sym) users $use($sym)
358    }
359
360    $t set root undefined {}
361    return
362}
363
364proc ::page::util::norm::lemon::NewTerminal {t sym} {
365    page_log_info "  Terminal: $sym"
366
367    set     r [$t insert root end]
368    $t set $r symbol $sym
369    $t set $r label  $sym
370    $t set $r users  {}
371    $t set $r mode   leaf
372
373    set     s [$t insert $r end]
374    $t set $s op x
375
376    foreach ch [split $sym {}] {
377	set c [$t insert $s end]
378	$t set $c op   t
379	$t set $c char $ch
380    }
381    return $r
382}
383
384# ### ### ### ######### ######### #########
385## Internal. Low-level helpers.
386
387proc ::page::util::norm::lemon::CopyLocation {t src dst} {
388    $t set $dst range    [$t get $src range]
389    $t set $dst range_lc [$t get $src range_lc]
390    return
391}
392
393proc ::page::util::norm::lemon::MergeLocations {t srca srcb dst} {
394    set ar   [$t get $srca range]
395    set arlc [$t get $srca range_lc]
396
397    set br   [$t get $srcb range]
398    set brlc [$t get $srcb range_lc]
399
400    $t set $dst range    [list [lindex $ar   0] [lindex $br   1]]
401    $t set $dst range_lc [list [lindex $arlc 0] [lindex $brlc 1]]
402    return
403}
404
405proc ::page::util::norm::lemon::AttrCopy {t src asrc dst adst} {
406    $t set $dst $adst [$t get $src $asrc]
407    return
408}
409
410proc ::page::util::norm::lemon::Child {t n index} {
411    return [lindex [$t children $n] $index]
412}
413
414proc ::page::util::norm::lemon::TokReduce {t src attr} {
415    set tokens [$t get $src $attr]
416    set ch     {}
417    foreach tok $tokens {
418	lappend ch [lindex $tok 0]
419    }
420    $t set $src $attr [join $ch {}]
421    return
422}
423
424# ### ### ### ######### ######### #########
425## Ready
426
427package provide page::util::norm::lemon 0.1
428