1# -*- tcl -*-
2# Copyright (c) 2009 Andreas Kupries <andreas_kupries@sourceforge.net>
3
4# Utility commands operating on parsing expressions.
5
6# # ## ### ##### ######## ############# #####################
7## Requirements
8
9package require Tcl 8.5        ; # Required runtime.
10package require pt::pe         ; # PE basics
11package require struct::set    ; # Set operations (symbol sets)
12
13# # ## ### ##### ######## ############# #####################
14##
15
16namespace eval ::pt::pe::op {
17    namespace export \
18	drop rename called flatten fusechars
19
20    namespace ensemble create
21}
22
23# # ## ### ##### ######## #############
24## Public API
25
26proc ::pt::pe::op::rename {nt ntnew serial} {
27    if {$nt eq $ntnew} {
28	return $serial
29    }
30    return [pt::pe bottomup \
31		[list [namespace current]::Rename $nt $ntnew] \
32		$serial]
33}
34
35proc ::pt::pe::op::drop {dropset serial} {
36   set res [pt::pe bottomup \
37		[list [namespace current]::Drop $dropset] \
38		$serial]
39   if {$res eq "@@"} { set res [pt::pe epsilon] }
40   return $res
41}
42
43proc ::pt::pe::op::called {serial} {
44    return [pt::pe bottomup \
45		[list [namespace current]::Called] \
46		$serial]
47}
48
49proc ::pt::pe::op::flatten {serial} {
50    return [pt::pe bottomup \
51		[list [namespace current]::Flatten] \
52		$serial]
53}
54
55proc ::pt::pe::op::fusechars {serial} {
56    return [pt::pe bottomup \
57		[list [namespace current]::FuseChars] \
58		$serial]
59}
60
61# # ## ### ##### ######## #############
62## Internals
63
64proc ::pt::pe::op::Drop {dropset pe op arguments} {
65    if {$op eq "n"} {
66	lassign $arguments symbol
67	if {[struct::set contains $dropset $symbol]} {
68	    return @@
69	} else {
70	    return $pe
71	}
72    }
73
74    switch -exact -- $op {
75	/ - x - * - + - ? - & - ! {
76	    set newarg {}
77	    foreach a $arguments {
78		if {$a eq "@@"} continue
79		lappend newarg $a
80	    }
81
82	    if {![llength $newarg]} {
83		# Nothing remained, drop the whole expression
84		return [pt::pe epsilon]
85	    } elseif {[llength $newarg] < [llength $argument]} {
86		# Some removed, construct a new expression
87		set pe [list $op {*}$newarg]
88	    } ; # None removed, no change.
89	}
90    }
91
92    return $pe
93}
94
95proc ::pt::pe::op::Rename {nt ntnew pe op arguments} {
96    #puts R($op)/$arguments/
97    if {($op eq "n") && ([lindex $arguments 0] eq $nt)} {
98	return [pt::pe nonterminal $ntnew]
99    } else {
100	return $pe
101    }
102}
103
104proc ::pt::pe::op::Called {pe op arguments} {
105    # arguments = list(set-of-symbols) for operators, and n.
106    #             ignored for terminal expressions.
107    # result    = set-of-symbols
108
109    #puts -nonewline C|$op|$arguments|=
110    switch -exact -- $op {
111	n - & - ! - * - + - ? {
112	    #puts |[lindex $arguments 0]|
113	    return [lindex $arguments 0]
114	}
115	x - / {
116	    #puts |[struct::set union {*}$arguments]|
117	    return [struct::set union {*}$arguments]
118	}
119    }
120    #puts ||
121    return {}
122}
123
124proc ::pt::pe::op::Flatten {pe op arguments} {
125    switch -exact -- $op {
126	x - / {
127	    if {[llength $arguments] == 1} {
128		# Cut single-child x/ out of the tree
129		return [lindex $arguments 0]
130	    } else {
131		set res {}
132		foreach c $arguments {
133		    if {[lindex $c 0] eq $op} {
134			# Cut x in x (/ in /) operator out of the
135			# tree.
136			lappend res {*}[lrange $c 1 end]
137		    } else {
138			# Leave anything else unchanged.
139			lappend res $c
140		    }
141		}
142		return [list $op {*}$res]
143	    }
144	}
145	default {
146	    # Leave anything not x/ unchanged
147	    return $pe
148	}
149    }
150}
151
152proc ::pt::pe::op::FuseChars {pe op arguments} {
153    switch -exact -- $op {
154	x {
155	    set changed 0  ; # boolean flag showing if fuse ops were done.
156	    set buf     {} ; # accumulator of chars in a string.
157	    set res     {} ; # accumulator of new children for operator.
158
159	    foreach c $arguments {
160		CollectTerminal $c
161		FuseTerminal
162		lappend res $c
163	    }
164
165	    # Capture a run of characters at the end of the sequence.
166	    FuseTerminal
167
168	    if {$changed} {
169		return [list x {*}$res]
170	    } else {
171		return $pe
172	    }
173	}
174	/ {
175	    set changed 0  ; # boolean flag showing if fuse ops were done.
176	    set buf     {} ; # accumulator of chars and ranges in a class.
177	    set res     {} ; # accumulator of new children for operator.
178
179	    foreach c $arguments {
180		CollectClass $c
181		FuseClass
182		lappend res $c
183	    }
184
185	    # Capture a run of characters and ranges at the end of the
186	    # sequence.
187	    FuseClass
188
189	    if {$changed} {
190		return [list / {*}$res]
191	    } else {
192		return $pe
193	    }
194	}
195	default {
196	    # Leave anything not x/ unchanged
197	    return $pe
198	}
199    }
200}
201
202# # ## ### ##### ######## #############
203## Fuser Support
204
205proc ::pt::pe::op::CollectTerminal {c} {
206    if {[lindex $c 0] ne "t"} return
207
208    # A terminal. Just extend the accumulator. The main processing
209    # happens after each run of t-operators, see FuseTerminal.
210
211    upvar 1 buf buf
212    lappend buf [lindex $c 1]
213    return -code continue
214}
215
216proc ::pt::pe::op::FuseTerminal {} {
217    upvar 1 changed changed res res buf buf
218
219    # Nothing has accumulated, nothing to fuse.
220    if {$buf eq {}} return
221
222    # The current non-t operator is after one or more t-operators. We
223    # have to flush its accumulated data to keep the expression
224    # correct.
225
226    if {[llength $buf] > 1} {
227	# We are behind an actual series of t-operators, i.e. a
228	# string. We flush it and signal the change to the processing
229	# after the loop,
230
231	lappend res [list str {*}$buf]
232	set changed 1
233    } else {
234	# We are behind a single t-operator. We keep it as is, there
235	# is no actual need to make it a string.
236
237	lappend res [pt::pe terminal [lindex $buf 0]]
238    }
239
240    # Reset the accumulator for the next series.
241    set buf {}
242    return
243}
244
245# # ## ### ##### ######## #############
246
247proc ::pt::pe::op::CollectClass {c} {
248    if {[lindex $c 0] ni {t ..}} return
249
250    # A terminal or range. Just extend the accumulator. The main processing
251    # happens after each run of t-operators, see FuseTerminal.
252
253    upvar 1 buf buf
254    set new [lrange $c 1 end]
255    if {([llength $new] == 1) || ([lindex $new 0] eq [lindex $new 1])} {
256	set new [lindex $new 0]
257    }
258    lappend buf $new
259    return -code continue
260}
261
262proc ::pt::pe::op::FuseClass {} {
263    upvar 1 changed changed res res buf buf
264
265    # Nothing has accumulated, nothing to fuse.
266    if {$buf eq {}} return
267
268    # The current non-t operator is after one or more
269    # t/..-operators. We have to flush the accumulated data to keep
270    # the expression correct.
271
272    if {[llength $buf] > 1} {
273	# We are behind an actual series of t/..-operators, i.e. a
274	# class. We flush it, signal the change to the processing
275	# after the loop, and reset the accumulator for the next
276	# series.
277
278	# TODO :: Sort class elements, aggregate adjacents into larger
279	#         ranges if possible and worthwhile (>= 3), look for
280	#         overlapping ranges and merge.
281
282	lappend res [list cl {*}$buf]
283	set changed 1
284    } else {
285	# We are behind a single t- or ..-operator. A terminal can be
286	# kept as is, but a range has to be encapsulated into a class,
287	# except of the range is something like a-a, then this is just
288	# a different coding of a single character ...
289
290	set args [lindex $buf 0]
291	if {[llength $args] == 1} {
292	    lappend res [pt::pe terminal [lindex $args 0]]
293	} else {
294	    lassign $args a b
295	    set changed 1
296	    if {$a ne $b} {
297		lappend res [list cl {*}$buf]
298	    } else {
299		lappend res [pt::pe terminal $a]
300	    }
301	}
302    }
303
304    # Reset the accumulator for the next series.
305    set buf {}
306    return
307}
308
309# # ## ### ##### ######## #############
310## State / Configuration :: n/a
311
312namespace eval ::pt::pe::op {}
313
314# # ## ### ##### ######## ############# #####################
315## Ready
316
317package provide pt::pe::op 1
318return
319