1# xmlswitch.tcl --
2#
3#	This file implements a control structure for Tcl.
4#	'xmlswitch' iterates over an XML document.  Features in
5#	the document may be specified using XPath location paths,
6#	and these will trigger Tcl scripts when matched.
7#
8# Copyright (c) 2000-2003 Zveno Pty Ltd
9# http://www.zveno.com/
10#
11# Zveno makes this software available free of charge for any purpose.
12# Copies may be made of this software but all of this notice must be included
13# on any copy.
14#
15# The software was developed for research purposes only and Zveno does not
16# warrant that it is error free or fit for any purpose.  Zveno disclaims any
17# liability for all claims, expenses, losses, damages and costs any user may
18# incur as a result of using, copying or modifying this software.
19#
20# $Id: xmlswitch.tcl,v 1.4 2003/03/09 11:12:49 balls Exp $
21
22package provide xmlswitch 1.0
23
24# We need the xml, dom and xpath packages
25
26package require xml 2.6
27package require dom 2.6
28package require xpath 1.0
29
30namespace eval xmlswitch {
31    namespace export xmlswitch xmlswitchcont xmlswitchend
32    namespace export domswitch
33    namespace export free rootnode
34
35    variable counter 0
36
37    variable typemap
38    array set typemap {
39	text textNode
40	comment comment
41	processing-instruction processingInstruction
42    }
43}
44
45# xmlswitch::xmlswitch --
46#
47#	Parse XML data, matching for XPath locations along the way
48#	and (possibly) triggering callbacks.
49#
50#	A DOM tree is built as a side-effect (necessary for resolving
51#	XPath location paths).
52#
53# Arguments:
54#	xml	XML document
55#	args	configuration options,
56#		plus a single path/script expression, or multiple expressions
57#
58# Results:
59#	Tcl callbacks may be invoked.
60#	If -async option is true returns a token for this "process".
61
62proc xmlswitch::xmlswitch {xml args} {
63    variable counter
64
65    #puts stderr [list xmlswitch::xmlswitch $xml $args]
66
67    set stateVarName [namespace current]::State[incr counter]
68    upvar #0 $stateVarName state
69    set state(stateVarName) $stateVarName
70    set state(-async) 0
71
72    set state(pathArray) ${stateVarName}Paths
73    upvar #0 $state(pathArray) paths
74    array set paths {}
75
76    set cleanup {
77	unset state
78	unset paths
79    }
80
81    # Find configuration options and remove
82    set numOpts 0
83    foreach {opt value} $args {
84	switch -glob -- $opt {
85	    -* {
86		set state($opt) $value
87		incr numOpts 2
88	    }
89	    default {
90		set args [lrange $args $numOpts end]
91		break
92	    }
93	}
94    }
95
96    switch -- [llength $args] {
97	0 {
98	    # Nothing to do
99	    eval $cleanup
100	    return $stateVarName
101	}
102	1 {
103	    foreach {path script} [lindex $args 0] {
104		set paths([xpath::split $path]) $script
105	    }
106	}
107	default {
108	    if {[llength $args] % 2} {
109		eval $cleanup
110		return -code error "no script matching location path \"[lindex $args end]\""
111	    }
112	    foreach {path script} $args {
113		set paths([xpath::split $path]) $script
114	    }
115	}
116    }
117
118    set root [set state(root) [dom::DOMImplementation create]]
119    set state(current) $root
120
121    # Parse the document
122    # We're going to do this incrementally, so the caller can
123    # break at any time
124    set state(parser) [eval xml::parser [array get state -parser]]
125    #append cleanup "\n $parser destroy\n"
126    $state(parser) configure \
127	    -elementstartcommand [namespace code [list ParseElementStart $stateVarName]]	\
128	    -elementendcommand [namespace code [list ParseElementEnd $stateVarName]]		\
129	    -characterdatacommand [namespace code [list ParseCharacterData $stateVarName]]	\
130	    -final false
131
132#	    -processinginstructioncommand [namespace code [list ParsePI $stateVarName]]		\
133#	    -commentcommand [namespace code [list ParseComment]]
134
135    if {[catch {$state(parser) parse $xml} err]} {
136	eval $cleanup
137	return -code error $err
138    }
139
140    if {$state(-async)} {
141	return $stateVarName
142    } else {
143	eval $cleanup
144	return {}
145    }
146}
147
148# xmlswitch::xmlswitchcont --
149#
150#	Provide more XML data to parse
151#
152# Arguments:
153#	token	state variable name
154#	xml	XML data
155#
156# Results:
157#	More parsing
158
159proc xmlswitch::xmlswitchcont {token xml} {
160    upvar #0 $token state
161
162    $state(parser) parse $xml
163
164    return {}
165}
166
167# xmlswitch::xmlswitchend --
168#
169#	Signal that no further data is available
170#
171# Arguments:
172#	token	state array
173#
174# Results:
175#	Parser configuration changed
176
177proc xmlswitch::xmlswitchend token {
178    upvar #0 $token state
179
180    $state(parser) configure -final true
181
182    return {}
183}
184
185# xmlswitch::rootnode --
186#
187#	Get the root node
188#
189# Arguments:
190#	token	state array
191#
192# Results:
193#	Returns root node token
194
195proc xmlswitch::rootnode token {
196    upvar #0 $token state
197
198    return $state(root)
199}
200
201# xmlswitch::free --
202#
203#	Free resources EXCEPT the DOM tree.
204#	"-all" causes DOM tree to be destroyed too.
205#
206# Arguments:
207#	token	state array
208#	args	options
209#
210# Results:
211#	Resources freed.
212
213proc xmlswitch::free {token args} {
214    upvar #0 $token state
215
216    if {[lsearch $args "-all"] >= 0} {
217	dom::DOMImplementation destroy $state(root)
218    }
219
220    catch {unset $state(pathArray)}
221    catch {unset state}
222
223    catch {$state(parser) free}
224
225    return {}
226}
227
228# xmlswitch::ParseElementStart --
229#
230#	Handle element start tag
231#
232# Arguments:
233#	token	state array
234#	name	element type
235#	attrList attribute list
236#	args	options
237# Results:
238#	All XPath location paths are checked for a match,
239#	and script evaluated for matching XPath.
240#	DOM tree node added.
241
242proc xmlswitch::ParseElementStart:dbgdisabled {token name attrList args} {
243    if {[catch {eval ParseElementStart:dbg [list $token $name $attrList] $args} msg]} {
244	puts stderr [list ParseElementStart failed with msg $msg]
245	puts stderr $::errorInfo
246	return -code error $msg
247    } else {
248	puts stderr [list ParseElementStart returned OK]
249    }
250    return $msg
251}
252proc xmlswitch::ParseElementStart {token name attrList args} {
253
254    upvar #0 $token state
255    array set opts $args
256
257    #puts stderr [list xmlswitch::ParseElementStart $token $name $attrList $args]
258
259    lappend state(current) \
260	    [dom::document createElement [lindex $state(current) end] $name]
261    foreach {name value} $attrList {
262	dom::element setAttribute [lindex $state(current) end] $name $value
263    }
264
265    MatchTemplates $token [lindex $state(current) end]
266
267    return {}
268}
269
270# xmlswitch::ParseElementEnd --
271#
272#	Handle element end tag
273#
274# Arguments:
275#	token	state array
276#	name	element type
277#	args	options
278# Results:
279#	State changed
280
281proc xmlswitch::ParseElementEnd {token name args} {
282    upvar #0 $token state
283
284    set state(current) [lreplace $state(current) end end]
285
286    return {}
287}
288
289# xmlswitch::ParseCharacterData --
290#
291#	Handle character data
292#
293# Arguments:
294#	token	state array
295#	data	pcdata
296#
297# Results:
298#	All XPath location paths are checked for a match,
299#	and script evaluated for matching XPath.
300#	DOM tree node added.
301
302proc xmlswitch::ParseCharacterData {token data} {
303    upvar #0 $token state
304
305    lappend state(current) \
306	    [dom::document createTextNode [lindex $state(current) end] $data]
307
308    MatchTemplates $token [lindex $state(current) end]
309
310    set state(current) [lreplace $state(current) end end]
311
312    return {}
313}
314
315# xmlswitch::domswitch --
316#
317#	Similar to xmlswitch above, but iterates over a pre-built
318#	DOM tree.
319#
320# Arguments:
321#	xml	XML document
322#	args	a single path/script expression, or multiple expressions
323#
324# Results:
325#	Tcl callbacks may be invoked.
326
327proc xmlswitch::domswitch {xml args} {
328}
329
330# xmlswitch::MatchTemplates --
331#
332#	Check all templates for one which matches
333#	the current node.
334#
335# Arguments:
336#	token	state array
337#	node	Current DOM node
338#
339# Results:
340#	If a template matches, its script is evaluated
341
342proc xmlswitch::MatchTemplates {token node} {
343    upvar #0 $token state
344    upvar #0 $state(pathArray) paths
345
346    #puts stderr [list xmlswitch::MatchTemplates $token $node (type: [dom::node cget $node -nodeType]) (name: [dom::node cget $node -nodeName])]
347
348    set matches {}
349
350    foreach {path script} [array get paths] {
351
352	#puts stderr [list checking path $path for a match]
353
354	set context $node
355
356	# Work backwards along the path, reversing each axis
357	set match 0
358	set i [llength $path]
359	#puts stderr [list $i steps to be tested]
360	while {[incr i -1] >= 0} {
361	    #puts stderr [list step $i [lindex $path $i]]
362	    switch -glob [llength [lindex $path $i]],$i {
363		0,0 {
364		    #puts stderr [list absolute path, end of steps - am I at the root?]
365		    if {![string length [dom::node parent $context]]} {
366			#puts stderr [list absolute path matched]
367			lappend matches [list $path $script]
368		    } else {
369			#puts stderr [list absolute path did not match]
370		    }
371		}
372		*,0 {
373		    #puts stderr [list last step, relative path]
374		    switch [lindex [lindex $path $i] 0] {
375			child {
376			    if {[NodeTest [lindex $path $i] $context] && \
377				    [CheckPredicates [lindex $path $i] $context]} {
378				#puts stderr [list relative path matched]
379				lappend matches [list $path $script]
380			    } else {
381				#puts stderr [list relative path did not match]
382			    }
383			}
384			default {
385			    return -code error "axis \"[lindex [lindex $path $i] 0]\" not supported"
386			}
387		    }
388		}
389		default {
390		    #puts stderr [list continuing checking steps]
391		    switch [lindex [lindex $path $i] 0] {
392			child {
393			    if {[NodeTest [lindex $path $i] $context] && \
394				    [CheckPredicates [lindex $path $i] $context]} {
395				set context [dom::node parent $context]
396			    } else {
397				#puts stderr [list no match]
398			    }
399			}
400			default {
401			    return -code error "axis \"[lindex [lindex $path $i] 0]\" not supported"
402			}
403		    }
404		}
405	    }
406	}
407    }
408
409    # TODO: If there are multiple matches then we must pick the
410    # most specific match
411
412    if {[llength $matches] > 1} {
413	# For the moment we'll just take the first match
414	set matches [list [lindex $matches 0]]
415    }
416
417    if {[llength $matches]} {
418	#puts stderr [list evaluating callback at level [info level]]
419	uplevel 3 [lindex [lindex $matches 0] 1]
420    }
421
422    return {}
423}
424
425# xmlswitch::NodeTest --
426#
427#	Check that the node passes the node (type) test
428#
429# Arguments:
430#	step	Location step
431#	node	DOM node
432#
433# Results:
434#	Boolean
435
436proc xmlswitch::NodeTest {step node} {
437
438    if {[llength [lindex $step 1]] > 1} {
439	switch -glob -- [lindex [lindex $step 1] 0],[dom::node cget $node -nodeType] {
440	    node,* -
441	    text,textNode -
442	    comment,comment -
443	    processing-instruction,processingInstruction {
444		return 1
445	    }
446	    default {
447		return 0
448	    }
449	}
450    } elseif {![string compare [lindex $step 1] "*"]} {
451	return 1
452    } elseif {![string compare [lindex $step 1] [dom::node cget $node -nodeName]]} {
453	return 1
454    } else {
455	return 0
456    }
457}
458
459# xmlswitch::CheckPredicates --
460#
461#	Check that the node passes the predicates
462#
463# Arguments:
464#	step	Location step
465#	node	DOM node
466#
467# Results:
468#	Boolean
469
470proc xmlswitch::CheckPredicates {step node} {
471    variable typemap
472
473    set predicates [lindex $step 2]
474    # Shortcut: no predicates means everything passes
475    if {![llength $predicates]} {
476	return 1
477    }
478
479    # Get the context node set
480    switch [lindex $step 0] {
481	child {
482	    set nodeset {}
483	    if {[llength [lindex $step 1]]} {
484		foreach {name typetest} [lindex $step 1] break
485		switch -- $name {
486		    node {
487			set nodeset [dom::node children [dom::node parent $node]]
488		    }
489		    text -
490		    comment -
491		    processing-instruction {
492			foreach child [dom::node children [dom::node parent $node]] {
493			    if {![string compare [dom::node cget $child -nodeType] $typemap($name)]} {
494				lappend nodeset $child
495			    }
496			}
497		    }
498		    default {
499			# Error
500		    }
501		}
502	    } else {
503		foreach child [dom::node children [dom::node parent $node]] {
504		    if {![string compare [lindex $step 1] [dom::node cget $child -nodeName]]} {
505			lappend nodeset $child
506		    }
507		}
508	    }
509	}
510	default {
511	    return -code error "axis \"[lindex $step 0]\" not supported"
512	}
513    }
514
515    foreach predicate $predicates {
516	# position() is the only supported predicate
517	if {[lsearch $nodeset $node] + 1 == $predicate} {
518	    # continue
519	} else {
520	    return 0
521	}
522    }
523
524    return 1
525}
526
527