1# xpath.tcl --
2#
3#	Provides an XPath parser for Tcl,
4#	plus various support procedures
5#
6# Copyright (c) 2000-2002 Zveno Pty Ltd
7#
8# $Id: xpath.tcl,v 1.7 2002/06/14 12:16:23 balls Exp $
9
10package provide xpath 1.0
11
12# We need the XML package for definition of Names
13package require xml
14
15namespace eval xpath {
16    namespace export split join createnode
17
18    variable axes {
19	ancestor
20	ancestor-or-self
21	attribute
22	child
23	descendant
24	descendant-or-self
25	following
26	following-sibling
27	namespace
28	parent
29	preceding
30	preceding-sibling
31	self
32    }
33
34    variable nodeTypes {
35	comment
36	text
37	processing-instruction
38	node
39    }
40
41    # NB. QName has parens for prefix
42
43    variable nodetestExpr ^(${::xml::QName})${::xml::allWsp}(\\(${::xml::allWsp}(("|')(.*?)\\5)?${::xml::allWsp}\\))?${::xml::allWsp}(.*)
44
45    variable nodetestExpr2 ((($::xml::QName)${::xml::allWsp}(\\(${::xml::allWsp}(("|')(.*?)\\7)?${::xml::allWsp}\\))?)|${::xml::allWsp}(\\*))${::xml::allWsp}(.*)
46}
47
48# xpath::split --
49#
50#	Parse an XPath location path
51#
52# Arguments:
53#	locpath	location path
54#
55# Results:
56#	A Tcl list representing the location path.
57#	The list has the form: {{axis node-test {predicate predicate ...}} ...}
58#	Where each list item is a location step.
59
60proc xpath::split locpath {
61    set leftover {}
62
63    set result [InnerSplit $locpath leftover]
64
65    if {[string length [string trim $leftover]]} {
66	return -code error "unexpected text \"$leftover\""
67    }
68
69    return $result
70}
71
72proc xpath::InnerSplit {locpath leftoverVar} {
73    upvar $leftoverVar leftover
74
75    variable axes
76    variable nodetestExpr
77    variable nodetestExpr2
78
79    # First determine whether we have an absolute location path
80    if {[regexp {^/(.*)} $locpath discard locpath]} {
81	set path {{}}
82    } else {
83	set path {}
84    }
85
86    while {[string length [string trimleft $locpath]]} {
87	if {[regexp {^\.\.(.*)} $locpath discard locpath]} {
88	    # .. abbreviation
89	    set axis parent
90	    set nodetest *
91	} elseif {[regexp {^/(.*)} $locpath discard locpath]} {
92	    # // abbreviation
93	    set axis descendant-or-self
94	    if {[regexp ^$nodetestExpr2 [string trimleft $locpath] discard discard discard nodetest discard typetest discard discard literal wildcard locpath]} {
95		set nodetest [ResolveWildcard $nodetest $typetest $wildcard $literal]
96	    } else {
97		set leftover $locpath
98		return $path
99	    }
100	} elseif {[regexp ^\\.${::xml::allWsp}(.*) $locpath discard locpath]} {
101	    # . abbreviation
102	    set axis self
103	    set nodetest *
104	} elseif {[regexp ^@($::xml::QName)${::xml::allWsp}=${::xml::allWsp}"(\[^"\])"(.*) $locpath discard attrName discard attrValue locpath]} {
105	    # @ abbreviation
106	    set axis attribute
107	    set nodetest $attrName
108	} elseif {[regexp ^@($::xml::QName)${::xml::allWsp}=${::xml::allWsp}'(\[^'\])'(.*) $locpath discard attrName discard attrValue locpath]} {
109	    # @ abbreviation
110	    set axis attribute
111	    set nodetest $attrName
112	} elseif {[regexp ^@($::xml::QName)(.*) $locpath discard attrName discard2 locpath]} {
113	    # @ abbreviation
114	    set axis attribute
115	    set nodetest $attrName
116	} elseif {[regexp ^((${::xml::QName})${::xml::allWsp}::${::xml::allWsp})?\\*(.*) $locpath discard discard axis discard locpath]} {
117	    # wildcard specified
118	    set nodetest *
119	    if {![string length $axis]} {
120		set axis child
121	    }
122	} elseif {[regexp ^((${::xml::QName})${::xml::allWsp}::${::xml::allWsp})?$nodetestExpr2 $locpath discard discard axis discard discard discard nodetest discard typetest discard discard literal wildcard locpath]} {
123	    # nodetest, with or without axis
124	    if {![string length $axis]} {
125		set axis child
126	    }
127	    set nodetest [ResolveWildcard $nodetest $typetest $wildcard $literal]
128	} else {
129	    set leftover $locpath
130	    return $path
131	}
132
133	# ParsePredicates
134	set predicates {}
135	set locpath [string trimleft $locpath]
136	while {[regexp {^\[(.*)} $locpath discard locpath]} {
137	    if {[regexp {^([0-9]+)(\].*)} [string trim $locpath] discard posn locpath]} {
138		set predicate [list = {function position {}} [list number $posn]]
139	    } else {
140		set leftover2 {}
141		set predicate [ParseExpr $locpath leftover2]
142		set locpath $leftover2
143		unset leftover2
144	    }
145
146	    if {[regexp {^\](.*)} [string trimleft $locpath] discard locpath]} {
147		lappend predicates $predicate
148	    } else {
149		return -code error "unexpected text in predicate \"$locpath\""
150	    }
151	}
152
153	set axis [string trim $axis]
154	set nodetest [string trim $nodetest]
155
156	# This step completed
157	if {[lsearch $axes $axis] < 0} {
158	    return -code error "invalid axis \"$axis\""
159	}
160	lappend path [list $axis $nodetest $predicates]
161
162	# Move to next step
163
164	if {[string length $locpath] && ![regexp ^/(.*) $locpath discard locpath]} {
165            set leftover $locpath
166	    return $path
167	}
168
169    }
170
171    return $path
172}
173
174# xpath::ParseExpr --
175#
176#	Parse one expression in a predicate
177#
178# Arguments:
179#	locpath	location path to parse
180#	leftoverVar	Name of variable in which to store remaining path
181#
182# Results:
183#	Returns parsed expression as a Tcl list
184
185proc xpath::ParseExpr {locpath leftoverVar} {
186    upvar $leftoverVar leftover
187    variable nodeTypes
188
189    set expr {}
190    set mode expr
191    set stack {}
192
193    while {[string index [string trimleft $locpath] 0] != "\]"} {
194	set locpath [string trimleft $locpath]
195	switch $mode {
196	    expr {
197		# We're looking for a term
198		if {[regexp ^-(.*) $locpath discard locpath]} {
199		    # UnaryExpr
200		    lappend stack "-"
201		} elseif {[regexp ^\\\$({$::xml::QName})(.*) $locpath discard varname discard locpath]} {
202		    # VariableReference
203		    lappend stack [list varRef $varname]
204		    set mode term
205		} elseif {[regexp {^\((.*)} $locpath discard locpath]} {
206		    # Start grouping
207		    set leftover2 {}
208		    lappend stack [list group [ParseExpr $locpath leftover2]]
209		    set locpath $leftover2
210		    unset leftover2
211
212		    if {[regexp {^\)(.*)} [string trimleft $locpath] discard locpath]} {
213			set mode term
214		    } else {
215			return -code error "unexpected text \"$locpath\", expected \")\""
216		    }
217
218		} elseif {[regexp {^"([^"]*)"(.*)} $locpath discard literal locpath]} {
219		    # Literal (" delimited)
220		    lappend stack [list literal $literal]
221		    set mode term
222		} elseif {[regexp {^'([^']*)'(.*)} $locpath discard literal locpath]} {
223		    # Literal (' delimited)
224		    lappend stack [list literal $literal]
225		    set mode term
226		} elseif {[regexp {^([0-9]+(\.[0-9]+)?)(.*)} $locpath discard number discard locpath]} {
227		    # Number
228		    lappend stack [list number $number]
229		    set mode term
230		} elseif {[regexp {^(\.[0-9]+)(.*)} $locpath discard number locpath]} {
231		    # Number
232		    lappend stack [list number $number]
233		    set mode term
234		} elseif {[regexp ^(${::xml::QName})\\(${::xml::allWsp}(.*) $locpath discard functionName discard locpath]} {
235		    # Function call start or abbreviated node-type test
236
237		    if {[lsearch $nodeTypes $functionName] >= 0} {
238			# Looking like a node-type test
239			if {[regexp ^\\)${::xml::allWsp}(.*) $locpath discard locpath]} {
240			    lappend stack [list path [list child [list $functionName ()] {}]]
241			    set mode term
242			} else {
243			    return -code error "invalid node-type test \"$functionName\""
244			}
245		    } else {
246			if {[regexp ^\\)${::xml::allWsp}(.*) $locpath discard locpath]} {
247			    set parameters {}
248			} else {
249			    set leftover2 {}
250			    set parameters [ParseExpr $locpath leftover2]
251			    set locpath $leftover2
252			    unset leftover2
253			    while {[regexp {^,(.*)} $locpath discard locpath]} {
254				set leftover2 {}
255				lappend parameters [ParseExpr $locpath leftover2]
256				set locpath $leftover2
257				unset leftover2
258			    }
259
260			    if {![regexp ^\\)${::xml::allWsp}(.*) [string trimleft $locpath] discard locpath]} {
261				return -code error "unexpected text \"locpath\" - expected \")\""
262			    }
263		        }
264
265			lappend stack [list function $functionName $parameters]
266			set mode term
267		    }
268
269		} else {
270		    # LocationPath
271		    set leftover2 {}
272		    lappend stack [list path [InnerSplit $locpath leftover2]]
273		    set locpath $leftover2
274		    unset leftover2
275		    set mode term
276		}
277	    }
278	    term {
279		# We're looking for an expression operator
280		if {[regexp ^-(.*) $locpath discard locpath]} {
281		    # UnaryExpr
282		    set stack [linsert $stack 0 expr "-"]
283		    set mode expr
284		} elseif {[regexp ^(and|or|\\=|!\\=|<|>|<\\=|>\\=|\\||\\+|\\-|\\*|div|mod)(.*) $locpath discard exprtype locpath]} {
285		    # AndExpr, OrExpr, EqualityExpr, RelationalExpr or UnionExpr
286		    set stack [linsert $stack 0 $exprtype]
287		    set mode expr
288		} else {
289		    return -code error "unexpected text \"$locpath\", expecting operator"
290		}
291	    }
292	    default {
293		# Should never be here!
294		return -code error "internal error"
295	    }
296	}
297    }
298
299    set leftover $locpath
300    return $stack
301}
302
303# xpath::ResolveWildcard --
304
305proc xpath::ResolveWildcard {nodetest typetest wildcard literal} {
306    variable nodeTypes
307
308    switch -glob -- [string length $nodetest],[string length $typetest],[string length $wildcard],[string length $literal] {
309	0,0,0,* {
310	    return -code error "bad location step (nothing parsed)"
311	}
312	0,0,* {
313	    # Name wildcard specified
314	    return *
315	}
316	*,0,0,* {
317	    # Element type test - nothing to do
318	    return $nodetest
319	}
320	*,0,*,* {
321	    # Internal error?
322	    return -code error "bad location step (found both nodetest and wildcard)"
323	}
324	*,*,0,0 {
325	    # Node type test
326	    if {[lsearch $nodeTypes $nodetest] < 0} {
327		return -code error "unknown node type \"$typetest\""
328	    }
329	    return [list $nodetest $typetest]
330	}
331	*,*,0,* {
332	    # Node type test
333	    if {[lsearch $nodeTypes $nodetest] < 0} {
334		return -code error "unknown node type \"$typetest\""
335	    }
336	    return [list $nodetest $literal]
337	}
338	default {
339	    # Internal error?
340	    return -code error "bad location step"
341	}
342    }
343}
344
345# xpath::join --
346#
347#	Reconstitute an XPath location path from a
348#	Tcl list representation.
349#
350# Arguments:
351#	spath	split path
352#
353# Results:
354#	Returns an Xpath location path
355
356proc xpath::join spath {
357    return -code error "not yet implemented"
358}
359
360