1# tclparser-8.0.tcl --
2#
3#	This file provides a Tcl implementation of a XML parser.
4#	This file supports Tcl 8.0.
5#
6#	See xml-8.[01].tcl for definitions of character sets and
7#	regular expressions.
8#
9# Copyright (c) 1998,1999 Zveno Pty Ltd
10# http://www.zveno.com/
11#
12# Zveno makes this software and all associated data and documentation
13# ('Software') available free of charge for any purpose.
14# Copies may be made of this Software but all of this notice must be included
15# on any copy.
16#
17# The Software was developed for research purposes and Zveno does not warrant
18# that it is error free or fit for any purpose.  Zveno disclaims any
19# liability for all claims, expenses, losses, damages and costs any user may
20# incur as a result of using, copying or modifying the Software.
21#
22# Copyright (c) 1997 Australian National University (ANU).
23#
24# ANU makes this software and all associated data and documentation
25# ('Software') available free of charge for any purpose. You may make copies
26# of the Software but you must include all of this notice on any copy.
27#
28# The Software was developed for research purposes and ANU does not warrant
29# that it is error free or fit for any purpose.  ANU disclaims any
30# liability for all claims, expenses, losses, damages and costs any user may
31# incur as a result of using, copying or modifying the Software.
32#
33# $Id: tclparser-8.0.tcl,v 1.7 2003/02/25 04:09:21 balls Exp $
34
35package require -exact Tcl 8.0
36
37package require xmldefs 1.10
38
39package require sgmlparser 1.0
40
41package provide xml::tclparser 2.6
42
43namespace eval xml {
44
45    # Procedures for parsing XML documents
46    namespace export parser
47    # Procedures for parsing XML DTDs
48    namespace export DTDparser
49
50    # Counter for creating unique parser objects
51    variable ParserCounter 0
52
53}
54
55# xml::parser --
56#
57#	Creates XML parser object.
58#
59# Arguments:
60#	args	Unique name for parser object
61#		plus option/value pairs
62#
63# Recognised Options:
64#	-final			Indicates end of document data
65#	-elementstartcommand	Called when an element starts
66#	-elementendcommand	Called when an element ends
67#	-characterdatacommand	Called when character data occurs
68#	-processinginstructioncommand	Called when a PI occurs
69#	-externalentityrefcommand	Called for an external entity reference
70#
71#	(Not compatible with expat)
72#	-xmldeclcommand		Called when the XML declaration occurs
73#	-doctypecommand		Called when the document type declaration occurs
74#
75#	-errorcommand		Script to evaluate for a fatal error
76#	-warningcommand		Script to evaluate for a reportable warning
77#	-statevariable		global state variable
78#	-reportempty		whether to provide empty element indication
79#
80# Results:
81#	The state variable is initialised.
82
83proc xml::parser {args} {
84    variable ParserCounter
85
86    if {[llength $args] > 0} {
87	set name [lindex $args 0]
88	set args [lreplace $args 0 0]
89    } else {
90	set name parser[incr ParserCounter]
91    }
92
93    if {[info command [namespace current]::$name] != {}} {
94	return -code error "unable to create parser object \"[namespace current]::$name\" command"
95    }
96
97    # Initialise state variable and object command
98    upvar \#0 [namespace current]::$name parser
99    set sgml_ns [namespace parent]::sgml
100    array set parser [list name $name			\
101	-final 1					\
102	-elementstartcommand ${sgml_ns}::noop		\
103	-elementendcommand ${sgml_ns}::noop		\
104	-characterdatacommand ${sgml_ns}::noop		\
105	-processinginstructioncommand ${sgml_ns}::noop	\
106	-externalentityrefcommand ${sgml_ns}::noop	\
107	-xmldeclcommand ${sgml_ns}::noop		\
108	-doctypecommand ${sgml_ns}::noop		\
109	-warningcommand ${sgml_ns}::noop		\
110	-statevariable [namespace current]::$name	\
111	-reportempty 0					\
112	internaldtd {}					\
113    ]
114
115    proc [namespace current]::$name {method args} \
116	"eval ParseCommand $name \$method \$args"
117
118    eval ParseCommand [list $name] configure $args
119
120    return [namespace current]::$name
121}
122
123# xml::ParseCommand --
124#
125#	Handles parse object command invocations
126#
127# Valid Methods:
128#	cget
129#	configure
130#	parse
131#	reset
132#
133# Arguments:
134#	parser	parser object
135#	method	minor command
136#	args	other arguments
137#
138# Results:
139#	Depends on method
140
141proc xml::ParseCommand {parser method args} {
142    upvar \#0 [namespace current]::$parser state
143
144    switch -- $method {
145	cget {
146	    return $state([lindex $args 0])
147	}
148	configure {
149	    foreach {opt value} $args {
150		set state($opt) $value
151	    }
152	}
153	parse {
154	    ParseCommand_parse $parser [lindex $args 0]
155	}
156	reset {
157	    if {[llength $args]} {
158		return -code error "too many arguments"
159	    }
160	    ParseCommand_reset $parser
161	}
162	default {
163	    return -code error "unknown method \"$method\""
164	}
165    }
166
167    return {}
168}
169
170# xml::ParseCommand_parse --
171#
172#	Parses document instance data
173#
174# Arguments:
175#	object	parser object
176#	xml	data
177#
178# Results:
179#	Callbacks are invoked, if any are defined
180
181proc xml::ParseCommand_parse {object xml} {
182    upvar \#0 [namespace current]::$object parser
183    variable Wsp
184    variable tokExpr
185    variable substExpr
186
187    set parent [namespace parent]
188    if {![string compare :: $parent]} {
189	set parent {}
190    }
191
192    set tokenised [lrange \
193	    [${parent}::sgml::tokenise $xml \
194	    $tokExpr \
195	    $substExpr \
196	    -internaldtdvariable [namespace current]::${object}(internaldtd)] \
197	4 end]
198
199    eval ${parent}::sgml::parseEvent \
200	[list $tokenised \
201	    -emptyelement [namespace code ParseEmpty] \
202	    -parseattributelistcommand [namespace code ParseAttrs]] \
203	[array get parser -*command] \
204	[array get parser -entityvariable] \
205	[array get parser -reportempty] \
206	[array get parser -final] \
207	-normalize 0 \
208	-internaldtd [list $parser(internaldtd)]
209
210    return {}
211}
212
213# xml::ParseEmpty --  Tcl 8.0 version
214#
215#       Used by parser to determine whether an element is empty.
216#       This should be dead easy in XML.  The only complication is
217#       that the RE above can't catch the trailing slash, so we have
218#       to dig it out of the tag name or attribute list.
219#
220#       Tcl 8.1 REs should fix this.
221#
222# Arguments:
223#       tag     element name
224#       attr    attribute list (raw)
225#       e       End tag delimiter.
226#
227# Results:
228#       "/" if the trailing slash is found.  Optionally, return a list
229#       containing new values for the tag name and/or attribute list.
230
231proc xml::ParseEmpty {tag attr e} {
232
233    if {[string match */ [string trimright $tag]] && \
234            ![string length $attr]} {
235        regsub {/$} $tag {} tag
236        return [list / $tag $attr]
237    } elseif {[string match */ [string trimright $attr]]} {
238        regsub {/$} [string trimright $attr] {} attr
239        return [list / $tag $attr]
240    } else {
241        return {}
242    }
243
244}
245
246# xml::ParseAttrs --
247#
248#	Parse element attributes.
249#
250# There are two forms for name-value pairs:
251#
252#	name="value"
253#	name='value'
254#
255# Watch out for the trailing slash on empty elements.
256#
257# Arguments:
258#	attrs	attribute string given in a tag
259#
260# Results:
261#	Returns a Tcl list representing the name-value pairs in the
262#	attribute string
263
264proc xml::ParseAttrs attrs {
265    variable Wsp
266    variable Name
267
268    # First check whether there's any work to do
269    if {![string compare {} [string trim $attrs]]} {
270	return {}
271    }
272
273    # Strip the trailing slash on empty elements
274    regsub [format {/[%s]*$} " \t\n\r"] $attrs {} atList
275
276    set mode name
277    set result {}
278    foreach component [split $atList =] {
279	switch $mode {
280	    name {
281		set component [string trim $component]
282		if {[regexp $Name $component]} {
283		    lappend result $component
284		} else {
285		    return -code error "invalid attribute name \"$component\""
286		}
287		set mode value:start
288	    }
289	    value:start {
290		set component [string trimleft $component]
291		set delimiter [string index $component 0]
292		set value {}
293		switch -- $delimiter {
294		    \" -
295		    ' {
296			if {[regexp [format {%s([^%s]*)%s(.*)} $delimiter $delimiter $delimiter] $component discard value remainder]} {
297			    lappend result $value
298			    set remainder [string trim $remainder]
299			    if {[string length $remainder]} {
300				if {[regexp $Name $remainder]} {
301				    lappend result $remainder
302				    set mode value:start
303				} else {
304				    return -code error "invalid attribute name \"$remainder\""
305				}
306			    } else {
307				set mode end
308			    }
309			} else {
310			    set value [string range $component 1 end]
311			    set mode value:continue
312			}
313		    }
314		    default {
315			return -code error "invalid value for attribute \"[lindex $result end]\""
316		    }
317		}
318	    }
319	    value:continue {
320		if {[regexp [format {([^%s]*)%s(.*)} $delimiter $delimiter] $component discard valuepart remainder]} {
321		    append value = $valuepart
322		    lappend result $value
323		    set remainder [string trim $remainder]
324		    if {[string length $remainder]} {
325			if {[regexp $Name $remainder]} {
326			    lappend result $remainder
327			    set mode value:start
328			} else {
329			    return -code error "invalid attribute name \"$remainder\""
330			}
331		    } else {
332			set mode end
333		    }
334		} else {
335		    append value = $component
336		}
337	    }
338	    end {
339		return -code error "unexpected data found after end of attribute list"
340	    }
341	}
342    }
343
344    switch $mode {
345	name -
346	end {
347	    # This is normal
348	}
349	default {
350	    return -code error "unexpected end of attribute list"
351	}
352    }
353
354    return $result
355}
356
357# xml::ParseCommand_reset --
358#
359#	Initialize parser data
360#
361# Arguments:
362#	object	parser object
363#
364# Results:
365#	Parser data structure initialised
366
367proc xml::ParseCommand_reset object {
368    upvar \#0 [namespace current]::$object parser
369
370    array set parser [list \
371	    -final 1		\
372	    internaldtd {}	\
373    ]
374}
375
376