1# tclparser-8.0.tcl -- 2# 3# This file provides a Tcl implementation of a XML parser. 4# This file supports Tcl 8.0. 5# 6# See xml-8.[01].tcl for definitions of character sets and 7# regular expressions. 8# 9# Copyright (c) 1998,1999 Zveno Pty Ltd 10# http://www.zveno.com/ 11# 12# Zveno makes this software and all associated data and documentation 13# ('Software') available free of charge for any purpose. 14# Copies may be made of this Software but all of this notice must be included 15# on any copy. 16# 17# The Software was developed for research purposes and Zveno does not warrant 18# that it is error free or fit for any purpose. Zveno disclaims any 19# liability for all claims, expenses, losses, damages and costs any user may 20# incur as a result of using, copying or modifying the Software. 21# 22# Copyright (c) 1997 Australian National University (ANU). 23# 24# ANU makes this software and all associated data and documentation 25# ('Software') available free of charge for any purpose. You may make copies 26# of the Software but you must include all of this notice on any copy. 27# 28# The Software was developed for research purposes and ANU does not warrant 29# that it is error free or fit for any purpose. ANU disclaims any 30# liability for all claims, expenses, losses, damages and costs any user may 31# incur as a result of using, copying or modifying the Software. 32# 33# $Id: tclparser-8.0.tcl,v 1.7 2003/02/25 04:09:21 balls Exp $ 34 35package require -exact Tcl 8.0 36 37package require xmldefs 1.10 38 39package require sgmlparser 1.0 40 41package provide xml::tclparser 2.6 42 43namespace eval xml { 44 45 # Procedures for parsing XML documents 46 namespace export parser 47 # Procedures for parsing XML DTDs 48 namespace export DTDparser 49 50 # Counter for creating unique parser objects 51 variable ParserCounter 0 52 53} 54 55# xml::parser -- 56# 57# Creates XML parser object. 58# 59# Arguments: 60# args Unique name for parser object 61# plus option/value pairs 62# 63# Recognised Options: 64# -final Indicates end of document data 65# -elementstartcommand Called when an element starts 66# -elementendcommand Called when an element ends 67# -characterdatacommand Called when character data occurs 68# -processinginstructioncommand Called when a PI occurs 69# -externalentityrefcommand Called for an external entity reference 70# 71# (Not compatible with expat) 72# -xmldeclcommand Called when the XML declaration occurs 73# -doctypecommand Called when the document type declaration occurs 74# 75# -errorcommand Script to evaluate for a fatal error 76# -warningcommand Script to evaluate for a reportable warning 77# -statevariable global state variable 78# -reportempty whether to provide empty element indication 79# 80# Results: 81# The state variable is initialised. 82 83proc xml::parser {args} { 84 variable ParserCounter 85 86 if {[llength $args] > 0} { 87 set name [lindex $args 0] 88 set args [lreplace $args 0 0] 89 } else { 90 set name parser[incr ParserCounter] 91 } 92 93 if {[info command [namespace current]::$name] != {}} { 94 return -code error "unable to create parser object \"[namespace current]::$name\" command" 95 } 96 97 # Initialise state variable and object command 98 upvar \#0 [namespace current]::$name parser 99 set sgml_ns [namespace parent]::sgml 100 array set parser [list name $name \ 101 -final 1 \ 102 -elementstartcommand ${sgml_ns}::noop \ 103 -elementendcommand ${sgml_ns}::noop \ 104 -characterdatacommand ${sgml_ns}::noop \ 105 -processinginstructioncommand ${sgml_ns}::noop \ 106 -externalentityrefcommand ${sgml_ns}::noop \ 107 -xmldeclcommand ${sgml_ns}::noop \ 108 -doctypecommand ${sgml_ns}::noop \ 109 -warningcommand ${sgml_ns}::noop \ 110 -statevariable [namespace current]::$name \ 111 -reportempty 0 \ 112 internaldtd {} \ 113 ] 114 115 proc [namespace current]::$name {method args} \ 116 "eval ParseCommand $name \$method \$args" 117 118 eval ParseCommand [list $name] configure $args 119 120 return [namespace current]::$name 121} 122 123# xml::ParseCommand -- 124# 125# Handles parse object command invocations 126# 127# Valid Methods: 128# cget 129# configure 130# parse 131# reset 132# 133# Arguments: 134# parser parser object 135# method minor command 136# args other arguments 137# 138# Results: 139# Depends on method 140 141proc xml::ParseCommand {parser method args} { 142 upvar \#0 [namespace current]::$parser state 143 144 switch -- $method { 145 cget { 146 return $state([lindex $args 0]) 147 } 148 configure { 149 foreach {opt value} $args { 150 set state($opt) $value 151 } 152 } 153 parse { 154 ParseCommand_parse $parser [lindex $args 0] 155 } 156 reset { 157 if {[llength $args]} { 158 return -code error "too many arguments" 159 } 160 ParseCommand_reset $parser 161 } 162 default { 163 return -code error "unknown method \"$method\"" 164 } 165 } 166 167 return {} 168} 169 170# xml::ParseCommand_parse -- 171# 172# Parses document instance data 173# 174# Arguments: 175# object parser object 176# xml data 177# 178# Results: 179# Callbacks are invoked, if any are defined 180 181proc xml::ParseCommand_parse {object xml} { 182 upvar \#0 [namespace current]::$object parser 183 variable Wsp 184 variable tokExpr 185 variable substExpr 186 187 set parent [namespace parent] 188 if {![string compare :: $parent]} { 189 set parent {} 190 } 191 192 set tokenised [lrange \ 193 [${parent}::sgml::tokenise $xml \ 194 $tokExpr \ 195 $substExpr \ 196 -internaldtdvariable [namespace current]::${object}(internaldtd)] \ 197 4 end] 198 199 eval ${parent}::sgml::parseEvent \ 200 [list $tokenised \ 201 -emptyelement [namespace code ParseEmpty] \ 202 -parseattributelistcommand [namespace code ParseAttrs]] \ 203 [array get parser -*command] \ 204 [array get parser -entityvariable] \ 205 [array get parser -reportempty] \ 206 [array get parser -final] \ 207 -normalize 0 \ 208 -internaldtd [list $parser(internaldtd)] 209 210 return {} 211} 212 213# xml::ParseEmpty -- Tcl 8.0 version 214# 215# Used by parser to determine whether an element is empty. 216# This should be dead easy in XML. The only complication is 217# that the RE above can't catch the trailing slash, so we have 218# to dig it out of the tag name or attribute list. 219# 220# Tcl 8.1 REs should fix this. 221# 222# Arguments: 223# tag element name 224# attr attribute list (raw) 225# e End tag delimiter. 226# 227# Results: 228# "/" if the trailing slash is found. Optionally, return a list 229# containing new values for the tag name and/or attribute list. 230 231proc xml::ParseEmpty {tag attr e} { 232 233 if {[string match */ [string trimright $tag]] && \ 234 ![string length $attr]} { 235 regsub {/$} $tag {} tag 236 return [list / $tag $attr] 237 } elseif {[string match */ [string trimright $attr]]} { 238 regsub {/$} [string trimright $attr] {} attr 239 return [list / $tag $attr] 240 } else { 241 return {} 242 } 243 244} 245 246# xml::ParseAttrs -- 247# 248# Parse element attributes. 249# 250# There are two forms for name-value pairs: 251# 252# name="value" 253# name='value' 254# 255# Watch out for the trailing slash on empty elements. 256# 257# Arguments: 258# attrs attribute string given in a tag 259# 260# Results: 261# Returns a Tcl list representing the name-value pairs in the 262# attribute string 263 264proc xml::ParseAttrs attrs { 265 variable Wsp 266 variable Name 267 268 # First check whether there's any work to do 269 if {![string compare {} [string trim $attrs]]} { 270 return {} 271 } 272 273 # Strip the trailing slash on empty elements 274 regsub [format {/[%s]*$} " \t\n\r"] $attrs {} atList 275 276 set mode name 277 set result {} 278 foreach component [split $atList =] { 279 switch $mode { 280 name { 281 set component [string trim $component] 282 if {[regexp $Name $component]} { 283 lappend result $component 284 } else { 285 return -code error "invalid attribute name \"$component\"" 286 } 287 set mode value:start 288 } 289 value:start { 290 set component [string trimleft $component] 291 set delimiter [string index $component 0] 292 set value {} 293 switch -- $delimiter { 294 \" - 295 ' { 296 if {[regexp [format {%s([^%s]*)%s(.*)} $delimiter $delimiter $delimiter] $component discard value remainder]} { 297 lappend result $value 298 set remainder [string trim $remainder] 299 if {[string length $remainder]} { 300 if {[regexp $Name $remainder]} { 301 lappend result $remainder 302 set mode value:start 303 } else { 304 return -code error "invalid attribute name \"$remainder\"" 305 } 306 } else { 307 set mode end 308 } 309 } else { 310 set value [string range $component 1 end] 311 set mode value:continue 312 } 313 } 314 default { 315 return -code error "invalid value for attribute \"[lindex $result end]\"" 316 } 317 } 318 } 319 value:continue { 320 if {[regexp [format {([^%s]*)%s(.*)} $delimiter $delimiter] $component discard valuepart remainder]} { 321 append value = $valuepart 322 lappend result $value 323 set remainder [string trim $remainder] 324 if {[string length $remainder]} { 325 if {[regexp $Name $remainder]} { 326 lappend result $remainder 327 set mode value:start 328 } else { 329 return -code error "invalid attribute name \"$remainder\"" 330 } 331 } else { 332 set mode end 333 } 334 } else { 335 append value = $component 336 } 337 } 338 end { 339 return -code error "unexpected data found after end of attribute list" 340 } 341 } 342 } 343 344 switch $mode { 345 name - 346 end { 347 # This is normal 348 } 349 default { 350 return -code error "unexpected end of attribute list" 351 } 352 } 353 354 return $result 355} 356 357# xml::ParseCommand_reset -- 358# 359# Initialize parser data 360# 361# Arguments: 362# object parser object 363# 364# Results: 365# Parser data structure initialised 366 367proc xml::ParseCommand_reset object { 368 upvar \#0 [namespace current]::$object parser 369 370 array set parser [list \ 371 -final 1 \ 372 internaldtd {} \ 373 ] 374} 375 376