1# xml.tcl -- 2# 3# This file provides XML services. 4# These services include a XML document instance and DTD parser, 5# as well as support for generating XML. 6# 7# Copyright (c) 1998,1999 Zveno Pty Ltd 8# http://www.zveno.com/ 9# 10# Zveno makes this software and all associated data and documentation 11# ('Software') available free of charge for non-commercial purposes only. You 12# may make copies of the Software but you must include all of this notice on 13# any copy. 14# 15# The Software was developed for research purposes and Zveno does not warrant 16# that it is error free or fit for any purpose. Zveno disclaims any 17# liability for all claims, expenses, losses, damages and costs any user may 18# incur as a result of using, copying or modifying the Software. 19# 20# Copyright (c) 1997 Australian National University (ANU). 21# 22# ANU makes this software and all associated data and documentation 23# ('Software') available free of charge for non-commercial purposes only. You 24# may make copies of the Software but you must include all of this notice on 25# any copy. 26# 27# The Software was developed for research purposes and ANU does not warrant 28# that it is error free or fit for any purpose. ANU disclaims any 29# liability for all claims, expenses, losses, damages and costs any user may 30# incur as a result of using, copying or modifying the Software. 31# 32# $Id: xml.tcl,v 1.4 2006/09/27 08:12:40 neumann Exp $ 33 34package provide xml 1.8 35 36package require sgml 1.6 37 38namespace eval xml { 39 40 # Procedures for parsing XML documents 41 namespace export parser 42 # Procedures for parsing XML DTDs 43 namespace export DTDparser 44 45 # Counter for creating unique parser objects 46 variable ParserCounter 0 47 48 # Convenience routine 49 proc cl x { 50 return "\[$x\]" 51 } 52 53 # Define various regular expressions 54 # white space 55 variable Wsp " \t\r\n" 56 variable noWsp [cl ^$Wsp] 57 58 # Various XML names and tokens 59 60 # BUG: NameChar does not include CombiningChar or Extender 61 variable NameChar [cl -a-zA-Z0-9._:] 62 variable Name [cl a-zA-Z_:]$NameChar* 63 variable Nmtoken $NameChar+ 64 65 # Tokenising expressions 66 67 variable tokExpr <(/?)([cl ^$Wsp>]+)([cl $Wsp]*[cl ^>]*)> 68 variable substExpr "\}\n{\\2} {\\1} {} {\\3} \{" 69 70 # table of predefined entities 71 72 variable EntityPredef 73 array set EntityPredef { 74 lt < gt > amp & quot \" apos ' 75 } 76 77} 78 79 80# xml::parser -- 81# 82# Creates XML parser object. 83# 84# Arguments: 85# args Unique name for parser object 86# plus option/value pairs 87# 88# Recognised Options: 89# -final Indicates end of document data 90# -elementstartcommand Called when an element starts 91# -elementendcommand Called when an element ends 92# -characterdatacommand Called when character data occurs 93# -processinginstructioncommand Called when a PI occurs 94# -externalentityrefcommand Called for an external entity reference 95# 96# (Not compatible with expat) 97# -xmldeclcommand Called when the XML declaration occurs 98# -doctypecommand Called when the document type declaration occurs 99# 100# -errorcommand Script to evaluate for a fatal error 101# -warningcommand Script to evaluate for a reportable warning 102# -statevariable global state variable 103# -reportempty whether to provide empty element indication 104# 105# Results: 106# The state variable is initialised. 107 108proc xml::parser {args} { 109 variable ParserCounter 110 111 if {[llength $args] > 0} { 112 set name [lindex $args 0] 113 set args [lreplace $args 0 0] 114 } else { 115 set name parser[incr ParserCounter] 116 } 117 118 if {[info command [namespace current]::$name] != {}} { 119 return -code error "unable to create parser object \"[namespace current]::$name\" command" 120 } 121 122 # Initialise state variable and object command 123 upvar \#0 [namespace current]::$name parser 124 set sgml_ns [namespace parent]::sgml 125 array set parser [list name $name \ 126 -final 1 \ 127 -elementstartcommand ${sgml_ns}::noop \ 128 -elementendcommand ${sgml_ns}::noop \ 129 -characterdatacommand ${sgml_ns}::noop \ 130 -processinginstructioncommand ${sgml_ns}::noop \ 131 -externalentityrefcommand ${sgml_ns}::noop \ 132 -xmldeclcommand ${sgml_ns}::noop \ 133 -doctypecommand ${sgml_ns}::noop \ 134 -warningcommand ${sgml_ns}::noop \ 135 -statevariable [namespace current]::$name \ 136 -reportempty 0 \ 137 internaldtd {} \ 138 ] 139 140 proc [namespace current]::$name {method args} \ 141 "eval ParseCommand $name \$method \$args" 142 143 eval ParseCommand [list $name] configure $args 144 145 return [namespace current]::$name 146} 147 148# xml::ParseCommand -- 149# 150# Handles parse object command invocations 151# 152# Valid Methods: 153# cget 154# configure 155# parse 156# reset 157# 158# Arguments: 159# parser parser object 160# method minor command 161# args other arguments 162# 163# Results: 164# Depends on method 165 166proc xml::ParseCommand {parser method args} { 167 upvar \#0 [namespace current]::$parser state 168 169 switch -- $method { 170 cget { 171 return $state([lindex $args 0]) 172 } 173 configure { 174 foreach {opt value} $args { 175 set state($opt) $value 176 } 177 } 178 parse { 179 ParseCommand_parse $parser [lindex $args 0] 180 } 181 reset { 182 if {[llength $args]} { 183 return -code error "too many arguments" 184 } 185 ParseCommand_reset $parser 186 } 187 default { 188 return -code error "unknown method \"$method\"" 189 } 190 } 191 192 return {} 193} 194 195# xml::ParseCommand_parse -- 196# 197# Parses document instance data 198# 199# Arguments: 200# object parser object 201# xml data 202# 203# Results: 204# Callbacks are invoked, if any are defined 205 206proc xml::ParseCommand_parse {object xml} { 207 upvar \#0 [namespace current]::$object parser 208 variable Wsp 209 variable tokExpr 210 variable substExpr 211 212 set parent [namespace parent] 213 if {"::" eq $parent } { 214 set parent {} 215 } 216 217 set tokenised [lrange \ 218 [${parent}::sgml::tokenise $xml \ 219 $tokExpr \ 220 $substExpr \ 221 -internaldtdvariable [namespace current]::${object}(internaldtd)] \ 222 5 end] 223 224 eval ${parent}::sgml::parseEvent \ 225 [list $tokenised \ 226 -emptyelement [namespace code ParseEmpty] \ 227 -parseattributelistcommand [namespace code ParseAttrs]] \ 228 [array get parser -*command] \ 229 [array get parser -entityvariable] \ 230 [array get parser -reportempty] \ 231 -normalize 0 \ 232 -internaldtd [list $parser(internaldtd)] 233 234 return {} 235} 236 237# xml::ParseEmpty -- 238# 239# Used by parser to determine whether an element is empty. 240# This should be dead easy in XML. The only complication is 241# that the RE above can't catch the trailing slash, so we have 242# to dig it out of the tag name or attribute list. 243# 244# Tcl 8.1 REs should fix this. 245# 246# Arguments: 247# tag element name 248# attr attribute list (raw) 249# e End tag delimiter. 250# 251# Results: 252# "/" if the trailing slash is found. Optionally, return a list 253# containing new values for the tag name and/or attribute list. 254 255proc xml::ParseEmpty {tag attr e} { 256 257 if {[string match */ [string trimright $tag]] && \ 258 ![string length $attr]} { 259 regsub {/$} $tag {} tag 260 return [list / $tag $attr] 261 } elseif {[string match */ [string trimright $attr]]} { 262 regsub {/$} [string trimright $attr] {} attr 263 return [list / $tag $attr] 264 } else { 265 return {} 266 } 267 268} 269 270# xml::ParseAttrs -- 271# 272# Parse element attributes. 273# 274# There are two forms for name-value pairs: 275# 276# name="value" 277# name='value' 278# 279# Watch out for the trailing slash on empty elements. 280# 281# Arguments: 282# attrs attribute string given in a tag 283# 284# Results: 285# Returns a Tcl list representing the name-value pairs in the 286# attribute string 287 288proc xml::ParseAttrs attrs { 289 variable Wsp 290 variable Name 291 292 # First check whether there's any work to do 293 if {{} eq [string trim $attrs] } { 294 return {} 295 } 296 297 # Strip the trailing slash on empty elements 298 regsub [format {/[%s]*$} " \t\n\r"] $attrs {} atList 299 300 set mode name 301 set result {} 302 foreach component [split $atList =] { 303 switch $mode { 304 name { 305 set component [string trim $component] 306 if {[regexp $Name $component]} { 307 lappend result $component 308 } else { 309 return -code error "invalid attribute name \"$component\"" 310 } 311 set mode value:start 312 } 313 value:start { 314 set component [string trimleft $component] 315 set delimiter [string index $component 0] 316 set value {} 317 switch -- $delimiter { 318 \" - 319 ' { 320 if {[regexp [format {%s([^%s]*)%s(.*)} $delimiter $delimiter $delimiter] $component discard value remainder]} { 321 lappend result $value 322 set remainder [string trim $remainder] 323 if {[string length $remainder]} { 324 if {[regexp $Name $remainder]} { 325 lappend result $remainder 326 set mode value:start 327 } else { 328 return -code error "invalid attribute name \"$remainder\"" 329 } 330 } else { 331 set mode end 332 } 333 } else { 334 set value [string range $component 1 end] 335 set mode value:continue 336 } 337 } 338 default { 339 return -code error "invalid value for attribute \"[lindex $result end]\"" 340 } 341 } 342 } 343 value:continue { 344 if {[regexp [format {([^%s]*)%s(.*)} $delimiter $delimiter] $component discard valuepart remainder]} { 345 append value = $valuepart 346 lappend result $value 347 set remainder [string trim $remainder] 348 if {[string length $remainder]} { 349 if {[regexp $Name $remainder]} { 350 lappend result $remainder 351 set mode value:start 352 } else { 353 return -code error "invalid attribute name \"$remainder\"" 354 } 355 } else { 356 set mode end 357 } 358 } else { 359 append value = $component 360 } 361 } 362 end { 363 return -code error "unexpected data found after end of attribute list" 364 } 365 } 366 } 367 368 switch $mode { 369 name - 370 end { 371 # This is normal 372 } 373 default { 374 return -code error "unexpected end of attribute list" 375 } 376 } 377 378 return $result 379} 380 381proc xml::OLDParseAttrs {attrs} { 382 variable Wsp 383 variable Name 384 385 # First check whether there's any work to do 386 if {{} eq [string trim $attrs] } { 387 return {} 388 } 389 390 # Strip the trailing slash on empty elements 391 regsub [format {/[%s]*$} " \t\n\r"] $attrs {} atList 392 393 # Protect Tcl special characters 394 #regsub -all {([[\$\\])} $atList {\\\1} atList 395 regsub -all & $atList {\&} atList 396 regsub -all {\[} $atList {\&ob;} atList 397 regsub -all {\]} $atList {\&cb;} atlist 398 # NB. sgml package delivers braces and backslashes quoted 399 regsub -all {\\\{} $atList {\&oc;} atList 400 regsub -all {\\\}} $atList {\&cc;} atlist 401 regsub -all {\$} $atList {\$} atList 402 regsub -all {\\\\} $atList {\&bs;} atList 403 404 regsub -all [format {(%s)[%s]*=[%s]*"([^"]*)"} $Name $Wsp $Wsp] \ 405 $atList {[set parsed(\1) {\2}; set dummy {}] } atList ;# " 406 regsub -all [format {(%s)[%s]*=[%s]*'([^']*)'} $Name $Wsp $Wsp] \ 407 $atList {[set parsed(\1) {\2}; set dummy {}] } atList 408 409 set leftovers [subst $atList] 410 411 if {[string length [string trim $leftovers]]} { 412 return -code error "syntax error in attribute list \"$attrs\"" 413 } 414 415 return [ParseAttrs:Deprotect [array get parsed]] 416} 417 418# xml::ParseAttrs:Deprotect -- 419# 420# Reverse map Tcl special characters previously protected 421# 422# Arguments: 423# attrs attribute list 424# 425# Results: 426# Characters substituted 427 428proc xml::ParseAttrs:Deprotect attrs { 429 430 regsub -all &\; $attrs \\& attrs 431 regsub -all &ob\; $attrs \[ attrs 432 regsub -all &cb\; $attrs \] attrs 433 regsub -all &oc\; $attrs \{ attrs 434 regsub -all &cc\; $attrs \} attrs 435 regsub -all &dollar\; $attrs \$ attrs 436 regsub -all &bs\; $attrs \\\\ attrs 437 438 return $attrs 439 440} 441 442# xml::ParseCommand_reset -- 443# 444# Initialize parser data 445# 446# Arguments: 447# object parser object 448# 449# Results: 450# Parser data structure initialised 451 452proc xml::ParseCommand_reset object { 453 upvar \#0 [namespace current]::$object parser 454 455 array set parser [list \ 456 -final 1 \ 457 internaldtd {} \ 458 ] 459} 460 461# xml::noop -- 462# 463# A do-nothing proc 464 465proc xml::noop args {} 466 467### Following procedures are based on html_library 468 469# xml::zapWhite -- 470# 471# Convert multiple white space into a single space. 472# 473# Arguments: 474# data plain text 475# 476# Results: 477# As above 478 479proc xml::zapWhite data { 480 regsub -all "\[ \t\r\n\]+" $data { } data 481 return $data 482} 483 484# 485# DTD parser for XML is wholly contained within the sgml.tcl package 486# 487 488# xml::parseDTD -- 489# 490# Entry point to the XML DTD parser. 491# 492# Arguments: 493# dtd XML data defining the DTD to be parsed 494# args configuration options 495# 496# Results: 497# Returns a three element list, first element is the content model 498# for each element, second element are the attribute lists of the 499# elements and the third element is the entity map. 500 501proc xml::parseDTD {dtd args} { 502 return [eval [expr {[namespace parent] == {::} ? {} : [namespace parent]}]::sgml::parseDTD [list $dtd] $args] 503} 504 505