1# sgmlparser.tcl -- 2# 3# This file provides the generic part of a parser for SGML-based 4# languages, namely HTML and XML. 5# 6# NB. It is a misnomer. There is no support for parsing 7# arbitrary SGML as such. 8# 9# See sgml.tcl for variable definitions. 10# 11# Copyright (c) 1998-2003 Zveno Pty Ltd 12# http://www.zveno.com/ 13# 14# Zveno makes this software available free of charge for any purpose. 15# Copies may be made of this software but all of this notice must be included 16# on any copy. 17# 18# The software was developed for research purposes only and Zveno does not 19# warrant that it is error free or fit for any purpose. Zveno disclaims any 20# liability for all claims, expenses, losses, damages and costs any user may 21# incur as a result of using, copying or modifying this software. 22# 23# Copyright (c) 1997 ANU and CSIRO on behalf of the 24# participants in the CRC for Advanced Computational Systems ('ACSys'). 25# 26# ACSys makes this software and all associated data and documentation 27# ('Software') available free of charge for any purpose. You may make copies 28# of the Software but you must include all of this notice on any copy. 29# 30# The Software was developed for research purposes and ACSys does not warrant 31# that it is error free or fit for any purpose. ACSys disclaims any 32# liability for all claims, expenses, losses, damages and costs any user may 33# incur as a result of using, copying or modifying the Software. 34# 35# $Id: sgmlparser.tcl,v 1.30 2003/02/25 04:09:20 balls Exp $ 36 37package require xmldefs 38 39package require sgml 1.9 40 41package require uri 1.1 42 43package provide sgmlparser 1.0 44 45namespace eval sgml { 46 namespace export tokenise parseEvent 47 48 namespace export parseDTD 49 50 # NB. Most namespace variables are defined in sgml-8.[01].tcl 51 # to account for differences between versions of Tcl. 52 # This especially includes the regular expressions used. 53 54 variable ParseEventNum 55 if {![info exists ParseEventNum]} { 56 set ParseEventNum 0 57 } 58 variable ParseDTDnum 59 if {![info exists ParseDTDNum]} { 60 set ParseDTDNum 0 61 } 62 63 variable declExpr [cl $::sgml::Wsp]*([cl ^$::sgml::Wsp]+)[cl $::sgml::Wsp]*([cl ^>]*) 64 variable EntityExpr [cl $::sgml::Wsp]*(%[cl $::sgml::Wsp])?[cl $::sgml::Wsp]*($::sgml::Name)[cl $::sgml::Wsp]+(.*) 65 66 #variable MarkupDeclExpr <([cl ^$::sgml::Wsp>]+)[cl $::sgml::Wsp]*([cl ^$::sgml::Wsp]+)[cl $::sgml::Wsp]*([cl ^>]*)> 67 #variable MarkupDeclSub "} {\\1} {\\2} {\\3} {" 68 variable MarkupDeclExpr <[cl $::sgml::Wsp]*([cl ^$::sgml::Wsp>]+)[cl $::sgml::Wsp]*([cl ^>]*)> 69 variable MarkupDeclSub "\} {\\1} {\\2} \{" 70 71 variable ExternalEntityExpr ^(PUBLIC|SYSTEM)[cl $::sgml::Wsp]+("|')(.*?)\\2([cl $::sgml::Wsp]+("|')(.*?)\\2)?([cl $::sgml::Wsp]+NDATA[cl $::sgml::Wsp]+($::xml::Name))?\$ 72 73 variable StdOptions 74 array set StdOptions [list \ 75 -elementstartcommand [namespace current]::noop \ 76 -elementendcommand [namespace current]::noop \ 77 -characterdatacommand [namespace current]::noop \ 78 -processinginstructioncommand [namespace current]::noop \ 79 -externalentitycommand {} \ 80 -xmldeclcommand [namespace current]::noop \ 81 -doctypecommand [namespace current]::noop \ 82 -commentcommand [namespace current]::noop \ 83 -entitydeclcommand [namespace current]::noop \ 84 -unparsedentitydeclcommand [namespace current]::noop \ 85 -parameterentitydeclcommand [namespace current]::noop \ 86 -notationdeclcommand [namespace current]::noop \ 87 -elementdeclcommand [namespace current]::noop \ 88 -attlistdeclcommand [namespace current]::noop \ 89 -paramentityparsing 1 \ 90 -defaultexpandinternalentities 1 \ 91 -startdoctypedeclcommand [namespace current]::noop \ 92 -enddoctypedeclcommand [namespace current]::noop \ 93 -entityreferencecommand {} \ 94 -warningcommand [namespace current]::noop \ 95 -errorcommand [namespace current]::Error \ 96 -final 1 \ 97 -validate 0 \ 98 -baseurl {} \ 99 -name {} \ 100 -emptyelement [namespace current]::EmptyElement \ 101 -parseattributelistcommand [namespace current]::noop \ 102 -parseentitydeclcommand [namespace current]::noop \ 103 -normalize 1 \ 104 -internaldtd {} \ 105 -reportempty 0 \ 106 -ignorewhitespace 0 \ 107 ] 108} 109 110# sgml::tokenise -- 111# 112# Transform the given HTML/XML text into a Tcl list. 113# 114# Arguments: 115# sgml text to tokenize 116# elemExpr RE to recognise tags 117# elemSub transform for matched tags 118# args options 119# 120# Valid Options: 121# -internaldtdvariable 122# -final boolean True if no more data is to be supplied 123# -statevariable varName Name of a variable used to store info 124# 125# Results: 126# Returns a Tcl list representing the document. 127 128proc sgml::tokenise {sgml elemExpr elemSub args} { 129 array set options {-final 1} 130 array set options $args 131 set options(-final) [Boolean $options(-final)] 132 133 # If the data is not final then there must be a variable to store 134 # unused data. 135 if {!$options(-final) && ![info exists options(-statevariable)]} { 136 return -code error {option "-statevariable" required if not final} 137 } 138 139 # Pre-process stage 140 # 141 # Extract the internal DTD subset, if any 142 143 catch {upvar #0 $options(-internaldtdvariable) dtd} 144 if {[regexp {<!DOCTYPE[^[<]+\[([^]]+)\]} $sgml discard dtd]} { 145 regsub {(<!DOCTYPE[^[<]+)(\[[^]]+\])} $sgml {\1\&xml:intdtd;} sgml 146 } 147 148 # Protect Tcl special characters 149 regsub -all {([{}\\])} $sgml {\\\1} sgml 150 151 # Do the translation 152 153 if {[info exists options(-statevariable)]} { 154 # Mats: Several rewrites here to handle -final 0 option. 155 # If any cached unparsed xml (state(leftover)), prepend it. 156 upvar #0 $options(-statevariable) state 157 if {[string length $state(leftover)]} { 158 regsub -all $elemExpr $state(leftover)$sgml $elemSub sgml 159 set state(leftover) {} 160 } else { 161 regsub -all $elemExpr $sgml $elemSub sgml 162 } 163 set sgml "{} {} {} \{$sgml\}" 164 165 # Performance note (Tcl 8.0): 166 # Use of lindex, lreplace will cause parsing to list object 167 168 # This RE only fixes chopped inside tags, not chopped text. 169 if {[regexp {^([^<]*)(<[^>]*$)} [lindex $sgml end] x text rest]} { 170 set sgml [lreplace $sgml end end $text] 171 # Mats: unmatched stuff means that it is chopped off. Cache it for next round. 172 set state(leftover) $rest 173 } 174 175 # Patch from bug report #596959, Marshall Rose 176 if {[string compare [lindex $sgml 4] ""]} { 177 set sgml [linsert $sgml 0 {} {} {} {} {}] 178 } 179 180 } else { 181 182 # Performance note (Tcl 8.0): 183 # In this case, no conversion to list object is performed 184 185 # Mats: This fails if not -final and $sgml is chopped off right in a tag. 186 regsub -all $elemExpr $sgml $elemSub sgml 187 set sgml "{} {} {} \{$sgml\}" 188 } 189 190 return $sgml 191 192} 193 194# sgml::parseEvent -- 195# 196# Produces an event stream for a XML/HTML document, 197# given the Tcl list format returned by tokenise. 198# 199# This procedure checks that the document is well-formed, 200# and throws an error if the document is found to be not 201# well formed. Warnings are passed via the -warningcommand script. 202# 203# The procedure only check for well-formedness, 204# no DTD is required. However, facilities are provided for entity expansion. 205# 206# Arguments: 207# sgml Instance data, as a Tcl list. 208# args option/value pairs 209# 210# Valid Options: 211# -final Indicates end of document data 212# -validate Boolean to enable validation 213# -baseurl URL for resolving relative URLs 214# -elementstartcommand Called when an element starts 215# -elementendcommand Called when an element ends 216# -characterdatacommand Called when character data occurs 217# -entityreferencecommand Called when an entity reference occurs 218# -processinginstructioncommand Called when a PI occurs 219# -externalentitycommand Called for an external entity reference 220# 221# -xmldeclcommand Called when the XML declaration occurs 222# -doctypecommand Called when the document type declaration occurs 223# -commentcommand Called when a comment occurs 224# -entitydeclcommand Called when a parsed entity is declared 225# -unparsedentitydeclcommand Called when an unparsed external entity is declared 226# -parameterentitydeclcommand Called when a parameter entity is declared 227# -notationdeclcommand Called when a notation is declared 228# -elementdeclcommand Called when an element is declared 229# -attlistdeclcommand Called when an attribute list is declared 230# -paramentityparsing Boolean to enable/disable parameter entity substitution 231# -defaultexpandinternalentities Boolean to enable/disable expansion of entities declared in internal DTD subset 232# 233# -startdoctypedeclcommand Called when the Doc Type declaration starts (see also -doctypecommand) 234# -enddoctypedeclcommand Called when the Doc Type declaration ends (see also -doctypecommand) 235# 236# -errorcommand Script to evaluate for a fatal error 237# -warningcommand Script to evaluate for a reportable warning 238# -statevariable global state variable 239# -normalize whether to normalize names 240# -reportempty whether to include an indication of empty elements 241# -ignorewhitespace whether to automatically strip whitespace 242# 243# Results: 244# The various callback scripts are invoked. 245# Returns empty string. 246# 247# BUGS: 248# If command options are set to empty string then they should not be invoked. 249 250proc sgml::parseEvent {sgml args} { 251 variable Wsp 252 variable noWsp 253 variable Nmtoken 254 variable Name 255 variable ParseEventNum 256 variable StdOptions 257 258 array set options [array get StdOptions] 259 catch {array set options $args} 260 261 # Mats: 262 # If the data is not final then there must be a variable to persistently store the parse state. 263 if {!$options(-final) && ![info exists options(-statevariable)]} { 264 return -code error {option "-statevariable" required if not final} 265 } 266 267 foreach {opt value} [array get options *command] { 268 if {[string compare $opt "-externalentitycommand"] && ![string length $value]} { 269 set options($opt) [namespace current]::noop 270 } 271 } 272 273 if {![info exists options(-statevariable)]} { 274 set options(-statevariable) [namespace current]::ParseEvent[incr ParseEventNum] 275 } 276 if {![info exists options(entities)]} { 277 set options(entities) [namespace current]::Entities$ParseEventNum 278 array set $options(entities) [array get [namespace current]::EntityPredef] 279 } 280 if {![info exists options(extentities)]} { 281 set options(extentities) [namespace current]::ExtEntities$ParseEventNum 282 } 283 if {![info exists options(parameterentities)]} { 284 set options(parameterentities) [namespace current]::ParamEntities$ParseEventNum 285 } 286 if {![info exists options(externalparameterentities)]} { 287 set options(externalparameterentities) [namespace current]::ExtParamEntities$ParseEventNum 288 } 289 if {![info exists options(elementdecls)]} { 290 set options(elementdecls) [namespace current]::ElementDecls$ParseEventNum 291 } 292 if {![info exists options(attlistdecls)]} { 293 set options(attlistdecls) [namespace current]::AttListDecls$ParseEventNum 294 } 295 if {![info exists options(notationdecls)]} { 296 set options(notationdecls) [namespace current]::NotationDecls$ParseEventNum 297 } 298 if {![info exists options(namespaces)]} { 299 set options(namespaces) [namespace current]::Namespaces$ParseEventNum 300 } 301 302 # Choose an external entity resolver 303 304 if {![string length $options(-externalentitycommand)]} { 305 if {$options(-validate)} { 306 set options(-externalentitycommand) [namespace code ResolveEntity] 307 } else { 308 set options(-externalentitycommand) [namespace code noop] 309 } 310 } 311 312 upvar #0 $options(-statevariable) state 313 upvar #0 $options(entities) entities 314 315 # Mats: 316 # The problem is that the state is not maintained when -final 0 ! 317 # I've switched back to an older version here. 318 319 if {![info exists state(line)]} { 320 # Initialise the state variable 321 array set state { 322 mode normal 323 haveXMLDecl 0 324 haveDocElement 0 325 inDTD 0 326 context {} 327 stack {} 328 line 0 329 defaultNS {} 330 defaultNSURI {} 331 } 332 } 333 334 foreach {tag close param text} $sgml { 335 336 # Keep track of lines in the input 337 incr state(line) [regsub -all \n $param {} discard] 338 incr state(line) [regsub -all \n $text {} discard] 339 340 # If the current mode is cdata or comment then we must undo what the 341 # regsub has done to reconstitute the data 342 343 set empty {} 344 switch $state(mode) { 345 comment { 346 # This had "[string length $param] && " as a guard - 347 # can't remember why :-( 348 if {[regexp ([cl ^-]*)--\$ $tag discard comm1]} { 349 # end of comment (in tag) 350 set tag {} 351 set close {} 352 set state(mode) normal 353 uplevel #0 $options(-commentcommand) [list $state(commentdata)<$comm1] 354 unset state(commentdata) 355 } elseif {[regexp ([cl ^-]*)--\$ $param discard comm1]} { 356 # end of comment (in attributes) 357 uplevel #0 $options(-commentcommand) [list $state(commentdata)<$close$tag>$comm1] 358 unset state(commentdata) 359 set tag {} 360 set param {} 361 set close {} 362 set state(mode) normal 363 } elseif {[regexp ([cl ^-]*)-->(.*) $text discard comm1 text]} { 364 # end of comment (in text) 365 uplevel #0 $options(-commentcommand) [list $state(commentdata)<$close$tag$param>$comm1] 366 unset state(commentdata) 367 set tag {} 368 set param {} 369 set close {} 370 set state(mode) normal 371 } else { 372 # comment continues 373 append state(commentdata) <$close$tag$param>$text 374 continue 375 } 376 } 377 cdata { 378 if {[string length $param] && [regexp ([cl ^\]]*)\]\][cl $Wsp]*\$ $tag discard cdata1]} { 379 # end of CDATA (in tag) 380 PCDATA [array get options] $state(cdata)<[subst -nocommand -novariable $close$cdata1] 381 set text [subst -novariable -nocommand $text] 382 set tag {} 383 unset state(cdata) 384 set state(mode) normal 385 } elseif {[regexp ([cl ^\]]*)\]\][cl $Wsp]*\$ $param discard cdata1]} { 386 # end of CDATA (in attributes) 387 PCDATA [array get options] $state(cdata)<[subst -nocommand -novariable $close$tag$cdata1] 388 set text [subst -novariable -nocommand $text] 389 set tag {} 390 set param {} 391 unset state(cdata) 392 set state(mode) normal 393 } elseif {[regexp (.*)\]\][cl $Wsp]*>(.*) $text discard cdata1 text]} { 394 # end of CDATA (in text) 395 PCDATA [array get options] $state(cdata)<[subst -nocommand -novariable $close$tag$param>$cdata1] 396 set text [subst -novariable -nocommand $text] 397 set tag {} 398 set param {} 399 set close {} 400 unset state(cdata) 401 set state(mode) normal 402 } else { 403 # CDATA continues 404 append state(cdata) [subst -nocommand -novariable <$close$tag$param>$text] 405 continue 406 } 407 } 408 continue { 409 # We're skipping elements looking for the close tag 410 switch -glob -- [string length $tag],[regexp {^\?|!.*} $tag],$close { 411 0,* { 412 continue 413 } 414 *,0, { 415 if {![string compare $tag $state(continue:tag)]} { 416 set empty [uplevel #0 $options(-emptyelement) [list $tag $param $empty]] 417 if {![string length $empty]} { 418 incr state(continue:level) 419 } 420 } 421 continue 422 } 423 *,0,/ { 424 if {![string compare $tag $state(continue:tag)]} { 425 incr state(continue:level) -1 426 } 427 if {!$state(continue:level)} { 428 unset state(continue:tag) 429 unset state(continue:level) 430 set state(mode) {} 431 } 432 } 433 default { 434 continue 435 } 436 } 437 } 438 default { 439 # The trailing slash on empty elements can't be automatically separated out 440 # in the RE, so we must do it here. 441 regexp (.*)(/)[cl $Wsp]*$ $param discard param empty 442 } 443 } 444 445 # default: normal mode 446 447 # Bug: if the attribute list has a right angle bracket then the empty 448 # element marker will not be seen 449 450 set empty [uplevel #0 $options(-emptyelement) [list $tag $param $empty]] 451 452 switch -glob -- [string length $tag],[regexp {^\?|!.*} $tag],$close,$empty { 453 454 0,0,, { 455 # Ignore empty tag - dealt with non-normal mode above 456 } 457 *,0,, { 458 459 # Start tag for an element. 460 461 # Check if the internal DTD entity is in an attribute value 462 regsub -all &xml:intdtd\; $param \[$options(-internaldtd)\] param 463 464 set code [catch {ParseEvent:ElementOpen $tag $param [array get options]} msg] 465 set state(haveDocElement) 1 466 switch $code { 467 0 {# OK} 468 3 { 469 # break 470 return {} 471 } 472 4 { 473 # continue 474 # Remember this tag and look for its close 475 set state(continue:tag) $tag 476 set state(continue:level) 1 477 set state(mode) continue 478 continue 479 } 480 default { 481 return -code $code -errorinfo $::errorInfo $msg 482 } 483 } 484 485 } 486 487 *,0,/, { 488 489 # End tag for an element. 490 491 set code [catch {ParseEvent:ElementClose $tag [array get options]} msg] 492 switch $code { 493 0 {# OK} 494 3 { 495 # break 496 return {} 497 } 498 4 { 499 # continue 500 # skip sibling nodes 501 set state(continue:tag) [lindex $state(stack) end] 502 set state(continue:level) 1 503 set state(mode) continue 504 continue 505 } 506 default { 507 return -code $code -errorinfo $::errorInfo $msg 508 } 509 } 510 511 } 512 513 *,0,,/ { 514 515 # Empty element 516 517 # The trailing slash sneaks through into the param variable 518 regsub -all /[cl $::sgml::Wsp]*\$ $param {} param 519 520 set code [catch {ParseEvent:ElementOpen $tag $param [array get options] -empty 1} msg] 521 set state(haveDocElement) 1 522 switch $code { 523 0 {# OK} 524 3 { 525 # break 526 return {} 527 } 528 4 { 529 # continue 530 # Pretty useless since it closes straightaway 531 } 532 default { 533 return -code $code -errorinfo $::errorInfo $msg 534 } 535 } 536 set code [catch {ParseEvent:ElementClose $tag [array get options] -empty 1} msg] 537 switch $code { 538 0 {# OK} 539 3 { 540 # break 541 return {} 542 } 543 4 { 544 # continue 545 # skip sibling nodes 546 set state(continue:tag) [lindex $state(stack) end] 547 set state(continue:level) 1 548 set state(mode) continue 549 continue 550 } 551 default { 552 return -code $code -errorinfo $::errorInfo $msg 553 } 554 } 555 556 } 557 558 *,1,* { 559 # Processing instructions or XML declaration 560 switch -glob -- $tag { 561 562 {\?xml} { 563 # XML Declaration 564 if {$state(haveXMLDecl)} { 565 uplevel #0 $options(-errorcommand) [list illegalcharacter "unexpected characters \"<$tag\" around line $state(line)"] 566 } elseif {![regexp {\?$} $param]} { 567 uplevel #0 $options(-errorcommand) [list missingcharacters "XML Declaration missing characters \"?>\" around line $state(line)"] 568 } else { 569 570 # We can do the parsing in one step with Tcl 8.1 RE's 571 # This has the benefit of performing better WF checking 572 573 set adv_re [format {^[%s]*version[%s]*=[%s]*("|')(-+|[a-zA-Z0-9_.:]+)\1([%s]+encoding[%s]*=[%s]*("|')([A-Za-z][-A-Za-z0-9._]*)\4)?([%s]*standalone[%s]*=[%s]*("|')(yes|no)\7)?[%s]*\?$} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] 574 575 if {[catch {regexp $adv_re $param discard delimiter version discard delimiter encoding discard delimiter standalone} matches]} { 576 # Otherwise we must fallback to 8.0. 577 # This won't detect certain well-formedness errors 578 579 # Get the version number 580 if {[regexp [format {[%s]*version[%s]*=[%s]*"(-+|[a-zA-Z0-9_.:]+)"[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard version] || [regexp [format {[%s]*version[%s]*=[%s]*'(-+|[a-zA-Z0-9_.:]+)'[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard version]} { 581 if {[string compare $version "1.0"]} { 582 # Should we support future versions? 583 # At least 1.X? 584 uplevel #0 $options(-errorcommand) [list versionincompatibility "document XML version \"$version\" is incompatible with XML version 1.0"] 585 } 586 } else { 587 uplevel #0 $options(-errorcommand) [list missingversion "XML Declaration missing version information around line $state(line)"] 588 } 589 590 # Get the encoding declaration 591 set encoding {} 592 regexp [format {[%s]*encoding[%s]*=[%s]*"([A-Za-z]([A-Za-z0-9._]|-)*)"[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard encoding 593 regexp [format {[%s]*encoding[%s]*=[%s]*'([A-Za-z]([A-Za-z0-9._]|-)*)'[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard encoding 594 595 # Get the standalone declaration 596 set standalone {} 597 regexp [format {[%s]*standalone[%s]*=[%s]*"(yes|no)"[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard standalone 598 regexp [format {[%s]*standalone[%s]*=[%s]*'(yes|no)'[%s]*} $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp $::sgml::Wsp] $param discard standalone 599 600 # Invoke the callback 601 uplevel #0 $options(-xmldeclcommand) [list $version $encoding $standalone] 602 603 } elseif {$matches == 0} { 604 uplevel #0 $options(-errorcommand) [list illformeddeclaration "XML Declaration not well-formed around line $state(line)"] 605 } else { 606 607 # Invoke the callback 608 uplevel #0 $options(-xmldeclcommand) [list $version $encoding $standalone] 609 610 } 611 612 } 613 614 } 615 616 {\?*} { 617 # Processing instruction 618 set tag [string range $tag 1 end] 619 if {[regsub {\?$} $tag {} tag]} { 620 if {[string length [string trim $param]]} { 621 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$param\" in processing instruction around line $state(line)"] 622 } 623 } elseif {![regexp ^$Name\$ $tag]} { 624 uplevel #0 $options(-errorcommand) [list illegalcharacter "illegal character in processing instruction target \"$tag\""] 625 } elseif {[regexp {^[xX][mM][lL]$} $tag]} { 626 uplevel #0 $options(-errorcommand) [list illegalcharacters "characters \"xml\" not permitted in processing instruction target \"$tag\""] 627 } elseif {![regsub {\?$} $param {} param]} { 628 uplevel #0 $options(-errorcommand) [list missingquestion "PI: expected '?' character around line $state(line)"] 629 } 630 set code [catch {uplevel #0 $options(-processinginstructioncommand) [list $tag [string trimleft $param]]} msg] 631 switch $code { 632 0 {# OK} 633 3 { 634 # break 635 return {} 636 } 637 4 { 638 # continue 639 # skip sibling nodes 640 set state(continue:tag) [lindex $state(stack) end] 641 set state(continue:level) 1 642 set state(mode) continue 643 continue 644 } 645 default { 646 return -code $code -errorinfo $::errorInfo $msg 647 } 648 } 649 } 650 651 !DOCTYPE { 652 # External entity reference 653 # This should move into xml.tcl 654 # Parse the params supplied. Looking for Name, ExternalID and MarkupDecl 655 set matched [regexp ^[cl $Wsp]*($Name)[cl $Wsp]*(.*) $param x state(doc_name) param] 656 set state(doc_name) [Normalize $state(doc_name) $options(-normalize)] 657 set externalID {} 658 set pubidlit {} 659 set systemlit {} 660 set externalID {} 661 if {[regexp -nocase ^[cl $Wsp]*(SYSTEM|PUBLIC)(.*) $param x id param]} { 662 switch [string toupper $id] { 663 SYSTEM { 664 if {[regexp ^[cl $Wsp]+"([cl ^"]*)"(.*) $param x systemlit param] || [regexp ^[cl $Wsp]+'([cl ^']*)'(.*) $param x systemlit param]} { 665 set externalID [list SYSTEM $systemlit] ;# " 666 } else { 667 uplevel #0 $options(-errorcommand) {syntaxerror {syntax error: SYSTEM identifier not followed by literal}} 668 } 669 } 670 PUBLIC { 671 if {[regexp ^[cl $Wsp]+"([cl ^"]*)"(.*) $param x pubidlit param] || [regexp ^[cl $Wsp]+'([cl ^']*)'(.*) $param x pubidlit param]} { 672 if {[regexp ^[cl $Wsp]+"([cl ^"]*)"(.*) $param x systemlit param] || [regexp ^[cl $Wsp]+'([cl ^']*)'(.*) $param x systemlit param]} { 673 set externalID [list PUBLIC $pubidlit $systemlit] 674 } else { 675 uplevel #0 $options(-errorcommand) [list syntaxerror "syntax error: PUBLIC identifier not followed by system literal around line $state(line)"] 676 } 677 } else { 678 uplevel #0 $options(-errorcommand) [list syntaxerror "syntax error: PUBLIC identifier not followed by literal around line $state(line)"] 679 } 680 } 681 } 682 if {[regexp -nocase ^[cl $Wsp]+NDATA[cl $Wsp]+($Name)(.*) $param x notation param]} { 683 lappend externalID $notation 684 } 685 } 686 687 set state(inDTD) 1 688 689 ParseEvent:DocTypeDecl [array get options] $state(doc_name) $pubidlit $systemlit $options(-internaldtd) 690 691 set state(inDTD) 0 692 693 } 694 695 !--* { 696 697 # Start of a comment 698 # See if it ends in the same tag, otherwise change the 699 # parsing mode 700 701 regexp {!--(.*)} $tag discard comm1 702 if {[regexp ([cl ^-]*)--[cl $Wsp]*\$ $comm1 discard comm1_1]} { 703 # processed comment (end in tag) 704 uplevel #0 $options(-commentcommand) [list $comm1_1] 705 } elseif {[regexp ([cl ^-]*)--[cl $Wsp]*\$ $param discard comm2]} { 706 # processed comment (end in attributes) 707 uplevel #0 $options(-commentcommand) [list $comm1$comm2] 708 } elseif {[regexp ([cl ^-]*)-->(.*) $text discard comm2 text]} { 709 # processed comment (end in text) 710 uplevel #0 $options(-commentcommand) [list $comm1$param$empty>$comm2] 711 } else { 712 # start of comment 713 set state(mode) comment 714 set state(commentdata) "$comm1$param$empty>$text" 715 continue 716 } 717 } 718 719 {!\[CDATA\[*} { 720 721 regexp {!\[CDATA\[(.*)} $tag discard cdata1 722 if {[regexp {(.*)]]$} $cdata1 discard cdata2]} { 723 # processed CDATA (end in tag) 724 PCDATA [array get options] [subst -novariable -nocommand $cdata2] 725 set text [subst -novariable -nocommand $text] 726 } elseif {[regexp {(.*)]]$} $param discard cdata2]} { 727 # processed CDATA (end in attribute) 728 # Backslashes in param are quoted at this stage 729 PCDATA [array get options] $cdata1[subst -novariable -nocommand $cdata2] 730 set text [subst -novariable -nocommand $text] 731 } elseif {[regexp {(.*)]]>(.*)} $text discard cdata2 text]} { 732 # processed CDATA (end in text) 733 # Backslashes in param and text are quoted at this stage 734 PCDATA [array get options] $cdata1[subst -novariable -nocommand $param]$empty>[subst -novariable -nocommand $cdata2] 735 set text [subst -novariable -nocommand $text] 736 } else { 737 # start CDATA 738 set state(cdata) "$cdata1$param>$text" 739 set state(mode) cdata 740 continue 741 } 742 743 } 744 745 !ELEMENT - 746 !ATTLIST - 747 !ENTITY - 748 !NOTATION { 749 uplevel #0 $options(-errorcommand) [list illegaldeclaration "[string range $tag 1 end] declaration not expected in document instance around line $state(line)"] 750 } 751 752 default { 753 uplevel #0 $options(-errorcommand) [list unknowninstruction "unknown processing instruction \"<$tag>\" around line $state(line)"] 754 } 755 } 756 } 757 *,1,* - 758 *,0,/,/ { 759 # Syntax error 760 uplevel #0 $options(-errorcommand) [list syntaxerror "syntax error: closed/empty tag: tag $tag param $param empty $empty close $close around line $state(line)"] 761 } 762 } 763 764 # Process character data 765 766 if {$state(haveDocElement) && [llength $state(stack)]} { 767 768 # Check if the internal DTD entity is in the text 769 regsub -all &xml:intdtd\; $text \[$options(-internaldtd)\] text 770 771 # Look for entity references 772 if {([array size entities] || \ 773 [string length $options(-entityreferencecommand)]) && \ 774 $options(-defaultexpandinternalentities) && \ 775 [regexp {&[^;]+;} $text]} { 776 777 # protect Tcl specials 778 # NB. braces and backslashes may already be protected 779 regsub -all {\\({|}|\\)} $text {\1} text 780 regsub -all {([][$\\{}])} $text {\\\1} text 781 782 # Mark entity references 783 regsub -all {&([^;]+);} $text [format {%s; %s {\1} ; %s %s} \}\} [namespace code [list Entity [array get options] $options(-entityreferencecommand) [namespace code [list PCDATA [array get options]]] $options(entities)]] [namespace code [list DeProtect [namespace code [list PCDATA [array get options]]]]] \{\{] text 784 set text "uplevel #0 [namespace code [list DeProtect1 [namespace code [list PCDATA [array get options]]]]] {{$text}}" 785 eval $text 786 } else { 787 788 # Restore protected special characters 789 regsub -all {\\([][{}\\])} $text {\1} text 790 PCDATA [array get options] $text 791 } 792 } elseif {[string length [string trim $text]]} { 793 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$text\" in document prolog around line $state(line)"] 794 } 795 796 } 797 798 # If this is the end of the document, close all open containers 799 if {$options(-final) && [llength $state(stack)]} { 800 eval $options(-errorcommand) [list unclosedelement "element [lindex $state(stack) end] remains unclosed around line $state(line)"] 801 } 802 803 return {} 804} 805 806# sgml::DeProtect -- 807# 808# Invoke given command after removing protecting backslashes 809# from given text. 810# 811# Arguments: 812# cmd Command to invoke 813# text Text to deprotect 814# 815# Results: 816# Depends on command 817 818proc sgml::DeProtect1 {cmd text} { 819 if {[string compare {} $text]} { 820 regsub -all {\\([]$[{}\\])} $text {\1} text 821 uplevel #0 $cmd [list $text] 822 } 823} 824proc sgml::DeProtect {cmd text} { 825 set text [lindex $text 0] 826 if {[string compare {} $text]} { 827 regsub -all {\\([]$[{}\\])} $text {\1} text 828 uplevel #0 $cmd [list $text] 829 } 830} 831 832# sgml::ParserDelete -- 833# 834# Free all memory associated with parser 835# 836# Arguments: 837# var global state array 838# 839# Results: 840# Variables unset 841 842proc sgml::ParserDelete var { 843 upvar #0 $var state 844 845 if {![info exists state]} { 846 return -code error "unknown parser" 847 } 848 849 catch {unset $state(entities)} 850 catch {unset $state(parameterentities)} 851 catch {unset $state(elementdecls)} 852 catch {unset $state(attlistdecls)} 853 catch {unset $state(notationdecls)} 854 catch {unset $state(namespaces)} 855 856 unset state 857 858 return {} 859} 860 861# sgml::ParseEvent:ElementOpen -- 862# 863# Start of an element. 864# 865# Arguments: 866# tag Element name 867# attr Attribute list 868# opts Options 869# args further configuration options 870# 871# Options: 872# -empty boolean 873# indicates whether the element was an empty element 874# 875# Results: 876# Modify state and invoke callback 877 878proc sgml::ParseEvent:ElementOpen {tag attr opts args} { 879 variable Name 880 variable Wsp 881 882 array set options $opts 883 upvar #0 $options(-statevariable) state 884 array set cfg {-empty 0} 885 array set cfg $args 886 set handleEmpty 0 887 888 if {$options(-normalize)} { 889 set tag [string toupper $tag] 890 } 891 892 # Update state 893 lappend state(stack) $tag 894 895 # Parse attribute list into a key-value representation 896 if {[string compare $options(-parseattributelistcommand) {}]} { 897 if {[catch {uplevel #0 $options(-parseattributelistcommand) [list $opts $attr]} attr]} { 898 if {[string compare [lindex $attr 0] "unterminated attribute value"]} { 899 uplevel #0 $options(-errorcommand) [list unterminatedattribute "$attr around line $state(line)"] 900 set attr {} 901 } else { 902 903 # It is most likely that a ">" character was in an attribute value. 904 # This manifests itself by ">" appearing in the element's text. 905 # In this case the callback should return a three element list; 906 # the message "unterminated attribute value", the attribute list it 907 # did manage to parse and the remainder of the attribute list. 908 909 foreach {msg attlist brokenattr} $attr break 910 911 upvar text elemText 912 if {[string first > $elemText] >= 0} { 913 914 # Now piece the attribute list back together 915 regexp [cl $Wsp]*($Name)[cl $Wsp]*=[cl $Wsp]*("|')(.*) $brokenattr discard attname delimiter attvalue 916 regexp (.*)>([cl ^>]*)\$ $elemText discard remattlist elemText 917 regexp ([cl ^$delimiter]*)${delimiter}(.*) $remattlist discard remattvalue remattlist 918 919 # Gotcha: watch out for empty element syntax 920 if {[string match */ [string trimright $remattlist]]} { 921 set remattlist [string range $remattlist 0 end-1] 922 set handleEmpty 1 923 set cfg(-empty) 1 924 } 925 926 append attvalue >$remattvalue 927 lappend attlist $attname $attvalue 928 929 # Complete parsing the attribute list 930 if {[catch {uplevel #0 $options(-parseattributelistcommand) [list $options(-statevariable) $remattlist]} attr]} { 931 uplevel #0 $options(-errorcommand) [list unterminatedattribute "$attr around line $state(line)"] 932 set attr {} 933 set attlist {} 934 } else { 935 eval lappend attlist $attr 936 } 937 938 set attr $attlist 939 940 } else { 941 uplevel #0 $options(-errorcommand) [list unterminatedattribute "$attr around line $state(line)"] 942 set attr {} 943 } 944 } 945 } 946 } 947 948 set empty {} 949 if {$cfg(-empty) && $options(-reportempty)} { 950 set empty {-empty 1} 951 } 952 953 # Check for namespace declarations 954 upvar #0 $options(namespaces) namespaces 955 set nsdecls {} 956 if {[llength $attr]} { 957 array set attrlist $attr 958 foreach {attrName attrValue} [array get attrlist xmlns*] { 959 unset attrlist($attrName) 960 set colon [set prefix {}] 961 if {[regexp {^xmlns(:(.+))?$} $attrName discard colon prefix]} { 962 switch -glob [string length $colon],[string length $prefix] { 963 0,0 { 964 # default NS declaration 965 lappend state(defaultNSURI) $attrValue 966 lappend state(defaultNS) [llength $state(stack)] 967 lappend nsdecls $attrValue {} 968 } 969 0,* { 970 # Huh? 971 } 972 *,0 { 973 # Error 974 uplevel #0 $state(-warningcommand) "no prefix specified for namespace URI \"$attrValue\" in element \"$tag\"" 975 } 976 default { 977 set namespaces($prefix,[llength $state(stack)]) $attrValue 978 lappend nsdecls $attrValue $prefix 979 } 980 } 981 } 982 } 983 if {[llength $nsdecls]} { 984 set nsdecls [list -namespacedecls $nsdecls] 985 } 986 set attr [array get attrlist] 987 } 988 989 # Check whether this element has an expanded name 990 set ns {} 991 if {[regexp {([^:]+):(.*)$} $tag discard prefix tag]} { 992 set nsspec [lsort -dictionary -decreasing [array names namespaces $prefix,*]] 993 if {[llength $nsspec]} { 994 set nsuri $namespaces([lindex $nsspec 0]) 995 set ns [list -namespace $nsuri] 996 } else { 997 uplevel #0 $options(-errorcommand) [list namespaceundeclared "no namespace declared for prefix \"$prefix\" in element $tag"] 998 } 999 } elseif {[llength $state(defaultNSURI)]} { 1000 set ns [list -namespace [lindex $state(defaultNSURI) end]] 1001 } 1002 1003 # Invoke callback 1004 set code [catch {uplevel #0 $options(-elementstartcommand) [list $tag $attr] $empty $ns $nsdecls} msg] 1005 1006 # Sometimes empty elements must be handled here (see above) 1007 if {$code == 0 && $handleEmpty} { 1008 ParseEvent:ElementClose $tag $opts -empty 1 1009 } 1010 1011 return -code $code -errorinfo $::errorInfo $msg 1012} 1013 1014# sgml::ParseEvent:ElementClose -- 1015# 1016# End of an element. 1017# 1018# Arguments: 1019# tag Element name 1020# opts Options 1021# args further configuration options 1022# 1023# Options: 1024# -empty boolean 1025# indicates whether the element as an empty element 1026# 1027# Results: 1028# Modify state and invoke callback 1029 1030proc sgml::ParseEvent:ElementClose {tag opts args} { 1031 array set options $opts 1032 upvar #0 $options(-statevariable) state 1033 array set cfg {-empty 0} 1034 array set cfg $args 1035 1036 # WF check 1037 if {[string compare $tag [lindex $state(stack) end]]} { 1038 uplevel #0 $options(-errorcommand) [list illegalendtag "end tag \"$tag\" does not match open element \"[lindex $state(stack) end]\" around line $state(line)"] 1039 return 1040 } 1041 1042 # Check whether this element has an expanded name 1043 upvar #0 $options(namespaces) namespaces 1044 set ns {} 1045 if {[regexp {([^:]+):(.*)$} $tag discard prefix tag]} { 1046 set nsuri $namespaces([lindex [lsort -dictionary -decreasing [array names namespaces $prefix,*]] 0]) 1047 set ns [list -namespace $nsuri] 1048 } elseif {[llength $state(defaultNSURI)]} { 1049 set ns [list -namespace [lindex $state(defaultNSURI) end]] 1050 } 1051 1052 # Pop namespace stacks, if any 1053 if {[llength $state(defaultNS)]} { 1054 if {[llength $state(stack)] == [lindex $state(defaultNS) end]} { 1055 set state(defaultNS) [lreplace $state(defaultNS) end end] 1056 } 1057 } 1058 foreach nsspec [array names namespaces *,[llength $state(stack)]] { 1059 unset namespaces($nsspec) 1060 } 1061 1062 # Update state 1063 set state(stack) [lreplace $state(stack) end end] 1064 1065 set empty {} 1066 if {$cfg(-empty) && $options(-reportempty)} { 1067 set empty {-empty 1} 1068 } 1069 1070 # Invoke callback 1071 # Mats: Shall be same as sgml::ParseEvent:ElementOpen to handle exceptions in callback. 1072 set code [catch {uplevel #0 $options(-elementendcommand) [list $tag] $empty $ns} msg] 1073 return -code $code -errorinfo $::errorInfo $msg 1074} 1075 1076# sgml::PCDATA -- 1077# 1078# Process PCDATA before passing to application 1079# 1080# Arguments: 1081# opts options 1082# pcdata Character data to be processed 1083# 1084# Results: 1085# Checks that characters are legal, 1086# checks -ignorewhitespace setting. 1087 1088proc sgml::PCDATA {opts pcdata} { 1089 array set options $opts 1090 1091 if {$options(-ignorewhitespace) && \ 1092 ![string length [string trim $pcdata]]} { 1093 return {} 1094 } 1095 1096 if {![regexp ^[cl $::sgml::Char]*\$ $pcdata]} { 1097 upvar \#0 $options(-statevariable) state 1098 uplevel \#0 $options(-errorcommand) [list illegalcharacters "illegal, non-Unicode characters found in text \"$pcdata\" around line $state(line)"] 1099 } 1100 1101 uplevel \#0 $options(-characterdatacommand) [list $pcdata] 1102} 1103 1104# sgml::Normalize -- 1105# 1106# Perform name normalization if required 1107# 1108# Arguments: 1109# name name to normalize 1110# req normalization required 1111# 1112# Results: 1113# Name returned as upper-case if normalization required 1114 1115proc sgml::Normalize {name req} { 1116 if {$req} { 1117 return [string toupper $name] 1118 } else { 1119 return $name 1120 } 1121} 1122 1123# sgml::Entity -- 1124# 1125# Resolve XML entity references (syntax: &xxx;). 1126# 1127# Arguments: 1128# opts options 1129# entityrefcmd application callback for entity references 1130# pcdatacmd application callback for character data 1131# entities name of array containing entity definitions. 1132# ref entity reference (the "xxx" bit) 1133# 1134# Results: 1135# Returns substitution text for given entity. 1136 1137proc sgml::Entity {opts entityrefcmd pcdatacmd entities ref} { 1138 array set options $opts 1139 upvar #0 $options(-statevariable) state 1140 1141 if {![string length $entities]} { 1142 set entities [namespace current]::EntityPredef 1143 } 1144 1145 switch -glob -- $ref { 1146 %* { 1147 # Parameter entity - not recognised outside of a DTD 1148 } 1149 #x* { 1150 # Character entity - hex 1151 if {[catch {format %c [scan [string range $ref 2 end] %x tmp; set tmp]} char]} { 1152 return -code error "malformed character entity \"$ref\"" 1153 } 1154 uplevel #0 $pcdatacmd [list $char] 1155 1156 return {} 1157 1158 } 1159 #* { 1160 # Character entity - decimal 1161 if {[catch {format %c [scan [string range $ref 1 end] %d tmp; set tmp]} char]} { 1162 return -code error "malformed character entity \"$ref\"" 1163 } 1164 uplevel #0 $pcdatacmd [list $char] 1165 1166 return {} 1167 1168 } 1169 default { 1170 # General entity 1171 upvar #0 $entities map 1172 if {[info exists map($ref)]} { 1173 1174 if {![regexp {<|&} $map($ref)]} { 1175 1176 # Simple text replacement - optimise 1177 uplevel #0 $pcdatacmd [list $map($ref)] 1178 1179 return {} 1180 1181 } 1182 1183 # Otherwise an additional round of parsing is required. 1184 # This only applies to XML, since HTML doesn't have general entities 1185 1186 # Must parse the replacement text for start & end tags, etc 1187 # This text must be self-contained: balanced closing tags, and so on 1188 1189 set tokenised [tokenise $map($ref) $::xml::tokExpr $::xml::substExpr] 1190 set options(-final) 0 1191 eval parseEvent [list $tokenised] [array get options] 1192 1193 return {} 1194 1195 } elseif {[string compare $entityrefcmd "::sgml::noop"]} { 1196 1197 set result [uplevel #0 $entityrefcmd [list $ref]] 1198 1199 if {[string length $result]} { 1200 uplevel #0 $pcdatacmd [list $result] 1201 } 1202 1203 return {} 1204 1205 } else { 1206 1207 # Reconstitute entity reference 1208 1209 uplevel #0 $options(-errorcommand) [list illegalentity "undefined entity reference \"$ref\""] 1210 1211 return {} 1212 1213 } 1214 } 1215 } 1216 1217 # If all else fails leave the entity reference untouched 1218 uplevel #0 $pcdatacmd [list &$ref\;] 1219 1220 return {} 1221} 1222 1223#################################### 1224# 1225# DTD parser for SGML (XML). 1226# 1227# This DTD actually only handles XML DTDs. Other language's 1228# DTD's, such as HTML, must be written in terms of a XML DTD. 1229# 1230#################################### 1231 1232# sgml::ParseEvent:DocTypeDecl -- 1233# 1234# Entry point for DTD parsing 1235# 1236# Arguments: 1237# opts configuration options 1238# docEl document element name 1239# pubId public identifier 1240# sysId system identifier (a URI) 1241# intSSet internal DTD subset 1242 1243proc sgml::ParseEvent:DocTypeDecl {opts docEl pubId sysId intSSet} { 1244 array set options {} 1245 array set options $opts 1246 1247 set code [catch {uplevel #0 $options(-doctypecommand) [list $docEl $pubId $sysId $intSSet]} err] 1248 switch $code { 1249 3 { 1250 # break 1251 return {} 1252 } 1253 0 - 1254 4 { 1255 # continue 1256 } 1257 default { 1258 return -code $code $err 1259 } 1260 } 1261 1262 # Otherwise we'll parse the DTD and report it piecemeal 1263 1264 # The internal DTD subset is processed first (XML 2.8) 1265 # During this stage, parameter entities are only allowed 1266 # between markup declarations 1267 1268 ParseDTD:Internal [array get options] $intSSet 1269 1270 # The external DTD subset is processed last (XML 2.8) 1271 # During this stage, parameter entities may occur anywhere 1272 1273 # We must resolve the external identifier to obtain the 1274 # DTD data. The application may supply its own resolver. 1275 1276 if {[string length $pubId] || [string length $sysId]} { 1277 uplevel #0 $options(-externalentitycommand) [list $options(-name) $options(-baseurl) $sysId $pubId] 1278 } 1279 1280 return {} 1281} 1282 1283# sgml::ParseDTD:Internal -- 1284# 1285# Parse the internal DTD subset. 1286# 1287# Parameter entities are only allowed between markup declarations. 1288# 1289# Arguments: 1290# opts configuration options 1291# dtd DTD data 1292# 1293# Results: 1294# Markup declarations parsed may cause callback invocation 1295 1296proc sgml::ParseDTD:Internal {opts dtd} { 1297 variable MarkupDeclExpr 1298 variable MarkupDeclSub 1299 1300 array set options {} 1301 array set options $opts 1302 1303 upvar #0 $options(-statevariable) state 1304 upvar #0 $options(parameterentities) PEnts 1305 upvar #0 $options(externalparameterentities) ExtPEnts 1306 1307 # Tokenize the DTD 1308 1309 # Protect Tcl special characters 1310 regsub -all {([{}\\])} $dtd {\\\1} dtd 1311 1312 regsub -all $MarkupDeclExpr $dtd $MarkupDeclSub dtd 1313 1314 # Entities may have angle brackets in their replacement 1315 # text, which breaks the RE processing. So, we must 1316 # use a similar technique to processing doc instances 1317 # to rebuild the declarations from the pieces 1318 1319 set mode {} ;# normal 1320 set delimiter {} 1321 set name {} 1322 set param {} 1323 1324 set state(inInternalDTD) 1 1325 1326 # Process the tokens 1327 foreach {decl value text} [lrange "{} {} \{$dtd\}" 3 end] { 1328 1329 # Keep track of line numbers 1330 incr state(line) [regsub -all \n $text {} discard] 1331 1332 ParseDTD:EntityMode [array get options] mode replText decl value text $delimiter $name $param 1333 1334 ParseDTD:ProcessMarkupDecl [array get options] decl value delimiter name mode replText text param 1335 1336 # There may be parameter entity references between markup decls 1337 1338 if {[regexp {%.*;} $text]} { 1339 1340 # Protect Tcl special characters 1341 regsub -all {([{}\\])} $text {\\\1} text 1342 1343 regsub -all %($::sgml::Name)\; $text "\} {\\1} \{" text 1344 1345 set PElist "\{$text\}" 1346 set PElist [lreplace $PElist end end] 1347 foreach {text entref} $PElist { 1348 if {[string length [string trim $text]]} { 1349 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text in internal DTD subset around line $state(line)"] 1350 } 1351 1352 # Expand parameter entity and recursively parse 1353 # BUG: no checks yet for recursive entity references 1354 1355 if {[info exists PEnts($entref)]} { 1356 set externalParser [$options(-name) entityparser] 1357 $externalParser parse $PEnts($entref) -dtdsubset internal 1358 } elseif {[info exists ExtPEnts($entref)]} { 1359 set externalParser [$options(-name) entityparser] 1360 $externalParser parse $ExtPEnts($entref) -dtdsubset external 1361 #$externalParser free 1362 } else { 1363 uplevel #0 $options(-errorcommand) [list illegalreference "reference to undeclared parameter entity \"$entref\""] 1364 } 1365 } 1366 1367 } 1368 1369 } 1370 1371 return {} 1372} 1373 1374# sgml::ParseDTD:EntityMode -- 1375# 1376# Perform special processing for various parser modes 1377# 1378# Arguments: 1379# opts configuration options 1380# modeVar pass-by-reference mode variable 1381# replTextVar pass-by-ref 1382# declVar pass-by-ref 1383# valueVar pass-by-ref 1384# textVar pass-by-ref 1385# delimiter delimiter currently in force 1386# name 1387# param 1388# 1389# Results: 1390# Depends on current mode 1391 1392proc sgml::ParseDTD:EntityMode {opts modeVar replTextVar declVar valueVar textVar delimiter name param} { 1393 upvar 1 $modeVar mode 1394 upvar 1 $replTextVar replText 1395 upvar 1 $declVar decl 1396 upvar 1 $valueVar value 1397 upvar 1 $textVar text 1398 array set options $opts 1399 1400 switch $mode { 1401 {} { 1402 # Pass through to normal processing section 1403 } 1404 entity { 1405 # Look for closing delimiter 1406 if {[regexp ([cl ^$delimiter]*)${delimiter}(.*) $decl discard val1 remainder]} { 1407 append replText <$val1 1408 DTD:ENTITY [array get options] $name [string trim $param] $delimiter$replText$delimiter 1409 set decl / 1410 set text $remainder\ $value>$text 1411 set value {} 1412 set mode {} 1413 } elseif {[regexp ([cl ^$delimiter]*)${delimiter}(.*) $value discard val2 remainder]} { 1414 append replText <$decl\ $val2 1415 DTD:ENTITY [array get options] $name [string trim $param] $delimiter$replText$delimiter 1416 set decl / 1417 set text $remainder>$text 1418 set value {} 1419 set mode {} 1420 } elseif {[regexp ([cl ^$delimiter]*)${delimiter}(.*) $text discard val3 remainder]} { 1421 append replText <$decl\ $value>$val3 1422 DTD:ENTITY [array get options] $name [string trim $param] $delimiter$replText$delimiter 1423 set decl / 1424 set text $remainder 1425 set value {} 1426 set mode {} 1427 } else { 1428 1429 # Remain in entity mode 1430 append replText <$decl\ $value>$text 1431 return -code continue 1432 1433 } 1434 } 1435 1436 ignore { 1437 upvar #0 $options(-statevariable) state 1438 1439 if {[regexp {]](.*)$} $decl discard remainder]} { 1440 set state(condSections) [lreplace $state(condSections) end end] 1441 set decl $remainder 1442 set mode {} 1443 } elseif {[regexp {]](.*)$} $value discard remainder]} { 1444 set state(condSections) [lreplace $state(condSections) end end] 1445 regexp <[cl $::sgml::Wsp]*($::sgml::Name)(.*) $remainder discard decl value 1446 set mode {} 1447 } elseif {[regexp {]]>(.*)$} $text discard remainder]} { 1448 set state(condSections) [lreplace $state(condSections) end end] 1449 set decl / 1450 set value {} 1451 set text $remainder 1452 #regexp <[cl $::sgml::Wsp]*($::sgml::Name)([cl ^>]*)>(.*) $remainder discard decl value text 1453 set mode {} 1454 } else { 1455 set decl / 1456 } 1457 1458 } 1459 1460 comment { 1461 # Look for closing comment delimiter 1462 1463 upvar #0 $options(-statevariable) state 1464 1465 if {[regexp (.*?)--(.*)\$ $decl discard data1 remainder]} { 1466 } elseif {[regexp (.*?)--(.*)\$ $value discard data1 remainder]} { 1467 } elseif {[regexp (.*?)--(.*)\$ $text discard data1 remainder]} { 1468 } else { 1469 # comment continues 1470 append state(commentdata) <$decl\ $value>$text 1471 set decl / 1472 set value {} 1473 set text {} 1474 } 1475 } 1476 1477 } 1478 1479 return {} 1480} 1481 1482# sgml::ParseDTD:ProcessMarkupDecl -- 1483# 1484# Process a single markup declaration 1485# 1486# Arguments: 1487# opts configuration options 1488# declVar pass-by-ref 1489# valueVar pass-by-ref 1490# delimiterVar pass-by-ref for current delimiter in force 1491# nameVar pass-by-ref 1492# modeVar pass-by-ref for current parser mode 1493# replTextVar pass-by-ref 1494# textVar pass-by-ref 1495# paramVar pass-by-ref 1496# 1497# Results: 1498# Depends on markup declaration. May change parser mode 1499 1500proc sgml::ParseDTD:ProcessMarkupDecl {opts declVar valueVar delimiterVar nameVar modeVar replTextVar textVar paramVar} { 1501 upvar 1 $modeVar mode 1502 upvar 1 $replTextVar replText 1503 upvar 1 $textVar text 1504 upvar 1 $declVar decl 1505 upvar 1 $valueVar value 1506 upvar 1 $nameVar name 1507 upvar 1 $delimiterVar delimiter 1508 upvar 1 $paramVar param 1509 1510 variable declExpr 1511 variable ExternalEntityExpr 1512 1513 array set options $opts 1514 upvar #0 $options(-statevariable) state 1515 1516 switch -glob -- $decl { 1517 1518 / { 1519 # continuation from entity processing 1520 } 1521 1522 !ELEMENT { 1523 # Element declaration 1524 if {[regexp $declExpr $value discard tag cmodel]} { 1525 DTD:ELEMENT [array get options] $tag $cmodel 1526 } else { 1527 uplevel #0 $options(-errorcommand) [list illegaldeclaration "malformed element declaration around line $state(line)"] 1528 } 1529 } 1530 1531 !ATTLIST { 1532 # Attribute list declaration 1533 variable declExpr 1534 if {[regexp $declExpr $value discard tag attdefns]} { 1535 if {[catch {DTD:ATTLIST [array get options] $tag $attdefns} err]} { 1536 #puts stderr "Stack trace: $::errorInfo\n***\n" 1537 # Atttribute parsing has bugs at the moment 1538 #return -code error "$err around line $state(line)" 1539 return {} 1540 } 1541 } else { 1542 uplevel #0 $options(-errorcommand) [list illegaldeclaration "malformed attribute list declaration around line $state(line)"] 1543 } 1544 } 1545 1546 !ENTITY { 1547 # Entity declaration 1548 variable EntityExpr 1549 1550 if {[regexp $EntityExpr $value discard param name value]} { 1551 1552 # Entity replacement text may have a '>' character. 1553 # In this case, the real delimiter will be in the following 1554 # text. This is complicated by the possibility of there 1555 # being several '<','>' pairs in the replacement text. 1556 # At this point, we are searching for the matching quote delimiter. 1557 1558 if {[regexp $ExternalEntityExpr $value]} { 1559 DTD:ENTITY [array get options] $name [string trim $param] $value 1560 } elseif {[regexp ("|')(.*?)\\1(.*) $value discard delimiter replText value]} { 1561 1562 if {[string length [string trim $value]]} { 1563 uplevel #0 $options(-errorcommand) [list illegaldeclaration "malformed entity declaration around line $state(line)"] 1564 } else { 1565 DTD:ENTITY [array get options] $name [string trim $param] $delimiter$replText$delimiter 1566 } 1567 } elseif {[regexp ("|')(.*) $value discard delimiter replText]} { 1568 append replText >$text 1569 set text {} 1570 set mode entity 1571 } else { 1572 uplevel #0 $options(-errorcommand) [list illegaldeclaration "no delimiter for entity declaration around line $state(line)"] 1573 } 1574 1575 } else { 1576 uplevel #0 $options(-errorcommand) [list illegaldeclaration "malformed entity declaration around line $state(line)"] 1577 } 1578 } 1579 1580 !NOTATION { 1581 # Notation declaration 1582 if {[regexp $declExpr param discard tag notation]} { 1583 DTD:ENTITY [array get options] $tag $notation 1584 } else { 1585 uplevel #0 $options(-errorcommand) [list illegaldeclaration "malformed entity declaration around line $state(line)"] 1586 } 1587 } 1588 1589 !--* { 1590 # Start of a comment 1591 1592 if {[regexp !--(.*?)--\$ $decl discard data]} { 1593 if {[string length [string trim $value]]} { 1594 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$value\""] 1595 } 1596 uplevel #0 $options(-commentcommand) [list $data] 1597 set decl / 1598 set value {} 1599 } elseif {[regexp -- ^(.*?)--\$ $value discard data2]} { 1600 regexp !--(.*)\$ $decl discard data1 1601 uplevel #0 $options(-commentcommand) [list $data1\ $data2] 1602 set decl / 1603 set value {} 1604 } elseif {[regexp (.*?)-->(.*)\$ $text discard data3 remainder]} { 1605 regexp !--(.*)\$ $decl discard data1 1606 uplevel #0 $options(-commentcommand) [list $data1\ $value>$data3] 1607 set decl / 1608 set value {} 1609 set text $remainder 1610 } else { 1611 regexp !--(.*)\$ $decl discard data1 1612 set state(commentdata) $data1\ $value>$text 1613 set decl / 1614 set value {} 1615 set text {} 1616 set mode comment 1617 } 1618 } 1619 1620 !*INCLUDE* - 1621 !*IGNORE* { 1622 if {$state(inInternalDTD)} { 1623 uplevel #0 $options(-errorcommand) [list illegalsection "conditional section not permitted in internal DTD subset around line $state(line)"] 1624 } 1625 1626 if {[regexp {^!\[INCLUDE\[(.*)} $decl discard remainder]} { 1627 # Push conditional section stack, popped by ]]> sequence 1628 1629 if {[regexp {(.*?)]]$} $remainder discard r2]} { 1630 # section closed immediately 1631 if {[string length [string trim $r2]]} { 1632 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r2\" in conditional section"] 1633 } 1634 } elseif {[regexp {(.*?)]](.*)} $value discard r2 r3]} { 1635 # section closed immediately 1636 if {[string length [string trim $r2]]} { 1637 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r2\" in conditional section"] 1638 } 1639 if {[string length [string trim $r3]]} { 1640 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r3\" in conditional section"] 1641 } 1642 } else { 1643 1644 lappend state(condSections) INCLUDE 1645 1646 set parser [$options(-name) entityparser] 1647 $parser parse $remainder\ $value> -dtdsubset external 1648 #$parser free 1649 1650 if {[regexp {(.*?)]]>(.*)} $text discard t1 t2]} { 1651 if {[string length [string trim $t1]]} { 1652 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$t1\""] 1653 } 1654 if {![llength $state(condSections)]} { 1655 uplevel #0 $options(-errorcommand) [list illegalsection "extraneous conditional section close"] 1656 } 1657 set state(condSections) [lreplace $state(condSections) end end] 1658 set text $t2 1659 } 1660 1661 } 1662 } elseif {[regexp {^!\[IGNORE\[(.*)} $decl discard remainder]} { 1663 # Set ignore mode. Still need a stack 1664 set mode ignore 1665 1666 if {[regexp {(.*?)]]$} $remainder discard r2]} { 1667 # section closed immediately 1668 if {[string length [string trim $r2]]} { 1669 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r2\" in conditional section"] 1670 } 1671 } elseif {[regexp {(.*?)]](.*)} $value discard r2 r3]} { 1672 # section closed immediately 1673 if {[string length [string trim $r2]]} { 1674 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r2\" in conditional section"] 1675 } 1676 if {[string length [string trim $r3]]} { 1677 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$r3\" in conditional section"] 1678 } 1679 } else { 1680 1681 lappend state(condSections) IGNORE 1682 1683 if {[regexp {(.*?)]]>(.*)} $text discard t1 t2]} { 1684 if {[string length [string trim $t1]]} { 1685 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$t1\""] 1686 } 1687 if {![llength $state(condSections)]} { 1688 uplevel #0 $options(-errorcommand) [list illegalsection "extraneous conditional section close"] 1689 } 1690 set state(condSections) [lreplace $state(condSections) end end] 1691 set text $t2 1692 } 1693 1694 } 1695 } else { 1696 uplevel #0 $options(-errorcommand) [list illegaldeclaration "illegal markup declaration \"$decl\" around line $state(line)"] 1697 } 1698 1699 } 1700 1701 default { 1702 if {[regexp {^\?(.*)} $decl discard target]} { 1703 # Processing instruction 1704 } else { 1705 uplevel #0 $options(-errorcommand) [list illegaldeclaration "illegal markup declaration \"$decl\""] 1706 } 1707 } 1708 } 1709 1710 return {} 1711} 1712 1713# sgml::ParseDTD:External -- 1714# 1715# Parse the external DTD subset. 1716# 1717# Parameter entities are allowed anywhere. 1718# 1719# Arguments: 1720# opts configuration options 1721# dtd DTD data 1722# 1723# Results: 1724# Markup declarations parsed may cause callback invocation 1725 1726proc sgml::ParseDTD:External {opts dtd} { 1727 variable MarkupDeclExpr 1728 variable MarkupDeclSub 1729 variable declExpr 1730 1731 array set options $opts 1732 upvar #0 $options(parameterentities) PEnts 1733 upvar #0 $options(externalparameterentities) ExtPEnts 1734 upvar #0 $options(-statevariable) state 1735 1736 # As with the internal DTD subset, watch out for 1737 # entities with angle brackets 1738 set mode {} ;# normal 1739 set delimiter {} 1740 set name {} 1741 set param {} 1742 1743 set oldState 0 1744 catch {set oldState $state(inInternalDTD)} 1745 set state(inInternalDTD) 0 1746 1747 # Initialise conditional section stack 1748 if {![info exists state(condSections)]} { 1749 set state(condSections) {} 1750 } 1751 set startCondSectionDepth [llength $state(condSections)] 1752 1753 while {[string length $dtd]} { 1754 set progress 0 1755 set PEref {} 1756 if {![string compare $mode "ignore"]} { 1757 set progress 1 1758 if {[regexp {]]>(.*)} $dtd discard dtd]} { 1759 set remainder {} 1760 set mode {} ;# normal 1761 set state(condSections) [lreplace $state(condSections) end end] 1762 continue 1763 } else { 1764 uplevel #0 $options(-errorcommand) [list missingdelimiter "IGNORE conditional section closing delimiter not found"] 1765 } 1766 } elseif {[regexp ^(.*?)%($::sgml::Name)\;(.*)\$ $dtd discard data PEref remainder]} { 1767 set progress 1 1768 } else { 1769 set data $dtd 1770 set dtd {} 1771 set remainder {} 1772 } 1773 1774 # Tokenize the DTD (so far) 1775 1776 # Protect Tcl special characters 1777 regsub -all {([{}\\])} $data {\\\1} dataP 1778 1779 set n [regsub -all $MarkupDeclExpr $dataP $MarkupDeclSub dataP] 1780 1781 if {$n} { 1782 set progress 1 1783 # All but the last markup declaration should have no text 1784 set dataP [lrange "{} {} \{$dataP\}" 3 end] 1785 if {[llength $dataP] > 3} { 1786 foreach {decl value text} [lrange $dataP 0 [expr [llength $dataP] - 4]] { 1787 ParseDTD:EntityMode [array get options] mode replText decl value text $delimiter $name $param 1788 ParseDTD:ProcessMarkupDecl [array get options] decl value delimiter name mode repltextVar text param 1789 1790 if {[string length [string trim $text]]} { 1791 # check for conditional section close 1792 if {[regexp {]]>(.*)$} $text discard text]} { 1793 if {[string length [string trim $text]]} { 1794 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$text\""] 1795 } 1796 if {![llength $state(condSections)]} { 1797 uplevel #0 $options(-errorcommand) [list illegalsection "extraneous conditional section close"] 1798 } 1799 set state(condSections) [lreplace $state(condSections) end end] 1800 if {![string compare $mode "ignore"]} { 1801 set mode {} ;# normal 1802 } 1803 } else { 1804 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$text\""] 1805 } 1806 } 1807 } 1808 } 1809 # Do the last declaration 1810 foreach {decl value text} [lrange $dataP [expr [llength $dataP] - 3] end] { 1811 ParseDTD:EntityMode [array get options] mode replText decl value text $delimiter $name $param 1812 ParseDTD:ProcessMarkupDecl [array get options] decl value delimiter name mode repltextVar text param 1813 } 1814 } 1815 1816 # Now expand the PE reference, if any 1817 switch -glob $mode,[string length $PEref],$n { 1818 ignore,0,* { 1819 set dtd $text 1820 } 1821 ignore,*,* { 1822 set dtd $text$remainder 1823 } 1824 *,0,0 { 1825 set dtd $data 1826 } 1827 *,0,* { 1828 set dtd $text 1829 } 1830 *,*,0 { 1831 if {[catch {append data $PEnts($PEref)}]} { 1832 if {[info exists ExtPEnts($PEref)]} { 1833 set externalParser [$options(-name) entityparser] 1834 $externalParser parse $ExtPEnts($PEref) -dtdsubset external 1835 #$externalParser free 1836 } else { 1837 uplevel #0 $options(-errorcommand) [list entityundeclared "parameter entity \"$PEref\" not declared"] 1838 } 1839 } 1840 set dtd $data$remainder 1841 } 1842 default { 1843 if {[catch {append text $PEnts($PEref)}]} { 1844 if {[info exists ExtPEnts($PEref)]} { 1845 set externalParser [$options(-name) entityparser] 1846 $externalParser parse $ExtPEnts($PEref) -dtdsubset external 1847 #$externalParser free 1848 } else { 1849 uplevel #0 $options(-errorcommand) [list entityundeclared "parameter entity \"$PEref\" not declared"] 1850 } 1851 } 1852 set dtd $text$remainder 1853 } 1854 } 1855 1856 # Check whether a conditional section has been terminated 1857 if {[regexp {^(.*?)]]>(.*)$} $dtd discard t1 t2]} { 1858 if {![regexp <.*> $t1]} { 1859 if {[string length [string trim $t1]]} { 1860 uplevel #0 $options(-errorcommand) [list unexpectedtext "unexpected text \"$t1\""] 1861 } 1862 if {![llength $state(condSections)]} { 1863 uplevel #0 $options(-errorcommand) [list illegalsection "extraneous conditional section close"] 1864 } 1865 set state(condSections) [lreplace $state(condSections) end end] 1866 if {![string compare $mode "ignore"]} { 1867 set mode {} ;# normal 1868 } 1869 set dtd $t2 1870 set progress 1 1871 } 1872 } 1873 1874 if {!$progress} { 1875 # No parameter entity references were found and 1876 # the text does not contain a well-formed markup declaration 1877 # Avoid going into an infinite loop 1878 upvar #0 $options(-errorcommand) [list syntaxerror "external entity does not contain well-formed markup declaration"] 1879 break 1880 } 1881 } 1882 1883 set state(inInternalDTD) $oldState 1884 1885 # Check that conditional sections have been closed properly 1886 if {[llength $state(condSections)] > $startCondSectionDepth} { 1887 uplevel #0 $options(-errorcommand) [list syntaxerror "[lindex $state(condSections) end] conditional section not closed"] 1888 } 1889 if {[llength $state(condSections)] < $startCondSectionDepth} { 1890 uplevel #0 $options(-errorcommand) [list syntaxerror "too many conditional section closures"] 1891 } 1892 1893 return {} 1894} 1895 1896# Procedures for handling the various declarative elements in a DTD. 1897# New elements may be added by creating a procedure of the form 1898# parse:DTD:_element_ 1899 1900# For each of these procedures, the various regular expressions they use 1901# are created outside of the proc to avoid overhead at runtime 1902 1903# sgml::DTD:ELEMENT -- 1904# 1905# <!ELEMENT ...> defines an element. 1906# 1907# The content model for the element is stored in the contentmodel array, 1908# indexed by the element name. The content model is parsed into the 1909# following list form: 1910# 1911# {} Content model is EMPTY. 1912# Indicated by an empty list. 1913# * Content model is ANY. 1914# Indicated by an asterix. 1915# {ELEMENT ...} 1916# Content model is element-only. 1917# {MIXED {element1 element2 ...}} 1918# Content model is mixed (PCDATA and elements). 1919# The second element of the list contains the 1920# elements that may occur. #PCDATA is assumed 1921# (ie. the list is normalised). 1922# 1923# Arguments: 1924# opts configuration options 1925# name element GI 1926# modspec unparsed content model specification 1927 1928proc sgml::DTD:ELEMENT {opts name modspec} { 1929 variable Wsp 1930 array set options $opts 1931 1932 upvar #0 $options(elementdecls) elements 1933 1934 if {$options(-validate) && [info exists elements($name)]} { 1935 eval $options(-errorcommand) [list elementdeclared "element \"$name\" already declared"] 1936 } else { 1937 switch -- $modspec { 1938 EMPTY { 1939 set elements($name) {} 1940 uplevel #0 $options(-elementdeclcommand) $name {{}} 1941 } 1942 ANY { 1943 set elements($name) * 1944 uplevel #0 $options(-elementdeclcommand) $name * 1945 } 1946 default { 1947 # Don't parse the content model for now, 1948 # just pass the model to the application 1949 if {0 && [regexp [format {^\([%s]*#PCDATA[%s]*(\|([^)]+))?[%s]*\)*[%s]*$} $Wsp $Wsp $Wsp $Wsp] discard discard mtoks]} { 1950 set cm($name) [list MIXED [split $mtoks |]] 1951 } elseif {0} { 1952 if {[catch {CModelParse $state(state) $value} result]} { 1953 eval $options(-errorcommand) [list element? $result] 1954 } else { 1955 set cm($id) [list ELEMENT $result] 1956 } 1957 } else { 1958 set elements($name) $modspec 1959 uplevel #0 $options(-elementdeclcommand) $name [list $modspec] 1960 } 1961 } 1962 } 1963 } 1964} 1965 1966# sgml::CModelParse -- 1967# 1968# Parse an element content model (non-mixed). 1969# A syntax tree is constructed. 1970# A transition table is built next. 1971# 1972# This is going to need alot of work! 1973# 1974# Arguments: 1975# state state array variable 1976# value the content model data 1977# 1978# Results: 1979# A Tcl list representing the content model. 1980 1981proc sgml::CModelParse {state value} { 1982 upvar #0 $state var 1983 1984 # First build syntax tree 1985 set syntaxTree [CModelMakeSyntaxTree $state $value] 1986 1987 # Build transition table 1988 set transitionTable [CModelMakeTransitionTable $state $syntaxTree] 1989 1990 return [list $syntaxTree $transitionTable] 1991} 1992 1993# sgml::CModelMakeSyntaxTree -- 1994# 1995# Construct a syntax tree for the regular expression. 1996# 1997# Syntax tree is represented as a Tcl list: 1998# rep {:choice|:seq {{rep list1} {rep list2} ...}} 1999# where: rep is repetition character, *, + or ?. {} for no repetition 2000# listN is nested expression or Name 2001# 2002# Arguments: 2003# spec Element specification 2004# 2005# Results: 2006# Syntax tree for element spec as nested Tcl list. 2007# 2008# Examples: 2009# (memo) 2010# {} {:seq {{} memo}} 2011# (front, body, back?) 2012# {} {:seq {{} front} {{} body} {? back}} 2013# (head, (p | list | note)*, div2*) 2014# {} {:seq {{} head} {* {:choice {{} p} {{} list} {{} note}}} {* div2}} 2015# (p | a | ul)+ 2016# + {:choice {{} p} {{} a} {{} ul}} 2017 2018proc sgml::CModelMakeSyntaxTree {state spec} { 2019 upvar #0 $state var 2020 variable Wsp 2021 variable name 2022 2023 # Translate the spec into a Tcl list. 2024 2025 # None of the Tcl special characters are allowed in a content model spec. 2026 if {[regexp {\$|\[|\]|\{|\}} $spec]} { 2027 return -code error "illegal characters in specification" 2028 } 2029 2030 regsub -all [format {(%s)[%s]*(\?|\*|\+)?[%s]*(,|\|)?} $name $Wsp $Wsp] $spec [format {%sCModelSTname %s {\1} {\2} {\3}} \n $state] spec 2031 regsub -all {\(} $spec "\nCModelSTopenParen $state " spec 2032 regsub -all [format {\)[%s]*(\?|\*|\+)?[%s]*(,|\|)?} $Wsp $Wsp] $spec [format {%sCModelSTcloseParen %s {\1} {\2}} \n $state] spec 2033 2034 array set var {stack {} state start} 2035 eval $spec 2036 2037 # Peel off the outer seq, its redundant 2038 return [lindex [lindex $var(stack) 1] 0] 2039} 2040 2041# sgml::CModelSTname -- 2042# 2043# Processes a name in a content model spec. 2044# 2045# Arguments: 2046# state state array variable 2047# name name specified 2048# rep repetition operator 2049# cs choice or sequence delimiter 2050# 2051# Results: 2052# See CModelSTcp. 2053 2054proc sgml::CModelSTname {state name rep cs args} { 2055 if {[llength $args]} { 2056 return -code error "syntax error in specification: \"$args\"" 2057 } 2058 2059 CModelSTcp $state $name $rep $cs 2060} 2061 2062# sgml::CModelSTcp -- 2063# 2064# Process a content particle. 2065# 2066# Arguments: 2067# state state array variable 2068# name name specified 2069# rep repetition operator 2070# cs choice or sequence delimiter 2071# 2072# Results: 2073# The content particle is added to the current group. 2074 2075proc sgml::CModelSTcp {state cp rep cs} { 2076 upvar #0 $state var 2077 2078 switch -glob -- [lindex $var(state) end]=$cs { 2079 start= { 2080 set var(state) [lreplace $var(state) end end end] 2081 # Add (dummy) grouping, either choice or sequence will do 2082 CModelSTcsSet $state , 2083 CModelSTcpAdd $state $cp $rep 2084 } 2085 :choice= - 2086 :seq= { 2087 set var(state) [lreplace $var(state) end end end] 2088 CModelSTcpAdd $state $cp $rep 2089 } 2090 start=| - 2091 start=, { 2092 set var(state) [lreplace $var(state) end end [expr {$cs == "," ? ":seq" : ":choice"}]] 2093 CModelSTcsSet $state $cs 2094 CModelSTcpAdd $state $cp $rep 2095 } 2096 :choice=| - 2097 :seq=, { 2098 CModelSTcpAdd $state $cp $rep 2099 } 2100 :choice=, - 2101 :seq=| { 2102 return -code error "syntax error in specification: incorrect delimiter after \"$cp\", should be \"[expr {$cs == "," ? "|" : ","}]\"" 2103 } 2104 end=* { 2105 return -code error "syntax error in specification: no delimiter before \"$cp\"" 2106 } 2107 default { 2108 return -code error "syntax error" 2109 } 2110 } 2111 2112} 2113 2114# sgml::CModelSTcsSet -- 2115# 2116# Start a choice or sequence on the stack. 2117# 2118# Arguments: 2119# state state array 2120# cs choice oir sequence 2121# 2122# Results: 2123# state is modified: end element of state is appended. 2124 2125proc sgml::CModelSTcsSet {state cs} { 2126 upvar #0 $state var 2127 2128 set cs [expr {$cs == "," ? ":seq" : ":choice"}] 2129 2130 if {[llength $var(stack)]} { 2131 set var(stack) [lreplace $var(stack) end end $cs] 2132 } else { 2133 set var(stack) [list $cs {}] 2134 } 2135} 2136 2137# sgml::CModelSTcpAdd -- 2138# 2139# Append a content particle to the top of the stack. 2140# 2141# Arguments: 2142# state state array 2143# cp content particle 2144# rep repetition 2145# 2146# Results: 2147# state is modified: end element of state is appended. 2148 2149proc sgml::CModelSTcpAdd {state cp rep} { 2150 upvar #0 $state var 2151 2152 if {[llength $var(stack)]} { 2153 set top [lindex $var(stack) end] 2154 lappend top [list $rep $cp] 2155 set var(stack) [lreplace $var(stack) end end $top] 2156 } else { 2157 set var(stack) [list $rep $cp] 2158 } 2159} 2160 2161# sgml::CModelSTopenParen -- 2162# 2163# Processes a '(' in a content model spec. 2164# 2165# Arguments: 2166# state state array 2167# 2168# Results: 2169# Pushes stack in state array. 2170 2171proc sgml::CModelSTopenParen {state args} { 2172 upvar #0 $state var 2173 2174 if {[llength $args]} { 2175 return -code error "syntax error in specification: \"$args\"" 2176 } 2177 2178 lappend var(state) start 2179 lappend var(stack) [list {} {}] 2180} 2181 2182# sgml::CModelSTcloseParen -- 2183# 2184# Processes a ')' in a content model spec. 2185# 2186# Arguments: 2187# state state array 2188# rep repetition 2189# cs choice or sequence delimiter 2190# 2191# Results: 2192# Stack is popped, and former top of stack is appended to previous element. 2193 2194proc sgml::CModelSTcloseParen {state rep cs args} { 2195 upvar #0 $state var 2196 2197 if {[llength $args]} { 2198 return -code error "syntax error in specification: \"$args\"" 2199 } 2200 2201 set cp [lindex $var(stack) end] 2202 set var(stack) [lreplace $var(stack) end end] 2203 set var(state) [lreplace $var(state) end end] 2204 CModelSTcp $state $cp $rep $cs 2205} 2206 2207# sgml::CModelMakeTransitionTable -- 2208# 2209# Given a content model's syntax tree, constructs 2210# the transition table for the regular expression. 2211# 2212# See "Compilers, Principles, Techniques, and Tools", 2213# Aho, Sethi and Ullman. Section 3.9, algorithm 3.5. 2214# 2215# Arguments: 2216# state state array variable 2217# st syntax tree 2218# 2219# Results: 2220# The transition table is returned, as a key/value Tcl list. 2221 2222proc sgml::CModelMakeTransitionTable {state st} { 2223 upvar #0 $state var 2224 2225 # Construct nullable, firstpos and lastpos functions 2226 array set var {number 0} 2227 foreach {nullable firstpos lastpos} [ \ 2228 TraverseDepth1st $state $st { 2229 # Evaluated for leaf nodes 2230 # Compute nullable(n) 2231 # Compute firstpos(n) 2232 # Compute lastpos(n) 2233 set nullable [nullable leaf $rep $name] 2234 set firstpos [list {} $var(number)] 2235 set lastpos [list {} $var(number)] 2236 set var(pos:$var(number)) $name 2237 } { 2238 # Evaluated for nonterminal nodes 2239 # Compute nullable, firstpos, lastpos 2240 set firstpos [firstpos $cs $firstpos $nullable] 2241 set lastpos [lastpos $cs $lastpos $nullable] 2242 set nullable [nullable nonterm $rep $cs $nullable] 2243 } \ 2244 ] break 2245 2246 set accepting [incr var(number)] 2247 set var(pos:$accepting) # 2248 2249 # var(pos:N) maps from position to symbol. 2250 # Construct reverse map for convenience. 2251 # NB. A symbol may appear in more than one position. 2252 # var is about to be reset, so use different arrays. 2253 2254 foreach {pos symbol} [array get var pos:*] { 2255 set pos [lindex [split $pos :] 1] 2256 set pos2symbol($pos) $symbol 2257 lappend sym2pos($symbol) $pos 2258 } 2259 2260 # Construct the followpos functions 2261 catch {unset var} 2262 followpos $state $st $firstpos $lastpos 2263 2264 # Construct transition table 2265 # Dstates is [union $marked $unmarked] 2266 set unmarked [list [lindex $firstpos 1]] 2267 while {[llength $unmarked]} { 2268 set T [lindex $unmarked 0] 2269 lappend marked $T 2270 set unmarked [lrange $unmarked 1 end] 2271 2272 # Find which input symbols occur in T 2273 set symbols {} 2274 foreach pos $T { 2275 if {$pos != $accepting && [lsearch $symbols $pos2symbol($pos)] < 0} { 2276 lappend symbols $pos2symbol($pos) 2277 } 2278 } 2279 foreach a $symbols { 2280 set U {} 2281 foreach pos $sym2pos($a) { 2282 if {[lsearch $T $pos] >= 0} { 2283 # add followpos($pos) 2284 if {$var($pos) == {}} { 2285 lappend U $accepting 2286 } else { 2287 eval lappend U $var($pos) 2288 } 2289 } 2290 } 2291 set U [makeSet $U] 2292 if {[llength $U] && [lsearch $marked $U] < 0 && [lsearch $unmarked $U] < 0} { 2293 lappend unmarked $U 2294 } 2295 set Dtran($T,$a) $U 2296 } 2297 2298 } 2299 2300 return [list [array get Dtran] [array get sym2pos] $accepting] 2301} 2302 2303# sgml::followpos -- 2304# 2305# Compute the followpos function, using the already computed 2306# firstpos and lastpos. 2307# 2308# Arguments: 2309# state array variable to store followpos functions 2310# st syntax tree 2311# firstpos firstpos functions for the syntax tree 2312# lastpos lastpos functions 2313# 2314# Results: 2315# followpos functions for each leaf node, in name/value format 2316 2317proc sgml::followpos {state st firstpos lastpos} { 2318 upvar #0 $state var 2319 2320 switch -- [lindex [lindex $st 1] 0] { 2321 :seq { 2322 for {set i 1} {$i < [llength [lindex $st 1]]} {incr i} { 2323 followpos $state [lindex [lindex $st 1] $i] \ 2324 [lindex [lindex $firstpos 0] [expr $i - 1]] \ 2325 [lindex [lindex $lastpos 0] [expr $i - 1]] 2326 foreach pos [lindex [lindex [lindex $lastpos 0] [expr $i - 1]] 1] { 2327 eval lappend var($pos) [lindex [lindex [lindex $firstpos 0] $i] 1] 2328 set var($pos) [makeSet $var($pos)] 2329 } 2330 } 2331 } 2332 :choice { 2333 for {set i 1} {$i < [llength [lindex $st 1]]} {incr i} { 2334 followpos $state [lindex [lindex $st 1] $i] \ 2335 [lindex [lindex $firstpos 0] [expr $i - 1]] \ 2336 [lindex [lindex $lastpos 0] [expr $i - 1]] 2337 } 2338 } 2339 default { 2340 # No action at leaf nodes 2341 } 2342 } 2343 2344 switch -- [lindex $st 0] { 2345 ? { 2346 # We having nothing to do here ! Doing the same as 2347 # for * effectively converts this qualifier into the other. 2348 } 2349 * { 2350 foreach pos [lindex $lastpos 1] { 2351 eval lappend var($pos) [lindex $firstpos 1] 2352 set var($pos) [makeSet $var($pos)] 2353 } 2354 } 2355 } 2356 2357} 2358 2359# sgml::TraverseDepth1st -- 2360# 2361# Perform depth-first traversal of a tree. 2362# A new tree is constructed, with each node computed by f. 2363# 2364# Arguments: 2365# state state array variable 2366# t The tree to traverse, a Tcl list 2367# leaf Evaluated at a leaf node 2368# nonTerm Evaluated at a nonterminal node 2369# 2370# Results: 2371# A new tree is returned. 2372 2373proc sgml::TraverseDepth1st {state t leaf nonTerm} { 2374 upvar #0 $state var 2375 2376 set nullable {} 2377 set firstpos {} 2378 set lastpos {} 2379 2380 switch -- [lindex [lindex $t 1] 0] { 2381 :seq - 2382 :choice { 2383 set rep [lindex $t 0] 2384 set cs [lindex [lindex $t 1] 0] 2385 2386 foreach child [lrange [lindex $t 1] 1 end] { 2387 foreach {childNullable childFirstpos childLastpos} \ 2388 [TraverseDepth1st $state $child $leaf $nonTerm] break 2389 lappend nullable $childNullable 2390 lappend firstpos $childFirstpos 2391 lappend lastpos $childLastpos 2392 } 2393 2394 eval $nonTerm 2395 } 2396 default { 2397 incr var(number) 2398 set rep [lindex [lindex $t 0] 0] 2399 set name [lindex [lindex $t 1] 0] 2400 eval $leaf 2401 } 2402 } 2403 2404 return [list $nullable $firstpos $lastpos] 2405} 2406 2407# sgml::firstpos -- 2408# 2409# Computes the firstpos function for a nonterminal node. 2410# 2411# Arguments: 2412# cs node type, choice or sequence 2413# firstpos firstpos functions for the subtree 2414# nullable nullable functions for the subtree 2415# 2416# Results: 2417# firstpos function for this node is returned. 2418 2419proc sgml::firstpos {cs firstpos nullable} { 2420 switch -- $cs { 2421 :seq { 2422 set result [lindex [lindex $firstpos 0] 1] 2423 for {set i 0} {$i < [llength $nullable]} {incr i} { 2424 if {[lindex [lindex $nullable $i] 1]} { 2425 eval lappend result [lindex [lindex $firstpos [expr $i + 1]] 1] 2426 } else { 2427 break 2428 } 2429 } 2430 } 2431 :choice { 2432 foreach child $firstpos { 2433 eval lappend result $child 2434 } 2435 } 2436 } 2437 2438 return [list $firstpos [makeSet $result]] 2439} 2440 2441# sgml::lastpos -- 2442# 2443# Computes the lastpos function for a nonterminal node. 2444# Same as firstpos, only logic is reversed 2445# 2446# Arguments: 2447# cs node type, choice or sequence 2448# lastpos lastpos functions for the subtree 2449# nullable nullable functions forthe subtree 2450# 2451# Results: 2452# lastpos function for this node is returned. 2453 2454proc sgml::lastpos {cs lastpos nullable} { 2455 switch -- $cs { 2456 :seq { 2457 set result [lindex [lindex $lastpos end] 1] 2458 for {set i [expr [llength $nullable] - 1]} {$i >= 0} {incr i -1} { 2459 if {[lindex [lindex $nullable $i] 1]} { 2460 eval lappend result [lindex [lindex $lastpos $i] 1] 2461 } else { 2462 break 2463 } 2464 } 2465 } 2466 :choice { 2467 foreach child $lastpos { 2468 eval lappend result $child 2469 } 2470 } 2471 } 2472 2473 return [list $lastpos [makeSet $result]] 2474} 2475 2476# sgml::makeSet -- 2477# 2478# Turn a list into a set, ie. remove duplicates. 2479# 2480# Arguments: 2481# s a list 2482# 2483# Results: 2484# A set is returned, which is a list with duplicates removed. 2485 2486proc sgml::makeSet s { 2487 foreach r $s { 2488 if {[llength $r]} { 2489 set unique($r) {} 2490 } 2491 } 2492 return [array names unique] 2493} 2494 2495# sgml::nullable -- 2496# 2497# Compute the nullable function for a node. 2498# 2499# Arguments: 2500# nodeType leaf or nonterminal 2501# rep repetition applying to this node 2502# name leaf node: symbol for this node, nonterm node: choice or seq node 2503# subtree nonterm node: nullable functions for the subtree 2504# 2505# Results: 2506# Returns nullable function for this branch of the tree. 2507 2508proc sgml::nullable {nodeType rep name {subtree {}}} { 2509 switch -glob -- $rep:$nodeType { 2510 :leaf - 2511 +:leaf { 2512 return [list {} 0] 2513 } 2514 \\*:leaf - 2515 \\?:leaf { 2516 return [list {} 1] 2517 } 2518 \\*:nonterm - 2519 \\?:nonterm { 2520 return [list $subtree 1] 2521 } 2522 :nonterm - 2523 +:nonterm { 2524 switch -- $name { 2525 :choice { 2526 set result 0 2527 foreach child $subtree { 2528 set result [expr $result || [lindex $child 1]] 2529 } 2530 } 2531 :seq { 2532 set result 1 2533 foreach child $subtree { 2534 set result [expr $result && [lindex $child 1]] 2535 } 2536 } 2537 } 2538 return [list $subtree $result] 2539 } 2540 } 2541} 2542 2543# sgml::DTD:ATTLIST -- 2544# 2545# <!ATTLIST ...> defines an attribute list. 2546# 2547# Arguments: 2548# opts configuration opions 2549# name Element GI 2550# attspec unparsed attribute definitions 2551# 2552# Results: 2553# Attribute list variables are modified. 2554 2555proc sgml::DTD:ATTLIST {opts name attspec} { 2556 variable attlist_exp 2557 variable attlist_enum_exp 2558 variable attlist_fixed_exp 2559 2560 array set options $opts 2561 2562 # Parse the attribute list. If it were regular, could just use foreach, 2563 # but some attributes may have values. 2564 regsub -all {([][$\\])} $attspec {\\\1} attspec 2565 regsub -all $attlist_exp $attspec "\}\nDTDAttribute {$options(-attlistdeclcommand)} $name $options(attlistdecls) {\\1} {\\2} {\\3} {} \{" attspec 2566 regsub -all $attlist_enum_exp $attspec "\}\nDTDAttribute {$options(-attlistdeclcommand)} $name $options(attlistdecls) {\\1} {\\2} {} {\\4} \{" attspec 2567 regsub -all $attlist_fixed_exp $attspec "\}\nDTDAttribute {$options(-attlistdeclcommand)} $name $options(attlistdecls) {\\1} {\\2} {\\3} {\\4} \{" attspec 2568 2569 eval "noop \{$attspec\}" 2570 2571 return {} 2572} 2573 2574# sgml::DTDAttribute -- 2575# 2576# Parse definition of a single attribute. 2577# 2578# Arguments: 2579# callback attribute defn callback 2580# name element name 2581# var array variable 2582# att attribute name 2583# type type of this attribute 2584# default default value of the attribute 2585# value other information 2586# text other text (should be empty) 2587# 2588# Results: 2589# Attribute defn added to array, unless it already exists 2590 2591proc sgml::DTDAttribute args { 2592 # BUG: Some problems with parameter passing - deal with it later 2593 foreach {callback name var att type default value text} $args break 2594 2595 upvar #0 $var atts 2596 2597 if {[string length [string trim $text]]} { 2598 return -code error "unexpected text \"$text\" in attribute definition" 2599 } 2600 2601 # What about overridden attribute defns? 2602 # A non-validating app may want to know about them 2603 # (eg. an editor) 2604 if {![info exists atts($name/$att)]} { 2605 set atts($name/$att) [list $type $default $value] 2606 uplevel #0 $callback [list $name $att $type $default $value] 2607 } 2608 2609 return {} 2610} 2611 2612# sgml::DTD:ENTITY -- 2613# 2614# <!ENTITY ...> declaration. 2615# 2616# Callbacks: 2617# -entitydeclcommand for general entity declaration 2618# -unparsedentitydeclcommand for unparsed external entity declaration 2619# -parameterentitydeclcommand for parameter entity declaration 2620# 2621# Arguments: 2622# opts configuration options 2623# name name of entity being defined 2624# param whether a parameter entity is being defined 2625# value unparsed replacement text 2626# 2627# Results: 2628# Modifies the caller's entities array variable 2629 2630proc sgml::DTD:ENTITY {opts name param value} { 2631 2632 array set options $opts 2633 2634 if {[string compare % $param]} { 2635 # Entity declaration - general or external 2636 upvar #0 $options(entities) ents 2637 upvar #0 $options(extentities) externals 2638 2639 if {[info exists ents($name)] || [info exists externals($name)]} { 2640 eval $options(-warningcommand) entity [list "entity \"$name\" already declared"] 2641 } else { 2642 if {[catch {uplevel #0 $options(-parseentitydeclcommand) [list $value]} value]} { 2643 return -code error "unable to parse entity declaration due to \"$value\"" 2644 } 2645 switch -glob [lindex $value 0],[lindex $value 3] { 2646 internal, { 2647 set ents($name) [EntitySubst [array get options] [lindex $value 1]] 2648 uplevel #0 $options(-entitydeclcommand) [list $name $ents($name)] 2649 } 2650 internal,* { 2651 return -code error "unexpected NDATA declaration" 2652 } 2653 external, { 2654 set externals($name) [lrange $value 1 2] 2655 uplevel #0 $options(-entitydeclcommand) [eval list $name [lrange $value 1 2]] 2656 } 2657 external,* { 2658 set externals($name) [lrange $value 1 3] 2659 uplevel #0 $options(-unparsedentitydeclcommand) [eval list $name [lrange $value 1 3]] 2660 } 2661 default { 2662 return -code error "internal error: unexpected parser state" 2663 } 2664 } 2665 } 2666 } else { 2667 # Parameter entity declaration 2668 upvar #0 $options(parameterentities) PEnts 2669 upvar #0 $options(externalparameterentities) ExtPEnts 2670 2671 if {[info exists PEnts($name)] || [info exists ExtPEnts($name)]} { 2672 eval $options(-warningcommand) parameterentity [list "parameter entity \"$name\" already declared"] 2673 } else { 2674 if {[catch {uplevel #0 $options(-parseentitydeclcommand) [list $value]} value]} { 2675 return -code error "unable to parse parameter entity declaration due to \"$value\"" 2676 } 2677 if {[string length [lindex $value 3]]} { 2678 return -code error "NDATA illegal in parameter entity declaration" 2679 } 2680 switch [lindex $value 0] { 2681 internal { 2682 # Substitute character references and PEs (XML: 4.5) 2683 set value [EntitySubst [array get options] [lindex $value 1]] 2684 2685 set PEnts($name) $value 2686 uplevel #0 $options(-parameterentitydeclcommand) [list $name $value] 2687 } 2688 external - 2689 default { 2690 # Get the replacement text now. 2691 # Could wait until the first reference, but easier 2692 # to just do it now. 2693 2694 set token [uri::geturl [uri::resolve $options(-baseurl) [lindex $value 1]]] 2695 2696 set ExtPEnts($name) [lindex [array get $token data] 1] 2697 uplevel #0 $options(-parameterentitydeclcommand) [eval list $name [lrange $value 1 2]] 2698 } 2699 } 2700 } 2701 } 2702} 2703 2704# sgml::EntitySubst -- 2705# 2706# Perform entity substitution on an entity replacement text. 2707# This differs slightly from other substitution procedures, 2708# because only parameter and character entity substitution 2709# is performed, not general entities. 2710# See XML Rec. section 4.5. 2711# 2712# Arguments: 2713# opts configuration options 2714# value Literal entity value 2715# 2716# Results: 2717# Expanded replacement text 2718 2719proc sgml::EntitySubst {opts value} { 2720 array set options $opts 2721 2722 # Protect Tcl special characters 2723 regsub -all {([{}\\])} $value {\\\1} value 2724 2725 # Find entity references 2726 regsub -all (&#\[0-9\]+|&#x\[0-9a-fA-F\]+|%${::sgml::Name})\; $value "\[EntitySubstValue [list $options(parameterentities)] {\\1}\]" value 2727 2728 set result [subst $value] 2729 2730 return $result 2731} 2732 2733# sgml::EntitySubstValue -- 2734# 2735# Handle a single character or parameter entity substitution 2736# 2737# Arguments: 2738# PEvar array variable containing PE declarations 2739# ref character or parameter entity reference 2740# 2741# Results: 2742# Replacement text 2743 2744proc sgml::EntitySubstValue {PEvar ref} { 2745 switch -glob -- $ref { 2746 &#x* { 2747 scan [string range $ref 3 end] %x hex 2748 return [format %c $hex] 2749 } 2750 &#* { 2751 return [format %c [string range $ref 2 end]] 2752 } 2753 %* { 2754 upvar #0 $PEvar PEs 2755 set ref [string range $ref 1 end] 2756 if {[info exists PEs($ref)]} { 2757 return $PEs($ref) 2758 } else { 2759 return -code error "parameter entity \"$ref\" not declared" 2760 } 2761 } 2762 default { 2763 return -code error "internal error - unexpected entity reference" 2764 } 2765 } 2766 return {} 2767} 2768 2769# sgml::DTD:NOTATION -- 2770# 2771# Process notation declaration 2772# 2773# Arguments: 2774# opts configuration options 2775# name notation name 2776# value unparsed notation spec 2777 2778proc sgml::DTD:NOTATION {opts name value} { 2779 return {} 2780 2781 variable notation_exp 2782 upvar opts state 2783 2784 if {[regexp $notation_exp $value x scheme data] == 2} { 2785 } else { 2786 eval $state(-errorcommand) [list notationvalue "notation value \"$value\" incorrectly specified"] 2787 } 2788} 2789 2790# sgml::ResolveEntity -- 2791# 2792# Default entity resolution routine 2793# 2794# Arguments: 2795# name name of parent parser 2796# base base URL for relative URLs 2797# sysId system identifier 2798# pubId public identifier 2799 2800proc sgml::ResolveEntity {name base sysId pubId} { 2801 variable ParseEventNum 2802 2803 if {[catch {uri::resolve $base $sysId} url]} { 2804 return -code error "unable to resolve system identifier \"$sysId\"" 2805 } 2806 if {[catch {uri::geturl $url} token]} { 2807 return -code error "unable to retrieve external entity \"$url\" for system identifier \"$sysId\"" 2808 } 2809 2810 upvar #0 $token data 2811 2812 set parser [uplevel #0 $name entityparser] 2813 2814 $parser parse $data(body) -dtdsubset external 2815 #$parser free 2816 2817 return {} 2818} 2819