1# xmlswitch.tcl -- 2# 3# This file implements a control structure for Tcl. 4# 'xmlswitch' iterates over an XML document. Features in 5# the document may be specified using XPath location paths, 6# and these will trigger Tcl scripts when matched. 7# 8# Copyright (c) 2000-2003 Zveno Pty Ltd 9# http://www.zveno.com/ 10# 11# Zveno makes this software available free of charge for any purpose. 12# Copies may be made of this software but all of this notice must be included 13# on any copy. 14# 15# The software was developed for research purposes only and Zveno does not 16# warrant that it is error free or fit for any purpose. Zveno disclaims any 17# liability for all claims, expenses, losses, damages and costs any user may 18# incur as a result of using, copying or modifying this software. 19# 20# $Id: xmlswitch.tcl,v 1.4 2003/03/09 11:12:49 balls Exp $ 21 22package provide xmlswitch 1.0 23 24# We need the xml, dom and xpath packages 25 26package require xml 2.6 27package require dom 2.6 28package require xpath 1.0 29 30namespace eval xmlswitch { 31 namespace export xmlswitch xmlswitchcont xmlswitchend 32 namespace export domswitch 33 namespace export free rootnode 34 35 variable counter 0 36 37 variable typemap 38 array set typemap { 39 text textNode 40 comment comment 41 processing-instruction processingInstruction 42 } 43} 44 45# xmlswitch::xmlswitch -- 46# 47# Parse XML data, matching for XPath locations along the way 48# and (possibly) triggering callbacks. 49# 50# A DOM tree is built as a side-effect (necessary for resolving 51# XPath location paths). 52# 53# Arguments: 54# xml XML document 55# args configuration options, 56# plus a single path/script expression, or multiple expressions 57# 58# Results: 59# Tcl callbacks may be invoked. 60# If -async option is true returns a token for this "process". 61 62proc xmlswitch::xmlswitch {xml args} { 63 variable counter 64 65 #puts stderr [list xmlswitch::xmlswitch $xml $args] 66 67 set stateVarName [namespace current]::State[incr counter] 68 upvar #0 $stateVarName state 69 set state(stateVarName) $stateVarName 70 set state(-async) 0 71 72 set state(pathArray) ${stateVarName}Paths 73 upvar #0 $state(pathArray) paths 74 array set paths {} 75 76 set cleanup { 77 unset state 78 unset paths 79 } 80 81 # Find configuration options and remove 82 set numOpts 0 83 foreach {opt value} $args { 84 switch -glob -- $opt { 85 -* { 86 set state($opt) $value 87 incr numOpts 2 88 } 89 default { 90 set args [lrange $args $numOpts end] 91 break 92 } 93 } 94 } 95 96 switch -- [llength $args] { 97 0 { 98 # Nothing to do 99 eval $cleanup 100 return $stateVarName 101 } 102 1 { 103 foreach {path script} [lindex $args 0] { 104 set paths([xpath::split $path]) $script 105 } 106 } 107 default { 108 if {[llength $args] % 2} { 109 eval $cleanup 110 return -code error "no script matching location path \"[lindex $args end]\"" 111 } 112 foreach {path script} $args { 113 set paths([xpath::split $path]) $script 114 } 115 } 116 } 117 118 set root [set state(root) [dom::DOMImplementation create]] 119 set state(current) $root 120 121 # Parse the document 122 # We're going to do this incrementally, so the caller can 123 # break at any time 124 set state(parser) [eval xml::parser [array get state -parser]] 125 #append cleanup "\n $parser destroy\n" 126 $state(parser) configure \ 127 -elementstartcommand [namespace code [list ParseElementStart $stateVarName]] \ 128 -elementendcommand [namespace code [list ParseElementEnd $stateVarName]] \ 129 -characterdatacommand [namespace code [list ParseCharacterData $stateVarName]] \ 130 -final false 131 132# -processinginstructioncommand [namespace code [list ParsePI $stateVarName]] \ 133# -commentcommand [namespace code [list ParseComment]] 134 135 if {[catch {$state(parser) parse $xml} err]} { 136 eval $cleanup 137 return -code error $err 138 } 139 140 if {$state(-async)} { 141 return $stateVarName 142 } else { 143 eval $cleanup 144 return {} 145 } 146} 147 148# xmlswitch::xmlswitchcont -- 149# 150# Provide more XML data to parse 151# 152# Arguments: 153# token state variable name 154# xml XML data 155# 156# Results: 157# More parsing 158 159proc xmlswitch::xmlswitchcont {token xml} { 160 upvar #0 $token state 161 162 $state(parser) parse $xml 163 164 return {} 165} 166 167# xmlswitch::xmlswitchend -- 168# 169# Signal that no further data is available 170# 171# Arguments: 172# token state array 173# 174# Results: 175# Parser configuration changed 176 177proc xmlswitch::xmlswitchend token { 178 upvar #0 $token state 179 180 $state(parser) configure -final true 181 182 return {} 183} 184 185# xmlswitch::rootnode -- 186# 187# Get the root node 188# 189# Arguments: 190# token state array 191# 192# Results: 193# Returns root node token 194 195proc xmlswitch::rootnode token { 196 upvar #0 $token state 197 198 return $state(root) 199} 200 201# xmlswitch::free -- 202# 203# Free resources EXCEPT the DOM tree. 204# "-all" causes DOM tree to be destroyed too. 205# 206# Arguments: 207# token state array 208# args options 209# 210# Results: 211# Resources freed. 212 213proc xmlswitch::free {token args} { 214 upvar #0 $token state 215 216 if {[lsearch $args "-all"] >= 0} { 217 dom::DOMImplementation destroy $state(root) 218 } 219 220 catch {unset $state(pathArray)} 221 catch {unset state} 222 223 catch {$state(parser) free} 224 225 return {} 226} 227 228# xmlswitch::ParseElementStart -- 229# 230# Handle element start tag 231# 232# Arguments: 233# token state array 234# name element type 235# attrList attribute list 236# args options 237# Results: 238# All XPath location paths are checked for a match, 239# and script evaluated for matching XPath. 240# DOM tree node added. 241 242proc xmlswitch::ParseElementStart:dbgdisabled {token name attrList args} { 243 if {[catch {eval ParseElementStart:dbg [list $token $name $attrList] $args} msg]} { 244 puts stderr [list ParseElementStart failed with msg $msg] 245 puts stderr $::errorInfo 246 return -code error $msg 247 } else { 248 puts stderr [list ParseElementStart returned OK] 249 } 250 return $msg 251} 252proc xmlswitch::ParseElementStart {token name attrList args} { 253 254 upvar #0 $token state 255 array set opts $args 256 257 #puts stderr [list xmlswitch::ParseElementStart $token $name $attrList $args] 258 259 lappend state(current) \ 260 [dom::document createElement [lindex $state(current) end] $name] 261 foreach {name value} $attrList { 262 dom::element setAttribute [lindex $state(current) end] $name $value 263 } 264 265 MatchTemplates $token [lindex $state(current) end] 266 267 return {} 268} 269 270# xmlswitch::ParseElementEnd -- 271# 272# Handle element end tag 273# 274# Arguments: 275# token state array 276# name element type 277# args options 278# Results: 279# State changed 280 281proc xmlswitch::ParseElementEnd {token name args} { 282 upvar #0 $token state 283 284 set state(current) [lreplace $state(current) end end] 285 286 return {} 287} 288 289# xmlswitch::ParseCharacterData -- 290# 291# Handle character data 292# 293# Arguments: 294# token state array 295# data pcdata 296# 297# Results: 298# All XPath location paths are checked for a match, 299# and script evaluated for matching XPath. 300# DOM tree node added. 301 302proc xmlswitch::ParseCharacterData {token data} { 303 upvar #0 $token state 304 305 lappend state(current) \ 306 [dom::document createTextNode [lindex $state(current) end] $data] 307 308 MatchTemplates $token [lindex $state(current) end] 309 310 set state(current) [lreplace $state(current) end end] 311 312 return {} 313} 314 315# xmlswitch::domswitch -- 316# 317# Similar to xmlswitch above, but iterates over a pre-built 318# DOM tree. 319# 320# Arguments: 321# xml XML document 322# args a single path/script expression, or multiple expressions 323# 324# Results: 325# Tcl callbacks may be invoked. 326 327proc xmlswitch::domswitch {xml args} { 328} 329 330# xmlswitch::MatchTemplates -- 331# 332# Check all templates for one which matches 333# the current node. 334# 335# Arguments: 336# token state array 337# node Current DOM node 338# 339# Results: 340# If a template matches, its script is evaluated 341 342proc xmlswitch::MatchTemplates {token node} { 343 upvar #0 $token state 344 upvar #0 $state(pathArray) paths 345 346 #puts stderr [list xmlswitch::MatchTemplates $token $node (type: [dom::node cget $node -nodeType]) (name: [dom::node cget $node -nodeName])] 347 348 set matches {} 349 350 foreach {path script} [array get paths] { 351 352 #puts stderr [list checking path $path for a match] 353 354 set context $node 355 356 # Work backwards along the path, reversing each axis 357 set match 0 358 set i [llength $path] 359 #puts stderr [list $i steps to be tested] 360 while {[incr i -1] >= 0} { 361 #puts stderr [list step $i [lindex $path $i]] 362 switch -glob [llength [lindex $path $i]],$i { 363 0,0 { 364 #puts stderr [list absolute path, end of steps - am I at the root?] 365 if {![string length [dom::node parent $context]]} { 366 #puts stderr [list absolute path matched] 367 lappend matches [list $path $script] 368 } else { 369 #puts stderr [list absolute path did not match] 370 } 371 } 372 *,0 { 373 #puts stderr [list last step, relative path] 374 switch [lindex [lindex $path $i] 0] { 375 child { 376 if {[NodeTest [lindex $path $i] $context] && \ 377 [CheckPredicates [lindex $path $i] $context]} { 378 #puts stderr [list relative path matched] 379 lappend matches [list $path $script] 380 } else { 381 #puts stderr [list relative path did not match] 382 } 383 } 384 default { 385 return -code error "axis \"[lindex [lindex $path $i] 0]\" not supported" 386 } 387 } 388 } 389 default { 390 #puts stderr [list continuing checking steps] 391 switch [lindex [lindex $path $i] 0] { 392 child { 393 if {[NodeTest [lindex $path $i] $context] && \ 394 [CheckPredicates [lindex $path $i] $context]} { 395 set context [dom::node parent $context] 396 } else { 397 #puts stderr [list no match] 398 } 399 } 400 default { 401 return -code error "axis \"[lindex [lindex $path $i] 0]\" not supported" 402 } 403 } 404 } 405 } 406 } 407 } 408 409 # TODO: If there are multiple matches then we must pick the 410 # most specific match 411 412 if {[llength $matches] > 1} { 413 # For the moment we'll just take the first match 414 set matches [list [lindex $matches 0]] 415 } 416 417 if {[llength $matches]} { 418 #puts stderr [list evaluating callback at level [info level]] 419 uplevel 3 [lindex [lindex $matches 0] 1] 420 } 421 422 return {} 423} 424 425# xmlswitch::NodeTest -- 426# 427# Check that the node passes the node (type) test 428# 429# Arguments: 430# step Location step 431# node DOM node 432# 433# Results: 434# Boolean 435 436proc xmlswitch::NodeTest {step node} { 437 438 if {[llength [lindex $step 1]] > 1} { 439 switch -glob -- [lindex [lindex $step 1] 0],[dom::node cget $node -nodeType] { 440 node,* - 441 text,textNode - 442 comment,comment - 443 processing-instruction,processingInstruction { 444 return 1 445 } 446 default { 447 return 0 448 } 449 } 450 } elseif {![string compare [lindex $step 1] "*"]} { 451 return 1 452 } elseif {![string compare [lindex $step 1] [dom::node cget $node -nodeName]]} { 453 return 1 454 } else { 455 return 0 456 } 457} 458 459# xmlswitch::CheckPredicates -- 460# 461# Check that the node passes the predicates 462# 463# Arguments: 464# step Location step 465# node DOM node 466# 467# Results: 468# Boolean 469 470proc xmlswitch::CheckPredicates {step node} { 471 variable typemap 472 473 set predicates [lindex $step 2] 474 # Shortcut: no predicates means everything passes 475 if {![llength $predicates]} { 476 return 1 477 } 478 479 # Get the context node set 480 switch [lindex $step 0] { 481 child { 482 set nodeset {} 483 if {[llength [lindex $step 1]]} { 484 foreach {name typetest} [lindex $step 1] break 485 switch -- $name { 486 node { 487 set nodeset [dom::node children [dom::node parent $node]] 488 } 489 text - 490 comment - 491 processing-instruction { 492 foreach child [dom::node children [dom::node parent $node]] { 493 if {![string compare [dom::node cget $child -nodeType] $typemap($name)]} { 494 lappend nodeset $child 495 } 496 } 497 } 498 default { 499 # Error 500 } 501 } 502 } else { 503 foreach child [dom::node children [dom::node parent $node]] { 504 if {![string compare [lindex $step 1] [dom::node cget $child -nodeName]]} { 505 lappend nodeset $child 506 } 507 } 508 } 509 } 510 default { 511 return -code error "axis \"[lindex $step 0]\" not supported" 512 } 513 } 514 515 foreach predicate $predicates { 516 # position() is the only supported predicate 517 if {[lsearch $nodeset $node] + 1 == $predicate} { 518 # continue 519 } else { 520 return 0 521 } 522 } 523 524 return 1 525} 526 527