1# xpath.tcl -- 2# 3# Provides an XPath parser for Tcl, 4# plus various support procedures 5# 6# Copyright (c) 2000-2002 Zveno Pty Ltd 7# 8# $Id: xpath.tcl,v 1.7 2002/06/14 12:16:23 balls Exp $ 9 10package provide xpath 1.0 11 12# We need the XML package for definition of Names 13package require xml 14 15namespace eval xpath { 16 namespace export split join createnode 17 18 variable axes { 19 ancestor 20 ancestor-or-self 21 attribute 22 child 23 descendant 24 descendant-or-self 25 following 26 following-sibling 27 namespace 28 parent 29 preceding 30 preceding-sibling 31 self 32 } 33 34 variable nodeTypes { 35 comment 36 text 37 processing-instruction 38 node 39 } 40 41 # NB. QName has parens for prefix 42 43 variable nodetestExpr ^(${::xml::QName})${::xml::allWsp}(\\(${::xml::allWsp}(("|')(.*?)\\5)?${::xml::allWsp}\\))?${::xml::allWsp}(.*) 44 45 variable nodetestExpr2 ((($::xml::QName)${::xml::allWsp}(\\(${::xml::allWsp}(("|')(.*?)\\7)?${::xml::allWsp}\\))?)|${::xml::allWsp}(\\*))${::xml::allWsp}(.*) 46} 47 48# xpath::split -- 49# 50# Parse an XPath location path 51# 52# Arguments: 53# locpath location path 54# 55# Results: 56# A Tcl list representing the location path. 57# The list has the form: {{axis node-test {predicate predicate ...}} ...} 58# Where each list item is a location step. 59 60proc xpath::split locpath { 61 set leftover {} 62 63 set result [InnerSplit $locpath leftover] 64 65 if {[string length [string trim $leftover]]} { 66 return -code error "unexpected text \"$leftover\"" 67 } 68 69 return $result 70} 71 72proc xpath::InnerSplit {locpath leftoverVar} { 73 upvar $leftoverVar leftover 74 75 variable axes 76 variable nodetestExpr 77 variable nodetestExpr2 78 79 # First determine whether we have an absolute location path 80 if {[regexp {^/(.*)} $locpath discard locpath]} { 81 set path {{}} 82 } else { 83 set path {} 84 } 85 86 while {[string length [string trimleft $locpath]]} { 87 if {[regexp {^\.\.(.*)} $locpath discard locpath]} { 88 # .. abbreviation 89 set axis parent 90 set nodetest * 91 } elseif {[regexp {^/(.*)} $locpath discard locpath]} { 92 # // abbreviation 93 set axis descendant-or-self 94 if {[regexp ^$nodetestExpr2 [string trimleft $locpath] discard discard discard nodetest discard typetest discard discard literal wildcard locpath]} { 95 set nodetest [ResolveWildcard $nodetest $typetest $wildcard $literal] 96 } else { 97 set leftover $locpath 98 return $path 99 } 100 } elseif {[regexp ^\\.${::xml::allWsp}(.*) $locpath discard locpath]} { 101 # . abbreviation 102 set axis self 103 set nodetest * 104 } elseif {[regexp ^@($::xml::QName)${::xml::allWsp}=${::xml::allWsp}"(\[^"\])"(.*) $locpath discard attrName discard attrValue locpath]} { 105 # @ abbreviation 106 set axis attribute 107 set nodetest $attrName 108 } elseif {[regexp ^@($::xml::QName)${::xml::allWsp}=${::xml::allWsp}'(\[^'\])'(.*) $locpath discard attrName discard attrValue locpath]} { 109 # @ abbreviation 110 set axis attribute 111 set nodetest $attrName 112 } elseif {[regexp ^@($::xml::QName)(.*) $locpath discard attrName discard2 locpath]} { 113 # @ abbreviation 114 set axis attribute 115 set nodetest $attrName 116 } elseif {[regexp ^((${::xml::QName})${::xml::allWsp}::${::xml::allWsp})?\\*(.*) $locpath discard discard axis discard locpath]} { 117 # wildcard specified 118 set nodetest * 119 if {![string length $axis]} { 120 set axis child 121 } 122 } elseif {[regexp ^((${::xml::QName})${::xml::allWsp}::${::xml::allWsp})?$nodetestExpr2 $locpath discard discard axis discard discard discard nodetest discard typetest discard discard literal wildcard locpath]} { 123 # nodetest, with or without axis 124 if {![string length $axis]} { 125 set axis child 126 } 127 set nodetest [ResolveWildcard $nodetest $typetest $wildcard $literal] 128 } else { 129 set leftover $locpath 130 return $path 131 } 132 133 # ParsePredicates 134 set predicates {} 135 set locpath [string trimleft $locpath] 136 while {[regexp {^\[(.*)} $locpath discard locpath]} { 137 if {[regexp {^([0-9]+)(\].*)} [string trim $locpath] discard posn locpath]} { 138 set predicate [list = {function position {}} [list number $posn]] 139 } else { 140 set leftover2 {} 141 set predicate [ParseExpr $locpath leftover2] 142 set locpath $leftover2 143 unset leftover2 144 } 145 146 if {[regexp {^\](.*)} [string trimleft $locpath] discard locpath]} { 147 lappend predicates $predicate 148 } else { 149 return -code error "unexpected text in predicate \"$locpath\"" 150 } 151 } 152 153 set axis [string trim $axis] 154 set nodetest [string trim $nodetest] 155 156 # This step completed 157 if {[lsearch $axes $axis] < 0} { 158 return -code error "invalid axis \"$axis\"" 159 } 160 lappend path [list $axis $nodetest $predicates] 161 162 # Move to next step 163 164 if {[string length $locpath] && ![regexp ^/(.*) $locpath discard locpath]} { 165 set leftover $locpath 166 return $path 167 } 168 169 } 170 171 return $path 172} 173 174# xpath::ParseExpr -- 175# 176# Parse one expression in a predicate 177# 178# Arguments: 179# locpath location path to parse 180# leftoverVar Name of variable in which to store remaining path 181# 182# Results: 183# Returns parsed expression as a Tcl list 184 185proc xpath::ParseExpr {locpath leftoverVar} { 186 upvar $leftoverVar leftover 187 variable nodeTypes 188 189 set expr {} 190 set mode expr 191 set stack {} 192 193 while {[string index [string trimleft $locpath] 0] != "\]"} { 194 set locpath [string trimleft $locpath] 195 switch $mode { 196 expr { 197 # We're looking for a term 198 if {[regexp ^-(.*) $locpath discard locpath]} { 199 # UnaryExpr 200 lappend stack "-" 201 } elseif {[regexp ^\\\$({$::xml::QName})(.*) $locpath discard varname discard locpath]} { 202 # VariableReference 203 lappend stack [list varRef $varname] 204 set mode term 205 } elseif {[regexp {^\((.*)} $locpath discard locpath]} { 206 # Start grouping 207 set leftover2 {} 208 lappend stack [list group [ParseExpr $locpath leftover2]] 209 set locpath $leftover2 210 unset leftover2 211 212 if {[regexp {^\)(.*)} [string trimleft $locpath] discard locpath]} { 213 set mode term 214 } else { 215 return -code error "unexpected text \"$locpath\", expected \")\"" 216 } 217 218 } elseif {[regexp {^"([^"]*)"(.*)} $locpath discard literal locpath]} { 219 # Literal (" delimited) 220 lappend stack [list literal $literal] 221 set mode term 222 } elseif {[regexp {^'([^']*)'(.*)} $locpath discard literal locpath]} { 223 # Literal (' delimited) 224 lappend stack [list literal $literal] 225 set mode term 226 } elseif {[regexp {^([0-9]+(\.[0-9]+)?)(.*)} $locpath discard number discard locpath]} { 227 # Number 228 lappend stack [list number $number] 229 set mode term 230 } elseif {[regexp {^(\.[0-9]+)(.*)} $locpath discard number locpath]} { 231 # Number 232 lappend stack [list number $number] 233 set mode term 234 } elseif {[regexp ^(${::xml::QName})\\(${::xml::allWsp}(.*) $locpath discard functionName discard locpath]} { 235 # Function call start or abbreviated node-type test 236 237 if {[lsearch $nodeTypes $functionName] >= 0} { 238 # Looking like a node-type test 239 if {[regexp ^\\)${::xml::allWsp}(.*) $locpath discard locpath]} { 240 lappend stack [list path [list child [list $functionName ()] {}]] 241 set mode term 242 } else { 243 return -code error "invalid node-type test \"$functionName\"" 244 } 245 } else { 246 if {[regexp ^\\)${::xml::allWsp}(.*) $locpath discard locpath]} { 247 set parameters {} 248 } else { 249 set leftover2 {} 250 set parameters [ParseExpr $locpath leftover2] 251 set locpath $leftover2 252 unset leftover2 253 while {[regexp {^,(.*)} $locpath discard locpath]} { 254 set leftover2 {} 255 lappend parameters [ParseExpr $locpath leftover2] 256 set locpath $leftover2 257 unset leftover2 258 } 259 260 if {![regexp ^\\)${::xml::allWsp}(.*) [string trimleft $locpath] discard locpath]} { 261 return -code error "unexpected text \"locpath\" - expected \")\"" 262 } 263 } 264 265 lappend stack [list function $functionName $parameters] 266 set mode term 267 } 268 269 } else { 270 # LocationPath 271 set leftover2 {} 272 lappend stack [list path [InnerSplit $locpath leftover2]] 273 set locpath $leftover2 274 unset leftover2 275 set mode term 276 } 277 } 278 term { 279 # We're looking for an expression operator 280 if {[regexp ^-(.*) $locpath discard locpath]} { 281 # UnaryExpr 282 set stack [linsert $stack 0 expr "-"] 283 set mode expr 284 } elseif {[regexp ^(and|or|\\=|!\\=|<|>|<\\=|>\\=|\\||\\+|\\-|\\*|div|mod)(.*) $locpath discard exprtype locpath]} { 285 # AndExpr, OrExpr, EqualityExpr, RelationalExpr or UnionExpr 286 set stack [linsert $stack 0 $exprtype] 287 set mode expr 288 } else { 289 return -code error "unexpected text \"$locpath\", expecting operator" 290 } 291 } 292 default { 293 # Should never be here! 294 return -code error "internal error" 295 } 296 } 297 } 298 299 set leftover $locpath 300 return $stack 301} 302 303# xpath::ResolveWildcard -- 304 305proc xpath::ResolveWildcard {nodetest typetest wildcard literal} { 306 variable nodeTypes 307 308 switch -glob -- [string length $nodetest],[string length $typetest],[string length $wildcard],[string length $literal] { 309 0,0,0,* { 310 return -code error "bad location step (nothing parsed)" 311 } 312 0,0,* { 313 # Name wildcard specified 314 return * 315 } 316 *,0,0,* { 317 # Element type test - nothing to do 318 return $nodetest 319 } 320 *,0,*,* { 321 # Internal error? 322 return -code error "bad location step (found both nodetest and wildcard)" 323 } 324 *,*,0,0 { 325 # Node type test 326 if {[lsearch $nodeTypes $nodetest] < 0} { 327 return -code error "unknown node type \"$typetest\"" 328 } 329 return [list $nodetest $typetest] 330 } 331 *,*,0,* { 332 # Node type test 333 if {[lsearch $nodeTypes $nodetest] < 0} { 334 return -code error "unknown node type \"$typetest\"" 335 } 336 return [list $nodetest $literal] 337 } 338 default { 339 # Internal error? 340 return -code error "bad location step" 341 } 342 } 343} 344 345# xpath::join -- 346# 347# Reconstitute an XPath location path from a 348# Tcl list representation. 349# 350# Arguments: 351# spath split path 352# 353# Results: 354# Returns an Xpath location path 355 356proc xpath::join spath { 357 return -code error "not yet implemented" 358} 359 360