1# uri.tcl -- 2# 3# URI parsing and fetch 4# 5# Copyright (c) 2000 Zveno Pty Ltd 6# Copyright (c) 2006 Pierre DAVID <Pierre.David@crc.u-strasbg.fr> 7# Copyright (c) 2006 Andreas Kupries <andreas_kupries@users.sourceforge.net> 8# Steve Ball, http://www.zveno.com/ 9# Derived from urls.tcl by Andreas Kupries 10# 11# TODO: 12# Handle www-url-encoding details 13# 14# CVS: $Id: uri.tcl,v 1.35 2007/01/11 19:35:23 andreas_kupries Exp $ 15 16package require Tcl 8.2 17 18namespace eval ::uri { 19 20 namespace export split join 21 namespace export resolve isrelative 22 namespace export geturl 23 namespace export canonicalize 24 namespace export register 25 26 variable file:counter 0 27 28 # extend these variable in the coming namespaces 29 variable schemes {} 30 variable schemePattern "" 31 variable url "" 32 variable url2part 33 array set url2part {} 34 35 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 36 # basic regular expressions used in URL syntax. 37 38 namespace eval basic { 39 variable loAlpha {[a-z]} 40 variable hiAlpha {[A-Z]} 41 variable digit {[0-9]} 42 variable alpha {[a-zA-Z]} 43 variable safe {[$_.+-]} 44 variable extra {[!*'(,)]} 45 # danger in next pattern, order important for [] 46 variable national {[][|\}\{\^~`]} 47 variable punctuation {[<>#%"]} ;#" fake emacs hilit 48 variable reserved {[;/?:@&=]} 49 variable hex {[0-9A-Fa-f]} 50 variable alphaDigit {[A-Za-z0-9]} 51 variable alphaDigitMinus {[A-Za-z0-9-]} 52 53 # next is <national | punctuation> 54 variable unsafe {[][<>"#%\{\}|\\^~`]} ;#" emacs hilit 55 variable escape "%${hex}${hex}" 56 57 # unreserved = alpha | digit | safe | extra 58 # xchar = unreserved | reserved | escape 59 60 variable unreserved {[a-zA-Z0-9$_.+!*'(,)-]} 61 variable uChar "(${unreserved}|${escape})" 62 variable xCharN {[a-zA-Z0-9$_.+!*'(,);/?:@&=-]} 63 variable xChar "(${xCharN}|${escape})" 64 variable digits "${digit}+" 65 66 variable toplabel \ 67 "(${alpha}${alphaDigitMinus}*${alphaDigit}|${alpha})" 68 variable domainlabel \ 69 "(${alphaDigit}${alphaDigitMinus}*${alphaDigit}|${alphaDigit})" 70 71 variable hostname \ 72 "((${domainlabel}\\.)*${toplabel})" 73 variable hostnumber \ 74 "(${digits}\\.${digits}\\.${digits}\\.${digits})" 75 76 variable host "(${hostname}|${hostnumber})" 77 78 variable port $digits 79 variable hostOrPort "${host}(:${port})?" 80 81 variable usrCharN {[a-zA-Z0-9$_.+!*'(,);?&=-]} 82 variable usrChar "(${usrCharN}|${escape})" 83 variable user "${usrChar}*" 84 variable password $user 85 variable login "(${user}(:${password})?@)?${hostOrPort}" 86 } ;# basic {} 87} 88 89 90# ::uri::register -- 91# 92# Register a scheme (and aliases) in the package. The command 93# creates a namespace below "::uri" with the same name as the 94# scheme and executes the script declaring the pattern variables 95# for this scheme in the new namespace. At last it updates the 96# uri variables keeping track of overall scheme information. 97# 98# The script has to declare at least the variable "schemepart", 99# the pattern for an url of the registered scheme after the 100# scheme declaration. Not declaring this variable is an error. 101# 102# Arguments: 103# schemeList Name of the scheme to register, plus aliases 104# script Script declaring the scheme patterns 105# 106# Results: 107# None. 108 109proc ::uri::register {schemeList script} { 110 variable schemes 111 variable schemePattern 112 variable url 113 variable url2part 114 115 # Check scheme and its aliases for existence. 116 foreach scheme $schemeList { 117 if {[lsearch -exact $schemes $scheme] >= 0} { 118 return -code error \ 119 "trying to register scheme (\"$scheme\") which is already known" 120 } 121 } 122 123 # Get the main scheme 124 set scheme [lindex $schemeList 0] 125 126 if {[catch {namespace eval $scheme $script} msg]} { 127 catch {namespace delete $scheme} 128 return -code error \ 129 "error while evaluating scheme script: $msg" 130 } 131 132 if {![info exists ${scheme}::schemepart]} { 133 namespace delete $scheme 134 return -code error \ 135 "Variable \"schemepart\" is missing." 136 } 137 138 # Now we can extend the variables which keep track of the registered schemes. 139 140 eval [linsert $schemeList 0 lappend schemes] 141 set schemePattern "([::join $schemes |]):" 142 143 foreach s $schemeList { 144 # FRINK: nocheck 145 set url2part($s) "${s}:[set ${scheme}::schemepart]" 146 # FRINK: nocheck 147 append url "(${s}:[set ${scheme}::schemepart])|" 148 } 149 set url [string trimright $url |] 150 return 151} 152 153# ::uri::split -- 154# 155# Splits the given <a url> into its constituents. 156# 157# Arguments: 158# url the URL to split 159# 160# Results: 161# Tcl list containing constituents, suitable for 'array set'. 162 163proc ::uri::split {url {defaultscheme http}} { 164 165 set url [string trim $url] 166 set scheme {} 167 168 # RFC 1738: scheme = 1*[ lowalpha | digit | "+" | "-" | "." ] 169 regexp -- {^([a-z0-9+.-][a-z0-9+.-]*):} $url dummy scheme 170 171 if {$scheme == {}} { 172 set scheme $defaultscheme 173 } 174 175 # ease maintenance: dynamic dispatch, able to handle all schemes 176 # added in future! 177 178 if {[::info procs Split[string totitle $scheme]] == {}} { 179 error "unknown scheme '$scheme' in '$url'" 180 } 181 182 regsub -- "^${scheme}:" $url {} url 183 184 set parts(scheme) $scheme 185 array set parts [Split[string totitle $scheme] $url] 186 187 # should decode all encoded characters! 188 189 return [array get parts] 190} 191 192proc ::uri::SplitFtp {url} { 193 # @c Splits the given ftp-<a url> into its constituents. 194 # @a url: The url to split, without! scheme specification. 195 # @r List containing the constituents, suitable for 'array set'. 196 197 # general syntax: 198 # //<user>:<password>@<host>:<port>/<cwd1>/.../<cwdN>/<name>;type=<typecode> 199 # 200 # additional rules: 201 # 202 # <user>:<password> are optional, detectable by presence of @. 203 # <password> is optional too. 204 # 205 # "//" [ <user> [":" <password> ] "@"] <host> [":" <port>] "/" 206 # <cwd1> "/" ..."/" <cwdN> "/" <name> [";type=" <typecode>] 207 208 upvar \#0 [namespace current]::ftp::typepart ftptype 209 210 array set parts {user {} pwd {} host {} port {} path {} type {}} 211 212 # slash off possible type specification 213 214 if {[regexp -indices -- "${ftptype}$" $url dummy ftype]} { 215 216 set from [lindex $ftype 0] 217 set to [lindex $ftype 1] 218 219 set parts(type) [string range $url $from $to] 220 221 set from [lindex $dummy 0] 222 set url [string replace $url $from end] 223 } 224 225 # Handle user, password, host and port 226 227 if {[string match "//*" $url]} { 228 set url [string range $url 2 end] 229 230 array set parts [GetUPHP url] 231 } 232 233 set parts(path) [string trimleft $url /] 234 235 return [array get parts] 236} 237 238proc ::uri::JoinFtp args { 239 array set components { 240 user {} pwd {} host {} port {} 241 path {} type {} 242 } 243 array set components $args 244 245 set userPwd {} 246 if {[string length $components(user)] || [string length $components(pwd)]} { 247 set userPwd $components(user)[expr {[string length $components(pwd)] ? ":$components(pwd)" : {}}]@ 248 } 249 250 set port {} 251 if {[string length $components(port)]} { 252 set port :$components(port) 253 } 254 255 set type {} 256 if {[string length $components(type)]} { 257 set type \;type=$components(type) 258 } 259 260 return ftp://${userPwd}$components(host)${port}/[string trimleft $components(path) /]$type 261} 262 263proc ::uri::SplitHttps {url} { 264 return [SplitHttp $url] 265} 266 267proc ::uri::SplitHttp {url} { 268 # @c Splits the given http-<a url> into its constituents. 269 # @a url: The url to split, without! scheme specification. 270 # @r List containing the constituents, suitable for 'array set'. 271 272 # general syntax: 273 # //<host>:<port>/<path>?<searchpart> 274 # 275 # where <host> and <port> are as described in Section 3.1. If :<port> 276 # is omitted, the port defaults to 80. No user name or password is 277 # allowed. <path> is an HTTP selector, and <searchpart> is a query 278 # string. The <path> is optional, as is the <searchpart> and its 279 # preceding "?". If neither <path> nor <searchpart> is present, the "/" 280 # may also be omitted. 281 # 282 # Within the <path> and <searchpart> components, "/", ";", "?" are 283 # reserved. The "/" character may be used within HTTP to designate a 284 # hierarchical structure. 285 # 286 # path == <cwd1> "/" ..."/" <cwdN> "/" <name> ["#" <fragment>] 287 288 upvar #0 [namespace current]::http::search search 289 upvar #0 [namespace current]::http::segment segment 290 291 array set parts {host {} port {} path {} query {}} 292 293 set searchPattern "\\?(${search})\$" 294 set fragmentPattern "#(${segment})\$" 295 296 # slash off possible query. the 'search' regexp, while official, 297 # is not good enough. We have apparently lots of urls in the wild 298 # which contain unquoted urls with queries in a query. The RE 299 # finds the embedded query, not the actual one. Using string first 300 # now instead of a RE 301 302 if {[set pos [string first ? $url]] >= 0} { 303 incr pos 304 set parts(query) [string range $url $pos end] 305 incr pos -1 306 set url [string replace $url $pos end] 307 } 308 309 # slash off possible fragment 310 311 if {[regexp -indices -- $fragmentPattern $url match fragment]} { 312 set from [lindex $fragment 0] 313 set to [lindex $fragment 1] 314 315 set parts(fragment) [string range $url $from $to] 316 317 set url [string replace $url [lindex $match 0] end] 318 } 319 320 if {[string match "//*" $url]} { 321 set url [string range $url 2 end] 322 323 array set parts [GetUPHP url] 324 } 325 326 set parts(path) [string trimleft $url /] 327 328 return [array get parts] 329} 330 331proc ::uri::JoinHttp {args} { 332 return [eval [linsert $args 0 ::uri::JoinHttpInner http 80]] 333} 334 335proc ::uri::JoinHttps {args} { 336 return [eval [linsert $args 0 ::uri::JoinHttpInner https 443]] 337} 338 339proc ::uri::JoinHttpInner {scheme defport args} { 340 array set components {host {} path {} query {}} 341 set components(port) $defport 342 array set components $args 343 344 set port {} 345 if {[string length $components(port)] && $components(port) != $defport} { 346 set port :$components(port) 347 } 348 349 set query {} 350 if {[string length $components(query)]} { 351 set query ?$components(query) 352 } 353 354 regsub -- {^/} $components(path) {} components(path) 355 356 if { [info exists components(fragment)] && $components(fragment) != "" } { 357 set components(fragment) "#$components(fragment)" 358 } else { 359 set components(fragment) "" 360 } 361 362 return $scheme://$components(host)$port/$components(path)$components(fragment)$query 363} 364 365proc ::uri::SplitFile {url} { 366 # @c Splits the given file-<a url> into its constituents. 367 # @a url: The url to split, without! scheme specification. 368 # @r List containing the constituents, suitable for 'array set'. 369 370 upvar #0 [namespace current]::basic::hostname hostname 371 upvar #0 [namespace current]::basic::hostnumber hostnumber 372 373 if {[string match "//*" $url]} { 374 set url [string range $url 2 end] 375 376 set hostPattern "^($hostname|$hostnumber)" 377 switch -exact -- $::tcl_platform(platform) { 378 windows { 379 # Catch drive letter 380 append hostPattern :? 381 } 382 default { 383 # Proceed as usual 384 } 385 } 386 387 if {[regexp -indices -- $hostPattern $url match host]} { 388 set fh [lindex $host 0] 389 set th [lindex $host 1] 390 391 set parts(host) [string range $url $fh $th] 392 393 set matchEnd [lindex $match 1] 394 incr matchEnd 395 396 set url [string range $url $matchEnd end] 397 } 398 } 399 400 set parts(path) $url 401 402 return [array get parts] 403} 404 405proc ::uri::JoinFile args { 406 array set components { 407 host {} port {} path {} 408 } 409 array set components $args 410 411 switch -exact -- $::tcl_platform(platform) { 412 windows { 413 if {[string length $components(host)]} { 414 return file://$components(host):$components(path) 415 } else { 416 return file://$components(path) 417 } 418 } 419 default { 420 return file://$components(host)$components(path) 421 } 422 } 423} 424 425proc ::uri::SplitMailto {url} { 426 # @c Splits the given mailto-<a url> into its constituents. 427 # @a url: The url to split, without! scheme specification. 428 # @r List containing the constituents, suitable for 'array set'. 429 430 if {[string match "*@*" $url]} { 431 set url [::split $url @] 432 return [list user [lindex $url 0] host [lindex $url 1]] 433 } else { 434 return [list user $url] 435 } 436} 437 438proc ::uri::JoinMailto args { 439 array set components { 440 user {} host {} 441 } 442 array set components $args 443 444 return mailto:$components(user)@$components(host) 445} 446 447proc ::uri::SplitNews {url} { 448 if { [string first @ $url] >= 0 } { 449 return [list message-id $url] 450 } else { 451 return [list newsgroup-name $url] 452 } 453} 454 455proc ::uri::JoinNews args { 456 array set components { 457 message-id {} newsgroup-name {} 458 } 459 array set components $args 460 return news:$components(message-id)$components(newsgroup-name) 461} 462 463proc ::uri::SplitLdaps {url} { 464 ::uri::SplitLdap $url 465} 466 467proc ::uri::SplitLdap {url} { 468 # @c Splits the given Ldap-<a url> into its constituents. 469 # @a url: The url to split, without! scheme specification. 470 # @r List containing the constituents, suitable for 'array set'. 471 472 # general syntax: 473 # //<host>:<port>/<dn>?<attrs>?<scope>?<filter>?<extensions> 474 # 475 # where <host> and <port> are as described in Section 5 of RFC 1738. 476 # No user name or password is allowed. 477 # If omitted, the port defaults to 389 for ldap, 636 for ldaps 478 # <dn> is the base DN for the search 479 # <attrs> is a comma separated list of attributes description 480 # <scope> is either "base", "one" or "sub". 481 # <filter> is a RFC 2254 filter specification 482 # <extensions> are documented in RFC 2255 483 # 484 485 array set parts {host {} port {} dn {} attrs {} scope {} filter {} extensions {}} 486 487 # host port dn attrs scope filter extns 488 set re {//([^:?/]+)(?::([0-9]+))?(?:/([^?]+)(?:\?([^?]*)(?:\?(base|one|sub)?(?:\?([^?]*)(?:\?(.*))?)?)?)?)?} 489 490 if {! [regexp $re $url match parts(host) parts(port) \ 491 parts(dn) parts(attrs) parts(scope) parts(filter) \ 492 parts(extensions)]} then { 493 return -code error "unable to match URL \"$url\"" 494 } 495 496 set parts(attrs) [::split $parts(attrs) ","] 497 498 return [array get parts] 499} 500 501proc ::uri::JoinLdap {args} { 502 return [eval [linsert $args 0 ::uri::JoinLdapInner ldap 389]] 503} 504 505proc ::uri::JoinLdaps {args} { 506 return [eval [linsert $args 0 ::uri::JoinLdapInner ldaps 636]] 507} 508 509proc ::uri::JoinLdapInner {scheme defport args} { 510 array set components {host {} port {} dn {} attrs {} scope {} filter {} extensions {}} 511 set components(port) $defport 512 array set components $args 513 514 set port {} 515 if {[string length $components(port)] && $components(port) != $defport} { 516 set port :$components(port) 517 } 518 519 set url "$scheme://$components(host)$port" 520 521 set components(attrs) [::join $components(attrs) ","] 522 523 set s "" 524 foreach c {dn attrs scope filter extensions} { 525 if {[string equal $c "dn"]} then { 526 append s "/" 527 } else { 528 append s "?" 529 } 530 if {! [string equal $components($c) ""]} then { 531 append url "${s}$components($c)" 532 set s "" 533 } 534 } 535 536 return $url 537} 538 539proc ::uri::GetUPHP {urlvar} { 540 # @c Parse user, password host and port out of the url stored in 541 # @c variable <a urlvar>. 542 # @d Side effect: The extracted information is removed from the given url. 543 # @r List containing the extracted information in a format suitable for 544 # @r 'array set'. 545 # @a urlvar: Name of the variable containing the url to parse. 546 547 upvar \#0 [namespace current]::basic::user user 548 upvar \#0 [namespace current]::basic::password password 549 upvar \#0 [namespace current]::basic::hostname hostname 550 upvar \#0 [namespace current]::basic::hostnumber hostnumber 551 upvar \#0 [namespace current]::basic::port port 552 553 upvar $urlvar url 554 555 array set parts {user {} pwd {} host {} port {}} 556 557 # syntax 558 # "//" [ <user> [":" <password> ] "@"] <host> [":" <port>] "/" 559 # "//" already cut off by caller 560 561 set upPattern "^(${user})(:(${password}))?@" 562 563 if {[regexp -indices -- $upPattern $url match theUser c d thePassword]} { 564 set fu [lindex $theUser 0] 565 set tu [lindex $theUser 1] 566 567 set fp [lindex $thePassword 0] 568 set tp [lindex $thePassword 1] 569 570 set parts(user) [string range $url $fu $tu] 571 set parts(pwd) [string range $url $fp $tp] 572 573 set matchEnd [lindex $match 1] 574 incr matchEnd 575 576 set url [string range $url $matchEnd end] 577 } 578 579 set hpPattern "^($hostname|$hostnumber)(:($port))?" 580 581 if {[regexp -indices -- $hpPattern $url match theHost c d e f g h thePort]} { 582 set fh [lindex $theHost 0] 583 set th [lindex $theHost 1] 584 585 set fp [lindex $thePort 0] 586 set tp [lindex $thePort 1] 587 588 set parts(host) [string range $url $fh $th] 589 set parts(port) [string range $url $fp $tp] 590 591 set matchEnd [lindex $match 1] 592 incr matchEnd 593 594 set url [string range $url $matchEnd end] 595 } 596 597 return [array get parts] 598} 599 600proc ::uri::GetHostPort {urlvar} { 601 # @c Parse host and port out of the url stored in variable <a urlvar>. 602 # @d Side effect: The extracted information is removed from the given url. 603 # @r List containing the extracted information in a format suitable for 604 # @r 'array set'. 605 # @a urlvar: Name of the variable containing the url to parse. 606 607 upvar #0 [namespace current]::basic::hostname hostname 608 upvar #0 [namespace current]::basic::hostnumber hostnumber 609 upvar #0 [namespace current]::basic::port port 610 611 upvar $urlvar url 612 613 set pattern "^(${hostname}|${hostnumber})(:(${port}))?" 614 615 if {[regexp -indices -- $pattern $url match host c d e f g h thePort]} { 616 set fromHost [lindex $host 0] 617 set toHost [lindex $host 1] 618 619 set fromPort [lindex $thePort 0] 620 set toPort [lindex $thePort 1] 621 622 set parts(host) [string range $url $fromHost $toHost] 623 set parts(port) [string range $url $fromPort $toPort] 624 625 set matchEnd [lindex $match 1] 626 incr matchEnd 627 628 set url [string range $url $matchEnd end] 629 } 630 631 return [array get parts] 632} 633 634# ::uri::resolve -- 635# 636# Resolve an arbitrary URL, given a base URL 637# 638# Arguments: 639# base base URL (absolute) 640# url arbitrary URL 641# 642# Results: 643# Returns a URL 644 645proc ::uri::resolve {base url} { 646 if {[string length $url]} { 647 if {[isrelative $url]} { 648 649 array set baseparts [split $base] 650 651 switch -- $baseparts(scheme) { 652 http - 653 https - 654 ftp - 655 file { 656 array set relparts [split $url] 657 if { [string match /* $url] } { 658 catch { set baseparts(path) $relparts(path) } 659 } elseif { [string match */ $baseparts(path)] } { 660 set baseparts(path) "$baseparts(path)$relparts(path)" 661 } else { 662 if { [string length $relparts(path)] > 0 } { 663 set path [lreplace [::split $baseparts(path) /] end end] 664 set baseparts(path) "[::join $path /]/$relparts(path)" 665 } 666 } 667 catch { set baseparts(query) $relparts(query) } 668 catch { set baseparts(fragment) $relparts(fragment) } 669 return [eval [linsert [array get baseparts] 0 join]] 670 } 671 default { 672 return -code error "unable to resolve relative URL \"$url\"" 673 } 674 } 675 676 } else { 677 return $url 678 } 679 } else { 680 return $base 681 } 682} 683 684# ::uri::isrelative -- 685# 686# Determines whether a URL is absolute or relative 687# 688# Arguments: 689# url URL to check 690# 691# Results: 692# Returns 1 if the URL is relative, 0 otherwise 693 694proc ::uri::isrelative url { 695 return [expr {![regexp -- {^[a-z0-9+-.][a-z0-9+-.]*:} $url]}] 696} 697 698# ::uri::geturl -- 699# 700# Fetch the data from an arbitrary URL. 701# 702# This package provides a handler for the file: 703# scheme, since this conflicts with the file command. 704# 705# Arguments: 706# url address of data resource 707# args configuration options 708# 709# Results: 710# Depends on scheme 711 712proc ::uri::geturl {url args} { 713 array set urlparts [split $url] 714 715 switch -- $urlparts(scheme) { 716 file { 717 return [eval [linsert $args 0 file_geturl $url]] 718 } 719 default { 720 # Load a geturl package for the scheme first and only if 721 # that fails the scheme package itself. This prevents 722 # cyclic dependencies between packages. 723 if {[catch {package require $urlparts(scheme)::geturl}]} { 724 package require $urlparts(scheme) 725 } 726 return [eval [linsert $args 0 $urlparts(scheme)::geturl $url]] 727 } 728 } 729} 730 731# ::uri::file_geturl -- 732# 733# geturl implementation for file: scheme 734# 735# TODO: 736# This is an initial, basic implementation. 737# Eventually want to support all options for geturl. 738# 739# Arguments: 740# url URL to fetch 741# args configuration options 742# 743# Results: 744# Returns data from file 745 746proc ::uri::file_geturl {url args} { 747 variable file:counter 748 749 set var [namespace current]::file[incr file:counter] 750 upvar #0 $var state 751 array set state {data {}} 752 753 array set parts [split $url] 754 755 set ch [open $parts(path)] 756 # Could determine text/binary from file extension, 757 # except on Macintosh 758 # fconfigure $ch -translation binary 759 set state(data) [read $ch] 760 close $ch 761 762 return $var 763} 764 765# ::uri::join -- 766# 767# Format a URL 768# 769# Arguments: 770# args components, key-value format 771# 772# Results: 773# A URL 774 775proc ::uri::join args { 776 array set components $args 777 778 return [eval [linsert $args 0 Join[string totitle $components(scheme)]]] 779} 780 781# ::uri::canonicalize -- 782# 783# Canonicalize a URL 784# 785# Acknowledgements: 786# Andreas Kupries <andreas_kupries@users.sourceforge.net> 787# 788# Arguments: 789# uri URI (which contains a path component) 790# 791# Results: 792# The canonical form of the URI 793 794proc ::uri::canonicalize uri { 795 796 # Make uri canonical with respect to dots (path changing commands) 797 # 798 # Remove single dots (.) => pwd not changing 799 # Remove double dots (..) => gobble previous segment of path 800 # 801 # Fixes for this command: 802 # 803 # * Ignore any url which cannot be split into components by this 804 # module. Just assume that such urls do not have a path to 805 # canonicalize. 806 # 807 # * Ignore any url which could be split into components, but does 808 # not have a path component. 809 # 810 # In the text above 'ignore' means 811 # 'return the url unchanged to the caller'. 812 813 if {[catch {array set u [::uri::split $uri]}]} { 814 return $uri 815 } 816 if {![info exists u(path)]} { 817 return $uri 818 } 819 820 set uri $u(path) 821 822 # Remove leading "./" "../" "/.." (and "/../") 823 regsub -all -- {^(\./)+} $uri {} uri 824 regsub -all -- {^/(\.\./)+} $uri {/} uri 825 regsub -all -- {^(\.\./)+} $uri {} uri 826 827 # Remove inner /./ and /../ 828 while {[regsub -all -- {/\./} $uri {/} uri]} {} 829 while {[regsub -all -- {/[^/]+/\.\./} $uri {/} uri]} {} 830 while {[regsub -all -- {^[^/]+/\.\./} $uri {} uri]} {} 831 # Munge trailing /.. 832 while {[regsub -all -- {/[^/]+/\.\.} $uri {/} uri]} {} 833 if { $uri == ".." } { set uri "/" } 834 835 set u(path) $uri 836 set uri [eval [linsert [array get u] 0 ::uri::join]] 837 838 return $uri 839} 840 841# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 842# regular expressions covering various url schemes 843 844# Currently known URL schemes: 845# 846# (RFC 1738) 847# ------------------------------------------------ 848# scheme basic syntax of scheme specific part 849# ------------------------------------------------ 850# ftp //<user>:<password>@<host>:<port>/<cwd1>/.../<cwdN>/<name>;type=<typecode> 851# 852# http //<host>:<port>/<path>?<searchpart> 853# 854# gopher //<host>:<port>/<gophertype><selector> 855# <gophertype><selector>%09<search> 856# <gophertype><selector>%09<search>%09<gopher+_string> 857# 858# mailto <rfc822-addr-spec> 859# news <newsgroup-name> 860# <message-id> 861# nntp //<host>:<port>/<newsgroup-name>/<article-number> 862# telnet //<user>:<password>@<host>:<port>/ 863# wais //<host>:<port>/<database> 864# //<host>:<port>/<database>?<search> 865# //<host>:<port>/<database>/<wtype>/<wpath> 866# file //<host>/<path> 867# prospero //<host>:<port>/<hsoname>;<field>=<value> 868# ------------------------------------------------ 869# 870# (RFC 2111) 871# ------------------------------------------------ 872# scheme basic syntax of scheme specific part 873# ------------------------------------------------ 874# mid message-id 875# message-id/content-id 876# cid content-id 877# ------------------------------------------------ 878# 879# (RFC 2255) 880# ------------------------------------------------ 881# scheme basic syntax of scheme specific part 882# ------------------------------------------------ 883# ldap //<host>:<port>/<dn>?<attrs>?<scope>?<filter>?<extensions> 884# ------------------------------------------------ 885 886# FTP 887uri::register ftp { 888 variable escape [set [namespace parent [namespace current]]::basic::escape] 889 variable login [set [namespace parent [namespace current]]::basic::login] 890 891 variable charN {[a-zA-Z0-9$_.+!*'(,)?:@&=-]} 892 variable char "(${charN}|${escape})" 893 variable segment "${char}*" 894 variable path "${segment}(/${segment})*" 895 896 variable type {[AaDdIi]} 897 variable typepart ";type=(${type})" 898 variable schemepart \ 899 "//${login}(/${path}(${typepart})?)?" 900 901 variable url "ftp:${schemepart}" 902} 903 904# FILE 905uri::register file { 906 variable host [set [namespace parent [namespace current]]::basic::host] 907 variable path [set [namespace parent [namespace current]]::ftp::path] 908 909 variable schemepart "//(${host}|localhost)?/${path}" 910 variable url "file:${schemepart}" 911} 912 913# HTTP 914uri::register http { 915 variable escape \ 916 [set [namespace parent [namespace current]]::basic::escape] 917 variable hostOrPort \ 918 [set [namespace parent [namespace current]]::basic::hostOrPort] 919 920 variable charN {[a-zA-Z0-9$_.+!*'(,);:@&=-]} 921 variable char "($charN|${escape})" 922 variable segment "${char}*" 923 924 variable path "${segment}(/${segment})*" 925 variable search $segment 926 variable schemepart \ 927 "//${hostOrPort}(/${path}(\\?${search})?)?" 928 929 variable url "http:${schemepart}" 930} 931 932# GOPHER 933uri::register gopher { 934 variable xChar \ 935 [set [namespace parent [namespace current]]::basic::xChar] 936 variable hostOrPort \ 937 [set [namespace parent [namespace current]]::basic::hostOrPort] 938 variable search \ 939 [set [namespace parent [namespace current]]::http::search] 940 941 variable type $xChar 942 variable selector "$xChar*" 943 variable string $selector 944 variable schemepart \ 945 "//${hostOrPort}(/(${type}(${selector}(%09${search}(%09${string})?)?)?)?)?" 946 variable url "gopher:${schemepart}" 947} 948 949# MAILTO 950uri::register mailto { 951 variable xChar [set [namespace parent [namespace current]]::basic::xChar] 952 variable host [set [namespace parent [namespace current]]::basic::host] 953 954 variable schemepart "$xChar+(@${host})?" 955 variable url "mailto:${schemepart}" 956} 957 958# NEWS 959uri::register news { 960 variable escape [set [namespace parent [namespace current]]::basic::escape] 961 variable alpha [set [namespace parent [namespace current]]::basic::alpha] 962 variable host [set [namespace parent [namespace current]]::basic::host] 963 964 variable aCharN {[a-zA-Z0-9$_.+!*'(,);/?:&=-]} 965 variable aChar "($aCharN|${escape})" 966 variable gChar {[a-zA-Z0-9$_.+-]} 967 variable newsgroup-name "${alpha}${gChar}*" 968 variable message-id "${aChar}+@${host}" 969 variable schemepart "\\*|${newsgroup-name}|${message-id}" 970 variable url "news:${schemepart}" 971} 972 973# WAIS 974uri::register wais { 975 variable uChar \ 976 [set [namespace parent [namespace current]]::basic::xChar] 977 variable hostOrPort \ 978 [set [namespace parent [namespace current]]::basic::hostOrPort] 979 variable search \ 980 [set [namespace parent [namespace current]]::http::search] 981 982 variable db "${uChar}*" 983 variable type "${uChar}*" 984 variable path "${uChar}*" 985 986 variable database "//${hostOrPort}/${db}" 987 variable index "//${hostOrPort}/${db}\\?${search}" 988 variable doc "//${hostOrPort}/${db}/${type}/${path}" 989 990 #variable schemepart "${doc}|${index}|${database}" 991 992 variable schemepart \ 993 "//${hostOrPort}/${db}((\\?${search})|(/${type}/${path}))?" 994 995 variable url "wais:${schemepart}" 996} 997 998# PROSPERO 999uri::register prospero { 1000 variable escape \ 1001 [set [namespace parent [namespace current]]::basic::escape] 1002 variable hostOrPort \ 1003 [set [namespace parent [namespace current]]::basic::hostOrPort] 1004 variable path \ 1005 [set [namespace parent [namespace current]]::ftp::path] 1006 1007 variable charN {[a-zA-Z0-9$_.+!*'(,)?:@&-]} 1008 variable char "(${charN}|$escape)" 1009 1010 variable fieldname "${char}*" 1011 variable fieldvalue "${char}*" 1012 variable fieldspec ";${fieldname}=${fieldvalue}" 1013 1014 variable schemepart "//${hostOrPort}/${path}(${fieldspec})*" 1015 variable url "prospero:$schemepart" 1016} 1017 1018# LDAP 1019uri::register ldap { 1020 variable hostOrPort \ 1021 [set [namespace parent [namespace current]]::basic::hostOrPort] 1022 1023 # very crude parsing 1024 variable dn {[^?]*} 1025 variable attrs {[^?]*} 1026 variable scope "base|one|sub" 1027 variable filter {[^?]*} 1028 # extensions are not handled yet 1029 1030 variable schemepart "//${hostOrPort}(/${dn}(\?${attrs}(\?(${scope})(\?${filter})?)?)?)?" 1031 variable url "ldap:$schemepart" 1032} 1033 1034package provide uri 1.2.1 1035