1# http.tcl -- 2# 3# Client-side HTTP for GET, POST, and HEAD commands. These routines can 4# be used in untrusted code that uses the Safesock security policy. These 5# procedures use a callback interface to avoid using vwait, which is not 6# defined in the safe base. 7# 8# See the file "license.terms" for information on usage and redistribution of 9# this file, and for a DISCLAIMER OF ALL WARRANTIES. 10# 11# RCS: @(#) $Id: http.tcl,v 1.8 2008/08/11 22:14:26 patthoyts Exp $ 12 13# Rough version history post-core-split: 14# 2.5 Added HTTP/1.1 support for persistent connections. New options 15# -protocol, -keepalive, -socketvar. (Pat Thoyts) 16# 2.6 Added support for HTTP/1.1 extensions. New option -method used 17# for WebDav. (Vince Darley) 18# 2.6.1 Synchronized with Tcl http 2.4.4 (encoding enhancements) 19# 2.6.2 Removed to -socketvar option and now handle socket usage internally 20# 2.6.3 Added support for chunked encoding. 21# 2.6.4 Merged in jcw's webdav mods to fix the chunked transfer 22# 2.6.5 Merged up to 2.5.3 from tcl cvs (formMap, url decomposition) 23# 2.6.6 Support content-encoding gzip. Handle 0 length body in chunked. 24# 2.6.7 Merged up to 2.5.5 from tcl cvs, whitespace corrections 25# 2.6.8 Merged with core version in 8.5.2 and 8.4.19 and above changes. 26# Core is 2.7, this v2.6.8 has defaultKeepalive 1 and different 27# default -useragent. 28# 2.6.9 Merged fix for zlib crc check on 64bit systems. 29 30package require Tcl 8.4 31# keep this in sync with pkgIndex.tcl 32package provide http 2.6.9 33 34namespace eval http { 35 # Allow resourcing to not clobber existing data 36 37 variable http 38 if {![info exists http]} { 39 array set http { 40 -accept */* 41 -proxyhost {} 42 -proxyport {} 43 -proxyfilter http::ProxyRequired 44 -urlencoding utf-8 45 } 46 # Use a Mozilla compatible useragent header to avoid problems with 47 # some web sites. 48 set http(-useragent) \ 49 "Mozilla/5.0 ([string totitle $::tcl_platform(platform)]; U;\ 50 $::tcl_platform(os) $::tcl_platform(osVersion))\ 51 http/[package provide http] Tcl/[package provide Tcl]" 52 } 53 54 proc init {} { 55 # Set up the map for quoting chars. RFC3986 Section 2.3 say percent 56 # encode all except: "... percent-encoded octets in the ranges of ALPHA 57 # (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), period (%2E), 58 # underscore (%5F), or tilde (%7E) should not be created by URI 59 # producers ..." 60 for {set i 0} {$i <= 256} {incr i} { 61 set c [format %c $i] 62 if {![string match {[-._~a-zA-Z0-9]} $c]} { 63 set map($c) %[format %.2x $i] 64 } 65 } 66 # These are handled specially 67 set map(\n) %0d%0a 68 variable formMap [array get map] 69 70 # Create a map for HTTP/1.1 open sockets 71 variable socketmap 72 if {[info exists socketmap]} { 73 # Close but don't remove open sockets on re-init 74 foreach {url sock} [array get socketmap] { 75 catch {close $sock} 76 } 77 } 78 array set socketmap {} 79 } 80 init 81 82 variable urlTypes 83 if {![info exists urlTypes]} { 84 set urlTypes(http) [list 80 ::socket] 85 } 86 87 variable encodings [string tolower [encoding names]] 88 # This can be changed, but iso8859-1 is the RFC standard. 89 variable defaultCharset 90 if {![info exists defaultCharset]} { 91 set defaultCharset "iso8859-1" 92 } 93 94 # Force RFC 3986 strictness in geturl url verification? 95 variable strict 96 if {![info exists strict]} { 97 set strict 1 98 } 99 100 # Let user control default keepalive for compatibility 101 variable defaultKeepalive 102 if {![info exists defaultKeepalive]} { 103 set defaultKeepalive 0 104 } 105 106 namespace export geturl config reset wait formatQuery register unregister 107 # Useful, but not exported: data size status code 108} 109 110# http::Log -- 111# 112# Debugging output -- define this to observe HTTP/1.1 socket usage. 113# Should echo any args received. 114# 115# Arguments: 116# msg Message to output 117# 118proc http::Log {args} {} 119 120# http::register -- 121# 122# See documentation for details. 123# 124# Arguments: 125# proto URL protocol prefix, e.g. https 126# port Default port for protocol 127# command Command to use to create socket 128# Results: 129# list of port and command that was registered. 130 131proc http::register {proto port command} { 132 variable urlTypes 133 set urlTypes($proto) [list $port $command] 134} 135 136# http::unregister -- 137# 138# Unregisters URL protocol handler 139# 140# Arguments: 141# proto URL protocol prefix, e.g. https 142# Results: 143# list of port and command that was unregistered. 144 145proc http::unregister {proto} { 146 variable urlTypes 147 if {![info exists urlTypes($proto)]} { 148 return -code error "unsupported url type \"$proto\"" 149 } 150 set old $urlTypes($proto) 151 unset urlTypes($proto) 152 return $old 153} 154 155# http::config -- 156# 157# See documentation for details. 158# 159# Arguments: 160# args Options parsed by the procedure. 161# Results: 162# TODO 163 164proc http::config {args} { 165 variable http 166 set options [lsort [array names http -*]] 167 set usage [join $options ", "] 168 if {[llength $args] == 0} { 169 set result {} 170 foreach name $options { 171 lappend result $name $http($name) 172 } 173 return $result 174 } 175 set options [string map {- ""} $options] 176 set pat ^-([join $options |])$ 177 if {[llength $args] == 1} { 178 set flag [lindex $args 0] 179 if {[regexp -- $pat $flag]} { 180 return $http($flag) 181 } else { 182 return -code error "Unknown option $flag, must be: $usage" 183 } 184 } else { 185 foreach {flag value} $args { 186 if {[regexp -- $pat $flag]} { 187 set http($flag) $value 188 } else { 189 return -code error "Unknown option $flag, must be: $usage" 190 } 191 } 192 } 193} 194 195# http::Finish -- 196# 197# Clean up the socket and eval close time callbacks 198# 199# Arguments: 200# token Connection token. 201# errormsg (optional) If set, forces status to error. 202# skipCB (optional) If set, don't call the -command callback. This 203# is useful when geturl wants to throw an exception instead 204# of calling the callback. That way, the same error isn't 205# reported to two places. 206# 207# Side Effects: 208# Closes the socket 209 210proc http::Finish { token {errormsg ""} {skipCB 0}} { 211 variable $token 212 upvar 0 $token state 213 global errorInfo errorCode 214 if {$errormsg ne ""} { 215 set state(error) [list $errormsg $errorInfo $errorCode] 216 set state(status) "error" 217 } 218 if {($state(status) eq "timeout") || ($state(status) eq "error") 219 || ([info exists state(connection)] && ($state(connection) eq "close")) 220 } { 221 CloseSocket $state(sock) $token 222 } 223 if {[info exists state(after)]} { after cancel $state(after) } 224 if {[info exists state(-command)] && !$skipCB} { 225 if {[catch {eval $state(-command) {$token}} err]} { 226 if {$errormsg eq ""} { 227 set state(error) [list $err $errorInfo $errorCode] 228 set state(status) error 229 } 230 } 231 # Command callback may already have unset our state 232 unset -nocomplain state(-command) 233 } 234} 235 236# http::CloseSocket - 237# 238# Close a socket and remove it from the persistent sockets table. 239# If possible an http token is included here but when we are called 240# from a fileevent on remote closure we need to find the correct 241# entry - hence the second section. 242 243proc ::http::CloseSocket {s {token {}}} { 244 variable socketmap 245 catch {fileevent $s readable {}} 246 set conn_id {} 247 if {$token ne ""} { 248 variable $token 249 upvar 0 $token state 250 if {[info exists state(socketinfo)]} { 251 set conn_id $state(socketinfo) 252 } 253 } else { 254 set map [array get socketmap] 255 set ndx [lsearch -exact $map $s] 256 if {$ndx != -1} { 257 incr ndx -1 258 set conn_id [lindex $map $ndx] 259 } 260 } 261 if {$conn_id eq {} || ![info exists socketmap($conn_id)]} { 262 Log "Closing socket $s (no connection info)" 263 if {[catch {close $s} err]} { Log "Error: $err" } 264 } else { 265 if {[info exists socketmap($conn_id)]} { 266 Log "Closing connection $conn_id (sock $socketmap($conn_id))" 267 if {[catch {close $socketmap($conn_id)} err]} { Log "Error: $err" } 268 unset socketmap($conn_id) 269 } else { 270 Log "Cannot close connection $conn_id - no socket in socket map" 271 } 272 } 273} 274 275# http::reset -- 276# 277# See documentation for details. 278# 279# Arguments: 280# token Connection token. 281# why Status info. 282# 283# Side Effects: 284# See Finish 285 286proc http::reset { token {why reset} } { 287 variable $token 288 upvar 0 $token state 289 set state(status) $why 290 catch {fileevent $state(sock) readable {}} 291 catch {fileevent $state(sock) writable {}} 292 Finish $token 293 if {[info exists state(error)]} { 294 set errorlist $state(error) 295 unset state 296 eval ::error $errorlist 297 } 298} 299 300# http::geturl -- 301# 302# Establishes a connection to a remote url via http. 303# 304# Arguments: 305# url The http URL to goget. 306# args Option value pairs. Valid options include: 307# -blocksize, -validate, -headers, -timeout 308# Results: 309# Returns a token for this connection. This token is the name of an array 310# that the caller should unset to garbage collect the state. 311 312proc http::geturl { url args } { 313 variable http 314 variable urlTypes 315 variable defaultCharset 316 variable defaultKeepalive 317 variable strict 318 319 # Initialize the state variable, an array. We'll return the name of this 320 # array as the token for the transaction. 321 322 if {![info exists http(uid)]} { 323 set http(uid) 0 324 } 325 set token [namespace current]::[incr http(uid)] 326 variable $token 327 upvar 0 $token state 328 reset $token 329 330 # Process command options. 331 332 array set state { 333 -binary false 334 -blocksize 8192 335 -queryblocksize 8192 336 -validate 0 337 -headers {} 338 -timeout 0 339 -type application/x-www-form-urlencoded 340 -queryprogress {} 341 -protocol 1.1 342 binary 0 343 state header 344 meta {} 345 coding {} 346 currentsize 0 347 totalsize 0 348 querylength 0 349 queryoffset 0 350 type text/html 351 body {} 352 status "" 353 http "" 354 connection close 355 } 356 set state(-keepalive) $defaultKeepalive 357 set state(-strict) $strict 358 # These flags have their types verified [Bug 811170] 359 array set type { 360 -binary boolean 361 -blocksize integer 362 -queryblocksize integer 363 -strict boolean 364 -timeout integer 365 -validate boolean 366 } 367 set state(charset) $defaultCharset 368 set options { 369 -binary -blocksize -channel -command -handler -headers -keepalive 370 -method -myaddr -progress -protocol -query -queryblocksize 371 -querychannel -queryprogress -strict -timeout -type -validate 372 } 373 set usage [join [lsort $options] ", "] 374 set options [string map {- ""} $options] 375 set pat ^-([join $options |])$ 376 foreach {flag value} $args { 377 if {[regexp -- $pat $flag]} { 378 # Validate numbers 379 if {[info exists type($flag)] && 380 ![string is $type($flag) -strict $value]} { 381 unset $token 382 return -code error "Bad value for $flag ($value), must be $type($flag)" 383 } 384 set state($flag) $value 385 } else { 386 unset $token 387 return -code error "Unknown option $flag, can be: $usage" 388 } 389 } 390 391 # Make sure -query and -querychannel aren't both specified 392 393 set isQueryChannel [info exists state(-querychannel)] 394 set isQuery [info exists state(-query)] 395 if {$isQuery && $isQueryChannel} { 396 unset $token 397 return -code error "Can't combine -query and -querychannel options!" 398 } 399 400 # Validate URL, determine the server host and port, and check proxy case 401 # Recognize user:pass@host URLs also, although we do not do anything with 402 # that info yet. 403 404 # URLs have basically four parts. 405 # First, before the colon, is the protocol scheme (e.g. http) 406 # Second, for HTTP-like protocols, is the authority 407 # The authority is preceded by // and lasts up to (but not including) 408 # the following / and it identifies up to four parts, of which only one, 409 # the host, is required (if an authority is present at all). All other 410 # parts of the authority (user name, password, port number) are optional. 411 # Third is the resource name, which is split into two parts at a ? 412 # The first part (from the single "/" up to "?") is the path, and the 413 # second part (from that "?" up to "#") is the query. *HOWEVER*, we do 414 # not need to separate them; we send the whole lot to the server. 415 # Fourth is the fragment identifier, which is everything after the first 416 # "#" in the URL. The fragment identifier MUST NOT be sent to the server 417 # and indeed, we don't bother to validate it (it could be an error to 418 # pass it in here, but it's cheap to strip). 419 # 420 # An example of a URL that has all the parts: 421 # http://jschmoe:xyzzy@www.bogus.net:8000/foo/bar.tml?q=foo#changes 422 # The "http" is the protocol, the user is "jschmoe", the password is 423 # "xyzzy", the host is "www.bogus.net", the port is "8000", the path is 424 # "/foo/bar.tml", the query is "q=foo", and the fragment is "changes". 425 # 426 # Note that the RE actually combines the user and password parts, as 427 # recommended in RFC 3986. Indeed, that RFC states that putting passwords 428 # in URLs is a Really Bad Idea, something with which I would agree utterly. 429 # Also note that we do not currently support IPv6 addresses. 430 # 431 # From a validation perspective, we need to ensure that the parts of the 432 # URL that are going to the server are correctly encoded. 433 # This is only done if $state(-strict) is true (inherited from 434 # $::http::strict). 435 436 set URLmatcher {(?x) # this is _expanded_ syntax 437 ^ 438 (?: (\w+) : ) ? # <protocol scheme> 439 (?: // 440 (?: 441 ( 442 [^@/\#?]+ # <userinfo part of authority> 443 ) @ 444 )? 445 ( [^/:\#?]+ ) # <host part of authority> 446 (?: : (\d+) )? # <port part of authority> 447 )? 448 ( / [^\#?]* (?: \? [^\#?]* )?)? # <path> (including query) 449 (?: \# (.*) )? # <fragment> 450 $ 451 } 452 453 # Phase one: parse 454 if {![regexp -- $URLmatcher $url -> proto user host port srvurl]} { 455 unset $token 456 return -code error "Unsupported URL: $url" 457 } 458 # Phase two: validate 459 if {$host eq ""} { 460 # Caller has to provide a host name; we do not have a "default host" 461 # that would enable us to handle relative URLs. 462 unset $token 463 return -code error "Missing host part: $url" 464 # Note that we don't check the hostname for validity here; if it's 465 # invalid, we'll simply fail to resolve it later on. 466 } 467 if {$port ne "" && $port > 65535} { 468 unset $token 469 return -code error "Invalid port number: $port" 470 } 471 # The user identification and resource identification parts of the URL can 472 # have encoded characters in them; take care! 473 if {$user ne ""} { 474 # Check for validity according to RFC 3986, Appendix A 475 set validityRE {(?xi) 476 ^ 477 (?: [-\w.~!$&'()*+,;=:] | %[0-9a-f][0-9a-f] )+ 478 $ 479 } 480 if {$state(-strict) && ![regexp -- $validityRE $user]} { 481 unset $token 482 # Provide a better error message in this error case 483 if {[regexp {(?i)%(?![0-9a-f][0-9a-f]).?.?} $user bad]} { 484 return -code error \ 485 "Illegal encoding character usage \"$bad\" in URL user" 486 } 487 return -code error "Illegal characters in URL user" 488 } 489 } 490 if {$srvurl ne ""} { 491 # Check for validity according to RFC 3986, Appendix A 492 set validityRE {(?xi) 493 ^ 494 # Path part (already must start with / character) 495 (?: [-\w.~!$&'()*+,;=:@/] | %[0-9a-f][0-9a-f] )* 496 # Query part (optional, permits ? characters) 497 (?: \? (?: [-\w.~!$&'()*+,;=:@/?] | %[0-9a-f][0-9a-f] )* )? 498 $ 499 } 500 if {$state(-strict) && ![regexp -- $validityRE $srvurl]} { 501 unset $token 502 # Provide a better error message in this error case 503 if {[regexp {(?i)%(?![0-9a-f][0-9a-f])..} $srvurl bad]} { 504 return -code error \ 505 "Illegal encoding character usage \"$bad\" in URL path" 506 } 507 return -code error "Illegal characters in URL path" 508 } 509 } else { 510 set srvurl / 511 } 512 if {$proto eq ""} { 513 set proto http 514 } 515 if {![info exists urlTypes($proto)]} { 516 unset $token 517 return -code error "Unsupported URL type \"$proto\"" 518 } 519 set defport [lindex $urlTypes($proto) 0] 520 set defcmd [lindex $urlTypes($proto) 1] 521 522 if {$port eq ""} { 523 set port $defport 524 } 525 if {![catch {$http(-proxyfilter) $host} proxy]} { 526 set phost [lindex $proxy 0] 527 set pport [lindex $proxy 1] 528 } 529 530 # OK, now reassemble into a full URL 531 set url ${proto}:// 532 if {$user ne ""} { 533 append url $user 534 append url @ 535 } 536 append url $host 537 if {$port != $defport} { 538 append url : $port 539 } 540 append url $srvurl 541 # Don't append the fragment! 542 set state(url) $url 543 544 # If a timeout is specified we set up the after event and arrange for an 545 # asynchronous socket connection. 546 547 set sockopts [list] 548 if {$state(-timeout) > 0} { 549 set state(after) [after $state(-timeout) \ 550 [list http::reset $token timeout]] 551 lappend sockopts -async 552 } 553 554 # If we are using the proxy, we must pass in the full URL that includes 555 # the server name. 556 557 if {[info exists phost] && ($phost ne "")} { 558 set srvurl $url 559 set targetAddr [list $phost $pport] 560 } else { 561 set targetAddr [list $host $port] 562 } 563 # Proxy connections aren't shared among different hosts. 564 set state(socketinfo) $host:$port 565 566 # See if we are supposed to use a previously opened channel. 567 if {$state(-keepalive)} { 568 variable socketmap 569 if {[info exists socketmap($state(socketinfo))]} { 570 if {[catch {fconfigure $socketmap($state(socketinfo))}]} { 571 Log "WARNING: socket for $state(socketinfo) was closed" 572 unset socketmap($state(socketinfo)) 573 } else { 574 set sock $socketmap($state(socketinfo)) 575 Log "reusing socket $sock for $state(socketinfo)" 576 catch {fileevent $sock writable {}} 577 catch {fileevent $sock readable {}} 578 } 579 } 580 # don't automatically close this connection socket 581 set state(connection) {} 582 } 583 if {![info exists sock]} { 584 # Pass -myaddr directly to the socket command 585 if {[info exists state(-myaddr)]} { 586 lappend sockopts -myaddr $state(-myaddr) 587 } 588 if {[catch {eval $defcmd $sockopts $targetAddr} sock]} { 589 # something went wrong while trying to establish the 590 # connection. Clean up after events and such, but DON'T call the 591 # command callback (if available) because we're going to throw an 592 # exception from here instead. 593 594 set state(sock) $sock 595 Finish $token "" 1 596 cleanup $token 597 return -code error $sock 598 } 599 } 600 set state(sock) $sock 601 Log "Using $sock for $state(socketinfo)" \ 602 [expr {$state(-keepalive)?"keepalive":""}] 603 if {$state(-keepalive)} { 604 set socketmap($state(socketinfo)) $sock 605 } 606 607 # Wait for the connection to complete. 608 609 if {$state(-timeout) > 0} { 610 fileevent $sock writable [list http::Connect $token] 611 http::wait $token 612 613 if {![info exists state]} { 614 # If we timed out then Finish has been called and the users 615 # command callback may have cleaned up the token. If so 616 # we end up here with nothing left to do. 617 return $token 618 } elseif {$state(status) eq "error"} { 619 # Something went wrong while trying to establish the connection. 620 # Clean up after events and such, but DON'T call the command 621 # callback (if available) because we're going to throw an 622 # exception from here instead. 623 set err [lindex $state(error) 0] 624 cleanup $token 625 return -code error $err 626 } elseif {$state(status) ne "connect"} { 627 # Likely to be connection timeout 628 return $token 629 } 630 set state(status) "" 631 } 632 633 # Send data in cr-lf format, but accept any line terminators 634 635 fconfigure $sock -translation {auto crlf} -buffersize $state(-blocksize) 636 637 # The following is disallowed in safe interpreters, but the socket is 638 # already in non-blocking mode in that case. 639 640 catch {fconfigure $sock -blocking off} 641 set how GET 642 if {$isQuery} { 643 set state(querylength) [string length $state(-query)] 644 if {$state(querylength) > 0} { 645 set how POST 646 set contDone 0 647 } else { 648 # There's no query data. 649 unset state(-query) 650 set isQuery 0 651 } 652 } elseif {$state(-validate)} { 653 set how HEAD 654 } elseif {$isQueryChannel} { 655 set how POST 656 # The query channel must be blocking for the async Write to 657 # work properly. 658 fconfigure $state(-querychannel) -blocking 1 -translation binary 659 set contDone 0 660 } 661 if {[info exists state(-method)] && $state(-method) ne ""} { 662 set how $state(-method) 663 } 664 665 if {[catch { 666 puts $sock "$how $srvurl HTTP/$state(-protocol)" 667 puts $sock "Accept: $http(-accept)" 668 array set hdrs $state(-headers) 669 if {[info exists hdrs(Host)]} { 670 # Allow Host spoofing [Bug 928154] 671 puts $sock "Host: $hdrs(Host)" 672 } elseif {$port == $defport} { 673 # Don't add port in this case, to handle broken servers. 674 # [Bug #504508] 675 puts $sock "Host: $host" 676 } else { 677 puts $sock "Host: $host:$port" 678 } 679 unset hdrs 680 puts $sock "User-Agent: $http(-useragent)" 681 if {$state(-protocol) == 1.0 && $state(-keepalive)} { 682 puts $sock "Connection: keep-alive" 683 } 684 if {$state(-protocol) > 1.0 && !$state(-keepalive)} { 685 puts $sock "Connection: close" ;# RFC2616 sec 8.1.2.1 686 } 687 if {[info exists phost] && ($phost ne "") && $state(-keepalive)} { 688 puts $sock "Proxy-Connection: Keep-Alive" 689 } 690 set accept_encoding_seen 0 691 foreach {key value} $state(-headers) { 692 if {[string equal -nocase $key "host"]} { continue } 693 if {[string equal -nocase $key "accept-encoding"]} { 694 set accept_encoding_seen 1 695 } 696 set value [string map [list \n "" \r ""] $value] 697 set key [string trim $key] 698 if {[string equal -nocase $key "content-length"]} { 699 set contDone 1 700 set state(querylength) $value 701 } 702 if {[string length $key]} { 703 puts $sock "$key: $value" 704 } 705 } 706 # Soft zlib dependency check - no package require 707 if {!$accept_encoding_seen && [llength [package provide zlib]] 708 && !([info exists state(-channel)] || [info exists state(-handler)]) 709 } { 710 puts $sock "Accept-Encoding: gzip, identity, *;q=0.1" 711 } 712 if {$isQueryChannel && $state(querylength) == 0} { 713 # Try to determine size of data in channel. If we cannot seek, the 714 # surrounding catch will trap us 715 716 set start [tell $state(-querychannel)] 717 seek $state(-querychannel) 0 end 718 set state(querylength) \ 719 [expr {[tell $state(-querychannel)] - $start}] 720 seek $state(-querychannel) $start 721 } 722 723 # Flush the request header and set up the fileevent that will either 724 # push the POST data or read the response. 725 # 726 # fileevent note: 727 # 728 # It is possible to have both the read and write fileevents active at 729 # this point. The only scenario it seems to affect is a server that 730 # closes the connection without reading the POST data. (e.g., early 731 # versions TclHttpd in various error cases). Depending on the platform, 732 # the client may or may not be able to get the response from the server 733 # because of the error it will get trying to write the post data. 734 # Having both fileevents active changes the timing and the behavior, 735 # but no two platforms (among Solaris, Linux, and NT) behave the same, 736 # and none behave all that well in any case. Servers should always read 737 # their POST data if they expect the client to read their response. 738 739 if {$isQuery || $isQueryChannel} { 740 puts $sock "Content-Type: $state(-type)" 741 if {!$contDone} { 742 puts $sock "Content-Length: $state(querylength)" 743 } 744 puts $sock "" 745 fconfigure $sock -translation {auto binary} 746 fileevent $sock writable [list http::Write $token] 747 } else { 748 puts $sock "" 749 flush $sock 750 fileevent $sock readable [list http::Event $sock $token] 751 } 752 753 if {! [info exists state(-command)]} { 754 # geturl does EVERYTHING asynchronously, so if the user calls it 755 # synchronously, we just do a wait here. 756 757 wait $token 758 if {$state(status) eq "error"} { 759 # Something went wrong, so throw the exception, and the 760 # enclosing catch will do cleanup. 761 return -code error [lindex $state(error) 0] 762 } 763 } 764 } err]} { 765 # The socket probably was never connected, or the connection dropped 766 # later. 767 768 # Clean up after events and such, but DON'T call the command callback 769 # (if available) because we're going to throw an exception from here 770 # instead. 771 772 # if state(status) is error, it means someone's already called Finish 773 # to do the above-described clean up. 774 if {$state(status) ne "error"} { 775 Finish $token $err 1 776 } 777 cleanup $token 778 return -code error $err 779 } 780 781 return $token 782} 783 784# Data access functions: 785# Data - the URL data 786# Status - the transaction status: ok, reset, eof, timeout 787# Code - the HTTP transaction code, e.g., 200 788# Size - the size of the URL data 789 790proc http::data {token} { 791 variable $token 792 upvar 0 $token state 793 return $state(body) 794} 795proc http::status {token} { 796 if {![info exists $token]} { return "error" } 797 variable $token 798 upvar 0 $token state 799 return $state(status) 800} 801proc http::code {token} { 802 variable $token 803 upvar 0 $token state 804 return $state(http) 805} 806proc http::ncode {token} { 807 variable $token 808 upvar 0 $token state 809 if {[regexp {[0-9]{3}} $state(http) numeric_code]} { 810 return $numeric_code 811 } else { 812 return $state(http) 813 } 814} 815proc http::size {token} { 816 variable $token 817 upvar 0 $token state 818 return $state(currentsize) 819} 820proc http::meta {token} { 821 variable $token 822 upvar 0 $token state 823 return $state(meta) 824} 825proc http::error {token} { 826 variable $token 827 upvar 0 $token state 828 if {[info exists state(error)]} { 829 return $state(error) 830 } 831 return "" 832} 833 834# http::cleanup 835# 836# Garbage collect the state associated with a transaction 837# 838# Arguments 839# token The token returned from http::geturl 840# 841# Side Effects 842# unsets the state array 843 844proc http::cleanup {token} { 845 variable $token 846 upvar 0 $token state 847 if {[info exists state]} { 848 unset state 849 } 850} 851 852# http::Connect 853# 854# This callback is made when an asyncronous connection completes. 855# 856# Arguments 857# token The token returned from http::geturl 858# 859# Side Effects 860# Sets the status of the connection, which unblocks 861# the waiting geturl call 862 863proc http::Connect {token} { 864 variable $token 865 upvar 0 $token state 866 global errorInfo errorCode 867 if {[eof $state(sock)] || 868 [string length [fconfigure $state(sock) -error]]} { 869 Finish $token "connect failed [fconfigure $state(sock) -error]" 1 870 } else { 871 set state(status) connect 872 fileevent $state(sock) writable {} 873 } 874 return 875} 876 877# http::Write 878# 879# Write POST query data to the socket 880# 881# Arguments 882# token The token for the connection 883# 884# Side Effects 885# Write the socket and handle callbacks. 886 887proc http::Write {token} { 888 variable $token 889 upvar 0 $token state 890 set sock $state(sock) 891 892 # Output a block. Tcl will buffer this if the socket blocks 893 set done 0 894 if {[catch { 895 # Catch I/O errors on dead sockets 896 897 if {[info exists state(-query)]} { 898 # Chop up large query strings so queryprogress callback can give 899 # smooth feedback. 900 901 puts -nonewline $sock \ 902 [string range $state(-query) $state(queryoffset) \ 903 [expr {$state(queryoffset) + $state(-queryblocksize) - 1}]] 904 incr state(queryoffset) $state(-queryblocksize) 905 if {$state(queryoffset) >= $state(querylength)} { 906 set state(queryoffset) $state(querylength) 907 puts $sock "" 908 set done 1 909 } 910 } else { 911 # Copy blocks from the query channel 912 913 set outStr [read $state(-querychannel) $state(-queryblocksize)] 914 puts -nonewline $sock $outStr 915 incr state(queryoffset) [string length $outStr] 916 if {[eof $state(-querychannel)]} { 917 set done 1 918 } 919 } 920 } err]} { 921 # Do not call Finish here, but instead let the read half of the socket 922 # process whatever server reply there is to get. 923 924 set state(posterror) $err 925 set done 1 926 } 927 if {$done} { 928 catch {flush $sock} 929 fileevent $sock writable {} 930 fileevent $sock readable [list http::Event $sock $token] 931 } 932 933 # Callback to the client after we've completely handled everything. 934 935 if {[string length $state(-queryprogress)]} { 936 eval $state(-queryprogress) \ 937 [list $token $state(querylength) $state(queryoffset)] 938 } 939} 940 941# http::Event 942# 943# Handle input on the socket 944# 945# Arguments 946# sock The socket receiving input. 947# token The token returned from http::geturl 948# 949# Side Effects 950# Read the socket and handle callbacks. 951 952proc http::Event {sock token} { 953 variable $token 954 upvar 0 $token state 955 956 if {![info exists state]} { 957 Log "Event $sock with invalid token '$token' - remote close?" 958 if {! [eof $sock]} { 959 if {[string length [set d [read $sock]]] != 0} { 960 Log "WARNING: additional data left on closed socket" 961 } 962 } 963 CloseSocket $sock 964 return 965 } 966 if {$state(state) eq "header"} { 967 if {[catch {gets $sock line} n]} { 968 return [Finish $token $n] 969 } elseif {$n == 0} { 970 # We have now read all headers 971 # We ignore HTTP/1.1 100 Continue returns. RFC2616 sec 8.2.3 972 if {$state(http) == "" || [lindex $state(http) 1] == 100} { return } 973 974 set state(state) body 975 976 # If doing a HEAD, then we won't get any body 977 if {$state(-validate)} { 978 Eof $token 979 return 980 } 981 982 # For non-chunked transfer we may have no body -- in this case we 983 # may get no further file event if the connection doesn't close and 984 # no more data is sent. We can tell and must finish up now - not 985 # later. 986 if {!(([info exists state(connection)] 987 && ($state(connection) eq "close")) 988 || [info exists state(transfer)]) 989 && $state(totalsize) == 0 990 } then { 991 Log "body size is 0 and no events likely - complete." 992 Eof $token 993 return 994 } 995 996 # We have to use binary translation to count bytes properly. 997 fconfigure $sock -translation binary 998 999 if {$state(-binary) || ![string match -nocase text* $state(type)]} { 1000 # Turn off conversions for non-text data 1001 set state(binary) 1 1002 } 1003 if {$state(binary) || [string match *gzip* $state(coding)] 1004 || [string match *compress* $state(coding)]} { 1005 if {[info exists state(-channel)]} { 1006 fconfigure $state(-channel) -translation binary 1007 } 1008 } 1009 if {[info exists state(-channel)] && 1010 ![info exists state(-handler)]} { 1011 # Initiate a sequence of background fcopies 1012 fileevent $sock readable {} 1013 CopyStart $sock $token 1014 return 1015 } 1016 } elseif {$n > 0} { 1017 # Process header lines 1018 if {[regexp -nocase {^([^:]+):(.+)$} $line x key value]} { 1019 switch -- [string tolower $key] { 1020 content-type { 1021 set state(type) [string trim [string tolower $value]] 1022 # grab the optional charset information 1023 regexp -nocase {charset\s*=\s*(\S+?);?} \ 1024 $state(type) -> state(charset) 1025 } 1026 content-length { 1027 set state(totalsize) [string trim $value] 1028 } 1029 content-encoding { 1030 set state(coding) [string trim $value] 1031 } 1032 transfer-encoding { 1033 set state(transfer) \ 1034 [string trim [string tolower $value]] 1035 } 1036 proxy-connection - 1037 connection { 1038 set state(connection) \ 1039 [string trim [string tolower $value]] 1040 } 1041 } 1042 lappend state(meta) $key [string trim $value] 1043 } elseif {[string match HTTP* $line]} { 1044 set state(http) $line 1045 } 1046 } 1047 } else { 1048 # Now reading body 1049 if {[catch { 1050 if {[info exists state(-handler)]} { 1051 set n [eval $state(-handler) [list $sock $token]] 1052 } elseif {[info exists state(transfer_final)]} { 1053 set line [getTextLine $sock] 1054 set n [string length $line] 1055 if {$n > 0} { 1056 Log "found $n bytes following final chunk" 1057 append state(transfer_final) $line 1058 } else { 1059 Log "final chunk part" 1060 Eof $token 1061 } 1062 } elseif {[info exists state(transfer)] 1063 && $state(transfer) eq "chunked"} { 1064 set size 0 1065 set chunk [getTextLine $sock] 1066 set n [string length $chunk] 1067 if {[string trim $chunk] ne ""} { 1068 scan $chunk %x size 1069 if {$size != 0} { 1070 set bl [fconfigure $sock -blocking] 1071 fconfigure $sock -blocking 1 1072 set chunk [read $sock $size] 1073 fconfigure $sock -blocking $bl 1074 set n [string length $chunk] 1075 if {$n >= 0} { 1076 append state(body) $chunk 1077 } 1078 if {$size != [string length $chunk]} { 1079 Log "WARNING: mis-sized chunk:\ 1080 was [string length $chunk], should be $size" 1081 } 1082 getTextLine $sock 1083 } else { 1084 set state(transfer_final) {} 1085 } 1086 } 1087 } else { 1088 #Log "read non-chunk $state(currentsize) of $state(totalsize)" 1089 set block [read $sock $state(-blocksize)] 1090 set n [string length $block] 1091 if {$n >= 0} { 1092 append state(body) $block 1093 } 1094 } 1095 if {[info exists state]} { 1096 if {$n >= 0} { 1097 incr state(currentsize) $n 1098 } 1099 # If Content-Length - check for end of data. 1100 if {($state(totalsize) > 0) 1101 && ($state(currentsize) >= $state(totalsize))} { 1102 Eof $token 1103 } 1104 } 1105 } err]} { 1106 return [Finish $token $err] 1107 } else { 1108 if {[info exists state(-progress)]} { 1109 eval $state(-progress) \ 1110 [list $token $state(totalsize) $state(currentsize)] 1111 } 1112 } 1113 } 1114 1115 # catch as an Eof above may have closed the socket already 1116 if {![catch {eof $sock} eof] && $eof} { 1117 if {[info exists $token]} { 1118 set state(connection) close 1119 Eof $token 1120 } else { 1121 # open connection closed on a token that has been cleaned up. 1122 CloseSocket $sock 1123 } 1124 return 1125 } 1126} 1127 1128# http::getTextLine -- 1129# 1130# Get one line with the stream in blocking crlf mode 1131# 1132# Arguments 1133# sock The socket receiving input. 1134# 1135# Results: 1136# The line of text, without trailing newline 1137 1138proc http::getTextLine {sock} { 1139 set tr [fconfigure $sock -translation] 1140 set bl [fconfigure $sock -blocking] 1141 fconfigure $sock -translation crlf -blocking 1 1142 set r [gets $sock] 1143 fconfigure $sock -translation $tr -blocking $bl 1144 return $r 1145} 1146 1147# http::CopyStart 1148# 1149# Error handling wrapper around fcopy 1150# 1151# Arguments 1152# sock The socket to copy from 1153# token The token returned from http::geturl 1154# 1155# Side Effects 1156# This closes the connection upon error 1157 1158proc http::CopyStart {sock token} { 1159 variable $token 1160 upvar 0 $token state 1161 if {[catch { 1162 fcopy $sock $state(-channel) -size $state(-blocksize) -command \ 1163 [list http::CopyDone $token] 1164 } err]} { 1165 Finish $token $err 1166 } 1167} 1168 1169# http::CopyDone 1170# 1171# fcopy completion callback 1172# 1173# Arguments 1174# token The token returned from http::geturl 1175# count The amount transfered 1176# 1177# Side Effects 1178# Invokes callbacks 1179 1180proc http::CopyDone {token count {error {}}} { 1181 variable $token 1182 upvar 0 $token state 1183 set sock $state(sock) 1184 incr state(currentsize) $count 1185 if {[info exists state(-progress)]} { 1186 eval $state(-progress) \ 1187 [list $token $state(totalsize) $state(currentsize)] 1188 } 1189 # At this point the token may have been reset 1190 if {[string length $error]} { 1191 Finish $token $error 1192 } elseif {[catch {eof $sock} iseof] || $iseof} { 1193 Eof $token 1194 } else { 1195 CopyStart $sock $token 1196 } 1197} 1198 1199# http::Eof 1200# 1201# Handle eof on the socket 1202# 1203# Arguments 1204# token The token returned from http::geturl 1205# 1206# Side Effects 1207# Clean up the socket 1208 1209proc http::Eof {token {force 0}} { 1210 variable $token 1211 upvar 0 $token state 1212 if {$state(state) eq "header"} { 1213 # Premature eof 1214 set state(status) eof 1215 } else { 1216 set state(status) ok 1217 } 1218 1219 if {($state(coding) eq "gzip") && [string length $state(body)] > 0} { 1220 if {[catch { 1221 set state(body) [Gunzip $state(body)] 1222 } err]} { 1223 return [Finish $token $err] 1224 } 1225 } 1226 1227 if {!$state(binary)} { 1228 1229 # If we are getting text, set the incoming channel's 1230 # encoding correctly. iso8859-1 is the RFC default, but 1231 # this could be any IANA charset. However, we only know 1232 # how to convert what we have encodings for. 1233 1234 set enc [CharsetToEncoding $state(charset)] 1235 if {$enc ne "binary"} { 1236 set state(body) [encoding convertfrom $enc $state(body)] 1237 } 1238 1239 # Translate text line endings. 1240 set state(body) [string map {\r\n \n \r \n} $state(body)] 1241 } 1242 1243 Finish $token 1244} 1245 1246# http::wait -- 1247# 1248# See documentation for details. 1249# 1250# Arguments: 1251# token Connection token. 1252# 1253# Results: 1254# The status after the wait. 1255 1256proc http::wait {token} { 1257 variable $token 1258 upvar 0 $token state 1259 1260 if {![info exists state(status)] || $state(status) eq ""} { 1261 # We must wait on the original variable name, not the upvar alias 1262 vwait ${token}(status) 1263 } 1264 1265 return [status $token] 1266} 1267 1268# http::formatQuery -- 1269# 1270# See documentation for details. Call http::formatQuery with an even 1271# number of arguments, where the first is a name, the second is a value, 1272# the third is another name, and so on. 1273# 1274# Arguments: 1275# args A list of name-value pairs. 1276# 1277# Results: 1278# TODO 1279 1280proc http::formatQuery {args} { 1281 set result "" 1282 set sep "" 1283 foreach i $args { 1284 append result $sep [mapReply $i] 1285 if {$sep eq "="} { 1286 set sep & 1287 } else { 1288 set sep = 1289 } 1290 } 1291 return $result 1292} 1293 1294# http::mapReply -- 1295# 1296# Do x-www-urlencoded character mapping 1297# 1298# Arguments: 1299# string The string the needs to be encoded 1300# 1301# Results: 1302# The encoded string 1303 1304proc http::mapReply {string} { 1305 variable http 1306 variable formMap 1307 1308 # The spec says: "non-alphanumeric characters are replaced by '%HH'". Use 1309 # a pre-computed map and [string map] to do the conversion (much faster 1310 # than [regsub]/[subst]). [Bug 1020491] 1311 1312 if {$http(-urlencoding) ne ""} { 1313 set string [encoding convertto $http(-urlencoding) $string] 1314 return [string map $formMap $string] 1315 } 1316 set converted [string map $formMap $string] 1317 if {[string match "*\[\u0100-\uffff\]*" $converted]} { 1318 regexp {[\u0100-\uffff]} $converted badChar 1319 # Return this error message for maximum compatability... :^/ 1320 return -code error \ 1321 "can't read \"formMap($badChar)\": no such element in array" 1322 } 1323 return $converted 1324} 1325 1326# http::ProxyRequired -- 1327# Default proxy filter. 1328# 1329# Arguments: 1330# host The destination host 1331# 1332# Results: 1333# The current proxy settings 1334 1335proc http::ProxyRequired {host} { 1336 variable http 1337 if {[info exists http(-proxyhost)] && [string length $http(-proxyhost)]} { 1338 if {![info exists http(-proxyport)] || \ 1339 ![string length $http(-proxyport)]} { 1340 set http(-proxyport) 8080 1341 } 1342 return [list $http(-proxyhost) $http(-proxyport)] 1343 } 1344} 1345 1346# http::CharsetToEncoding -- 1347# 1348# Tries to map a given IANA charset to a tcl encoding. 1349# If no encoding can be found, returns binary. 1350# 1351 1352proc http::CharsetToEncoding {charset} { 1353 variable encodings 1354 1355 set charset [string tolower $charset] 1356 if {[regexp {iso-?8859-([0-9]+)} $charset - num]} { 1357 set encoding "iso8859-$num" 1358 } elseif {[regexp {iso-?2022-(jp|kr)} $charset - ext]} { 1359 set encoding "iso2022-$ext" 1360 } elseif {[regexp {shift[-_]?js} $charset -]} { 1361 set encoding "shiftjis" 1362 } elseif {[regexp {(windows|cp)-?([0-9]+)} $charset - - num]} { 1363 set encoding "cp$num" 1364 } elseif {$charset eq "us-ascii"} { 1365 set encoding "ascii" 1366 } elseif {[regexp {(iso-?)?lat(in)?-?([0-9]+)} $charset - - - num]} { 1367 switch -- $num { 1368 5 {set encoding "iso8859-9"} 1369 1 - 1370 2 - 1371 3 {set encoding "iso8859-$num"} 1372 } 1373 } else { 1374 # other charset, like euc-xx, utf-8,... may directly maps to encoding 1375 set encoding $charset 1376 } 1377 set idx [lsearch -exact $encodings $encoding] 1378 if {$idx >= 0} { 1379 return $encoding 1380 } else { 1381 return "binary" 1382 } 1383} 1384 1385# http::Gunzip -- 1386# 1387# Decompress data transmitted using the gzip transfer coding. 1388# 1389 1390# FIX ME: redo using zlib sinflate 1391proc http::Gunzip {data} { 1392 binary scan $data Scb5icc magic method flags time xfl os 1393 set pos 10 1394 if {$magic != 0x1f8b} { 1395 return -code error "invalid data: supplied data is not in gzip format" 1396 } 1397 if {$method != 8} { 1398 return -code error "invalid compression method" 1399 } 1400 1401 foreach {f_text f_crc f_extra f_name f_comment} [split $flags ""] break 1402 set extra "" 1403 if { $f_extra } { 1404 binary scan $data @${pos}S xlen 1405 incr pos 2 1406 set extra [string range $data $pos $xlen] 1407 set pos [incr xlen] 1408 } 1409 1410 set name "" 1411 if { $f_name } { 1412 set ndx [string first \0 $data $pos] 1413 set name [string range $data $pos $ndx] 1414 set pos [incr ndx] 1415 } 1416 1417 set comment "" 1418 if { $f_comment } { 1419 set ndx [string first \0 $data $pos] 1420 set comment [string range $data $pos $ndx] 1421 set pos [incr ndx] 1422 } 1423 1424 set fcrc "" 1425 if { $f_crc } { 1426 set fcrc [string range $data $pos [incr pos]] 1427 incr pos 1428 } 1429 1430 binary scan [string range $data end-7 end] ii crc size 1431 set inflated [zlib inflate [string range $data $pos end-8]] 1432 set chk [zlib crc32 $inflated] 1433 if { ($crc & 0xffffffff) != ($chk & 0xffffffff)} { 1434 return -code error "invalid data: checksum mismatch $crc != $chk" 1435 } 1436 return $inflated 1437} 1438 1439# Local variables: 1440# indent-tabs-mode: t 1441# End: 1442