1# http.tcl -- 2# 3# Client-side HTTP for GET, POST, and HEAD commands. These routines can 4# be used in untrusted code that uses the Safesock security policy. 5# These procedures use a callback interface to avoid using vwait, which 6# is not defined in the safe base. 7# 8# See the file "license.terms" for information on usage and redistribution of 9# this file, and for a DISCLAIMER OF ALL WARRANTIES. 10# 11# RCS: @(#) $Id: http.tcl,v 1.67.2.9 2009/11/11 16:14:43 dgp Exp $ 12 13package require Tcl 8.4 14# Keep this in sync with pkgIndex.tcl and with the install directories in 15# Makefiles 16package provide http 2.7.5 17 18namespace eval http { 19 # Allow resourcing to not clobber existing data 20 21 variable http 22 if {![info exists http]} { 23 array set http { 24 -accept */* 25 -proxyhost {} 26 -proxyport {} 27 -proxyfilter http::ProxyRequired 28 -urlencoding utf-8 29 } 30 set http(-useragent) "Tcl http client package [package provide http]" 31 } 32 33 proc init {} { 34 # Set up the map for quoting chars. RFC3986 Section 2.3 say percent 35 # encode all except: "... percent-encoded octets in the ranges of 36 # ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), period 37 # (%2E), underscore (%5F), or tilde (%7E) should not be created by URI 38 # producers ..." 39 for {set i 0} {$i <= 256} {incr i} { 40 set c [format %c $i] 41 if {![string match {[-._~a-zA-Z0-9]} $c]} { 42 set map($c) %[format %.2x $i] 43 } 44 } 45 # These are handled specially 46 set map(\n) %0d%0a 47 variable formMap [array get map] 48 49 # Create a map for HTTP/1.1 open sockets 50 variable socketmap 51 if {[info exists socketmap]} { 52 # Close but don't remove open sockets on re-init 53 foreach {url sock} [array get socketmap] { 54 catch {close $sock} 55 } 56 } 57 array set socketmap {} 58 } 59 init 60 61 variable urlTypes 62 if {![info exists urlTypes]} { 63 set urlTypes(http) [list 80 ::socket] 64 } 65 66 variable encodings [string tolower [encoding names]] 67 # This can be changed, but iso8859-1 is the RFC standard. 68 variable defaultCharset 69 if {![info exists defaultCharset]} { 70 set defaultCharset "iso8859-1" 71 } 72 73 # Force RFC 3986 strictness in geturl url verification? 74 variable strict 75 if {![info exists strict]} { 76 set strict 1 77 } 78 79 # Let user control default keepalive for compatibility 80 variable defaultKeepalive 81 if {![info exists defaultKeepalive]} { 82 set defaultKeepalive 0 83 } 84 85 namespace export geturl config reset wait formatQuery register unregister 86 # Useful, but not exported: data size status code 87} 88 89# http::Log -- 90# 91# Debugging output -- define this to observe HTTP/1.1 socket usage. 92# Should echo any args received. 93# 94# Arguments: 95# msg Message to output 96# 97proc http::Log {args} {} 98 99# http::register -- 100# 101# See documentation for details. 102# 103# Arguments: 104# proto URL protocol prefix, e.g. https 105# port Default port for protocol 106# command Command to use to create socket 107# Results: 108# list of port and command that was registered. 109 110proc http::register {proto port command} { 111 variable urlTypes 112 set urlTypes($proto) [list $port $command] 113} 114 115# http::unregister -- 116# 117# Unregisters URL protocol handler 118# 119# Arguments: 120# proto URL protocol prefix, e.g. https 121# Results: 122# list of port and command that was unregistered. 123 124proc http::unregister {proto} { 125 variable urlTypes 126 if {![info exists urlTypes($proto)]} { 127 return -code error "unsupported url type \"$proto\"" 128 } 129 set old $urlTypes($proto) 130 unset urlTypes($proto) 131 return $old 132} 133 134# http::config -- 135# 136# See documentation for details. 137# 138# Arguments: 139# args Options parsed by the procedure. 140# Results: 141# TODO 142 143proc http::config {args} { 144 variable http 145 set options [lsort [array names http -*]] 146 set usage [join $options ", "] 147 if {[llength $args] == 0} { 148 set result {} 149 foreach name $options { 150 lappend result $name $http($name) 151 } 152 return $result 153 } 154 set options [string map {- ""} $options] 155 set pat ^-(?:[join $options |])$ 156 if {[llength $args] == 1} { 157 set flag [lindex $args 0] 158 if {![regexp -- $pat $flag]} { 159 return -code error "Unknown option $flag, must be: $usage" 160 } 161 return $http($flag) 162 } else { 163 foreach {flag value} $args { 164 if {![regexp -- $pat $flag]} { 165 return -code error "Unknown option $flag, must be: $usage" 166 } 167 set http($flag) $value 168 } 169 } 170} 171 172# http::Finish -- 173# 174# Clean up the socket and eval close time callbacks 175# 176# Arguments: 177# token Connection token. 178# errormsg (optional) If set, forces status to error. 179# skipCB (optional) If set, don't call the -command callback. This 180# is useful when geturl wants to throw an exception instead 181# of calling the callback. That way, the same error isn't 182# reported to two places. 183# 184# Side Effects: 185# Closes the socket 186 187proc http::Finish {token {errormsg ""} {skipCB 0}} { 188 variable $token 189 upvar 0 $token state 190 global errorInfo errorCode 191 if {$errormsg ne ""} { 192 set state(error) [list $errormsg $errorInfo $errorCode] 193 set state(status) "error" 194 } 195 if { 196 ($state(status) eq "timeout") || ($state(status) eq "error") || 197 ([info exists state(connection)] && ($state(connection) eq "close")) 198 } then { 199 CloseSocket $state(sock) $token 200 } 201 if {[info exists state(after)]} { 202 after cancel $state(after) 203 } 204 if {[info exists state(-command)] && !$skipCB} { 205 if {[catch {eval $state(-command) {$token}} err]} { 206 if {$errormsg eq ""} { 207 set state(error) [list $err $errorInfo $errorCode] 208 set state(status) error 209 } 210 } 211 # Command callback may already have unset our state 212 unset -nocomplain state(-command) 213 } 214} 215 216# http::CloseSocket - 217# 218# Close a socket and remove it from the persistent sockets table. If 219# possible an http token is included here but when we are called from a 220# fileevent on remote closure we need to find the correct entry - hence 221# the second section. 222 223proc ::http::CloseSocket {s {token {}}} { 224 variable socketmap 225 catch {fileevent $s readable {}} 226 set conn_id {} 227 if {$token ne ""} { 228 variable $token 229 upvar 0 $token state 230 if {[info exists state(socketinfo)]} { 231 set conn_id $state(socketinfo) 232 } 233 } else { 234 set map [array get socketmap] 235 set ndx [lsearch -exact $map $s] 236 if {$ndx != -1} { 237 incr ndx -1 238 set conn_id [lindex $map $ndx] 239 } 240 } 241 if {$conn_id eq {} || ![info exists socketmap($conn_id)]} { 242 Log "Closing socket $s (no connection info)" 243 if {[catch {close $s} err]} { 244 Log "Error: $err" 245 } 246 } else { 247 if {[info exists socketmap($conn_id)]} { 248 Log "Closing connection $conn_id (sock $socketmap($conn_id))" 249 if {[catch {close $socketmap($conn_id)} err]} { 250 Log "Error: $err" 251 } 252 unset socketmap($conn_id) 253 } else { 254 Log "Cannot close connection $conn_id - no socket in socket map" 255 } 256 } 257} 258 259# http::reset -- 260# 261# See documentation for details. 262# 263# Arguments: 264# token Connection token. 265# why Status info. 266# 267# Side Effects: 268# See Finish 269 270proc http::reset {token {why reset}} { 271 variable $token 272 upvar 0 $token state 273 set state(status) $why 274 catch {fileevent $state(sock) readable {}} 275 catch {fileevent $state(sock) writable {}} 276 Finish $token 277 if {[info exists state(error)]} { 278 set errorlist $state(error) 279 unset state 280 eval ::error $errorlist 281 } 282} 283 284# http::geturl -- 285# 286# Establishes a connection to a remote url via http. 287# 288# Arguments: 289# url The http URL to goget. 290# args Option value pairs. Valid options include: 291# -blocksize, -validate, -headers, -timeout 292# Results: 293# Returns a token for this connection. This token is the name of an 294# array that the caller should unset to garbage collect the state. 295 296proc http::geturl {url args} { 297 variable http 298 variable urlTypes 299 variable defaultCharset 300 variable defaultKeepalive 301 variable strict 302 303 # Initialize the state variable, an array. We'll return the name of this 304 # array as the token for the transaction. 305 306 if {![info exists http(uid)]} { 307 set http(uid) 0 308 } 309 set token [namespace current]::[incr http(uid)] 310 variable $token 311 upvar 0 $token state 312 reset $token 313 314 # Process command options. 315 316 array set state { 317 -binary false 318 -blocksize 8192 319 -queryblocksize 8192 320 -validate 0 321 -headers {} 322 -timeout 0 323 -type application/x-www-form-urlencoded 324 -queryprogress {} 325 -protocol 1.1 326 binary 0 327 state connecting 328 meta {} 329 coding {} 330 currentsize 0 331 totalsize 0 332 querylength 0 333 queryoffset 0 334 type text/html 335 body {} 336 status "" 337 http "" 338 connection close 339 } 340 set state(-keepalive) $defaultKeepalive 341 set state(-strict) $strict 342 # These flags have their types verified [Bug 811170] 343 array set type { 344 -binary boolean 345 -blocksize integer 346 -queryblocksize integer 347 -strict boolean 348 -timeout integer 349 -validate boolean 350 } 351 set state(charset) $defaultCharset 352 set options { 353 -binary -blocksize -channel -command -handler -headers -keepalive 354 -method -myaddr -progress -protocol -query -queryblocksize 355 -querychannel -queryprogress -strict -timeout -type -validate 356 } 357 set usage [join [lsort $options] ", "] 358 set options [string map {- ""} $options] 359 set pat ^-(?:[join $options |])$ 360 foreach {flag value} $args { 361 if {[regexp -- $pat $flag]} { 362 # Validate numbers 363 if { 364 [info exists type($flag)] && 365 ![string is $type($flag) -strict $value] 366 } then { 367 unset $token 368 return -code error \ 369 "Bad value for $flag ($value), must be $type($flag)" 370 } 371 set state($flag) $value 372 } else { 373 unset $token 374 return -code error "Unknown option $flag, can be: $usage" 375 } 376 } 377 378 # Make sure -query and -querychannel aren't both specified 379 380 set isQueryChannel [info exists state(-querychannel)] 381 set isQuery [info exists state(-query)] 382 if {$isQuery && $isQueryChannel} { 383 unset $token 384 return -code error "Can't combine -query and -querychannel options!" 385 } 386 387 # Validate URL, determine the server host and port, and check proxy case 388 # Recognize user:pass@host URLs also, although we do not do anything with 389 # that info yet. 390 391 # URLs have basically four parts. 392 # First, before the colon, is the protocol scheme (e.g. http) 393 # Second, for HTTP-like protocols, is the authority 394 # The authority is preceded by // and lasts up to (but not including) 395 # the following / and it identifies up to four parts, of which only one, 396 # the host, is required (if an authority is present at all). All other 397 # parts of the authority (user name, password, port number) are optional. 398 # Third is the resource name, which is split into two parts at a ? 399 # The first part (from the single "/" up to "?") is the path, and the 400 # second part (from that "?" up to "#") is the query. *HOWEVER*, we do 401 # not need to separate them; we send the whole lot to the server. 402 # Fourth is the fragment identifier, which is everything after the first 403 # "#" in the URL. The fragment identifier MUST NOT be sent to the server 404 # and indeed, we don't bother to validate it (it could be an error to 405 # pass it in here, but it's cheap to strip). 406 # 407 # An example of a URL that has all the parts: 408 # 409 # http://jschmoe:xyzzy@www.bogus.net:8000/foo/bar.tml?q=foo#changes 410 # 411 # The "http" is the protocol, the user is "jschmoe", the password is 412 # "xyzzy", the host is "www.bogus.net", the port is "8000", the path is 413 # "/foo/bar.tml", the query is "q=foo", and the fragment is "changes". 414 # 415 # Note that the RE actually combines the user and password parts, as 416 # recommended in RFC 3986. Indeed, that RFC states that putting passwords 417 # in URLs is a Really Bad Idea, something with which I would agree utterly. 418 # Also note that we do not currently support IPv6 addresses. 419 # 420 # From a validation perspective, we need to ensure that the parts of the 421 # URL that are going to the server are correctly encoded. This is only 422 # done if $state(-strict) is true (inherited from $::http::strict). 423 424 set URLmatcher {(?x) # this is _expanded_ syntax 425 ^ 426 (?: (\w+) : ) ? # <protocol scheme> 427 (?: // 428 (?: 429 ( 430 [^@/\#?]+ # <userinfo part of authority> 431 ) @ 432 )? 433 ( [^/:\#?]+ ) # <host part of authority> 434 (?: : (\d+) )? # <port part of authority> 435 )? 436 ( / [^\#]*)? # <path> (including query) 437 (?: \# (.*) )? # <fragment> 438 $ 439 } 440 441 # Phase one: parse 442 if {![regexp -- $URLmatcher $url -> proto user host port srvurl]} { 443 unset $token 444 return -code error "Unsupported URL: $url" 445 } 446 # Phase two: validate 447 if {$host eq ""} { 448 # Caller has to provide a host name; we do not have a "default host" 449 # that would enable us to handle relative URLs. 450 unset $token 451 return -code error "Missing host part: $url" 452 # Note that we don't check the hostname for validity here; if it's 453 # invalid, we'll simply fail to resolve it later on. 454 } 455 if {$port ne "" && $port > 65535} { 456 unset $token 457 return -code error "Invalid port number: $port" 458 } 459 # The user identification and resource identification parts of the URL can 460 # have encoded characters in them; take care! 461 if {$user ne ""} { 462 # Check for validity according to RFC 3986, Appendix A 463 set validityRE {(?xi) 464 ^ 465 (?: [-\w.~!$&'()*+,;=:] | %[0-9a-f][0-9a-f] )+ 466 $ 467 } 468 if {$state(-strict) && ![regexp -- $validityRE $user]} { 469 unset $token 470 # Provide a better error message in this error case 471 if {[regexp {(?i)%(?![0-9a-f][0-9a-f]).?.?} $user bad]} { 472 return -code error \ 473 "Illegal encoding character usage \"$bad\" in URL user" 474 } 475 return -code error "Illegal characters in URL user" 476 } 477 } 478 if {$srvurl ne ""} { 479 # Check for validity according to RFC 3986, Appendix A 480 set validityRE {(?xi) 481 ^ 482 # Path part (already must start with / character) 483 (?: [-\w.~!$&'()*+,;=:@/] | %[0-9a-f][0-9a-f] )* 484 # Query part (optional, permits ? characters) 485 (?: \? (?: [-\w.~!$&'()*+,;=:@/?] | %[0-9a-f][0-9a-f] )* )? 486 $ 487 } 488 if {$state(-strict) && ![regexp -- $validityRE $srvurl]} { 489 unset $token 490 # Provide a better error message in this error case 491 if {[regexp {(?i)%(?![0-9a-f][0-9a-f])..} $srvurl bad]} { 492 return -code error \ 493 "Illegal encoding character usage \"$bad\" in URL path" 494 } 495 return -code error "Illegal characters in URL path" 496 } 497 } else { 498 set srvurl / 499 } 500 if {$proto eq ""} { 501 set proto http 502 } 503 if {![info exists urlTypes($proto)]} { 504 unset $token 505 return -code error "Unsupported URL type \"$proto\"" 506 } 507 set defport [lindex $urlTypes($proto) 0] 508 set defcmd [lindex $urlTypes($proto) 1] 509 510 if {$port eq ""} { 511 set port $defport 512 } 513 if {![catch {$http(-proxyfilter) $host} proxy]} { 514 set phost [lindex $proxy 0] 515 set pport [lindex $proxy 1] 516 } 517 518 # OK, now reassemble into a full URL 519 set url ${proto}:// 520 if {$user ne ""} { 521 append url $user 522 append url @ 523 } 524 append url $host 525 if {$port != $defport} { 526 append url : $port 527 } 528 append url $srvurl 529 # Don't append the fragment! 530 set state(url) $url 531 532 # If a timeout is specified we set up the after event and arrange for an 533 # asynchronous socket connection. 534 535 set sockopts [list] 536 if {$state(-timeout) > 0} { 537 set state(after) [after $state(-timeout) \ 538 [list http::reset $token timeout]] 539 lappend sockopts -async 540 } 541 542 # If we are using the proxy, we must pass in the full URL that includes 543 # the server name. 544 545 if {[info exists phost] && ($phost ne "")} { 546 set srvurl $url 547 set targetAddr [list $phost $pport] 548 } else { 549 set targetAddr [list $host $port] 550 } 551 # Proxy connections aren't shared among different hosts. 552 set state(socketinfo) $host:$port 553 554 # See if we are supposed to use a previously opened channel. 555 if {$state(-keepalive)} { 556 variable socketmap 557 if {[info exists socketmap($state(socketinfo))]} { 558 if {[catch {fconfigure $socketmap($state(socketinfo))}]} { 559 Log "WARNING: socket for $state(socketinfo) was closed" 560 unset socketmap($state(socketinfo)) 561 } else { 562 set sock $socketmap($state(socketinfo)) 563 Log "reusing socket $sock for $state(socketinfo)" 564 catch {fileevent $sock writable {}} 565 catch {fileevent $sock readable {}} 566 } 567 } 568 # don't automatically close this connection socket 569 set state(connection) {} 570 } 571 if {![info exists sock]} { 572 # Pass -myaddr directly to the socket command 573 if {[info exists state(-myaddr)]} { 574 lappend sockopts -myaddr $state(-myaddr) 575 } 576 if {[catch {eval $defcmd $sockopts $targetAddr} sock]} { 577 # something went wrong while trying to establish the connection. 578 # Clean up after events and such, but DON'T call the command 579 # callback (if available) because we're going to throw an 580 # exception from here instead. 581 582 set state(sock) $sock 583 Finish $token "" 1 584 cleanup $token 585 return -code error $sock 586 } 587 } 588 set state(sock) $sock 589 Log "Using $sock for $state(socketinfo)" \ 590 [expr {$state(-keepalive)?"keepalive":""}] 591 if {$state(-keepalive)} { 592 set socketmap($state(socketinfo)) $sock 593 } 594 595 # Wait for the connection to complete. 596 597 if {$state(-timeout) > 0} { 598 fileevent $sock writable [list http::Connect $token] 599 http::wait $token 600 601 if {![info exists state]} { 602 # If we timed out then Finish has been called and the users 603 # command callback may have cleaned up the token. If so we end up 604 # here with nothing left to do. 605 return $token 606 } elseif {$state(status) eq "error"} { 607 # Something went wrong while trying to establish the connection. 608 # Clean up after events and such, but DON'T call the command 609 # callback (if available) because we're going to throw an 610 # exception from here instead. 611 set err [lindex $state(error) 0] 612 cleanup $token 613 return -code error $err 614 } elseif {$state(status) ne "connect"} { 615 # Likely to be connection timeout 616 return $token 617 } 618 set state(status) "" 619 } 620 621 # Send data in cr-lf format, but accept any line terminators 622 623 fconfigure $sock -translation {auto crlf} -buffersize $state(-blocksize) 624 625 # The following is disallowed in safe interpreters, but the socket is 626 # already in non-blocking mode in that case. 627 628 catch {fconfigure $sock -blocking off} 629 set how GET 630 if {$isQuery} { 631 set state(querylength) [string length $state(-query)] 632 if {$state(querylength) > 0} { 633 set how POST 634 set contDone 0 635 } else { 636 # There's no query data. 637 unset state(-query) 638 set isQuery 0 639 } 640 } elseif {$state(-validate)} { 641 set how HEAD 642 } elseif {$isQueryChannel} { 643 set how POST 644 # The query channel must be blocking for the async Write to 645 # work properly. 646 fconfigure $state(-querychannel) -blocking 1 -translation binary 647 set contDone 0 648 } 649 if {[info exists state(-method)] && $state(-method) ne ""} { 650 set how $state(-method) 651 } 652 653 if {[catch { 654 puts $sock "$how $srvurl HTTP/$state(-protocol)" 655 puts $sock "Accept: $http(-accept)" 656 array set hdrs $state(-headers) 657 if {[info exists hdrs(Host)]} { 658 # Allow Host spoofing. [Bug 928154] 659 puts $sock "Host: $hdrs(Host)" 660 } elseif {$port == $defport} { 661 # Don't add port in this case, to handle broken servers. [Bug 662 # #504508] 663 puts $sock "Host: $host" 664 } else { 665 puts $sock "Host: $host:$port" 666 } 667 unset hdrs 668 puts $sock "User-Agent: $http(-useragent)" 669 if {$state(-protocol) == 1.0 && $state(-keepalive)} { 670 puts $sock "Connection: keep-alive" 671 } 672 if {$state(-protocol) > 1.0 && !$state(-keepalive)} { 673 puts $sock "Connection: close" ;# RFC2616 sec 8.1.2.1 674 } 675 if {[info exists phost] && ($phost ne "") && $state(-keepalive)} { 676 puts $sock "Proxy-Connection: Keep-Alive" 677 } 678 set accept_encoding_seen 0 679 foreach {key value} $state(-headers) { 680 if {[string equal -nocase $key "host"]} { 681 continue 682 } 683 if {[string equal -nocase $key "accept-encoding"]} { 684 set accept_encoding_seen 1 685 } 686 set value [string map [list \n "" \r ""] $value] 687 set key [string trim $key] 688 if {[string equal -nocase $key "content-length"]} { 689 set contDone 1 690 set state(querylength) $value 691 } 692 if {[string length $key]} { 693 puts $sock "$key: $value" 694 } 695 } 696 # Soft zlib dependency check - no package require 697 if { 698 !$accept_encoding_seen && 699 ([package vsatisfies [package provide Tcl] 8.6] 700 || [llength [package provide zlib]]) && 701 !([info exists state(-channel)] || [info exists state(-handler)]) 702 } then { 703 puts $sock "Accept-Encoding: gzip, identity, *;q=0.1" 704 } 705 if {$isQueryChannel && $state(querylength) == 0} { 706 # Try to determine size of data in channel. If we cannot seek, the 707 # surrounding catch will trap us 708 709 set start [tell $state(-querychannel)] 710 seek $state(-querychannel) 0 end 711 set state(querylength) \ 712 [expr {[tell $state(-querychannel)] - $start}] 713 seek $state(-querychannel) $start 714 } 715 716 # Flush the request header and set up the fileevent that will either 717 # push the POST data or read the response. 718 # 719 # fileevent note: 720 # 721 # It is possible to have both the read and write fileevents active at 722 # this point. The only scenario it seems to affect is a server that 723 # closes the connection without reading the POST data. (e.g., early 724 # versions TclHttpd in various error cases). Depending on the 725 # platform, the client may or may not be able to get the response from 726 # the server because of the error it will get trying to write the post 727 # data. Having both fileevents active changes the timing and the 728 # behavior, but no two platforms (among Solaris, Linux, and NT) behave 729 # the same, and none behave all that well in any case. Servers should 730 # always read their POST data if they expect the client to read their 731 # response. 732 733 if {$isQuery || $isQueryChannel} { 734 puts $sock "Content-Type: $state(-type)" 735 if {!$contDone} { 736 puts $sock "Content-Length: $state(querylength)" 737 } 738 puts $sock "" 739 fconfigure $sock -translation {auto binary} 740 fileevent $sock writable [list http::Write $token] 741 } else { 742 puts $sock "" 743 flush $sock 744 fileevent $sock readable [list http::Event $sock $token] 745 } 746 747 if {![info exists state(-command)]} { 748 # geturl does EVERYTHING asynchronously, so if the user calls it 749 # synchronously, we just do a wait here. 750 751 wait $token 752 if {$state(status) eq "error"} { 753 # Something went wrong, so throw the exception, and the 754 # enclosing catch will do cleanup. 755 return -code error [lindex $state(error) 0] 756 } 757 } 758 } err]} then { 759 # The socket probably was never connected, or the connection dropped 760 # later. 761 762 # Clean up after events and such, but DON'T call the command callback 763 # (if available) because we're going to throw an exception from here 764 # instead. 765 766 # if state(status) is error, it means someone's already called Finish 767 # to do the above-described clean up. 768 if {$state(status) ne "error"} { 769 Finish $token $err 1 770 } 771 cleanup $token 772 return -code error $err 773 } 774 775 return $token 776} 777 778# Data access functions: 779# Data - the URL data 780# Status - the transaction status: ok, reset, eof, timeout 781# Code - the HTTP transaction code, e.g., 200 782# Size - the size of the URL data 783 784proc http::data {token} { 785 variable $token 786 upvar 0 $token state 787 return $state(body) 788} 789proc http::status {token} { 790 if {![info exists $token]} { 791 return "error" 792 } 793 variable $token 794 upvar 0 $token state 795 return $state(status) 796} 797proc http::code {token} { 798 variable $token 799 upvar 0 $token state 800 return $state(http) 801} 802proc http::ncode {token} { 803 variable $token 804 upvar 0 $token state 805 if {[regexp {[0-9]{3}} $state(http) numeric_code]} { 806 return $numeric_code 807 } else { 808 return $state(http) 809 } 810} 811proc http::size {token} { 812 variable $token 813 upvar 0 $token state 814 return $state(currentsize) 815} 816proc http::meta {token} { 817 variable $token 818 upvar 0 $token state 819 return $state(meta) 820} 821proc http::error {token} { 822 variable $token 823 upvar 0 $token state 824 if {[info exists state(error)]} { 825 return $state(error) 826 } 827 return "" 828} 829 830# http::cleanup 831# 832# Garbage collect the state associated with a transaction 833# 834# Arguments 835# token The token returned from http::geturl 836# 837# Side Effects 838# unsets the state array 839 840proc http::cleanup {token} { 841 variable $token 842 upvar 0 $token state 843 if {[info exists state]} { 844 unset state 845 } 846} 847 848# http::Connect 849# 850# This callback is made when an asyncronous connection completes. 851# 852# Arguments 853# token The token returned from http::geturl 854# 855# Side Effects 856# Sets the status of the connection, which unblocks 857# the waiting geturl call 858 859proc http::Connect {token} { 860 variable $token 861 upvar 0 $token state 862 global errorInfo errorCode 863 if { 864 [eof $state(sock)] || 865 [string length [fconfigure $state(sock) -error]] 866 } then { 867 Finish $token "connect failed [fconfigure $state(sock) -error]" 1 868 } else { 869 set state(status) connect 870 fileevent $state(sock) writable {} 871 } 872 return 873} 874 875# http::Write 876# 877# Write POST query data to the socket 878# 879# Arguments 880# token The token for the connection 881# 882# Side Effects 883# Write the socket and handle callbacks. 884 885proc http::Write {token} { 886 variable $token 887 upvar 0 $token state 888 set sock $state(sock) 889 890 # Output a block. Tcl will buffer this if the socket blocks 891 set done 0 892 if {[catch { 893 # Catch I/O errors on dead sockets 894 895 if {[info exists state(-query)]} { 896 # Chop up large query strings so queryprogress callback can give 897 # smooth feedback. 898 899 puts -nonewline $sock \ 900 [string range $state(-query) $state(queryoffset) \ 901 [expr {$state(queryoffset) + $state(-queryblocksize) - 1}]] 902 incr state(queryoffset) $state(-queryblocksize) 903 if {$state(queryoffset) >= $state(querylength)} { 904 set state(queryoffset) $state(querylength) 905 puts $sock "" 906 set done 1 907 } 908 } else { 909 # Copy blocks from the query channel 910 911 set outStr [read $state(-querychannel) $state(-queryblocksize)] 912 puts -nonewline $sock $outStr 913 incr state(queryoffset) [string length $outStr] 914 if {[eof $state(-querychannel)]} { 915 set done 1 916 } 917 } 918 } err]} then { 919 # Do not call Finish here, but instead let the read half of the socket 920 # process whatever server reply there is to get. 921 922 set state(posterror) $err 923 set done 1 924 } 925 if {$done} { 926 catch {flush $sock} 927 fileevent $sock writable {} 928 fileevent $sock readable [list http::Event $sock $token] 929 } 930 931 # Callback to the client after we've completely handled everything. 932 933 if {[string length $state(-queryprogress)]} { 934 eval $state(-queryprogress) \ 935 [list $token $state(querylength) $state(queryoffset)] 936 } 937} 938 939# http::Event 940# 941# Handle input on the socket 942# 943# Arguments 944# sock The socket receiving input. 945# token The token returned from http::geturl 946# 947# Side Effects 948# Read the socket and handle callbacks. 949 950proc http::Event {sock token} { 951 variable $token 952 upvar 0 $token state 953 954 if {![info exists state]} { 955 Log "Event $sock with invalid token '$token' - remote close?" 956 if {![eof $sock]} { 957 if {[set d [read $sock]] ne ""} { 958 Log "WARNING: additional data left on closed socket" 959 } 960 } 961 CloseSocket $sock 962 return 963 } 964 if {$state(state) eq "connecting"} { 965 if {[catch {gets $sock state(http)} n]} { 966 return [Finish $token $n] 967 } elseif {$n >= 0} { 968 set state(state) "header" 969 } 970 } elseif {$state(state) eq "header"} { 971 if {[catch {gets $sock line} n]} { 972 return [Finish $token $n] 973 } elseif {$n == 0} { 974 # We have now read all headers 975 # We ignore HTTP/1.1 100 Continue returns. RFC2616 sec 8.2.3 976 if {$state(http) == "" || [lindex $state(http) 1] == 100} { 977 return 978 } 979 980 set state(state) body 981 982 # If doing a HEAD, then we won't get any body 983 if {$state(-validate)} { 984 Eof $token 985 return 986 } 987 988 # For non-chunked transfer we may have no body - in this case we 989 # may get no further file event if the connection doesn't close 990 # and no more data is sent. We can tell and must finish up now - 991 # not later. 992 if { 993 !(([info exists state(connection)] 994 && ($state(connection) eq "close")) 995 || [info exists state(transfer)]) 996 && ($state(totalsize) == 0) 997 } then { 998 Log "body size is 0 and no events likely - complete." 999 Eof $token 1000 return 1001 } 1002 1003 # We have to use binary translation to count bytes properly. 1004 fconfigure $sock -translation binary 1005 1006 if { 1007 $state(-binary) || ![string match -nocase text* $state(type)] 1008 } then { 1009 # Turn off conversions for non-text data 1010 set state(binary) 1 1011 } 1012 if { 1013 $state(binary) || [string match *gzip* $state(coding)] || 1014 [string match *compress* $state(coding)] 1015 } then { 1016 if {[info exists state(-channel)]} { 1017 fconfigure $state(-channel) -translation binary 1018 } 1019 } 1020 if { 1021 [info exists state(-channel)] && 1022 ![info exists state(-handler)] 1023 } then { 1024 # Initiate a sequence of background fcopies 1025 fileevent $sock readable {} 1026 CopyStart $sock $token 1027 return 1028 } 1029 } elseif {$n > 0} { 1030 # Process header lines 1031 if {[regexp -nocase {^([^:]+):(.+)$} $line x key value]} { 1032 switch -- [string tolower $key] { 1033 content-type { 1034 set state(type) [string trim [string tolower $value]] 1035 # grab the optional charset information 1036 if {[regexp -nocase \ 1037 {charset\s*=\s*\"((?:[^""]|\\\")*)\"} \ 1038 $state(type) -> cs]} { 1039 set state(charset) [string map {{\"} \"} $cs] 1040 } else { 1041 regexp -nocase {charset\s*=\s*(\S+?);?} \ 1042 $state(type) -> state(charset) 1043 } 1044 } 1045 content-length { 1046 set state(totalsize) [string trim $value] 1047 } 1048 content-encoding { 1049 set state(coding) [string trim $value] 1050 } 1051 transfer-encoding { 1052 set state(transfer) \ 1053 [string trim [string tolower $value]] 1054 } 1055 proxy-connection - 1056 connection { 1057 set state(connection) \ 1058 [string trim [string tolower $value]] 1059 } 1060 } 1061 lappend state(meta) $key [string trim $value] 1062 } 1063 } 1064 } else { 1065 # Now reading body 1066 if {[catch { 1067 if {[info exists state(-handler)]} { 1068 set n [eval $state(-handler) [list $sock $token]] 1069 } elseif {[info exists state(transfer_final)]} { 1070 set line [getTextLine $sock] 1071 set n [string length $line] 1072 if {$n > 0} { 1073 Log "found $n bytes following final chunk" 1074 append state(transfer_final) $line 1075 } else { 1076 Log "final chunk part" 1077 Eof $token 1078 } 1079 } elseif { 1080 [info exists state(transfer)] 1081 && $state(transfer) eq "chunked" 1082 } then { 1083 set size 0 1084 set chunk [getTextLine $sock] 1085 set n [string length $chunk] 1086 if {[string trim $chunk] ne ""} { 1087 scan $chunk %x size 1088 if {$size != 0} { 1089 set bl [fconfigure $sock -blocking] 1090 fconfigure $sock -blocking 1 1091 set chunk [read $sock $size] 1092 fconfigure $sock -blocking $bl 1093 set n [string length $chunk] 1094 if {$n >= 0} { 1095 append state(body) $chunk 1096 } 1097 if {$size != [string length $chunk]} { 1098 Log "WARNING: mis-sized chunk:\ 1099 was [string length $chunk], should be $size" 1100 } 1101 getTextLine $sock 1102 } else { 1103 set state(transfer_final) {} 1104 } 1105 } 1106 } else { 1107 #Log "read non-chunk $state(currentsize) of $state(totalsize)" 1108 set block [read $sock $state(-blocksize)] 1109 set n [string length $block] 1110 if {$n >= 0} { 1111 append state(body) $block 1112 } 1113 } 1114 if {[info exists state]} { 1115 if {$n >= 0} { 1116 incr state(currentsize) $n 1117 } 1118 # If Content-Length - check for end of data. 1119 if { 1120 ($state(totalsize) > 0) 1121 && ($state(currentsize) >= $state(totalsize)) 1122 } then { 1123 Eof $token 1124 } 1125 } 1126 } err]} then { 1127 return [Finish $token $err] 1128 } else { 1129 if {[info exists state(-progress)]} { 1130 eval $state(-progress) \ 1131 [list $token $state(totalsize) $state(currentsize)] 1132 } 1133 } 1134 } 1135 1136 # catch as an Eof above may have closed the socket already 1137 if {![catch {eof $sock} eof] && $eof} { 1138 if {[info exists $token]} { 1139 set state(connection) close 1140 Eof $token 1141 } else { 1142 # open connection closed on a token that has been cleaned up. 1143 CloseSocket $sock 1144 } 1145 return 1146 } 1147} 1148 1149# http::getTextLine -- 1150# 1151# Get one line with the stream in blocking crlf mode 1152# 1153# Arguments 1154# sock The socket receiving input. 1155# 1156# Results: 1157# The line of text, without trailing newline 1158 1159proc http::getTextLine {sock} { 1160 set tr [fconfigure $sock -translation] 1161 set bl [fconfigure $sock -blocking] 1162 fconfigure $sock -translation crlf -blocking 1 1163 set r [gets $sock] 1164 fconfigure $sock -translation $tr -blocking $bl 1165 return $r 1166} 1167 1168# http::CopyStart 1169# 1170# Error handling wrapper around fcopy 1171# 1172# Arguments 1173# sock The socket to copy from 1174# token The token returned from http::geturl 1175# 1176# Side Effects 1177# This closes the connection upon error 1178 1179proc http::CopyStart {sock token} { 1180 variable $token 1181 upvar 0 $token state 1182 if {[catch { 1183 fcopy $sock $state(-channel) -size $state(-blocksize) -command \ 1184 [list http::CopyDone $token] 1185 } err]} then { 1186 Finish $token $err 1187 } 1188} 1189 1190# http::CopyDone 1191# 1192# fcopy completion callback 1193# 1194# Arguments 1195# token The token returned from http::geturl 1196# count The amount transfered 1197# 1198# Side Effects 1199# Invokes callbacks 1200 1201proc http::CopyDone {token count {error {}}} { 1202 variable $token 1203 upvar 0 $token state 1204 set sock $state(sock) 1205 incr state(currentsize) $count 1206 if {[info exists state(-progress)]} { 1207 eval $state(-progress) \ 1208 [list $token $state(totalsize) $state(currentsize)] 1209 } 1210 # At this point the token may have been reset 1211 if {[string length $error]} { 1212 Finish $token $error 1213 } elseif {[catch {eof $sock} iseof] || $iseof} { 1214 Eof $token 1215 } else { 1216 CopyStart $sock $token 1217 } 1218} 1219 1220# http::Eof 1221# 1222# Handle eof on the socket 1223# 1224# Arguments 1225# token The token returned from http::geturl 1226# 1227# Side Effects 1228# Clean up the socket 1229 1230proc http::Eof {token {force 0}} { 1231 variable $token 1232 upvar 0 $token state 1233 if {$state(state) eq "header"} { 1234 # Premature eof 1235 set state(status) eof 1236 } else { 1237 set state(status) ok 1238 } 1239 1240 if {($state(coding) eq "gzip") && [string length $state(body)] > 0} { 1241 if {[catch { 1242 if {[package vsatisfies [package present Tcl] 8.6]} { 1243 # The zlib integration into 8.6 includes proper gzip support 1244 set state(body) [zlib gunzip $state(body)] 1245 } else { 1246 set state(body) [Gunzip $state(body)] 1247 } 1248 } err]} then { 1249 return [Finish $token $err] 1250 } 1251 } 1252 1253 if {!$state(binary)} { 1254 # If we are getting text, set the incoming channel's encoding 1255 # correctly. iso8859-1 is the RFC default, but this could be any IANA 1256 # charset. However, we only know how to convert what we have 1257 # encodings for. 1258 1259 set enc [CharsetToEncoding $state(charset)] 1260 if {$enc ne "binary"} { 1261 set state(body) [encoding convertfrom $enc $state(body)] 1262 } 1263 1264 # Translate text line endings. 1265 set state(body) [string map {\r\n \n \r \n} $state(body)] 1266 } 1267 1268 Finish $token 1269} 1270 1271# http::wait -- 1272# 1273# See documentation for details. 1274# 1275# Arguments: 1276# token Connection token. 1277# 1278# Results: 1279# The status after the wait. 1280 1281proc http::wait {token} { 1282 variable $token 1283 upvar 0 $token state 1284 1285 if {![info exists state(status)] || $state(status) eq ""} { 1286 # We must wait on the original variable name, not the upvar alias 1287 vwait ${token}(status) 1288 } 1289 1290 return [status $token] 1291} 1292 1293# http::formatQuery -- 1294# 1295# See documentation for details. Call http::formatQuery with an even 1296# number of arguments, where the first is a name, the second is a value, 1297# the third is another name, and so on. 1298# 1299# Arguments: 1300# args A list of name-value pairs. 1301# 1302# Results: 1303# TODO 1304 1305proc http::formatQuery {args} { 1306 set result "" 1307 set sep "" 1308 foreach i $args { 1309 append result $sep [mapReply $i] 1310 if {$sep eq "="} { 1311 set sep & 1312 } else { 1313 set sep = 1314 } 1315 } 1316 return $result 1317} 1318 1319# http::mapReply -- 1320# 1321# Do x-www-urlencoded character mapping 1322# 1323# Arguments: 1324# string The string the needs to be encoded 1325# 1326# Results: 1327# The encoded string 1328 1329proc http::mapReply {string} { 1330 variable http 1331 variable formMap 1332 1333 # The spec says: "non-alphanumeric characters are replaced by '%HH'". Use 1334 # a pre-computed map and [string map] to do the conversion (much faster 1335 # than [regsub]/[subst]). [Bug 1020491] 1336 1337 if {$http(-urlencoding) ne ""} { 1338 set string [encoding convertto $http(-urlencoding) $string] 1339 return [string map $formMap $string] 1340 } 1341 set converted [string map $formMap $string] 1342 if {[string match "*\[\u0100-\uffff\]*" $converted]} { 1343 regexp {[\u0100-\uffff]} $converted badChar 1344 # Return this error message for maximum compatability... :^/ 1345 return -code error \ 1346 "can't read \"formMap($badChar)\": no such element in array" 1347 } 1348 return $converted 1349} 1350 1351# http::ProxyRequired -- 1352# Default proxy filter. 1353# 1354# Arguments: 1355# host The destination host 1356# 1357# Results: 1358# The current proxy settings 1359 1360proc http::ProxyRequired {host} { 1361 variable http 1362 if {[info exists http(-proxyhost)] && [string length $http(-proxyhost)]} { 1363 if { 1364 ![info exists http(-proxyport)] || 1365 ![string length $http(-proxyport)] 1366 } then { 1367 set http(-proxyport) 8080 1368 } 1369 return [list $http(-proxyhost) $http(-proxyport)] 1370 } 1371} 1372 1373# http::CharsetToEncoding -- 1374# 1375# Tries to map a given IANA charset to a tcl encoding. If no encoding 1376# can be found, returns binary. 1377# 1378 1379proc http::CharsetToEncoding {charset} { 1380 variable encodings 1381 1382 set charset [string tolower $charset] 1383 if {[regexp {iso-?8859-([0-9]+)} $charset -> num]} { 1384 set encoding "iso8859-$num" 1385 } elseif {[regexp {iso-?2022-(jp|kr)} $charset -> ext]} { 1386 set encoding "iso2022-$ext" 1387 } elseif {[regexp {shift[-_]?js} $charset]} { 1388 set encoding "shiftjis" 1389 } elseif {[regexp {(?:windows|cp)-?([0-9]+)} $charset -> num]} { 1390 set encoding "cp$num" 1391 } elseif {$charset eq "us-ascii"} { 1392 set encoding "ascii" 1393 } elseif {[regexp {(?:iso-?)?lat(?:in)?-?([0-9]+)} $charset -> num]} { 1394 switch -- $num { 1395 5 {set encoding "iso8859-9"} 1396 1 - 2 - 3 { 1397 set encoding "iso8859-$num" 1398 } 1399 } 1400 } else { 1401 # other charset, like euc-xx, utf-8,... may directly map to encoding 1402 set encoding $charset 1403 } 1404 set idx [lsearch -exact $encodings $encoding] 1405 if {$idx >= 0} { 1406 return $encoding 1407 } else { 1408 return "binary" 1409 } 1410} 1411 1412# http::Gunzip -- 1413# 1414# Decompress data transmitted using the gzip transfer coding. 1415# 1416 1417# FIX ME: redo using zlib sinflate 1418proc http::Gunzip {data} { 1419 binary scan $data Scb5icc magic method flags time xfl os 1420 set pos 10 1421 if {$magic != 0x1f8b} { 1422 return -code error "invalid data: supplied data is not in gzip format" 1423 } 1424 if {$method != 8} { 1425 return -code error "invalid compression method" 1426 } 1427 1428 # lassign [split $flags ""] f_text f_crc f_extra f_name f_comment 1429 foreach {f_text f_crc f_extra f_name f_comment} [split $flags ""] break 1430 set extra "" 1431 if {$f_extra} { 1432 binary scan $data @${pos}S xlen 1433 incr pos 2 1434 set extra [string range $data $pos $xlen] 1435 set pos [incr xlen] 1436 } 1437 1438 set name "" 1439 if {$f_name} { 1440 set ndx [string first \0 $data $pos] 1441 set name [string range $data $pos $ndx] 1442 set pos [incr ndx] 1443 } 1444 1445 set comment "" 1446 if {$f_comment} { 1447 set ndx [string first \0 $data $pos] 1448 set comment [string range $data $pos $ndx] 1449 set pos [incr ndx] 1450 } 1451 1452 set fcrc "" 1453 if {$f_crc} { 1454 set fcrc [string range $data $pos [incr pos]] 1455 incr pos 1456 } 1457 1458 binary scan [string range $data end-7 end] ii crc size 1459 set inflated [zlib inflate [string range $data $pos end-8]] 1460 set chk [zlib crc32 $inflated] 1461 if {($crc & 0xffffffff) != ($chk & 0xffffffff)} { 1462 return -code error "invalid data: checksum mismatch $crc != $chk" 1463 } 1464 return $inflated 1465} 1466 1467# Local variables: 1468# indent-tabs-mode: t 1469# End: 1470