1# http.tcl --
2#
3#	Client-side HTTP for GET, POST, and HEAD commands. These routines can
4#	be used in untrusted code that uses the Safesock security policy. These
5#	procedures use a callback interface to avoid using vwait, which is not
6#	defined in the safe base.
7#
8# See the file "license.terms" for information on usage and redistribution of
9# this file, and for a DISCLAIMER OF ALL WARRANTIES.
10#
11# RCS: @(#) $Id: http.tcl,v 1.8 2008/08/11 22:14:26 patthoyts Exp $
12
13# Rough version history post-core-split:
14# 2.5   Added HTTP/1.1 support for persistent connections. New options
15#       -protocol, -keepalive, -socketvar. (Pat Thoyts)
16# 2.6   Added support for HTTP/1.1 extensions.  New option -method used
17#       for WebDav. (Vince Darley)
18# 2.6.1 Synchronized with Tcl http 2.4.4 (encoding enhancements)
19# 2.6.2 Removed to -socketvar option and now handle socket usage internally
20# 2.6.3 Added support for chunked encoding.
21# 2.6.4 Merged in jcw's webdav mods to fix the chunked transfer
22# 2.6.5 Merged up to 2.5.3 from tcl cvs (formMap, url decomposition)
23# 2.6.6 Support content-encoding gzip. Handle 0 length body in chunked.
24# 2.6.7 Merged up to 2.5.5 from tcl cvs, whitespace corrections
25# 2.6.8 Merged with core version in 8.5.2 and 8.4.19 and above changes.
26#	Core is 2.7, this v2.6.8 has defaultKeepalive 1 and different
27#	default -useragent.
28# 2.6.9 Merged fix for zlib crc check on 64bit systems.
29
30package require Tcl 8.4
31# keep this in sync with pkgIndex.tcl
32package provide http 2.6.9
33
34namespace eval http {
35    # Allow resourcing to not clobber existing data
36
37    variable http
38    if {![info exists http]} {
39	array set http {
40	    -accept */*
41	    -proxyhost {}
42	    -proxyport {}
43	    -proxyfilter http::ProxyRequired
44	    -urlencoding utf-8
45	}
46	# Use a Mozilla compatible useragent header to avoid problems with
47	# some web sites.
48        set http(-useragent) \
49            "Mozilla/5.0 ([string totitle $::tcl_platform(platform)]; U;\
50             $::tcl_platform(os) $::tcl_platform(osVersion))\
51             http/[package provide http] Tcl/[package provide Tcl]"
52    }
53
54    proc init {} {
55	# Set up the map for quoting chars. RFC3986 Section 2.3 say percent
56	# encode all except: "... percent-encoded octets in the ranges of ALPHA
57	# (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), period (%2E),
58	# underscore (%5F), or tilde (%7E) should not be created by URI
59	# producers ..."
60	for {set i 0} {$i <= 256} {incr i} {
61	    set c [format %c $i]
62	    if {![string match {[-._~a-zA-Z0-9]} $c]} {
63		set map($c) %[format %.2x $i]
64	    }
65	}
66	# These are handled specially
67	set map(\n) %0d%0a
68	variable formMap [array get map]
69
70	# Create a map for HTTP/1.1 open sockets
71	variable socketmap
72	if {[info exists socketmap]} {
73	    # Close but don't remove open sockets on re-init
74	    foreach {url sock} [array get socketmap] {
75		catch {close $sock}
76	    }
77	}
78	array set socketmap {}
79    }
80    init
81
82    variable urlTypes
83    if {![info exists urlTypes]} {
84	set urlTypes(http) [list 80 ::socket]
85    }
86
87    variable encodings [string tolower [encoding names]]
88    # This can be changed, but iso8859-1 is the RFC standard.
89    variable defaultCharset
90    if {![info exists defaultCharset]} {
91	set defaultCharset "iso8859-1"
92    }
93
94    # Force RFC 3986 strictness in geturl url verification?
95    variable strict
96    if {![info exists strict]} {
97	set strict 1
98    }
99
100    # Let user control default keepalive for compatibility
101    variable defaultKeepalive
102    if {![info exists defaultKeepalive]} {
103	set defaultKeepalive 0
104    }
105
106    namespace export geturl config reset wait formatQuery register unregister
107    # Useful, but not exported: data size status code
108}
109
110# http::Log --
111#
112#	Debugging output -- define this to observe HTTP/1.1 socket usage.
113#	Should echo any args received.
114#
115# Arguments:
116#     msg	Message to output
117#
118proc http::Log {args} {}
119
120# http::register --
121#
122#     See documentation for details.
123#
124# Arguments:
125#     proto           URL protocol prefix, e.g. https
126#     port            Default port for protocol
127#     command         Command to use to create socket
128# Results:
129#     list of port and command that was registered.
130
131proc http::register {proto port command} {
132    variable urlTypes
133    set urlTypes($proto) [list $port $command]
134}
135
136# http::unregister --
137#
138#     Unregisters URL protocol handler
139#
140# Arguments:
141#     proto           URL protocol prefix, e.g. https
142# Results:
143#     list of port and command that was unregistered.
144
145proc http::unregister {proto} {
146    variable urlTypes
147    if {![info exists urlTypes($proto)]} {
148	return -code error "unsupported url type \"$proto\""
149    }
150    set old $urlTypes($proto)
151    unset urlTypes($proto)
152    return $old
153}
154
155# http::config --
156#
157#	See documentation for details.
158#
159# Arguments:
160#	args		Options parsed by the procedure.
161# Results:
162#        TODO
163
164proc http::config {args} {
165    variable http
166    set options [lsort [array names http -*]]
167    set usage [join $options ", "]
168    if {[llength $args] == 0} {
169	set result {}
170	foreach name $options {
171	    lappend result $name $http($name)
172	}
173	return $result
174    }
175    set options [string map {- ""} $options]
176    set pat ^-([join $options |])$
177    if {[llength $args] == 1} {
178	set flag [lindex $args 0]
179	if {[regexp -- $pat $flag]} {
180	    return $http($flag)
181	} else {
182	    return -code error "Unknown option $flag, must be: $usage"
183	}
184    } else {
185	foreach {flag value} $args {
186	    if {[regexp -- $pat $flag]} {
187		set http($flag) $value
188	    } else {
189		return -code error "Unknown option $flag, must be: $usage"
190	    }
191	}
192    }
193}
194
195# http::Finish --
196#
197#	Clean up the socket and eval close time callbacks
198#
199# Arguments:
200#	token	    Connection token.
201#	errormsg    (optional) If set, forces status to error.
202#       skipCB      (optional) If set, don't call the -command callback. This
203#                   is useful when geturl wants to throw an exception instead
204#                   of calling the callback. That way, the same error isn't
205#                   reported to two places.
206#
207# Side Effects:
208#        Closes the socket
209
210proc http::Finish { token {errormsg ""} {skipCB 0}} {
211    variable $token
212    upvar 0 $token state
213    global errorInfo errorCode
214    if {$errormsg ne ""} {
215	set state(error) [list $errormsg $errorInfo $errorCode]
216	set state(status) "error"
217    }
218    if {($state(status) eq "timeout") || ($state(status) eq "error")
219        || ([info exists state(connection)] && ($state(connection) eq "close"))
220    } {
221        CloseSocket $state(sock) $token
222    }
223    if {[info exists state(after)]} { after cancel $state(after) }
224    if {[info exists state(-command)] && !$skipCB} {
225	if {[catch {eval $state(-command) {$token}} err]} {
226	    if {$errormsg eq ""} {
227		set state(error) [list $err $errorInfo $errorCode]
228		set state(status) error
229	    }
230	}
231	# Command callback may already have unset our state
232	unset -nocomplain state(-command)
233    }
234}
235
236# http::CloseSocket -
237#
238#	Close a socket and remove it from the persistent sockets table.
239#	If possible an http token is included here but when we are called
240#	from a fileevent on remote closure we need to find the correct
241#	entry - hence the second section.
242
243proc ::http::CloseSocket {s {token {}}} {
244    variable socketmap
245    catch {fileevent $s readable {}}
246    set conn_id {}
247    if {$token ne ""} {
248        variable $token
249        upvar 0 $token state
250        if {[info exists state(socketinfo)]} {
251            set conn_id $state(socketinfo)
252        }
253    } else {
254        set map [array get socketmap]
255        set ndx [lsearch -exact $map $s]
256        if {$ndx != -1} {
257            incr ndx -1
258            set conn_id [lindex $map $ndx]
259        }
260    }
261    if {$conn_id eq {} || ![info exists socketmap($conn_id)]} {
262        Log "Closing socket $s (no connection info)"
263        if {[catch {close $s} err]} { Log "Error: $err" }
264    } else {
265	if {[info exists socketmap($conn_id)]} {
266	    Log "Closing connection $conn_id (sock $socketmap($conn_id))"
267	    if {[catch {close $socketmap($conn_id)} err]} { Log "Error: $err" }
268	    unset socketmap($conn_id)
269	} else {
270	    Log "Cannot close connection $conn_id - no socket in socket map"
271	}
272    }
273}
274
275# http::reset --
276#
277#	See documentation for details.
278#
279# Arguments:
280#	token	Connection token.
281#	why	Status info.
282#
283# Side Effects:
284#       See Finish
285
286proc http::reset { token {why reset} } {
287    variable $token
288    upvar 0 $token state
289    set state(status) $why
290    catch {fileevent $state(sock) readable {}}
291    catch {fileevent $state(sock) writable {}}
292    Finish $token
293    if {[info exists state(error)]} {
294	set errorlist $state(error)
295	unset state
296	eval ::error $errorlist
297    }
298}
299
300# http::geturl --
301#
302#	Establishes a connection to a remote url via http.
303#
304# Arguments:
305#       url		The http URL to goget.
306#       args		Option value pairs. Valid options include:
307#				-blocksize, -validate, -headers, -timeout
308# Results:
309#	Returns a token for this connection. This token is the name of an array
310#	that the caller should unset to garbage collect the state.
311
312proc http::geturl { url args } {
313    variable http
314    variable urlTypes
315    variable defaultCharset
316    variable defaultKeepalive
317    variable strict
318
319    # Initialize the state variable, an array. We'll return the name of this
320    # array as the token for the transaction.
321
322    if {![info exists http(uid)]} {
323	set http(uid) 0
324    }
325    set token [namespace current]::[incr http(uid)]
326    variable $token
327    upvar 0 $token state
328    reset $token
329
330    # Process command options.
331
332    array set state {
333	-binary		false
334	-blocksize	8192
335	-queryblocksize 8192
336	-validate	0
337	-headers	{}
338	-timeout	0
339	-type		application/x-www-form-urlencoded
340	-queryprogress	{}
341	-protocol	1.1
342	binary		0
343	state		header
344	meta		{}
345	coding		{}
346	currentsize	0
347	totalsize	0
348	querylength	0
349	queryoffset	0
350	type		text/html
351	body		{}
352	status		""
353	http		""
354	connection	close
355    }
356    set state(-keepalive) $defaultKeepalive
357    set state(-strict) $strict
358    # These flags have their types verified [Bug 811170]
359    array set type {
360	-binary		boolean
361	-blocksize	integer
362	-queryblocksize integer
363	-strict		boolean
364	-timeout	integer
365	-validate	boolean
366    }
367    set state(charset)	$defaultCharset
368    set options {
369	-binary -blocksize -channel -command -handler -headers -keepalive
370	-method -myaddr -progress -protocol -query -queryblocksize
371	-querychannel -queryprogress -strict -timeout -type -validate
372    }
373    set usage [join [lsort $options] ", "]
374    set options [string map {- ""} $options]
375    set pat ^-([join $options |])$
376    foreach {flag value} $args {
377	if {[regexp -- $pat $flag]} {
378	    # Validate numbers
379	    if {[info exists type($flag)] &&
380		![string is $type($flag) -strict $value]} {
381		unset $token
382		return -code error "Bad value for $flag ($value), must be $type($flag)"
383	    }
384	    set state($flag) $value
385	} else {
386	    unset $token
387	    return -code error "Unknown option $flag, can be: $usage"
388	}
389    }
390
391    # Make sure -query and -querychannel aren't both specified
392
393    set isQueryChannel [info exists state(-querychannel)]
394    set isQuery [info exists state(-query)]
395    if {$isQuery && $isQueryChannel} {
396	unset $token
397	return -code error "Can't combine -query and -querychannel options!"
398    }
399
400    # Validate URL, determine the server host and port, and check proxy case
401    # Recognize user:pass@host URLs also, although we do not do anything with
402    # that info yet.
403
404    # URLs have basically four parts.
405    # First, before the colon, is the protocol scheme (e.g. http)
406    # Second, for HTTP-like protocols, is the authority
407    #	The authority is preceded by // and lasts up to (but not including)
408    #	the following / and it identifies up to four parts, of which only one,
409    #	the host, is required (if an authority is present at all). All other
410    #	parts of the authority (user name, password, port number) are optional.
411    # Third is the resource name, which is split into two parts at a ?
412    #	The first part (from the single "/" up to "?") is the path, and the
413    #	second part (from that "?" up to "#") is the query. *HOWEVER*, we do
414    #	not need to separate them; we send the whole lot to the server.
415    # Fourth is the fragment identifier, which is everything after the first
416    #	"#" in the URL. The fragment identifier MUST NOT be sent to the server
417    #	and indeed, we don't bother to validate it (it could be an error to
418    #	pass it in here, but it's cheap to strip).
419    #
420    # An example of a URL that has all the parts:
421    #   http://jschmoe:xyzzy@www.bogus.net:8000/foo/bar.tml?q=foo#changes
422    # The "http" is the protocol, the user is "jschmoe", the password is
423    # "xyzzy", the host is "www.bogus.net", the port is "8000", the path is
424    # "/foo/bar.tml", the query is "q=foo", and the fragment is "changes".
425    #
426    # Note that the RE actually combines the user and password parts, as
427    # recommended in RFC 3986. Indeed, that RFC states that putting passwords
428    # in URLs is a Really Bad Idea, something with which I would agree utterly.
429    # Also note that we do not currently support IPv6 addresses.
430    #
431    # From a validation perspective, we need to ensure that the parts of the
432    # URL that are going to the server are correctly encoded.
433    # This is only done if $state(-strict) is true (inherited from
434    # $::http::strict).
435
436    set URLmatcher {(?x)		# this is _expanded_ syntax
437	^
438	(?: (\w+) : ) ?			# <protocol scheme>
439	(?: //
440	    (?:
441		(
442		    [^@/\#?]+		# <userinfo part of authority>
443		) @
444	    )?
445	    ( [^/:\#?]+ )		# <host part of authority>
446	    (?: : (\d+) )?		# <port part of authority>
447	)?
448	( / [^\#?]* (?: \? [^\#?]* )?)?	# <path> (including query)
449	(?: \# (.*) )?			# <fragment>
450	$
451    }
452
453    # Phase one: parse
454    if {![regexp -- $URLmatcher $url -> proto user host port srvurl]} {
455	unset $token
456	return -code error "Unsupported URL: $url"
457    }
458    # Phase two: validate
459    if {$host eq ""} {
460	# Caller has to provide a host name; we do not have a "default host"
461	# that would enable us to handle relative URLs.
462	unset $token
463	return -code error "Missing host part: $url"
464	# Note that we don't check the hostname for validity here; if it's
465	# invalid, we'll simply fail to resolve it later on.
466    }
467    if {$port ne "" && $port > 65535} {
468	unset $token
469	return -code error "Invalid port number: $port"
470    }
471    # The user identification and resource identification parts of the URL can
472    # have encoded characters in them; take care!
473    if {$user ne ""} {
474	# Check for validity according to RFC 3986, Appendix A
475	set validityRE {(?xi)
476	    ^
477	    (?: [-\w.~!$&'()*+,;=:] | %[0-9a-f][0-9a-f] )+
478	    $
479	}
480	if {$state(-strict) && ![regexp -- $validityRE $user]} {
481	    unset $token
482	    # Provide a better error message in this error case
483	    if {[regexp {(?i)%(?![0-9a-f][0-9a-f]).?.?} $user bad]} {
484		return -code error \
485			"Illegal encoding character usage \"$bad\" in URL user"
486	    }
487	    return -code error "Illegal characters in URL user"
488	}
489    }
490    if {$srvurl ne ""} {
491	# Check for validity according to RFC 3986, Appendix A
492	set validityRE {(?xi)
493	    ^
494	    # Path part (already must start with / character)
495	    (?:	      [-\w.~!$&'()*+,;=:@/]  | %[0-9a-f][0-9a-f] )*
496	    # Query part (optional, permits ? characters)
497	    (?: \? (?: [-\w.~!$&'()*+,;=:@/?] | %[0-9a-f][0-9a-f] )* )?
498	    $
499	}
500	if {$state(-strict) && ![regexp -- $validityRE $srvurl]} {
501	    unset $token
502	    # Provide a better error message in this error case
503	    if {[regexp {(?i)%(?![0-9a-f][0-9a-f])..} $srvurl bad]} {
504		return -code error \
505			"Illegal encoding character usage \"$bad\" in URL path"
506	    }
507	    return -code error "Illegal characters in URL path"
508	}
509    } else {
510	set srvurl /
511    }
512    if {$proto eq ""} {
513	set proto http
514    }
515    if {![info exists urlTypes($proto)]} {
516	unset $token
517	return -code error "Unsupported URL type \"$proto\""
518    }
519    set defport [lindex $urlTypes($proto) 0]
520    set defcmd [lindex $urlTypes($proto) 1]
521
522    if {$port eq ""} {
523	set port $defport
524    }
525    if {![catch {$http(-proxyfilter) $host} proxy]} {
526	set phost [lindex $proxy 0]
527	set pport [lindex $proxy 1]
528    }
529
530    # OK, now reassemble into a full URL
531    set url ${proto}://
532    if {$user ne ""} {
533	append url $user
534	append url @
535    }
536    append url $host
537    if {$port != $defport} {
538	append url : $port
539    }
540    append url $srvurl
541    # Don't append the fragment!
542    set state(url) $url
543
544    # If a timeout is specified we set up the after event and arrange for an
545    # asynchronous socket connection.
546
547    set sockopts [list]
548    if {$state(-timeout) > 0} {
549	set state(after) [after $state(-timeout) \
550		[list http::reset $token timeout]]
551	lappend sockopts -async
552    }
553
554    # If we are using the proxy, we must pass in the full URL that includes
555    # the server name.
556
557    if {[info exists phost] && ($phost ne "")} {
558	set srvurl $url
559	set targetAddr [list $phost $pport]
560    } else {
561	set targetAddr [list $host $port]
562    }
563    # Proxy connections aren't shared among different hosts.
564    set state(socketinfo) $host:$port
565
566    # See if we are supposed to use a previously opened channel.
567    if {$state(-keepalive)} {
568	variable socketmap
569	if {[info exists socketmap($state(socketinfo))]} {
570	    if {[catch {fconfigure $socketmap($state(socketinfo))}]} {
571		Log "WARNING: socket for $state(socketinfo) was closed"
572		unset socketmap($state(socketinfo))
573	    } else {
574		set sock $socketmap($state(socketinfo))
575		Log "reusing socket $sock for $state(socketinfo)"
576		catch {fileevent $sock writable {}}
577		catch {fileevent $sock readable {}}
578	    }
579	}
580	# don't automatically close this connection socket
581	set state(connection) {}
582    }
583    if {![info exists sock]} {
584	# Pass -myaddr directly to the socket command
585	if {[info exists state(-myaddr)]} {
586	    lappend sockopts -myaddr $state(-myaddr)
587	}
588        if {[catch {eval $defcmd $sockopts $targetAddr} sock]} {
589            # something went wrong while trying to establish the
590            # connection. Clean up after events and such, but DON'T call the
591            # command callback (if available) because we're going to throw an
592            # exception from here instead.
593
594	    set state(sock) $sock
595            Finish $token "" 1
596            cleanup $token
597            return -code error $sock
598        }
599    }
600    set state(sock) $sock
601    Log "Using $sock for $state(socketinfo)" \
602        [expr {$state(-keepalive)?"keepalive":""}]
603    if {$state(-keepalive)} {
604        set socketmap($state(socketinfo)) $sock
605    }
606
607    # Wait for the connection to complete.
608
609    if {$state(-timeout) > 0} {
610	fileevent $sock writable [list http::Connect $token]
611	http::wait $token
612
613	if {![info exists state]} {
614	    # If we timed out then Finish has been called and the users
615	    # command callback may have cleaned up the token. If so
616	    # we end up here with nothing left to do.
617	    return $token
618	} elseif {$state(status) eq "error"} {
619	    # Something went wrong while trying to establish the connection.
620	    # Clean up after events and such, but DON'T call the command
621	    # callback (if available) because we're going to throw an
622	    # exception from here instead.
623	    set err [lindex $state(error) 0]
624	    cleanup $token
625	    return -code error $err
626	} elseif {$state(status) ne "connect"} {
627	    # Likely to be connection timeout
628	    return $token
629	}
630	set state(status) ""
631    }
632
633    # Send data in cr-lf format, but accept any line terminators
634
635    fconfigure $sock -translation {auto crlf} -buffersize $state(-blocksize)
636
637    # The following is disallowed in safe interpreters, but the socket is
638    # already in non-blocking mode in that case.
639
640    catch {fconfigure $sock -blocking off}
641    set how GET
642    if {$isQuery} {
643	set state(querylength) [string length $state(-query)]
644	if {$state(querylength) > 0} {
645	    set how POST
646	    set contDone 0
647	} else {
648	    # There's no query data.
649	    unset state(-query)
650	    set isQuery 0
651	}
652    } elseif {$state(-validate)} {
653	set how HEAD
654    } elseif {$isQueryChannel} {
655	set how POST
656	# The query channel must be blocking for the async Write to
657	# work properly.
658	fconfigure $state(-querychannel) -blocking 1 -translation binary
659	set contDone 0
660    }
661    if {[info exists state(-method)] && $state(-method) ne ""} {
662	set how $state(-method)
663    }
664
665    if {[catch {
666	puts $sock "$how $srvurl HTTP/$state(-protocol)"
667	puts $sock "Accept: $http(-accept)"
668	array set hdrs $state(-headers)
669	if {[info exists hdrs(Host)]} {
670	    # Allow Host spoofing [Bug 928154]
671	    puts $sock "Host: $hdrs(Host)"
672	} elseif {$port == $defport} {
673	    # Don't add port in this case, to handle broken servers.
674	    # [Bug #504508]
675	    puts $sock "Host: $host"
676	} else {
677	    puts $sock "Host: $host:$port"
678	}
679	unset hdrs
680	puts $sock "User-Agent: $http(-useragent)"
681        if {$state(-protocol) == 1.0 && $state(-keepalive)} {
682            puts $sock "Connection: keep-alive"
683        }
684        if {$state(-protocol) > 1.0 && !$state(-keepalive)} {
685            puts $sock "Connection: close" ;# RFC2616 sec 8.1.2.1
686        }
687        if {[info exists phost] && ($phost ne "") && $state(-keepalive)} {
688            puts $sock "Proxy-Connection: Keep-Alive"
689        }
690        set accept_encoding_seen 0
691	foreach {key value} $state(-headers) {
692	    if {[string equal -nocase $key "host"]} { continue }
693            if {[string equal -nocase $key "accept-encoding"]} {
694                set accept_encoding_seen 1
695            }
696	    set value [string map [list \n "" \r ""] $value]
697	    set key [string trim $key]
698	    if {[string equal -nocase $key "content-length"]} {
699		set contDone 1
700		set state(querylength) $value
701	    }
702	    if {[string length $key]} {
703		puts $sock "$key: $value"
704	    }
705	}
706	# Soft zlib dependency check - no package require
707        if {!$accept_encoding_seen && [llength [package provide zlib]]
708            && !([info exists state(-channel)] || [info exists state(-handler)])
709        } {
710            puts $sock "Accept-Encoding: gzip, identity, *;q=0.1"
711        }
712	if {$isQueryChannel && $state(querylength) == 0} {
713	    # Try to determine size of data in channel. If we cannot seek, the
714	    # surrounding catch will trap us
715
716	    set start [tell $state(-querychannel)]
717	    seek $state(-querychannel) 0 end
718	    set state(querylength) \
719		    [expr {[tell $state(-querychannel)] - $start}]
720	    seek $state(-querychannel) $start
721	}
722
723	# Flush the request header and set up the fileevent that will either
724	# push the POST data or read the response.
725	#
726	# fileevent note:
727	#
728	# It is possible to have both the read and write fileevents active at
729	# this point. The only scenario it seems to affect is a server that
730	# closes the connection without reading the POST data. (e.g., early
731	# versions TclHttpd in various error cases). Depending on the platform,
732	# the client may or may not be able to get the response from the server
733	# because of the error it will get trying to write the post data.
734	# Having both fileevents active changes the timing and the behavior,
735	# but no two platforms (among Solaris, Linux, and NT) behave the same,
736	# and none behave all that well in any case. Servers should always read
737	# their POST data if they expect the client to read their response.
738
739	if {$isQuery || $isQueryChannel} {
740	    puts $sock "Content-Type: $state(-type)"
741	    if {!$contDone} {
742		puts $sock "Content-Length: $state(querylength)"
743	    }
744	    puts $sock ""
745	    fconfigure $sock -translation {auto binary}
746	    fileevent $sock writable [list http::Write $token]
747	} else {
748	    puts $sock ""
749	    flush $sock
750	    fileevent $sock readable [list http::Event $sock $token]
751	}
752
753	if {! [info exists state(-command)]} {
754	    # geturl does EVERYTHING asynchronously, so if the user calls it
755	    # synchronously, we just do a wait here.
756
757	    wait $token
758	    if {$state(status) eq "error"} {
759		# Something went wrong, so throw the exception, and the
760		# enclosing catch will do cleanup.
761		return -code error [lindex $state(error) 0]
762	    }
763	}
764    } err]} {
765	# The socket probably was never connected, or the connection dropped
766	# later.
767
768	# Clean up after events and such, but DON'T call the command callback
769	# (if available) because we're going to throw an exception from here
770	# instead.
771
772	# if state(status) is error, it means someone's already called Finish
773	# to do the above-described clean up.
774	if {$state(status) ne "error"} {
775	    Finish $token $err 1
776	}
777	cleanup $token
778	return -code error $err
779    }
780
781    return $token
782}
783
784# Data access functions:
785# Data - the URL data
786# Status - the transaction status: ok, reset, eof, timeout
787# Code - the HTTP transaction code, e.g., 200
788# Size - the size of the URL data
789
790proc http::data {token} {
791    variable $token
792    upvar 0 $token state
793    return $state(body)
794}
795proc http::status {token} {
796    if {![info exists $token]} { return "error" }
797    variable $token
798    upvar 0 $token state
799    return $state(status)
800}
801proc http::code {token} {
802    variable $token
803    upvar 0 $token state
804    return $state(http)
805}
806proc http::ncode {token} {
807    variable $token
808    upvar 0 $token state
809    if {[regexp {[0-9]{3}} $state(http) numeric_code]} {
810	return $numeric_code
811    } else {
812	return $state(http)
813    }
814}
815proc http::size {token} {
816    variable $token
817    upvar 0 $token state
818    return $state(currentsize)
819}
820proc http::meta {token} {
821    variable $token
822    upvar 0 $token state
823    return $state(meta)
824}
825proc http::error {token} {
826    variable $token
827    upvar 0 $token state
828    if {[info exists state(error)]} {
829	return $state(error)
830    }
831    return ""
832}
833
834# http::cleanup
835#
836#	Garbage collect the state associated with a transaction
837#
838# Arguments
839#	token	The token returned from http::geturl
840#
841# Side Effects
842#	unsets the state array
843
844proc http::cleanup {token} {
845    variable $token
846    upvar 0 $token state
847    if {[info exists state]} {
848	unset state
849    }
850}
851
852# http::Connect
853#
854#	This callback is made when an asyncronous connection completes.
855#
856# Arguments
857#	token	The token returned from http::geturl
858#
859# Side Effects
860#	Sets the status of the connection, which unblocks
861# 	the waiting geturl call
862
863proc http::Connect {token} {
864    variable $token
865    upvar 0 $token state
866    global errorInfo errorCode
867    if {[eof $state(sock)] ||
868	[string length [fconfigure $state(sock) -error]]} {
869	    Finish $token "connect failed [fconfigure $state(sock) -error]" 1
870    } else {
871	set state(status) connect
872	fileevent $state(sock) writable {}
873    }
874    return
875}
876
877# http::Write
878#
879#	Write POST query data to the socket
880#
881# Arguments
882#	token	The token for the connection
883#
884# Side Effects
885#	Write the socket and handle callbacks.
886
887proc http::Write {token} {
888    variable $token
889    upvar 0 $token state
890    set sock $state(sock)
891
892    # Output a block.  Tcl will buffer this if the socket blocks
893    set done 0
894    if {[catch {
895	# Catch I/O errors on dead sockets
896
897	if {[info exists state(-query)]} {
898	    # Chop up large query strings so queryprogress callback can give
899	    # smooth feedback.
900
901	    puts -nonewline $sock \
902		[string range $state(-query) $state(queryoffset) \
903		     [expr {$state(queryoffset) + $state(-queryblocksize) - 1}]]
904	    incr state(queryoffset) $state(-queryblocksize)
905	    if {$state(queryoffset) >= $state(querylength)} {
906		set state(queryoffset) $state(querylength)
907		puts $sock ""
908		set done 1
909	    }
910	} else {
911	    # Copy blocks from the query channel
912
913	    set outStr [read $state(-querychannel) $state(-queryblocksize)]
914	    puts -nonewline $sock $outStr
915	    incr state(queryoffset) [string length $outStr]
916	    if {[eof $state(-querychannel)]} {
917		set done 1
918	    }
919	}
920    } err]} {
921	# Do not call Finish here, but instead let the read half of the socket
922	# process whatever server reply there is to get.
923
924	set state(posterror) $err
925	set done 1
926    }
927    if {$done} {
928	catch {flush $sock}
929	fileevent $sock writable {}
930	fileevent $sock readable [list http::Event $sock $token]
931    }
932
933    # Callback to the client after we've completely handled everything.
934
935    if {[string length $state(-queryprogress)]} {
936	eval $state(-queryprogress) \
937	    [list $token $state(querylength) $state(queryoffset)]
938    }
939}
940
941# http::Event
942#
943#	Handle input on the socket
944#
945# Arguments
946#	sock	The socket receiving input.
947#	token	The token returned from http::geturl
948#
949# Side Effects
950#	Read the socket and handle callbacks.
951
952proc http::Event {sock token} {
953    variable $token
954    upvar 0 $token state
955
956    if {![info exists state]} {
957	Log "Event $sock with invalid token '$token' - remote close?"
958	if {! [eof $sock]} {
959	    if {[string length [set d [read $sock]]] != 0} {
960		Log "WARNING: additional data left on closed socket"
961	    }
962	}
963	CloseSocket $sock
964	return
965    }
966    if {$state(state) eq "header"} {
967	if {[catch {gets $sock line} n]} {
968	    return [Finish $token $n]
969	} elseif {$n == 0} {
970	    # We have now read all headers
971	    # We ignore HTTP/1.1 100 Continue returns. RFC2616 sec 8.2.3
972	    if {$state(http) == "" || [lindex $state(http) 1] == 100} { return }
973
974	    set state(state) body
975
976	    # If doing a HEAD, then we won't get any body
977	    if {$state(-validate)} {
978		Eof $token
979		return
980	    }
981
982	    # For non-chunked transfer we may have no body -- in this case we
983	    # may get no further file event if the connection doesn't close and
984	    # no more data is sent. We can tell and must finish up now - not
985	    # later.
986	    if {!(([info exists state(connection)]
987		   && ($state(connection) eq "close"))
988		  || [info exists state(transfer)])
989		&&  $state(totalsize) == 0
990	    } then {
991		Log "body size is 0 and no events likely - complete."
992		Eof $token
993		return
994	    }
995
996	    # We have to use binary translation to count bytes properly.
997	    fconfigure $sock -translation binary
998
999	    if {$state(-binary) || ![string match -nocase text* $state(type)]} {
1000		# Turn off conversions for non-text data
1001		set state(binary) 1
1002	    }
1003	    if {$state(binary) || [string match *gzip* $state(coding)]
1004		|| [string match *compress* $state(coding)]} {
1005		if {[info exists state(-channel)]} {
1006		    fconfigure $state(-channel) -translation binary
1007		}
1008	    }
1009	    if {[info exists state(-channel)] &&
1010		![info exists state(-handler)]} {
1011		# Initiate a sequence of background fcopies
1012		fileevent $sock readable {}
1013		CopyStart $sock $token
1014		return
1015	    }
1016	} elseif {$n > 0} {
1017	    # Process header lines
1018	    if {[regexp -nocase {^([^:]+):(.+)$} $line x key value]} {
1019		switch -- [string tolower $key] {
1020		    content-type {
1021			set state(type) [string trim [string tolower $value]]
1022			# grab the optional charset information
1023			regexp -nocase {charset\s*=\s*(\S+?);?} \
1024			    $state(type) -> state(charset)
1025		    }
1026		    content-length {
1027			set state(totalsize) [string trim $value]
1028		    }
1029		    content-encoding {
1030			set state(coding) [string trim $value]
1031		    }
1032		    transfer-encoding {
1033			set state(transfer) \
1034			    [string trim [string tolower $value]]
1035		    }
1036		    proxy-connection -
1037		    connection {
1038			set state(connection) \
1039			    [string trim [string tolower $value]]
1040		    }
1041		}
1042		lappend state(meta) $key [string trim $value]
1043	    } elseif {[string match HTTP* $line]} {
1044		set state(http) $line
1045	    }
1046	}
1047    } else {
1048	# Now reading body
1049	if {[catch {
1050	    if {[info exists state(-handler)]} {
1051		set n [eval $state(-handler) [list $sock $token]]
1052	    } elseif {[info exists state(transfer_final)]} {
1053		set line [getTextLine $sock]
1054		set n [string length $line]
1055		if {$n > 0} {
1056		    Log "found $n bytes following final chunk"
1057		    append state(transfer_final) $line
1058		} else {
1059		    Log "final chunk part"
1060		    Eof $token
1061		}
1062	    } elseif {[info exists state(transfer)]
1063		      && $state(transfer) eq "chunked"} {
1064		set size 0
1065		set chunk [getTextLine $sock]
1066		set n [string length $chunk]
1067		if {[string trim $chunk] ne ""} {
1068		    scan $chunk %x size
1069		    if {$size != 0} {
1070			set bl [fconfigure $sock -blocking]
1071			fconfigure $sock -blocking 1
1072			set chunk [read $sock $size]
1073			fconfigure $sock -blocking $bl
1074			set n [string length $chunk]
1075			if {$n >= 0} {
1076			    append state(body) $chunk
1077			}
1078			if {$size != [string length $chunk]} {
1079			    Log "WARNING: mis-sized chunk:\
1080				was [string length $chunk], should be $size"
1081			}
1082			getTextLine $sock
1083		    } else {
1084			set state(transfer_final) {}
1085		    }
1086		}
1087	    } else {
1088		#Log "read non-chunk $state(currentsize) of $state(totalsize)"
1089		set block [read $sock $state(-blocksize)]
1090		set n [string length $block]
1091		if {$n >= 0} {
1092		    append state(body) $block
1093		}
1094	    }
1095	    if {[info exists state]} {
1096		if {$n >= 0} {
1097		    incr state(currentsize) $n
1098		}
1099		# If Content-Length - check for end of data.
1100		if {($state(totalsize) > 0)
1101		    && ($state(currentsize) >= $state(totalsize))} {
1102		    Eof $token
1103		}
1104	    }
1105	} err]} {
1106	    return [Finish $token $err]
1107	} else {
1108	    if {[info exists state(-progress)]} {
1109		eval $state(-progress) \
1110		    [list $token $state(totalsize) $state(currentsize)]
1111	    }
1112	}
1113    }
1114
1115    # catch as an Eof above may have closed the socket already
1116    if {![catch {eof $sock} eof] && $eof} {
1117	if {[info exists $token]} {
1118	    set state(connection) close
1119	    Eof $token
1120	} else {
1121	    # open connection closed on a token that has been cleaned up.
1122	    CloseSocket $sock
1123	}
1124	return
1125    }
1126}
1127
1128# http::getTextLine --
1129#
1130#	Get one line with the stream in blocking crlf mode
1131#
1132# Arguments
1133#	sock	The socket receiving input.
1134#
1135# Results:
1136#	The line of text, without trailing newline
1137
1138proc http::getTextLine {sock} {
1139    set tr [fconfigure $sock -translation]
1140    set bl [fconfigure $sock -blocking]
1141    fconfigure $sock -translation crlf -blocking 1
1142    set r [gets $sock]
1143    fconfigure $sock -translation $tr -blocking $bl
1144    return $r
1145}
1146
1147# http::CopyStart
1148#
1149#	Error handling wrapper around fcopy
1150#
1151# Arguments
1152#	sock	The socket to copy from
1153#	token	The token returned from http::geturl
1154#
1155# Side Effects
1156#	This closes the connection upon error
1157
1158proc http::CopyStart {sock token} {
1159    variable $token
1160    upvar 0 $token state
1161    if {[catch {
1162	fcopy $sock $state(-channel) -size $state(-blocksize) -command \
1163	    [list http::CopyDone $token]
1164    } err]} {
1165	Finish $token $err
1166    }
1167}
1168
1169# http::CopyDone
1170#
1171#	fcopy completion callback
1172#
1173# Arguments
1174#	token	The token returned from http::geturl
1175#	count	The amount transfered
1176#
1177# Side Effects
1178#	Invokes callbacks
1179
1180proc http::CopyDone {token count {error {}}} {
1181    variable $token
1182    upvar 0 $token state
1183    set sock $state(sock)
1184    incr state(currentsize) $count
1185    if {[info exists state(-progress)]} {
1186	eval $state(-progress) \
1187	    [list $token $state(totalsize) $state(currentsize)]
1188    }
1189    # At this point the token may have been reset
1190    if {[string length $error]} {
1191	Finish $token $error
1192    } elseif {[catch {eof $sock} iseof] || $iseof} {
1193	Eof $token
1194    } else {
1195	CopyStart $sock $token
1196    }
1197}
1198
1199# http::Eof
1200#
1201#	Handle eof on the socket
1202#
1203# Arguments
1204#	token	The token returned from http::geturl
1205#
1206# Side Effects
1207#	Clean up the socket
1208
1209proc http::Eof {token {force 0}} {
1210    variable $token
1211    upvar 0 $token state
1212    if {$state(state) eq "header"} {
1213	# Premature eof
1214	set state(status) eof
1215    } else {
1216	set state(status) ok
1217    }
1218
1219    if {($state(coding) eq "gzip") && [string length $state(body)] > 0} {
1220        if {[catch {
1221            set state(body) [Gunzip $state(body)]
1222        } err]} {
1223            return [Finish $token $err]
1224        }
1225    }
1226
1227    if {!$state(binary)} {
1228
1229        # If we are getting text, set the incoming channel's
1230        # encoding correctly.  iso8859-1 is the RFC default, but
1231        # this could be any IANA charset.  However, we only know
1232        # how to convert what we have encodings for.
1233
1234        set enc [CharsetToEncoding $state(charset)]
1235        if {$enc ne "binary"} {
1236            set state(body) [encoding convertfrom $enc $state(body)]
1237        }
1238
1239        # Translate text line endings.
1240        set state(body) [string map {\r\n \n \r \n} $state(body)]
1241    }
1242
1243    Finish $token
1244}
1245
1246# http::wait --
1247#
1248#	See documentation for details.
1249#
1250# Arguments:
1251#	token	Connection token.
1252#
1253# Results:
1254#        The status after the wait.
1255
1256proc http::wait {token} {
1257    variable $token
1258    upvar 0 $token state
1259
1260    if {![info exists state(status)] || $state(status) eq ""} {
1261	# We must wait on the original variable name, not the upvar alias
1262	vwait ${token}(status)
1263    }
1264
1265    return [status $token]
1266}
1267
1268# http::formatQuery --
1269#
1270#	See documentation for details.  Call http::formatQuery with an even
1271#	number of arguments, where the first is a name, the second is a value,
1272#	the third is another name, and so on.
1273#
1274# Arguments:
1275#	args	A list of name-value pairs.
1276#
1277# Results:
1278#	TODO
1279
1280proc http::formatQuery {args} {
1281    set result ""
1282    set sep ""
1283    foreach i $args {
1284	append result $sep [mapReply $i]
1285	if {$sep eq "="} {
1286	    set sep &
1287	} else {
1288	    set sep =
1289	}
1290    }
1291    return $result
1292}
1293
1294# http::mapReply --
1295#
1296#	Do x-www-urlencoded character mapping
1297#
1298# Arguments:
1299#	string	The string the needs to be encoded
1300#
1301# Results:
1302#       The encoded string
1303
1304proc http::mapReply {string} {
1305    variable http
1306    variable formMap
1307
1308    # The spec says: "non-alphanumeric characters are replaced by '%HH'". Use
1309    # a pre-computed map and [string map] to do the conversion (much faster
1310    # than [regsub]/[subst]). [Bug 1020491]
1311
1312    if {$http(-urlencoding) ne ""} {
1313	set string [encoding convertto $http(-urlencoding) $string]
1314	return [string map $formMap $string]
1315    }
1316    set converted [string map $formMap $string]
1317    if {[string match "*\[\u0100-\uffff\]*" $converted]} {
1318	regexp {[\u0100-\uffff]} $converted badChar
1319	# Return this error message for maximum compatability... :^/
1320	return -code error \
1321	    "can't read \"formMap($badChar)\": no such element in array"
1322    }
1323    return $converted
1324}
1325
1326# http::ProxyRequired --
1327#	Default proxy filter.
1328#
1329# Arguments:
1330#	host	The destination host
1331#
1332# Results:
1333#       The current proxy settings
1334
1335proc http::ProxyRequired {host} {
1336    variable http
1337    if {[info exists http(-proxyhost)] && [string length $http(-proxyhost)]} {
1338	if {![info exists http(-proxyport)] || \
1339		![string length $http(-proxyport)]} {
1340	    set http(-proxyport) 8080
1341	}
1342	return [list $http(-proxyhost) $http(-proxyport)]
1343    }
1344}
1345
1346# http::CharsetToEncoding --
1347#
1348# 	Tries to map a given IANA charset to a tcl encoding.
1349#	If no encoding can be found, returns binary.
1350#
1351
1352proc http::CharsetToEncoding {charset} {
1353    variable encodings
1354
1355    set charset [string tolower $charset]
1356    if {[regexp {iso-?8859-([0-9]+)} $charset - num]} {
1357	set encoding "iso8859-$num"
1358    } elseif {[regexp {iso-?2022-(jp|kr)} $charset - ext]} {
1359	set encoding "iso2022-$ext"
1360    } elseif {[regexp {shift[-_]?js} $charset -]} {
1361	set encoding "shiftjis"
1362    } elseif {[regexp {(windows|cp)-?([0-9]+)} $charset - - num]} {
1363	set encoding "cp$num"
1364    } elseif {$charset eq "us-ascii"} {
1365	set encoding "ascii"
1366    } elseif {[regexp {(iso-?)?lat(in)?-?([0-9]+)} $charset - - - num]} {
1367	switch -- $num {
1368	    5 {set encoding "iso8859-9"}
1369	    1 -
1370	    2 -
1371	    3 {set encoding "iso8859-$num"}
1372	}
1373    } else {
1374	# other charset, like euc-xx, utf-8,...  may directly maps to encoding
1375	set encoding $charset
1376    }
1377    set idx [lsearch -exact $encodings $encoding]
1378    if {$idx >= 0} {
1379	return $encoding
1380    } else {
1381	return "binary"
1382    }
1383}
1384
1385# http::Gunzip --
1386#
1387#	Decompress data transmitted using the gzip transfer coding.
1388#
1389
1390# FIX ME: redo using zlib sinflate
1391proc http::Gunzip {data} {
1392    binary scan $data Scb5icc magic method flags time xfl os
1393    set pos 10
1394    if {$magic != 0x1f8b} {
1395        return -code error "invalid data: supplied data is not in gzip format"
1396    }
1397    if {$method != 8} {
1398        return -code error "invalid compression method"
1399    }
1400
1401    foreach {f_text f_crc f_extra f_name f_comment} [split $flags ""] break
1402    set extra ""
1403    if { $f_extra } {
1404	binary scan $data @${pos}S xlen
1405        incr pos 2
1406        set extra [string range $data $pos $xlen]
1407        set pos [incr xlen]
1408    }
1409
1410    set name ""
1411    if { $f_name } {
1412        set ndx [string first \0 $data $pos]
1413        set name [string range $data $pos $ndx]
1414        set pos [incr ndx]
1415    }
1416
1417    set comment ""
1418    if { $f_comment } {
1419        set ndx [string first \0 $data $pos]
1420        set comment [string range $data $pos $ndx]
1421        set pos [incr ndx]
1422    }
1423
1424    set fcrc ""
1425    if { $f_crc } {
1426	set fcrc [string range $data $pos [incr pos]]
1427        incr pos
1428    }
1429
1430    binary scan [string range $data end-7 end] ii crc size
1431    set inflated [zlib inflate [string range $data $pos end-8]]
1432    set chk [zlib crc32 $inflated]
1433    if { ($crc & 0xffffffff) != ($chk & 0xffffffff)} {
1434	return -code error "invalid data: checksum mismatch $crc != $chk"
1435    }
1436    return $inflated
1437}
1438
1439# Local variables:
1440# indent-tabs-mode: t
1441# End:
1442