1# http.tcl --
2#
3#	Client-side HTTP for GET, POST, and HEAD commands. These routines can
4#	be used in untrusted code that uses the Safesock security policy.
5#	These procedures use a callback interface to avoid using vwait, which
6#	is not defined in the safe base.
7#
8# See the file "license.terms" for information on usage and redistribution of
9# this file, and for a DISCLAIMER OF ALL WARRANTIES.
10#
11# RCS: @(#) $Id: http.tcl,v 1.67.2.9 2009/11/11 16:14:43 dgp Exp $
12
13package require Tcl 8.4
14# Keep this in sync with pkgIndex.tcl and with the install directories in
15# Makefiles
16package provide http 2.7.5
17
18namespace eval http {
19    # Allow resourcing to not clobber existing data
20
21    variable http
22    if {![info exists http]} {
23	array set http {
24	    -accept */*
25	    -proxyhost {}
26	    -proxyport {}
27	    -proxyfilter http::ProxyRequired
28	    -urlencoding utf-8
29	}
30	set http(-useragent) "Tcl http client package [package provide http]"
31    }
32
33    proc init {} {
34	# Set up the map for quoting chars. RFC3986 Section 2.3 say percent
35	# encode all except: "... percent-encoded octets in the ranges of
36	# ALPHA (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), period
37	# (%2E), underscore (%5F), or tilde (%7E) should not be created by URI
38	# producers ..."
39	for {set i 0} {$i <= 256} {incr i} {
40	    set c [format %c $i]
41	    if {![string match {[-._~a-zA-Z0-9]} $c]} {
42		set map($c) %[format %.2x $i]
43	    }
44	}
45	# These are handled specially
46	set map(\n) %0d%0a
47	variable formMap [array get map]
48
49	# Create a map for HTTP/1.1 open sockets
50	variable socketmap
51	if {[info exists socketmap]} {
52	    # Close but don't remove open sockets on re-init
53	    foreach {url sock} [array get socketmap] {
54		catch {close $sock}
55	    }
56	}
57	array set socketmap {}
58    }
59    init
60
61    variable urlTypes
62    if {![info exists urlTypes]} {
63	set urlTypes(http) [list 80 ::socket]
64    }
65
66    variable encodings [string tolower [encoding names]]
67    # This can be changed, but iso8859-1 is the RFC standard.
68    variable defaultCharset
69    if {![info exists defaultCharset]} {
70	set defaultCharset "iso8859-1"
71    }
72
73    # Force RFC 3986 strictness in geturl url verification?
74    variable strict
75    if {![info exists strict]} {
76	set strict 1
77    }
78
79    # Let user control default keepalive for compatibility
80    variable defaultKeepalive
81    if {![info exists defaultKeepalive]} {
82	set defaultKeepalive 0
83    }
84
85    namespace export geturl config reset wait formatQuery register unregister
86    # Useful, but not exported: data size status code
87}
88
89# http::Log --
90#
91#	Debugging output -- define this to observe HTTP/1.1 socket usage.
92#	Should echo any args received.
93#
94# Arguments:
95#     msg	Message to output
96#
97proc http::Log {args} {}
98
99# http::register --
100#
101#     See documentation for details.
102#
103# Arguments:
104#     proto	URL protocol prefix, e.g. https
105#     port	Default port for protocol
106#     command	Command to use to create socket
107# Results:
108#     list of port and command that was registered.
109
110proc http::register {proto port command} {
111    variable urlTypes
112    set urlTypes($proto) [list $port $command]
113}
114
115# http::unregister --
116#
117#     Unregisters URL protocol handler
118#
119# Arguments:
120#     proto	URL protocol prefix, e.g. https
121# Results:
122#     list of port and command that was unregistered.
123
124proc http::unregister {proto} {
125    variable urlTypes
126    if {![info exists urlTypes($proto)]} {
127	return -code error "unsupported url type \"$proto\""
128    }
129    set old $urlTypes($proto)
130    unset urlTypes($proto)
131    return $old
132}
133
134# http::config --
135#
136#	See documentation for details.
137#
138# Arguments:
139#	args		Options parsed by the procedure.
140# Results:
141#        TODO
142
143proc http::config {args} {
144    variable http
145    set options [lsort [array names http -*]]
146    set usage [join $options ", "]
147    if {[llength $args] == 0} {
148	set result {}
149	foreach name $options {
150	    lappend result $name $http($name)
151	}
152	return $result
153    }
154    set options [string map {- ""} $options]
155    set pat ^-(?:[join $options |])$
156    if {[llength $args] == 1} {
157	set flag [lindex $args 0]
158	if {![regexp -- $pat $flag]} {
159	    return -code error "Unknown option $flag, must be: $usage"
160	}
161	return $http($flag)
162    } else {
163	foreach {flag value} $args {
164	    if {![regexp -- $pat $flag]} {
165		return -code error "Unknown option $flag, must be: $usage"
166	    }
167	    set http($flag) $value
168	}
169    }
170}
171
172# http::Finish --
173#
174#	Clean up the socket and eval close time callbacks
175#
176# Arguments:
177#	token	    Connection token.
178#	errormsg    (optional) If set, forces status to error.
179#       skipCB      (optional) If set, don't call the -command callback. This
180#		    is useful when geturl wants to throw an exception instead
181#		    of calling the callback. That way, the same error isn't
182#		    reported to two places.
183#
184# Side Effects:
185#        Closes the socket
186
187proc http::Finish {token {errormsg ""} {skipCB 0}} {
188    variable $token
189    upvar 0 $token state
190    global errorInfo errorCode
191    if {$errormsg ne ""} {
192	set state(error) [list $errormsg $errorInfo $errorCode]
193	set state(status) "error"
194    }
195    if {
196	($state(status) eq "timeout") || ($state(status) eq "error") ||
197	([info exists state(connection)] && ($state(connection) eq "close"))
198    } then {
199        CloseSocket $state(sock) $token
200    }
201    if {[info exists state(after)]} {
202	after cancel $state(after)
203    }
204    if {[info exists state(-command)] && !$skipCB} {
205	if {[catch {eval $state(-command) {$token}} err]} {
206	    if {$errormsg eq ""} {
207		set state(error) [list $err $errorInfo $errorCode]
208		set state(status) error
209	    }
210	}
211	# Command callback may already have unset our state
212	unset -nocomplain state(-command)
213    }
214}
215
216# http::CloseSocket -
217#
218#	Close a socket and remove it from the persistent sockets table.  If
219#	possible an http token is included here but when we are called from a
220#	fileevent on remote closure we need to find the correct entry - hence
221#	the second section.
222
223proc ::http::CloseSocket {s {token {}}} {
224    variable socketmap
225    catch {fileevent $s readable {}}
226    set conn_id {}
227    if {$token ne ""} {
228        variable $token
229        upvar 0 $token state
230        if {[info exists state(socketinfo)]} {
231	    set conn_id $state(socketinfo)
232        }
233    } else {
234        set map [array get socketmap]
235        set ndx [lsearch -exact $map $s]
236        if {$ndx != -1} {
237	    incr ndx -1
238	    set conn_id [lindex $map $ndx]
239        }
240    }
241    if {$conn_id eq {} || ![info exists socketmap($conn_id)]} {
242        Log "Closing socket $s (no connection info)"
243        if {[catch {close $s} err]} {
244	    Log "Error: $err"
245	}
246    } else {
247	if {[info exists socketmap($conn_id)]} {
248	    Log "Closing connection $conn_id (sock $socketmap($conn_id))"
249	    if {[catch {close $socketmap($conn_id)} err]} {
250		Log "Error: $err"
251	    }
252	    unset socketmap($conn_id)
253	} else {
254	    Log "Cannot close connection $conn_id - no socket in socket map"
255	}
256    }
257}
258
259# http::reset --
260#
261#	See documentation for details.
262#
263# Arguments:
264#	token	Connection token.
265#	why	Status info.
266#
267# Side Effects:
268#       See Finish
269
270proc http::reset {token {why reset}} {
271    variable $token
272    upvar 0 $token state
273    set state(status) $why
274    catch {fileevent $state(sock) readable {}}
275    catch {fileevent $state(sock) writable {}}
276    Finish $token
277    if {[info exists state(error)]} {
278	set errorlist $state(error)
279	unset state
280	eval ::error $errorlist
281    }
282}
283
284# http::geturl --
285#
286#	Establishes a connection to a remote url via http.
287#
288# Arguments:
289#       url		The http URL to goget.
290#       args		Option value pairs. Valid options include:
291#				-blocksize, -validate, -headers, -timeout
292# Results:
293#	Returns a token for this connection. This token is the name of an
294#	array that the caller should unset to garbage collect the state.
295
296proc http::geturl {url args} {
297    variable http
298    variable urlTypes
299    variable defaultCharset
300    variable defaultKeepalive
301    variable strict
302
303    # Initialize the state variable, an array. We'll return the name of this
304    # array as the token for the transaction.
305
306    if {![info exists http(uid)]} {
307	set http(uid) 0
308    }
309    set token [namespace current]::[incr http(uid)]
310    variable $token
311    upvar 0 $token state
312    reset $token
313
314    # Process command options.
315
316    array set state {
317	-binary		false
318	-blocksize	8192
319	-queryblocksize 8192
320	-validate	0
321	-headers	{}
322	-timeout	0
323	-type		application/x-www-form-urlencoded
324	-queryprogress	{}
325	-protocol	1.1
326	binary		0
327	state		connecting
328	meta		{}
329	coding		{}
330	currentsize	0
331	totalsize	0
332	querylength	0
333	queryoffset	0
334	type		text/html
335	body		{}
336	status		""
337	http		""
338	connection	close
339    }
340    set state(-keepalive) $defaultKeepalive
341    set state(-strict) $strict
342    # These flags have their types verified [Bug 811170]
343    array set type {
344	-binary		boolean
345	-blocksize	integer
346	-queryblocksize integer
347	-strict		boolean
348	-timeout	integer
349	-validate	boolean
350    }
351    set state(charset)	$defaultCharset
352    set options {
353	-binary -blocksize -channel -command -handler -headers -keepalive
354	-method -myaddr -progress -protocol -query -queryblocksize
355	-querychannel -queryprogress -strict -timeout -type -validate
356    }
357    set usage [join [lsort $options] ", "]
358    set options [string map {- ""} $options]
359    set pat ^-(?:[join $options |])$
360    foreach {flag value} $args {
361	if {[regexp -- $pat $flag]} {
362	    # Validate numbers
363	    if {
364		[info exists type($flag)] &&
365		![string is $type($flag) -strict $value]
366	    } then {
367		unset $token
368		return -code error \
369		    "Bad value for $flag ($value), must be $type($flag)"
370	    }
371	    set state($flag) $value
372	} else {
373	    unset $token
374	    return -code error "Unknown option $flag, can be: $usage"
375	}
376    }
377
378    # Make sure -query and -querychannel aren't both specified
379
380    set isQueryChannel [info exists state(-querychannel)]
381    set isQuery [info exists state(-query)]
382    if {$isQuery && $isQueryChannel} {
383	unset $token
384	return -code error "Can't combine -query and -querychannel options!"
385    }
386
387    # Validate URL, determine the server host and port, and check proxy case
388    # Recognize user:pass@host URLs also, although we do not do anything with
389    # that info yet.
390
391    # URLs have basically four parts.
392    # First, before the colon, is the protocol scheme (e.g. http)
393    # Second, for HTTP-like protocols, is the authority
394    #	The authority is preceded by // and lasts up to (but not including)
395    #	the following / and it identifies up to four parts, of which only one,
396    #	the host, is required (if an authority is present at all). All other
397    #	parts of the authority (user name, password, port number) are optional.
398    # Third is the resource name, which is split into two parts at a ?
399    #	The first part (from the single "/" up to "?") is the path, and the
400    #	second part (from that "?" up to "#") is the query. *HOWEVER*, we do
401    #	not need to separate them; we send the whole lot to the server.
402    # Fourth is the fragment identifier, which is everything after the first
403    #	"#" in the URL. The fragment identifier MUST NOT be sent to the server
404    #	and indeed, we don't bother to validate it (it could be an error to
405    #	pass it in here, but it's cheap to strip).
406    #
407    # An example of a URL that has all the parts:
408    #
409    #     http://jschmoe:xyzzy@www.bogus.net:8000/foo/bar.tml?q=foo#changes
410    #
411    # The "http" is the protocol, the user is "jschmoe", the password is
412    # "xyzzy", the host is "www.bogus.net", the port is "8000", the path is
413    # "/foo/bar.tml", the query is "q=foo", and the fragment is "changes".
414    #
415    # Note that the RE actually combines the user and password parts, as
416    # recommended in RFC 3986. Indeed, that RFC states that putting passwords
417    # in URLs is a Really Bad Idea, something with which I would agree utterly.
418    # Also note that we do not currently support IPv6 addresses.
419    #
420    # From a validation perspective, we need to ensure that the parts of the
421    # URL that are going to the server are correctly encoded.  This is only
422    # done if $state(-strict) is true (inherited from $::http::strict).
423
424    set URLmatcher {(?x)		# this is _expanded_ syntax
425	^
426	(?: (\w+) : ) ?			# <protocol scheme>
427	(?: //
428	    (?:
429		(
430		    [^@/\#?]+		# <userinfo part of authority>
431		) @
432	    )?
433	    ( [^/:\#?]+ )		# <host part of authority>
434	    (?: : (\d+) )?		# <port part of authority>
435	)?
436	( / [^\#]*)?			# <path> (including query)
437	(?: \# (.*) )?			# <fragment>
438	$
439    }
440
441    # Phase one: parse
442    if {![regexp -- $URLmatcher $url -> proto user host port srvurl]} {
443	unset $token
444	return -code error "Unsupported URL: $url"
445    }
446    # Phase two: validate
447    if {$host eq ""} {
448	# Caller has to provide a host name; we do not have a "default host"
449	# that would enable us to handle relative URLs.
450	unset $token
451	return -code error "Missing host part: $url"
452	# Note that we don't check the hostname for validity here; if it's
453	# invalid, we'll simply fail to resolve it later on.
454    }
455    if {$port ne "" && $port > 65535} {
456	unset $token
457	return -code error "Invalid port number: $port"
458    }
459    # The user identification and resource identification parts of the URL can
460    # have encoded characters in them; take care!
461    if {$user ne ""} {
462	# Check for validity according to RFC 3986, Appendix A
463	set validityRE {(?xi)
464	    ^
465	    (?: [-\w.~!$&'()*+,;=:] | %[0-9a-f][0-9a-f] )+
466	    $
467	}
468	if {$state(-strict) && ![regexp -- $validityRE $user]} {
469	    unset $token
470	    # Provide a better error message in this error case
471	    if {[regexp {(?i)%(?![0-9a-f][0-9a-f]).?.?} $user bad]} {
472		return -code error \
473			"Illegal encoding character usage \"$bad\" in URL user"
474	    }
475	    return -code error "Illegal characters in URL user"
476	}
477    }
478    if {$srvurl ne ""} {
479	# Check for validity according to RFC 3986, Appendix A
480	set validityRE {(?xi)
481	    ^
482	    # Path part (already must start with / character)
483	    (?:	      [-\w.~!$&'()*+,;=:@/]  | %[0-9a-f][0-9a-f] )*
484	    # Query part (optional, permits ? characters)
485	    (?: \? (?: [-\w.~!$&'()*+,;=:@/?] | %[0-9a-f][0-9a-f] )* )?
486	    $
487	}
488	if {$state(-strict) && ![regexp -- $validityRE $srvurl]} {
489	    unset $token
490	    # Provide a better error message in this error case
491	    if {[regexp {(?i)%(?![0-9a-f][0-9a-f])..} $srvurl bad]} {
492		return -code error \
493		    "Illegal encoding character usage \"$bad\" in URL path"
494	    }
495	    return -code error "Illegal characters in URL path"
496	}
497    } else {
498	set srvurl /
499    }
500    if {$proto eq ""} {
501	set proto http
502    }
503    if {![info exists urlTypes($proto)]} {
504	unset $token
505	return -code error "Unsupported URL type \"$proto\""
506    }
507    set defport [lindex $urlTypes($proto) 0]
508    set defcmd [lindex $urlTypes($proto) 1]
509
510    if {$port eq ""} {
511	set port $defport
512    }
513    if {![catch {$http(-proxyfilter) $host} proxy]} {
514	set phost [lindex $proxy 0]
515	set pport [lindex $proxy 1]
516    }
517
518    # OK, now reassemble into a full URL
519    set url ${proto}://
520    if {$user ne ""} {
521	append url $user
522	append url @
523    }
524    append url $host
525    if {$port != $defport} {
526	append url : $port
527    }
528    append url $srvurl
529    # Don't append the fragment!
530    set state(url) $url
531
532    # If a timeout is specified we set up the after event and arrange for an
533    # asynchronous socket connection.
534
535    set sockopts [list]
536    if {$state(-timeout) > 0} {
537	set state(after) [after $state(-timeout) \
538		[list http::reset $token timeout]]
539	lappend sockopts -async
540    }
541
542    # If we are using the proxy, we must pass in the full URL that includes
543    # the server name.
544
545    if {[info exists phost] && ($phost ne "")} {
546	set srvurl $url
547	set targetAddr [list $phost $pport]
548    } else {
549	set targetAddr [list $host $port]
550    }
551    # Proxy connections aren't shared among different hosts.
552    set state(socketinfo) $host:$port
553
554    # See if we are supposed to use a previously opened channel.
555    if {$state(-keepalive)} {
556	variable socketmap
557	if {[info exists socketmap($state(socketinfo))]} {
558	    if {[catch {fconfigure $socketmap($state(socketinfo))}]} {
559		Log "WARNING: socket for $state(socketinfo) was closed"
560		unset socketmap($state(socketinfo))
561	    } else {
562		set sock $socketmap($state(socketinfo))
563		Log "reusing socket $sock for $state(socketinfo)"
564		catch {fileevent $sock writable {}}
565		catch {fileevent $sock readable {}}
566	    }
567	}
568	# don't automatically close this connection socket
569	set state(connection) {}
570    }
571    if {![info exists sock]} {
572	# Pass -myaddr directly to the socket command
573	if {[info exists state(-myaddr)]} {
574	    lappend sockopts -myaddr $state(-myaddr)
575	}
576        if {[catch {eval $defcmd $sockopts $targetAddr} sock]} {
577	    # something went wrong while trying to establish the connection.
578	    # Clean up after events and such, but DON'T call the command
579	    # callback (if available) because we're going to throw an
580	    # exception from here instead.
581
582	    set state(sock) $sock
583	    Finish $token "" 1
584	    cleanup $token
585	    return -code error $sock
586        }
587    }
588    set state(sock) $sock
589    Log "Using $sock for $state(socketinfo)" \
590        [expr {$state(-keepalive)?"keepalive":""}]
591    if {$state(-keepalive)} {
592        set socketmap($state(socketinfo)) $sock
593    }
594
595    # Wait for the connection to complete.
596
597    if {$state(-timeout) > 0} {
598	fileevent $sock writable [list http::Connect $token]
599	http::wait $token
600
601	if {![info exists state]} {
602	    # If we timed out then Finish has been called and the users
603	    # command callback may have cleaned up the token. If so we end up
604	    # here with nothing left to do.
605	    return $token
606	} elseif {$state(status) eq "error"} {
607	    # Something went wrong while trying to establish the connection.
608	    # Clean up after events and such, but DON'T call the command
609	    # callback (if available) because we're going to throw an
610	    # exception from here instead.
611	    set err [lindex $state(error) 0]
612	    cleanup $token
613	    return -code error $err
614	} elseif {$state(status) ne "connect"} {
615	    # Likely to be connection timeout
616	    return $token
617	}
618	set state(status) ""
619    }
620
621    # Send data in cr-lf format, but accept any line terminators
622
623    fconfigure $sock -translation {auto crlf} -buffersize $state(-blocksize)
624
625    # The following is disallowed in safe interpreters, but the socket is
626    # already in non-blocking mode in that case.
627
628    catch {fconfigure $sock -blocking off}
629    set how GET
630    if {$isQuery} {
631	set state(querylength) [string length $state(-query)]
632	if {$state(querylength) > 0} {
633	    set how POST
634	    set contDone 0
635	} else {
636	    # There's no query data.
637	    unset state(-query)
638	    set isQuery 0
639	}
640    } elseif {$state(-validate)} {
641	set how HEAD
642    } elseif {$isQueryChannel} {
643	set how POST
644	# The query channel must be blocking for the async Write to
645	# work properly.
646	fconfigure $state(-querychannel) -blocking 1 -translation binary
647	set contDone 0
648    }
649    if {[info exists state(-method)] && $state(-method) ne ""} {
650	set how $state(-method)
651    }
652
653    if {[catch {
654	puts $sock "$how $srvurl HTTP/$state(-protocol)"
655	puts $sock "Accept: $http(-accept)"
656	array set hdrs $state(-headers)
657	if {[info exists hdrs(Host)]} {
658	    # Allow Host spoofing. [Bug 928154]
659	    puts $sock "Host: $hdrs(Host)"
660	} elseif {$port == $defport} {
661	    # Don't add port in this case, to handle broken servers. [Bug
662	    # #504508]
663	    puts $sock "Host: $host"
664	} else {
665	    puts $sock "Host: $host:$port"
666	}
667	unset hdrs
668	puts $sock "User-Agent: $http(-useragent)"
669        if {$state(-protocol) == 1.0 && $state(-keepalive)} {
670	    puts $sock "Connection: keep-alive"
671        }
672        if {$state(-protocol) > 1.0 && !$state(-keepalive)} {
673	    puts $sock "Connection: close" ;# RFC2616 sec 8.1.2.1
674        }
675        if {[info exists phost] && ($phost ne "") && $state(-keepalive)} {
676	    puts $sock "Proxy-Connection: Keep-Alive"
677        }
678        set accept_encoding_seen 0
679	foreach {key value} $state(-headers) {
680	    if {[string equal -nocase $key "host"]} {
681		continue
682	    }
683	    if {[string equal -nocase $key "accept-encoding"]} {
684		set accept_encoding_seen 1
685	    }
686	    set value [string map [list \n "" \r ""] $value]
687	    set key [string trim $key]
688	    if {[string equal -nocase $key "content-length"]} {
689		set contDone 1
690		set state(querylength) $value
691	    }
692	    if {[string length $key]} {
693		puts $sock "$key: $value"
694	    }
695	}
696	# Soft zlib dependency check - no package require
697        if {
698	    !$accept_encoding_seen &&
699	    ([package vsatisfies [package provide Tcl] 8.6]
700		|| [llength [package provide zlib]]) &&
701	    !([info exists state(-channel)] || [info exists state(-handler)])
702        } then {
703	    puts $sock "Accept-Encoding: gzip, identity, *;q=0.1"
704        }
705	if {$isQueryChannel && $state(querylength) == 0} {
706	    # Try to determine size of data in channel. If we cannot seek, the
707	    # surrounding catch will trap us
708
709	    set start [tell $state(-querychannel)]
710	    seek $state(-querychannel) 0 end
711	    set state(querylength) \
712		    [expr {[tell $state(-querychannel)] - $start}]
713	    seek $state(-querychannel) $start
714	}
715
716	# Flush the request header and set up the fileevent that will either
717	# push the POST data or read the response.
718	#
719	# fileevent note:
720	#
721	# It is possible to have both the read and write fileevents active at
722	# this point. The only scenario it seems to affect is a server that
723	# closes the connection without reading the POST data. (e.g., early
724	# versions TclHttpd in various error cases). Depending on the
725	# platform, the client may or may not be able to get the response from
726	# the server because of the error it will get trying to write the post
727	# data.  Having both fileevents active changes the timing and the
728	# behavior, but no two platforms (among Solaris, Linux, and NT) behave
729	# the same, and none behave all that well in any case. Servers should
730	# always read their POST data if they expect the client to read their
731	# response.
732
733	if {$isQuery || $isQueryChannel} {
734	    puts $sock "Content-Type: $state(-type)"
735	    if {!$contDone} {
736		puts $sock "Content-Length: $state(querylength)"
737	    }
738	    puts $sock ""
739	    fconfigure $sock -translation {auto binary}
740	    fileevent $sock writable [list http::Write $token]
741	} else {
742	    puts $sock ""
743	    flush $sock
744	    fileevent $sock readable [list http::Event $sock $token]
745	}
746
747	if {![info exists state(-command)]} {
748	    # geturl does EVERYTHING asynchronously, so if the user calls it
749	    # synchronously, we just do a wait here.
750
751	    wait $token
752	    if {$state(status) eq "error"} {
753		# Something went wrong, so throw the exception, and the
754		# enclosing catch will do cleanup.
755		return -code error [lindex $state(error) 0]
756	    }
757	}
758    } err]} then {
759	# The socket probably was never connected, or the connection dropped
760	# later.
761
762	# Clean up after events and such, but DON'T call the command callback
763	# (if available) because we're going to throw an exception from here
764	# instead.
765
766	# if state(status) is error, it means someone's already called Finish
767	# to do the above-described clean up.
768	if {$state(status) ne "error"} {
769	    Finish $token $err 1
770	}
771	cleanup $token
772	return -code error $err
773    }
774
775    return $token
776}
777
778# Data access functions:
779# Data - the URL data
780# Status - the transaction status: ok, reset, eof, timeout
781# Code - the HTTP transaction code, e.g., 200
782# Size - the size of the URL data
783
784proc http::data {token} {
785    variable $token
786    upvar 0 $token state
787    return $state(body)
788}
789proc http::status {token} {
790    if {![info exists $token]} {
791	return "error"
792    }
793    variable $token
794    upvar 0 $token state
795    return $state(status)
796}
797proc http::code {token} {
798    variable $token
799    upvar 0 $token state
800    return $state(http)
801}
802proc http::ncode {token} {
803    variable $token
804    upvar 0 $token state
805    if {[regexp {[0-9]{3}} $state(http) numeric_code]} {
806	return $numeric_code
807    } else {
808	return $state(http)
809    }
810}
811proc http::size {token} {
812    variable $token
813    upvar 0 $token state
814    return $state(currentsize)
815}
816proc http::meta {token} {
817    variable $token
818    upvar 0 $token state
819    return $state(meta)
820}
821proc http::error {token} {
822    variable $token
823    upvar 0 $token state
824    if {[info exists state(error)]} {
825	return $state(error)
826    }
827    return ""
828}
829
830# http::cleanup
831#
832#	Garbage collect the state associated with a transaction
833#
834# Arguments
835#	token	The token returned from http::geturl
836#
837# Side Effects
838#	unsets the state array
839
840proc http::cleanup {token} {
841    variable $token
842    upvar 0 $token state
843    if {[info exists state]} {
844	unset state
845    }
846}
847
848# http::Connect
849#
850#	This callback is made when an asyncronous connection completes.
851#
852# Arguments
853#	token	The token returned from http::geturl
854#
855# Side Effects
856#	Sets the status of the connection, which unblocks
857# 	the waiting geturl call
858
859proc http::Connect {token} {
860    variable $token
861    upvar 0 $token state
862    global errorInfo errorCode
863    if {
864	[eof $state(sock)] ||
865	[string length [fconfigure $state(sock) -error]]
866    } then {
867	Finish $token "connect failed [fconfigure $state(sock) -error]" 1
868    } else {
869	set state(status) connect
870	fileevent $state(sock) writable {}
871    }
872    return
873}
874
875# http::Write
876#
877#	Write POST query data to the socket
878#
879# Arguments
880#	token	The token for the connection
881#
882# Side Effects
883#	Write the socket and handle callbacks.
884
885proc http::Write {token} {
886    variable $token
887    upvar 0 $token state
888    set sock $state(sock)
889
890    # Output a block.  Tcl will buffer this if the socket blocks
891    set done 0
892    if {[catch {
893	# Catch I/O errors on dead sockets
894
895	if {[info exists state(-query)]} {
896	    # Chop up large query strings so queryprogress callback can give
897	    # smooth feedback.
898
899	    puts -nonewline $sock \
900		[string range $state(-query) $state(queryoffset) \
901		     [expr {$state(queryoffset) + $state(-queryblocksize) - 1}]]
902	    incr state(queryoffset) $state(-queryblocksize)
903	    if {$state(queryoffset) >= $state(querylength)} {
904		set state(queryoffset) $state(querylength)
905		puts $sock ""
906		set done 1
907	    }
908	} else {
909	    # Copy blocks from the query channel
910
911	    set outStr [read $state(-querychannel) $state(-queryblocksize)]
912	    puts -nonewline $sock $outStr
913	    incr state(queryoffset) [string length $outStr]
914	    if {[eof $state(-querychannel)]} {
915		set done 1
916	    }
917	}
918    } err]} then {
919	# Do not call Finish here, but instead let the read half of the socket
920	# process whatever server reply there is to get.
921
922	set state(posterror) $err
923	set done 1
924    }
925    if {$done} {
926	catch {flush $sock}
927	fileevent $sock writable {}
928	fileevent $sock readable [list http::Event $sock $token]
929    }
930
931    # Callback to the client after we've completely handled everything.
932
933    if {[string length $state(-queryprogress)]} {
934	eval $state(-queryprogress) \
935	    [list $token $state(querylength) $state(queryoffset)]
936    }
937}
938
939# http::Event
940#
941#	Handle input on the socket
942#
943# Arguments
944#	sock	The socket receiving input.
945#	token	The token returned from http::geturl
946#
947# Side Effects
948#	Read the socket and handle callbacks.
949
950proc http::Event {sock token} {
951    variable $token
952    upvar 0 $token state
953
954    if {![info exists state]} {
955	Log "Event $sock with invalid token '$token' - remote close?"
956	if {![eof $sock]} {
957	    if {[set d [read $sock]] ne ""} {
958		Log "WARNING: additional data left on closed socket"
959	    }
960	}
961	CloseSocket $sock
962	return
963    }
964    if {$state(state) eq "connecting"} {
965	if {[catch {gets $sock state(http)} n]} {
966	    return [Finish $token $n]
967	} elseif {$n >= 0} {
968	    set state(state) "header"
969	}
970    } elseif {$state(state) eq "header"} {
971	if {[catch {gets $sock line} n]} {
972	    return [Finish $token $n]
973	} elseif {$n == 0} {
974	    # We have now read all headers
975	    # We ignore HTTP/1.1 100 Continue returns. RFC2616 sec 8.2.3
976	    if {$state(http) == "" || [lindex $state(http) 1] == 100} {
977		return
978	    }
979
980	    set state(state) body
981
982	    # If doing a HEAD, then we won't get any body
983	    if {$state(-validate)} {
984		Eof $token
985		return
986	    }
987
988	    # For non-chunked transfer we may have no body - in this case we
989	    # may get no further file event if the connection doesn't close
990	    # and no more data is sent. We can tell and must finish up now -
991	    # not later.
992	    if {
993		!(([info exists state(connection)]
994			&& ($state(connection) eq "close"))
995		    || [info exists state(transfer)])
996		&& ($state(totalsize) == 0)
997	    } then {
998		Log "body size is 0 and no events likely - complete."
999		Eof $token
1000		return
1001	    }
1002
1003	    # We have to use binary translation to count bytes properly.
1004	    fconfigure $sock -translation binary
1005
1006	    if {
1007		$state(-binary) || ![string match -nocase text* $state(type)]
1008	    } then {
1009		# Turn off conversions for non-text data
1010		set state(binary) 1
1011	    }
1012	    if {
1013		$state(binary) || [string match *gzip* $state(coding)] ||
1014		[string match *compress* $state(coding)]
1015	    } then {
1016		if {[info exists state(-channel)]} {
1017		    fconfigure $state(-channel) -translation binary
1018		}
1019	    }
1020	    if {
1021		[info exists state(-channel)] &&
1022		![info exists state(-handler)]
1023	    } then {
1024		# Initiate a sequence of background fcopies
1025		fileevent $sock readable {}
1026		CopyStart $sock $token
1027		return
1028	    }
1029	} elseif {$n > 0} {
1030	    # Process header lines
1031	    if {[regexp -nocase {^([^:]+):(.+)$} $line x key value]} {
1032		switch -- [string tolower $key] {
1033		    content-type {
1034			set state(type) [string trim [string tolower $value]]
1035			# grab the optional charset information
1036			if {[regexp -nocase \
1037				 {charset\s*=\s*\"((?:[^""]|\\\")*)\"} \
1038				 $state(type) -> cs]} {
1039			    set state(charset) [string map {{\"} \"} $cs]
1040			} else {
1041			    regexp -nocase {charset\s*=\s*(\S+?);?} \
1042				$state(type) -> state(charset)
1043			}
1044		    }
1045		    content-length {
1046			set state(totalsize) [string trim $value]
1047		    }
1048		    content-encoding {
1049			set state(coding) [string trim $value]
1050		    }
1051		    transfer-encoding {
1052			set state(transfer) \
1053			    [string trim [string tolower $value]]
1054		    }
1055		    proxy-connection -
1056		    connection {
1057			set state(connection) \
1058			    [string trim [string tolower $value]]
1059		    }
1060		}
1061		lappend state(meta) $key [string trim $value]
1062	    }
1063	}
1064    } else {
1065	# Now reading body
1066	if {[catch {
1067	    if {[info exists state(-handler)]} {
1068		set n [eval $state(-handler) [list $sock $token]]
1069	    } elseif {[info exists state(transfer_final)]} {
1070		set line [getTextLine $sock]
1071		set n [string length $line]
1072		if {$n > 0} {
1073		    Log "found $n bytes following final chunk"
1074		    append state(transfer_final) $line
1075		} else {
1076		    Log "final chunk part"
1077		    Eof $token
1078		}
1079	    } elseif {
1080		[info exists state(transfer)]
1081		&& $state(transfer) eq "chunked"
1082	    } then {
1083		set size 0
1084		set chunk [getTextLine $sock]
1085		set n [string length $chunk]
1086		if {[string trim $chunk] ne ""} {
1087		    scan $chunk %x size
1088		    if {$size != 0} {
1089			set bl [fconfigure $sock -blocking]
1090			fconfigure $sock -blocking 1
1091			set chunk [read $sock $size]
1092			fconfigure $sock -blocking $bl
1093			set n [string length $chunk]
1094			if {$n >= 0} {
1095			    append state(body) $chunk
1096			}
1097			if {$size != [string length $chunk]} {
1098			    Log "WARNING: mis-sized chunk:\
1099				was [string length $chunk], should be $size"
1100			}
1101			getTextLine $sock
1102		    } else {
1103			set state(transfer_final) {}
1104		    }
1105		}
1106	    } else {
1107		#Log "read non-chunk $state(currentsize) of $state(totalsize)"
1108		set block [read $sock $state(-blocksize)]
1109		set n [string length $block]
1110		if {$n >= 0} {
1111		    append state(body) $block
1112		}
1113	    }
1114	    if {[info exists state]} {
1115		if {$n >= 0} {
1116		    incr state(currentsize) $n
1117		}
1118		# If Content-Length - check for end of data.
1119		if {
1120		    ($state(totalsize) > 0)
1121		    && ($state(currentsize) >= $state(totalsize))
1122		} then {
1123		    Eof $token
1124		}
1125	    }
1126	} err]} then {
1127	    return [Finish $token $err]
1128	} else {
1129	    if {[info exists state(-progress)]} {
1130		eval $state(-progress) \
1131		    [list $token $state(totalsize) $state(currentsize)]
1132	    }
1133	}
1134    }
1135
1136    # catch as an Eof above may have closed the socket already
1137    if {![catch {eof $sock} eof] && $eof} {
1138	if {[info exists $token]} {
1139	    set state(connection) close
1140	    Eof $token
1141	} else {
1142	    # open connection closed on a token that has been cleaned up.
1143	    CloseSocket $sock
1144	}
1145	return
1146    }
1147}
1148
1149# http::getTextLine --
1150#
1151#	Get one line with the stream in blocking crlf mode
1152#
1153# Arguments
1154#	sock	The socket receiving input.
1155#
1156# Results:
1157#	The line of text, without trailing newline
1158
1159proc http::getTextLine {sock} {
1160    set tr [fconfigure $sock -translation]
1161    set bl [fconfigure $sock -blocking]
1162    fconfigure $sock -translation crlf -blocking 1
1163    set r [gets $sock]
1164    fconfigure $sock -translation $tr -blocking $bl
1165    return $r
1166}
1167
1168# http::CopyStart
1169#
1170#	Error handling wrapper around fcopy
1171#
1172# Arguments
1173#	sock	The socket to copy from
1174#	token	The token returned from http::geturl
1175#
1176# Side Effects
1177#	This closes the connection upon error
1178
1179proc http::CopyStart {sock token} {
1180    variable $token
1181    upvar 0 $token state
1182    if {[catch {
1183	fcopy $sock $state(-channel) -size $state(-blocksize) -command \
1184	    [list http::CopyDone $token]
1185    } err]} then {
1186	Finish $token $err
1187    }
1188}
1189
1190# http::CopyDone
1191#
1192#	fcopy completion callback
1193#
1194# Arguments
1195#	token	The token returned from http::geturl
1196#	count	The amount transfered
1197#
1198# Side Effects
1199#	Invokes callbacks
1200
1201proc http::CopyDone {token count {error {}}} {
1202    variable $token
1203    upvar 0 $token state
1204    set sock $state(sock)
1205    incr state(currentsize) $count
1206    if {[info exists state(-progress)]} {
1207	eval $state(-progress) \
1208	    [list $token $state(totalsize) $state(currentsize)]
1209    }
1210    # At this point the token may have been reset
1211    if {[string length $error]} {
1212	Finish $token $error
1213    } elseif {[catch {eof $sock} iseof] || $iseof} {
1214	Eof $token
1215    } else {
1216	CopyStart $sock $token
1217    }
1218}
1219
1220# http::Eof
1221#
1222#	Handle eof on the socket
1223#
1224# Arguments
1225#	token	The token returned from http::geturl
1226#
1227# Side Effects
1228#	Clean up the socket
1229
1230proc http::Eof {token {force 0}} {
1231    variable $token
1232    upvar 0 $token state
1233    if {$state(state) eq "header"} {
1234	# Premature eof
1235	set state(status) eof
1236    } else {
1237	set state(status) ok
1238    }
1239
1240    if {($state(coding) eq "gzip") && [string length $state(body)] > 0} {
1241        if {[catch {
1242	    if {[package vsatisfies [package present Tcl] 8.6]} {
1243		# The zlib integration into 8.6 includes proper gzip support
1244		set state(body) [zlib gunzip $state(body)]
1245	    } else {
1246		set state(body) [Gunzip $state(body)]
1247	    }
1248        } err]} then {
1249	    return [Finish $token $err]
1250        }
1251    }
1252
1253    if {!$state(binary)} {
1254        # If we are getting text, set the incoming channel's encoding
1255        # correctly.  iso8859-1 is the RFC default, but this could be any IANA
1256        # charset.  However, we only know how to convert what we have
1257        # encodings for.
1258
1259        set enc [CharsetToEncoding $state(charset)]
1260        if {$enc ne "binary"} {
1261	    set state(body) [encoding convertfrom $enc $state(body)]
1262        }
1263
1264        # Translate text line endings.
1265        set state(body) [string map {\r\n \n \r \n} $state(body)]
1266    }
1267
1268    Finish $token
1269}
1270
1271# http::wait --
1272#
1273#	See documentation for details.
1274#
1275# Arguments:
1276#	token	Connection token.
1277#
1278# Results:
1279#        The status after the wait.
1280
1281proc http::wait {token} {
1282    variable $token
1283    upvar 0 $token state
1284
1285    if {![info exists state(status)] || $state(status) eq ""} {
1286	# We must wait on the original variable name, not the upvar alias
1287	vwait ${token}(status)
1288    }
1289
1290    return [status $token]
1291}
1292
1293# http::formatQuery --
1294#
1295#	See documentation for details.  Call http::formatQuery with an even
1296#	number of arguments, where the first is a name, the second is a value,
1297#	the third is another name, and so on.
1298#
1299# Arguments:
1300#	args	A list of name-value pairs.
1301#
1302# Results:
1303#	TODO
1304
1305proc http::formatQuery {args} {
1306    set result ""
1307    set sep ""
1308    foreach i $args {
1309	append result $sep [mapReply $i]
1310	if {$sep eq "="} {
1311	    set sep &
1312	} else {
1313	    set sep =
1314	}
1315    }
1316    return $result
1317}
1318
1319# http::mapReply --
1320#
1321#	Do x-www-urlencoded character mapping
1322#
1323# Arguments:
1324#	string	The string the needs to be encoded
1325#
1326# Results:
1327#       The encoded string
1328
1329proc http::mapReply {string} {
1330    variable http
1331    variable formMap
1332
1333    # The spec says: "non-alphanumeric characters are replaced by '%HH'". Use
1334    # a pre-computed map and [string map] to do the conversion (much faster
1335    # than [regsub]/[subst]). [Bug 1020491]
1336
1337    if {$http(-urlencoding) ne ""} {
1338	set string [encoding convertto $http(-urlencoding) $string]
1339	return [string map $formMap $string]
1340    }
1341    set converted [string map $formMap $string]
1342    if {[string match "*\[\u0100-\uffff\]*" $converted]} {
1343	regexp {[\u0100-\uffff]} $converted badChar
1344	# Return this error message for maximum compatability... :^/
1345	return -code error \
1346	    "can't read \"formMap($badChar)\": no such element in array"
1347    }
1348    return $converted
1349}
1350
1351# http::ProxyRequired --
1352#	Default proxy filter.
1353#
1354# Arguments:
1355#	host	The destination host
1356#
1357# Results:
1358#       The current proxy settings
1359
1360proc http::ProxyRequired {host} {
1361    variable http
1362    if {[info exists http(-proxyhost)] && [string length $http(-proxyhost)]} {
1363	if {
1364	    ![info exists http(-proxyport)] ||
1365	    ![string length $http(-proxyport)]
1366	} then {
1367	    set http(-proxyport) 8080
1368	}
1369	return [list $http(-proxyhost) $http(-proxyport)]
1370    }
1371}
1372
1373# http::CharsetToEncoding --
1374#
1375#	Tries to map a given IANA charset to a tcl encoding.  If no encoding
1376#	can be found, returns binary.
1377#
1378
1379proc http::CharsetToEncoding {charset} {
1380    variable encodings
1381
1382    set charset [string tolower $charset]
1383    if {[regexp {iso-?8859-([0-9]+)} $charset -> num]} {
1384	set encoding "iso8859-$num"
1385    } elseif {[regexp {iso-?2022-(jp|kr)} $charset -> ext]} {
1386	set encoding "iso2022-$ext"
1387    } elseif {[regexp {shift[-_]?js} $charset]} {
1388	set encoding "shiftjis"
1389    } elseif {[regexp {(?:windows|cp)-?([0-9]+)} $charset -> num]} {
1390	set encoding "cp$num"
1391    } elseif {$charset eq "us-ascii"} {
1392	set encoding "ascii"
1393    } elseif {[regexp {(?:iso-?)?lat(?:in)?-?([0-9]+)} $charset -> num]} {
1394	switch -- $num {
1395	    5 {set encoding "iso8859-9"}
1396	    1 - 2 - 3 {
1397		set encoding "iso8859-$num"
1398	    }
1399	}
1400    } else {
1401	# other charset, like euc-xx, utf-8,...  may directly map to encoding
1402	set encoding $charset
1403    }
1404    set idx [lsearch -exact $encodings $encoding]
1405    if {$idx >= 0} {
1406	return $encoding
1407    } else {
1408	return "binary"
1409    }
1410}
1411
1412# http::Gunzip --
1413#
1414#	Decompress data transmitted using the gzip transfer coding.
1415#
1416
1417# FIX ME: redo using zlib sinflate
1418proc http::Gunzip {data} {
1419    binary scan $data Scb5icc magic method flags time xfl os
1420    set pos 10
1421    if {$magic != 0x1f8b} {
1422        return -code error "invalid data: supplied data is not in gzip format"
1423    }
1424    if {$method != 8} {
1425        return -code error "invalid compression method"
1426    }
1427
1428    # lassign [split $flags ""] f_text f_crc f_extra f_name f_comment
1429    foreach {f_text f_crc f_extra f_name f_comment} [split $flags ""] break
1430    set extra ""
1431    if {$f_extra} {
1432	binary scan $data @${pos}S xlen
1433        incr pos 2
1434        set extra [string range $data $pos $xlen]
1435        set pos [incr xlen]
1436    }
1437
1438    set name ""
1439    if {$f_name} {
1440        set ndx [string first \0 $data $pos]
1441        set name [string range $data $pos $ndx]
1442        set pos [incr ndx]
1443    }
1444
1445    set comment ""
1446    if {$f_comment} {
1447        set ndx [string first \0 $data $pos]
1448        set comment [string range $data $pos $ndx]
1449        set pos [incr ndx]
1450    }
1451
1452    set fcrc ""
1453    if {$f_crc} {
1454	set fcrc [string range $data $pos [incr pos]]
1455        incr pos
1456    }
1457
1458    binary scan [string range $data end-7 end] ii crc size
1459    set inflated [zlib inflate [string range $data $pos end-8]]
1460    set chk [zlib crc32 $inflated]
1461    if {($crc & 0xffffffff) != ($chk & 0xffffffff)} {
1462	return -code error "invalid data: checksum mismatch $crc != $chk"
1463    }
1464    return $inflated
1465}
1466
1467# Local variables:
1468# indent-tabs-mode: t
1469# End:
1470