1# http.tcl --
2#
3#	Client-side HTTP for GET, POST, and HEAD commands. These routines can
4#	be used in untrusted code that uses the Safesock security policy. These
5#	procedures use a callback interface to avoid using vwait, which is not
6#	defined in the safe base.
7#
8# See the file "license.terms" for information on usage and redistribution of
9# this file, and for a DISCLAIMER OF ALL WARRANTIES.
10#
11# RCS: @(#) $Id: http.tcl,v 1.43.2.15 2008/02/27 23:58:18 patthoyts Exp $
12
13# Rough version history:
14# 1.0	Old http_get interface.
15# 2.0	http:: namespace and http::geturl.
16# 2.1	Added callbacks to handle arriving data, and timeouts.
17# 2.2	Added ability to fetch into a channel.
18# 2.3	Added SSL support, and ability to post from a channel. This version
19#	also cleans up error cases and eliminates the "ioerror" status in
20#	favor of raising an error
21# 2.4	Added -binary option to http::geturl and charset element to the state
22#	array.
23
24package require Tcl 8.4
25# Keep this in sync with pkgIndex.tcl and with the install directories
26# in Makefiles
27package provide http 2.5.5
28
29namespace eval http {
30    variable http
31    array set http {
32	-accept */*
33	-proxyhost {}
34	-proxyport {}
35	-proxyfilter http::ProxyRequired
36	-urlencoding utf-8
37    }
38    set http(-useragent) "Tcl http client package [package provide http]"
39
40    proc init {} {
41	# Set up the map for quoting chars. RFC3986 Section 2.3 say percent
42	# encode all except: "... percent-encoded octets in the ranges of ALPHA
43	# (%41-%5A and %61-%7A), DIGIT (%30-%39), hyphen (%2D), period (%2E),
44	# underscore (%5F), or tilde (%7E) should not be created by URI
45	# producers ..."
46	for {set i 0} {$i <= 256} {incr i} {
47	    set c [format %c $i]
48	    if {![string match {[-._~a-zA-Z0-9]} $c]} {
49		set map($c) %[format %.2x $i]
50	    }
51	}
52	# These are handled specially
53	set map(\n) %0d%0a
54	variable formMap [array get map]
55    }
56    init
57
58    variable urlTypes
59    array set urlTypes {
60	http	{80 ::socket}
61    }
62
63    variable encodings [string tolower [encoding names]]
64    # This can be changed, but iso8859-1 is the RFC standard.
65    variable defaultCharset "iso8859-1"
66
67    # Force RFC 3986 strictness in geturl url verification?  Not for 8.4.x
68    variable strict 0
69
70    namespace export geturl config reset wait formatQuery register unregister
71    # Useful, but not exported: data size status code
72}
73
74# http::register --
75#
76#     See documentation for details.
77#
78# Arguments:
79#     proto           URL protocol prefix, e.g. https
80#     port            Default port for protocol
81#     command         Command to use to create socket
82# Results:
83#     list of port and command that was registered.
84
85proc http::register {proto port command} {
86    variable urlTypes
87    set urlTypes($proto) [list $port $command]
88}
89
90# http::unregister --
91#
92#     Unregisters URL protocol handler
93#
94# Arguments:
95#     proto           URL protocol prefix, e.g. https
96# Results:
97#     list of port and command that was unregistered.
98
99proc http::unregister {proto} {
100    variable urlTypes
101    if {![info exists urlTypes($proto)]} {
102	return -code error "unsupported url type \"$proto\""
103    }
104    set old $urlTypes($proto)
105    unset urlTypes($proto)
106    return $old
107}
108
109# http::config --
110#
111#	See documentation for details.
112#
113# Arguments:
114#	args		Options parsed by the procedure.
115# Results:
116#        TODO
117
118proc http::config {args} {
119    variable http
120    set options [lsort [array names http -*]]
121    set usage [join $options ", "]
122    if {[llength $args] == 0} {
123	set result {}
124	foreach name $options {
125	    lappend result $name $http($name)
126	}
127	return $result
128    }
129    set options [string map {- ""} $options]
130    set pat ^-([join $options |])$
131    if {[llength $args] == 1} {
132	set flag [lindex $args 0]
133	if {[regexp -- $pat $flag]} {
134	    return $http($flag)
135	} else {
136	    return -code error "Unknown option $flag, must be: $usage"
137	}
138    } else {
139	foreach {flag value} $args {
140	    if {[regexp -- $pat $flag]} {
141		set http($flag) $value
142	    } else {
143		return -code error "Unknown option $flag, must be: $usage"
144	    }
145	}
146    }
147}
148
149# http::Finish --
150#
151#	Clean up the socket and eval close time callbacks
152#
153# Arguments:
154#	token	    Connection token.
155#	errormsg    (optional) If set, forces status to error.
156#       skipCB      (optional) If set, don't call the -command callback. This
157#                   is useful when geturl wants to throw an exception instead
158#                   of calling the callback. That way, the same error isn't
159#                   reported to two places.
160#
161# Side Effects:
162#        Closes the socket
163
164proc http::Finish { token {errormsg ""} {skipCB 0}} {
165    variable $token
166    upvar 0 $token state
167    global errorInfo errorCode
168    if {[string length $errormsg] != 0} {
169	set state(error) [list $errormsg $errorInfo $errorCode]
170	set state(status) error
171    }
172    catch {close $state(sock)}
173    catch {after cancel $state(after)}
174    if {[info exists state(-command)] && !$skipCB} {
175	if {[catch {eval $state(-command) {$token}} err]} {
176	    if {[string length $errormsg] == 0} {
177		set state(error) [list $err $errorInfo $errorCode]
178		set state(status) error
179	    }
180	}
181	if {[info exists state(-command)]} {
182	    # Command callback may already have unset our state
183	    unset state(-command)
184	}
185    }
186}
187
188# http::reset --
189#
190#	See documentation for details.
191#
192# Arguments:
193#	token	Connection token.
194#	why	Status info.
195#
196# Side Effects:
197#       See Finish
198
199proc http::reset { token {why reset} } {
200    variable $token
201    upvar 0 $token state
202    set state(status) $why
203    catch {fileevent $state(sock) readable {}}
204    catch {fileevent $state(sock) writable {}}
205    Finish $token
206    if {[info exists state(error)]} {
207	set errorlist $state(error)
208	unset state
209	eval ::error $errorlist
210    }
211}
212
213# http::geturl --
214#
215#	Establishes a connection to a remote url via http.
216#
217# Arguments:
218#       url		The http URL to goget.
219#       args		Option value pairs. Valid options include:
220#				-blocksize, -validate, -headers, -timeout
221# Results:
222#	Returns a token for this connection. This token is the name of an array
223#	that the caller should unset to garbage collect the state.
224
225proc http::geturl { url args } {
226    variable http
227    variable urlTypes
228    variable defaultCharset
229    variable strict
230
231    # Initialize the state variable, an array. We'll return the name of this
232    # array as the token for the transaction.
233
234    if {![info exists http(uid)]} {
235	set http(uid) 0
236    }
237    set token [namespace current]::[incr http(uid)]
238    variable $token
239    upvar 0 $token state
240    reset $token
241
242    # Process command options.
243
244    array set state {
245	-binary		false
246	-blocksize 	8192
247	-queryblocksize 8192
248	-validate 	0
249	-headers 	{}
250	-timeout 	0
251	-type           application/x-www-form-urlencoded
252	-queryprogress	{}
253	state		header
254	meta		{}
255	coding		{}
256	currentsize	0
257	totalsize	0
258	querylength	0
259	queryoffset	0
260        type            text/html
261        body            {}
262	status		""
263	http            ""
264    }
265    # These flags have their types verified [Bug 811170]
266    array set type {
267	-binary		boolean
268	-blocksize	integer
269	-queryblocksize integer
270	-validate	boolean
271	-timeout	integer
272    }
273    set state(charset)	$defaultCharset
274    set options {-binary -blocksize -channel -command -handler -headers \
275	    -progress -query -queryblocksize -querychannel -queryprogress\
276	    -validate -timeout -type}
277    set usage [join $options ", "]
278    set options [string map {- ""} $options]
279    set pat ^-([join $options |])$
280    foreach {flag value} $args {
281	if {[regexp $pat $flag]} {
282	    # Validate numbers
283	    if {[info exists type($flag)] && \
284		    ![string is $type($flag) -strict $value]} {
285		unset $token
286		return -code error "Bad value for $flag ($value), must be $type($flag)"
287	    }
288	    set state($flag) $value
289	} else {
290	    unset $token
291	    return -code error "Unknown option $flag, can be: $usage"
292	}
293    }
294
295    # Make sure -query and -querychannel aren't both specified
296
297    set isQueryChannel [info exists state(-querychannel)]
298    set isQuery [info exists state(-query)]
299    if {$isQuery && $isQueryChannel} {
300	unset $token
301	return -code error "Can't combine -query and -querychannel options!"
302    }
303
304    # Validate URL, determine the server host and port, and check proxy case
305    # Recognize user:pass@host URLs also, although we do not do anything with
306    # that info yet.
307
308    # URLs have basically four parts.
309    # First, before the colon, is the protocol scheme (e.g. http)
310    # Second, for HTTP-like protocols, is the authority
311    #	The authority is preceded by // and lasts up to (but not including)
312    #	the following / and it identifies up to four parts, of which only one,
313    #	the host, is required (if an authority is present at all). All other
314    #	parts of the authority (user name, password, port number) are optional.
315    # Third is the resource name, which is split into two parts at a ?
316    #	The first part (from the single "/" up to "?") is the path, and the
317    #	second part (from that "?" up to "#") is the query. *HOWEVER*, we do
318    #	not need to separate them; we send the whole lot to the server.
319    # Fourth is the fragment identifier, which is everything after the first
320    #	"#" in the URL. The fragment identifier MUST NOT be sent to the server
321    #	and indeed, we don't bother to validate it (it could be an error to
322    #	pass it in here, but it's cheap to strip).
323    #
324    # An example of a URL that has all the parts:
325    #   http://jschmoe:xyzzy@www.bogus.net:8000/foo/bar.tml?q=foo#changes
326    # The "http" is the protocol, the user is "jschmoe", the password is
327    # "xyzzy", the host is "www.bogus.net", the port is "8000", the path is
328    # "/foo/bar.tml", the query is "q=foo", and the fragment is "changes".
329    #
330    # Note that the RE actually combines the user and password parts, as
331    # recommended in RFC 3986. Indeed, that RFC states that putting passwords
332    # in URLs is a Really Bad Idea, something with which I would agree utterly.
333    # Also note that we do not currently support IPv6 addresses.
334    #
335    # From a validation perspective, we need to ensure that the parts of the
336    # URL that are going to the server are correctly encoded.
337    # This is only done if $::http::strict is true (default 0 for compat).
338
339    set URLmatcher {(?x)		# this is _expanded_ syntax
340	^
341	(?: (\w+) : ) ?			# <protocol scheme>
342	(?: //
343	    (?:
344		(
345		    [^@/\#?]+		# <userinfo part of authority>
346		) @
347	    )?
348	    ( [^/:\#?]+ )		# <host part of authority>
349	    (?: : (\d+) )?		# <port part of authority>
350	)?
351	( / [^\#?]* (?: \? [^\#?]* )?)?	# <path> (including query)
352	(?: \# (.*) )?			# <fragment>
353	$
354    }
355
356    # Phase one: parse
357    if {![regexp -- $URLmatcher $url -> proto user host port srvurl]} {
358	unset $token
359	return -code error "Unsupported URL: $url"
360    }
361    # Phase two: validate
362    if {$host eq ""} {
363	# Caller has to provide a host name; we do not have a "default host"
364	# that would enable us to handle relative URLs.
365	unset $token
366	return -code error "Missing host part: $url"
367	# Note that we don't check the hostname for validity here; if it's
368	# invalid, we'll simply fail to resolve it later on.
369    }
370    if {$port ne "" && $port>65535} {
371	unset $token
372	return -code error "Invalid port number: $port"
373    }
374    # The user identification and resource identification parts of the URL can
375    # have encoded characters in them; take care!
376    if {$user ne ""} {
377	# Check for validity according to RFC 3986, Appendix A
378	set validityRE {(?xi)
379	    ^
380	    (?: [-\w.~!$&'()*+,;=:] | %[0-9a-f][0-9a-f] )+
381	    $
382	}
383	if {$strict && ![regexp -- $validityRE $user]} {
384	    unset $token
385	    # Provide a better error message in this error case
386	    if {[regexp {(?i)%(?![0-9a-f][0-9a-f]).?.?} $user bad]} {
387		return -code error \
388			"Illegal encoding character usage \"$bad\" in URL user"
389	    }
390	    return -code error "Illegal characters in URL user"
391	}
392    }
393    if {$srvurl ne ""} {
394	# Check for validity according to RFC 3986, Appendix A
395	set validityRE {(?xi)
396	    ^
397	    # Path part (already must start with / character)
398	    (?:	      [-\w.~!$&'()*+,;=:@/]  | %[0-9a-f][0-9a-f] )*
399	    # Query part (optional, permits ? characters)
400	    (?: \? (?: [-\w.~!$&'()*+,;=:@/?] | %[0-9a-f][0-9a-f] )* )?
401	    $
402	}
403	if {$strict && ![regexp -- $validityRE $srvurl]} {
404	    unset $token
405	    # Provide a better error message in this error case
406	    if {[regexp {(?i)%(?![0-9a-f][0-9a-f])..} $srvurl bad]} {
407		return -code error \
408			"Illegal encoding character usage \"$bad\" in URL path"
409	    }
410	    return -code error "Illegal characters in URL path"
411	}
412    } else {
413	set srvurl /
414    }
415    if {[string length $proto] == 0} {
416	set proto http
417    }
418    if {![info exists urlTypes($proto)]} {
419	unset $token
420	return -code error "Unsupported URL type \"$proto\""
421    }
422    set defport [lindex $urlTypes($proto) 0]
423    set defcmd [lindex $urlTypes($proto) 1]
424
425    if {[string length $port] == 0} {
426	set port $defport
427    }
428    if {![catch {$http(-proxyfilter) $host} proxy]} {
429	set phost [lindex $proxy 0]
430	set pport [lindex $proxy 1]
431    }
432
433    # OK, now reassemble into a full URL
434    set url ${proto}://
435    if {$user ne ""} {
436	append url $user
437	append url @
438    }
439    append url $host
440    if {$port != $defport} {
441	append url : $port
442    }
443    append url $srvurl
444    # Don't append the fragment!
445    set state(url) $url
446
447    # If a timeout is specified we set up the after event and arrange for an
448    # asynchronous socket connection.
449
450    if {$state(-timeout) > 0} {
451	set state(after) [after $state(-timeout) \
452		[list http::reset $token timeout]]
453	set async -async
454    } else {
455	set async ""
456    }
457
458    # If we are using the proxy, we must pass in the full URL that includes
459    # the server name.
460
461    if {[info exists phost] && [string length $phost]} {
462	set srvurl $url
463	set conStat [catch {eval $defcmd $async {$phost $pport}} s]
464    } else {
465	set conStat [catch {eval $defcmd $async {$host $port}} s]
466    }
467
468    if {$conStat} {
469	# Something went wrong while trying to establish the connection. Clean
470	# up after events and such, but DON'T call the command callback (if
471	# available) because we're going to throw an exception from here
472	# instead.
473	Finish $token "" 1
474	cleanup $token
475	return -code error $s
476    }
477    set state(sock) $s
478
479    # Wait for the connection to complete.
480
481    if {$state(-timeout) > 0} {
482	fileevent $s writable [list http::Connect $token]
483	http::wait $token
484
485	if {![info exists state]} {
486	    # If we timed out then Finish has been called and the users
487	    # command callback may have cleaned up the token. If so
488	    # we end up here with nothing left to do.
489	    return $token
490	} else {
491	    if {$state(status) eq "error"} {
492		# Something went wrong while trying to establish the connection.
493		# Clean up after events and such, but DON'T call the command
494		# callback (if available) because we're going to throw an
495		# exception from here instead.
496		set err [lindex $state(error) 0]
497		cleanup $token
498		return -code error $err
499	    } elseif {$state(status) ne "connect"} {
500		# Likely to be connection timeout
501		return $token
502	    }
503	    set state(status) ""
504	}
505    }
506
507    # Send data in cr-lf format, but accept any line terminators
508
509    fconfigure $s -translation {auto crlf} -buffersize $state(-blocksize)
510
511    # The following is disallowed in safe interpreters, but the socket is
512    # already in non-blocking mode in that case.
513
514    catch {fconfigure $s -blocking off}
515    set how GET
516    if {$isQuery} {
517	set state(querylength) [string length $state(-query)]
518	if {$state(querylength) > 0} {
519	    set how POST
520	    set contDone 0
521	} else {
522	    # There's no query data.
523	    unset state(-query)
524	    set isQuery 0
525	}
526    } elseif {$state(-validate)} {
527	set how HEAD
528    } elseif {$isQueryChannel} {
529	set how POST
530	# The query channel must be blocking for the async Write to
531	# work properly.
532	fconfigure $state(-querychannel) -blocking 1 -translation binary
533	set contDone 0
534    }
535
536    if {[catch {
537	puts $s "$how $srvurl HTTP/1.0"
538	puts $s "Accept: $http(-accept)"
539	if {$port == $defport} {
540	    # Don't add port in this case, to handle broken servers. [Bug
541	    # 504508]
542	    puts $s "Host: $host"
543	} else {
544	    puts $s "Host: $host:$port"
545	}
546	puts $s "User-Agent: $http(-useragent)"
547	foreach {key value} $state(-headers) {
548	    set value [string map [list \n "" \r ""] $value]
549	    set key [string trim $key]
550	    if {$key eq "Content-Length"} {
551		set contDone 1
552		set state(querylength) $value
553	    }
554	    if {[string length $key]} {
555		puts $s "$key: $value"
556	    }
557	}
558	if {$isQueryChannel && $state(querylength) == 0} {
559	    # Try to determine size of data in channel. If we cannot seek, the
560	    # surrounding catch will trap us
561
562	    set start [tell $state(-querychannel)]
563	    seek $state(-querychannel) 0 end
564	    set state(querylength) \
565		    [expr {[tell $state(-querychannel)] - $start}]
566	    seek $state(-querychannel) $start
567	}
568
569	# Flush the request header and set up the fileevent that will either
570	# push the POST data or read the response.
571	#
572	# fileevent note:
573	#
574	# It is possible to have both the read and write fileevents active at
575	# this point. The only scenario it seems to affect is a server that
576	# closes the connection without reading the POST data. (e.g., early
577	# versions TclHttpd in various error cases). Depending on the platform,
578	# the client may or may not be able to get the response from the server
579	# because of the error it will get trying to write the post data.
580	# Having both fileevents active changes the timing and the behavior,
581	# but no two platforms (among Solaris, Linux, and NT) behave the same,
582	# and none behave all that well in any case. Servers should always read
583	# their POST data if they expect the client to read their response.
584
585	if {$isQuery || $isQueryChannel} {
586	    puts $s "Content-Type: $state(-type)"
587	    if {!$contDone} {
588		puts $s "Content-Length: $state(querylength)"
589	    }
590	    puts $s ""
591	    fconfigure $s -translation {auto binary}
592	    fileevent $s writable [list http::Write $token]
593	} else {
594	    puts $s ""
595	    flush $s
596	    fileevent $s readable [list http::Event $token]
597	}
598
599	if {! [info exists state(-command)]} {
600	    # geturl does EVERYTHING asynchronously, so if the user calls it
601	    # synchronously, we just do a wait here.
602
603	    wait $token
604	    if {$state(status) eq "error"} {
605		# Something went wrong, so throw the exception, and the
606		# enclosing catch will do cleanup.
607		return -code error [lindex $state(error) 0]
608	    }
609	}
610    } err]} {
611	# The socket probably was never connected, or the connection dropped
612	# later.
613
614	# Clean up after events and such, but DON'T call the command callback
615	# (if available) because we're going to throw an exception from here
616	# instead.
617
618	# if state(status) is error, it means someone's already called Finish
619	# to do the above-described clean up.
620	if {$state(status) ne "error"} {
621	    Finish $token $err 1
622	}
623	cleanup $token
624	return -code error $err
625    }
626
627    return $token
628}
629
630# Data access functions:
631# Data - the URL data
632# Status - the transaction status: ok, reset, eof, timeout
633# Code - the HTTP transaction code, e.g., 200
634# Size - the size of the URL data
635
636proc http::data {token} {
637    variable $token
638    upvar 0 $token state
639    return $state(body)
640}
641proc http::status {token} {
642    if {![info exists $token]} { return "error" }
643    variable $token
644    upvar 0 $token state
645    return $state(status)
646}
647proc http::code {token} {
648    variable $token
649    upvar 0 $token state
650    return $state(http)
651}
652proc http::ncode {token} {
653    variable $token
654    upvar 0 $token state
655    if {[regexp {[0-9]{3}} $state(http) numeric_code]} {
656	return $numeric_code
657    } else {
658	return $state(http)
659    }
660}
661proc http::size {token} {
662    variable $token
663    upvar 0 $token state
664    return $state(currentsize)
665}
666proc http::meta {token} {
667    variable $token
668    upvar 0 $token state
669    return $state(meta)
670}
671proc http::error {token} {
672    variable $token
673    upvar 0 $token state
674    if {[info exists state(error)]} {
675	return $state(error)
676    }
677    return ""
678}
679
680# http::cleanup
681#
682#	Garbage collect the state associated with a transaction
683#
684# Arguments
685#	token	The token returned from http::geturl
686#
687# Side Effects
688#	unsets the state array
689
690proc http::cleanup {token} {
691    variable $token
692    upvar 0 $token state
693    if {[info exists state]} {
694	unset state
695    }
696}
697
698# http::Connect
699#
700#	This callback is made when an asyncronous connection completes.
701#
702# Arguments
703#	token	The token returned from http::geturl
704#
705# Side Effects
706#	Sets the status of the connection, which unblocks
707# 	the waiting geturl call
708
709proc http::Connect {token} {
710    variable $token
711    upvar 0 $token state
712    global errorInfo errorCode
713    if {[eof $state(sock)] ||
714	[string length [fconfigure $state(sock) -error]]} {
715	    Finish $token "connect failed [fconfigure $state(sock) -error]" 1
716    } else {
717	set state(status) connect
718	fileevent $state(sock) writable {}
719    }
720    return
721}
722
723# http::Write
724#
725#	Write POST query data to the socket
726#
727# Arguments
728#	token	The token for the connection
729#
730# Side Effects
731#	Write the socket and handle callbacks.
732
733proc http::Write {token} {
734    variable $token
735    upvar 0 $token state
736    set s $state(sock)
737
738    # Output a block.  Tcl will buffer this if the socket blocks
739    set done 0
740    if {[catch {
741	# Catch I/O errors on dead sockets
742
743	if {[info exists state(-query)]} {
744	    # Chop up large query strings so queryprogress callback can give
745	    # smooth feedback.
746
747	    puts -nonewline $s \
748		    [string range $state(-query) $state(queryoffset) \
749		    [expr {$state(queryoffset) + $state(-queryblocksize) - 1}]]
750	    incr state(queryoffset) $state(-queryblocksize)
751	    if {$state(queryoffset) >= $state(querylength)} {
752		set state(queryoffset) $state(querylength)
753		set done 1
754	    }
755	} else {
756	    # Copy blocks from the query channel
757
758	    set outStr [read $state(-querychannel) $state(-queryblocksize)]
759	    puts -nonewline $s $outStr
760	    incr state(queryoffset) [string length $outStr]
761	    if {[eof $state(-querychannel)]} {
762		set done 1
763	    }
764	}
765    } err]} {
766	# Do not call Finish here, but instead let the read half of the socket
767	# process whatever server reply there is to get.
768
769	set state(posterror) $err
770	set done 1
771    }
772    if {$done} {
773	catch {flush $s}
774	fileevent $s writable {}
775	fileevent $s readable [list http::Event $token]
776    }
777
778    # Callback to the client after we've completely handled everything.
779
780    if {[string length $state(-queryprogress)]} {
781	eval $state(-queryprogress) [list $token $state(querylength)\
782		$state(queryoffset)]
783    }
784}
785
786# http::Event
787#
788#	Handle input on the socket
789#
790# Arguments
791#	token	The token returned from http::geturl
792#
793# Side Effects
794#	Read the socket and handle callbacks.
795
796proc http::Event {token} {
797    variable $token
798    upvar 0 $token state
799    set s $state(sock)
800
801    if {$state(state) eq "header"} {
802	if {[catch {gets $s line} n]} {
803	    return [Finish $token $n]
804	} elseif {$n == 0} {
805	    variable encodings
806	    set state(state) body
807	    if {$state(-binary) || ![string match -nocase text* $state(type)]
808		    || [string match *gzip* $state(coding)]
809		    || [string match *compress* $state(coding)]} {
810		# Turn off conversions for non-text data
811		fconfigure $s -translation binary
812		if {[info exists state(-channel)]} {
813		    fconfigure $state(-channel) -translation binary
814		}
815	    } else {
816		# If we are getting text, set the incoming channel's encoding
817		# correctly. iso8859-1 is the RFC default, but this could be
818		# any IANA charset. However, we only know how to convert what
819		# we have encodings for.
820		set idx [lsearch -exact $encodings \
821			[string tolower $state(charset)]]
822		if {$idx >= 0} {
823		    fconfigure $s -encoding [lindex $encodings $idx]
824		}
825	    }
826	    if {[info exists state(-channel)] && \
827		    ![info exists state(-handler)]} {
828		# Initiate a sequence of background fcopies
829		fileevent $s readable {}
830		CopyStart $s $token
831		return
832	    }
833	} elseif {$n > 0} {
834	    if {[regexp -nocase {^content-type:(.+)$} $line x type]} {
835		set state(type) [string trim $type]
836		# grab the optional charset information
837		regexp -nocase {charset\s*=\s*(\S+)} $type x state(charset)
838	    }
839	    if {[regexp -nocase {^content-length:(.+)$} $line x length]} {
840		set state(totalsize) [string trim $length]
841	    }
842	    if {[regexp -nocase {^content-encoding:(.+)$} $line x coding]} {
843		set state(coding) [string trim $coding]
844	    }
845	    if {[regexp -nocase {^([^:]+):(.+)$} $line x key value]} {
846		lappend state(meta) $key [string trim $value]
847	    } elseif {[string match HTTP* $line]} {
848		set state(http) $line
849	    }
850	}
851    } else {
852	if {[catch {
853	    if {[info exists state(-handler)]} {
854		set n [eval $state(-handler) {$s $token}]
855	    } else {
856		set block [read $s $state(-blocksize)]
857		set n [string length $block]
858		if {$n >= 0} {
859		    append state(body) $block
860		}
861	    }
862	    if {$n >= 0} {
863		incr state(currentsize) $n
864	    }
865	} err]} {
866	    return [Finish $token $err]
867	} else {
868	    if {[info exists state(-progress)]} {
869		eval $state(-progress) \
870			{$token $state(totalsize) $state(currentsize)}
871	    }
872	}
873    }
874
875    if {[eof $s]} {
876	Eof $token
877	return
878    }
879}
880
881# http::CopyStart
882#
883#	Error handling wrapper around fcopy
884#
885# Arguments
886#	s	The socket to copy from
887#	token	The token returned from http::geturl
888#
889# Side Effects
890#	This closes the connection upon error
891
892proc http::CopyStart {s token} {
893    variable $token
894    upvar 0 $token state
895    if {[catch {
896	fcopy $s $state(-channel) -size $state(-blocksize) -command \
897	    [list http::CopyDone $token]
898    } err]} {
899	Finish $token $err
900    }
901}
902
903# http::CopyDone
904#
905#	fcopy completion callback
906#
907# Arguments
908#	token	The token returned from http::geturl
909#	count	The amount transfered
910#
911# Side Effects
912#	Invokes callbacks
913
914proc http::CopyDone {token count {error {}}} {
915    variable $token
916    upvar 0 $token state
917    set s $state(sock)
918    incr state(currentsize) $count
919    if {[info exists state(-progress)]} {
920	eval $state(-progress) {$token $state(totalsize) $state(currentsize)}
921    }
922    # At this point the token may have been reset
923    if {[string length $error]} {
924	Finish $token $error
925    } elseif {[catch {eof $s} iseof] || $iseof} {
926	Eof $token
927    } else {
928	CopyStart $s $token
929    }
930}
931
932# http::Eof
933#
934#	Handle eof on the socket
935#
936# Arguments
937#	token	The token returned from http::geturl
938#
939# Side Effects
940#	Clean up the socket
941
942proc http::Eof {token} {
943    variable $token
944    upvar 0 $token state
945    if {$state(state) eq "header"} {
946	# Premature eof
947	set state(status) eof
948    } else {
949	set state(status) ok
950    }
951    set state(state) eof
952    Finish $token
953}
954
955# http::wait --
956#
957#	See documentation for details.
958#
959# Arguments:
960#	token	Connection token.
961#
962# Results:
963#        The status after the wait.
964
965proc http::wait {token} {
966    variable $token
967    upvar 0 $token state
968
969    if {![info exists state(status)] || [string length $state(status)] == 0} {
970	# We must wait on the original variable name, not the upvar alias
971	vwait $token\(status)
972    }
973
974    return [status $token]
975}
976
977# http::formatQuery --
978#
979#	See documentation for details. Call http::formatQuery with an even
980#	number of arguments, where the first is a name, the second is a value,
981#	the third is another name, and so on.
982#
983# Arguments:
984#	args	A list of name-value pairs.
985#
986# Results:
987#	TODO
988
989proc http::formatQuery {args} {
990    set result ""
991    set sep ""
992    foreach i $args {
993	append result $sep [mapReply $i]
994	if {$sep eq "="} {
995	    set sep &
996	} else {
997	    set sep =
998	}
999    }
1000    return $result
1001}
1002
1003# http::mapReply --
1004#
1005#	Do x-www-urlencoded character mapping
1006#
1007# Arguments:
1008#	string	The string the needs to be encoded
1009#
1010# Results:
1011#       The encoded string
1012
1013proc http::mapReply {string} {
1014    variable http
1015    variable formMap
1016
1017    # The spec says: "non-alphanumeric characters are replaced by '%HH'". Use
1018    # a pre-computed map and [string map] to do the conversion (much faster
1019    # than [regsub]/[subst]). [Bug 1020491]
1020
1021    if {$http(-urlencoding) ne ""} {
1022	set string [encoding convertto $http(-urlencoding) $string]
1023	return [string map $formMap $string]
1024    }
1025    set converted [string map $formMap $string]
1026    if {[string match "*\[\u0100-\uffff\]*" $converted]} {
1027	regexp {[\u0100-\uffff]} $converted badChar
1028	# Return this error message for maximum compatability... :^/
1029	return -code error \
1030	    "can't read \"formMap($badChar)\": no such element in array"
1031    }
1032    return $converted
1033}
1034
1035# http::ProxyRequired --
1036#	Default proxy filter.
1037#
1038# Arguments:
1039#	host	The destination host
1040#
1041# Results:
1042#       The current proxy settings
1043
1044proc http::ProxyRequired {host} {
1045    variable http
1046    if {[info exists http(-proxyhost)] && [string length $http(-proxyhost)]} {
1047	if {![info exists http(-proxyport)] || \
1048		![string length $http(-proxyport)]} {
1049	    set http(-proxyport) 8080
1050	}
1051	return [list $http(-proxyhost) $http(-proxyport)]
1052    }
1053}
1054
1055# Local variables:
1056# indent-tabs-mode: t
1057# End:
1058