1/* Establishing and handling network connections.
2   Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
3   2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
4
5This file is part of GNU Wget.
6
7GNU Wget is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
11
12GNU Wget is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with Wget.  If not, see <http://www.gnu.org/licenses/>.
19
20Additional permission under GNU GPL version 3 section 7
21
22If you modify this program, or any covered work, by linking or
23combining it with the OpenSSL project's OpenSSL library (or a
24modified version of that library), containing parts covered by the
25terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26grants you additional permission to convey the resulting work.
27Corresponding Source for a non-source form of such a combination
28shall include the source code for the parts of OpenSSL used as well
29as that of the covered work.  */
30
31#include "wget.h"
32
33#include <stdio.h>
34#include <stdlib.h>
35#ifdef HAVE_UNISTD_H
36# include <unistd.h>
37#endif
38#include <assert.h>
39
40#ifndef WINDOWS
41# include <sys/socket.h>
42# ifdef __VMS
43#  include "vms_ip.h"
44# else /* def __VMS */
45#  include <netdb.h>
46# endif /* def __VMS [else] */
47# include <netinet/in.h>
48# ifdef HAVE_ARPA_INET_H
49#  include <arpa/inet.h>
50# endif
51#endif /* not WINDOWS */
52
53#include <errno.h>
54#include <string.h>
55#ifdef HAVE_SYS_SELECT_H
56# include <sys/select.h>
57#endif /* HAVE_SYS_SELECT_H */
58#ifdef HAVE_SYS_TIME_H
59# include <sys/time.h>
60#endif
61#include "utils.h"
62#include "host.h"
63#include "connect.h"
64#include "hash.h"
65
66/* Apparently needed for Interix: */
67#ifdef HAVE_STDINT_H
68# include <stdint.h>
69#endif
70
/* Some systems do not declare struct sockaddr_storage.  When Wget is
   built without IPv6 support and the type is missing, alias the name
   to struct sockaddr_in, which is all that the AF_INET-only code
   paths in this file need.  */

#ifndef ENABLE_IPV6
# ifndef HAVE_STRUCT_SOCKADDR_STORAGE
#  define sockaddr_storage sockaddr_in
# endif
#endif /* ENABLE_IPV6 */
79
80/* Fill SA as per the data in IP and PORT.  SA shoult point to struct
81   sockaddr_storage if ENABLE_IPV6 is defined, to struct sockaddr_in
82   otherwise.  */
83
84static void
85sockaddr_set_data (struct sockaddr *sa, const ip_address *ip, int port)
86{
87  switch (ip->family)
88    {
89    case AF_INET:
90      {
91        struct sockaddr_in *sin = (struct sockaddr_in *)sa;
92        xzero (*sin);
93        sin->sin_family = AF_INET;
94        sin->sin_port = htons (port);
95        sin->sin_addr = ip->data.d4;
96        break;
97      }
98#ifdef ENABLE_IPV6
99    case AF_INET6:
100      {
101        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
102        xzero (*sin6);
103        sin6->sin6_family = AF_INET6;
104        sin6->sin6_port = htons (port);
105        sin6->sin6_addr = ip->data.d6;
106#ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
107        sin6->sin6_scope_id = ip->ipv6_scope;
108#endif
109        break;
110      }
111#endif /* ENABLE_IPV6 */
112    default:
113      abort ();
114    }
115}
116
117/* Get the data of SA, specifically the IP address and the port.  If
118   you're not interested in one or the other information, pass NULL as
119   the pointer.  */
120
121static void
122sockaddr_get_data (const struct sockaddr *sa, ip_address *ip, int *port)
123{
124  switch (sa->sa_family)
125    {
126    case AF_INET:
127      {
128        struct sockaddr_in *sin = (struct sockaddr_in *)sa;
129        if (ip)
130          {
131            ip->family = AF_INET;
132            ip->data.d4 = sin->sin_addr;
133          }
134        if (port)
135          *port = ntohs (sin->sin_port);
136        break;
137      }
138#ifdef ENABLE_IPV6
139    case AF_INET6:
140      {
141        struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
142        if (ip)
143          {
144            ip->family = AF_INET6;
145            ip->data.d6 = sin6->sin6_addr;
146#ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
147            ip->ipv6_scope = sin6->sin6_scope_id;
148#endif
149          }
150        if (port)
151          *port = ntohs (sin6->sin6_port);
152        break;
153      }
154#endif
155    default:
156      abort ();
157    }
158}
159
/* Return the size in bytes of the concrete socket address structure
   that SA points to, as determined by its address family.  The
   program is aborted when the family is not supported.  */

static socklen_t
sockaddr_size (const struct sockaddr *sa)
{
  if (sa->sa_family == AF_INET)
    return sizeof (struct sockaddr_in);
#ifdef ENABLE_IPV6
  if (sa->sa_family == AF_INET6)
    return sizeof (struct sockaddr_in6);
#endif
  abort ();
}
178
179/* Resolve the bind address specified via --bind-address and store it
180   to SA.  The resolved value is stored in a static variable and
181   reused after the first invocation of this function.
182
183   Returns true on success, false on failure.  */
184
185static bool
186resolve_bind_address (struct sockaddr *sa)
187{
188  struct address_list *al;
189
190  /* Make sure this is called only once.  opt.bind_address doesn't
191     change during a Wget run.  */
192  static bool called, should_bind;
193  static ip_address ip;
194  if (called)
195    {
196      if (should_bind)
197        sockaddr_set_data (sa, &ip, 0);
198      return should_bind;
199    }
200  called = true;
201
202  al = lookup_host (opt.bind_address, LH_BIND | LH_SILENT);
203  if (!al)
204    {
205      /* #### We should be able to print the error message here. */
206      logprintf (LOG_NOTQUIET,
207                 _("%s: unable to resolve bind address %s; disabling bind.\n"),
208                 exec_name, quote (opt.bind_address));
209      should_bind = false;
210      return false;
211    }
212
213  /* Pick the first address in the list and use it as bind address.
214     Perhaps we should try multiple addresses in succession, but I
215     don't think that's necessary in practice.  */
216  ip = *address_list_address_at (al, 0);
217  address_list_release (al);
218
219  sockaddr_set_data (sa, &ip, 0);
220  should_bind = true;
221  return true;
222}
223
/* Arguments and result of a single connect() call, bundled together
   so that the call can be made from connect_with_timeout_callback,
   which receives only one void * argument.  */
struct cwt_context {
  int fd;                       /* socket to connect */
  const struct sockaddr *addr;  /* address to connect the socket to */
  socklen_t addrlen;            /* length passed to connect along with ADDR */
  int result;                   /* value returned by connect, stored by the callback */
};
230
231static void
232connect_with_timeout_callback (void *arg)
233{
234  struct cwt_context *ctx = (struct cwt_context *)arg;
235  ctx->result = connect (ctx->fd, ctx->addr, ctx->addrlen);
236}
237
238/* Like connect, but specifies a timeout.  If connecting takes longer
239   than TIMEOUT seconds, -1 is returned and errno is set to
240   ETIMEDOUT.  */
241
242static int
243connect_with_timeout (int fd, const struct sockaddr *addr, socklen_t addrlen,
244                      double timeout)
245{
246  struct cwt_context ctx;
247  ctx.fd = fd;
248  ctx.addr = addr;
249  ctx.addrlen = addrlen;
250
251  if (run_with_timeout (timeout, connect_with_timeout_callback, &ctx))
252    {
253      errno = ETIMEDOUT;
254      return -1;
255    }
256  if (ctx.result == -1 && errno == EINTR)
257    errno = ETIMEDOUT;
258  return ctx.result;
259}
260
261/* Connect via TCP to the specified address and port.
262
263   If PRINT is non-NULL, it is the host name to print that we're
264   connecting to.  */
265
266int
267connect_to_ip (const ip_address *ip, int port, const char *print)
268{
269  struct sockaddr_storage ss;
270  struct sockaddr *sa = (struct sockaddr *)&ss;
271  int sock;
272
273  /* If PRINT is non-NULL, print the "Connecting to..." line, with
274     PRINT being the host name we're connecting to.  */
275  if (print)
276    {
277      const char *txt_addr = print_address (ip);
278      if (0 != strcmp (print, txt_addr))
279        {
280				  char *str = NULL, *name;
281
282          if (opt.enable_iri && (name = idn_decode ((char *) print)) != NULL)
283            {
284              int len = strlen (print) + strlen (name) + 4;
285              str = xmalloc (len);
286              snprintf (str, len, "%s (%s)", name, print);
287              str[len-1] = '\0';
288              xfree (name);
289            }
290
291          logprintf (LOG_VERBOSE, _("Connecting to %s|%s|:%d... "),
292                     str ? str : escnonprint_uri (print), txt_addr, port);
293
294					if (str)
295					  xfree (str);
296        }
297      else
298        logprintf (LOG_VERBOSE, _("Connecting to %s:%d... "), txt_addr, port);
299    }
300
301  /* Store the sockaddr info to SA.  */
302  sockaddr_set_data (sa, ip, port);
303
304  /* Create the socket of the family appropriate for the address.  */
305  sock = socket (sa->sa_family, SOCK_STREAM, 0);
306  if (sock < 0)
307    goto err;
308
309#if defined(ENABLE_IPV6) && defined(IPV6_V6ONLY)
310  if (opt.ipv6_only) {
311    int on = 1;
312    /* In case of error, we will go on anyway... */
313    int err = setsockopt (sock, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof (on));
314    IF_DEBUG
315      if (err < 0)
316        DEBUGP (("Failed setting IPV6_V6ONLY: %s", strerror (errno)));
317  }
318#endif
319
320  /* For very small rate limits, set the buffer size (and hence,
321     hopefully, the kernel's TCP window size) to the per-second limit.
322     That way we should never have to sleep for more than 1s between
323     network reads.  */
324  if (opt.limit_rate && opt.limit_rate < 8192)
325    {
326      int bufsize = opt.limit_rate;
327      if (bufsize < 512)
328        bufsize = 512;          /* avoid pathologically small values */
329#ifdef SO_RCVBUF
330      setsockopt (sock, SOL_SOCKET, SO_RCVBUF,
331                  (void *)&bufsize, (socklen_t)sizeof (bufsize));
332#endif
333      /* When we add limit_rate support for writing, which is useful
334         for POST, we should also set SO_SNDBUF here.  */
335    }
336
337  if (opt.bind_address)
338    {
339      /* Bind the client side of the socket to the requested
340         address.  */
341      struct sockaddr_storage bind_ss;
342      struct sockaddr *bind_sa = (struct sockaddr *)&bind_ss;
343      if (resolve_bind_address (bind_sa))
344        {
345          if (bind (sock, bind_sa, sockaddr_size (bind_sa)) < 0)
346            goto err;
347        }
348    }
349
350  /* Connect the socket to the remote endpoint.  */
351  if (connect_with_timeout (sock, sa, sockaddr_size (sa),
352                            opt.connect_timeout) < 0)
353    goto err;
354
355  /* Success. */
356  assert (sock >= 0);
357  if (print)
358    logprintf (LOG_VERBOSE, _("connected.\n"));
359  DEBUGP (("Created socket %d.\n", sock));
360  return sock;
361
362 err:
363  {
364    /* Protect errno from possible modifications by close and
365       logprintf.  */
366    int save_errno = errno;
367    if (sock >= 0)
368      fd_close (sock);
369    if (print)
370      logprintf (LOG_VERBOSE, _("failed: %s.\n"), strerror (errno));
371    errno = save_errno;
372    return -1;
373  }
374}
375
376/* Connect via TCP to a remote host on the specified port.
377
378   HOST is resolved as an Internet host name.  If HOST resolves to
379   more than one IP address, they are tried in the order returned by
380   DNS until connecting to one of them succeeds.  */
381
382int
383connect_to_host (const char *host, int port)
384{
385  int i, start, end;
386  int sock;
387
388  struct address_list *al = lookup_host (host, 0);
389
390 retry:
391  if (!al)
392    {
393      logprintf (LOG_NOTQUIET,
394                 _("%s: unable to resolve host address %s\n"),
395                 exec_name, quote (host));
396      return E_HOST;
397    }
398
399  address_list_get_bounds (al, &start, &end);
400  for (i = start; i < end; i++)
401    {
402      const ip_address *ip = address_list_address_at (al, i);
403      sock = connect_to_ip (ip, port, host);
404      if (sock >= 0)
405        {
406          /* Success. */
407          address_list_set_connected (al);
408          address_list_release (al);
409          return sock;
410        }
411
412      /* The attempt to connect has failed.  Continue with the loop
413         and try next address. */
414
415      address_list_set_faulty (al, i);
416    }
417
418  /* Failed to connect to any of the addresses in AL. */
419
420  if (address_list_connected_p (al))
421    {
422      /* We connected to AL before, but cannot do so now.  That might
423         indicate that our DNS cache entry for HOST has expired.  */
424      address_list_release (al);
425      al = lookup_host (host, LH_REFRESH);
426      goto retry;
427    }
428  address_list_release (al);
429
430  return -1;
431}
432
433/* Create a socket, bind it to local interface BIND_ADDRESS on port
434   *PORT, set up a listen backlog, and return the resulting socket, or
435   -1 in case of error.
436
437   BIND_ADDRESS is the address of the interface to bind to.  If it is
438   NULL, the socket is bound to the default address.  PORT should
439   point to the port number that will be used for the binding.  If
440   that number is 0, the system will choose a suitable port, and the
441   chosen value will be written to *PORT.
442
443   Calling accept() on such a socket waits for and accepts incoming
444   TCP connections.  */
445
446int
447bind_local (const ip_address *bind_address, int *port)
448{
449  int sock;
450  struct sockaddr_storage ss;
451  struct sockaddr *sa = (struct sockaddr *)&ss;
452
453  /* For setting options with setsockopt. */
454  int setopt_val = 1;
455  void *setopt_ptr = (void *)&setopt_val;
456  socklen_t setopt_size = sizeof (setopt_val);
457
458  sock = socket (bind_address->family, SOCK_STREAM, 0);
459  if (sock < 0)
460    return -1;
461
462#ifdef SO_REUSEADDR
463  setsockopt (sock, SOL_SOCKET, SO_REUSEADDR, setopt_ptr, setopt_size);
464#endif
465
466  xzero (ss);
467  sockaddr_set_data (sa, bind_address, *port);
468  if (bind (sock, sa, sockaddr_size (sa)) < 0)
469    {
470      fd_close (sock);
471      return -1;
472    }
473  DEBUGP (("Local socket fd %d bound.\n", sock));
474
475  /* If *PORT is 0, find out which port we've bound to.  */
476  if (*port == 0)
477    {
478      socklen_t addrlen = sockaddr_size (sa);
479      if (getsockname (sock, sa, &addrlen) < 0)
480        {
481          /* If we can't find out the socket's local address ("name"),
482             something is seriously wrong with the socket, and it's
483             unusable for us anyway because we must know the chosen
484             port.  */
485          fd_close (sock);
486          return -1;
487        }
488      sockaddr_get_data (sa, NULL, port);
489      DEBUGP (("binding to address %s using port %i.\n",
490               print_address (bind_address), *port));
491    }
492  if (listen (sock, 1) < 0)
493    {
494      fd_close (sock);
495      return -1;
496    }
497  return sock;
498}
499
500/* Like a call to accept(), but with the added check for timeout.
501
502   In other words, accept a client connection on LOCAL_SOCK, and
503   return the new socket used for communication with the client.
504   LOCAL_SOCK should have been bound, e.g. using bind_local().
505
506   The caller is blocked until a connection is established.  If no
507   connection is established for opt.connect_timeout seconds, the
508   function exits with an error status.  */
509
510int
511accept_connection (int local_sock)
512{
513  int sock;
514
515  /* We don't need the values provided by accept, but accept
516     apparently requires them to be present.  */
517  struct sockaddr_storage ss;
518  struct sockaddr *sa = (struct sockaddr *)&ss;
519  socklen_t addrlen = sizeof (ss);
520
521  if (opt.connect_timeout)
522    {
523      int test = select_fd (local_sock, opt.connect_timeout, WAIT_FOR_READ);
524      if (test == 0)
525        errno = ETIMEDOUT;
526      if (test <= 0)
527        return -1;
528    }
529  sock = accept (local_sock, sa, &addrlen);
530  DEBUGP (("Accepted client at socket %d.\n", sock));
531  return sock;
532}
533
534/* Get the IP address associated with the connection on FD and store
535   it to IP.  Return true on success, false otherwise.
536
537   If ENDPOINT is ENDPOINT_LOCAL, it returns the address of the local
538   (client) side of the socket.  Else if ENDPOINT is ENDPOINT_PEER, it
539   returns the address of the remote (peer's) side of the socket.  */
540
541bool
542socket_ip_address (int sock, ip_address *ip, int endpoint)
543{
544  struct sockaddr_storage storage;
545  struct sockaddr *sockaddr = (struct sockaddr *)&storage;
546  socklen_t addrlen = sizeof (storage);
547  int ret;
548
549  if (endpoint == ENDPOINT_LOCAL)
550    ret = getsockname (sock, sockaddr, &addrlen);
551  else if (endpoint == ENDPOINT_PEER)
552    ret = getpeername (sock, sockaddr, &addrlen);
553  else
554    abort ();
555  if (ret < 0)
556    return false;
557
558  ip->family = sockaddr->sa_family;
559  switch (sockaddr->sa_family)
560    {
561#ifdef ENABLE_IPV6
562    case AF_INET6:
563      {
564        struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)&storage;
565        ip->data.d6 = sa6->sin6_addr;
566#ifdef HAVE_SOCKADDR_IN6_SCOPE_ID
567        ip->ipv6_scope = sa6->sin6_scope_id;
568#endif
569        DEBUGP (("conaddr is: %s\n", print_address (ip)));
570        return true;
571      }
572#endif
573    case AF_INET:
574      {
575        struct sockaddr_in *sa = (struct sockaddr_in *)&storage;
576        ip->data.d4 = sa->sin_addr;
577        DEBUGP (("conaddr is: %s\n", print_address (ip)));
578        return true;
579      }
580    default:
581      abort ();
582    }
583}
584
585/* Return true if the error from the connect code can be considered
586   retryable.  Wget normally retries after errors, but the exception
587   are the "unsupported protocol" type errors (possible on IPv4/IPv6
588   dual family systems) and "connection refused".  */
589
590bool
591retryable_socket_connect_error (int err)
592{
593  /* Have to guard against some of these values not being defined.
594     Cannot use a switch statement because some of the values might be
595     equal.  */
596  if (false
597#ifdef EAFNOSUPPORT
598      || err == EAFNOSUPPORT
599#endif
600#ifdef EPFNOSUPPORT
601      || err == EPFNOSUPPORT
602#endif
603#ifdef ESOCKTNOSUPPORT          /* no, "sockt" is not a typo! */
604      || err == ESOCKTNOSUPPORT
605#endif
606#ifdef EPROTONOSUPPORT
607      || err == EPROTONOSUPPORT
608#endif
609#ifdef ENOPROTOOPT
610      || err == ENOPROTOOPT
611#endif
612      /* Apparently, older versions of Linux and BSD used EINVAL
613         instead of EAFNOSUPPORT and such.  */
614      || err == EINVAL
615      )
616    return false;
617
618  if (!opt.retry_connrefused)
619    if (err == ECONNREFUSED
620#ifdef ENETUNREACH
621        || err == ENETUNREACH   /* network is unreachable */
622#endif
623#ifdef EHOSTUNREACH
624        || err == EHOSTUNREACH  /* host is unreachable */
625#endif
626        )
627      return false;
628
629  return true;
630}
631
632/* Wait for a single descriptor to become available, timing out after
633   MAXTIME seconds.  Returns 1 if FD is available, 0 for timeout and
634   -1 for error.  The argument WAIT_FOR can be a combination of
635   WAIT_FOR_READ and WAIT_FOR_WRITE.
636
637   This is a mere convenience wrapper around the select call, and
638   should be taken as such (for example, it doesn't implement Wget's
639   0-timeout-means-no-timeout semantics.)  */
640
641int
642select_fd (int fd, double maxtime, int wait_for)
643{
644  fd_set fdset;
645  fd_set *rd = NULL, *wr = NULL;
646  struct timeval tmout;
647  int result;
648
649  FD_ZERO (&fdset);
650  FD_SET (fd, &fdset);
651  if (wait_for & WAIT_FOR_READ)
652    rd = &fdset;
653  if (wait_for & WAIT_FOR_WRITE)
654    wr = &fdset;
655
656  tmout.tv_sec = (long) maxtime;
657  tmout.tv_usec = 1000000 * (maxtime - (long) maxtime);
658
659  do
660    result = select (fd + 1, rd, wr, NULL, &tmout);
661  while (result < 0 && errno == EINTR);
662
663  return result;
664}
665
/* Return true iff the connection to the remote site established
   through SOCK is still open.

   Specifically, this function returns true if SOCK is not ready for
   reading.  This is because, when the connection closes, the socket
   is ready for reading because EOF is about to be delivered.  A side
   effect of this method is that sockets that have pending data are
   considered non-open.  This is actually a good thing for callers of
   this function, where such pending data can only be unwanted
   leftover from a previous request.

   Except on Windows, a descriptor that cannot be represented in an
   fd_set (negative, or not below FD_SETSIZE) is reported as not
   open, because passing it to FD_SET would be undefined behavior.  */

bool
test_socket_open (int sock)
{
  fd_set check_set;
  struct timeval to;

#ifndef WINDOWS
  /* The range check is skipped on Windows, where socket handles are
     not small integers indexing into the set.  */
  if (sock < 0 || sock >= FD_SETSIZE)
    return false;
#endif

  /* Check if we still have a valid (non-EOF) connection.  From Andrew
   * Maholski's code in the Unix Socket FAQ.  */

  FD_ZERO (&check_set);
  FD_SET (sock, &check_set);

  /* Wait one microsecond */
  to.tv_sec = 0;
  to.tv_usec = 1;

  if (select (sock + 1, &check_set, NULL, NULL, &to) == 0)
    /* We got a timeout, it means we're still connected. */
    return true;
  else
    /* Read now would not wait, it means we have either pending data
       or EOF/error. */
    return false;
}
701
/* Basic socket operations, mostly EINTR wrappers.  */

/* On Windows and Watt-32 builds, make the read, write and close calls
   in the wrappers below go to recv, send and closesocket.  */
#if defined(WINDOWS) || defined(USE_WATT32)
# define read(fd, buf, cnt) recv (fd, buf, cnt, 0)
# define write(fd, buf, cnt) send (fd, buf, cnt, 0)
# define close(fd) closesocket (fd)
#endif

/* On BeOS and Haiku, read and write are mapped to recv and send in
   the same way; close is left alone.  */
#if (defined(__BEOS__) || defined(__HAIKU__))
# define read(fd, buf, cnt) recv (fd, buf, cnt, 0)
# define write(fd, buf, cnt) send (fd, buf, cnt, 0)
#endif
714
/* Read up to BUFSIZE bytes from FD into BUF, restarting the call
   whenever it is interrupted by a signal (EINTR).  Returns what the
   underlying read call returned.  */

static int
sock_read (int fd, char *buf, int bufsize)
{
  for (;;)
    {
      int nread = read (fd, buf, bufsize);
      if (nread != -1 || errno != EINTR)
        return nread;
    }
}
724
/* Write up to BUFSIZE bytes from BUF to FD, restarting the call
   whenever it is interrupted by a signal (EINTR).  Returns what the
   underlying write call returned, which may be less than BUFSIZE.  */

static int
sock_write (int fd, char *buf, int bufsize)
{
  for (;;)
    {
      int nwritten = write (fd, buf, bufsize);
      if (nwritten != -1 || errno != EINTR)
        return nwritten;
    }
}
734
/* Default poller for plain sockets: wait on FD with select_fd and
   return its result unchanged.  */

static int
sock_poll (int fd, double timeout, int wait_for)
{
  int ready = select_fd (fd, timeout, wait_for);
  return ready;
}
740
/* Copy up to BUFSIZE bytes of the data waiting on FD into BUF without
   consuming them (recv with MSG_PEEK), restarting the call whenever
   it is interrupted by a signal (EINTR).  Returns what recv
   returned.  */

static int
sock_peek (int fd, char *buf, int bufsize)
{
  for (;;)
    {
      int npeeked = recv (fd, buf, bufsize, MSG_PEEK);
      if (npeeked != -1 || errno != EINTR)
        return npeeked;
    }
}
750
/* Default closer for plain sockets: close FD and note the fact in
   the debug log.  The result of the close call is not examined.  */

static void
sock_close (int fd)
{
  (void) close (fd);
  DEBUGP (("Closed fd %d\n", fd));
}
/* Drop any read/write/close macros defined above, so that they stay
   confined to the sock_* wrappers.  */
#undef read
#undef write
#undef close
760
/* Reading and writing from the network.  We build around the socket
   (file descriptor) API, but support "extended" operations for things
   that are not mere file descriptors under the hood, such as SSL
   sockets.

   That way the user code can call fd_read(fd, ...) and we'll run read
   or SSL_read or whatever is necessary.  */

/* Maps a file descriptor, cast to a pointer-sized key, to the struct
   transport_info registered for it.  NULL until the first transport
   is registered.  */
static struct hash_table *transport_map;

/* Incremented every time an entry is added to or removed from
   transport_map, so that cached lookups can tell that the map has
   changed.  */
static unsigned int transport_map_modified_tick;

/* The value stored in transport_map for a registered descriptor.  */
struct transport_info {
  struct transport_implementation *imp; /* operations to use for the descriptor */
  void *ctx;                            /* context passed back to those operations */
};
776
777/* Register the transport layer operations that will be used when
778   reading, writing, and polling FD.
779
780   This should be used for transport layers like SSL that piggyback on
781   sockets.  FD should otherwise be a real socket, on which you can
782   call getpeername, etc.  */
783
784void
785fd_register_transport (int fd, struct transport_implementation *imp, void *ctx)
786{
787  struct transport_info *info;
788
789  /* The file descriptor must be non-negative to be registered.
790     Negative values are ignored by fd_close(), and -1 cannot be used as
791     hash key.  */
792  assert (fd >= 0);
793
794  info = xnew (struct transport_info);
795  info->imp = imp;
796  info->ctx = ctx;
797  if (!transport_map)
798    transport_map = hash_table_new (0, NULL, NULL);
799  hash_table_put (transport_map, (void *)(intptr_t) fd, info);
800  ++transport_map_modified_tick;
801}
802
803/* Return context of the transport registered with
804   fd_register_transport.  This assumes fd_register_transport was
805   previously called on FD.  */
806
807void *
808fd_transport_context (int fd)
809{
810  struct transport_info *info = hash_table_get (transport_map, (void *)(intptr_t) fd);
811  return info->ctx;
812}
813
/* Store to INFO the struct transport_info registered for the
   descriptor held in the variable named `fd' at the point of
   expansion, or NULL if there is none (or if no transport has been
   registered at all).

   When fd_read/fd_write are called multiple times in a loop, they
   should remember the INFO pointer instead of fetching it every
   time, so the result of the last lookup is cached.  It is not
   enough to compare FD to LAST_FD because FD might have been closed
   and reopened; the cached value is therefore reused only while
   transport_map_modified_tick is unchanged, which ensures that
   changes to transport_map will not go unnoticed.

   This is a macro because we want the static storage variables to be
   per-function.  */

#define LAZY_RETRIEVE_INFO(info) do {                                   \
  static struct transport_info *last_info;                              \
  static int last_fd = -1;                                              \
  static unsigned int last_tick;                                        \
  if (!transport_map)                                                   \
    info = NULL;                                                        \
  else if (last_fd == fd && last_tick == transport_map_modified_tick)   \
    info = last_info;                                                   \
  else                                                                  \
    {                                                                   \
      info = hash_table_get (transport_map, (void *)(intptr_t) fd);     \
      last_fd = fd;                                                     \
      last_info = info;                                                 \
      last_tick = transport_map_modified_tick;                          \
    }                                                                   \
} while (0)
839
840static bool
841poll_internal (int fd, struct transport_info *info, int wf, double timeout)
842{
843  if (timeout == -1)
844    timeout = opt.read_timeout;
845  if (timeout)
846    {
847      int test;
848      if (info && info->imp->poller)
849        test = info->imp->poller (fd, timeout, wf, info->ctx);
850      else
851        test = sock_poll (fd, timeout, wf);
852      if (test == 0)
853        errno = ETIMEDOUT;
854      if (test <= 0)
855        return false;
856    }
857  return true;
858}
859
860/* Read no more than BUFSIZE bytes of data from FD, storing them to
861   BUF.  If TIMEOUT is non-zero, the operation aborts if no data is
862   received after that many seconds.  If TIMEOUT is -1, the value of
863   opt.timeout is used for TIMEOUT.  */
864
865int
866fd_read (int fd, char *buf, int bufsize, double timeout)
867{
868  struct transport_info *info;
869  LAZY_RETRIEVE_INFO (info);
870  if (!poll_internal (fd, info, WAIT_FOR_READ, timeout))
871    return -1;
872  if (info && info->imp->reader)
873    return info->imp->reader (fd, buf, bufsize, info->ctx);
874  else
875    return sock_read (fd, buf, bufsize);
876}
877
878/* Like fd_read, except it provides a "preview" of the data that will
879   be read by subsequent calls to fd_read.  Specifically, it copies no
880   more than BUFSIZE bytes of the currently available data to BUF and
881   returns the number of bytes copied.  Return values and timeout
882   semantics are the same as those of fd_read.
883
884   CAVEAT: Do not assume that the first subsequent call to fd_read
885   will retrieve the same amount of data.  Reading can return more or
886   less data, depending on the TCP implementation and other
887   circumstances.  However, barring an error, it can be expected that
888   all the peeked data will eventually be read by fd_read.  */
889
890int
891fd_peek (int fd, char *buf, int bufsize, double timeout)
892{
893  struct transport_info *info;
894  LAZY_RETRIEVE_INFO (info);
895  if (!poll_internal (fd, info, WAIT_FOR_READ, timeout))
896    return -1;
897  if (info && info->imp->peeker)
898    return info->imp->peeker (fd, buf, bufsize, info->ctx);
899  else
900    return sock_peek (fd, buf, bufsize);
901}
902
903/* Write the entire contents of BUF to FD.  If TIMEOUT is non-zero,
904   the operation aborts if no data is received after that many
905   seconds.  If TIMEOUT is -1, the value of opt.timeout is used for
906   TIMEOUT.  */
907
908int
909fd_write (int fd, char *buf, int bufsize, double timeout)
910{
911  int res;
912  struct transport_info *info;
913  LAZY_RETRIEVE_INFO (info);
914
915  /* `write' may write less than LEN bytes, thus the loop keeps trying
916     it until all was written, or an error occurred.  */
917  res = 0;
918  while (bufsize > 0)
919    {
920      if (!poll_internal (fd, info, WAIT_FOR_WRITE, timeout))
921        return -1;
922      if (info && info->imp->writer)
923        res = info->imp->writer (fd, buf, bufsize, info->ctx);
924      else
925        res = sock_write (fd, buf, bufsize);
926      if (res <= 0)
927        break;
928      buf += res;
929      bufsize -= res;
930    }
931  return res;
932}
933
934/* Report the most recent error(s) on FD.  This should only be called
935   after fd_* functions, such as fd_read and fd_write, and only if
936   they return a negative result.  For errors coming from other calls
937   such as setsockopt or fopen, strerror should continue to be
938   used.
939
940   If the transport doesn't support error messages or doesn't supply
941   one, strerror(errno) is returned.  The returned error message
942   should not be used after fd_close has been called.  */
943
944const char *
945fd_errstr (int fd)
946{
947  /* Don't bother with LAZY_RETRIEVE_INFO, as this will only be called
948     in case of error, never in a tight loop.  */
949  struct transport_info *info = NULL;
950  if (transport_map)
951    info = hash_table_get (transport_map, (void *)(intptr_t) fd);
952
953  if (info && info->imp->errstr)
954    {
955      const char *err = info->imp->errstr (fd, info->ctx);
956      if (err)
957        return err;
958      /* else, fall through and print the system error. */
959    }
960  return strerror (errno);
961}
962
963/* Close the file descriptor FD.  */
964
965void
966fd_close (int fd)
967{
968  struct transport_info *info;
969  if (fd < 0)
970    return;
971
972  /* Don't use LAZY_RETRIEVE_INFO because fd_close() is only called once
973     per socket, so that particular optimization wouldn't work.  */
974  info = NULL;
975  if (transport_map)
976    info = hash_table_get (transport_map, (void *)(intptr_t) fd);
977
978  if (info && info->imp->closer)
979    info->imp->closer (fd, info->ctx);
980  else
981    sock_close (fd);
982
983  if (info)
984    {
985      hash_table_remove (transport_map, (void *)(intptr_t) fd);
986      xfree (info);
987      ++transport_map_modified_tick;
988    }
989}
990