1/* HTTP support.
2   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
3   2004, 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
4
5This file is part of GNU Wget.
6
7GNU Wget is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
11
12GNU Wget is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with Wget.  If not, see <http://www.gnu.org/licenses/>.
19
20Additional permission under GNU GPL version 3 section 7
21
22If you modify this program, or any covered work, by linking or
23combining it with the OpenSSL project's OpenSSL library (or a
24modified version of that library), containing parts covered by the
25terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26grants you additional permission to convey the resulting work.
27Corresponding Source for a non-source form of such a combination
28shall include the source code for the parts of OpenSSL used as well
29as that of the covered work.  */
30
31#include "wget.h"
32
33#include <stdio.h>
34#include <stdlib.h>
35#include <string.h>
36#ifdef HAVE_UNISTD_H
37# include <unistd.h>
38#endif
39#include <assert.h>
40#include <errno.h>
41#include <time.h>
42#include <locale.h>
43
44#include "hash.h"
45#include "http.h"
46#include "utils.h"
47#include "url.h"
48#include "host.h"
49#include "retr.h"
50#include "connect.h"
51#include "netrc.h"
52#ifdef HAVE_SSL
53# include "ssl.h"
54#endif
55#ifdef ENABLE_NTLM
56# include "http-ntlm.h"
57#endif
58#include "cookies.h"
59#ifdef ENABLE_DIGEST
60# include "gen-md5.h"
61#endif
62#include "convert.h"
63#include "spider.h"
64
65#ifdef TESTING
66#include "test.h"
67#endif
68
69#ifdef __VMS
70# include "vms.h"
71#endif /* def __VMS */
72
73extern char *version_string;
74
75/* Forward decls. */
76struct http_stat;
77static char *create_authorization_line (const char *, const char *,
78                                        const char *, const char *,
79                                        const char *, bool *);
80static char *basic_authentication_encode (const char *, const char *);
81static bool known_authentication_scheme_p (const char *, const char *);
82static void ensure_extension (struct http_stat *, const char *, int *);
83static void load_cookies (void);
84
/* Return the smaller of X and Y.  Being a macro, it evaluates its
   arguments more than once, so they must be free of side effects.
   It is only defined here when no earlier header provided MIN.  */
#ifndef MIN
# define MIN(x, y) ((x) > (y) ? (y) : (x))
#endif


/* File-scope cookie state.  NOTE(review): presumably maintained by
   load_cookies, declared above; the code that uses these variables is
   not in this part of the file -- confirm there.  */
static bool cookies_loaded_p;
static struct cookie_jar *wget_cookie_jar;

/* MIME type strings for HTML, XHTML and CSS content.  */
#define TEXTHTML_S "text/html"
#define TEXTXHTML_S "application/xhtml+xml"
#define TEXTCSS_S "text/css"
96
/* HTTP/1.0 status codes from RFC1945, provided for reference.  The
   entries marked "from HTTP/1.1" were added by the later protocol.  */

/* 2xx: the request succeeded.  */
#define HTTP_STATUS_OK                    200
#define HTTP_STATUS_CREATED               201
#define HTTP_STATUS_ACCEPTED              202
#define HTTP_STATUS_NO_CONTENT            204
#define HTTP_STATUS_PARTIAL_CONTENTS      206

/* 3xx: redirection.  */
#define HTTP_STATUS_MULTIPLE_CHOICES      300
#define HTTP_STATUS_MOVED_PERMANENTLY     301
#define HTTP_STATUS_MOVED_TEMPORARILY     302
#define HTTP_STATUS_SEE_OTHER             303 /* from HTTP/1.1 */
#define HTTP_STATUS_NOT_MODIFIED          304
#define HTTP_STATUS_TEMPORARY_REDIRECT    307 /* from HTTP/1.1 */

/* 4xx: client error.  */
#define HTTP_STATUS_BAD_REQUEST           400
#define HTTP_STATUS_UNAUTHORIZED          401
#define HTTP_STATUS_FORBIDDEN             403
#define HTTP_STATUS_NOT_FOUND             404
#define HTTP_STATUS_RANGE_NOT_SATISFIABLE 416

/* 5xx: server error.  */
#define HTTP_STATUS_INTERNAL              500
#define HTTP_STATUS_NOT_IMPLEMENTED       501
#define HTTP_STATUS_BAD_GATEWAY           502
#define HTTP_STATUS_UNAVAILABLE           503

/* Predicates classifying a numeric status code X.  Each of them may
   evaluate X more than once.  */

/* True for any code in the 2xx range.  */
#define H_20X(x)        ((x) >= 200 && (x) < 300)

/* True only for "206 Partial Content".  */
#define H_PARTIAL(x)    ((x) == HTTP_STATUS_PARTIAL_CONTENTS)

/* True for the redirections 301, 302, 303 and 307.  */
#define H_REDIRECTED(x) ((x) == HTTP_STATUS_MOVED_PERMANENTLY          \
                         || (x) == HTTP_STATUS_MOVED_TEMPORARILY       \
                         || (x) == HTTP_STATUS_SEE_OTHER               \
                         || (x) == HTTP_STATUS_TEMPORARY_REDIRECT)
133
/* Release policy of a request header: tells release_header which of
   the header's NAME and VALUE strings it has to free.  */
enum rp {
  rel_none, rel_name, rel_value, rel_both
};
137
/* An HTTP request under construction: the two variable parts of the
   request line plus a growable array of headers.  */
struct request {
  const char *method;           /* not freed by request_free */
  char *arg;                    /* freed by request_free */

  struct request_header {
    char *name, *value;
    enum rp release_policy;     /* which of NAME/VALUE get freed */
  } *headers;                   /* array with room for HCAPACITY items */
  int hcount, hcapacity;        /* headers in use / headers allocated */
};
148
149extern int numurls;
150
151/* Create a new, empty request.  At least request_set_method must be
152   called before the request can be used.  */
153
154static struct request *
155request_new (void)
156{
157  struct request *req = xnew0 (struct request);
158  req->hcapacity = 8;
159  req->headers = xnew_array (struct request_header, req->hcapacity);
160  return req;
161}
162
163/* Set the request's method and its arguments.  METH should be a
164   literal string (or it should outlive the request) because it will
165   not be freed.  ARG will be freed by request_free.  */
166
167static void
168request_set_method (struct request *req, const char *meth, char *arg)
169{
170  req->method = meth;
171  req->arg = arg;
172}
173
174/* Return the method string passed with the last call to
175   request_set_method.  */
176
177static const char *
178request_method (const struct request *req)
179{
180  return req->method;
181}
182
183/* Free one header according to the release policy specified with
184   request_set_header.  */
185
186static void
187release_header (struct request_header *hdr)
188{
189  switch (hdr->release_policy)
190    {
191    case rel_none:
192      break;
193    case rel_name:
194      xfree (hdr->name);
195      break;
196    case rel_value:
197      xfree (hdr->value);
198      break;
199    case rel_both:
200      xfree (hdr->name);
201      xfree (hdr->value);
202      break;
203    }
204}
205
/* Set the request header named NAME to VALUE.  Specifically, this
   means that a "NAME: VALUE\r\n" header line will be used in the
   request.  If a header with the same name (compared without regard
   to case) already exists in the request, it is replaced by this one.
   A NULL VALUE means do nothing.
210
211   RELEASE_POLICY determines whether NAME and VALUE should be released
212   (freed) with request_free.  Allowed values are:
213
214    - rel_none     - don't free NAME or VALUE
215    - rel_name     - free NAME when done
216    - rel_value    - free VALUE when done
217    - rel_both     - free both NAME and VALUE when done
218
219   Setting release policy is useful when arguments come from different
220   sources.  For example:
221
222     // Don't free literal strings!
223     request_set_header (req, "Pragma", "no-cache", rel_none);
224
225     // Don't free a global variable, we'll need it later.
226     request_set_header (req, "Referer", opt.referer, rel_none);
227
228     // Value freshly allocated, free it when done.
229     request_set_header (req, "Range",
230                         aprintf ("bytes=%s-", number_to_static_string (hs->restval)),
231                         rel_value);
232   */
233
234static void
235request_set_header (struct request *req, char *name, char *value,
236                    enum rp release_policy)
237{
238  struct request_header *hdr;
239  int i;
240
241  if (!value)
242    {
243      /* A NULL value is a no-op; if freeing the name is requested,
244         free it now to avoid leaks.  */
245      if (release_policy == rel_name || release_policy == rel_both)
246        xfree (name);
247      return;
248    }
249
250  for (i = 0; i < req->hcount; i++)
251    {
252      hdr = &req->headers[i];
253      if (0 == strcasecmp (name, hdr->name))
254        {
255          /* Replace existing header. */
256          release_header (hdr);
257          hdr->name = name;
258          hdr->value = value;
259          hdr->release_policy = release_policy;
260          return;
261        }
262    }
263
264  /* Install new header. */
265
266  if (req->hcount >= req->hcapacity)
267    {
268      req->hcapacity <<= 1;
269      req->headers = xrealloc (req->headers, req->hcapacity * sizeof (*hdr));
270    }
271  hdr = &req->headers[req->hcount++];
272  hdr->name = name;
273  hdr->value = value;
274  hdr->release_policy = release_policy;
275}
276
277/* Like request_set_header, but sets the whole header line, as
278   provided by the user using the `--header' option.  For example,
279   request_set_user_header (req, "Foo: bar") works just like
280   request_set_header (req, "Foo", "bar").  */
281
282static void
283request_set_user_header (struct request *req, const char *header)
284{
285  char *name;
286  const char *p = strchr (header, ':');
287  if (!p)
288    return;
289  BOUNDED_TO_ALLOCA (header, p, name);
290  ++p;
291  while (c_isspace (*p))
292    ++p;
293  request_set_header (req, xstrdup (name), (char *) p, rel_name);
294}
295
296/* Remove the header with specified name from REQ.  Returns true if
297   the header was actually removed, false otherwise.  */
298
299static bool
300request_remove_header (struct request *req, char *name)
301{
302  int i;
303  for (i = 0; i < req->hcount; i++)
304    {
305      struct request_header *hdr = &req->headers[i];
306      if (0 == strcasecmp (name, hdr->name))
307        {
308          release_header (hdr);
309          /* Move the remaining headers by one. */
310          if (i < req->hcount - 1)
311            memmove (hdr, hdr + 1, (req->hcount - i - 1) * sizeof (*hdr));
312          --req->hcount;
313          return true;
314        }
315    }
316  return false;
317}
318
/* Copy the NUL-terminated string STR to P, without its terminator,
   and advance P past the copied bytes.  P is modified and STR is
   evaluated more than once.  Only request_send uses this macro, and
   it undefines it again.  */
#define APPEND(p, str) do {                     \
  int A_len = strlen (str);                     \
  memcpy (p, str, A_len);                       \
  p += A_len;                                   \
} while (0)

/* Construct the request and write it to FD using fd_write.

   The text consists of the request line "METHOD ARG HTTP/1.0", one
   "NAME: VALUE" line per header, and an empty line, each terminated
   by CRLF.  It is assembled in a buffer obtained with alloca_array
   whose size is computed before anything is written to it.

   The return value is that of fd_write; a negative value means that
   writing failed, in which case a message is logged as well.  */

static int
request_send (const struct request *req, int fd)
{
  char *request_string, *p;
  int i, size, write_error;

  /* Count the request size. */
  size = 0;

  /* METHOD " " ARG " " "HTTP/1.0" "\r\n" */
  size += strlen (req->method) + 1 + strlen (req->arg) + 1 + 8 + 2;

  for (i = 0; i < req->hcount; i++)
    {
      struct request_header *hdr = &req->headers[i];
      /* NAME ": " VALUE "\r\n" */
      size += strlen (hdr->name) + 2 + strlen (hdr->value) + 2;
    }

  /* "\r\n\0" */
  size += 3;

  p = request_string = alloca_array (char, size);

  /* Generate the request.  Every byte written below was accounted
     for in SIZE above; the assert after the loop verifies that.  */

  APPEND (p, req->method); *p++ = ' ';
  APPEND (p, req->arg);    *p++ = ' ';
  memcpy (p, "HTTP/1.0\r\n", 10); p += 10;

  for (i = 0; i < req->hcount; i++)
    {
      struct request_header *hdr = &req->headers[i];
      APPEND (p, hdr->name);
      *p++ = ':', *p++ = ' ';
      APPEND (p, hdr->value);
      *p++ = '\r', *p++ = '\n';
    }

  *p++ = '\r', *p++ = '\n', *p++ = '\0';
  assert (p - request_string == size);

#undef APPEND

  DEBUGP (("\n---request begin---\n%s---request end---\n", request_string));

  /* Send the request to the server.  The terminating NUL only serves
     the debug output above and is not sent, hence SIZE - 1.  */

  write_error = fd_write (fd, request_string, size - 1, -1);
  if (write_error < 0)
    logprintf (LOG_VERBOSE, _("Failed writing HTTP request: %s.\n"),
               fd_errstr (fd));
  return write_error;
}
381
382/* Release the resources used by REQ. */
383
384static void
385request_free (struct request *req)
386{
387  int i;
388  xfree_null (req->arg);
389  for (i = 0; i < req->hcount; i++)
390    release_header (&req->headers[i]);
391  xfree_null (req->headers);
392  xfree (req);
393}
394
/* Host names recorded by register_basic_auth_host and looked up by
   maybe_send_basic_creds.  The table is created by the first
   registration and is NULL until then.  */
static struct hash_table *basic_authed_hosts;
396
397/* Find out if this host has issued a Basic challenge yet; if so, give
398 * it the username, password. A temporary measure until we can get
399 * proper authentication in place. */
400
401static bool
402maybe_send_basic_creds (const char *hostname, const char *user,
403                        const char *passwd, struct request *req)
404{
405  bool do_challenge = false;
406
407  if (opt.auth_without_challenge)
408    {
409      DEBUGP(("Auth-without-challenge set, sending Basic credentials.\n"));
410      do_challenge = true;
411    }
412  else if (basic_authed_hosts
413      && hash_table_contains(basic_authed_hosts, hostname))
414    {
415      DEBUGP(("Found %s in basic_authed_hosts.\n", quote (hostname)));
416      do_challenge = true;
417    }
418  else
419    {
420      DEBUGP(("Host %s has not issued a general basic challenge.\n",
421              quote (hostname)));
422    }
423  if (do_challenge)
424    {
425      request_set_header (req, "Authorization",
426                          basic_authentication_encode (user, passwd),
427                          rel_value);
428    }
429  return do_challenge;
430}
431
432static void
433register_basic_auth_host (const char *hostname)
434{
435  if (!basic_authed_hosts)
436    {
437      basic_authed_hosts = make_nocase_string_hash_table (1);
438    }
439  if (!hash_table_contains(basic_authed_hosts, hostname))
440    {
441      hash_table_put (basic_authed_hosts, xstrdup(hostname), NULL);
442      DEBUGP(("Inserted %s into basic_authed_hosts\n", quote (hostname)));
443    }
444}
445
446
447/* Send the contents of FILE_NAME to SOCK.  Make sure that exactly
448   PROMISED_SIZE bytes are sent over the wire -- if the file is
449   longer, read only that much; if the file is shorter, report an error.  */
450
451static int
452post_file (int sock, const char *file_name, wgint promised_size)
453{
454  static char chunk[8192];
455  wgint written = 0;
456  int write_error;
457  FILE *fp;
458
459  DEBUGP (("[writing POST file %s ... ", file_name));
460
461  fp = fopen (file_name, "rb");
462  if (!fp)
463    return -1;
464  while (!feof (fp) && written < promised_size)
465    {
466      int towrite;
467      int length = fread (chunk, 1, sizeof (chunk), fp);
468      if (length == 0)
469        break;
470      towrite = MIN (promised_size - written, length);
471      write_error = fd_write (sock, chunk, towrite, -1);
472      if (write_error < 0)
473        {
474          fclose (fp);
475          return -1;
476        }
477      written += towrite;
478    }
479  fclose (fp);
480
481  /* If we've written less than was promised, report a (probably
482     nonsensical) error rather than break the promise.  */
483  if (written < promised_size)
484    {
485      errno = EINVAL;
486      return -1;
487    }
488
489  assert (written == promised_size);
490  DEBUGP (("done]\n"));
491  return 0;
492}
493
/* Determine whether [START, PEEKED + PEEKLEN) contains an empty line.
   If so, return the pointer to the position after the line, otherwise
   return NULL.  This is used as callback to fd_read_hunk.  The data
   between START and PEEKED has been read and cannot be "unread"; the
   data after PEEKED has only been peeked.

   As a special case, when nothing has been read yet (START equals
   PEEKED) and the peeked data does not begin with "HTTP", START is
   returned so that no data is consumed at all.

   Only bytes inside [START, PEEKED + PEEKLEN) are ever examined, also
   when that range holds fewer than two bytes.  */

static const char *
response_head_terminator (const char *start, const char *peeked, int peeklen)
{
  const char *p, *end;

  /* If at first peek, verify whether HUNK starts with "HTTP".  If
     not, this is a HTTP/0.9 request and we must bail out without
     reading anything.  With fewer than four bytes available, only
     those are compared.  */
  if (start == peeked)
    {
      int cmplen = peeklen > 4 ? 4 : peeklen;
      if (cmplen > 0 && 0 != memcmp (start, "HTTP", cmplen))
        return start;
    }

  /* Look for "\n[\r]\n", and return the following position if found.
     Start two chars before the current to cover the possibility that
     part of the terminator (e.g. "\n\r") arrived in the previous
     batch.  */
  p = peeked - start < 2 ? start : peeked - 2;
  end = peeked + peeklen;

  /* Check for \n\r\n or \n\n wherever at least three bytes remain.
     The distance is compared instead of forming END - 2, which could
     point in front of the buffer.  */
  for (; end - p > 2; p++)
    if (*p == '\n')
      {
        if (p[1] == '\r' && p[2] == '\n')
          return p + 3;
        else if (p[1] == '\n')
          return p + 2;
      }

  /* Check for \n\n directly preceding END, but only if two bytes are
     really there; otherwise P[0] or P[1] would lie outside the
     data.  */
  if (end - p == 2 && p[0] == '\n' && p[1] == '\n')
    return p + 2;

  return NULL;
}
533
/* The maximum size of a single HTTP response we care to read.  Rather
   than being a limit of the reader implementation, this limit
   prevents Wget from slurping all available memory upon encountering
   malicious or buggy server output, thus protecting the user.  Define
   it to 0 to remove the limit.

   read_http_response_head passes this value as the last argument to
   fd_read_hunk when it reads the response head.  */

#define HTTP_RESPONSE_MAX_SIZE 65536
541
542/* Read the HTTP request head from FD and return it.  The error
543   conditions are the same as with fd_read_hunk.
544
545   To support HTTP/0.9 responses, this function tries to make sure
546   that the data begins with "HTTP".  If this is not the case, no data
547   is read and an empty request is returned, so that the remaining
548   data can be treated as body.  */
549
550static char *
551read_http_response_head (int fd)
552{
553  return fd_read_hunk (fd, response_head_terminator, 512,
554                       HTTP_RESPONSE_MAX_SIZE);
555}
556
/* A parsed HTTP response head, as produced by resp_new.  */
struct response {
  /* The response data.  This is the text that was given to resp_new;
     resp_free does not free it.  */
  const char *data;

  /* The array of pointers that indicate where each header starts.
     For example, given this HTTP response:

       HTTP/1.0 200 Ok
       Description: some
        text
       Etag: x

     The headers are located like this:

     "HTTP/1.0 200 Ok\r\nDescription: some\r\n text\r\nEtag: x\r\n\r\n"
     ^                   ^                             ^          ^
     headers[0]          headers[1]                    headers[2] headers[3]

     I.e. headers[0] points to the beginning of the response,
     headers[1] points to the end of the first header and the
     beginning of the second one, etc.

     resp_new terminates the array with a NULL pointer, and leaves
     HEADERS itself NULL for a headerless (HTTP/0.9) response.  */

  const char **headers;
};
581
582/* Create a new response object from the text of the HTTP response,
583   available in HEAD.  That text is automatically split into
584   constituent header lines for fast retrieval using
585   resp_header_*.  */
586
587static struct response *
588resp_new (const char *head)
589{
590  const char *hdr;
591  int count, size;
592
593  struct response *resp = xnew0 (struct response);
594  resp->data = head;
595
596  if (*head == '\0')
597    {
598      /* Empty head means that we're dealing with a headerless
599         (HTTP/0.9) response.  In that case, don't set HEADERS at
600         all.  */
601      return resp;
602    }
603
604  /* Split HEAD into header lines, so that resp_header_* functions
605     don't need to do this over and over again.  */
606
607  size = count = 0;
608  hdr = head;
609  while (1)
610    {
611      DO_REALLOC (resp->headers, size, count + 1, const char *);
612      resp->headers[count++] = hdr;
613
614      /* Break upon encountering an empty line. */
615      if (!hdr[0] || (hdr[0] == '\r' && hdr[1] == '\n') || hdr[0] == '\n')
616        break;
617
618      /* Find the end of HDR, including continuations. */
619      do
620        {
621          const char *end = strchr (hdr, '\n');
622          if (end)
623            hdr = end + 1;
624          else
625            hdr += strlen (hdr);
626        }
627      while (*hdr == ' ' || *hdr == '\t');
628    }
629  DO_REALLOC (resp->headers, size, count + 1, const char *);
630  resp->headers[count] = NULL;
631
632  return resp;
633}
634
635/* Locate the header named NAME in the request data, starting with
636   position START.  This allows the code to loop through the request
637   data, filtering for all requests of a given name.  Returns the
638   found position, or -1 for failure.  The code that uses this
639   function typically looks like this:
640
641     for (pos = 0; (pos = resp_header_locate (...)) != -1; pos++)
642       ... do something with header ...
643
644   If you only care about one header, use resp_header_get instead of
645   this function.  */
646
647static int
648resp_header_locate (const struct response *resp, const char *name, int start,
649                    const char **begptr, const char **endptr)
650{
651  int i;
652  const char **headers = resp->headers;
653  int name_len;
654
655  if (!headers || !headers[1])
656    return -1;
657
658  name_len = strlen (name);
659  if (start > 0)
660    i = start;
661  else
662    i = 1;
663
664  for (; headers[i + 1]; i++)
665    {
666      const char *b = headers[i];
667      const char *e = headers[i + 1];
668      if (e - b > name_len
669          && b[name_len] == ':'
670          && 0 == strncasecmp (b, name, name_len))
671        {
672          b += name_len + 1;
673          while (b < e && c_isspace (*b))
674            ++b;
675          while (b < e && c_isspace (e[-1]))
676            --e;
677          *begptr = b;
678          *endptr = e;
679          return i;
680        }
681    }
682  return -1;
683}
684
/* Look up the first header named NAME in the response.  If it
   exists, set *BEGPTR to the beginning and *ENDPTR to the end of its
   value and return true.  Otherwise return false.

   resp_header_copy and resp_header_strdup are built on top of this
   function.  */

static bool
resp_header_get (const struct response *resp, const char *name,
                 const char **begptr, const char **endptr)
{
  return resp_header_locate (resp, name, 0, begptr, endptr) != -1;
}
699
/* Copy the value of the response header named NAME to BUF, which has
   room for BUFSIZE bytes including the terminating 0; a longer value
   is truncated to fit.  Return true if the header exists and false
   if it does not.  Use resp_header_strdup instead if the size of the
   value should not be limited.

   With a BUFSIZE of 0 nothing is copied, but the return value still
   tells whether the header is present.  */

static bool
resp_header_copy (const struct response *resp, const char *name,
                  char *buf, int bufsize)
{
  const char *val_beg, *val_end;
  int ncopy;

  if (!resp_header_get (resp, name, &val_beg, &val_end))
    return false;
  if (bufsize == 0)
    return true;

  /* Leave one byte for the terminating 0.  */
  ncopy = MIN (val_end - val_beg, bufsize - 1);
  memcpy (buf, val_beg, ncopy);
  buf[ncopy] = '\0';
  return true;
}
723
/* Return a copy, allocated with malloc, of the value of the header
   named NAME in RESP.  Return NULL if RESP has no such header.  */

static char *
resp_header_strdup (const struct response *resp, const char *name)
{
  const char *val_beg, *val_end;
  bool found = resp_header_get (resp, name, &val_beg, &val_end);

  return found ? strdupdelim (val_beg, val_end) : NULL;
}
735
736/* Parse the HTTP status line, which is of format:
737
738   HTTP-Version SP Status-Code SP Reason-Phrase
739
740   The function returns the status-code, or -1 if the status line
741   appears malformed.  The pointer to "reason-phrase" message is
742   returned in *MESSAGE.  */
743
744static int
745resp_status (const struct response *resp, char **message)
746{
747  int status;
748  const char *p, *end;
749
750  if (!resp->headers)
751    {
752      /* For a HTTP/0.9 response, assume status 200. */
753      if (message)
754        *message = xstrdup (_("No headers, assuming HTTP/0.9"));
755      return 200;
756    }
757
758  p = resp->headers[0];
759  end = resp->headers[1];
760
761  if (!end)
762    return -1;
763
764  /* "HTTP" */
765  if (end - p < 4 || 0 != strncmp (p, "HTTP", 4))
766    return -1;
767  p += 4;
768
769  /* Match the HTTP version.  This is optional because Gnutella
770     servers have been reported to not specify HTTP version.  */
771  if (p < end && *p == '/')
772    {
773      ++p;
774      while (p < end && c_isdigit (*p))
775        ++p;
776      if (p < end && *p == '.')
777        ++p;
778      while (p < end && c_isdigit (*p))
779        ++p;
780    }
781
782  while (p < end && c_isspace (*p))
783    ++p;
784  if (end - p < 3 || !c_isdigit (p[0]) || !c_isdigit (p[1]) || !c_isdigit (p[2]))
785    return -1;
786
787  status = 100 * (p[0] - '0') + 10 * (p[1] - '0') + (p[2] - '0');
788  p += 3;
789
790  if (message)
791    {
792      while (p < end && c_isspace (*p))
793        ++p;
794      while (p < end && c_isspace (end[-1]))
795        --end;
796      *message = strdupdelim (p, end);
797    }
798
799  return status;
800}
801
802/* Release the resources used by RESP.  */
803
804static void
805resp_free (struct response *resp)
806{
807  xfree_null (resp->headers);
808  xfree (resp);
809}
810
811/* Print a single line of response, the characters [b, e).  We tried
812   getting away with
813      logprintf (LOG_VERBOSE, "%s%.*s\n", prefix, (int) (e - b), b);
814   but that failed to escape the non-printable characters and, in fact,
815   caused crashes in UTF-8 locales.  */
816
817static void
818print_response_line(const char *prefix, const char *b, const char *e)
819{
820  char *copy;
821  BOUNDED_TO_ALLOCA(b, e, copy);
822  logprintf (LOG_ALWAYS, "%s%s\n", prefix,
823             quotearg_style (escape_quoting_style, copy));
824}
825
826/* Print the server response, line by line, omitting the trailing CRLF
827   from individual header lines, and prefixed with PREFIX.  */
828
829static void
830print_server_response (const struct response *resp, const char *prefix)
831{
832  int i;
833  if (!resp->headers)
834    return;
835  for (i = 0; resp->headers[i + 1]; i++)
836    {
837      const char *b = resp->headers[i];
838      const char *e = resp->headers[i + 1];
839      /* Skip CRLF */
840      if (b < e && e[-1] == '\n')
841        --e;
842      if (b < e && e[-1] == '\r')
843        --e;
844      print_response_line(prefix, b, e);
845    }
846}
847
848/* Parse the `Content-Range' header and extract the information it
849   contains.  Returns true if successful, false otherwise.  */
850static bool
851parse_content_range (const char *hdr, wgint *first_byte_ptr,
852                     wgint *last_byte_ptr, wgint *entity_length_ptr)
853{
854  wgint num;
855
856  /* Ancient versions of Netscape proxy server, presumably predating
857     rfc2068, sent out `Content-Range' without the "bytes"
858     specifier.  */
859  if (0 == strncasecmp (hdr, "bytes", 5))
860    {
861      hdr += 5;
862      /* "JavaWebServer/1.1.1" sends "bytes: x-y/z", contrary to the
863         HTTP spec. */
864      if (*hdr == ':')
865        ++hdr;
866      while (c_isspace (*hdr))
867        ++hdr;
868      if (!*hdr)
869        return false;
870    }
871  if (!c_isdigit (*hdr))
872    return false;
873  for (num = 0; c_isdigit (*hdr); hdr++)
874    num = 10 * num + (*hdr - '0');
875  if (*hdr != '-' || !c_isdigit (*(hdr + 1)))
876    return false;
877  *first_byte_ptr = num;
878  ++hdr;
879  for (num = 0; c_isdigit (*hdr); hdr++)
880    num = 10 * num + (*hdr - '0');
881  if (*hdr != '/' || !c_isdigit (*(hdr + 1)))
882    return false;
883  *last_byte_ptr = num;
884  ++hdr;
885  if (*hdr == '*')
886    num = -1;
887  else
888    for (num = 0; c_isdigit (*hdr); hdr++)
889      num = 10 * num + (*hdr - '0');
890  *entity_length_ptr = num;
891  return true;
892}
893
894/* Read the body of the request, but don't store it anywhere and don't
895   display a progress gauge.  This is useful for reading the bodies of
896   administrative responses to which we will soon issue another
897   request.  The response is not useful to the user, but reading it
898   allows us to continue using the same connection to the server.
899
900   If reading fails, false is returned, true otherwise.  In debug
901   mode, the body is displayed for debugging purposes.  */
902
903static bool
904skip_short_body (int fd, wgint contlen)
905{
906  enum {
907    SKIP_SIZE = 512,                /* size of the download buffer */
908    SKIP_THRESHOLD = 4096        /* the largest size we read */
909  };
910  char dlbuf[SKIP_SIZE + 1];
911  dlbuf[SKIP_SIZE] = '\0';        /* so DEBUGP can safely print it */
912
913  /* We shouldn't get here with unknown contlen.  (This will change
914     with HTTP/1.1, which supports "chunked" transfer.)  */
915  assert (contlen != -1);
916
917  /* If the body is too large, it makes more sense to simply close the
918     connection than to try to read the body.  */
919  if (contlen > SKIP_THRESHOLD)
920    return false;
921
922  DEBUGP (("Skipping %s bytes of body: [", number_to_static_string (contlen)));
923
924  while (contlen > 0)
925    {
926      int ret = fd_read (fd, dlbuf, MIN (contlen, SKIP_SIZE), -1);
927      if (ret <= 0)
928        {
929          /* Don't normally report the error since this is an
930             optimization that should be invisible to the user.  */
931          DEBUGP (("] aborting (%s).\n",
932                   ret < 0 ? fd_errstr (fd) : "EOF received"));
933          return false;
934        }
935      contlen -= ret;
936      /* Safe even if %.*s bogusly expects terminating \0 because
937         we've zero-terminated dlbuf above.  */
938      DEBUGP (("%.*s", ret, dlbuf));
939    }
940
941  DEBUGP (("] done.\n"));
942  return true;
943}
944
/* Extract a parameter from the string (typically an HTTP header) at
   **SOURCE and advance SOURCE to the next parameter.  Return false
   when there are no more parameters to extract.  The name of the
   parameter is returned in NAME, and the value in VALUE.  If the
   parameter has no value, the token's value is zeroed out.

   Parameters are separated by the character SEPARATOR and have the
   form NAME or NAME=VALUE, where VALUE may be enclosed in double
   quotes.  The tokens point into the source string; nothing is
   copied.

   False is also returned for malformed input: an empty name, a name
   followed by something other than '=' or SEPARATOR, or a quoted
   value without closing quote.  In those cases *SOURCE is left
   unchanged, whereas it is advanced to the end of the string when
   there is simply nothing more to extract.

   For example, if *SOURCE points to the string "attachment;
   filename=\"foo bar\"", the first call to this function will return
   the token named "attachment" and no value, and the second call will
   return the token named "filename" and value "foo bar".  The third
   call will return false, indicating no more valid tokens.  */

bool
extract_param (const char **source, param_token *name, param_token *value,
               char separator)
{
  const char *p = *source;

  /* Skip leading whitespace.  */
  while (c_isspace (*p)) ++p;
  if (!*p)
    {
      *source = p;
      return false;             /* no error; nothing more to extract */
    }

  /* Extract name. */
  name->b = p;
  while (*p && !c_isspace (*p) && *p != '=' && *p != separator) ++p;
  name->e = p;
  if (name->b == name->e)
    return false;               /* empty name: error */
  while (c_isspace (*p)) ++p;
  if (*p == separator || !*p)           /* no value */
    {
      xzero (*value);
      if (*p == separator) ++p;
      *source = p;
      return true;
    }
  if (*p != '=')
    return false;               /* error */

  /* *p is '=', extract value */
  ++p;
  while (c_isspace (*p)) ++p;
  if (*p == '"')                /* quoted */
    {
      /* The value is what lies between the quotes, exclusive.  */
      value->b = ++p;
      while (*p && *p != '"') ++p;
      if (!*p)
        return false;
      value->e = p++;
      /* Currently at closing quote; find the end of param. */
      while (c_isspace (*p)) ++p;
      while (*p && *p != separator) ++p;
      if (*p == separator)
        ++p;
      else if (*p)
        /* garbage after closed quote, e.g. foo="bar"baz */
        /* NOTE(review): the loop above stops only at SEPARATOR or at
           the end of the string, so *p is 0 whenever this branch is
           reached and the return below cannot be executed; text
           after the closing quote is skipped instead.  */
        return false;
    }
  else                          /* unquoted */
    {
      /* The value extends to SEPARATOR or the end of the string,
         minus trailing whitespace.  */
      value->b = p;
      while (*p && *p != separator) ++p;
      value->e = p;
      while (value->e != value->b && c_isspace (value->e[-1]))
        --value->e;
      if (*p == separator) ++p;
    }
  *source = p;
  return true;
}
1018
/* Return the larger of P and Q.  Any earlier definition of MAX is
   discarded first.  Being a macro, it evaluates its arguments more
   than once, so they must be free of side effects.  */
#undef MAX
#define MAX(p, q) ((p) > (q) ? (p) : (q))
1021
1022/* Parse the contents of the `Content-Disposition' header, extracting
1023   the information useful to Wget.  Content-Disposition is a header
1024   borrowed from MIME; when used in HTTP, it typically serves for
1025   specifying the desired file name of the resource.  For example:
1026
1027       Content-Disposition: attachment; filename="flora.jpg"
1028
1029   Wget will skip the tokens it doesn't care about, such as
1030   "attachment" in the previous example; it will also skip other
1031   unrecognized params.  If the header is syntactically correct and
1032   contains a file name, a copy of the file name is stored in
1033   *filename and true is returned.  Otherwise, the function returns
1034   false.
1035
1036   The file name is stripped of directory components and must not be
1037   empty.  */
1038
1039static bool
1040parse_content_disposition (const char *hdr, char **filename)
1041{
1042  param_token name, value;
1043  while (extract_param (&hdr, &name, &value, ';'))
1044    if (BOUNDED_EQUAL_NO_CASE (name.b, name.e, "filename") && value.b != NULL)
1045      {
1046        /* Make the file name begin at the last slash or backslash. */
1047        const char *last_slash = memrchr (value.b, '/', value.e - value.b);
1048        const char *last_bs = memrchr (value.b, '\\', value.e - value.b);
1049        if (last_slash && last_bs)
1050          value.b = 1 + MAX (last_slash, last_bs);
1051        else if (last_slash || last_bs)
1052          value.b = 1 + (last_slash ? last_slash : last_bs);
1053        if (value.b == value.e)
1054          continue;
1055        /* Start with the directory prefix, if specified. */
1056        if (opt.dir_prefix)
1057          {
1058            int prefix_length = strlen (opt.dir_prefix);
1059            bool add_slash = (opt.dir_prefix[prefix_length - 1] != '/');
1060            int total_length;
1061
1062            if (add_slash)
1063              ++prefix_length;
1064            total_length = prefix_length + (value.e - value.b);
1065            *filename = xmalloc (total_length + 1);
1066            strcpy (*filename, opt.dir_prefix);
1067            if (add_slash)
1068              (*filename)[prefix_length - 1] = '/';
1069            memcpy (*filename + prefix_length, value.b, (value.e - value.b));
1070            (*filename)[total_length] = '\0';
1071          }
1072        else
1073          *filename = strdupdelim (value.b, value.e);
1074        return true;
1075      }
1076  return false;
1077}
1078
1079/* Persistent connections.  Currently, we cache the most recently used
1080   connection as persistent, provided that the HTTP server agrees to
1081   make it such.  The persistence data is stored in the variables
1082   below.  Ideally, it should be possible to cache an arbitrary fixed
1083   number of these connections.  */
1084
1085/* Whether a persistent connection is active. */
/* Set by register_persistent and cleared by invalidate_persistent;
   the contents of `pconn' are meaningful only while this is true.  */
static bool pconn_active;

/* Description of the single cached persistent connection.  */
static struct {
  /* The socket of the connection.  */
  int socket;

  /* Host and port of the currently active persistent connection.
     HOST is a private copy made by register_persistent and released
     by invalidate_persistent.  */
  char *host;
  int port;

  /* Whether an SSL handshake has occurred on this connection.  */
  bool ssl;

  /* Whether the connection was authorized.  This is only done by
     NTLM, which authorizes *connections* rather than individual
     requests.  (That practice is peculiar for HTTP, but it is a
     useful optimization.)  */
  bool authorized;

#ifdef ENABLE_NTLM
  /* NTLM data of the current connection.  */
  struct ntlmdata ntlm;
#endif
} pconn;
1110
1111/* Mark the persistent connection as invalid and free the resources it
1112   uses.  This is used by the CLOSE_* macros after they forcefully
1113   close a registered persistent connection.  */
1114
1115static void
1116invalidate_persistent (void)
1117{
1118  DEBUGP (("Disabling further reuse of socket %d.\n", pconn.socket));
1119  pconn_active = false;
1120  fd_close (pconn.socket);
1121  xfree (pconn.host);
1122  xzero (pconn);
1123}
1124
1125/* Register FD, which should be a TCP/IP connection to HOST:PORT, as
1126   persistent.  This will enable someone to use the same connection
1127   later.  In the context of HTTP, this must be called only AFTER the
1128   response has been received and the server has promised that the
1129   connection will remain alive.
1130
1131   If a previous connection was persistent, it is closed. */
1132
1133static void
1134register_persistent (const char *host, int port, int fd, bool ssl)
1135{
1136  if (pconn_active)
1137    {
1138      if (pconn.socket == fd)
1139        {
1140          /* The connection FD is already registered. */
1141          return;
1142        }
1143      else
1144        {
1145          /* The old persistent connection is still active; close it
1146             first.  This situation arises whenever a persistent
1147             connection exists, but we then connect to a different
1148             host, and try to register a persistent connection to that
1149             one.  */
1150          invalidate_persistent ();
1151        }
1152    }
1153
1154  pconn_active = true;
1155  pconn.socket = fd;
1156  pconn.host = xstrdup (host);
1157  pconn.port = port;
1158  pconn.ssl = ssl;
1159  pconn.authorized = false;
1160
1161  DEBUGP (("Registered socket %d for persistent reuse.\n", fd));
1162}
1163
1164/* Return true if a persistent connection is available for connecting
1165   to HOST:PORT.  */
1166
1167static bool
1168persistent_available_p (const char *host, int port, bool ssl,
1169                        bool *host_lookup_failed)
1170{
1171  /* First, check whether a persistent connection is active at all.  */
1172  if (!pconn_active)
1173    return false;
1174
1175  /* If we want SSL and the last connection wasn't or vice versa,
1176     don't use it.  Checking for host and port is not enough because
1177     HTTP and HTTPS can apparently coexist on the same port.  */
1178  if (ssl != pconn.ssl)
1179    return false;
1180
1181  /* If we're not connecting to the same port, we're not interested. */
1182  if (port != pconn.port)
1183    return false;
1184
1185  /* If the host is the same, we're in business.  If not, there is
1186     still hope -- read below.  */
1187  if (0 != strcasecmp (host, pconn.host))
1188    {
1189      /* Check if pconn.socket is talking to HOST under another name.
1190         This happens often when both sites are virtual hosts
1191         distinguished only by name and served by the same network
1192         interface, and hence the same web server (possibly set up by
1193         the ISP and serving many different web sites).  This
1194         admittedly unconventional optimization does not contradict
1195         HTTP and works well with popular server software.  */
1196
1197      bool found;
1198      ip_address ip;
1199      struct address_list *al;
1200
1201      if (ssl)
1202        /* Don't try to talk to two different SSL sites over the same
1203           secure connection!  (Besides, it's not clear that
1204           name-based virtual hosting is even possible with SSL.)  */
1205        return false;
1206
1207      /* If pconn.socket's peer is one of the IP addresses HOST
1208         resolves to, pconn.socket is for all intents and purposes
1209         already talking to HOST.  */
1210
1211      if (!socket_ip_address (pconn.socket, &ip, ENDPOINT_PEER))
1212        {
1213          /* Can't get the peer's address -- something must be very
1214             wrong with the connection.  */
1215          invalidate_persistent ();
1216          return false;
1217        }
1218      al = lookup_host (host, 0);
1219      if (!al)
1220        {
1221          *host_lookup_failed = true;
1222          return false;
1223        }
1224
1225      found = address_list_contains (al, &ip);
1226      address_list_release (al);
1227
1228      if (!found)
1229        return false;
1230
1231      /* The persistent connection's peer address was found among the
1232         addresses HOST resolved to; therefore, pconn.sock is in fact
1233         already talking to HOST -- no need to reconnect.  */
1234    }
1235
1236  /* Finally, check whether the connection is still open.  This is
1237     important because most servers implement liberal (short) timeout
1238     on persistent connections.  Wget can of course always reconnect
1239     if the connection doesn't work out, but it's nicer to know in
1240     advance.  This test is a logical followup of the first test, but
1241     is "expensive" and therefore placed at the end of the list.
1242
1243     (Current implementation of test_socket_open has a nice side
1244     effect that it treats sockets with pending data as "closed".
1245     This is exactly what we want: if a broken server sends message
1246     body in response to HEAD, or if it sends more than conent-length
1247     data, we won't reuse the corrupted connection.)  */
1248
1249  if (!test_socket_open (pconn.socket))
1250    {
1251      /* Oops, the socket is no longer open.  Now that we know that,
1252         let's invalidate the persistent connection before returning
1253         0.  */
1254      invalidate_persistent ();
1255      return false;
1256    }
1257
1258  return true;
1259}
1260
1261/* The idea behind these two CLOSE macros is to distinguish between
1262   two cases: one when the job we've been doing is finished, and we
1263   want to close the connection and leave, and two when something is
1264   seriously wrong and we're closing the connection as part of
1265   cleanup.
1266
1267   In case of keep_alive, CLOSE_FINISH should leave the connection
1268   open, while CLOSE_INVALIDATE should still close it.
1269
   Note that the semantics of the flag `keep_alive' is "this
   connection *will* be reused (the server has promised not to close
   the connection once we're done)", while the semantics of
   `pconn_active && (fd) == pconn.socket' is "we're *now* using an
   active, registered connection".  */
1275
/* Close FD at the normal end of a request unless the enclosing
   function's `keep_alive' flag is set, in which case FD is left
   untouched.  When FD is the registered persistent connection the
   registration is dropped as well.  FD must be a modifiable lvalue:
   it is reset to -1 on every path that closes it, so that a stale
   descriptor number cannot be used by mistake afterwards.  */
#define CLOSE_FINISH(fd) do {                   \
  if (!keep_alive)                              \
    {                                           \
      if (pconn_active && (fd) == pconn.socket) \
        invalidate_persistent ();               \
      else                                      \
        fd_close (fd);                          \
      fd = -1;                                  \
    }                                           \
} while (0)

/* Close FD unconditionally (error cleanup), dropping the persistent
   registration when FD is the registered connection.  FD must be a
   modifiable lvalue; it is always reset to -1.  */
#define CLOSE_INVALIDATE(fd) do {               \
  if (pconn_active && (fd) == pconn.socket)     \
    invalidate_persistent ();                   \
  else                                          \
    fd_close (fd);                              \
  fd = -1;                                      \
} while (0)
1296
/* Per-download HTTP state.  free_hstat releases the strings held in
   newloc, remote_time, error, rderrmsg, local_file, orig_file_name
   and message; `referer' is const and is not freed there.  */
struct http_stat
{
  wgint len;                    /* received length */
  wgint contlen;                /* expected length */
  wgint restval;                /* the restart value */
  int res;                      /* the result of last read */
  char *rderrmsg;               /* error message from read error */
  char *newloc;                 /* new location (redirection) */
  char *remote_time;            /* remote time-stamp string */
  char *error;                  /* textual HTTP error */
  int statcode;                 /* status code */
  char *message;                /* status message */
  wgint rd_size;                /* amount of data read from socket */
  double dltime;                /* time it took to download the data */
  const char *referer;          /* value of the referer header. */
  char *local_file;             /* local file name. */
  bool existence_checked;       /* true if we already checked for a file's
                                   existence after having begun to download
                                   (needed in gethttp for when connection is
                                   interrupted/restarted). */
  bool timestamp_checked;       /* true if pre-download time-stamping checks
                                 * have already been performed */
  char *orig_file_name;         /* name of file to compare for time-stamping
                                 * (might be != local_file if -K is set) */
  wgint orig_file_size;         /* size of file to compare for time-stamping */
  time_t orig_file_tstamp;      /* time-stamp of file to compare for
                                 * time-stamping */
};
1325
1326static void
1327free_hstat (struct http_stat *hs)
1328{
1329  xfree_null (hs->newloc);
1330  xfree_null (hs->remote_time);
1331  xfree_null (hs->error);
1332  xfree_null (hs->rderrmsg);
1333  xfree_null (hs->local_file);
1334  xfree_null (hs->orig_file_name);
1335  xfree_null (hs->message);
1336
1337  /* Guard against being called twice. */
1338  hs->newloc = NULL;
1339  hs->remote_time = NULL;
1340  hs->error = NULL;
1341}
1342
/* True if LINE starts, case-insensitively, with STRING_CONSTANT and
   the match is followed by whitespace or by the end of the string.
   STRING_CONSTANT must be a string literal (its length is taken with
   sizeof).  LINE is parenthesized so that pointer expressions such as
   `p + 1' index correctly; it is evaluated more than once, so it must
   not have side effects.  */
#define BEGINS_WITH(line, string_constant)                               \
  (!strncasecmp ((line), string_constant, sizeof (string_constant) - 1)  \
   && (c_isspace ((line)[sizeof (string_constant) - 1])                  \
       || !(line)[sizeof (string_constant) - 1]))
1347
/* Expression producing the User-Agent value used when the user did
   not configure one.  The string comes from aprintf; SET_USER_AGENT
   passes it to request_set_header with rel_value.  Only the format
   differs between VMS and everything else.  */
#ifdef __VMS
# define DEFAULT_USER_AGENT_VALUE()                                      \
  aprintf ("Wget/%s (VMS %s %s)", version_string, vms_arch(), vms_vers())
#else /* def __VMS */
# define DEFAULT_USER_AGENT_VALUE()                                      \
  aprintf ("Wget/%s (%s)", version_string, OS_TYPE)
#endif /* def __VMS [else] */

/* Set the User-Agent header of REQ: the built-in default when
   opt.useragent is unset, the configured string when it is non-empty,
   and no header at all when it is the empty string.  */
#define SET_USER_AGENT(req) do {                                         \
  if (opt.useragent == NULL)                                             \
    request_set_header (req, "User-Agent",                               \
                        DEFAULT_USER_AGENT_VALUE (), rel_value);         \
  else if (opt.useragent[0] != '\0')                                     \
    request_set_header (req, "User-Agent", opt.useragent, rel_none);     \
} while (0)
1369
/* The flags that allow clobbering the file (opening with "wb").
   Defined here to avoid repetition later.  The macro is nonzero when
   at least one of opt.noclobber, opt.always_rest, opt.timestamping,
   opt.dirstruct or opt.output_document is set.  #### This will
   require rework.  */
#define ALLOW_CLOBBER (opt.noclobber || opt.always_rest || opt.timestamping \
                       || opt.dirstruct || opt.output_document)
1375
1376/* Retrieve a document through HTTP protocol.  It recognizes status
1377   code, and correctly handles redirections.  It closes the network
1378   socket.  If it receives an error from the functions below it, it
1379   will print it if there is enough information to do so (almost
1380   always), returning the error to the caller (i.e. http_loop).
1381
1382   Various HTTP parameters are stored to hs.
1383
1384   If PROXY is non-NULL, the connection will be made to the proxy
1385   server, and u->url will be requested.  */
1386static uerr_t
1387gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy,
1388         struct iri *iri)
1389{
1390  struct request *req;
1391
1392  char *type;
1393  char *user, *passwd;
1394  char *proxyauth;
1395  int statcode;
1396  int write_error;
1397  wgint contlen, contrange;
1398  struct url *conn;
1399  FILE *fp;
1400
1401  int sock = -1;
1402  int flags;
1403
1404  /* Set to 1 when the authorization has already been sent and should
1405     not be tried again. */
1406  bool auth_finished = false;
1407
1408  /* Set to 1 when just globally-set Basic authorization has been sent;
1409   * should prevent further Basic negotiations, but not other
1410   * mechanisms. */
1411  bool basic_auth_finished = false;
1412
1413  /* Whether NTLM authentication is used for this request. */
1414  bool ntlm_seen = false;
1415
1416  /* Whether our connection to the remote host is through SSL.  */
1417  bool using_ssl = false;
1418
1419  /* Whether a HEAD request will be issued (as opposed to GET or
1420     POST). */
1421  bool head_only = !!(*dt & HEAD_ONLY);
1422
1423  char *head;
1424  struct response *resp;
1425  char hdrval[256];
1426  char *message;
1427
1428  /* Whether this connection will be kept alive after the HTTP request
1429     is done. */
1430  bool keep_alive;
1431
1432  /* Whether keep-alive should be inhibited.
1433
1434     RFC 2068 requests that 1.0 clients not send keep-alive requests
1435     to proxies.  This is because many 1.0 proxies do not interpret
1436     the Connection header and transfer it to the remote server,
1437     causing it to not close the connection and leave both the proxy
1438     and the client hanging.  */
1439  bool inhibit_keep_alive =
1440    !opt.http_keep_alive || opt.ignore_length || proxy != NULL;
1441
1442  /* Headers sent when using POST. */
1443  wgint post_data_size = 0;
1444
1445  bool host_lookup_failed = false;
1446  /* Foxconn modify start, Alex Zhang, 02/27/2013 */
1447  int pid_tag = getpid();
1448  if(create_mission_folder(pid_tag) == -1)
1449    return FWRITEERR;
1450  update_status_file(u->url, 0, pid_tag);//url
1451  update_status_file(u->file, 1, pid_tag);//filename
1452  update_status_file("2", 3, pid_tag);//status=2 connecting
1453
1454#ifdef HAVE_SSL
1455  if (u->scheme == SCHEME_HTTPS)
1456    {
1457      /* Initialize the SSL context.  After this has once been done,
1458         it becomes a no-op.  */
1459      if (!ssl_init ())
1460        {
1461          scheme_disable (SCHEME_HTTPS);
1462          logprintf (LOG_NOTQUIET,
1463                     _("Disabling SSL due to encountered errors.\n"));
1464          return SSLINITFAILED;
1465        }
1466    }
1467#endif /* HAVE_SSL */
1468
1469  /* Initialize certain elements of struct http_stat.  */
1470  hs->len = 0;
1471  hs->contlen = -1;
1472  hs->res = -1;
1473  hs->rderrmsg = NULL;
1474  hs->newloc = NULL;
1475  hs->remote_time = NULL;
1476  hs->error = NULL;
1477  hs->message = NULL;
1478
1479  conn = u;
1480
1481  /* Prepare the request to send. */
1482
1483  req = request_new ();
1484  {
1485    char *meth_arg;
1486    const char *meth = "GET";
1487    if (head_only)
1488      meth = "HEAD";
1489    else if (opt.post_file_name || opt.post_data)
1490      meth = "POST";
1491    /* Use the full path, i.e. one that includes the leading slash and
1492       the query string.  E.g. if u->path is "foo/bar" and u->query is
1493       "param=value", full_path will be "/foo/bar?param=value".  */
1494    if (proxy
1495#ifdef HAVE_SSL
1496        /* When using SSL over proxy, CONNECT establishes a direct
1497           connection to the HTTPS server.  Therefore use the same
1498           argument as when talking to the server directly. */
1499        && u->scheme != SCHEME_HTTPS
1500#endif
1501        )
1502      meth_arg = xstrdup (u->url);
1503    else
1504      meth_arg = url_full_path (u);
1505    request_set_method (req, meth, meth_arg);
1506  }
1507
1508  request_set_header (req, "Referer", (char *) hs->referer, rel_none);
1509  if (*dt & SEND_NOCACHE)
1510    request_set_header (req, "Pragma", "no-cache", rel_none);
1511  if (hs->restval)
1512    request_set_header (req, "Range",
1513                        aprintf ("bytes=%s-",
1514                                 number_to_static_string (hs->restval)),
1515                        rel_value);
1516  SET_USER_AGENT (req);
1517  request_set_header (req, "Accept", "*/*", rel_none);
1518
1519  /* Find the username and password for authentication. */
1520  user = u->user;
1521  passwd = u->passwd;
1522  search_netrc (u->host, (const char **)&user, (const char **)&passwd, 0);
1523  user = user ? user : (opt.http_user ? opt.http_user : opt.user);
1524  passwd = passwd ? passwd : (opt.http_passwd ? opt.http_passwd : opt.passwd);
1525
1526  /* We only do "site-wide" authentication with "global" user/password
1527   * values unless --auth-no-challange has been requested; URL user/password
1528   * info overrides. */
1529  if (user && passwd && (!u->user || opt.auth_without_challenge))
1530    {
1531      /* If this is a host for which we've already received a Basic
1532       * challenge, we'll go ahead and send Basic authentication creds. */
1533      basic_auth_finished = maybe_send_basic_creds(u->host, user, passwd, req);
1534    }
1535
1536  /* Generate the Host header, HOST:PORT.  Take into account that:
1537
1538     - Broken server-side software often doesn't recognize the PORT
1539       argument, so we must generate "Host: www.server.com" instead of
1540       "Host: www.server.com:80" (and likewise for https port).
1541
1542     - IPv6 addresses contain ":", so "Host: 3ffe:8100:200:2::2:1234"
1543       becomes ambiguous and needs to be rewritten as "Host:
1544       [3ffe:8100:200:2::2]:1234".  */
1545  {
1546    /* Formats arranged for hfmt[add_port][add_squares].  */
1547    static const char *hfmt[][2] = {
1548      { "%s", "[%s]" }, { "%s:%d", "[%s]:%d" }
1549    };
1550    int add_port = u->port != scheme_default_port (u->scheme);
1551    int add_squares = strchr (u->host, ':') != NULL;
1552    request_set_header (req, "Host",
1553                        aprintf (hfmt[add_port][add_squares], u->host, u->port),
1554                        rel_value);
1555  }
1556
1557  if (!inhibit_keep_alive)
1558    request_set_header (req, "Connection", "Keep-Alive", rel_none);
1559
1560  if (opt.cookies)
1561    request_set_header (req, "Cookie",
1562                        cookie_header (wget_cookie_jar,
1563                                       u->host, u->port, u->path,
1564#ifdef HAVE_SSL
1565                                       u->scheme == SCHEME_HTTPS
1566#else
1567                                       0
1568#endif
1569                                       ),
1570                        rel_value);
1571
1572  if (opt.post_data || opt.post_file_name)
1573    {
1574      request_set_header (req, "Content-Type",
1575                          "application/x-www-form-urlencoded", rel_none);
1576      if (opt.post_data)
1577        post_data_size = strlen (opt.post_data);
1578      else
1579        {
1580          post_data_size = file_size (opt.post_file_name);
1581          if (post_data_size == -1)
1582            {
1583              logprintf (LOG_NOTQUIET, _("POST data file %s missing: %s\n"),
1584                         quote (opt.post_file_name), strerror (errno));
1585              post_data_size = 0;
1586            }
1587        }
1588      request_set_header (req, "Content-Length",
1589                          xstrdup (number_to_static_string (post_data_size)),
1590                          rel_value);
1591    }
1592
1593  /* Add the user headers. */
1594  if (opt.user_headers)
1595    {
1596      int i;
1597      for (i = 0; opt.user_headers[i]; i++)
1598        request_set_user_header (req, opt.user_headers[i]);
1599    }
1600
1601 retry_with_auth:
1602  /* We need to come back here when the initial attempt to retrieve
1603     without authorization header fails.  (Expected to happen at least
1604     for the Digest authorization scheme.)  */
1605
1606  proxyauth = NULL;
1607  if (proxy)
1608    {
1609      char *proxy_user, *proxy_passwd;
1610      /* For normal username and password, URL components override
1611         command-line/wgetrc parameters.  With proxy
1612         authentication, it's the reverse, because proxy URLs are
1613         normally the "permanent" ones, so command-line args
1614         should take precedence.  */
1615      if (opt.proxy_user && opt.proxy_passwd)
1616        {
1617          proxy_user = opt.proxy_user;
1618          proxy_passwd = opt.proxy_passwd;
1619        }
1620      else
1621        {
1622          proxy_user = proxy->user;
1623          proxy_passwd = proxy->passwd;
1624        }
1625      /* #### This does not appear right.  Can't the proxy request,
1626         say, `Digest' authentication?  */
1627      if (proxy_user && proxy_passwd)
1628        proxyauth = basic_authentication_encode (proxy_user, proxy_passwd);
1629
1630      /* If we're using a proxy, we will be connecting to the proxy
1631         server.  */
1632      conn = proxy;
1633
1634      /* Proxy authorization over SSL is handled below. */
1635#ifdef HAVE_SSL
1636      if (u->scheme != SCHEME_HTTPS)
1637#endif
1638        request_set_header (req, "Proxy-Authorization", proxyauth, rel_value);
1639    }
1640
1641  keep_alive = false;
1642
1643  /* Establish the connection.  */
1644
1645  if (!inhibit_keep_alive)
1646    {
1647      /* Look for a persistent connection to target host, unless a
1648         proxy is used.  The exception is when SSL is in use, in which
1649         case the proxy is nothing but a passthrough to the target
1650         host, registered as a connection to the latter.  */
1651      struct url *relevant = conn;
1652#ifdef HAVE_SSL
1653      if (u->scheme == SCHEME_HTTPS)
1654        relevant = u;
1655#endif
1656
1657      if (persistent_available_p (relevant->host, relevant->port,
1658#ifdef HAVE_SSL
1659                                  relevant->scheme == SCHEME_HTTPS,
1660#else
1661                                  0,
1662#endif
1663                                  &host_lookup_failed))
1664        {
1665          sock = pconn.socket;
1666          using_ssl = pconn.ssl;
1667          logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"),
1668                     quotearg_style (escape_quoting_style, pconn.host),
1669                     pconn.port);
1670          DEBUGP (("Reusing fd %d.\n", sock));
1671          if (pconn.authorized)
1672            /* If the connection is already authorized, the "Basic"
1673               authorization added by code above is unnecessary and
1674               only hurts us.  */
1675            request_remove_header (req, "Authorization");
1676        }
1677      else if (host_lookup_failed)
1678        {
1679          request_free (req);
1680          logprintf(LOG_NOTQUIET,
1681                    _("%s: unable to resolve host address %s\n"),
1682                    exec_name, quote (relevant->host));
1683          return HOSTERR;
1684        }
1685    }
1686
1687  if (sock < 0)
1688    {
1689      sock = connect_to_host (conn->host, conn->port);
1690      if (sock == E_HOST)
1691        {
1692          request_free (req);
1693          return HOSTERR;
1694        }
1695      else if (sock < 0)
1696        {
1697          request_free (req);
1698          return (retryable_socket_connect_error (errno)
1699                  ? CONERROR : CONIMPOSSIBLE);
1700        }
1701
1702#ifdef HAVE_SSL
1703      if (proxy && u->scheme == SCHEME_HTTPS)
1704        {
1705          /* When requesting SSL URLs through proxies, use the
1706             CONNECT method to request passthrough.  */
1707          struct request *connreq = request_new ();
1708          request_set_method (connreq, "CONNECT",
1709                              aprintf ("%s:%d", u->host, u->port));
1710          SET_USER_AGENT (connreq);
1711          if (proxyauth)
1712            {
1713              request_set_header (connreq, "Proxy-Authorization",
1714                                  proxyauth, rel_value);
1715              /* Now that PROXYAUTH is part of the CONNECT request,
1716                 zero it out so we don't send proxy authorization with
1717                 the regular request below.  */
1718              proxyauth = NULL;
1719            }
1720          /* Examples in rfc2817 use the Host header in CONNECT
1721             requests.  I don't see how that gains anything, given
1722             that the contents of Host would be exactly the same as
1723             the contents of CONNECT.  */
1724
1725          write_error = request_send (connreq, sock);
1726          request_free (connreq);
1727          if (write_error < 0)
1728            {
1729              CLOSE_INVALIDATE (sock);
1730              return WRITEFAILED;
1731            }
1732
1733          head = read_http_response_head (sock);
1734          if (!head)
1735            {
1736              logprintf (LOG_VERBOSE, _("Failed reading proxy response: %s\n"),
1737                         fd_errstr (sock));
1738              CLOSE_INVALIDATE (sock);
1739              return HERR;
1740            }
1741          message = NULL;
1742          if (!*head)
1743            {
1744              xfree (head);
1745              goto failed_tunnel;
1746            }
1747          DEBUGP (("proxy responded with: [%s]\n", head));
1748
1749          resp = resp_new (head);
1750          statcode = resp_status (resp, &message);
1751          hs->message = xstrdup (message);
1752          resp_free (resp);
1753          xfree (head);
1754          if (statcode != 200)
1755            {
1756            failed_tunnel:
1757              logprintf (LOG_NOTQUIET, _("Proxy tunneling failed: %s"),
1758                         message ? quotearg_style (escape_quoting_style, message) : "?");
1759              xfree_null (message);
1760              return CONSSLERR;
1761            }
1762          xfree_null (message);
1763
1764          /* SOCK is now *really* connected to u->host, so update CONN
1765             to reflect this.  That way register_persistent will
1766             register SOCK as being connected to u->host:u->port.  */
1767          conn = u;
1768        }
1769
1770      if (conn->scheme == SCHEME_HTTPS)
1771        {
1772          if (!ssl_connect_wget (sock))
1773            {
1774              fd_close (sock);
1775              return CONSSLERR;
1776            }
1777          else if (!ssl_check_certificate (sock, u->host))
1778            {
1779              fd_close (sock);
1780              return VERIFCERTERR;
1781            }
1782          using_ssl = true;
1783        }
1784#endif /* HAVE_SSL */
1785    }
1786
1787  /* Send the request to server.  */
1788  write_error = request_send (req, sock);
1789
1790  if (write_error >= 0)
1791    {
1792      if (opt.post_data)
1793        {
1794          DEBUGP (("[POST data: %s]\n", opt.post_data));
1795          write_error = fd_write (sock, opt.post_data, post_data_size, -1);
1796        }
1797      else if (opt.post_file_name && post_data_size != 0)
1798        write_error = post_file (sock, opt.post_file_name, post_data_size);
1799    }
1800
1801  if (write_error < 0)
1802    {
1803      CLOSE_INVALIDATE (sock);
1804      request_free (req);
1805      return WRITEFAILED;
1806    }
1807  logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
1808             proxy ? "Proxy" : "HTTP");
1809  contlen = -1;
1810  contrange = 0;
1811  *dt &= ~RETROKF;
1812
1813  head = read_http_response_head (sock);
1814  if (!head)
1815    {
1816      if (errno == 0)
1817        {
1818          logputs (LOG_NOTQUIET, _("No data received.\n"));
1819          CLOSE_INVALIDATE (sock);
1820          request_free (req);
1821          return HEOF;
1822        }
1823      else
1824        {
1825          logprintf (LOG_NOTQUIET, _("Read error (%s) in headers.\n"),
1826                     fd_errstr (sock));
1827          CLOSE_INVALIDATE (sock);
1828          request_free (req);
1829          return HERR;
1830        }
1831    }
1832  DEBUGP (("\n---response begin---\n%s---response end---\n", head));
1833
1834  resp = resp_new (head);
1835
1836  /* Check for status line.  */
1837  message = NULL;
1838  statcode = resp_status (resp, &message);
1839  hs->message = xstrdup (message);
1840  if (!opt.server_response)
1841    logprintf (LOG_VERBOSE, "%2d %s\n", statcode,
1842               message ? quotearg_style (escape_quoting_style, message) : "");
1843  else
1844    {
1845      logprintf (LOG_VERBOSE, "\n");
1846      print_server_response (resp, "  ");
1847    }
1848
1849  if (!opt.ignore_length
1850      && resp_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval)))
1851    {
1852      wgint parsed;
1853      errno = 0;
1854      parsed = str_to_wgint (hdrval, NULL, 10);
1855      if (parsed == WGINT_MAX && errno == ERANGE)
1856        {
1857          /* Out of range.
1858             #### If Content-Length is out of range, it most likely
1859             means that the file is larger than 2G and that we're
1860             compiled without LFS.  In that case we should probably
1861             refuse to even attempt to download the file.  */
1862          contlen = -1;
1863        }
1864      else if (parsed < 0)
1865        {
1866          /* Negative Content-Length; nonsensical, so we can't
1867             assume any information about the content to receive. */
1868          contlen = -1;
1869        }
1870      else
1871        contlen = parsed;
1872    }
1873
1874  /* Check for keep-alive related responses. */
1875  if (!inhibit_keep_alive && contlen != -1)
1876    {
1877      if (resp_header_copy (resp, "Keep-Alive", NULL, 0))
1878        keep_alive = true;
1879      else if (resp_header_copy (resp, "Connection", hdrval, sizeof (hdrval)))
1880        {
1881          if (0 == strcasecmp (hdrval, "Keep-Alive"))
1882            keep_alive = true;
1883        }
1884    }
1885
1886  /* Handle (possibly multiple instances of) the Set-Cookie header. */
1887  if (opt.cookies)
1888    {
1889      int scpos;
1890      const char *scbeg, *scend;
1891      /* The jar should have been created by now. */
1892      assert (wget_cookie_jar != NULL);
1893      for (scpos = 0;
1894           (scpos = resp_header_locate (resp, "Set-Cookie", scpos,
1895                                        &scbeg, &scend)) != -1;
1896           ++scpos)
1897        {
1898          char *set_cookie; BOUNDED_TO_ALLOCA (scbeg, scend, set_cookie);
1899          cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port,
1900                                    u->path, set_cookie);
1901        }
1902    }
1903
1904  if (keep_alive)
1905    /* The server has promised that it will not close the connection
1906       when we're done.  This means that we can register it.  */
1907    register_persistent (conn->host, conn->port, sock, using_ssl);
1908
1909  if (statcode == HTTP_STATUS_UNAUTHORIZED)
1910    {
1911      /* Authorization is required.  */
1912      if (keep_alive && !head_only && skip_short_body (sock, contlen))
1913        CLOSE_FINISH (sock);
1914      else
1915        CLOSE_INVALIDATE (sock);
1916      pconn.authorized = false;
1917      if (!auth_finished && (user && passwd))
1918        {
1919          /* IIS sends multiple copies of WWW-Authenticate, one with
1920             the value "negotiate", and other(s) with data.  Loop over
1921             all the occurrences and pick the one we recognize.  */
1922          int wapos;
1923          const char *wabeg, *waend;
1924          char *www_authenticate = NULL;
1925          for (wapos = 0;
1926               (wapos = resp_header_locate (resp, "WWW-Authenticate", wapos,
1927                                            &wabeg, &waend)) != -1;
1928               ++wapos)
1929            if (known_authentication_scheme_p (wabeg, waend))
1930              {
1931                BOUNDED_TO_ALLOCA (wabeg, waend, www_authenticate);
1932                break;
1933              }
1934
1935          if (!www_authenticate)
1936            {
1937              /* If the authentication header is missing or
1938                 unrecognized, there's no sense in retrying.  */
1939              logputs (LOG_NOTQUIET, _("Unknown authentication scheme.\n"));
1940            }
1941          else if (!basic_auth_finished
1942                   || !BEGINS_WITH (www_authenticate, "Basic"))
1943            {
1944              char *pth;
1945              pth = url_full_path (u);
1946              request_set_header (req, "Authorization",
1947                                  create_authorization_line (www_authenticate,
1948                                                             user, passwd,
1949                                                             request_method (req),
1950                                                             pth,
1951                                                             &auth_finished),
1952                                  rel_value);
1953              if (BEGINS_WITH (www_authenticate, "NTLM"))
1954                ntlm_seen = true;
1955              else if (!u->user && BEGINS_WITH (www_authenticate, "Basic"))
1956                {
1957                  /* Need to register this host as using basic auth,
1958                   * so we automatically send creds next time. */
1959                  register_basic_auth_host (u->host);
1960                }
1961              xfree (pth);
1962              xfree_null (message);
1963              resp_free (resp);
1964              xfree (head);
1965              goto retry_with_auth;
1966            }
1967          else
1968            {
1969              /* We already did Basic auth, and it failed. Gotta
1970               * give up. */
1971            }
1972        }
1973      logputs (LOG_NOTQUIET, _("Authorization failed.\n"));
1974      request_free (req);
1975      xfree_null (message);
1976      resp_free (resp);
1977      xfree (head);
1978      return AUTHFAILED;
1979    }
1980  else /* statcode != HTTP_STATUS_UNAUTHORIZED */
1981    {
1982      /* Kludge: if NTLM is used, mark the TCP connection as authorized. */
1983      if (ntlm_seen)
1984        pconn.authorized = true;
1985    }
1986
1987  /* Determine the local filename if needed. Notice that if -O is used
1988   * hstat.local_file is set by http_loop to the argument of -O. */
1989  if (!hs->local_file)
1990    {
1991      /* Honor Content-Disposition whether possible. */
1992      if (!opt.content_disposition
1993          || !resp_header_copy (resp, "Content-Disposition",
1994                                hdrval, sizeof (hdrval))
1995          || !parse_content_disposition (hdrval, &hs->local_file))
1996        {
1997          /* The Content-Disposition header is missing or broken.
1998           * Choose unique file name according to given URL. */
1999          hs->local_file = url_file_name (u);
2000        }
2001    }
2002
2003  /* TODO: perform this check only once. */
2004  if (!hs->existence_checked && file_exists_p (hs->local_file))
2005    {
2006      if (opt.noclobber && !opt.output_document)
2007        {
2008          /* If opt.noclobber is turned on and file already exists, do not
2009             retrieve the file. But if the output_document was given, then this
2010             test was already done and the file didn't exist. Hence the !opt.output_document */
2011          logprintf (LOG_VERBOSE, _("\
2012File %s already there; not retrieving.\n\n"), quote (hs->local_file));
2013          /* If the file is there, we suppose it's retrieved OK.  */
2014          *dt |= RETROKF;
2015
2016          /* #### Bogusness alert.  */
2017          /* If its suffix is "html" or "htm" or similar, assume text/html.  */
2018          if (has_html_suffix_p (hs->local_file))
2019            *dt |= TEXTHTML;
2020
2021          xfree (head);
2022          xfree_null (message);
2023          return RETRUNNEEDED;
2024        }
2025      else if (!ALLOW_CLOBBER)
2026        {
2027          char *unique = unique_name (hs->local_file, true);
2028          if (unique != hs->local_file)
2029            xfree (hs->local_file);
2030          hs->local_file = unique;
2031        }
2032    }
2033  hs->existence_checked = true;
2034
2035  /* Support timestamping */
2036  /* TODO: move this code out of gethttp. */
2037  if (opt.timestamping && !hs->timestamp_checked)
2038    {
2039      size_t filename_len = strlen (hs->local_file);
2040      char *filename_plus_orig_suffix = alloca (filename_len + sizeof (ORIG_SFX));
2041      bool local_dot_orig_file_exists = false;
2042      char *local_filename = NULL;
2043      struct_stat st;
2044
2045      if (opt.backup_converted)
2046        /* If -K is specified, we'll act on the assumption that it was specified
2047           last time these files were downloaded as well, and instead of just
2048           comparing local file X against server file X, we'll compare local
2049           file X.orig (if extant, else X) against server file X.  If -K
2050           _wasn't_ specified last time, or the server contains files called
2051           *.orig, -N will be back to not operating correctly with -k. */
2052        {
2053          /* Would a single s[n]printf() call be faster?  --dan
2054
2055             Definitely not.  sprintf() is horribly slow.  It's a
2056             different question whether the difference between the two
2057             affects a program.  Usually I'd say "no", but at one
2058             point I profiled Wget, and found that a measurable and
2059             non-negligible amount of time was lost calling sprintf()
2060             in url.c.  Replacing sprintf with inline calls to
2061             strcpy() and number_to_string() made a difference.
2062             --hniksic */
2063          memcpy (filename_plus_orig_suffix, hs->local_file, filename_len);
2064          memcpy (filename_plus_orig_suffix + filename_len,
2065                  ORIG_SFX, sizeof (ORIG_SFX));
2066
2067          /* Try to stat() the .orig file. */
2068          if (stat (filename_plus_orig_suffix, &st) == 0)
2069            {
2070              local_dot_orig_file_exists = true;
2071              local_filename = filename_plus_orig_suffix;
2072            }
2073        }
2074
2075      if (!local_dot_orig_file_exists)
2076        /* Couldn't stat() <file>.orig, so try to stat() <file>. */
2077        if (stat (hs->local_file, &st) == 0)
2078          local_filename = hs->local_file;
2079
2080      if (local_filename != NULL)
2081        /* There was a local file, so we'll check later to see if the version
2082           the server has is the same version we already have, allowing us to
2083           skip a download. */
2084        {
2085          hs->orig_file_name = xstrdup (local_filename);
2086          hs->orig_file_size = st.st_size;
2087          hs->orig_file_tstamp = st.st_mtime;
2088#ifdef WINDOWS
2089          /* Modification time granularity is 2 seconds for Windows, so
2090             increase local time by 1 second for later comparison. */
2091          ++hs->orig_file_tstamp;
2092#endif
2093        }
2094    }
2095
2096  request_free (req);
2097
2098  hs->statcode = statcode;
2099  if (statcode == -1)
2100    hs->error = xstrdup (_("Malformed status line"));
2101  else if (!*message)
2102    hs->error = xstrdup (_("(no description)"));
2103  else
2104    hs->error = xstrdup (message);
2105  xfree_null (message);
2106
2107  type = resp_header_strdup (resp, "Content-Type");
2108  if (type)
2109    {
2110      char *tmp = strchr (type, ';');
2111      if (tmp)
2112        {
2113          /* sXXXav: only needed if IRI support is enabled */
2114          char *tmp2 = tmp + 1;
2115
2116          while (tmp > type && c_isspace (tmp[-1]))
2117            --tmp;
2118          *tmp = '\0';
2119
2120          /* Try to get remote encoding if needed */
2121          if (opt.enable_iri && !opt.encoding_remote)
2122            {
2123              tmp = parse_charset (tmp2);
2124              if (tmp)
2125                set_content_encoding (iri, tmp);
2126            }
2127        }
2128    }
2129  hs->newloc = resp_header_strdup (resp, "Location");
2130  hs->remote_time = resp_header_strdup (resp, "Last-Modified");
2131
2132  if (resp_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval)))
2133    {
2134      wgint first_byte_pos, last_byte_pos, entity_length;
2135      if (parse_content_range (hdrval, &first_byte_pos, &last_byte_pos,
2136                               &entity_length))
2137        {
2138          contrange = first_byte_pos;
2139          contlen = last_byte_pos - first_byte_pos + 1;
2140        }
2141    }
2142  resp_free (resp);
2143  update_status_file(number_to_static_string (contlen + contrange), 2, pid_tag);//write filesize
2144  /* 20x responses are counted among successful by default.  */
2145  if (H_20X (statcode))
2146    *dt |= RETROKF;
2147
2148  /* Return if redirected.  */
2149  if (H_REDIRECTED (statcode) || statcode == HTTP_STATUS_MULTIPLE_CHOICES)
2150    {
2151      /* RFC2068 says that in case of the 300 (multiple choices)
2152         response, the server can output a preferred URL through
2153         `Location' header; otherwise, the request should be treated
2154         like GET.  So, if the location is set, it will be a
2155         redirection; otherwise, just proceed normally.  */
2156      if (statcode == HTTP_STATUS_MULTIPLE_CHOICES && !hs->newloc)
2157        *dt |= RETROKF;
2158      else
2159        {
2160          logprintf (LOG_VERBOSE,
2161                     _("Location: %s%s\n"),
2162                     hs->newloc ? escnonprint_uri (hs->newloc) : _("unspecified"),
2163                     hs->newloc ? _(" [following]") : "");
2164          if (keep_alive && !head_only && skip_short_body (sock, contlen))
2165            CLOSE_FINISH (sock);
2166          else
2167            CLOSE_INVALIDATE (sock);
2168          xfree_null (type);
2169          xfree (head);
2170          return NEWLOCATION;
2171        }
2172    }
2173
2174  /* If content-type is not given, assume text/html.  This is because
2175     of the multitude of broken CGI's that "forget" to generate the
2176     content-type.  */
2177  if (!type ||
2178        0 == strncasecmp (type, TEXTHTML_S, strlen (TEXTHTML_S)) ||
2179        0 == strncasecmp (type, TEXTXHTML_S, strlen (TEXTXHTML_S)))
2180    *dt |= TEXTHTML;
2181  else
2182    *dt &= ~TEXTHTML;
2183
2184  if (type &&
2185      0 == strncasecmp (type, TEXTCSS_S, strlen (TEXTCSS_S)))
2186    *dt |= TEXTCSS;
2187  else
2188    *dt &= ~TEXTCSS;
2189
2190  if (opt.adjust_extension)
2191    {
2192      if (*dt & TEXTHTML)
2193        /* -E / --adjust-extension / adjust_extension = on was specified,
2194           and this is a text/html file.  If some case-insensitive
2195           variation on ".htm[l]" isn't already the file's suffix,
2196           tack on ".html". */
2197        {
2198          ensure_extension (hs, ".html", dt);
2199        }
2200      else if (*dt & TEXTCSS)
2201        {
2202          ensure_extension (hs, ".css", dt);
2203        }
2204    }
2205
2206  if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE
2207      || (hs->restval > 0 && statcode == HTTP_STATUS_OK
2208          && contrange == 0 && hs->restval >= contlen)
2209     )
2210    {
2211      /* If `-c' is in use and the file has been fully downloaded (or
2212         the remote file has shrunk), Wget effectively requests bytes
2213         after the end of file and the server response with 416
2214         (or 200 with a <= Content-Length.  */
2215      logputs (LOG_VERBOSE, _("\
2216\n    The file is already fully retrieved; nothing to do.\n\n"));
2217      /* In case the caller inspects. */
2218      hs->len = contlen;
2219      hs->res = 0;
2220      /* Mark as successfully retrieved. */
2221      *dt |= RETROKF;
2222      xfree_null (type);
2223      CLOSE_INVALIDATE (sock);        /* would be CLOSE_FINISH, but there
2224                                   might be more bytes in the body. */
2225      xfree (head);
2226      return RETRUNNEEDED;
2227    }
2228  if ((contrange != 0 && contrange != hs->restval)
2229      || (H_PARTIAL (statcode) && !contrange))
2230    {
2231      /* The Range request was somehow misunderstood by the server.
2232         Bail out.  */
2233      xfree_null (type);
2234      CLOSE_INVALIDATE (sock);
2235      xfree (head);
2236      return RANGEERR;
2237    }
2238  if (contlen == -1)
2239    hs->contlen = -1;
2240  else
2241    hs->contlen = contlen + contrange;
2242  /* Foxconn modify end, Alex Zhang, 02/27/2013 */
2243  if (opt.verbose)
2244    {
2245      if (*dt & RETROKF)
2246        {
2247          /* No need to print this output if the body won't be
2248             downloaded at all, or if the original server response is
2249             printed.  */
2250          logputs (LOG_VERBOSE, _("Length: "));
2251          if (contlen != -1)
2252            {
2253              logputs (LOG_VERBOSE, number_to_static_string (contlen + contrange));
2254              if (contlen + contrange >= 1024)
2255                logprintf (LOG_VERBOSE, " (%s)",
2256                           human_readable (contlen + contrange));
2257              if (contrange)
2258                {
2259                  if (contlen >= 1024)
2260                    logprintf (LOG_VERBOSE, _(", %s (%s) remaining"),
2261                               number_to_static_string (contlen),
2262                               human_readable (contlen));
2263                  else
2264                    logprintf (LOG_VERBOSE, _(", %s remaining"),
2265                               number_to_static_string (contlen));
2266                }
2267            }
2268          else
2269            logputs (LOG_VERBOSE,
2270                     opt.ignore_length ? _("ignored") : _("unspecified"));
2271          if (type)
2272            logprintf (LOG_VERBOSE, " [%s]\n", quotearg_style (escape_quoting_style, type));
2273          else
2274            logputs (LOG_VERBOSE, "\n");
2275        }
2276    }
2277  xfree_null (type);
2278  type = NULL;                        /* We don't need it any more.  */
2279
2280  /* Return if we have no intention of further downloading.  */
2281  if (!(*dt & RETROKF) || head_only)
2282    {
2283      /* In case the caller cares to look...  */
2284      hs->len = 0;
2285      hs->res = 0;
2286      xfree_null (type);
2287      if (head_only)
2288        /* Pre-1.10 Wget used CLOSE_INVALIDATE here.  Now we trust the
2289           servers not to send body in response to a HEAD request, and
2290           those that do will likely be caught by test_socket_open.
2291           If not, they can be worked around using
2292           `--no-http-keep-alive'.  */
2293        CLOSE_FINISH (sock);
2294      else if (keep_alive && skip_short_body (sock, contlen))
2295        /* Successfully skipped the body; also keep using the socket. */
2296        CLOSE_FINISH (sock);
2297      else
2298        CLOSE_INVALIDATE (sock);
2299      xfree (head);
2300      return RETRFINISHED;
2301    }
2302
2303/* 2005-06-17 SMS.
2304   For VMS, define common fopen() optional arguments.
2305*/
2306#ifdef __VMS
2307# define FOPEN_OPT_ARGS "fop=sqo", "acc", acc_cb, &open_id
2308# define FOPEN_BIN_FLAG 3
2309#else /* def __VMS */
2310# define FOPEN_BIN_FLAG true
2311#endif /* def __VMS [else] */
2312
2313  /* Open the local file.  */
2314  if (!output_stream)
2315    {
2316      mkalldirs (hs->local_file);
2317      if (opt.backups)
2318        rotate_backups (hs->local_file);
2319      if (hs->restval)
2320        {
2321#ifdef __VMS
2322          int open_id;
2323
2324          open_id = 21;
2325          fp = fopen (hs->local_file, "ab", FOPEN_OPT_ARGS);
2326#else /* def __VMS */
2327          fp = fopen (hs->local_file, "ab");
2328#endif /* def __VMS [else] */
2329        }
2330      else if (ALLOW_CLOBBER)
2331        {
2332#ifdef __VMS
2333          int open_id;
2334
2335          open_id = 22;
2336          fp = fopen (hs->local_file, "wb", FOPEN_OPT_ARGS);
2337#else /* def __VMS */
2338          fp = fopen (hs->local_file, "wb");
2339#endif /* def __VMS [else] */
2340        }
2341      else
2342        {
2343          fp = fopen_excl (hs->local_file, FOPEN_BIN_FLAG);
2344          if (!fp && errno == EEXIST)
2345            {
2346              /* We cannot just invent a new name and use it (which is
2347                 what functions like unique_create typically do)
2348                 because we told the user we'd use this name.
2349                 Instead, return and retry the download.  */
2350              logprintf (LOG_NOTQUIET,
2351                         _("%s has sprung into existence.\n"),
2352                         hs->local_file);
2353              CLOSE_INVALIDATE (sock);
2354              xfree (head);
2355              return FOPEN_EXCL_ERR;
2356            }
2357        }
2358      if (!fp)
2359        {
2360          logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file, strerror (errno));
2361          CLOSE_INVALIDATE (sock);
2362          xfree (head);
2363          return FOPENERR;
2364        }
2365    }
2366  else
2367    fp = output_stream;
2368
2369  /* Print fetch message, if opt.verbose.  */
2370  if (opt.verbose)
2371    {
2372      logprintf (LOG_NOTQUIET, _("Saving to: %s\n"),
2373                 HYPHENP (hs->local_file) ? quote ("STDOUT") : quote (hs->local_file));
2374    }
2375
2376  /* This confuses the timestamping code that checks for file size.
2377     #### The timestamping code should be smarter about file size.  */
2378  if (opt.save_headers && hs->restval == 0)
2379    fwrite (head, 1, strlen (head), fp);
2380
2381  /* Now we no longer need to store the response header. */
2382  xfree (head);
2383
2384  /* Download the request body.  */
2385  /* Foxconn add start, Alex Zhang, 01/29/2013 */
2386  if(contlen == 0)
2387  {
2388     CLOSE_FINISH (sock);
2389     if (!output_stream)
2390       fclose (fp);
2391     return RETRFINISHED;
2392  }
2393  /* Foxconn add end, Alex Zhang, 01/29/2013 */
2394  flags = 0;
2395  if (contlen != -1)
2396    /* If content-length is present, read that much; otherwise, read
2397       until EOF.  The HTTP spec doesn't require the server to
2398       actually close the connection when it's done sending data. */
2399    flags |= rb_read_exactly;
2400  if (hs->restval > 0 && contrange == 0)
2401    /* If the server ignored our range request, instruct fd_read_body
2402       to skip the first RESTVAL bytes of body.  */
2403    flags |= rb_skip_startpos;
2404  hs->len = hs->restval;
2405  hs->rd_size = 0;
2406  hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0,
2407                          hs->restval, &hs->rd_size, &hs->len, &hs->dltime,
2408                          flags);
2409
2410  if (hs->res >= 0)
2411    CLOSE_FINISH (sock);
2412  else
2413    {
2414      if (hs->res < 0)
2415        hs->rderrmsg = xstrdup (fd_errstr (sock));
2416      CLOSE_INVALIDATE (sock);
2417    }
2418
2419  if (!output_stream)
2420    fclose (fp);
2421  if (hs->res == -2)
2422    return FWRITEERR;
2423  return RETRFINISHED;
2424}
2425
2426/* The genuine HTTP loop!  This is the part where the retrieval is
2427   retried, and retried, and retried, and...  */
2428uerr_t
2429http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
2430           int *dt, struct url *proxy, struct iri *iri)
2431{
2432  int count;
2433  bool got_head = false;         /* used for time-stamping and filename detection */
2434  bool time_came_from_head = false;
2435  bool got_name = false;
2436  char *tms;
2437  const char *tmrate;
2438  uerr_t err, ret = TRYLIMEXC;
2439  time_t tmr = -1;               /* remote time-stamp */
2440  struct http_stat hstat;        /* HTTP status */
2441  struct_stat st;
2442  bool send_head_first = true;
2443  char *file_name;
2444
2445  /* Assert that no value for *LOCAL_FILE was passed. */
2446  assert (local_file == NULL || *local_file == NULL);
2447
2448  /* Set LOCAL_FILE parameter. */
2449  if (local_file && opt.output_document)
2450    *local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document);
2451
2452  /* Reset NEWLOC parameter. */
2453  *newloc = NULL;
2454
2455  /* This used to be done in main(), but it's a better idea to do it
2456     here so that we don't go through the hoops if we're just using
2457     FTP or whatever. */
2458  if (opt.cookies)
2459    load_cookies();
2460
2461  /* Warn on (likely bogus) wildcard usage in HTTP. */
2462  if (opt.ftp_glob && has_wildcards_p (u->path))
2463    logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
2464
2465  /* Setup hstat struct. */
2466  xzero (hstat);
2467  hstat.referer = referer;
2468
2469  if (opt.output_document)
2470    {
2471      hstat.local_file = xstrdup (opt.output_document);
2472      got_name = true;
2473    }
2474  else if (!opt.content_disposition)
2475    {
2476      hstat.local_file = url_file_name (u);
2477      got_name = true;
2478    }
2479
2480  /* TODO: Ick! This code is now in both gethttp and http_loop, and is
2481   * screaming for some refactoring. */
2482  if (got_name && file_exists_p (hstat.local_file) && opt.noclobber && !opt.output_document)
2483    {
2484      /* If opt.noclobber is turned on and file already exists, do not
2485         retrieve the file. But if the output_document was given, then this
2486         test was already done and the file didn't exist. Hence the !opt.output_document */
2487      logprintf (LOG_VERBOSE, _("\
2488File %s already there; not retrieving.\n\n"),
2489                 quote (hstat.local_file));
2490      /* If the file is there, we suppose it's retrieved OK.  */
2491      *dt |= RETROKF;
2492
2493      /* #### Bogusness alert.  */
2494      /* If its suffix is "html" or "htm" or similar, assume text/html.  */
2495      if (has_html_suffix_p (hstat.local_file))
2496        *dt |= TEXTHTML;
2497
2498      ret = RETROK;
2499      goto exit;
2500    }
2501
2502  /* Reset the counter. */
2503  count = 0;
2504
2505  /* Reset the document type. */
2506  *dt = 0;
2507
2508  /* Skip preliminary HEAD request if we're not in spider mode AND
2509   * if -O was given or HTTP Content-Disposition support is disabled. */
2510  if (!opt.spider
2511      && (got_name || !opt.content_disposition))
2512    send_head_first = false;
2513
2514  /* Send preliminary HEAD request if -N is given and we have an existing
2515   * destination file. */
2516  file_name = url_file_name (u);
2517  if (opt.timestamping
2518      && !opt.content_disposition
2519      && file_exists_p (file_name))
2520    send_head_first = true;
2521  xfree (file_name);
2522
2523  /* THE loop */
2524  do
2525    {
2526      /* Increment the pass counter.  */
2527      ++count;
2528      sleep_between_retrievals (count);
2529
2530      /* Get the current time string.  */
2531      tms = datetime_str (time (NULL));
2532
2533      if (opt.spider && !got_head)
2534        logprintf (LOG_VERBOSE, _("\
2535Spider mode enabled. Check if remote file exists.\n"));
2536
2537      /* Print fetch message, if opt.verbose.  */
2538      if (opt.verbose)
2539        {
2540          char *hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
2541
2542          if (count > 1)
2543            {
2544              char tmp[256];
2545              sprintf (tmp, _("(try:%2d)"), count);
2546              logprintf (LOG_NOTQUIET, "--%s--  %s  %s\n",
2547                         tms, tmp, hurl);
2548            }
2549          else
2550            {
2551              logprintf (LOG_NOTQUIET, "--%s--  %s\n",
2552                         tms, hurl);
2553            }
2554
2555#ifdef WINDOWS
2556          ws_changetitle (hurl);
2557#endif
2558          xfree (hurl);
2559        }
2560
2561      /* Default document type is empty.  However, if spider mode is
2562         on or time-stamping is employed, HEAD_ONLY commands is
2563         encoded within *dt.  */
2564      if (send_head_first && !got_head)
2565        *dt |= HEAD_ONLY;
2566      else
2567        *dt &= ~HEAD_ONLY;
2568
2569      /* Decide whether or not to restart.  */
2570      if (opt.always_rest
2571          && got_name
2572          && stat (hstat.local_file, &st) == 0
2573          && S_ISREG (st.st_mode))
2574        /* When -c is used, continue from on-disk size.  (Can't use
2575           hstat.len even if count>1 because we don't want a failed
2576           first attempt to clobber existing data.)  */
2577        hstat.restval = st.st_size;
2578      else if (count > 1)
2579        /* otherwise, continue where the previous try left off */
2580        hstat.restval = hstat.len;
2581      else
2582        hstat.restval = 0;
2583
2584      /* Decide whether to send the no-cache directive.  We send it in
2585         two cases:
2586           a) we're using a proxy, and we're past our first retrieval.
2587              Some proxies are notorious for caching incomplete data, so
2588              we require a fresh get.
2589           b) caching is explicitly inhibited. */
2590      if ((proxy && count > 1)        /* a */
2591          || !opt.allow_cache)        /* b */
2592        *dt |= SEND_NOCACHE;
2593      else
2594        *dt &= ~SEND_NOCACHE;
2595
2596      /* Try fetching the document, or at least its head.  */
2597      err = gethttp (u, &hstat, dt, proxy, iri);
2598
2599      /* Time?  */
2600      tms = datetime_str (time (NULL));
2601
2602      /* Get the new location (with or without the redirection).  */
2603      if (hstat.newloc)
2604        *newloc = xstrdup (hstat.newloc);
2605
2606      switch (err)
2607        {
2608        case HERR: case HEOF: case CONSOCKERR: case CONCLOSED:
2609        case CONERROR: case READERR: case WRITEFAILED:
2610        case RANGEERR: case FOPEN_EXCL_ERR:
2611          /* Non-fatal errors continue executing the loop, which will
2612             bring them to "while" statement at the end, to judge
2613             whether the number of tries was exceeded.  */
2614          printwhat (count, opt.ntry);
2615          continue;
2616        case FWRITEERR: case FOPENERR:
2617          /* Another fatal error.  */
2618          logputs (LOG_VERBOSE, "\n");
2619          logprintf (LOG_NOTQUIET, _("Cannot write to %s (%s).\n"),
2620                     quote (hstat.local_file), strerror (errno));
2621        case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
2622        case SSLINITFAILED: case CONTNOTSUPPORTED: case VERIFCERTERR:
2623          /* Fatal errors just return from the function.  */
2624          ret = err;
2625          goto exit;
2626        case CONSSLERR:
2627          /* Another fatal error.  */
2628          logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
2629          ret = err;
2630          goto exit;
2631        case NEWLOCATION:
2632          /* Return the new location to the caller.  */
2633          if (!*newloc)
2634            {
2635              logprintf (LOG_NOTQUIET,
2636                         _("ERROR: Redirection (%d) without location.\n"),
2637                         hstat.statcode);
2638              ret = WRONGCODE;
2639            }
2640          else
2641            {
2642              ret = NEWLOCATION;
2643            }
2644          goto exit;
2645        case RETRUNNEEDED:
2646          /* The file was already fully retrieved. */
2647          ret = RETROK;
2648          goto exit;
2649        case RETRFINISHED:
2650          /* Deal with you later.  */
2651          break;
2652        default:
2653          /* All possibilities should have been exhausted.  */
2654          abort ();
2655        }
2656
2657      if (!(*dt & RETROKF))
2658        {
2659          char *hurl = NULL;
2660          if (!opt.verbose)
2661            {
2662              /* #### Ugly ugly ugly! */
2663              hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
2664              logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
2665            }
2666
2667          /* Fall back to GET if HEAD fails with a 500 or 501 error code. */
2668          if (*dt & HEAD_ONLY
2669              && (hstat.statcode == 500 || hstat.statcode == 501))
2670            {
2671              got_head = true;
2672              continue;
2673            }
2674          /* Maybe we should always keep track of broken links, not just in
2675           * spider mode.
2676           * Don't log error if it was UTF-8 encoded because we will try
2677           * once unencoded. */
2678          else if (opt.spider && !iri->utf8_encode)
2679            {
2680              /* #### Again: ugly ugly ugly! */
2681              if (!hurl)
2682                hurl = url_string (u, URL_AUTH_HIDE_PASSWD);
2683              nonexisting_url (hurl);
2684              logprintf (LOG_NOTQUIET, _("\
2685Remote file does not exist -- broken link!!!\n"));
2686            }
2687          else
2688            {
2689              logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
2690                         tms, hstat.statcode,
2691                         quotearg_style (escape_quoting_style, hstat.error));
2692            }
2693          logputs (LOG_VERBOSE, "\n");
2694          ret = WRONGCODE;
2695          xfree_null (hurl);
2696          goto exit;
2697        }
2698
2699      /* Did we get the time-stamp? */
2700      if (!got_head)
2701        {
2702          got_head = true;    /* no more time-stamping */
2703
2704          if (opt.timestamping && !hstat.remote_time)
2705            {
2706              logputs (LOG_NOTQUIET, _("\
2707Last-modified header missing -- time-stamps turned off.\n"));
2708            }
2709          else if (hstat.remote_time)
2710            {
2711              /* Convert the date-string into struct tm.  */
2712              tmr = http_atotm (hstat.remote_time);
2713              if (tmr == (time_t) (-1))
2714                logputs (LOG_VERBOSE, _("\
2715Last-modified header invalid -- time-stamp ignored.\n"));
2716              if (*dt & HEAD_ONLY)
2717                time_came_from_head = true;
2718            }
2719
2720          if (send_head_first)
2721            {
2722              /* The time-stamping section.  */
2723              if (opt.timestamping)
2724                {
2725                  if (hstat.orig_file_name) /* Perform the following
2726                                               checks only if the file
2727                                               we're supposed to
2728                                               download already exists.  */
2729                    {
2730                      if (hstat.remote_time &&
2731                          tmr != (time_t) (-1))
2732                        {
2733                          /* Now time-stamping can be used validly.
2734                             Time-stamping means that if the sizes of
2735                             the local and remote file match, and local
2736                             file is newer than the remote file, it will
2737                             not be retrieved.  Otherwise, the normal
2738                             download procedure is resumed.  */
2739                          if (hstat.orig_file_tstamp >= tmr)
2740                            {
2741                              if (hstat.contlen == -1
2742                                  || hstat.orig_file_size == hstat.contlen)
2743                                {
2744                                  logprintf (LOG_VERBOSE, _("\
2745Server file no newer than local file %s -- not retrieving.\n\n"),
2746                                             quote (hstat.orig_file_name));
2747                                  ret = RETROK;
2748                                  goto exit;
2749                                }
2750                              else
2751                                {
2752                                  logprintf (LOG_VERBOSE, _("\
2753The sizes do not match (local %s) -- retrieving.\n"),
2754                                             number_to_static_string (hstat.orig_file_size));
2755                                }
2756                            }
2757                          else
2758                            logputs (LOG_VERBOSE,
2759                                     _("Remote file is newer, retrieving.\n"));
2760
2761                          logputs (LOG_VERBOSE, "\n");
2762                        }
2763                    }
2764
2765                  /* free_hstat (&hstat); */
2766                  hstat.timestamp_checked = true;
2767                }
2768
2769              if (opt.spider)
2770                {
2771                  bool finished = true;
2772                  if (opt.recursive)
2773                    {
2774                      if (*dt & TEXTHTML)
2775                        {
2776                          logputs (LOG_VERBOSE, _("\
2777Remote file exists and could contain links to other resources -- retrieving.\n\n"));
2778                          finished = false;
2779                        }
2780                      else
2781                        {
2782                          logprintf (LOG_VERBOSE, _("\
2783Remote file exists but does not contain any link -- not retrieving.\n\n"));
2784                          ret = RETROK; /* RETRUNNEEDED is not for caller. */
2785                        }
2786                    }
2787                  else
2788                    {
2789                      if (*dt & TEXTHTML)
2790                        {
2791                          logprintf (LOG_VERBOSE, _("\
2792Remote file exists and could contain further links,\n\
2793but recursion is disabled -- not retrieving.\n\n"));
2794                        }
2795                      else
2796                        {
2797                          logprintf (LOG_VERBOSE, _("\
2798Remote file exists.\n\n"));
2799                        }
2800                      ret = RETROK; /* RETRUNNEEDED is not for caller. */
2801                    }
2802
2803                  if (finished)
2804                    {
2805                      logprintf (LOG_NONVERBOSE,
2806                                 _("%s URL: %s %2d %s\n"),
2807                                 tms, u->url, hstat.statcode,
2808                                 hstat.message ? quotearg_style (escape_quoting_style, hstat.message) : "");
2809                      goto exit;
2810                    }
2811                }
2812
2813              got_name = true;
2814              *dt &= ~HEAD_ONLY;
2815              count = 0;          /* the retrieve count for HEAD is reset */
2816              continue;
2817            } /* send_head_first */
2818        } /* !got_head */
2819
2820      if ((tmr != (time_t) (-1))
2821          && ((hstat.len == hstat.contlen) ||
2822              ((hstat.res == 0) && (hstat.contlen == -1))))
2823        {
2824          const char *fl = NULL;
2825          set_local_file (&fl, hstat.local_file);
2826          if (fl)
2827            {
2828              time_t newtmr = -1;
2829              /* Reparse time header, in case it's changed. */
2830              if (time_came_from_head
2831                  && hstat.remote_time && hstat.remote_time[0])
2832                {
2833                  newtmr = http_atotm (hstat.remote_time);
2834                  if (newtmr != (time_t)-1)
2835                    tmr = newtmr;
2836                }
2837              touch (fl, tmr);
2838            }
2839        }
2840      /* End of time-stamping section. */
2841
2842      tmrate = retr_rate (hstat.rd_size, hstat.dltime);
2843      total_download_time += hstat.dltime;
2844
2845      if (hstat.len == hstat.contlen)
2846        {
2847          if (*dt & RETROKF)
2848            {
2849              bool write_to_stdout = (opt.output_document && HYPHENP (opt.output_document));
2850
2851              logprintf (LOG_VERBOSE,
2852                         write_to_stdout
2853                         ? _("%s (%s) - written to stdout %s[%s/%s]\n\n")
2854                         : _("%s (%s) - %s saved [%s/%s]\n\n"),
2855                         tms, tmrate,
2856                         write_to_stdout ? "" : quote (hstat.local_file),
2857                         number_to_static_string (hstat.len),
2858                         number_to_static_string (hstat.contlen));
2859              logprintf (LOG_NONVERBOSE,
2860                         "%s URL:%s [%s/%s] -> \"%s\" [%d]\n",
2861                         tms, u->url,
2862                         number_to_static_string (hstat.len),
2863                         number_to_static_string (hstat.contlen),
2864                         hstat.local_file, count);
2865            }
2866          ++numurls;
2867          total_downloaded_bytes += hstat.len;
2868
2869          /* Remember that we downloaded the file for later ".orig" code. */
2870          if (*dt & ADDED_HTML_EXTENSION)
2871            downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file);
2872          else
2873            downloaded_file(FILE_DOWNLOADED_NORMALLY, hstat.local_file);
2874
2875          ret = RETROK;
2876          goto exit;
2877        }
2878      else if (hstat.res == 0) /* No read error */
2879        {
2880          if (hstat.contlen == -1)  /* We don't know how much we were supposed
2881                                       to get, so assume we succeeded. */
2882            {
2883              if (*dt & RETROKF)
2884                {
2885                  bool write_to_stdout = (opt.output_document && HYPHENP (opt.output_document));
2886
2887                  logprintf (LOG_VERBOSE,
2888                             write_to_stdout
2889                             ? _("%s (%s) - written to stdout %s[%s]\n\n")
2890                             : _("%s (%s) - %s saved [%s]\n\n"),
2891                             tms, tmrate,
2892                             write_to_stdout ? "" : quote (hstat.local_file),
2893                             number_to_static_string (hstat.len));
2894                  logprintf (LOG_NONVERBOSE,
2895                             "%s URL:%s [%s] -> \"%s\" [%d]\n",
2896                             tms, u->url, number_to_static_string (hstat.len),
2897                             hstat.local_file, count);
2898                }
2899              ++numurls;
2900              total_downloaded_bytes += hstat.len;
2901
2902              /* Remember that we downloaded the file for later ".orig" code. */
2903              if (*dt & ADDED_HTML_EXTENSION)
2904                downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file);
2905              else
2906                downloaded_file(FILE_DOWNLOADED_NORMALLY, hstat.local_file);
2907
2908              ret = RETROK;
2909              goto exit;
2910            }
2911          else if (hstat.len < hstat.contlen) /* meaning we lost the
2912                                                 connection too soon */
2913            {
2914              logprintf (LOG_VERBOSE,
2915                         _("%s (%s) - Connection closed at byte %s. "),
2916                         tms, tmrate, number_to_static_string (hstat.len));
2917              printwhat (count, opt.ntry);
2918              continue;
2919            }
2920          else if (hstat.len != hstat.restval)
2921            /* Getting here would mean reading more data than
2922               requested with content-length, which we never do.  */
2923            abort ();
2924          else
2925            {
2926              /* Getting here probably means that the content-length was
2927               * _less_ than the original, local size. We should probably
2928               * truncate or re-read, or something. FIXME */
2929              ret = RETROK;
2930              goto exit;
2931            }
2932        }
2933      else /* from now on hstat.res can only be -1 */
2934        {
2935          if (hstat.contlen == -1)
2936            {
2937              logprintf (LOG_VERBOSE,
2938                         _("%s (%s) - Read error at byte %s (%s)."),
2939                         tms, tmrate, number_to_static_string (hstat.len),
2940                         hstat.rderrmsg);
2941              printwhat (count, opt.ntry);
2942              continue;
2943            }
2944          else /* hstat.res == -1 and contlen is given */
2945            {
2946              logprintf (LOG_VERBOSE,
2947                         _("%s (%s) - Read error at byte %s/%s (%s). "),
2948                         tms, tmrate,
2949                         number_to_static_string (hstat.len),
2950                         number_to_static_string (hstat.contlen),
2951                         hstat.rderrmsg);
2952              printwhat (count, opt.ntry);
2953              continue;
2954            }
2955        }
2956      /* not reached */
2957    }
2958  while (!opt.ntry || (count < opt.ntry));
2959
2960exit:
2961  if (ret == RETROK)
2962    *local_file = xstrdup (hstat.local_file);
2963  free_hstat (&hstat);
2964
2965  return ret;
2966}
2967
/* Decide whether strptime() consumed everything that matters.

   P is the pointer strptime() returned, i.e. the first character it
   did not parse.  Parsing counts as successful when, after optional
   white space, the remainder is empty, starts with `GMT', or starts
   with a sign followed by a digit (a numeric zone offset).

   As an extended regexp: return true iff P matches
   "^ *(GMT|[+-][0-9]|$)".  A NULL P (strptime's failure indication)
   yields false.  */
static bool
check_end (const char *p)
{
  const char *rest = p;

  if (rest == NULL)
    return false;

  /* Skip any white space strptime left unconsumed.  */
  for (; c_isspace (*rest); ++rest)
    ;

  /* Nothing left at all.  */
  if (*rest == '\0')
    return true;

  /* A literal "GMT"; the && chain stops at the first mismatch, so we
     never read past the terminating \0.  */
  if (rest[0] == 'G' && rest[1] == 'M' && rest[2] == 'T')
    return true;

  /* A numeric offset such as "+0" or "-5".  */
  if ((rest[0] == '+' || rest[0] == '-') && c_isdigit (rest[1]))
    return true;

  return false;
}
2990
2991/* Convert the textual specification of time in TIME_STRING to the
2992   number of seconds since the Epoch.
2993
2994   TIME_STRING can be in any of the three formats RFC2616 allows the
2995   HTTP servers to emit -- RFC1123-date, RFC850-date or asctime-date,
2996   as well as the time format used in the Set-Cookie header.
2997   Timezones are ignored, and should be GMT.
2998
2999   Return the computed time_t representation, or -1 if the conversion
3000   fails.
3001
3002   This function uses strptime with various string formats for parsing
3003   TIME_STRING.  This results in a parser that is not as lenient in
3004   interpreting TIME_STRING as I would like it to be.  Being based on
3005   strptime, it always allows shortened months, one-digit days, etc.,
3006   but due to the multitude of formats in which time can be
3007   represented, an ideal HTTP time parser would be even more
3008   forgiving.  It should completely ignore things like week days and
3009   concentrate only on the various forms of representing years,
3010   months, days, hours, minutes, and seconds.  For example, it would
3011   be nice if it accepted ISO 8601 out of the box.
3012
3013   I've investigated free and PD code for this purpose, but none was
3014   usable.  getdate was big and unwieldy, and had potential copyright
3015   issues, or so I was informed.  Dr. Marcus Hennecke's atotm(),
3016   distributed with phttpd, is excellent, but we cannot use it because
3017   it is not assigned to the FSF.  So I stuck it with strptime.  */
3018
time_t
http_atotm (const char *time_string)
{
  /* NOTE: Solaris strptime man page claims that %n and %t match white
     space, but that's not universally available.  Instead, we simply
     use ` ' to mean "skip all WS", which works under all strptime
     implementations I've tested.  */

  static const char *time_formats[] = {
    "%a, %d %b %Y %T",          /* rfc1123: Thu, 29 Jan 1998 22:12:57 */
    "%A, %d-%b-%y %T",          /* rfc850:  Thursday, 29-Jan-98 22:12:57 */
    "%a %b %d %T %Y",           /* asctime: Thu Jan 29 22:12:57 1998 */
    "%a, %d-%b-%Y %T"           /* cookies: Thu, 29-Jan-1998 22:12:57
                                   (used in Set-Cookie, defined in the
                                   Netscape cookie specification.) */
  };
  const char *oldlocale;
  char savedlocale[256];
  size_t i;
  time_t ret = (time_t) -1;

  /* Solaris strptime fails to recognize English month names in
     non-English locales, which we work around by temporarily setting
     locale to C before invoking strptime.

     The name of the current LC_TIME locale is copied into our own
     buffer before the locale is switched, so that it can be handed
     back to setlocale afterwards.  */
  oldlocale = setlocale (LC_TIME, NULL);
  if (oldlocale)
    {
      size_t l = strlen (oldlocale);
      if (l >= sizeof savedlocale)
        /* The name does not fit.  An empty SAVEDLOCALE makes the
           restore step below a no-op, i.e. LC_TIME then stays "C".  */
        savedlocale[0] = '\0';
      else
        /* Copy L + 1 bytes so that the terminating \0 is included;
           SAVEDLOCALE is later passed to setlocale as a C string and
           the rest of the buffer is uninitialized.  L < sizeof
           savedlocale, so L + 1 bytes always fit.  */
        memcpy (savedlocale, oldlocale, l + 1);
    }
  else
    savedlocale[0] = '\0';

  setlocale (LC_TIME, "C");

  /* Try each known format in turn; the first one that strptime parses
     up to an acceptable end (see check_end) wins.  */
  for (i = 0; i < countof (time_formats); i++)
    {
      struct tm t;

      /* Some versions of strptime use the existing contents of struct
         tm to recalculate the date according to format.  Zero it out
         to prevent stack garbage from influencing strptime.  */
      xzero (t);

      if (check_end (strptime (time_string, time_formats[i], &t)))
        {
          ret = timegm (&t);
          break;
        }
    }

  /* Restore the previous locale. */
  if (savedlocale[0])
    setlocale (LC_TIME, savedlocale);

  return ret;
}
3078
3079/* Authorization support: We support three authorization schemes:
3080
3081   * `Basic' scheme, consisting of base64-ing USER:PASSWORD string;
3082
3083   * `Digest' scheme, added by Junio Hamano <junio@twinsun.com>,
3084   consisting of answering to the server's challenge with the proper
3085   MD5 digests.
3086
3087   * `NTLM' ("NT Lan Manager") scheme, based on code written by Daniel
3088   Stenberg for libcurl.  Like digest, NTLM is based on a
3089   challenge-response mechanism, but unlike digest, it is non-standard
3090   (authenticates TCP connections rather than requests), undocumented
3091   and Microsoft-specific.  */
3092
/* Build the value of an authorization header for the `Basic' scheme:
   the literal "Basic " followed by the base64 encoding of
   "USER:PASSWD".  The result is whatever concat_strings returns.  */

static char *
basic_authentication_encode (const char *user, const char *passwd)
{
  /* Length of "USER:PASSWD" without the terminating \0.  */
  int plain_len = strlen (user) + 1 + strlen (passwd);
  /* Both scratch buffers live on the stack and vanish on return.  */
  char *plain = (char *)alloca (plain_len + 1);
  char *encoded = (char *)alloca (BASE64_LENGTH (plain_len) + 1);

  sprintf (plain, "%s:%s", user, passwd);
  base64_encode (plain, plain_len, encoded);

  return concat_strings ("Basic ", encoded, (char *) 0);
}
3111
/* Advance X, which must be a modifiable pointer lvalue, past every
   leading character for which c_isspace is true.  X is evaluated
   more than once, so it must not have side effects.  */
#define SKIP_WS(x) do {                         \
  for (; c_isspace (*(x)); ++(x))               \
    ;                                           \
} while (0)
3116
3117#ifdef ENABLE_DIGEST
3118/* Dump the hexadecimal representation of HASH to BUF.  HASH should be
3119   an array of 16 bytes containing the hash keys, and BUF should be a
3120   buffer of 33 writable characters (32 for hex digits plus one for
3121   zero termination).  */
3122static void
3123dump_hash (char *buf, const unsigned char *hash)
3124{
3125  int i;
3126
3127  for (i = 0; i < MD5_HASHLEN; i++, hash++)
3128    {
3129      *buf++ = XNUM_TO_digit (*hash >> 4);
3130      *buf++ = XNUM_TO_digit (*hash & 0xf);
3131    }
3132  *buf = '\0';
3133}
3134
3135/* Take the line apart to find the challenge, and compose a digest
3136   authorization header.  See RFC2069 section 2.1.2.  */
3137static char *
3138digest_authentication_encode (const char *au, const char *user,
3139                              const char *passwd, const char *method,
3140                              const char *path)
3141{
3142  static char *realm, *opaque, *nonce;
3143  static struct {
3144    const char *name;
3145    char **variable;
3146  } options[] = {
3147    { "realm", &realm },
3148    { "opaque", &opaque },
3149    { "nonce", &nonce }
3150  };
3151  char *res;
3152  param_token name, value;
3153
3154  realm = opaque = nonce = NULL;
3155
3156  au += 6;                      /* skip over `Digest' */
3157  while (extract_param (&au, &name, &value, ','))
3158    {
3159      size_t i;
3160      size_t namelen = name.e - name.b;
3161      for (i = 0; i < countof (options); i++)
3162        if (namelen == strlen (options[i].name)
3163            && 0 == strncmp (name.b, options[i].name,
3164                             namelen))
3165          {
3166            *options[i].variable = strdupdelim (value.b, value.e);
3167            break;
3168          }
3169    }
3170  if (!realm || !nonce || !user || !passwd || !path || !method)
3171    {
3172      xfree_null (realm);
3173      xfree_null (opaque);
3174      xfree_null (nonce);
3175      return NULL;
3176    }
3177
3178  /* Calculate the digest value.  */
3179  {
3180    ALLOCA_MD5_CONTEXT (ctx);
3181    unsigned char hash[MD5_HASHLEN];
3182    char a1buf[MD5_HASHLEN * 2 + 1], a2buf[MD5_HASHLEN * 2 + 1];
3183    char response_digest[MD5_HASHLEN * 2 + 1];
3184
3185    /* A1BUF = H(user ":" realm ":" password) */
3186    gen_md5_init (ctx);
3187    gen_md5_update ((unsigned char *)user, strlen (user), ctx);
3188    gen_md5_update ((unsigned char *)":", 1, ctx);
3189    gen_md5_update ((unsigned char *)realm, strlen (realm), ctx);
3190    gen_md5_update ((unsigned char *)":", 1, ctx);
3191    gen_md5_update ((unsigned char *)passwd, strlen (passwd), ctx);
3192    gen_md5_finish (ctx, hash);
3193    dump_hash (a1buf, hash);
3194
3195    /* A2BUF = H(method ":" path) */
3196    gen_md5_init (ctx);
3197    gen_md5_update ((unsigned char *)method, strlen (method), ctx);
3198    gen_md5_update ((unsigned char *)":", 1, ctx);
3199    gen_md5_update ((unsigned char *)path, strlen (path), ctx);
3200    gen_md5_finish (ctx, hash);
3201    dump_hash (a2buf, hash);
3202
3203    /* RESPONSE_DIGEST = H(A1BUF ":" nonce ":" A2BUF) */
3204    gen_md5_init (ctx);
3205    gen_md5_update ((unsigned char *)a1buf, MD5_HASHLEN * 2, ctx);
3206    gen_md5_update ((unsigned char *)":", 1, ctx);
3207    gen_md5_update ((unsigned char *)nonce, strlen (nonce), ctx);
3208    gen_md5_update ((unsigned char *)":", 1, ctx);
3209    gen_md5_update ((unsigned char *)a2buf, MD5_HASHLEN * 2, ctx);
3210    gen_md5_finish (ctx, hash);
3211    dump_hash (response_digest, hash);
3212
3213    res = xmalloc (strlen (user)
3214                   + strlen (user)
3215                   + strlen (realm)
3216                   + strlen (nonce)
3217                   + strlen (path)
3218                   + 2 * MD5_HASHLEN /*strlen (response_digest)*/
3219                   + (opaque ? strlen (opaque) : 0)
3220                   + 128);
3221    sprintf (res, "Digest \
3222username=\"%s\", realm=\"%s\", nonce=\"%s\", uri=\"%s\", response=\"%s\"",
3223             user, realm, nonce, path, response_digest);
3224    if (opaque)
3225      {
3226        char *p = res + strlen (res);
3227        strcat (p, ", opaque=\"");
3228        strcat (p, opaque);
3229        strcat (p, "\"");
3230      }
3231  }
3232  return res;
3233}
3234#endif /* ENABLE_DIGEST */
3235
/* Length of a string literal.  sizeof counts the terminating \0, so
   one is subtracted.  */
#define STRSIZE(literal) (sizeof (literal) - 1)

/* Return true if the characters in [B, E) begin with SCHEME, which is
   SCHEMELEN characters long, and SCHEME is either the whole range or
   is followed by whitespace.  The comparison is case-insensitive.  An
   empty or inverted range never matches.  */
static bool
auth_header_names_scheme (const char *b, const char *e,
                          const char *scheme, size_t schemelen)
{
  size_t avail;

  if (e <= b)
    return false;

  avail = (size_t) (e - b);
  if (avail < schemelen)
    return false;
  if (0 != strncasecmp (b, scheme, schemelen))
    return false;

  /* B[SCHEMELEN] is inspected only when it lies inside the range.  */
  return avail == schemelen || c_isspace (b[schemelen]);
}

/* Return true if the header value in [HDRBEG, HDREND) names an
   authentication scheme this build can answer: `Basic' always,
   `Digest' and `NTLM' when the corresponding support is compiled
   in.  */
static bool
known_authentication_scheme_p (const char *hdrbeg, const char *hdrend)
{
  if (auth_header_names_scheme (hdrbeg, hdrend, "Basic", STRSIZE ("Basic")))
    return true;
#ifdef ENABLE_DIGEST
  if (auth_header_names_scheme (hdrbeg, hdrend, "Digest", STRSIZE ("Digest")))
    return true;
#endif
#ifdef ENABLE_NTLM
  if (auth_header_names_scheme (hdrbeg, hdrend, "NTLM", STRSIZE ("NTLM")))
    return true;
#endif
  return false;
}
3264
/* Produce the value of the HTTP authorization request header that
   answers the `WWW-Authenticate' challenge AU.  The scheme is chosen
   from the first letter of AU: `Basic' always, `Digest' and `NTLM'
   when compiled in.

   For Basic and Digest, *FINISHED is set to true.  For NTLM it is set
   to true when ntlm_input rejects the challenge (NULL is returned in
   that case); otherwise it is left to ntlm_output.  */
static char *
create_authorization_line (const char *au, const char *user,
                           const char *passwd, const char *method,
                           const char *path, bool *finished)
{
  /* We are called only with known schemes, so the upper-cased first
     letter is enough to tell them apart.  */
  const int scheme_initial = c_toupper (*au);

  if (scheme_initial == 'B')    /* Basic */
    {
      *finished = true;
      return basic_authentication_encode (user, passwd);
    }
#ifdef ENABLE_DIGEST
  if (scheme_initial == 'D')    /* Digest */
    {
      *finished = true;
      return digest_authentication_encode (au, user, passwd, method, path);
    }
#endif
#ifdef ENABLE_NTLM
  if (scheme_initial == 'N')    /* NTLM */
    {
      if (ntlm_input (&pconn.ntlm, au))
        return ntlm_output (&pconn.ntlm, user, passwd, finished);
      *finished = true;
      return NULL;
    }
#endif

  /* We shouldn't get here -- this function should be only called
     with values approved by known_authentication_scheme_p.  */
  abort ();
}
3302
3303static void
3304load_cookies (void)
3305{
3306  if (!wget_cookie_jar)
3307    wget_cookie_jar = cookie_jar_new ();
3308  if (opt.cookies_input && !cookies_loaded_p)
3309    {
3310      cookie_jar_load (wget_cookie_jar, opt.cookies_input);
3311      cookies_loaded_p = true;
3312    }
3313}
3314
3315void
3316save_cookies (void)
3317{
3318  if (wget_cookie_jar)
3319    cookie_jar_save (wget_cookie_jar, opt.cookies_output);
3320}
3321
3322void
3323http_cleanup (void)
3324{
3325  xfree_null (pconn.host);
3326  if (wget_cookie_jar)
3327    cookie_jar_delete (wget_cookie_jar);
3328}
3329
3330void
3331ensure_extension (struct http_stat *hs, const char *ext, int *dt)
3332{
3333  char *last_period_in_local_filename = strrchr (hs->local_file, '.');
3334  char shortext[8];
3335  int len = strlen (ext);
3336  if (len == 5)
3337    {
3338      strncpy (shortext, ext, len - 1);
3339      shortext[len - 2] = '\0';
3340    }
3341
3342  if (last_period_in_local_filename == NULL
3343      || !(0 == strcasecmp (last_period_in_local_filename, shortext)
3344           || 0 == strcasecmp (last_period_in_local_filename, ext)))
3345    {
3346      int local_filename_len = strlen (hs->local_file);
3347      /* Resize the local file, allowing for ".html" preceded by
3348         optional ".NUMBER".  */
3349      hs->local_file = xrealloc (hs->local_file,
3350                                 local_filename_len + 24 + len);
3351      strcpy (hs->local_file + local_filename_len, ext);
3352      /* If clobbering is not allowed and the file, as named,
3353         exists, tack on ".NUMBER.html" instead. */
3354      if (!ALLOW_CLOBBER && file_exists_p (hs->local_file))
3355        {
3356          int ext_num = 1;
3357          do
3358            sprintf (hs->local_file + local_filename_len,
3359                     ".%d%s", ext_num++, ext);
3360          while (file_exists_p (hs->local_file));
3361        }
3362      *dt |= ADDED_HTML_EXTENSION;
3363    }
3364}
3365
3366
3367#ifdef TESTING
3368
3369const char *
3370test_parse_content_disposition()
3371{
3372  int i;
3373  struct {
3374    char *hdrval;
3375    char *opt_dir_prefix;
3376    char *filename;
3377    bool result;
3378  } test_array[] = {
3379    { "filename=\"file.ext\"", NULL, "file.ext", true },
3380    { "filename=\"file.ext\"", "somedir", "somedir/file.ext", true },
3381    { "attachment; filename=\"file.ext\"", NULL, "file.ext", true },
3382    { "attachment; filename=\"file.ext\"", "somedir", "somedir/file.ext", true },
3383    { "attachment; filename=\"file.ext\"; dummy", NULL, "file.ext", true },
3384    { "attachment; filename=\"file.ext\"; dummy", "somedir", "somedir/file.ext", true },
3385    { "attachment", NULL, NULL, false },
3386    { "attachment", "somedir", NULL, false },
3387  };
3388
3389  for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
3390    {
3391      char *filename;
3392      bool res;
3393
3394      opt.dir_prefix = test_array[i].opt_dir_prefix;
3395      res = parse_content_disposition (test_array[i].hdrval, &filename);
3396
3397      mu_assert ("test_parse_content_disposition: wrong result",
3398                 res == test_array[i].result
3399                 && (res == false
3400                     || 0 == strcmp (test_array[i].filename, filename)));
3401    }
3402
3403  return NULL;
3404}
3405
3406#endif /* TESTING */
3407
3408/*
3409 * vim: et sts=2 sw=2 cino+={s
3410 */
3411
3412