1/* Various utility functions.
2   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
3   2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
4   Inc.
5
6This file is part of GNU Wget.
7
8GNU Wget is free software; you can redistribute it and/or modify
9it under the terms of the GNU General Public License as published by
10the Free Software Foundation; either version 3 of the License, or
11(at your option) any later version.
12
13GNU Wget is distributed in the hope that it will be useful,
14but WITHOUT ANY WARRANTY; without even the implied warranty of
15MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16GNU General Public License for more details.
17
18You should have received a copy of the GNU General Public License
19along with Wget.  If not, see <http://www.gnu.org/licenses/>.
20
21Additional permission under GNU GPL version 3 section 7
22
23If you modify this program, or any covered work, by linking or
24combining it with the OpenSSL project's OpenSSL library (or a
25modified version of that library), containing parts covered by the
26terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
27grants you additional permission to convey the resulting work.
28Corresponding Source for a non-source form of such a combination
29shall include the source code for the parts of OpenSSL used as well
30as that of the covered work.  */
31
32#include "wget.h"
33
34#include <stdio.h>
35#include <stdlib.h>
36#include <string.h>
37#include <time.h>
38#include <unistd.h>
39#ifdef HAVE_MMAP
40# include <sys/mman.h>
41#endif
42#ifdef HAVE_PROCESS_H
43# include <process.h>  /* getpid() */
44#endif
45#include <errno.h>
46#include <fcntl.h>
47#include <assert.h>
48#include <stdarg.h>
49#include <locale.h>
50
51#if HAVE_UTIME
52# include <sys/types.h>
53# ifdef HAVE_UTIME_H
54#  include <utime.h>
55# endif
56
57# ifdef HAVE_SYS_UTIME_H
58#  include <sys/utime.h>
59# endif
60#endif
61
62#include <sys/time.h>
63
64#include <sys/stat.h>
65
66/* For TIOCGWINSZ and friends: */
67#include <sys/ioctl.h>
68#include <termios.h>
69
70/* Needed for Unix version of run_with_timeout. */
71#include <signal.h>
72#include <setjmp.h>
73
74#include <regex.h>
75#ifdef HAVE_LIBPCRE
76# include <pcre.h>
77#endif
78
79#ifndef HAVE_SIGSETJMP
80/* If sigsetjmp is a macro, configure won't pick it up. */
81# ifdef sigsetjmp
82#  define HAVE_SIGSETJMP
83# endif
84#endif
85
86#if defined HAVE_SIGSETJMP || defined HAVE_SIGBLOCK
87# define USE_SIGNAL_TIMEOUT
88#endif
89
90#include "utils.h"
91#include "hash.h"
92
93#ifdef __VMS
94#include "vms.h"
95#endif /* def __VMS */
96
97#ifdef TESTING
98#include "test.h"
99#endif
100
101#include "exits.h"
102
103static void _Noreturn
104memfatal (const char *context, long attempted_size)
105{
106  /* Make sure we don't try to store part of the log line, and thus
107     call malloc.  */
108  log_set_save_context (false);
109
110  /* We have different log outputs in different situations:
111     1) output without bytes information
112     2) output with bytes information  */
113  if (attempted_size == UNKNOWN_ATTEMPTED_SIZE)
114    {
115      logprintf (LOG_ALWAYS,
116                 _("%s: %s: Failed to allocate enough memory; memory exhausted.\n"),
117                 exec_name, context);
118    }
119  else
120    {
121      logprintf (LOG_ALWAYS,
122                 _("%s: %s: Failed to allocate %ld bytes; memory exhausted.\n"),
123                 exec_name, context, attempted_size);
124    }
125
126  exit (WGET_EXIT_GENERIC_ERROR);
127}
128
129/* Character property table for (re-)escaping VMS ODS5 extended file
130   names.  Note that this table ignores Unicode.
131
132   ODS2 valid characters: 0-9 A-Z a-z $ - _ ~
133
134   ODS5 Invalid characters:
135      C0 control codes (0x00 to 0x1F inclusive)
136      Asterisk (*)
137      Question mark (?)
138
139   ODS5 Invalid characters only in VMS V7.2 (which no one runs, right?):
140      Double quotation marks (")
141      Backslash (\)
142      Colon (:)
143      Left angle bracket (<)
144      Right angle bracket (>)
145      Slash (/)
146      Vertical bar (|)
147
148   Characters escaped by "^":
149      SP  !  "  #  %  &  '  (  )  +  ,  .  :  ;  =
150       @  [  \  ]  ^  `  {  |  }  ~
151
152   Either "^_" or "^ " is accepted as a space.  Period (.) is a special
153   case.  Note that un-escaped < and > can also confuse a directory
154   spec.
155
156   Characters put out as ^xx:
157      7F (DEL)
158      80-9F (C1 control characters)
159      A0 (nonbreaking space)
160      FF (Latin small letter y diaeresis)
161
162   Other cases:
163      Unicode: "^Uxxxx", where "xxxx" is four hex digits.
164
165    Property table values:
166      Normal escape:    1
167      Space:            2
168      Dot:              4
169      Hex-hex escape:   8
170      ODS2 normal:     16
171      ODS2 lower case: 32
172      Hex digit:       64
173*/
174
/* Indexed by character code (0-255).  Each entry is the sum of the
   property bits listed in the comment above, e.g. 17 = 16 + 1,
   80 = 64 + 16 and 96 = 64 + 32.  */
unsigned char char_prop[ 256] = {

/* 0x00-0x0F */
/* NUL SOH STX ETX EOT ENQ ACK BEL   BS  HT  LF  VT  FF  CR  SO  SI */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,

/* 0x10-0x1F */
/* DLE DC1 DC2 DC3 DC4 NAK SYN ETB  CAN  EM SUB ESC  FS  GS  RS  US */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,

/* 0x20-0x2F */
/*  SP  !   "   #   $   %   &   '    (   )   *   +   ,   -   .   /  */
    2,  1,  1,  1, 16,  1,  1,  1,   1,  1,  0,  1,  1, 16,  4,  0,

/* 0x30-0x3F */
/*  0   1   2   3   4   5   6   7    8   9   :   ;   <   =   >   ?  */
   80, 80, 80, 80, 80, 80, 80, 80,  80, 80,  1,  1,  1,  1,  1,  1,

/* 0x40-0x4F */
/*  @   A   B   C   D   E   F   G    H   I   J   K   L   M   N   O  */
    1, 80, 80, 80, 80, 80, 80, 16,  16, 16, 16, 16, 16, 16, 16, 16,

/* 0x50-0x5F */
/*  P   Q   R   S   T   U   V   W    X   Y   Z   [   \   ]   ^   _  */
   16, 16, 16, 16, 16, 16, 16, 16,  16, 16, 16,  1,  1,  1,  1, 16,

/* 0x60-0x6F */
/*  `   a   b   c   d   e   f   g    h   i   j   k   l   m   n   o  */
    1, 96, 96, 96, 96, 96, 96, 32,  32, 32, 32, 32, 32, 32, 32, 32,

/* 0x70-0x7F */
/*  p   q   r   s   t   u   v   w    x   y   z   {   |   }   ~  DEL */
   32, 32, 32, 32, 32, 32, 32, 32,  32, 32, 32,  1,  1,  1, 17,  8,

/* 0x80-0x9F: C1 control characters, put out as ^xx.  */
    8,  8,  8,  8,  8,  8,  8,  8,   8,  8,  8,  8,  8,  8,  8,  8,
    8,  8,  8,  8,  8,  8,  8,  8,   8,  8,  8,  8,  8,  8,  8,  8,
/* 0xA0 (nonbreaking space, put out as ^xx), then 0xA1-0xAF.  */
    8,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
/* 0xB0-0xEF: no properties set.  */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  0,
/* 0xF0-0xFE, then 0xFF (put out as ^xx).  */
    0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0,  0,  8
};
210
211/* Utility function: like xstrdup(), but also lowercases S.  */
212
char *
xstrdup_lower (const char *s)
{
  char *lowered = xstrdup (s);
  size_t i;

  /* Fold the duplicate in place; S itself is never written to.  */
  for (i = 0; lowered[i] != '\0'; i++)
    lowered[i] = c_tolower (lowered[i]);

  return lowered;
}
222
223/* Copy the string formed by two pointers (one on the beginning, other
224   on the char after the last char) to a new, malloc-ed location.
225   0-terminate it.  */
char *
strdupdelim (const char *beg, const char *end)
{
  /* Number of characters in the half-open range [BEG, END).  */
  size_t len = end - beg;
  char *copy = xmalloc (len + 1);

  memcpy (copy, beg, len);
  copy[len] = '\0';
  return copy;
}
234
235/* Parse a string containing comma-separated elements, and return a
236   vector of char pointers with the elements.  Spaces following the
237   commas are ignored.  */
char **
sepstring (const char *s)
{
  char **vec = NULL;
  const char *start;
  int count = 0;

  /* A missing or empty string yields no vector at all.  */
  if (s == NULL || *s == '\0')
    return NULL;

  start = s;
  while (*s != '\0')
    {
      if (*s != ',')
        {
          ++s;
          continue;
        }

      /* Close the current element, keeping the vector
         NULL-terminated after every addition.  */
      vec = xrealloc (vec, (count + 2) * sizeof (char *));
      vec[count++] = strdupdelim (start, s);
      vec[count] = NULL;

      /* Step over the comma and any blanks that follow it.  */
      for (++s; c_isspace (*s); ++s)
        ;
      start = s;
    }

  /* Whatever follows the last comma (possibly nothing) becomes the
     final element.  */
  vec = xrealloc (vec, (count + 2) * sizeof (char *));
  vec[count] = strdupdelim (start, s);
  vec[count + 1] = NULL;
  return vec;
}
270
271/* Like sprintf, but prints into a string of sufficient size freshly
272   allocated with malloc, which is returned.  If unable to print due
273   to invalid format, returns NULL.  Inability to allocate needed
274   memory results in abort, as with xmalloc.  This is in spirit
275   similar to the GNU/BSD extension asprintf, but somewhat easier to
276   use.
277
278   Internally the function either calls vasprintf or loops around
279   vsnprintf until the correct size is found.  Since Wget also ships a
280   fallback implementation of vsnprintf, this should be portable.  */
281
/* This constant limits the memory allocated for the text buffer.
   It applies when vasprintf is not available on the system and
   vsnprintf returns -1 when a long line is truncated (as in old
   versions of glibc and on other systems without C99 support).  */
286
287#define FMT_MAX_LENGTH 1048576
288
289char *
290aprintf (const char *fmt, ...)
291{
292#if defined HAVE_VASPRINTF && !defined DEBUG_MALLOC
293  /* Use vasprintf. */
294  int ret;
295  va_list args;
296  char *str;
297  va_start (args, fmt);
298  ret = vasprintf (&str, fmt, args);
299  va_end (args);
300  if (ret < 0 && errno == ENOMEM)
301    memfatal ("aprintf", UNKNOWN_ATTEMPTED_SIZE);  /* for consistency
302                                                      with xmalloc/xrealloc */
303  else if (ret < 0)
304    return NULL;
305  return str;
306#else  /* not HAVE_VASPRINTF */
307
308  /* vasprintf is unavailable.  snprintf into a small buffer and
309     resize it as necessary. */
310  int size = 32;
311  char *str = xmalloc (size);
312
313  /* #### This code will infloop and eventually abort in xrealloc if
314     passed a FMT that causes snprintf to consistently return -1.  */
315
316  while (1)
317    {
318      int n;
319      va_list args;
320
321      va_start (args, fmt);
322      n = vsnprintf (str, size, fmt, args);
323      va_end (args);
324
325      /* If the printing worked, return the string. */
326      if (n > -1 && n < size)
327        return str;
328
329      /* Else try again with a larger buffer. */
330      if (n > -1)               /* C99 */
331        size = n + 1;           /* precisely what is needed */
332      else if (size >= FMT_MAX_LENGTH)  /* We have a huge buffer, */
333        {                               /* maybe we have some wrong
334                                           format string? */
335          logprintf (LOG_ALWAYS,
336                     _("%s: aprintf: text buffer is too big (%ld bytes), "
337                       "aborting.\n"),
338                     exec_name, size);  /* printout a log message */
339          abort ();                     /* and abort... */
340        }
341      else
342        {
343          /* else, we continue to grow our
344           * buffer: Twice the old size. */
345          size <<= 1;
346        }
347      str = xrealloc (str, size);
348    }
349#endif /* not HAVE_VASPRINTF */
350}
351
352/* Concatenate the NULL-terminated list of string arguments into
353   freshly allocated space.  */
354
char *
concat_strings (const char *str0, ...)
{
  va_list args;
  /* Lengths of the first few arguments are remembered so the copy
     pass need not call strlen on them again (inspired by Apache's
     apr_pstrcat).  They are kept as size_t so that long strings are
     neither truncated nor able to overflow the total.  */
  size_t saved_lengths[5];
  char *ret, *p;

  const char *next_str;
  size_t total_length = 0;
  size_t argcount;

  /* Calculate the length of and allocate the resulting string. */

  argcount = 0;
  va_start (args, str0);
  for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
    {
      size_t len = strlen (next_str);
      if (argcount < countof (saved_lengths))
        saved_lengths[argcount++] = len;
      total_length += len;
    }
  va_end (args);
  p = ret = xmalloc (total_length + 1);

  /* Copy the strings into the allocated space. */

  argcount = 0;
  va_start (args, str0);
  for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *))
    {
      size_t len;
      if (argcount < countof (saved_lengths))
        len = saved_lengths[argcount++];
      else
        len = strlen (next_str);
      memcpy (p, next_str, len);
      p += len;
    }
  va_end (args);
  *p = '\0';

  return ret;
}
399
400/* Format the provided time according to the specified format.  The
401   format is a string with format elements supported by strftime.  */
402
static char *
fmttime (time_t t, const char *fmt)
{
  /* Shared result buffer: every call overwrites the previous
     result.  */
  static char output[32];
  const struct tm *broken_down = localtime (&t);

  /* Abort when T cannot be converted, or when strftime reports 0
     characters written (e.g. the result does not fit in OUTPUT).  */
  if (broken_down == NULL
      || strftime (output, sizeof output, fmt, broken_down) == 0)
    abort ();

  return output;
}
414
/* Return pointer to a static char[] buffer in which zero-terminated
   string-representation of T (in form hh:mm:ss) is printed.

   T is passed by value, so there is no NULL case; callers wanting
   the current time must pass time (NULL) themselves.  */
419
char *
time_str (time_t t)
{
  /* strftime pattern producing hh:mm:ss.  */
  static const char hms_format[] = "%H:%M:%S";

  return fmttime (t, hms_format);
}
425
426/* Like the above, but include the date: YYYY-MM-DD hh:mm:ss.  */
427
char *
datetime_str (time_t t)
{
  /* strftime pattern producing YYYY-MM-DD hh:mm:ss.  */
  static const char datetime_format[] = "%Y-%m-%d %H:%M:%S";

  return fmttime (t, datetime_format);
}
433
434/* The Windows versions of the following two functions are defined in
435   mswindows.c. On MSDOS this function should never be called. */
436
437#ifdef __VMS
438
void
fork_to_background (void)
{
  /* Deliberately a no-op in this (__VMS) configuration.  */
}
444
445#else /* def __VMS */
446
447#if !defined(WINDOWS) && !defined(MSDOS)
448void
449fork_to_background (void)
450{
451  pid_t pid;
452  /* Whether we arrange our own version of opt.lfilename here.  */
453  bool logfile_changed = false;
454
455  if (!opt.lfilename && (!opt.quiet || opt.server_response))
456    {
457      /* We must create the file immediately to avoid either a race
458         condition (which arises from using unique_name and failing to
459         use fopen_excl) or lying to the user about the log file name
460         (which arises from using unique_name, printing the name, and
461         using fopen_excl later on.)  */
462      FILE *new_log_fp = unique_create (DEFAULT_LOGFILE, false, &opt.lfilename);
463      if (new_log_fp)
464        {
465          logfile_changed = true;
466          fclose (new_log_fp);
467        }
468    }
469  pid = fork ();
470  if (pid < 0)
471    {
472      /* parent, error */
473      perror ("fork");
474      exit (WGET_EXIT_GENERIC_ERROR);
475    }
476  else if (pid != 0)
477    {
478      /* parent, no error */
479      printf (_("Continuing in background, pid %d.\n"), (int) pid);
480      if (logfile_changed)
481        printf (_("Output will be written to %s.\n"), quote (opt.lfilename));
482      exit (WGET_EXIT_SUCCESS);                 /* #### should we use _exit()? */
483    }
484
485  /* child: give up the privileges and keep running. */
486  setsid ();
487  if (freopen ("/dev/null", "r", stdin) == NULL)
488    DEBUGP (("Failed to redirect stdin to /dev/null.\n"));
489  if (freopen ("/dev/null", "w", stdout) == NULL)
490    DEBUGP (("Failed to redirect stdout to /dev/null.\n"));
491  if (freopen ("/dev/null", "w", stderr) == NULL)
492    DEBUGP (("Failed to redirect stderr to /dev/null.\n"));
493}
494#endif /* !WINDOWS && !MSDOS */
495
496#endif /* def __VMS [else] */
497
498
499/* "Touch" FILE, i.e. make its mtime ("modified time") equal the time
500   specified with TM.  The atime ("access time") is set to the current
501   time.  */
502
503void
504touch (const char *file, time_t tm)
505{
506#if HAVE_UTIME
507# ifdef HAVE_STRUCT_UTIMBUF
508  struct utimbuf times;
509# else
510  struct {
511    time_t actime;
512    time_t modtime;
513  } times;
514# endif
515  times.modtime = tm;
516  times.actime = time (NULL);
517  if (utime (file, &times) == -1)
518    logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno));
519#else
520  struct timespec timespecs[2];
521  int fd;
522
523  fd = open (file, O_WRONLY);
524  if (fd < 0)
525    {
526      logprintf (LOG_NOTQUIET, "open(%s): %s\n", file, strerror (errno));
527      return;
528    }
529
530  timespecs[0].tv_sec = time (NULL);
531  timespecs[0].tv_nsec = 0L;
532  timespecs[1].tv_sec = tm;
533  timespecs[1].tv_nsec = 0L;
534
535  if (futimens (fd, timespecs) == -1)
536    logprintf (LOG_NOTQUIET, "futimens(%s): %s\n", file, strerror (errno));
537
538  close (fd);
539#endif
540}
541
542/* Checks if FILE is a symbolic link, and removes it if it is.  Does
543   nothing under MS-Windows.  */
544int
545remove_link (const char *file)
546{
547  int err = 0;
548  struct_stat st;
549
550  if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode))
551    {
552      DEBUGP (("Unlinking %s (symlink).\n", file));
553      err = unlink (file);
554      if (err != 0)
555        logprintf (LOG_VERBOSE, _("Failed to unlink symlink %s: %s\n"),
556                   quote (file), strerror (errno));
557    }
558  return err;
559}
560
561/* Does FILENAME exist?  This is quite a lousy implementation, since
562   it supplies no error codes -- only a yes-or-no answer.  Thus it
563   will return that a file does not exist if, e.g., the directory is
564   unreadable.  I don't mind it too much currently, though.  The
565   proper way should, of course, be to have a third, error state,
566   other than true/false, but that would introduce uncalled-for
567   additional complexity to the callers.  */
568bool
569file_exists_p (const char *filename)
570{
571#ifdef HAVE_ACCESS
572  return access (filename, F_OK) >= 0;
573#else
574  struct_stat buf;
575  return stat (filename, &buf) >= 0;
576#endif
577}
578
579/* Returns 0 if PATH is a directory, 1 otherwise (any kind of file).
580   Returns 0 on error.  */
581bool
582file_non_directory_p (const char *path)
583{
584  struct_stat buf;
585  /* Use lstat() rather than stat() so that symbolic links pointing to
586     directories can be identified correctly.  */
587  if (lstat (path, &buf) != 0)
588    return false;
589  return S_ISDIR (buf.st_mode) ? false : true;
590}
591
592/* Return the size of file named by FILENAME, or -1 if it cannot be
593   opened or seeked into. */
594wgint
595file_size (const char *filename)
596{
597#if defined(HAVE_FSEEKO) && defined(HAVE_FTELLO)
598  wgint size;
599  /* We use fseek rather than stat to determine the file size because
600     that way we can also verify that the file is readable without
601     explicitly checking for permissions.  Inspired by the POST patch
602     by Arnaud Wylie.  */
603  FILE *fp = fopen (filename, "rb");
604  if (!fp)
605    return -1;
606  fseeko (fp, 0, SEEK_END);
607  size = ftello (fp);
608  fclose (fp);
609  return size;
610#else
611  struct_stat st;
612  if (stat (filename, &st) < 0)
613    return -1;
614  return st.st_size;
615#endif
616}
617
618/* 2005-02-19 SMS.
619   If no UNIQ_SEP is defined (as on VMS), have unique_name() return the
620   original name.  With the VMS file systems' versioning, everything
621   should be fine, and appending ".NN" just causes trouble.
622*/
623
624#ifdef UNIQ_SEP
625
626/* stat file names named PREFIX.1, PREFIX.2, etc., until one that
627   doesn't exist is found.  Return a freshly allocated copy of the
628   unused file name.  */
629
630static char *
631unique_name_1 (const char *prefix)
632{
633  int count = 1;
634  int plen = strlen (prefix);
635  char *template = (char *)alloca (plen + 1 + 24);
636  char *template_tail = template + plen;
637
638  memcpy (template, prefix, plen);
639  *template_tail++ = UNIQ_SEP;
640
641  do
642    number_to_string (template_tail, count++);
643  while (file_exists_p (template));
644
645  return xstrdup (template);
646}
647
648/* Return a unique file name, based on FILE.
649
650   More precisely, if FILE doesn't exist, it is returned unmodified.
651   If not, FILE.1 is tried, then FILE.2, etc.  The first FILE.<number>
652   file name that doesn't exist is returned.
653
654   2005-02-19 SMS.  "." is now UNIQ_SEP, and may be different.
655
656   The resulting file is not created, only verified that it didn't
657   exist at the point in time when the function was called.
658   Therefore, where security matters, don't rely that the file created
659   by this function exists until you open it with O_EXCL or
660   equivalent.
661
662   If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated
663   string.  Otherwise, it may return FILE if the file doesn't exist
664   (and therefore doesn't need changing).  */
665
char *
unique_name (const char *file, bool allow_passthrough)
{
  /* FILE is taken: look for a numeric suffix that yields an unused
     name.  */
  if (file_exists_p (file))
    return unique_name_1 (file);

  /* FILE itself is free.  Hand back the caller's own pointer only
     when that was explicitly allowed; otherwise return a fresh
     copy.  */
  if (allow_passthrough)
    return (char *)file;

  return xstrdup (file);
}
678
679#else /* def UNIQ_SEP */
680
681/* Dummy unique_name() for VMS.  Return the original name as easily as
682   possible.
683*/
char *
unique_name (const char *file, bool allow_passthrough)
{
  /* The name is never altered here; only whether the caller gets its
     own pointer back or a fresh copy depends on ALLOW_PASSTHROUGH.  */
  if (allow_passthrough)
    return (char *)file;

  return xstrdup (file);
}
690
691#endif /* def UNIQ_SEP [else] */
692
693/* Create a file based on NAME, except without overwriting an existing
694   file with that name.  Providing O_EXCL is correctly implemented,
695   this function does not have the race condition associated with
696   opening the file returned by unique_name.  */
697
FILE *
unique_create (const char *name, bool binary, char **opened_name)
{
  /* Candidate file name derived from NAME.  */
  char *uname = unique_name (name, false);
  FILE *fp = fopen_excl (uname, binary);

  /* If the candidate came into existence between picking it and
     opening it, pick another one and retry.  Any other failure ends
     the loop.  */
  while (fp == NULL && errno == EEXIST)
    {
      xfree (uname);
      uname = unique_name (name, false);
      fp = fopen_excl (uname, binary);
    }

  /* On success, and only if the caller asked for it, the name is
     handed over to the caller.  */
  if (opened_name && fp)
    {
      *opened_name = uname;
      return fp;
    }

  /* Otherwise the name is not needed any more.  */
  if (opened_name)
    *opened_name = NULL;
  xfree (uname);
  return fp;
}
723
724/* Open the file for writing, with the addition that the file is
725   opened "exclusively".  This means that, if the file already exists,
726   this function will *fail* and errno will be set to EEXIST.  If
727   BINARY is set, the file will be opened in binary mode, equivalent
728   to fopen's "wb".
729
730   If opening the file fails for any reason, including the file having
731   previously existed, this function returns NULL and sets errno
732   appropriately.  */
733
FILE *
fopen_excl (const char *fname, int binary)
{
  int fd;
#ifdef O_EXCL
  FILE *fp;

/* 2005-04-14 SMS.
   VMS lacks O_BINARY, but makes up for it in weird and wonderful ways.
   It also has file versions which obviate all the O_EXCL effort.
   O_TRUNC (something of a misnomer) requests a new version.
*/
# ifdef __VMS
/* Common open() optional arguments:
   sequential access only, access callback function.
*/
#  define OPEN_OPT_ARGS "fop=sqo", "acc", acc_cb, &open_id

  int open_id;
  int flags = O_WRONLY | O_CREAT | O_TRUNC;

  if (binary > 1)
    {
      open_id = 11;
      fd = open( fname,                 /* File name. */
       flags,                           /* Flags. */
       0777,                            /* Mode for default protection. */
       "ctx=bin,stm",                   /* Binary, stream access. */
       "rfm=stmlf",                     /* Stream_LF. */
       OPEN_OPT_ARGS);                  /* Access callback. */
    }
  else if (binary)
    {
      open_id = 12;
      fd = open( fname,                 /* File name. */
       flags,                           /* Flags. */
       0777,                            /* Mode for default protection. */
       "ctx=bin,stm",                   /* Binary, stream access. */
       "rfm=fix",                       /* Fixed-length, */
       "mrs=512",                       /* 512-byte records. */
       OPEN_OPT_ARGS);                  /* Access callback. */
    }
  else
    {
      open_id = 13;
      fd = open( fname,                 /* File name. */
       flags,                           /* Flags. */
       0777,                            /* Mode for default protection. */
       "rfm=stmlf",                     /* Stream_LF. */
       OPEN_OPT_ARGS);                  /* Access callback. */
    }
# else /* def __VMS */
  int flags = O_WRONLY | O_CREAT | O_EXCL;
# ifdef O_BINARY
  if (binary)
    flags |= O_BINARY;
# endif
  fd = open (fname, flags, 0666);
# endif /* def __VMS [else] */

  if (fd < 0)
    return NULL;

  fp = fdopen (fd, binary ? "wb" : "w");
  if (fp == NULL)
    {
      /* fdopen does not close the descriptor when it fails, so close
         it here to avoid leaking it.  errno is preserved so that the
         caller still sees the reason fdopen failed.  */
      int saved_errno = errno;
      close (fd);
      errno = saved_errno;
    }
  return fp;
#else  /* not O_EXCL */
  /* Manually check whether the file exists.  This is prone to race
     conditions, but systems without O_EXCL haven't deserved
     better.  */
  if (file_exists_p (fname))
    {
      errno = EEXIST;
      return NULL;
    }
  return fopen (fname, binary ? "wb" : "w");
#endif /* not O_EXCL */
}
808
809/* Create DIRECTORY.  If some of the pathname components of DIRECTORY
810   are missing, create them first.  In case any mkdir() call fails,
811   return its error status.  Returns 0 on successful completion.
812
813   The behaviour of this function should be identical to the behaviour
814   of `mkdir -p' on systems where mkdir supports the `-p' option.  */
int
make_directory (const char *directory)
{
  char *path;
  int pos, status;

  /* Work on a private, writable copy: components are terminated in
     place below, and DIRECTORY may be read-only.  */
  STRDUP_ALLOCA (path, directory);

  /* A leading '/' is stepped over, so that absolute paths work.  */
  pos = (*path == '/');

  for (;;)
    {
      bool last;

      /* Advance to the end of the current component.  */
      while (path[pos] != '\0' && path[pos] != '/')
        pos++;
      last = (path[pos] == '\0');

      /* Cut the path here and create this prefix unless it already
         exists.  The status of an intermediate mkdir is overwritten
         by later passes, because leading components need not be
         directories we can create.  */
      path[pos] = '\0';
      status = file_exists_p (path) ? 0 : mkdir (path, 0777);

      if (last)
        return status;

      /* Restore the separator and move on to the next component.  */
      path[pos] = '/';
      ++pos;
    }
}
848
849/* Merge BASE with FILE.  BASE can be a directory or a file name, FILE
850   should be a file name.
851
852   file_merge("/foo/bar", "baz")  => "/foo/baz"
853   file_merge("/foo/bar/", "baz") => "/foo/bar/baz"
854   file_merge("foo", "bar")       => "bar"
855
856   In other words, it's a simpler and gentler version of uri_merge.  */
857
char *
file_merge (const char *base, const char *file)
{
  const char *slash = strrchr (base, '/');
  size_t dirlen;
  char *merged;

  /* BASE has no directory part: the result is just FILE.  */
  if (slash == NULL)
    return xstrdup (file);

  /* Keep everything in BASE before its last '/', then append '/'
     and FILE.  */
  dirlen = slash - base;
  merged = xmalloc (dirlen + 1 + strlen (file) + 1);
  memcpy (merged, base, dirlen);
  merged[dirlen] = '/';
  strcpy (merged + dirlen + 1, file);

  return merged;
}
874
875/* Like fnmatch, but performs a case-insensitive match.  */
876
int
fnmatch_nocase (const char *pattern, const char *string, int flags)
{
#ifdef FNM_CASEFOLD
  /* FNM_CASEFOLD began as a GNU extension, but is by now also found
     on *BSD platforms, and possibly elsewhere.  */
  return fnmatch (pattern, string, flags | FNM_CASEFOLD);
#else
  /* No case-folding flag available: match lower-cased stack copies
     of PATTERN and STRING instead.  */
  size_t patlen = strlen (pattern);
  size_t slen = strlen (string);
  char *lpattern = (char *) alloca (patlen + 1);
  char *lstring = (char *) alloca (slen + 1);
  size_t i;

  for (i = 0; i < patlen; i++)
    lpattern[i] = c_tolower (pattern[i]);
  lpattern[patlen] = '\0';

  for (i = 0; i < slen; i++)
    lstring[i] = c_tolower (string[i]);
  lstring[slen] = '\0';

  return fnmatch (lpattern, lstring, flags);
#endif
}
898
899static bool in_acclist (const char *const *, const char *, bool);
900
901/* Determine whether a file is acceptable to be followed, according to
902   lists of patterns to accept/reject.  */
903bool
904acceptable (const char *s)
905{
906  const char *p;
907
908  if (opt.output_document && strcmp (s, opt.output_document) == 0)
909    return true;
910
911  if ((p = strrchr (s, '/')))
912    s = p + 1;
913
914  if (opt.accepts)
915    {
916      if (opt.rejects)
917        return (in_acclist ((const char *const *)opt.accepts, s, true)
918                && !in_acclist ((const char *const *)opt.rejects, s, true));
919      else
920        return in_acclist ((const char *const *)opt.accepts, s, true);
921    }
922  else if (opt.rejects)
923    return !in_acclist ((const char *const *)opt.rejects, s, true);
924
925  return true;
926}
927
928/* Determine whether an URL is acceptable to be followed, according to
929   regex patterns to accept/reject.  */
930bool
931accept_url (const char *s)
932{
933  if (opt.acceptregex && !opt.regex_match_fun (opt.acceptregex, s))
934    return false;
935  if (opt.rejectregex && opt.regex_match_fun (opt.rejectregex, s))
936    return false;
937
938  return true;
939}
940
941/* Check if D2 is a subdirectory of D1.  E.g. if D1 is `/something', subdir_p()
942   will return true if and only if D2 begins with `/something/' or is exactly
943   '/something'.  */
944bool
945subdir_p (const char *d1, const char *d2)
946{
947  if (*d1 == '\0')
948    return true;
949  if (!opt.ignore_case)
950    for (; *d1 && *d2 && (*d1 == *d2); ++d1, ++d2)
951      ;
952  else
953    for (; *d1 && *d2 && (c_tolower (*d1) == c_tolower (*d2)); ++d1, ++d2)
954      ;
955
956  return *d1 == '\0' && (*d2 == '\0' || *d2 == '/');
957}
958
/* Iterate through DIRLIST (which must be NULL-terminated), and return
   true if any element matches DIR, through wildcards or front
   comparison (as appropriate).  */
962static bool
963dir_matches_p (const char **dirlist, const char *dir)
964{
965  const char **x;
966  int (*matcher) (const char *, const char *, int)
967    = opt.ignore_case ? fnmatch_nocase : fnmatch;
968
969  for (x = dirlist; *x; x++)
970    {
971      /* Remove leading '/' */
972      const char *p = *x + (**x == '/');
973      if (has_wildcards_p (p))
974        {
975          if (matcher (p, dir, FNM_PATHNAME) == 0)
976            break;
977        }
978      else
979        {
980          if (subdir_p (p, dir))
981            break;
982        }
983    }
984
985  return *x ? true : false;
986}
987
988/* Returns whether DIRECTORY is acceptable for download, wrt the
989   include/exclude lists.
990
991   The leading `/' is ignored in paths; relative and absolute paths
992   may be freely intermixed.  */
993
994bool
995accdir (const char *directory)
996{
997  /* Remove starting '/'.  */
998  if (*directory == '/')
999    ++directory;
1000  if (opt.includes)
1001    {
1002      if (!dir_matches_p (opt.includes, directory))
1003        return false;
1004    }
1005  if (opt.excludes)
1006    {
1007      if (dir_matches_p (opt.excludes, directory))
1008        return false;
1009    }
1010  return true;
1011}
1012
1013/* Return true if STRING ends with TAIL.  For instance:
1014
1015   match_tail ("abc", "bc", false)  -> 1
1016   match_tail ("abc", "ab", false)  -> 0
1017   match_tail ("abc", "abc", false) -> 1
1018
1019   If FOLD_CASE is true, the comparison will be case-insensitive.  */
1020
bool
match_tail (const char *string, const char *tail, bool fold_case)
{
  /* The lengths are compared as size_t.  Computing their difference
     into an int would give a wrong sign or offset once the lengths
     differ by more than INT_MAX.  */
  size_t string_len = strlen (string);
  size_t tail_len = strlen (tail);

  if (tail_len > string_len)
    return false;  /* tail is longer than string.  */

  /* Compare TAIL against the last TAIL_LEN characters of STRING.  */
  string += string_len - tail_len;

  if (!fold_case)
    return strcmp (string, tail) == 0;
  else
    return strcasecmp (string, tail) == 0;
}
1034
1035/* Checks whether string S matches each element of ACCEPTS.  A list
1036   element are matched either with fnmatch() or match_tail(),
1037   according to whether the element contains wildcards or not.
1038
1039   If the BACKWARD is false, don't do backward comparison -- just compare
1040   them normally.  */
1041static bool
1042in_acclist (const char *const *accepts, const char *s, bool backward)
1043{
1044  for (; *accepts; accepts++)
1045    {
1046      if (has_wildcards_p (*accepts))
1047        {
1048          int res = opt.ignore_case
1049            ? fnmatch_nocase (*accepts, s, 0) : fnmatch (*accepts, s, 0);
1050          /* fnmatch returns 0 if the pattern *does* match the string.  */
1051          if (res == 0)
1052            return true;
1053        }
1054      else
1055        {
1056          if (backward)
1057            {
1058              if (match_tail (s, *accepts, opt.ignore_case))
1059                return true;
1060            }
1061          else
1062            {
1063              int cmp = opt.ignore_case
1064                ? strcasecmp (s, *accepts) : strcmp (s, *accepts);
1065              if (cmp == 0)
1066                return true;
1067            }
1068        }
1069    }
1070  return false;
1071}
1072
/* Locate the suffix (file extension) of STR: the text that follows
   the last '.', provided no '/' comes after that dot.  The returned
   pointer points into STR.  Examples:
   suffix ("foo.bar")       -> "bar"
   suffix ("foo.bar.baz")   -> "baz"
   suffix ("/foo/bar")      -> NULL
   suffix ("/foo.bar/baz")  -> NULL  */
char *
suffix (const char *str)
{
  char *dot = strrchr (str, '.');

  if (dot == NULL)
    return NULL;
  /* A '/' after the last dot means the dot is not part of the final
     path component.  */
  if (strchr (dot + 1, '/') != NULL)
    return NULL;
  return dot + 1;
}
1088
/* Tell whether S contains at least one of the globbing
   metacharacters `*', `?', `[' or `]'.  */

bool
has_wildcards_p (const char *s)
{
  return strpbrk (s, "*?[]") != NULL;
}
1097
/* Return true if the suffix of FNAME, as found by suffix(), looks
   like an HTML one.  The following suffixes are accepted, compared
   case-insensitively:

     html
     htm
     ?html (`?' stands for any single character)

   #### CAVEAT.  This is not necessarily a good indication that FNAME
   refers to a file that contains HTML!  */
bool
has_html_suffix_p (const char *fname)
{
  const char *ext = suffix (fname);

  if (ext == NULL)
    return false;

  return strcasecmp (ext, "html") == 0
    || strcasecmp (ext, "htm") == 0
    || (ext[0] != '\0' && strcasecmp (ext + 1, "html") == 0);
}
1123
/* Read FILE into memory.  A pointer to `struct file_memory' is
   returned; use struct element `content' to access file contents, and
   the element `length' to know the file length.  `content' is *not*
   zero-terminated, and you should *not* read or write beyond the [0,
   length) range of characters.

   After you are done with the file contents, call wget_read_file_free to
   release the memory.

   Depending on the operating system and the type of file that is
   being read, wget_read_file() either mmap's the file into memory, or
   reads the file into the core using read().  The element `mmap_p'
   records which of the two methods was used.

   If file is named "-", fileno(stdin) is used for reading instead.
   If you want to read from a real file named "-", use "./-" instead.

   Returns NULL if FILE cannot be opened or if read() reports an
   error.  */

struct file_memory *
wget_read_file (const char *file)
{
  int fd;
  struct file_memory *fm;
  long size;
  /* Set when FD is stdin, which this function must not close.  */
  bool inhibit_close = false;

  /* Some magic in the finest tradition of Perl and its kin: if FILE
     is "-", just use stdin.  */
  if (HYPHENP (file))
    {
      fd = fileno (stdin);
      inhibit_close = true;
      /* Note that we don't inhibit mmap() in this case.  If stdin is
         redirected from a regular file, mmap() will still work.  */
    }
  else
    fd = open (file, O_RDONLY);
  if (fd < 0)
    return NULL;
  fm = xnew (struct file_memory);

#ifdef HAVE_MMAP
  {
    struct_fstat buf;
    /* The mapping length comes from fstat(); if that fails, fall
       back to the read() loop below.  */
    if (fstat (fd, &buf) < 0)
      goto mmap_lose;
    fm->length = buf.st_size;
    /* NOTE: As far as I know, the callers of this function never
       modify the file text.  Relying on this would enable us to
       specify PROT_READ and MAP_SHARED for a marginal gain in
       efficiency, but at some cost to generality.  */
    fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE, fd, 0);
    if (fm->content == (char *)MAP_FAILED)
      goto mmap_lose;
    /* The descriptor is no longer needed once the file is mapped.  */
    if (!inhibit_close)
      close (fd);

    fm->mmap_p = 1;
    return fm;
  }

 mmap_lose:
  /* The most common reason why mmap() fails is that FD does not point
     to a plain file.  However, it's also possible that mmap() doesn't
     work for a particular type of file.  Therefore, whenever mmap()
     fails, we just fall back to the regular method.  */
#endif /* HAVE_MMAP */

  /* Regular method: read() FD into a heap buffer that grows as
     needed.  FD is still open at this point.  */
  fm->length = 0;
  size = 512;                   /* number of bytes fm->content can
                                   hold at any given time. */
  fm->content = xmalloc (size);
  while (1)
    {
      wgint nread;
      if (fm->length > size / 2)
        {
          /* #### I'm not sure whether the whole exponential-growth
             thing makes sense with kernel read.  On Linux at least,
             read() refuses to read more than 4K from a file at a
             single chunk anyway.  But other Unixes might optimize it
             better, and it doesn't *hurt* anything, so I'm leaving
             it.  */

          /* Normally, we grow SIZE exponentially to make the number
             of calls to read() and realloc() logarithmic in relation
             to file size.  However, read() can read an amount of data
             smaller than requested, and it would be unreasonable to
             double SIZE every time *something* was read.  Therefore,
             we double SIZE only when the length exceeds half of the
             entire allocated size.  */
          size <<= 1;
          fm->content = xrealloc (fm->content, size);
        }
      /* Fill whatever room remains after the bytes read so far.  */
      nread = read (fd, fm->content + fm->length, size - fm->length);
      if (nread > 0)
        /* Successful read. */
        fm->length += nread;
      else if (nread < 0)
        /* Error. */
        goto lose;
      else
        /* EOF */
        break;
    }
  if (!inhibit_close)
    close (fd);
  /* For an empty file (length 0) the buffer is left at its initial
     size instead of being resized to zero bytes.  */
  if (size > fm->length && fm->length != 0)
    /* Due to exponential growth of fm->content, the allocated region
       might be much larger than what is actually needed.  */
    fm->content = xrealloc (fm->content, fm->length);
  fm->mmap_p = 0;
  return fm;

 lose:
  /* read() failed: release everything acquired so far.  */
  if (!inhibit_close)
    close (fd);
  xfree (fm->content);
  xfree (fm);
  return NULL;
}
1244
1245/* Release the resources held by FM.  Specifically, this calls
1246   munmap() or xfree() on fm->content, depending whether mmap or
1247   malloc/read were used to read in the file.  It also frees the
1248   memory needed to hold the FM structure itself.  */
1249
1250void
1251wget_read_file_free (struct file_memory *fm)
1252{
1253#ifdef HAVE_MMAP
1254  if (fm->mmap_p)
1255    {
1256      munmap (fm->content, fm->length);
1257    }
1258  else
1259#endif
1260    {
1261      xfree (fm->content);
1262    }
1263  xfree (fm);
1264}
1265
/* Release every string in the NULL-terminated vector VEC, then VEC
   itself.  A NULL VEC is accepted and ignored.  */
void
free_vec (char **vec)
{
  size_t i;

  if (vec == NULL)
    return;

  for (i = 0; vec[i] != NULL; i++)
    xfree (vec[i]);
  xfree (vec);
}
1279
/* Append vector V2 to vector V1.  Both are NULL-terminated vectors
   of pointers.  The function frees V2 and reallocates V1, so neither
   pointer may be used after the call; use the return value instead.
   The elements of V2 are moved into the result, not copied.

   If V1 is NULL, V2 is returned; if V2 is NULL, V1 is returned.  */
char **
merge_vecs (char **v1, char **v2)
{
  size_t n1, n2;

  if (!v1)
    return v2;
  if (!v2)
    return v1;
  if (!*v2)
    {
      /* V2 holds nothing but its terminator: just release it.  */
      xfree (v2);
      return v1;
    }
  /* Count v1.  */
  for (n1 = 0; v1[n1]; n1++)
    ;
  /* Count v2.  */
  for (n2 = 0; v2[n2]; n2++)
    ;
  /* Reallocate v1 with room for both sets of elements and the
     terminator.  The element size is taken from the vector itself
     (a `char *'), matching the memcpy below.  */
  v1 = xrealloc (v1, (n1 + n2 + 1) * sizeof *v1);
  /* Copy v2's elements together with its terminating NULL.  */
  memcpy (v1 + n1, v2, (n2 + 1) * sizeof *v2);
  xfree (v2);
  return v1;
}
1310
/* Add a freshly allocated copy of STR at the end of the
   NULL-terminated vector VEC.  VEC may be NULL, in which case a new
   vector is created.  The (possibly relocated) vector is returned.  */

char **
vec_append (char **vec, const char *str)
{
  int used = 0;                 /* strings already stored in VEC */

  if (vec != NULL)
    while (vec[used] != NULL)
      used++;

  /* Make room for the existing strings, the new one and the NULL. */
  vec = xrealloc (vec, (used + 2) * sizeof (char *));
  vec[used] = xstrdup (str);
  vec[used + 1] = NULL;
  return vec;
}
1334
1335/* Sometimes it's useful to create "sets" of strings, i.e. special
1336   hash tables where you want to store strings as keys and merely
1337   query for their existence.  Here is a set of utility routines that
1338   makes that transparent.  */
1339
void
string_set_add (struct hash_table *ht, const char *s)
{
  /* Insert only when S is not present yet.  That way an existing
     key never has to be freed and replaced by a new copy.

     The value stored is the string literal "1": a clear arbitrary
     value that costs no extra memory, since every entry of every
     `set' hash table points to the same literal.  */
  if (!hash_table_contains (ht, s))
    hash_table_put (ht, xstrdup (s), "1");
}
1355
/* Query the string set HT for S.  This simply forwards to
   hash_table_contains and returns its result.  */

int
string_set_contains (struct hash_table *ht, const char *s)
{
  const int found = hash_table_contains (ht, s);
  return found;
}
1363
1364/* Convert the specified string set to array.  ARRAY should be large
1365   enough to hold hash_table_count(ht) char pointers.  */
1366
1367void string_set_to_array (struct hash_table *ht, char **array)
1368{
1369  hash_table_iterator iter;
1370  for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1371    *array++ = iter.key;
1372}
1373
1374/* Free the string set.  This frees both the storage allocated for
1375   keys and the actual hash table.  (hash_table_destroy would only
1376   destroy the hash table.)  */
1377
1378void
1379string_set_free (struct hash_table *ht)
1380{
1381  hash_table_iterator iter;
1382  for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1383    xfree (iter.key);
1384  hash_table_destroy (ht);
1385}
1386
1387/* Utility function: simply call xfree() on all keys and values of HT.  */
1388
1389void
1390free_keys_and_values (struct hash_table *ht)
1391{
1392  hash_table_iterator iter;
1393  for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); )
1394    {
1395      xfree (iter.key);
1396      xfree (iter.value);
1397    }
1398}
1399
/* Fetch the digit grouping data for thousand separators: the
   separator string is stored in *SEP and the grouping info in
   *GROUPING.  The data is obtained from localeconv() on the first
   call and cached for all later calls.

   In locales that don't set a thousand separator (such as the "C"
   locale), the separator is forced to "," -- or to "." when ',' is
   the decimal point -- with groups of three digits.  We are now only
   showing thousand separators in one place, so this shouldn't be a
   problem in practice.  */

static void
get_grouping_data (const char **sep, const char **grouping)
{
  static const char *saved_sep;
  static const char *saved_grouping;
  static bool ready;

  if (!ready)
    {
      /* Ask the locale for its grouping info. */
      const struct lconv *lc = localeconv ();
      const char *locale_sep = lc->thousands_sep;
      const char *locale_grouping = lc->grouping;

#if ! USE_NLS_PROGRESS_BAR
      /* We can't count column widths, so a separator longer than one
       * byte is discarded (the check below then picks a replacement). */
      if (strlen (locale_sep) > 1)
        locale_sep = "";
#endif
      if (*locale_sep == '\0')
        {
          /* Many locales (such as "C" or "hr_HR") don't specify a
             separator, but we still want grouping for legibility.
             Use ',' there, unless that character is the decimal
             point, in which case use '.'.  */
          locale_sep = (*lc->decimal_point == ',') ? "." : ",";
          locale_grouping = "\x03";
        }

      saved_sep = locale_sep;
      saved_grouping = locale_grouping;
      ready = true;
    }

  *sep = saved_sep;
  *grouping = saved_grouping;
}
1445
1446/* Return a printed representation of N with thousand separators.
1447   This should respect locale settings, with the exception of the "C"
1448   locale which mandates no separator, but we use one anyway.
1449
1450   Unfortunately, we cannot use %'d (in fact it would be %'j) to get
1451   the separators because it's too non-portable, and it's hard to test
1452   for this feature at configure time.  Besides, it wouldn't display
1453   separators in the "C" locale, still used by many Unix users.  */
1454
1455const char *
1456with_thousand_seps (wgint n)
1457{
1458  static char outbuf[48];
1459  char *p = outbuf + sizeof outbuf;
1460
1461  /* Info received from locale */
1462  const char *grouping, *sep;
1463  int seplen;
1464
1465  /* State information */
1466  int i = 0, groupsize;
1467  const char *atgroup;
1468
1469  bool negative = n < 0;
1470
1471  /* Initialize grouping data. */
1472  get_grouping_data (&sep, &grouping);
1473  seplen = strlen (sep);
1474  atgroup = grouping;
1475  groupsize = *atgroup++;
1476
1477  /* This would overflow on WGINT_MIN, but printing negative numbers
1478     is not an important goal of this fuinction.  */
1479  if (negative)
1480    n = -n;
1481
1482  /* Write the number into the buffer, backwards, inserting the
1483     separators as necessary.  */
1484  *--p = '\0';
1485  while (1)
1486    {
1487      *--p = n % 10 + '0';
1488      n /= 10;
1489      if (n == 0)
1490        break;
1491      /* Prepend SEP to every groupsize'd digit and get new groupsize.  */
1492      if (++i == groupsize)
1493        {
1494          if (seplen == 1)
1495            *--p = *sep;
1496          else
1497            memcpy (p -= seplen, sep, seplen);
1498          i = 0;
1499          if (*atgroup)
1500            groupsize = *atgroup++;
1501        }
1502    }
1503  if (negative)
1504    *--p = '-';
1505
1506  return p;
1507}
1508
1509/* N, a byte quantity, is converted to a human-readable abberviated
1510   form a la sizes printed by `ls -lh'.  The result is written to a
1511   static buffer, a pointer to which is returned.
1512
1513   Unlike `with_thousand_seps', this approximates to the nearest unit.
1514   Quoting GNU libit: "Most people visually process strings of 3-4
1515   digits effectively, but longer strings of digits are more prone to
1516   misinterpretation.  Hence, converting to an abbreviated form
1517   usually improves readability."
1518
1519   This intentionally uses kilobyte (KB), megabyte (MB), etc. in their
1520   original computer-related meaning of "powers of 1024".  We don't
1521   use the "*bibyte" names invented in 1998, and seldom used in
1522   practice.  Wikipedia's entry on "binary prefix" discusses this in
1523   some detail.  */
1524
1525char *
1526human_readable (HR_NUMTYPE n, const int acc, const int decimals)
1527{
1528  /* These suffixes are compatible with those of GNU `ls -lh'. */
1529  static char powers[] =
1530    {
1531      'K',                      /* kilobyte, 2^10 bytes */
1532      'M',                      /* megabyte, 2^20 bytes */
1533      'G',                      /* gigabyte, 2^30 bytes */
1534      'T',                      /* terabyte, 2^40 bytes */
1535      'P',                      /* petabyte, 2^50 bytes */
1536      'E',                      /* exabyte,  2^60 bytes */
1537    };
1538  static char buf[8];
1539  size_t i;
1540
1541  /* If the quantity is smaller than 1K, just print it. */
1542  if (n < 1024)
1543    {
1544      snprintf (buf, sizeof (buf), "%d", (int) n);
1545      return buf;
1546    }
1547
1548  /* Loop over powers, dividing N with 1024 in each iteration.  This
1549     works unchanged for all sizes of wgint, while still avoiding
1550     non-portable `long double' arithmetic.  */
1551  for (i = 0; i < countof (powers); i++)
1552    {
1553      /* At each iteration N is greater than the *subsequent* power.
1554         That way N/1024.0 produces a decimal number in the units of
1555         *this* power.  */
1556      if ((n / 1024) < 1024 || i == countof (powers) - 1)
1557        {
1558          double val = n / 1024.0;
1559          /* Print values smaller than the accuracy level (acc) with (decimal)
1560           * decimal digits, and others without any decimals.  */
1561          snprintf (buf, sizeof (buf), "%.*f%c",
1562                    val < acc ? decimals : 0, val, powers[i]);
1563          return buf;
1564        }
1565      n /= 1024;
1566    }
1567  return NULL;                  /* unreached */
1568}
1569
1570/* Count the digits in the provided number.  Used to allocate space
1571   when printing numbers.  */
1572
1573int
1574numdigit (wgint number)
1575{
1576  int cnt = 1;
1577  if (number < 0)
1578    ++cnt;                      /* accomodate '-' */
1579  while ((number /= 10) != 0)
1580    ++cnt;
1581  return cnt;
1582}
1583
/* Store the decimal digit n / MASK as a character at *p and advance
   p.  The macro refers to variables named `n' and `p', which must be
   in scope wherever it is expanded.  */
#define PR(mask) *p++ = n / (mask) + '0'

/* DIGITS_<D> is used to print a D-digit number and should be called
   with mask==10^(D-1).  It prints n/mask (the first digit), reducing
   n to n%mask (the remaining digits), and calling DIGITS_<D-1>.
   Recursively this continues until DIGITS_1 is invoked.  Each macro
   expands to a single comma expression.  */

#define DIGITS_1(mask) PR (mask)
#define DIGITS_2(mask) PR (mask), n %= (mask), DIGITS_1 ((mask) / 10)
#define DIGITS_3(mask) PR (mask), n %= (mask), DIGITS_2 ((mask) / 10)
#define DIGITS_4(mask) PR (mask), n %= (mask), DIGITS_3 ((mask) / 10)
#define DIGITS_5(mask) PR (mask), n %= (mask), DIGITS_4 ((mask) / 10)
#define DIGITS_6(mask) PR (mask), n %= (mask), DIGITS_5 ((mask) / 10)
#define DIGITS_7(mask) PR (mask), n %= (mask), DIGITS_6 ((mask) / 10)
#define DIGITS_8(mask) PR (mask), n %= (mask), DIGITS_7 ((mask) / 10)
#define DIGITS_9(mask) PR (mask), n %= (mask), DIGITS_8 ((mask) / 10)
#define DIGITS_10(mask) PR (mask), n %= (mask), DIGITS_9 ((mask) / 10)

/* DIGITS_<11-19> are only used on machines with 64-bit wgints. */

#define DIGITS_11(mask) PR (mask), n %= (mask), DIGITS_10 ((mask) / 10)
#define DIGITS_12(mask) PR (mask), n %= (mask), DIGITS_11 ((mask) / 10)
#define DIGITS_13(mask) PR (mask), n %= (mask), DIGITS_12 ((mask) / 10)
#define DIGITS_14(mask) PR (mask), n %= (mask), DIGITS_13 ((mask) / 10)
#define DIGITS_15(mask) PR (mask), n %= (mask), DIGITS_14 ((mask) / 10)
#define DIGITS_16(mask) PR (mask), n %= (mask), DIGITS_15 ((mask) / 10)
#define DIGITS_17(mask) PR (mask), n %= (mask), DIGITS_16 ((mask) / 10)
#define DIGITS_18(mask) PR (mask), n %= (mask), DIGITS_17 ((mask) / 10)
#define DIGITS_19(mask) PR (mask), n %= (mask), DIGITS_18 ((mask) / 10)

/* Shorthand for casting to wgint: written as `(W)' in front of a
   constant.  */
#define W wgint
1616
1617/* Print NUMBER to BUFFER in base 10.  This is equivalent to
1618   `sprintf(buffer, "%lld", (long long) number)', only typically much
1619   faster and portable to machines without long long.
1620
1621   The speedup may make a difference in programs that frequently
1622   convert numbers to strings.  Some implementations of sprintf,
1623   particularly the one in some versions of GNU libc, have been known
1624   to be quite slow when converting integers to strings.
1625
1626   Return the pointer to the location where the terminating zero was
1627   printed.  (Equivalent to calling buffer+strlen(buffer) after the
1628   function is done.)
1629
1630   BUFFER should be large enough to accept as many bytes as you expect
1631   the number to take up.  On machines with 64-bit wgints the maximum
1632   needed size is 24 bytes.  That includes the digits needed for the
1633   largest 64-bit number, the `-' sign in case it's negative, and the
1634   terminating '\0'.  */
1635
1636char *
1637number_to_string (char *buffer, wgint number)
1638{
1639  char *p = buffer;
1640  wgint n = number;
1641
1642  int last_digit_char = 0;
1643
1644#if (SIZEOF_WGINT != 4) && (SIZEOF_WGINT != 8)
1645  /* We are running in a very strange environment.  Leave the correct
1646     printing to sprintf.  */
1647  p += sprintf (buf, "%j", (intmax_t) (n));
1648#else  /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1649
1650  if (n < 0)
1651    {
1652      if (n < -WGINT_MAX)
1653        {
1654          /* n = -n would overflow because -n would evaluate to a
1655             wgint value larger than WGINT_MAX.  Need to make n
1656             smaller and handle the last digit separately.  */
1657          int last_digit = n % 10;
1658          /* The sign of n%10 is implementation-defined. */
1659          if (last_digit < 0)
1660            last_digit_char = '0' - last_digit;
1661          else
1662            last_digit_char = '0' + last_digit;
1663          /* After n is made smaller, -n will not overflow. */
1664          n /= 10;
1665        }
1666
1667      *p++ = '-';
1668      n = -n;
1669    }
1670
1671  /* Use the DIGITS_ macro appropriate for N's number of digits.  That
1672     way printing any N is fully open-coded without a loop or jump.
1673     (Also see description of DIGITS_*.)  */
1674
1675  if      (n < 10)                       DIGITS_1 (1);
1676  else if (n < 100)                      DIGITS_2 (10);
1677  else if (n < 1000)                     DIGITS_3 (100);
1678  else if (n < 10000)                    DIGITS_4 (1000);
1679  else if (n < 100000)                   DIGITS_5 (10000);
1680  else if (n < 1000000)                  DIGITS_6 (100000);
1681  else if (n < 10000000)                 DIGITS_7 (1000000);
1682  else if (n < 100000000)                DIGITS_8 (10000000);
1683  else if (n < 1000000000)               DIGITS_9 (100000000);
1684#if SIZEOF_WGINT == 4
1685  /* wgint is 32 bits wide: no number has more than 10 digits. */
1686  else                                   DIGITS_10 (1000000000);
1687#else
1688  /* wgint is 64 bits wide: handle numbers with 9-19 decimal digits.
1689     Constants are constructed by compile-time multiplication to avoid
1690     dealing with different notations for 64-bit constants
1691     (nL/nLL/nI64, depending on the compiler and architecture).  */
1692  else if (n < 10*(W)1000000000)         DIGITS_10 (1000000000);
1693  else if (n < 100*(W)1000000000)        DIGITS_11 (10*(W)1000000000);
1694  else if (n < 1000*(W)1000000000)       DIGITS_12 (100*(W)1000000000);
1695  else if (n < 10000*(W)1000000000)      DIGITS_13 (1000*(W)1000000000);
1696  else if (n < 100000*(W)1000000000)     DIGITS_14 (10000*(W)1000000000);
1697  else if (n < 1000000*(W)1000000000)    DIGITS_15 (100000*(W)1000000000);
1698  else if (n < 10000000*(W)1000000000)   DIGITS_16 (1000000*(W)1000000000);
1699  else if (n < 100000000*(W)1000000000)  DIGITS_17 (10000000*(W)1000000000);
1700  else if (n < 1000000000*(W)1000000000) DIGITS_18 (100000000*(W)1000000000);
1701  else                                   DIGITS_19 (1000000000*(W)1000000000);
1702#endif
1703
1704  if (last_digit_char)
1705    *p++ = last_digit_char;
1706
1707  *p = '\0';
1708#endif /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */
1709
1710  return p;
1711}
1712
1713#undef PR
1714#undef W
1715#undef SPRINTF_WGINT
1716#undef DIGITS_1
1717#undef DIGITS_2
1718#undef DIGITS_3
1719#undef DIGITS_4
1720#undef DIGITS_5
1721#undef DIGITS_6
1722#undef DIGITS_7
1723#undef DIGITS_8
1724#undef DIGITS_9
1725#undef DIGITS_10
1726#undef DIGITS_11
1727#undef DIGITS_12
1728#undef DIGITS_13
1729#undef DIGITS_14
1730#undef DIGITS_15
1731#undef DIGITS_16
1732#undef DIGITS_17
1733#undef DIGITS_18
1734#undef DIGITS_19
1735
1736#define RING_SIZE 3
1737
1738/* Print NUMBER to a statically allocated string and return a pointer
1739   to the printed representation.
1740
1741   This function is intended to be used in conjunction with printf.
1742   It is hard to portably print wgint values:
1743    a) you cannot use printf("%ld", number) because wgint can be long
1744       long on 32-bit machines with LFS.
1745    b) you cannot use printf("%lld", number) because NUMBER could be
1746       long on 32-bit machines without LFS, or on 64-bit machines,
1747       which do not require LFS.  Also, Windows doesn't support %lld.
1748    c) you cannot use printf("%j", (int_max_t) number) because not all
1749       versions of printf support "%j", the most notable being the one
1750       on Windows.
1751    d) you cannot #define WGINT_FMT to the appropriate format and use
1752       printf(WGINT_FMT, number) because that would break translations
1753       for user-visible messages, such as printf("Downloaded: %d
1754       bytes\n", number).
1755
1756   What you should use instead is printf("%s", number_to_static_string
1757   (number)).
1758
1759   CAVEAT: since the function returns pointers to static data, you
1760   must be careful to copy its result before calling it again.
1761   However, to make it more useful with printf, the function maintains
1762   an internal ring of static buffers to return.  That way things like
1763   printf("%s %s", number_to_static_string (num1),
1764   number_to_static_string (num2)) work as expected.  Three buffers
1765   are currently used, which means that "%s %s %s" will work, but "%s
1766   %s %s %s" won't.  If you need to print more than three wgints,
1767   bump the RING_SIZE (or rethink your message.)  */
1768
1769char *
1770number_to_static_string (wgint number)
1771{
1772  static char ring[RING_SIZE][24];
1773  static int ringpos;
1774  char *buf = ring[ringpos];
1775  number_to_string (buf, number);
1776  ringpos = (ringpos + 1) % RING_SIZE;
1777  return buf;
1778}
1779
1780/* Converts the byte to bits format if --report-bps option is enabled
1781 */
1782wgint
1783convert_to_bits (wgint num)
1784{
1785  if (opt.report_bps)
1786    return num * 8;
1787  return num;
1788}
1789
1790
/* Return the width, in columns, of the terminal we're running on,
   or 0 if it cannot be determined.  The terminal is queried through
   stderr.  */

int
determine_screen_width (void)
{
  /* If there's a way to get the terminal size using POSIX
     tcgetattr(), somebody please tell me.  */
#ifdef TIOCGWINSZ
  struct winsize ws;

  /* No width is reported when opt.lfilename is set.  */
  if (opt.lfilename != NULL)
    return 0;

  if (ioctl (fileno (stderr), TIOCGWINSZ, &ws) < 0)
    return 0;                   /* most likely ENOTTY */

  return ws.ws_col;
#elif defined(WINDOWS)
  CONSOLE_SCREEN_BUFFER_INFO info;

  if (GetConsoleScreenBufferInfo (GetStdHandle (STD_ERROR_HANDLE), &info))
    return info.dwSize.X;
  return 0;
#else  /* neither TIOCGWINSZ nor WINDOWS */
  return 0;
#endif /* neither TIOCGWINSZ nor WINDOWS */
}
1820
/* Nonzero once the random number system (either rand or [dl]rand48)
   has been seeded.  */
static int rnd_seeded;

/* Return a random number between 0 and MAX-1, inclusive.

   The generator is seeded from the current time and the process ID
   the first time it is needed.

   If the system does not support lrand48 and MAX is greater than the
   value of RAND_MAX+1 on the system, the returned value will be in
   the range [0, RAND_MAX].  This may be fixed in a future release.

   This uses lrand48 where available, rand elsewhere.  DO NOT use it
   for cryptography.  It is only meant to be used in situations where
   quality of the random numbers returned doesn't really matter.  */

int
random_number (int max)
{
#ifdef HAVE_DRAND48
  if (!rnd_seeded)
    {
      const long seed = (long) time (NULL) ^ (long) getpid ();
      srand48 (seed);
      rnd_seeded = 1;
    }
  return lrand48 () % max;
#else  /* not HAVE_DRAND48 */

  double scaled;
  int raw;

  if (!rnd_seeded)
    {
      const unsigned seed = (unsigned) time (NULL) ^ (unsigned) getpid ();
      srand (seed);
      rnd_seeded = 1;
    }
  raw = rand ();

  /* Like rand() % max, but based on the high-order bits, which are
     more random on architectures where rand() is implemented using a
     simple congruential generator.  */

  scaled = (double) max * raw / (RAND_MAX + 1.0);
  return (int) scaled;

#endif /* not HAVE_DRAND48 */
}
1867
/* Return a random uniformly distributed floating point number in the
   [0, 1) range.  drand48 is used where available (seeding the
   generator from the time and process ID on first use); elsewhere
   the result is assembled, as a really lame kludge, from four calls
   to random_number, each contributing four decimal digits.  */

double
random_float (void)
{
#ifdef HAVE_DRAND48
  if (!rnd_seeded)
    {
      const long seed = (long) time (NULL) ^ (long) getpid ();
      srand48 (seed);
      rnd_seeded = 1;
    }
  return drand48 ();
#else  /* not HAVE_DRAND48 */
  /* Each term supplies the next four decimal places.  */
  return (  random_number (10000) / 10000.0
          + random_number (10000) / (10000.0 * 10000.0)
          + random_number (10000) / (10000.0 * 10000.0 * 10000.0)
          + random_number (10000) / (10000.0 * 10000.0 * 10000.0 * 10000.0));
#endif /* not HAVE_DRAND48 */
}
1889
1890/* Implementation of run_with_timeout, a generic timeout-forcing
1891   routine for systems with Unix-like signal handling.  */
1892
1893#ifdef USE_SIGNAL_TIMEOUT
1894# ifdef HAVE_SIGSETJMP
1895#  define SETJMP(env) sigsetjmp (env, 1)
1896
1897static sigjmp_buf run_with_timeout_env;
1898
1899static void _Noreturn
1900abort_run_with_timeout (int sig)
1901{
1902  assert (sig == SIGALRM);
1903  siglongjmp (run_with_timeout_env, -1);
1904}
1905# else /* not HAVE_SIGSETJMP */
1906#  define SETJMP(env) setjmp (env)
1907
1908static jmp_buf run_with_timeout_env;
1909
1910static void
1911abort_run_with_timeout (int sig)
1912{
1913  assert (sig == SIGALRM);
1914  /* We don't have siglongjmp to preserve the set of blocked signals;
1915     if we longjumped out of the handler at this point, SIGALRM would
1916     remain blocked.  We must unblock it manually. */
1917  sigset_t set;
1918  sigemptyset (&set);
1919  sigaddset (&set, SIGALRM);
1920  sigprocmask (SIG_BLOCK, &set, NULL);
1921
1922  /* Now it's safe to longjump. */
1923  longjmp (run_with_timeout_env, -1);
1924}
1925# endif /* not HAVE_SIGSETJMP */
1926
/* Schedule SIGALRM to be delivered in TIMEOUT seconds, through
   setitimer where available and alarm otherwise.

   TIMEOUT should be non-zero.  If the timeout value is so small that
   it would be rounded to zero, it is rounded to the least legal value
   instead (1us for setitimer, 1s for alarm).  That ensures that
   SIGALRM will be delivered in all cases.  */

static void
alarm_set (double timeout)
{
#ifdef ITIMER_REAL
  /* Use the modern itimer interface. */
  struct itimerval itv;
  const long whole = (long) timeout;    /* full seconds in TIMEOUT */

  xzero (itv);
  itv.it_value.tv_sec = whole;
  itv.it_value.tv_usec = 1000000 * (timeout - whole);
  if (itv.it_value.tv_sec == 0 && itv.it_value.tv_usec == 0)
    /* Wait for at least the minimum interval: specifying zero would
       mean "wait forever".  */
    itv.it_value.tv_usec = 1;
  setitimer (ITIMER_REAL, &itv, NULL);
#else  /* not ITIMER_REAL */
  /* Use the old alarm() interface.  TIMEOUTs smaller than 1 are
     rounded to 1, not to zero: alarm(0) means "never deliver the
     alarm", i.e. "wait forever", which is not what someone who
     specifies a 0.5s timeout would expect.  */
  int secs = (int) timeout;
  alarm (secs != 0 ? secs : 1);
#endif /* not ITIMER_REAL */
}
1961
/* Disarm whatever alarm_set scheduled, using the same interface
   (setitimer with an all-zero value, or alarm(0)).  */

static void
alarm_cancel (void)
{
#ifdef ITIMER_REAL
  struct itimerval no_timer;

  /* A zeroed it_value turns the timer off.  */
  xzero (no_timer);
  setitimer (ITIMER_REAL, &no_timer, NULL);
#else  /* not ITIMER_REAL */
  alarm (0);
#endif /* not ITIMER_REAL */
}
1975
1976/* Call FUN(ARG), but don't allow it to run for more than TIMEOUT
1977   seconds.  Returns true if the function was interrupted with a
1978   timeout, false otherwise.
1979
1980   This works by setting up SIGALRM to be delivered in TIMEOUT seconds
1981   using setitimer() or alarm().  The timeout is enforced by
1982   longjumping out of the SIGALRM handler.  This has several
1983   advantages compared to the traditional approach of relying on
1984   signals causing system calls to exit with EINTR:
1985
1986     * The callback function is *forcibly* interrupted after the
1987       timeout expires, (almost) regardless of what it was doing and
1988       whether it was in a syscall.  For example, a calculation that
1989       takes a long time is interrupted as reliably as an IO
1990       operation.
1991
1992     * It works with both SYSV and BSD signals because it doesn't
1993       depend on the default setting of SA_RESTART.
1994
1995     * It doesn't require special handler setup beyond a simple call
1996       to signal().  (It does use sigsetjmp/siglongjmp, but they're
1997       optional.)
1998
1999   The only downside is that, if FUN allocates internal resources that
2000   are normally freed prior to exit from the functions, they will be
2001   lost in case of timeout.  */
2002
2003bool
2004run_with_timeout (double timeout, void (*fun) (void *), void *arg)
2005{
2006  int saved_errno;
2007
2008  if (timeout == 0)
2009    {
2010      fun (arg);
2011      return false;
2012    }
2013
2014  signal (SIGALRM, abort_run_with_timeout);
2015  if (SETJMP (run_with_timeout_env) != 0)
2016    {
2017      /* Longjumped out of FUN with a timeout. */
2018      signal (SIGALRM, SIG_DFL);
2019      return true;
2020    }
2021  alarm_set (timeout);
2022  fun (arg);
2023
2024  /* Preserve errno in case alarm() or signal() modifies it. */
2025  saved_errno = errno;
2026  alarm_cancel ();
2027  signal (SIGALRM, SIG_DFL);
2028  errno = saved_errno;
2029
2030  return false;
2031}
2032
2033#else  /* not USE_SIGNAL_TIMEOUT */
2034
2035#ifndef WINDOWS
/* Fallback run_with_timeout for builds without signal-based timeouts:
   FUN(ARG) is called directly and TIMEOUT is ignored, so the result
   is always false (never "timed out").  Windows is left out because
   it has its own thread-based run_with_timeout.  */

bool
run_with_timeout (double timeout, void (*fun) (void *), void *arg)
{
  (void) timeout;               /* no way to enforce it here */
  (*fun) (arg);
  return false;
}
2046#endif /* not WINDOWS */
2047#endif /* not USE_SIGNAL_TIMEOUT */
2048
2049#ifndef WINDOWS
2050
/* Suspend execution for SECONDS seconds (fractions allowed).  Only
   the nanosleep() variant resumes after being interrupted by a
   signal; the other variants may return early.  */

void
xsleep (double seconds)
{
#ifdef HAVE_NANOSLEEP
  /* Preferred: nanosleep is accurate and reports how much time was
     left when a signal (SIGWINCH, say) cut the sleep short, so we can
     go back to sleep for the remainder.  (A Debian bug report about
     --limit-rate misbehaving during terminal resizes prompted this.)  */
  struct timespec request, leftover;

  request.tv_sec = (long) seconds;
  request.tv_nsec = 1000000000 * (seconds - (long) seconds);
  for (;;)
    {
      if (nanosleep (&request, &leftover) >= 0 || errno != EINTR)
        break;
      /* Interrupted by a signal: sleep for whatever is left.  */
      request = leftover;
    }
#elif defined(HAVE_USLEEP)
  /* usleep is preferred over select when available.  */
  if (seconds >= 1)
    {
      /* Some usleep implementations reject arguments above
         1,000,000, so let sleep handle the whole seconds and leave
         only the fraction to usleep.  */
      sleep (seconds);
      seconds -= (long) seconds;
    }
  usleep (seconds * 1000000);
#else /* fall back select */
  /* Winsock's select cannot be used this way because it ignores the
     timeout when every fd set is NULL.  (Cygwin's Unix-compatible
     select does honour it.)  */
  struct timeval delay;

  delay.tv_sec = (long) seconds;
  delay.tv_usec = 1000000 * (seconds - (long) seconds);
  select (0, NULL, NULL, NULL, &delay);
  /* A -1/EINTR result means a signal interrupted us, but with no
     record of the time already slept we cannot resume.  Tracking it
     with gettimeofday would be slow and unreliable due to clock
     skew.  */
#endif
}
2096
2097#endif /* not WINDOWS */
2098
/* Encode the LENGTH octets at DATA as base64 and store the result in
   DEST.  The output is zero-terminated, so DEST must point to a
   writable buffer of at least 1+BASE64_LENGTH(length) bytes.  The
   return value is the length of the base64 text, not counting the
   terminating zero.

   LENGTH may be 0, in which case DATA is not read and an empty string
   is produced.

   This implementation does not emit newlines after 76 characters of
   base64 data.  */

size_t
base64_encode (const void *data, size_t length, char *dest)
{
  /* Conversion table.  */
  static const char tbl[64] = {
    'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P',
    'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f',
    'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v',
    'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/'
  };
  /* Access bytes in DATA as unsigned char, otherwise the shifts below
     don't work for data with MSB set. */
  const unsigned char *s = data;
  /* Count the bytes still to encode instead of comparing against an
     end pointer: "data + length - 2" would point before the buffer
     for LENGTH < 2, which is undefined behavior.  */
  size_t left = length;
  char *p = dest;

  /* Transform the 3x8 bits to 4x6 bits, as required by base64.  */
  for (; left >= 3; left -= 3, s += 3)
    {
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
      *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)];
      *p++ = tbl[s[2] & 0x3f];
    }

  /* Encode the one or two trailing bytes, padding with '='.  */
  switch (left)
    {
    case 1:
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[(s[0] & 3) << 4];
      *p++ = '=';
      *p++ = '=';
      break;
    case 2:
      *p++ = tbl[s[0] >> 2];
      *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)];
      *p++ = tbl[((s[1] & 0xf) << 2)];
      *p++ = '=';
      break;
    default:
      /* LENGTH was a multiple of 3; nothing left over.  */
      break;
    }
  /* ...and zero-terminate it.  */
  *p = '\0';

  return p - dest;
}
2155
/* Store in C the next non-whitespace character from the string P, or
   \0 when end of string is reached.  Whitespace is whatever c_isspace
   reports.  P is advanced past every character read, so it must be a
   modifiable pointer; both arguments are expanded unparenthesized and
   should be plain variable names.  */
#define NEXT_CHAR(c, p) do {                    \
  c = (unsigned char) *p++;                     \
} while (c_isspace (c))

/* True when bit 7 of C is clear; for an unsigned char value that
   means C is in the range 0-127.  */
#define IS_ASCII(c) (((c) & 0x80) == 0)
2163
/* Decode data from BASE64 (a null-terminated string) into memory
   pointed to by DEST.  DEST is assumed to be large enough to
   accommodate the decoded data, which is guaranteed to be no more than
   3/4*strlen(base64).

   Whitespace in the input (as reported by c_isspace) is skipped
   wherever it occurs.

   Since DEST is assumed to contain binary data, it is not
   NUL-terminated.  The function returns the length of the data
   written to DEST.  -1 is returned in case of error caused by
   malformed base64 input; bytes decoded before the error was detected
   have already been stored in DEST by then.

   This function originates from Free Recode.  */

ssize_t
base64_decode (const char *base64, void *dest)
{
  /* Table of base64 values for first 128 characters.  Note that this
     assumes ASCII (but so does Wget in other places).  Entries of -1
     mark characters that are not base64 digits.  */
  static const signed char base64_char_to_value[128] =
    {
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*   0-  9 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  10- 19 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  20- 29 */
      -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  /*  30- 39 */
      -1,  -1,  -1,  62,  -1,  -1,  -1,  63,  52,  53,  /*  40- 49 */
      54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  /*  50- 59 */
      -1,  -1,  -1,  -1,  -1,  0,   1,   2,   3,   4,   /*  60- 69 */
      5,   6,   7,   8,   9,   10,  11,  12,  13,  14,  /*  70- 79 */
      15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  /*  80- 89 */
      25,  -1,  -1,  -1,  -1,  -1,  -1,  26,  27,  28,  /*  90- 99 */
      29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  /* 100-109 */
      39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  /* 110-119 */
      49,  50,  51,  -1,  -1,  -1,  -1,  -1             /* 120-127 */
    };
  /* The table has only 128 entries, so BASE64_CHAR_TO_VALUE must only
     be applied to characters below 128.  IS_BASE64 guarantees that by
     testing IS_ASCII first; the direct uses below all come after a
     successful IS_BASE64 check that also excluded '='.  */
#define BASE64_CHAR_TO_VALUE(c) ((int) base64_char_to_value[c])
#define IS_BASE64(c) ((IS_ASCII (c) && BASE64_CHAR_TO_VALUE (c) >= 0) || c == '=')

  const char *p = base64;       /* read position in the input */
  char *q = dest;               /* write position in the output */

  /* Each iteration consumes one quadruplet of base64 characters and
     emits up to three bytes.  VALUE accumulates the 24 bits of the
     current group.  */
  while (1)
    {
      unsigned char c;
      unsigned long value;

      /* Process first byte of a quadruplet.  End of input is only
         legal here, on a quadruplet boundary.  */
      NEXT_CHAR (c, p);
      if (!c)
        break;
      if (c == '=' || !IS_BASE64 (c))
        return -1;              /* illegal char while decoding base64 */
      value = BASE64_CHAR_TO_VALUE (c) << 18;

      /* Process second byte of a quadruplet.  */
      NEXT_CHAR (c, p);
      if (!c)
        return -1;              /* premature EOF while decoding base64 */
      if (c == '=' || !IS_BASE64 (c))
        return -1;              /* illegal char while decoding base64 */
      value |= BASE64_CHAR_TO_VALUE (c) << 12;
      /* Two characters carry 12 bits, enough for the first byte.  */
      *q++ = value >> 16;

      /* Process third byte of a quadruplet.  */
      NEXT_CHAR (c, p);
      if (!c)
        return -1;              /* premature EOF while decoding base64 */
      if (!IS_BASE64 (c))
        return -1;              /* illegal char while decoding base64 */

      if (c == '=')
        {
          /* Padding in third position must be followed by a second
             '='; the quadruplet then encoded a single byte, already
             written above.  Decoding carries on with whatever follows
             rather than stopping at the padding.  */
          NEXT_CHAR (c, p);
          if (!c)
            return -1;          /* premature EOF while decoding base64 */
          if (c != '=')
            return -1;          /* padding `=' expected but not found */
          continue;
        }

      value |= BASE64_CHAR_TO_VALUE (c) << 6;
      *q++ = 0xff & value >> 8;

      /* Process fourth byte of a quadruplet.  A single '=' here means
         the quadruplet encoded two bytes, both already written.  */
      NEXT_CHAR (c, p);
      if (!c)
        return -1;              /* premature EOF while decoding base64 */
      if (c == '=')
        continue;
      if (!IS_BASE64 (c))
        return -1;              /* illegal char while decoding base64 */

      value |= BASE64_CHAR_TO_VALUE (c);
      *q++ = 0xff & value;
    }
#undef IS_BASE64
#undef BASE64_CHAR_TO_VALUE

  return q - (char *) dest;
}
2262
2263#ifdef HAVE_LIBPCRE
2264/* Compiles the PCRE regex. */
2265void *
2266compile_pcre_regex (const char *str)
2267{
2268  const char *errbuf;
2269  int erroffset;
2270  pcre *regex = pcre_compile (str, 0, &errbuf, &erroffset, 0);
2271  if (! regex)
2272    {
2273      fprintf (stderr, _("Invalid regular expression %s, %s\n"),
2274               quote (str), errbuf);
2275      return false;
2276    }
2277  return regex;
2278}
2279#endif
2280
/* Compile STR as a POSIX extended regular expression (REG_EXTENDED,
   with REG_NOSUB since only match/no-match is needed).  Returns a
   newly allocated regex_t, or NULL after printing a diagnostic to
   stderr when STR cannot be compiled.  */
void *
compile_posix_regex (const char *str)
{
  regex_t *regex = xmalloc (sizeof (regex_t));
  int errcode = regcomp (regex, str, REG_EXTENDED | REG_NOSUB);
  if (errcode != 0)
    {
      /* The first regerror call only reports the buffer size needed
         for the message.  */
      size_t errbuf_size = regerror (errcode, regex, NULL, 0);
      char *errbuf = xmalloc (errbuf_size);
      regerror (errcode, regex, errbuf, errbuf_size);
      fprintf (stderr, _("Invalid regular expression %s, %s\n"),
               quote (str), errbuf);
      xfree (errbuf);
      /* Nothing was compiled, so the caller gets NULL; release the
         regex_t storage here or it would be leaked.  */
      xfree (regex);
      return NULL;
    }

  return regex;
}
2300
2301#ifdef HAVE_LIBPCRE
2302#define OVECCOUNT 30
2303/* Matches a PCRE regex.  */
2304bool
2305match_pcre_regex (const void *regex, const char *str)
2306{
2307  size_t l = strlen (str);
2308  int ovector[OVECCOUNT];
2309
2310  int rc = pcre_exec ((pcre *) regex, 0, str, (int) l, 0, 0, ovector, OVECCOUNT);
2311  if (rc == PCRE_ERROR_NOMATCH)
2312    return false;
2313  else if (rc < 0)
2314    {
2315      logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"),
2316                 quote (str), rc);
2317      return false;
2318    }
2319  else
2320    return true;
2321}
2322#undef OVECCOUNT
2323#endif
2324
2325/* Matches a POSIX regex.  */
2326bool
2327match_posix_regex (const void *regex, const char *str)
2328{
2329  int rc = regexec ((regex_t *) regex, str, 0, NULL, 0);
2330  if (rc == REG_NOMATCH)
2331    return false;
2332  else if (rc == 0)
2333    return true;
2334  else
2335    {
2336      size_t errbuf_size = regerror (rc, opt.acceptregex, NULL, 0);
2337      char *errbuf = xmalloc (errbuf_size);
2338      regerror (rc, opt.acceptregex, errbuf, errbuf_size);
2339      logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"),
2340                 quote (str), rc);
2341      xfree (errbuf);
2342      return false;
2343    }
2344}
2345
2346#undef IS_ASCII
2347#undef NEXT_CHAR
2348
/* Simple merge sort for use by stable_sort.  Implementation courtesy
   Zeljko Vrba with additional debugging by Nenad Barbutov.

   Sorts the elements of BASE with indices FROM through TO, both
   inclusive.  Each element is SIZE bytes wide.  TEMP is scratch space
   that must be able to hold elements at the same indices as BASE.
   CMPFUN has qsort semantics; on ties the element from the left half
   is taken first, which is what makes the sort stable.  */

static void
mergesort_internal (void *base, void *temp, size_t size, size_t from, size_t to,
                    int (*cmpfun) (const void *, const void *))
{
#define ELT(array, pos) ((char *)(array) + (pos) * size)
  if (from < to)
    {
      size_t i, j, k;
      /* Written this way the midpoint cannot overflow, unlike
         (to + from) / 2.  */
      size_t mid = from + (to - from) / 2;
      mergesort_internal (base, temp, size, from, mid, cmpfun);
      mergesort_internal (base, temp, size, mid + 1, to, cmpfun);
      /* Merge the two sorted halves into TEMP...  */
      i = from;
      j = mid + 1;
      for (k = from; (i <= mid) && (j <= to); k++)
        if (cmpfun (ELT (base, i), ELT (base, j)) <= 0)
          memcpy (ELT (temp, k), ELT (base, i++), size);
        else
          memcpy (ELT (temp, k), ELT (base, j++), size);
      while (i <= mid)
        memcpy (ELT (temp, k++), ELT (base, i++), size);
      while (j <= to)
        memcpy (ELT (temp, k++), ELT (base, j++), size);
      /* ...and copy the merged run back into place.  */
      for (k = from; k <= to; k++)
        memcpy (ELT (base, k), ELT (temp, k), size);
    }
#undef ELT
}

/* Stable sort with interface exactly like standard library's qsort.
   Uses mergesort internally, allocating temporary storage (one copy
   of the array, NMEMB * SIZE bytes) with alloca.  */

void
stable_sort (void *base, size_t nmemb, size_t size,
             int (*cmpfun) (const void *, const void *))
{
  /* Arrays of zero or one element are already sorted.  The test must
     be on NMEMB, not SIZE: with NMEMB == 0 the "nmemb - 1" below
     would wrap around to a huge index, and arrays of one-byte
     elements need sorting too.  */
  if (nmemb > 1 && size > 0)
    {
      void *temp = alloca (nmemb * size);
      mergesort_internal (base, temp, size, 0, nmemb - 1, cmpfun);
    }
}
2394
/* Format NUMBER compactly for display.  Magnitudes of ten and above
   are rounded to an integer.  Smaller ones keep one significant digit
   after the point at most, never more than three fractional digits,
   and no trailing zeros: 0.1 gives "0.1", 0.035 gives "0.04", 0.0091
   gives "0.009", and 0.0003 gives plain "0".

   The intent is durations: a long download is shown without a
   fractional part while a short one still conveys its order of
   magnitude.

   The result lives in a static buffer that the next call
   overwrites.  */

const char *
print_decimal (double number)
{
  static char buf[32];
  double magnitude = number < 0 ? -number : number;

  if (magnitude >= 9.95)
    {
      /* The threshold is 9.95 rather than 10 because the %.1f branch
         would show 9.96 as "10.0" instead of "10".  9.94 still comes
         out as "9.9".  */
      snprintf (buf, sizeof buf, "%.0f", number);
      return buf;
    }

  if (magnitude >= 0.95)
    {
      snprintf (buf, sizeof buf, "%.1f", number);
      return buf;
    }

  if (magnitude >= 0.001)
    {
      snprintf (buf, sizeof buf, "%.1g", number);
      return buf;
    }

  if (magnitude >= 0.0005)
    {
      /* [0.0005, 0.001) rounds up to 0.001 */
      snprintf (buf, sizeof buf, "%.3f", number);
      return buf;
    }

  /* Anything closer to zero is shown as "0" rather than "0.000".  */
  strcpy (buf, "0");
  return buf;
}
2429
2430/* Get the maximum name length for the given path. */
2431/* Return 0 if length is unknown. */
2432long
2433get_max_length (const char *path, int length, int name)
2434{
2435  long ret;
2436  char *p, *d;
2437
2438  /* Make a copy of the path that we can modify. */
2439  p = path ? strdupdelim (path, path + length) : strdup ("");
2440
2441  for (;;)
2442    {
2443      errno = 0;
2444      /* For an empty path query the current directory. */
2445#if HAVE_PATHCONF
2446      ret = pathconf (*p ? p : ".", name);
2447      if (!(ret < 0 && errno == ENOENT))
2448        break;
2449#else
2450      ret = PATH_MAX;
2451#endif
2452
2453      /* The path does not exist yet, but may be created. */
2454      /* Already at current or root directory, give up. */
2455      if (!*p || strcmp (p, "/") == 0)
2456        break;
2457
2458      /* Remove one directory level and try again. */
2459      d = strrchr (p, '/');
2460      if (d == p)
2461        p[1] = '\0';  /* check root directory */
2462      else if (d)
2463        *d = '\0';  /* remove last directory part */
2464      else
2465        *p = '\0';  /* check current directory */
2466    }
2467
2468  xfree (p);
2469
2470  if (ret < 0)
2471    {
2472      /* pathconf() has a message for us. */
2473      if (errno != 0)
2474          perror ("pathconf");
2475
2476      /* If (errno == 0) then there is no max length.
2477         Even on error return 0 so the caller can continue. */
2478      return 0;
2479    }
2480
2481  return ret;
2482}
2483
2484#ifdef TESTING
2485
/* Unit test: call subdir_p on a table of directory pairs and compare
   each result with the expected one.  Returns NULL when every case
   passes; a failing case is reported through mu_assert.  */
const char *
test_subdir_p(void)
{
  static const struct {
    const char *dir_a;
    const char *dir_b;
    bool expected;
  } cases[] = {
    { "/somedir", "/somedir", true },
    { "/somedir", "/somedir/d2", true },
    { "/somedir/d1", "/somedir", false },
  };
  unsigned n = 0;

  while (n < countof(cases))
    {
      bool actual = subdir_p (cases[n].dir_a, cases[n].dir_b);

      mu_assert ("test_subdir_p: wrong result",
                 actual == cases[n].expected);
      ++n;
    }

  return NULL;
}
2510
/* Unit test: call dir_matches_p with a table of NULL-terminated
   pattern lists and directory names and compare each result with the
   expected one.  Returns NULL when every case passes; a failing case
   is reported through mu_assert.  */
const char *
test_dir_matches_p(void)
{
  static struct {
    const char *patterns[3];
    const char *dir;
    bool expected;
  } cases[] = {
    { { "/somedir", "/someotherdir", NULL }, "somedir", true },
    { { "/somedir", "/someotherdir", NULL }, "anotherdir", false },
    { { "/somedir", "/*otherdir", NULL }, "anotherdir", true },
    { { "/somedir/d1", "/someotherdir", NULL }, "somedir/d1", true },
    { { "*/*d1", "/someotherdir", NULL }, "somedir/d1", true },
    { { "/somedir/d1", "/someotherdir", NULL }, "d1", false },
    { { "!COMPLETE", NULL, NULL }, "!COMPLETE", true },
    { { "*COMPLETE", NULL, NULL }, "!COMPLETE", true },
    { { "*/!COMPLETE", NULL, NULL }, "foo/!COMPLETE", true },
    { { "*COMPLETE", NULL, NULL }, "foo/!COMPLETE", false },
    { { "*/*COMPLETE", NULL, NULL }, "foo/!COMPLETE", true },
    { { "/dir with spaces", NULL, NULL }, "dir with spaces", true },
    { { "/dir*with*spaces", NULL, NULL }, "dir with spaces", true },
    { { "/Tmp/has", NULL, NULL }, "/Tmp/has space", false },
    { { "/Tmp/has", NULL, NULL }, "/Tmp/has,comma", false },
  };
  unsigned n = 0;

  while (n < countof(cases))
    {
      bool actual = dir_matches_p (cases[n].patterns, cases[n].dir);

      mu_assert ("test_dir_matches_p: wrong result",
                 actual == cases[n].expected);
      ++n;
    }

  return NULL;
}
2547
2548#endif /* TESTING */
2549