/* Various utility functions.
   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation,
   Inc.

This file is part of GNU Wget.

GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.

GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Wget.  If not, see <http://www.gnu.org/licenses/>.

Additional permission under GNU GPL version 3 section 7

If you modify this program, or any covered work, by linking or
combining it with the OpenSSL project's OpenSSL library (or a
modified version of that library), containing parts covered by the
terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
grants you additional permission to convey the resulting work.
Corresponding Source for a non-source form of such a combination
shall include the source code for the parts of OpenSSL used as well
as that of the covered work.  */

#include "wget.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#ifdef HAVE_MMAP
# include <sys/mman.h>
#endif
#ifdef HAVE_PROCESS_H
# include <process.h>  /* getpid() */
#endif
#include <errno.h>
#include <fcntl.h>
#include <assert.h>
#include <stdarg.h>
#include <locale.h>

#if HAVE_UTIME
# include <sys/types.h>
# ifdef HAVE_UTIME_H
#  include <utime.h>
# endif

# ifdef HAVE_SYS_UTIME_H
#  include <sys/utime.h>
# endif
#endif

#include <sys/time.h>

#include <sys/stat.h>

/* For TIOCGWINSZ and friends: */
#include <sys/ioctl.h>
#include <termios.h>

/* Needed for Unix version of run_with_timeout. */
#include <signal.h>
#include <setjmp.h>

#include <regex.h>
#ifdef HAVE_LIBPCRE
# include <pcre.h>
#endif

#ifndef HAVE_SIGSETJMP
/* If sigsetjmp is a macro, configure won't pick it up. */
# ifdef sigsetjmp
#  define HAVE_SIGSETJMP
# endif
#endif

#if defined HAVE_SIGSETJMP || defined HAVE_SIGBLOCK
# define USE_SIGNAL_TIMEOUT
#endif

#include "utils.h"
#include "hash.h"

#ifdef __VMS
#include "vms.h"
#endif /* def __VMS */

#ifdef TESTING
#include "test.h"
#endif

#include "exits.h"

/* Log an out-of-memory message naming CONTEXT (the allocating
   routine) and exit with WGET_EXIT_GENERIC_ERROR.  ATTEMPTED_SIZE is
   the number of bytes that was requested, or UNKNOWN_ATTEMPTED_SIZE
   when the caller does not know it; the message omits the byte count
   in the latter case.  This function does not return.  */
static void _Noreturn
memfatal (const char *context, long attempted_size)
{
  /* Make sure we don't try to store part of the log line, and thus
     call malloc.  */
  log_set_save_context (false);

  /* We have different log outputs in different situations:
     1) output without bytes information
     2) output with bytes information  */
  if (attempted_size == UNKNOWN_ATTEMPTED_SIZE)
    {
      logprintf (LOG_ALWAYS,
                 _("%s: %s: Failed to allocate enough memory; memory exhausted.\n"),
                 exec_name, context);
    }
  else
    {
      logprintf (LOG_ALWAYS,
                 _("%s: %s: Failed to allocate %ld bytes; memory exhausted.\n"),
                 exec_name, context, attempted_size);
    }

  exit (WGET_EXIT_GENERIC_ERROR);
}

/* Character property table for (re-)escaping VMS ODS5 extended file
   names.  Note that this table ignores Unicode.

   ODS2 valid characters: 0-9 A-Z a-z $ - _ ~

   ODS5 Invalid characters:
     C0 control codes (0x00 to 0x1F inclusive)
     Asterisk (*)
     Question mark (?)

   ODS5 Invalid characters only in VMS V7.2 (which no one runs, right?):
     Double quotation marks (")
     Backslash (\)
     Colon (:)
     Left angle bracket (<)
     Right angle bracket (>)
     Slash (/)
     Vertical bar (|)

   Characters escaped by "^":
     SP  !  "  #  %  &  '  (  )  +  ,  .  :  ;  =
      @  [  \  ]  ^  `  {  |  }  ~

   Either "^_" or "^ " is accepted as a space.  Period (.) is a special
   case.  Note that un-escaped < and > can also confuse a directory
   spec.

   Characters put out as ^xx:
     7F (DEL)
     80-9F (C1 control characters)
     A0 (nonbreaking space)
     FF (Latin small letter y diaeresis)

   Other cases:
     Unicode: "^Uxxxx", where "xxxx" is four hex digits.
164 165 Property table values: 166 Normal escape: 1 167 Space: 2 168 Dot: 4 169 Hex-hex escape: 8 170 ODS2 normal: 16 171 ODS2 lower case: 32 172 Hex digit: 64 173*/ 174 175unsigned char char_prop[ 256] = { 176 177/* NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO SI */ 178 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 179 180/* DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC FS GS RS US */ 181 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 182 183/* SP ! " # $ % & ' ( ) * + , - . / */ 184 2, 1, 1, 1, 16, 1, 1, 1, 1, 1, 0, 1, 1, 16, 4, 0, 185 186/* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */ 187 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 1, 1, 1, 1, 1, 1, 188 189/* @ A B C D E F G H I J K L M N O */ 190 1, 80, 80, 80, 80, 80, 80, 16, 16, 16, 16, 16, 16, 16, 16, 16, 191 192/* P Q R S T U V W X Y Z [ \ ] ^ _ */ 193 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 1, 1, 1, 1, 16, 194 195/* ` a b c d e f g h i j k l m n o */ 196 1, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32, 32, 197 198/* p q r s t u v w x y z { | } ~ DEL */ 199 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 1, 1, 1, 17, 8, 200 201 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 202 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 203 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 204 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 205 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 206 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 207 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 208 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8 209}; 210 211/* Utility function: like xstrdup(), but also lowercases S. */ 212 213char * 214xstrdup_lower (const char *s) 215{ 216 char *copy = xstrdup (s); 217 char *p = copy; 218 for (; *p; p++) 219 *p = c_tolower (*p); 220 return copy; 221} 222 223/* Copy the string formed by two pointers (one on the beginning, other 224 on the char after the last char) to a new, malloc-ed location. 225 0-terminate it. 
*/ 226char * 227strdupdelim (const char *beg, const char *end) 228{ 229 char *res = xmalloc (end - beg + 1); 230 memcpy (res, beg, end - beg); 231 res[end - beg] = '\0'; 232 return res; 233} 234 235/* Parse a string containing comma-separated elements, and return a 236 vector of char pointers with the elements. Spaces following the 237 commas are ignored. */ 238char ** 239sepstring (const char *s) 240{ 241 char **res; 242 const char *p; 243 int i = 0; 244 245 if (!s || !*s) 246 return NULL; 247 res = NULL; 248 p = s; 249 while (*s) 250 { 251 if (*s == ',') 252 { 253 res = xrealloc (res, (i + 2) * sizeof (char *)); 254 res[i] = strdupdelim (p, s); 255 res[++i] = NULL; 256 ++s; 257 /* Skip the blanks following the ','. */ 258 while (c_isspace (*s)) 259 ++s; 260 p = s; 261 } 262 else 263 ++s; 264 } 265 res = xrealloc (res, (i + 2) * sizeof (char *)); 266 res[i] = strdupdelim (p, s); 267 res[i + 1] = NULL; 268 return res; 269} 270 271/* Like sprintf, but prints into a string of sufficient size freshly 272 allocated with malloc, which is returned. If unable to print due 273 to invalid format, returns NULL. Inability to allocate needed 274 memory results in abort, as with xmalloc. This is in spirit 275 similar to the GNU/BSD extension asprintf, but somewhat easier to 276 use. 277 278 Internally the function either calls vasprintf or loops around 279 vsnprintf until the correct size is found. Since Wget also ships a 280 fallback implementation of vsnprintf, this should be portable. */ 281 282/* Constant is using for limits memory allocation for text buffer. 283 Applicable in situation when: vasprintf is not available in the system 284 and vsnprintf return -1 when long line is truncated (in old versions of 285 glibc and in other system where C99 doesn`t support) */ 286 287#define FMT_MAX_LENGTH 1048576 288 289char * 290aprintf (const char *fmt, ...) 291{ 292#if defined HAVE_VASPRINTF && !defined DEBUG_MALLOC 293 /* Use vasprintf. 
*/ 294 int ret; 295 va_list args; 296 char *str; 297 va_start (args, fmt); 298 ret = vasprintf (&str, fmt, args); 299 va_end (args); 300 if (ret < 0 && errno == ENOMEM) 301 memfatal ("aprintf", UNKNOWN_ATTEMPTED_SIZE); /* for consistency 302 with xmalloc/xrealloc */ 303 else if (ret < 0) 304 return NULL; 305 return str; 306#else /* not HAVE_VASPRINTF */ 307 308 /* vasprintf is unavailable. snprintf into a small buffer and 309 resize it as necessary. */ 310 int size = 32; 311 char *str = xmalloc (size); 312 313 /* #### This code will infloop and eventually abort in xrealloc if 314 passed a FMT that causes snprintf to consistently return -1. */ 315 316 while (1) 317 { 318 int n; 319 va_list args; 320 321 va_start (args, fmt); 322 n = vsnprintf (str, size, fmt, args); 323 va_end (args); 324 325 /* If the printing worked, return the string. */ 326 if (n > -1 && n < size) 327 return str; 328 329 /* Else try again with a larger buffer. */ 330 if (n > -1) /* C99 */ 331 size = n + 1; /* precisely what is needed */ 332 else if (size >= FMT_MAX_LENGTH) /* We have a huge buffer, */ 333 { /* maybe we have some wrong 334 format string? */ 335 logprintf (LOG_ALWAYS, 336 _("%s: aprintf: text buffer is too big (%ld bytes), " 337 "aborting.\n"), 338 exec_name, size); /* printout a log message */ 339 abort (); /* and abort... */ 340 } 341 else 342 { 343 /* else, we continue to grow our 344 * buffer: Twice the old size. */ 345 size <<= 1; 346 } 347 str = xrealloc (str, size); 348 } 349#endif /* not HAVE_VASPRINTF */ 350} 351 352/* Concatenate the NULL-terminated list of string arguments into 353 freshly allocated space. */ 354 355char * 356concat_strings (const char *str0, ...) 357{ 358 va_list args; 359 int saved_lengths[5]; /* inspired by Apache's apr_pstrcat */ 360 char *ret, *p; 361 362 const char *next_str; 363 int total_length = 0; 364 size_t argcount; 365 366 /* Calculate the length of and allocate the resulting string. 
*/ 367 368 argcount = 0; 369 va_start (args, str0); 370 for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *)) 371 { 372 int len = strlen (next_str); 373 if (argcount < countof (saved_lengths)) 374 saved_lengths[argcount++] = len; 375 total_length += len; 376 } 377 va_end (args); 378 p = ret = xmalloc (total_length + 1); 379 380 /* Copy the strings into the allocated space. */ 381 382 argcount = 0; 383 va_start (args, str0); 384 for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *)) 385 { 386 int len; 387 if (argcount < countof (saved_lengths)) 388 len = saved_lengths[argcount++]; 389 else 390 len = strlen (next_str); 391 memcpy (p, next_str, len); 392 p += len; 393 } 394 va_end (args); 395 *p = '\0'; 396 397 return ret; 398} 399 400/* Format the provided time according to the specified format. The 401 format is a string with format elements supported by strftime. */ 402 403static char * 404fmttime (time_t t, const char *fmt) 405{ 406 static char output[32]; 407 struct tm *tm = localtime(&t); 408 if (!tm) 409 abort (); 410 if (!strftime(output, sizeof(output), fmt, tm)) 411 abort (); 412 return output; 413} 414 415/* Return pointer to a static char[] buffer in which zero-terminated 416 string-representation of TM (in form hh:mm:ss) is printed. 417 418 If TM is NULL, the current time will be used. */ 419 420char * 421time_str (time_t t) 422{ 423 return fmttime(t, "%H:%M:%S"); 424} 425 426/* Like the above, but include the date: YYYY-MM-DD hh:mm:ss. */ 427 428char * 429datetime_str (time_t t) 430{ 431 return fmttime(t, "%Y-%m-%d %H:%M:%S"); 432} 433 434/* The Windows versions of the following two functions are defined in 435 mswindows.c. On MSDOS this function should never be called. 
*/ 436 437#ifdef __VMS 438 439void 440fork_to_background (void) 441{ 442 return; 443} 444 445#else /* def __VMS */ 446 447#if !defined(WINDOWS) && !defined(MSDOS) 448void 449fork_to_background (void) 450{ 451 pid_t pid; 452 /* Whether we arrange our own version of opt.lfilename here. */ 453 bool logfile_changed = false; 454 455 if (!opt.lfilename && (!opt.quiet || opt.server_response)) 456 { 457 /* We must create the file immediately to avoid either a race 458 condition (which arises from using unique_name and failing to 459 use fopen_excl) or lying to the user about the log file name 460 (which arises from using unique_name, printing the name, and 461 using fopen_excl later on.) */ 462 FILE *new_log_fp = unique_create (DEFAULT_LOGFILE, false, &opt.lfilename); 463 if (new_log_fp) 464 { 465 logfile_changed = true; 466 fclose (new_log_fp); 467 } 468 } 469 pid = fork (); 470 if (pid < 0) 471 { 472 /* parent, error */ 473 perror ("fork"); 474 exit (WGET_EXIT_GENERIC_ERROR); 475 } 476 else if (pid != 0) 477 { 478 /* parent, no error */ 479 printf (_("Continuing in background, pid %d.\n"), (int) pid); 480 if (logfile_changed) 481 printf (_("Output will be written to %s.\n"), quote (opt.lfilename)); 482 exit (WGET_EXIT_SUCCESS); /* #### should we use _exit()? */ 483 } 484 485 /* child: give up the privileges and keep running. */ 486 setsid (); 487 if (freopen ("/dev/null", "r", stdin) == NULL) 488 DEBUGP (("Failed to redirect stdin to /dev/null.\n")); 489 if (freopen ("/dev/null", "w", stdout) == NULL) 490 DEBUGP (("Failed to redirect stdout to /dev/null.\n")); 491 if (freopen ("/dev/null", "w", stderr) == NULL) 492 DEBUGP (("Failed to redirect stderr to /dev/null.\n")); 493} 494#endif /* !WINDOWS && !MSDOS */ 495 496#endif /* def __VMS [else] */ 497 498 499/* "Touch" FILE, i.e. make its mtime ("modified time") equal the time 500 specified with TM. The atime ("access time") is set to the current 501 time. 
*/ 502 503void 504touch (const char *file, time_t tm) 505{ 506#if HAVE_UTIME 507# ifdef HAVE_STRUCT_UTIMBUF 508 struct utimbuf times; 509# else 510 struct { 511 time_t actime; 512 time_t modtime; 513 } times; 514# endif 515 times.modtime = tm; 516 times.actime = time (NULL); 517 if (utime (file, ×) == -1) 518 logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno)); 519#else 520 struct timespec timespecs[2]; 521 int fd; 522 523 fd = open (file, O_WRONLY); 524 if (fd < 0) 525 { 526 logprintf (LOG_NOTQUIET, "open(%s): %s\n", file, strerror (errno)); 527 return; 528 } 529 530 timespecs[0].tv_sec = time (NULL); 531 timespecs[0].tv_nsec = 0L; 532 timespecs[1].tv_sec = tm; 533 timespecs[1].tv_nsec = 0L; 534 535 if (futimens (fd, timespecs) == -1) 536 logprintf (LOG_NOTQUIET, "futimens(%s): %s\n", file, strerror (errno)); 537 538 close (fd); 539#endif 540} 541 542/* Checks if FILE is a symbolic link, and removes it if it is. Does 543 nothing under MS-Windows. */ 544int 545remove_link (const char *file) 546{ 547 int err = 0; 548 struct_stat st; 549 550 if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode)) 551 { 552 DEBUGP (("Unlinking %s (symlink).\n", file)); 553 err = unlink (file); 554 if (err != 0) 555 logprintf (LOG_VERBOSE, _("Failed to unlink symlink %s: %s\n"), 556 quote (file), strerror (errno)); 557 } 558 return err; 559} 560 561/* Does FILENAME exist? This is quite a lousy implementation, since 562 it supplies no error codes -- only a yes-or-no answer. Thus it 563 will return that a file does not exist if, e.g., the directory is 564 unreadable. I don't mind it too much currently, though. The 565 proper way should, of course, be to have a third, error state, 566 other than true/false, but that would introduce uncalled-for 567 additional complexity to the callers. 
*/ 568bool 569file_exists_p (const char *filename) 570{ 571#ifdef HAVE_ACCESS 572 return access (filename, F_OK) >= 0; 573#else 574 struct_stat buf; 575 return stat (filename, &buf) >= 0; 576#endif 577} 578 579/* Returns 0 if PATH is a directory, 1 otherwise (any kind of file). 580 Returns 0 on error. */ 581bool 582file_non_directory_p (const char *path) 583{ 584 struct_stat buf; 585 /* Use lstat() rather than stat() so that symbolic links pointing to 586 directories can be identified correctly. */ 587 if (lstat (path, &buf) != 0) 588 return false; 589 return S_ISDIR (buf.st_mode) ? false : true; 590} 591 592/* Return the size of file named by FILENAME, or -1 if it cannot be 593 opened or seeked into. */ 594wgint 595file_size (const char *filename) 596{ 597#if defined(HAVE_FSEEKO) && defined(HAVE_FTELLO) 598 wgint size; 599 /* We use fseek rather than stat to determine the file size because 600 that way we can also verify that the file is readable without 601 explicitly checking for permissions. Inspired by the POST patch 602 by Arnaud Wylie. */ 603 FILE *fp = fopen (filename, "rb"); 604 if (!fp) 605 return -1; 606 fseeko (fp, 0, SEEK_END); 607 size = ftello (fp); 608 fclose (fp); 609 return size; 610#else 611 struct_stat st; 612 if (stat (filename, &st) < 0) 613 return -1; 614 return st.st_size; 615#endif 616} 617 618/* 2005-02-19 SMS. 619 If no UNIQ_SEP is defined (as on VMS), have unique_name() return the 620 original name. With the VMS file systems' versioning, everything 621 should be fine, and appending ".NN" just causes trouble. 622*/ 623 624#ifdef UNIQ_SEP 625 626/* stat file names named PREFIX.1, PREFIX.2, etc., until one that 627 doesn't exist is found. Return a freshly allocated copy of the 628 unused file name. 
*/ 629 630static char * 631unique_name_1 (const char *prefix) 632{ 633 int count = 1; 634 int plen = strlen (prefix); 635 char *template = (char *)alloca (plen + 1 + 24); 636 char *template_tail = template + plen; 637 638 memcpy (template, prefix, plen); 639 *template_tail++ = UNIQ_SEP; 640 641 do 642 number_to_string (template_tail, count++); 643 while (file_exists_p (template)); 644 645 return xstrdup (template); 646} 647 648/* Return a unique file name, based on FILE. 649 650 More precisely, if FILE doesn't exist, it is returned unmodified. 651 If not, FILE.1 is tried, then FILE.2, etc. The first FILE.<number> 652 file name that doesn't exist is returned. 653 654 2005-02-19 SMS. "." is now UNIQ_SEP, and may be different. 655 656 The resulting file is not created, only verified that it didn't 657 exist at the point in time when the function was called. 658 Therefore, where security matters, don't rely that the file created 659 by this function exists until you open it with O_EXCL or 660 equivalent. 661 662 If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated 663 string. Otherwise, it may return FILE if the file doesn't exist 664 (and therefore doesn't need changing). */ 665 666char * 667unique_name (const char *file, bool allow_passthrough) 668{ 669 /* If the FILE itself doesn't exist, return it without 670 modification. */ 671 if (!file_exists_p (file)) 672 return allow_passthrough ? (char *)file : xstrdup (file); 673 674 /* Otherwise, find a numeric suffix that results in unused file name 675 and return it. */ 676 return unique_name_1 (file); 677} 678 679#else /* def UNIQ_SEP */ 680 681/* Dummy unique_name() for VMS. Return the original name as easily as 682 possible. 683*/ 684char * 685unique_name (const char *file, bool allow_passthrough) 686{ 687 /* Return the FILE itself, without modification, irregardful. */ 688 return allow_passthrough ? 
(char *)file : xstrdup (file); 689} 690 691#endif /* def UNIQ_SEP [else] */ 692 693/* Create a file based on NAME, except without overwriting an existing 694 file with that name. Providing O_EXCL is correctly implemented, 695 this function does not have the race condition associated with 696 opening the file returned by unique_name. */ 697 698FILE * 699unique_create (const char *name, bool binary, char **opened_name) 700{ 701 /* unique file name, based on NAME */ 702 char *uname = unique_name (name, false); 703 FILE *fp; 704 while ((fp = fopen_excl (uname, binary)) == NULL && errno == EEXIST) 705 { 706 xfree (uname); 707 uname = unique_name (name, false); 708 } 709 if (opened_name) 710 { 711 if (fp) 712 *opened_name = uname; 713 else 714 { 715 *opened_name = NULL; 716 xfree (uname); 717 } 718 } 719 else 720 xfree (uname); 721 return fp; 722} 723 724/* Open the file for writing, with the addition that the file is 725 opened "exclusively". This means that, if the file already exists, 726 this function will *fail* and errno will be set to EEXIST. If 727 BINARY is set, the file will be opened in binary mode, equivalent 728 to fopen's "wb". 729 730 If opening the file fails for any reason, including the file having 731 previously existed, this function returns NULL and sets errno 732 appropriately. */ 733 734FILE * 735fopen_excl (const char *fname, int binary) 736{ 737 int fd; 738#ifdef O_EXCL 739 740/* 2005-04-14 SMS. 741 VMS lacks O_BINARY, but makes up for it in weird and wonderful ways. 742 It also has file versions which obviate all the O_EXCL effort. 743 O_TRUNC (something of a misnomer) requests a new version. 744*/ 745# ifdef __VMS 746/* Common open() optional arguments: 747 sequential access only, access callback function. 748*/ 749# define OPEN_OPT_ARGS "fop=sqo", "acc", acc_cb, &open_id 750 751 int open_id; 752 int flags = O_WRONLY | O_CREAT | O_TRUNC; 753 754 if (binary > 1) 755 { 756 open_id = 11; 757 fd = open( fname, /* File name. */ 758 flags, /* Flags. 
*/ 759 0777, /* Mode for default protection. */ 760 "ctx=bin,stm", /* Binary, stream access. */ 761 "rfm=stmlf", /* Stream_LF. */ 762 OPEN_OPT_ARGS); /* Access callback. */ 763 } 764 else if (binary) 765 { 766 open_id = 12; 767 fd = open( fname, /* File name. */ 768 flags, /* Flags. */ 769 0777, /* Mode for default protection. */ 770 "ctx=bin,stm", /* Binary, stream access. */ 771 "rfm=fix", /* Fixed-length, */ 772 "mrs=512", /* 512-byte records. */ 773 OPEN_OPT_ARGS); /* Access callback. */ 774 } 775 else 776 { 777 open_id = 13; 778 fd = open( fname, /* File name. */ 779 flags, /* Flags. */ 780 0777, /* Mode for default protection. */ 781 "rfm=stmlf", /* Stream_LF. */ 782 OPEN_OPT_ARGS); /* Access callback. */ 783 } 784# else /* def __VMS */ 785 int flags = O_WRONLY | O_CREAT | O_EXCL; 786# ifdef O_BINARY 787 if (binary) 788 flags |= O_BINARY; 789# endif 790 fd = open (fname, flags, 0666); 791# endif /* def __VMS [else] */ 792 793 if (fd < 0) 794 return NULL; 795 return fdopen (fd, binary ? "wb" : "w"); 796#else /* not O_EXCL */ 797 /* Manually check whether the file exists. This is prone to race 798 conditions, but systems without O_EXCL haven't deserved 799 better. */ 800 if (file_exists_p (fname)) 801 { 802 errno = EEXIST; 803 return NULL; 804 } 805 return fopen (fname, binary ? "wb" : "w"); 806#endif /* not O_EXCL */ 807} 808 809/* Create DIRECTORY. If some of the pathname components of DIRECTORY 810 are missing, create them first. In case any mkdir() call fails, 811 return its error status. Returns 0 on successful completion. 812 813 The behaviour of this function should be identical to the behaviour 814 of `mkdir -p' on systems where mkdir supports the `-p' option. */ 815int 816make_directory (const char *directory) 817{ 818 int i, ret, quit = 0; 819 char *dir; 820 821 /* Make a copy of dir, to be able to write to it. Otherwise, the 822 function is unsafe if called with a read-only char *argument. 
*/ 823 STRDUP_ALLOCA (dir, directory); 824 825 /* If the first character of dir is '/', skip it (and thus enable 826 creation of absolute-pathname directories. */ 827 for (i = (*dir == '/'); 1; ++i) 828 { 829 for (; dir[i] && dir[i] != '/'; i++) 830 ; 831 if (!dir[i]) 832 quit = 1; 833 dir[i] = '\0'; 834 /* Check whether the directory already exists. Allow creation of 835 of intermediate directories to fail, as the initial path components 836 are not necessarily directories! */ 837 if (!file_exists_p (dir)) 838 ret = mkdir (dir, 0777); 839 else 840 ret = 0; 841 if (quit) 842 break; 843 else 844 dir[i] = '/'; 845 } 846 return ret; 847} 848 849/* Merge BASE with FILE. BASE can be a directory or a file name, FILE 850 should be a file name. 851 852 file_merge("/foo/bar", "baz") => "/foo/baz" 853 file_merge("/foo/bar/", "baz") => "/foo/bar/baz" 854 file_merge("foo", "bar") => "bar" 855 856 In other words, it's a simpler and gentler version of uri_merge. */ 857 858char * 859file_merge (const char *base, const char *file) 860{ 861 char *result; 862 const char *cut = (const char *)strrchr (base, '/'); 863 864 if (!cut) 865 return xstrdup (file); 866 867 result = xmalloc (cut - base + 1 + strlen (file) + 1); 868 memcpy (result, base, cut - base); 869 result[cut - base] = '/'; 870 strcpy (result + (cut - base) + 1, file); 871 872 return result; 873} 874 875/* Like fnmatch, but performs a case-insensitive match. */ 876 877int 878fnmatch_nocase (const char *pattern, const char *string, int flags) 879{ 880#ifdef FNM_CASEFOLD 881 /* The FNM_CASEFOLD flag started as a GNU extension, but it is now 882 also present on *BSD platforms, and possibly elsewhere. */ 883 return fnmatch (pattern, string, flags | FNM_CASEFOLD); 884#else 885 /* Turn PATTERN and STRING to lower case and call fnmatch on them. 
*/ 886 char *patcopy = (char *) alloca (strlen (pattern) + 1); 887 char *strcopy = (char *) alloca (strlen (string) + 1); 888 char *p; 889 for (p = patcopy; *pattern; pattern++, p++) 890 *p = c_tolower (*pattern); 891 *p = '\0'; 892 for (p = strcopy; *string; string++, p++) 893 *p = c_tolower (*string); 894 *p = '\0'; 895 return fnmatch (patcopy, strcopy, flags); 896#endif 897} 898 899static bool in_acclist (const char *const *, const char *, bool); 900 901/* Determine whether a file is acceptable to be followed, according to 902 lists of patterns to accept/reject. */ 903bool 904acceptable (const char *s) 905{ 906 const char *p; 907 908 if (opt.output_document && strcmp (s, opt.output_document) == 0) 909 return true; 910 911 if ((p = strrchr (s, '/'))) 912 s = p + 1; 913 914 if (opt.accepts) 915 { 916 if (opt.rejects) 917 return (in_acclist ((const char *const *)opt.accepts, s, true) 918 && !in_acclist ((const char *const *)opt.rejects, s, true)); 919 else 920 return in_acclist ((const char *const *)opt.accepts, s, true); 921 } 922 else if (opt.rejects) 923 return !in_acclist ((const char *const *)opt.rejects, s, true); 924 925 return true; 926} 927 928/* Determine whether an URL is acceptable to be followed, according to 929 regex patterns to accept/reject. */ 930bool 931accept_url (const char *s) 932{ 933 if (opt.acceptregex && !opt.regex_match_fun (opt.acceptregex, s)) 934 return false; 935 if (opt.rejectregex && opt.regex_match_fun (opt.rejectregex, s)) 936 return false; 937 938 return true; 939} 940 941/* Check if D2 is a subdirectory of D1. E.g. if D1 is `/something', subdir_p() 942 will return true if and only if D2 begins with `/something/' or is exactly 943 '/something'. 
*/ 944bool 945subdir_p (const char *d1, const char *d2) 946{ 947 if (*d1 == '\0') 948 return true; 949 if (!opt.ignore_case) 950 for (; *d1 && *d2 && (*d1 == *d2); ++d1, ++d2) 951 ; 952 else 953 for (; *d1 && *d2 && (c_tolower (*d1) == c_tolower (*d2)); ++d1, ++d2) 954 ; 955 956 return *d1 == '\0' && (*d2 == '\0' || *d2 == '/'); 957} 958 959/* Iterate through DIRLIST (which must be NULL-terminated), and return the 960 first element that matches DIR, through wildcards or front comparison (as 961 appropriate). */ 962static bool 963dir_matches_p (const char **dirlist, const char *dir) 964{ 965 const char **x; 966 int (*matcher) (const char *, const char *, int) 967 = opt.ignore_case ? fnmatch_nocase : fnmatch; 968 969 for (x = dirlist; *x; x++) 970 { 971 /* Remove leading '/' */ 972 const char *p = *x + (**x == '/'); 973 if (has_wildcards_p (p)) 974 { 975 if (matcher (p, dir, FNM_PATHNAME) == 0) 976 break; 977 } 978 else 979 { 980 if (subdir_p (p, dir)) 981 break; 982 } 983 } 984 985 return *x ? true : false; 986} 987 988/* Returns whether DIRECTORY is acceptable for download, wrt the 989 include/exclude lists. 990 991 The leading `/' is ignored in paths; relative and absolute paths 992 may be freely intermixed. */ 993 994bool 995accdir (const char *directory) 996{ 997 /* Remove starting '/'. */ 998 if (*directory == '/') 999 ++directory; 1000 if (opt.includes) 1001 { 1002 if (!dir_matches_p (opt.includes, directory)) 1003 return false; 1004 } 1005 if (opt.excludes) 1006 { 1007 if (dir_matches_p (opt.excludes, directory)) 1008 return false; 1009 } 1010 return true; 1011} 1012 1013/* Return true if STRING ends with TAIL. For instance: 1014 1015 match_tail ("abc", "bc", false) -> 1 1016 match_tail ("abc", "ab", false) -> 0 1017 match_tail ("abc", "abc", false) -> 1 1018 1019 If FOLD_CASE is true, the comparison will be case-insensitive. 
*/ 1020 1021bool 1022match_tail (const char *string, const char *tail, bool fold_case) 1023{ 1024 int pos = strlen (string) - strlen (tail); 1025 1026 if (pos < 0) 1027 return false; /* tail is longer than string. */ 1028 1029 if (!fold_case) 1030 return !strcmp (string + pos, tail); 1031 else 1032 return !strcasecmp (string + pos, tail); 1033} 1034 1035/* Checks whether string S matches each element of ACCEPTS. A list 1036 element are matched either with fnmatch() or match_tail(), 1037 according to whether the element contains wildcards or not. 1038 1039 If the BACKWARD is false, don't do backward comparison -- just compare 1040 them normally. */ 1041static bool 1042in_acclist (const char *const *accepts, const char *s, bool backward) 1043{ 1044 for (; *accepts; accepts++) 1045 { 1046 if (has_wildcards_p (*accepts)) 1047 { 1048 int res = opt.ignore_case 1049 ? fnmatch_nocase (*accepts, s, 0) : fnmatch (*accepts, s, 0); 1050 /* fnmatch returns 0 if the pattern *does* match the string. */ 1051 if (res == 0) 1052 return true; 1053 } 1054 else 1055 { 1056 if (backward) 1057 { 1058 if (match_tail (s, *accepts, opt.ignore_case)) 1059 return true; 1060 } 1061 else 1062 { 1063 int cmp = opt.ignore_case 1064 ? strcasecmp (s, *accepts) : strcmp (s, *accepts); 1065 if (cmp == 0) 1066 return true; 1067 } 1068 } 1069 } 1070 return false; 1071} 1072 1073/* Return the location of STR's suffix (file extension). Examples: 1074 suffix ("foo.bar") -> "bar" 1075 suffix ("foo.bar.baz") -> "baz" 1076 suffix ("/foo/bar") -> NULL 1077 suffix ("/foo.bar/baz") -> NULL */ 1078char * 1079suffix (const char *str) 1080{ 1081 char *p; 1082 1083 if ((p = strrchr (str, '.')) && !strchr (p + 1, '/')) 1084 return p + 1; 1085 1086 return NULL; 1087} 1088 1089/* Return true if S contains globbing wildcards (`*', `?', `[' or 1090 `]'). */ 1091 1092bool 1093has_wildcards_p (const char *s) 1094{ 1095 return !!strpbrk (s, "*?[]"); 1096} 1097 1098/* Return true if FNAME ends with a typical HTML suffix. 
The following (case-insensitive) suffixes are presumed to be HTML
   files:

     html
     htm
     ?html (`?' matches one character)

   #### CAVEAT.  This is not necessarily a good indication that FNAME
   refers to a file that contains HTML!  */
bool
has_html_suffix_p (const char *fname)
{
  char *suf;

  if ((suf = suffix (fname)) == NULL)
    return false;
  if (!strcasecmp (suf, "html"))
    return true;
  if (!strcasecmp (suf, "htm"))
    return true;
  /* One arbitrary character followed by "html".  */
  if (suf[0] && !strcasecmp (suf + 1, "html"))
    return true;
  return false;
}

/* Read FILE into memory.  A pointer to `struct file_memory' is
   returned; use struct element `content' to access file contents, and
   the element `length' to know the file length.  `content' is *not*
   zero-terminated, and you should *not* read or write beyond the [0,
   length) range of characters.

   Returns NULL if FILE cannot be opened or if reading it fails.

   After you are done with the file contents, call wget_read_file_free to
   release the memory.

   Depending on the operating system and the type of file that is
   being read, wget_read_file() either mmap's the file into memory, or
   reads the file into the core using read().

   If file is named "-", fileno(stdin) is used for reading instead.
   If you want to read from a real file named "-", use "./-" instead.  */

struct file_memory *
wget_read_file (const char *file)
{
  int fd;
  struct file_memory *fm;
  long size;
  /* Set when FD is stdin's descriptor, which we must not close.  */
  bool inhibit_close = false;

  /* Some magic in the finest tradition of Perl and its kin: if FILE
     is "-", just use stdin.  */
  if (HYPHENP (file))
    {
      fd = fileno (stdin);
      inhibit_close = true;
      /* Note that we don't inhibit mmap() in this case.  If stdin is
         redirected from a regular file, mmap() will still work.  */
    }
  else
    fd = open (file, O_RDONLY);
  if (fd < 0)
    return NULL;
  fm = xnew (struct file_memory);

#ifdef HAVE_MMAP
  {
    struct_fstat buf;
    if (fstat (fd, &buf) < 0)
      goto mmap_lose;
    fm->length = buf.st_size;
    /* NOTE: As far as I know, the callers of this function never
       modify the file text.  Relying on this would enable us to
       specify PROT_READ and MAP_SHARED for a marginal gain in
       efficiency, but at some cost to generality.  */
    fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE, fd, 0);
    if (fm->content == (char *)MAP_FAILED)
      goto mmap_lose;
    if (!inhibit_close)
      close (fd);

    fm->mmap_p = 1;
    return fm;
  }

 mmap_lose:
  /* The most common reason why mmap() fails is that FD does not point
     to a plain file.  However, it's also possible that mmap() doesn't
     work for a particular type of file.  Therefore, whenever mmap()
     fails, we just fall back to the regular method.  */
#endif /* HAVE_MMAP */

  fm->length = 0;
  size = 512;                   /* number of bytes fm->contents can
                                   hold at any given time. */
  fm->content = xmalloc (size);
  while (1)
    {
      wgint nread;
      if (fm->length > size / 2)
        {
          /* #### I'm not sure whether the whole exponential-growth
             thing makes sense with kernel read.  On Linux at least,
             read() refuses to read more than 4K from a file at a
             single chunk anyway.  But other Unixes might optimize it
             better, and it doesn't *hurt* anything, so I'm leaving
             it.  */

          /* Normally, we grow SIZE exponentially to make the number
             of calls to read() and realloc() logarithmic in relation
             to file size.  However, read() can read an amount of data
             smaller than requested, and it would be unreasonable to
             double SIZE every time *something* was read.  Therefore,
             we double SIZE only when the length exceeds half of the
             entire allocated size.  */
          size <<= 1;
          fm->content = xrealloc (fm->content, size);
        }
      /* Read into the unused part of the buffer.  */
      nread = read (fd, fm->content + fm->length, size - fm->length);
      if (nread > 0)
        /* Successful read. */
        fm->length += nread;
      else if (nread < 0)
        /* Error. */
        goto lose;
      else
        /* EOF */
        break;
    }
  if (!inhibit_close)
    close (fd);
  if (size > fm->length && fm->length != 0)
    /* Due to exponential growth of fm->content, the allocated region
       might be much larger than what is actually needed.  */
    fm->content = xrealloc (fm->content, fm->length);
  fm->mmap_p = 0;
  return fm;

 lose:
  /* A read error: release everything and report failure.  */
  if (!inhibit_close)
    close (fd);
  xfree (fm->content);
  xfree (fm);
  return NULL;
}

/* Release the resources held by FM.  Specifically, this calls
   munmap() or xfree() on fm->content, depending whether mmap or
   malloc/read were used to read in the file.  It also frees the
   memory needed to hold the FM structure itself.  */

void
wget_read_file_free (struct file_memory *fm)
{
#ifdef HAVE_MMAP
  if (fm->mmap_p)
    {
      munmap (fm->content, fm->length);
    }
  else
#endif
    {
      xfree (fm->content);
    }
  xfree (fm);
}

/* Free the pointers in a NULL-terminated vector of pointers, then
   free the pointer itself.  Does nothing when VEC is NULL.  */
void
free_vec (char **vec)
{
  if (vec)
    {
      char **p = vec;
      while (*p)
        xfree (*p++);
      xfree (vec);
    }
}

/* Append vector V2 to vector V1.  The function frees V2 and
   reallocates V1 (thus you may not use the contents of neither
   pointer after the call).  If V1 is NULL, V2 is returned.
*/ 1283char ** 1284merge_vecs (char **v1, char **v2) 1285{ 1286 int i, j; 1287 1288 if (!v1) 1289 return v2; 1290 if (!v2) 1291 return v1; 1292 if (!*v2) 1293 { 1294 /* To avoid j == 0 */ 1295 xfree (v2); 1296 return v1; 1297 } 1298 /* Count v1. */ 1299 for (i = 0; v1[i]; i++) 1300 ; 1301 /* Count v2. */ 1302 for (j = 0; v2[j]; j++) 1303 ; 1304 /* Reallocate v1. */ 1305 v1 = xrealloc (v1, (i + j + 1) * sizeof (char **)); 1306 memcpy (v1 + i, v2, (j + 1) * sizeof (char *)); 1307 xfree (v2); 1308 return v1; 1309} 1310 1311/* Append a freshly allocated copy of STR to VEC. If VEC is NULL, it 1312 is allocated as needed. Return the new value of the vector. */ 1313 1314char ** 1315vec_append (char **vec, const char *str) 1316{ 1317 int cnt; /* count of vector elements, including 1318 the one we're about to append */ 1319 if (vec != NULL) 1320 { 1321 for (cnt = 0; vec[cnt]; cnt++) 1322 ; 1323 ++cnt; 1324 } 1325 else 1326 cnt = 1; 1327 /* Reallocate the array to fit the new element and the NULL. */ 1328 vec = xrealloc (vec, (cnt + 1) * sizeof (char *)); 1329 /* Append a copy of STR to the vector. */ 1330 vec[cnt - 1] = xstrdup (str); 1331 vec[cnt] = NULL; 1332 return vec; 1333} 1334 1335/* Sometimes it's useful to create "sets" of strings, i.e. special 1336 hash tables where you want to store strings as keys and merely 1337 query for their existence. Here is a set of utility routines that 1338 makes that transparent. */ 1339 1340void 1341string_set_add (struct hash_table *ht, const char *s) 1342{ 1343 /* First check whether the set element already exists. If it does, 1344 do nothing so that we don't have to free() the old element and 1345 then strdup() a new one. */ 1346 if (hash_table_contains (ht, s)) 1347 return; 1348 1349 /* We use "1" as value. It provides us a useful and clear arbitrary 1350 value, and it consumes no memory -- the pointers to the same 1351 string "1" will be shared by all the key-value pairs in all `set' 1352 hash tables. 
*/ 1353 hash_table_put (ht, xstrdup (s), "1"); 1354} 1355 1356/* Synonym for hash_table_contains... */ 1357 1358int 1359string_set_contains (struct hash_table *ht, const char *s) 1360{ 1361 return hash_table_contains (ht, s); 1362} 1363 1364/* Convert the specified string set to array. ARRAY should be large 1365 enough to hold hash_table_count(ht) char pointers. */ 1366 1367void string_set_to_array (struct hash_table *ht, char **array) 1368{ 1369 hash_table_iterator iter; 1370 for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); ) 1371 *array++ = iter.key; 1372} 1373 1374/* Free the string set. This frees both the storage allocated for 1375 keys and the actual hash table. (hash_table_destroy would only 1376 destroy the hash table.) */ 1377 1378void 1379string_set_free (struct hash_table *ht) 1380{ 1381 hash_table_iterator iter; 1382 for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); ) 1383 xfree (iter.key); 1384 hash_table_destroy (ht); 1385} 1386 1387/* Utility function: simply call xfree() on all keys and values of HT. */ 1388 1389void 1390free_keys_and_values (struct hash_table *ht) 1391{ 1392 hash_table_iterator iter; 1393 for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); ) 1394 { 1395 xfree (iter.key); 1396 xfree (iter.value); 1397 } 1398} 1399 1400/* Get digit grouping data for thousand separors by calling 1401 localeconv(). The data includes separator string and grouping info 1402 and is cached after the first call to the function. 1403 1404 In locales that don't set a thousand separator (such as the "C" 1405 locale), this forces it to be ",". We are now only showing 1406 thousand separators in one place, so this shouldn't be a problem in 1407 practice. 
*/ 1408 1409static void 1410get_grouping_data (const char **sep, const char **grouping) 1411{ 1412 static const char *cached_sep; 1413 static const char *cached_grouping; 1414 static bool initialized; 1415 if (!initialized) 1416 { 1417 /* Get the grouping info from the locale. */ 1418 struct lconv *lconv = localeconv (); 1419 cached_sep = lconv->thousands_sep; 1420 cached_grouping = lconv->grouping; 1421#if ! USE_NLS_PROGRESS_BAR 1422 /* We can't count column widths, so ensure that the separator 1423 * is single-byte only (let check below determine what byte). */ 1424 if (strlen(cached_sep) > 1) 1425 cached_sep = ""; 1426#endif 1427 if (!*cached_sep) 1428 { 1429 /* Many locales (such as "C" or "hr_HR") don't specify 1430 grouping, which we still want to use it for legibility. 1431 In those locales set the sep char to ',', unless that 1432 character is used for decimal point, in which case set it 1433 to ".". */ 1434 if (*lconv->decimal_point != ',') 1435 cached_sep = ","; 1436 else 1437 cached_sep = "."; 1438 cached_grouping = "\x03"; 1439 } 1440 initialized = true; 1441 } 1442 *sep = cached_sep; 1443 *grouping = cached_grouping; 1444} 1445 1446/* Return a printed representation of N with thousand separators. 1447 This should respect locale settings, with the exception of the "C" 1448 locale which mandates no separator, but we use one anyway. 1449 1450 Unfortunately, we cannot use %'d (in fact it would be %'j) to get 1451 the separators because it's too non-portable, and it's hard to test 1452 for this feature at configure time. Besides, it wouldn't display 1453 separators in the "C" locale, still used by many Unix users. 
*/ 1454 1455const char * 1456with_thousand_seps (wgint n) 1457{ 1458 static char outbuf[48]; 1459 char *p = outbuf + sizeof outbuf; 1460 1461 /* Info received from locale */ 1462 const char *grouping, *sep; 1463 int seplen; 1464 1465 /* State information */ 1466 int i = 0, groupsize; 1467 const char *atgroup; 1468 1469 bool negative = n < 0; 1470 1471 /* Initialize grouping data. */ 1472 get_grouping_data (&sep, &grouping); 1473 seplen = strlen (sep); 1474 atgroup = grouping; 1475 groupsize = *atgroup++; 1476 1477 /* This would overflow on WGINT_MIN, but printing negative numbers 1478 is not an important goal of this fuinction. */ 1479 if (negative) 1480 n = -n; 1481 1482 /* Write the number into the buffer, backwards, inserting the 1483 separators as necessary. */ 1484 *--p = '\0'; 1485 while (1) 1486 { 1487 *--p = n % 10 + '0'; 1488 n /= 10; 1489 if (n == 0) 1490 break; 1491 /* Prepend SEP to every groupsize'd digit and get new groupsize. */ 1492 if (++i == groupsize) 1493 { 1494 if (seplen == 1) 1495 *--p = *sep; 1496 else 1497 memcpy (p -= seplen, sep, seplen); 1498 i = 0; 1499 if (*atgroup) 1500 groupsize = *atgroup++; 1501 } 1502 } 1503 if (negative) 1504 *--p = '-'; 1505 1506 return p; 1507} 1508 1509/* N, a byte quantity, is converted to a human-readable abberviated 1510 form a la sizes printed by `ls -lh'. The result is written to a 1511 static buffer, a pointer to which is returned. 1512 1513 Unlike `with_thousand_seps', this approximates to the nearest unit. 1514 Quoting GNU libit: "Most people visually process strings of 3-4 1515 digits effectively, but longer strings of digits are more prone to 1516 misinterpretation. Hence, converting to an abbreviated form 1517 usually improves readability." 1518 1519 This intentionally uses kilobyte (KB), megabyte (MB), etc. in their 1520 original computer-related meaning of "powers of 1024". We don't 1521 use the "*bibyte" names invented in 1998, and seldom used in 1522 practice. 
Wikipedia's entry on "binary prefix" discusses this in 1523 some detail. */ 1524 1525char * 1526human_readable (HR_NUMTYPE n, const int acc, const int decimals) 1527{ 1528 /* These suffixes are compatible with those of GNU `ls -lh'. */ 1529 static char powers[] = 1530 { 1531 'K', /* kilobyte, 2^10 bytes */ 1532 'M', /* megabyte, 2^20 bytes */ 1533 'G', /* gigabyte, 2^30 bytes */ 1534 'T', /* terabyte, 2^40 bytes */ 1535 'P', /* petabyte, 2^50 bytes */ 1536 'E', /* exabyte, 2^60 bytes */ 1537 }; 1538 static char buf[8]; 1539 size_t i; 1540 1541 /* If the quantity is smaller than 1K, just print it. */ 1542 if (n < 1024) 1543 { 1544 snprintf (buf, sizeof (buf), "%d", (int) n); 1545 return buf; 1546 } 1547 1548 /* Loop over powers, dividing N with 1024 in each iteration. This 1549 works unchanged for all sizes of wgint, while still avoiding 1550 non-portable `long double' arithmetic. */ 1551 for (i = 0; i < countof (powers); i++) 1552 { 1553 /* At each iteration N is greater than the *subsequent* power. 1554 That way N/1024.0 produces a decimal number in the units of 1555 *this* power. */ 1556 if ((n / 1024) < 1024 || i == countof (powers) - 1) 1557 { 1558 double val = n / 1024.0; 1559 /* Print values smaller than the accuracy level (acc) with (decimal) 1560 * decimal digits, and others without any decimals. */ 1561 snprintf (buf, sizeof (buf), "%.*f%c", 1562 val < acc ? decimals : 0, val, powers[i]); 1563 return buf; 1564 } 1565 n /= 1024; 1566 } 1567 return NULL; /* unreached */ 1568} 1569 1570/* Count the digits in the provided number. Used to allocate space 1571 when printing numbers. */ 1572 1573int 1574numdigit (wgint number) 1575{ 1576 int cnt = 1; 1577 if (number < 0) 1578 ++cnt; /* accomodate '-' */ 1579 while ((number /= 10) != 0) 1580 ++cnt; 1581 return cnt; 1582} 1583 1584#define PR(mask) *p++ = n / (mask) + '0' 1585 1586/* DIGITS_<D> is used to print a D-digit number and should be called 1587 with mask==10^(D-1). 
It prints n/mask (the first digit), reducing 1588 n to n%mask (the remaining digits), and calling DIGITS_<D-1>. 1589 Recursively this continues until DIGITS_1 is invoked. */ 1590 1591#define DIGITS_1(mask) PR (mask) 1592#define DIGITS_2(mask) PR (mask), n %= (mask), DIGITS_1 ((mask) / 10) 1593#define DIGITS_3(mask) PR (mask), n %= (mask), DIGITS_2 ((mask) / 10) 1594#define DIGITS_4(mask) PR (mask), n %= (mask), DIGITS_3 ((mask) / 10) 1595#define DIGITS_5(mask) PR (mask), n %= (mask), DIGITS_4 ((mask) / 10) 1596#define DIGITS_6(mask) PR (mask), n %= (mask), DIGITS_5 ((mask) / 10) 1597#define DIGITS_7(mask) PR (mask), n %= (mask), DIGITS_6 ((mask) / 10) 1598#define DIGITS_8(mask) PR (mask), n %= (mask), DIGITS_7 ((mask) / 10) 1599#define DIGITS_9(mask) PR (mask), n %= (mask), DIGITS_8 ((mask) / 10) 1600#define DIGITS_10(mask) PR (mask), n %= (mask), DIGITS_9 ((mask) / 10) 1601 1602/* DIGITS_<11-20> are only used on machines with 64-bit wgints. */ 1603 1604#define DIGITS_11(mask) PR (mask), n %= (mask), DIGITS_10 ((mask) / 10) 1605#define DIGITS_12(mask) PR (mask), n %= (mask), DIGITS_11 ((mask) / 10) 1606#define DIGITS_13(mask) PR (mask), n %= (mask), DIGITS_12 ((mask) / 10) 1607#define DIGITS_14(mask) PR (mask), n %= (mask), DIGITS_13 ((mask) / 10) 1608#define DIGITS_15(mask) PR (mask), n %= (mask), DIGITS_14 ((mask) / 10) 1609#define DIGITS_16(mask) PR (mask), n %= (mask), DIGITS_15 ((mask) / 10) 1610#define DIGITS_17(mask) PR (mask), n %= (mask), DIGITS_16 ((mask) / 10) 1611#define DIGITS_18(mask) PR (mask), n %= (mask), DIGITS_17 ((mask) / 10) 1612#define DIGITS_19(mask) PR (mask), n %= (mask), DIGITS_18 ((mask) / 10) 1613 1614/* Shorthand for casting to wgint. */ 1615#define W wgint 1616 1617/* Print NUMBER to BUFFER in base 10. This is equivalent to 1618 `sprintf(buffer, "%lld", (long long) number)', only typically much 1619 faster and portable to machines without long long. 
1620 1621 The speedup may make a difference in programs that frequently 1622 convert numbers to strings. Some implementations of sprintf, 1623 particularly the one in some versions of GNU libc, have been known 1624 to be quite slow when converting integers to strings. 1625 1626 Return the pointer to the location where the terminating zero was 1627 printed. (Equivalent to calling buffer+strlen(buffer) after the 1628 function is done.) 1629 1630 BUFFER should be large enough to accept as many bytes as you expect 1631 the number to take up. On machines with 64-bit wgints the maximum 1632 needed size is 24 bytes. That includes the digits needed for the 1633 largest 64-bit number, the `-' sign in case it's negative, and the 1634 terminating '\0'. */ 1635 1636char * 1637number_to_string (char *buffer, wgint number) 1638{ 1639 char *p = buffer; 1640 wgint n = number; 1641 1642 int last_digit_char = 0; 1643 1644#if (SIZEOF_WGINT != 4) && (SIZEOF_WGINT != 8) 1645 /* We are running in a very strange environment. Leave the correct 1646 printing to sprintf. */ 1647 p += sprintf (buf, "%j", (intmax_t) (n)); 1648#else /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */ 1649 1650 if (n < 0) 1651 { 1652 if (n < -WGINT_MAX) 1653 { 1654 /* n = -n would overflow because -n would evaluate to a 1655 wgint value larger than WGINT_MAX. Need to make n 1656 smaller and handle the last digit separately. */ 1657 int last_digit = n % 10; 1658 /* The sign of n%10 is implementation-defined. */ 1659 if (last_digit < 0) 1660 last_digit_char = '0' - last_digit; 1661 else 1662 last_digit_char = '0' + last_digit; 1663 /* After n is made smaller, -n will not overflow. */ 1664 n /= 10; 1665 } 1666 1667 *p++ = '-'; 1668 n = -n; 1669 } 1670 1671 /* Use the DIGITS_ macro appropriate for N's number of digits. That 1672 way printing any N is fully open-coded without a loop or jump. 1673 (Also see description of DIGITS_*.) 
*/ 1674 1675 if (n < 10) DIGITS_1 (1); 1676 else if (n < 100) DIGITS_2 (10); 1677 else if (n < 1000) DIGITS_3 (100); 1678 else if (n < 10000) DIGITS_4 (1000); 1679 else if (n < 100000) DIGITS_5 (10000); 1680 else if (n < 1000000) DIGITS_6 (100000); 1681 else if (n < 10000000) DIGITS_7 (1000000); 1682 else if (n < 100000000) DIGITS_8 (10000000); 1683 else if (n < 1000000000) DIGITS_9 (100000000); 1684#if SIZEOF_WGINT == 4 1685 /* wgint is 32 bits wide: no number has more than 10 digits. */ 1686 else DIGITS_10 (1000000000); 1687#else 1688 /* wgint is 64 bits wide: handle numbers with 9-19 decimal digits. 1689 Constants are constructed by compile-time multiplication to avoid 1690 dealing with different notations for 64-bit constants 1691 (nL/nLL/nI64, depending on the compiler and architecture). */ 1692 else if (n < 10*(W)1000000000) DIGITS_10 (1000000000); 1693 else if (n < 100*(W)1000000000) DIGITS_11 (10*(W)1000000000); 1694 else if (n < 1000*(W)1000000000) DIGITS_12 (100*(W)1000000000); 1695 else if (n < 10000*(W)1000000000) DIGITS_13 (1000*(W)1000000000); 1696 else if (n < 100000*(W)1000000000) DIGITS_14 (10000*(W)1000000000); 1697 else if (n < 1000000*(W)1000000000) DIGITS_15 (100000*(W)1000000000); 1698 else if (n < 10000000*(W)1000000000) DIGITS_16 (1000000*(W)1000000000); 1699 else if (n < 100000000*(W)1000000000) DIGITS_17 (10000000*(W)1000000000); 1700 else if (n < 1000000000*(W)1000000000) DIGITS_18 (100000000*(W)1000000000); 1701 else DIGITS_19 (1000000000*(W)1000000000); 1702#endif 1703 1704 if (last_digit_char) 1705 *p++ = last_digit_char; 1706 1707 *p = '\0'; 1708#endif /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */ 1709 1710 return p; 1711} 1712 1713#undef PR 1714#undef W 1715#undef SPRINTF_WGINT 1716#undef DIGITS_1 1717#undef DIGITS_2 1718#undef DIGITS_3 1719#undef DIGITS_4 1720#undef DIGITS_5 1721#undef DIGITS_6 1722#undef DIGITS_7 1723#undef DIGITS_8 1724#undef DIGITS_9 1725#undef DIGITS_10 1726#undef DIGITS_11 1727#undef DIGITS_12 1728#undef 
DIGITS_13 1729#undef DIGITS_14 1730#undef DIGITS_15 1731#undef DIGITS_16 1732#undef DIGITS_17 1733#undef DIGITS_18 1734#undef DIGITS_19 1735 1736#define RING_SIZE 3 1737 1738/* Print NUMBER to a statically allocated string and return a pointer 1739 to the printed representation. 1740 1741 This function is intended to be used in conjunction with printf. 1742 It is hard to portably print wgint values: 1743 a) you cannot use printf("%ld", number) because wgint can be long 1744 long on 32-bit machines with LFS. 1745 b) you cannot use printf("%lld", number) because NUMBER could be 1746 long on 32-bit machines without LFS, or on 64-bit machines, 1747 which do not require LFS. Also, Windows doesn't support %lld. 1748 c) you cannot use printf("%j", (int_max_t) number) because not all 1749 versions of printf support "%j", the most notable being the one 1750 on Windows. 1751 d) you cannot #define WGINT_FMT to the appropriate format and use 1752 printf(WGINT_FMT, number) because that would break translations 1753 for user-visible messages, such as printf("Downloaded: %d 1754 bytes\n", number). 1755 1756 What you should use instead is printf("%s", number_to_static_string 1757 (number)). 1758 1759 CAVEAT: since the function returns pointers to static data, you 1760 must be careful to copy its result before calling it again. 1761 However, to make it more useful with printf, the function maintains 1762 an internal ring of static buffers to return. That way things like 1763 printf("%s %s", number_to_static_string (num1), 1764 number_to_static_string (num2)) work as expected. Three buffers 1765 are currently used, which means that "%s %s %s" will work, but "%s 1766 %s %s %s" won't. If you need to print more than three wgints, 1767 bump the RING_SIZE (or rethink your message.) 
*/ 1768 1769char * 1770number_to_static_string (wgint number) 1771{ 1772 static char ring[RING_SIZE][24]; 1773 static int ringpos; 1774 char *buf = ring[ringpos]; 1775 number_to_string (buf, number); 1776 ringpos = (ringpos + 1) % RING_SIZE; 1777 return buf; 1778} 1779 1780/* Converts the byte to bits format if --report-bps option is enabled 1781 */ 1782wgint 1783convert_to_bits (wgint num) 1784{ 1785 if (opt.report_bps) 1786 return num * 8; 1787 return num; 1788} 1789 1790 1791/* Determine the width of the terminal we're running on. If that's 1792 not possible, return 0. */ 1793 1794int 1795determine_screen_width (void) 1796{ 1797 /* If there's a way to get the terminal size using POSIX 1798 tcgetattr(), somebody please tell me. */ 1799#ifdef TIOCGWINSZ 1800 int fd; 1801 struct winsize wsz; 1802 1803 if (opt.lfilename != NULL) 1804 return 0; 1805 1806 fd = fileno (stderr); 1807 if (ioctl (fd, TIOCGWINSZ, &wsz) < 0) 1808 return 0; /* most likely ENOTTY */ 1809 1810 return wsz.ws_col; 1811#elif defined(WINDOWS) 1812 CONSOLE_SCREEN_BUFFER_INFO csbi; 1813 if (!GetConsoleScreenBufferInfo (GetStdHandle (STD_ERROR_HANDLE), &csbi)) 1814 return 0; 1815 return csbi.dwSize.X; 1816#else /* neither TIOCGWINSZ nor WINDOWS */ 1817 return 0; 1818#endif /* neither TIOCGWINSZ nor WINDOWS */ 1819} 1820 1821/* Whether the rnd system (either rand or [dl]rand48) has been 1822 seeded. */ 1823static int rnd_seeded; 1824 1825/* Return a random number between 0 and MAX-1, inclusive. 1826 1827 If the system does not support lrand48 and MAX is greater than the 1828 value of RAND_MAX+1 on the system, the returned value will be in 1829 the range [0, RAND_MAX]. This may be fixed in a future release. 1830 The random number generator is seeded automatically the first time 1831 it is called. 1832 1833 This uses lrand48 where available, rand elsewhere. DO NOT use it 1834 for cryptography. 
It is only meant to be used in situations where 1835 quality of the random numbers returned doesn't really matter. */ 1836 1837int 1838random_number (int max) 1839{ 1840#ifdef HAVE_DRAND48 1841 if (!rnd_seeded) 1842 { 1843 srand48 ((long) time (NULL) ^ (long) getpid ()); 1844 rnd_seeded = 1; 1845 } 1846 return lrand48 () % max; 1847#else /* not HAVE_DRAND48 */ 1848 1849 double bounded; 1850 int rnd; 1851 if (!rnd_seeded) 1852 { 1853 srand ((unsigned) time (NULL) ^ (unsigned) getpid ()); 1854 rnd_seeded = 1; 1855 } 1856 rnd = rand (); 1857 1858 /* Like rand() % max, but uses the high-order bits for better 1859 randomness on architectures where rand() is implemented using a 1860 simple congruential generator. */ 1861 1862 bounded = (double) max * rnd / (RAND_MAX + 1.0); 1863 return (int) bounded; 1864 1865#endif /* not HAVE_DRAND48 */ 1866} 1867 1868/* Return a random uniformly distributed floating point number in the 1869 [0, 1) range. Uses drand48 where available, and a really lame 1870 kludge elsewhere. */ 1871 1872double 1873random_float (void) 1874{ 1875#ifdef HAVE_DRAND48 1876 if (!rnd_seeded) 1877 { 1878 srand48 ((long) time (NULL) ^ (long) getpid ()); 1879 rnd_seeded = 1; 1880 } 1881 return drand48 (); 1882#else /* not HAVE_DRAND48 */ 1883 return ( random_number (10000) / 10000.0 1884 + random_number (10000) / (10000.0 * 10000.0) 1885 + random_number (10000) / (10000.0 * 10000.0 * 10000.0) 1886 + random_number (10000) / (10000.0 * 10000.0 * 10000.0 * 10000.0)); 1887#endif /* not HAVE_DRAND48 */ 1888} 1889 1890/* Implementation of run_with_timeout, a generic timeout-forcing 1891 routine for systems with Unix-like signal handling. 
*/ 1892 1893#ifdef USE_SIGNAL_TIMEOUT 1894# ifdef HAVE_SIGSETJMP 1895# define SETJMP(env) sigsetjmp (env, 1) 1896 1897static sigjmp_buf run_with_timeout_env; 1898 1899static void _Noreturn 1900abort_run_with_timeout (int sig) 1901{ 1902 assert (sig == SIGALRM); 1903 siglongjmp (run_with_timeout_env, -1); 1904} 1905# else /* not HAVE_SIGSETJMP */ 1906# define SETJMP(env) setjmp (env) 1907 1908static jmp_buf run_with_timeout_env; 1909 1910static void 1911abort_run_with_timeout (int sig) 1912{ 1913 assert (sig == SIGALRM); 1914 /* We don't have siglongjmp to preserve the set of blocked signals; 1915 if we longjumped out of the handler at this point, SIGALRM would 1916 remain blocked. We must unblock it manually. */ 1917 sigset_t set; 1918 sigemptyset (&set); 1919 sigaddset (&set, SIGALRM); 1920 sigprocmask (SIG_BLOCK, &set, NULL); 1921 1922 /* Now it's safe to longjump. */ 1923 longjmp (run_with_timeout_env, -1); 1924} 1925# endif /* not HAVE_SIGSETJMP */ 1926 1927/* Arrange for SIGALRM to be delivered in TIMEOUT seconds. This uses 1928 setitimer where available, alarm otherwise. 1929 1930 TIMEOUT should be non-zero. If the timeout value is so small that 1931 it would be rounded to zero, it is rounded to the least legal value 1932 instead (1us for setitimer, 1s for alarm). That ensures that 1933 SIGALRM will be delivered in all cases. */ 1934 1935static void 1936alarm_set (double timeout) 1937{ 1938#ifdef ITIMER_REAL 1939 /* Use the modern itimer interface. */ 1940 struct itimerval itv; 1941 xzero (itv); 1942 itv.it_value.tv_sec = (long) timeout; 1943 itv.it_value.tv_usec = 1000000 * (timeout - (long)timeout); 1944 if (itv.it_value.tv_sec == 0 && itv.it_value.tv_usec == 0) 1945 /* Ensure that we wait for at least the minimum interval. 1946 Specifying zero would mean "wait forever". */ 1947 itv.it_value.tv_usec = 1; 1948 setitimer (ITIMER_REAL, &itv, NULL); 1949#else /* not ITIMER_REAL */ 1950 /* Use the old alarm() interface. 
*/ 1951 int secs = (int) timeout; 1952 if (secs == 0) 1953 /* Round TIMEOUTs smaller than 1 to 1, not to zero. This is 1954 because alarm(0) means "never deliver the alarm", i.e. "wait 1955 forever", which is not what someone who specifies a 0.5s 1956 timeout would expect. */ 1957 secs = 1; 1958 alarm (secs); 1959#endif /* not ITIMER_REAL */ 1960} 1961 1962/* Cancel the alarm set with alarm_set. */ 1963 1964static void 1965alarm_cancel (void) 1966{ 1967#ifdef ITIMER_REAL 1968 struct itimerval disable; 1969 xzero (disable); 1970 setitimer (ITIMER_REAL, &disable, NULL); 1971#else /* not ITIMER_REAL */ 1972 alarm (0); 1973#endif /* not ITIMER_REAL */ 1974} 1975 1976/* Call FUN(ARG), but don't allow it to run for more than TIMEOUT 1977 seconds. Returns true if the function was interrupted with a 1978 timeout, false otherwise. 1979 1980 This works by setting up SIGALRM to be delivered in TIMEOUT seconds 1981 using setitimer() or alarm(). The timeout is enforced by 1982 longjumping out of the SIGALRM handler. This has several 1983 advantages compared to the traditional approach of relying on 1984 signals causing system calls to exit with EINTR: 1985 1986 * The callback function is *forcibly* interrupted after the 1987 timeout expires, (almost) regardless of what it was doing and 1988 whether it was in a syscall. For example, a calculation that 1989 takes a long time is interrupted as reliably as an IO 1990 operation. 1991 1992 * It works with both SYSV and BSD signals because it doesn't 1993 depend on the default setting of SA_RESTART. 1994 1995 * It doesn't require special handler setup beyond a simple call 1996 to signal(). (It does use sigsetjmp/siglongjmp, but they're 1997 optional.) 1998 1999 The only downside is that, if FUN allocates internal resources that 2000 are normally freed prior to exit from the functions, they will be 2001 lost in case of timeout. 
*/ 2002 2003bool 2004run_with_timeout (double timeout, void (*fun) (void *), void *arg) 2005{ 2006 int saved_errno; 2007 2008 if (timeout == 0) 2009 { 2010 fun (arg); 2011 return false; 2012 } 2013 2014 signal (SIGALRM, abort_run_with_timeout); 2015 if (SETJMP (run_with_timeout_env) != 0) 2016 { 2017 /* Longjumped out of FUN with a timeout. */ 2018 signal (SIGALRM, SIG_DFL); 2019 return true; 2020 } 2021 alarm_set (timeout); 2022 fun (arg); 2023 2024 /* Preserve errno in case alarm() or signal() modifies it. */ 2025 saved_errno = errno; 2026 alarm_cancel (); 2027 signal (SIGALRM, SIG_DFL); 2028 errno = saved_errno; 2029 2030 return false; 2031} 2032 2033#else /* not USE_SIGNAL_TIMEOUT */ 2034 2035#ifndef WINDOWS 2036/* A stub version of run_with_timeout that just calls FUN(ARG). Don't 2037 define it under Windows, because Windows has its own version of 2038 run_with_timeout that uses threads. */ 2039 2040bool 2041run_with_timeout (double timeout, void (*fun) (void *), void *arg) 2042{ 2043 fun (arg); 2044 return false; 2045} 2046#endif /* not WINDOWS */ 2047#endif /* not USE_SIGNAL_TIMEOUT */ 2048 2049#ifndef WINDOWS 2050 2051/* Sleep the specified amount of seconds. On machines without 2052 nanosleep(), this may sleep shorter if interrupted by signals. */ 2053 2054void 2055xsleep (double seconds) 2056{ 2057#ifdef HAVE_NANOSLEEP 2058 /* nanosleep is the preferred interface because it offers high 2059 accuracy and, more importantly, because it allows us to reliably 2060 restart receiving a signal such as SIGWINCH. (There was an 2061 actual Debian bug report about --limit-rate malfunctioning while 2062 the terminal was being resized.) */ 2063 struct timespec sleep, remaining; 2064 sleep.tv_sec = (long) seconds; 2065 sleep.tv_nsec = 1000000000 * (seconds - (long) seconds); 2066 while (nanosleep (&sleep, &remaining) < 0 && errno == EINTR) 2067 /* If nanosleep has been interrupted by a signal, adjust the 2068 sleeping period and return to sleep. 
*/ 2069 sleep = remaining; 2070#elif defined(HAVE_USLEEP) 2071 /* If usleep is available, use it in preference to select. */ 2072 if (seconds >= 1) 2073 { 2074 /* On some systems, usleep cannot handle values larger than 2075 1,000,000. If the period is larger than that, use sleep 2076 first, then add usleep for subsecond accuracy. */ 2077 sleep (seconds); 2078 seconds -= (long) seconds; 2079 } 2080 usleep (seconds * 1000000); 2081#else /* fall back select */ 2082 /* Note that, although Windows supports select, it can't be used to 2083 implement sleeping because Winsock's select doesn't implement 2084 timeout when it is passed NULL pointers for all fd sets. (But it 2085 does under Cygwin, which implements Unix-compatible select.) */ 2086 struct timeval sleep; 2087 sleep.tv_sec = (long) seconds; 2088 sleep.tv_usec = 1000000 * (seconds - (long) seconds); 2089 select (0, NULL, NULL, NULL, &sleep); 2090 /* If select returns -1 and errno is EINTR, it means we were 2091 interrupted by a signal. But without knowing how long we've 2092 actually slept, we can't return to sleep. Using gettimeofday to 2093 track sleeps is slow and unreliable due to clock skew. */ 2094#endif 2095} 2096 2097#endif /* not WINDOWS */ 2098 2099/* Encode the octets in DATA of length LENGTH to base64 format, 2100 storing the result to DEST. The output will be zero-terminated, 2101 and must point to a writable buffer of at least 2102 1+BASE64_LENGTH(length) bytes. The function returns the length of 2103 the resulting base64 data, not counting the terminating zero. 2104 2105 This implementation does not emit newlines after 76 characters of 2106 base64 data. */ 2107 2108size_t 2109base64_encode (const void *data, size_t length, char *dest) 2110{ 2111 /* Conversion table. 
*/ 2112 static const char tbl[64] = { 2113 'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P', 2114 'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f', 2115 'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v', 2116 'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/' 2117 }; 2118 /* Access bytes in DATA as unsigned char, otherwise the shifts below 2119 don't work for data with MSB set. */ 2120 const unsigned char *s = data; 2121 /* Theoretical ANSI violation when length < 3. */ 2122 const unsigned char *end = (const unsigned char *) data + length - 2; 2123 char *p = dest; 2124 2125 /* Transform the 3x8 bits to 4x6 bits, as required by base64. */ 2126 for (; s < end; s += 3) 2127 { 2128 *p++ = tbl[s[0] >> 2]; 2129 *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)]; 2130 *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)]; 2131 *p++ = tbl[s[2] & 0x3f]; 2132 } 2133 2134 /* Pad the result if necessary... */ 2135 switch (length % 3) 2136 { 2137 case 1: 2138 *p++ = tbl[s[0] >> 2]; 2139 *p++ = tbl[(s[0] & 3) << 4]; 2140 *p++ = '='; 2141 *p++ = '='; 2142 break; 2143 case 2: 2144 *p++ = tbl[s[0] >> 2]; 2145 *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)]; 2146 *p++ = tbl[((s[1] & 0xf) << 2)]; 2147 *p++ = '='; 2148 break; 2149 } 2150 /* ...and zero-terminate it. */ 2151 *p = '\0'; 2152 2153 return p - dest; 2154} 2155 2156/* Store in C the next non-whitespace character from the string, or \0 2157 when end of string is reached. */ 2158#define NEXT_CHAR(c, p) do { \ 2159 c = (unsigned char) *p++; \ 2160} while (c_isspace (c)) 2161 2162#define IS_ASCII(c) (((c) & 0x80) == 0) 2163 2164/* Decode data from BASE64 (a null-terminated string) into memory 2165 pointed to by DEST. DEST is assumed to be large enough to 2166 accomodate the decoded data, which is guaranteed to be no more than 2167 3/4*strlen(base64). 2168 2169 Since DEST is assumed to contain binary data, it is not 2170 NUL-terminated. 
The function returns the length of the data 2171 written to "TO". -1 is returned in case of error caused by malformed 2172 base64 input. 2173 2174 This function originates from Free Recode. */ 2175 2176ssize_t 2177base64_decode (const char *base64, void *dest) 2178{ 2179 /* Table of base64 values for first 128 characters. Note that this 2180 assumes ASCII (but so does Wget in other places). */ 2181 static const signed char base64_char_to_value[128] = 2182 { 2183 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0- 9 */ 2184 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 10- 19 */ 2185 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 20- 29 */ 2186 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 30- 39 */ 2187 -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, /* 40- 49 */ 2188 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, /* 50- 59 */ 2189 -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, /* 60- 69 */ 2190 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 70- 79 */ 2191 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, /* 80- 89 */ 2192 25, -1, -1, -1, -1, -1, -1, 26, 27, 28, /* 90- 99 */ 2193 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, /* 100-109 */ 2194 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, /* 110-119 */ 2195 49, 50, 51, -1, -1, -1, -1, -1 /* 120-127 */ 2196 }; 2197#define BASE64_CHAR_TO_VALUE(c) ((int) base64_char_to_value[c]) 2198#define IS_BASE64(c) ((IS_ASCII (c) && BASE64_CHAR_TO_VALUE (c) >= 0) || c == '=') 2199 2200 const char *p = base64; 2201 char *q = dest; 2202 2203 while (1) 2204 { 2205 unsigned char c; 2206 unsigned long value; 2207 2208 /* Process first byte of a quadruplet. */ 2209 NEXT_CHAR (c, p); 2210 if (!c) 2211 break; 2212 if (c == '=' || !IS_BASE64 (c)) 2213 return -1; /* illegal char while decoding base64 */ 2214 value = BASE64_CHAR_TO_VALUE (c) << 18; 2215 2216 /* Process second byte of a quadruplet. 
*/ 2217 NEXT_CHAR (c, p); 2218 if (!c) 2219 return -1; /* premature EOF while decoding base64 */ 2220 if (c == '=' || !IS_BASE64 (c)) 2221 return -1; /* illegal char while decoding base64 */ 2222 value |= BASE64_CHAR_TO_VALUE (c) << 12; 2223 *q++ = value >> 16; 2224 2225 /* Process third byte of a quadruplet. */ 2226 NEXT_CHAR (c, p); 2227 if (!c) 2228 return -1; /* premature EOF while decoding base64 */ 2229 if (!IS_BASE64 (c)) 2230 return -1; /* illegal char while decoding base64 */ 2231 2232 if (c == '=') 2233 { 2234 NEXT_CHAR (c, p); 2235 if (!c) 2236 return -1; /* premature EOF while decoding base64 */ 2237 if (c != '=') 2238 return -1; /* padding `=' expected but not found */ 2239 continue; 2240 } 2241 2242 value |= BASE64_CHAR_TO_VALUE (c) << 6; 2243 *q++ = 0xff & value >> 8; 2244 2245 /* Process fourth byte of a quadruplet. */ 2246 NEXT_CHAR (c, p); 2247 if (!c) 2248 return -1; /* premature EOF while decoding base64 */ 2249 if (c == '=') 2250 continue; 2251 if (!IS_BASE64 (c)) 2252 return -1; /* illegal char while decoding base64 */ 2253 2254 value |= BASE64_CHAR_TO_VALUE (c); 2255 *q++ = 0xff & value; 2256 } 2257#undef IS_BASE64 2258#undef BASE64_CHAR_TO_VALUE 2259 2260 return q - (char *) dest; 2261} 2262 2263#ifdef HAVE_LIBPCRE 2264/* Compiles the PCRE regex. */ 2265void * 2266compile_pcre_regex (const char *str) 2267{ 2268 const char *errbuf; 2269 int erroffset; 2270 pcre *regex = pcre_compile (str, 0, &errbuf, &erroffset, 0); 2271 if (! regex) 2272 { 2273 fprintf (stderr, _("Invalid regular expression %s, %s\n"), 2274 quote (str), errbuf); 2275 return false; 2276 } 2277 return regex; 2278} 2279#endif 2280 2281/* Compiles the POSIX regex. 
*/ 2282void * 2283compile_posix_regex (const char *str) 2284{ 2285 regex_t *regex = xmalloc (sizeof (regex_t)); 2286 int errcode = regcomp ((regex_t *) regex, str, REG_EXTENDED | REG_NOSUB); 2287 if (errcode != 0) 2288 { 2289 size_t errbuf_size = regerror (errcode, (regex_t *) regex, NULL, 0); 2290 char *errbuf = xmalloc (errbuf_size); 2291 regerror (errcode, (regex_t *) regex, errbuf, errbuf_size); 2292 fprintf (stderr, _("Invalid regular expression %s, %s\n"), 2293 quote (str), errbuf); 2294 xfree (errbuf); 2295 return NULL; 2296 } 2297 2298 return regex; 2299} 2300 2301#ifdef HAVE_LIBPCRE 2302#define OVECCOUNT 30 2303/* Matches a PCRE regex. */ 2304bool 2305match_pcre_regex (const void *regex, const char *str) 2306{ 2307 size_t l = strlen (str); 2308 int ovector[OVECCOUNT]; 2309 2310 int rc = pcre_exec ((pcre *) regex, 0, str, (int) l, 0, 0, ovector, OVECCOUNT); 2311 if (rc == PCRE_ERROR_NOMATCH) 2312 return false; 2313 else if (rc < 0) 2314 { 2315 logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"), 2316 quote (str), rc); 2317 return false; 2318 } 2319 else 2320 return true; 2321} 2322#undef OVECCOUNT 2323#endif 2324 2325/* Matches a POSIX regex. */ 2326bool 2327match_posix_regex (const void *regex, const char *str) 2328{ 2329 int rc = regexec ((regex_t *) regex, str, 0, NULL, 0); 2330 if (rc == REG_NOMATCH) 2331 return false; 2332 else if (rc == 0) 2333 return true; 2334 else 2335 { 2336 size_t errbuf_size = regerror (rc, opt.acceptregex, NULL, 0); 2337 char *errbuf = xmalloc (errbuf_size); 2338 regerror (rc, opt.acceptregex, errbuf, errbuf_size); 2339 logprintf (LOG_VERBOSE, _("Error while matching %s: %d\n"), 2340 quote (str), rc); 2341 xfree (errbuf); 2342 return false; 2343 } 2344} 2345 2346#undef IS_ASCII 2347#undef NEXT_CHAR 2348 2349/* Simple merge sort for use by stable_sort. Implementation courtesy 2350 Zeljko Vrba with additional debugging by Nenad Barbutov. 
*/ 2351 2352static void 2353mergesort_internal (void *base, void *temp, size_t size, size_t from, size_t to, 2354 int (*cmpfun) (const void *, const void *)) 2355{ 2356#define ELT(array, pos) ((char *)(array) + (pos) * size) 2357 if (from < to) 2358 { 2359 size_t i, j, k; 2360 size_t mid = (to + from) / 2; 2361 mergesort_internal (base, temp, size, from, mid, cmpfun); 2362 mergesort_internal (base, temp, size, mid + 1, to, cmpfun); 2363 i = from; 2364 j = mid + 1; 2365 for (k = from; (i <= mid) && (j <= to); k++) 2366 if (cmpfun (ELT (base, i), ELT (base, j)) <= 0) 2367 memcpy (ELT (temp, k), ELT (base, i++), size); 2368 else 2369 memcpy (ELT (temp, k), ELT (base, j++), size); 2370 while (i <= mid) 2371 memcpy (ELT (temp, k++), ELT (base, i++), size); 2372 while (j <= to) 2373 memcpy (ELT (temp, k++), ELT (base, j++), size); 2374 for (k = from; k <= to; k++) 2375 memcpy (ELT (base, k), ELT (temp, k), size); 2376 } 2377#undef ELT 2378} 2379 2380/* Stable sort with interface exactly like standard library's qsort. 2381 Uses mergesort internally, allocating temporary storage with 2382 alloca. */ 2383 2384void 2385stable_sort (void *base, size_t nmemb, size_t size, 2386 int (*cmpfun) (const void *, const void *)) 2387{ 2388 if (size > 1) 2389 { 2390 void *temp = alloca (nmemb * size * sizeof (void *)); 2391 mergesort_internal (base, temp, size, 0, nmemb - 1, cmpfun); 2392 } 2393} 2394 2395/* Print a decimal number. If it is equal to or larger than ten, the 2396 number is rounded. Otherwise it is printed with one significant 2397 digit without trailing zeros and with no more than three fractional 2398 digits total. For example, 0.1 is printed as "0.1", 0.035 is 2399 printed as "0.04", 0.0091 as "0.009", and 0.0003 as simply "0". 
2400 2401 This is useful for displaying durations because it provides 2402 order-of-magnitude information without unnecessary clutter -- 2403 long-running downloads are shown without the fractional part, and 2404 short ones still retain one significant digit. */ 2405 2406const char * 2407print_decimal (double number) 2408{ 2409 static char buf[32]; 2410 double n = number >= 0 ? number : -number; 2411 2412 if (n >= 9.95) 2413 /* Cut off at 9.95 because the below %.1f would round 9.96 to 2414 "10.0" instead of "10". OTOH 9.94 will print as "9.9". */ 2415 snprintf (buf, sizeof buf, "%.0f", number); 2416 else if (n >= 0.95) 2417 snprintf (buf, sizeof buf, "%.1f", number); 2418 else if (n >= 0.001) 2419 snprintf (buf, sizeof buf, "%.1g", number); 2420 else if (n >= 0.0005) 2421 /* round [0.0005, 0.001) to 0.001 */ 2422 snprintf (buf, sizeof buf, "%.3f", number); 2423 else 2424 /* print numbers close to 0 as 0, not 0.000 */ 2425 strcpy (buf, "0"); 2426 2427 return buf; 2428} 2429 2430/* Get the maximum name length for the given path. */ 2431/* Return 0 if length is unknown. */ 2432long 2433get_max_length (const char *path, int length, int name) 2434{ 2435 long ret; 2436 char *p, *d; 2437 2438 /* Make a copy of the path that we can modify. */ 2439 p = path ? strdupdelim (path, path + length) : strdup (""); 2440 2441 for (;;) 2442 { 2443 errno = 0; 2444 /* For an empty path query the current directory. */ 2445#if HAVE_PATHCONF 2446 ret = pathconf (*p ? p : ".", name); 2447 if (!(ret < 0 && errno == ENOENT)) 2448 break; 2449#else 2450 ret = PATH_MAX; 2451#endif 2452 2453 /* The path does not exist yet, but may be created. */ 2454 /* Already at current or root directory, give up. */ 2455 if (!*p || strcmp (p, "/") == 0) 2456 break; 2457 2458 /* Remove one directory level and try again. 
*/ 2459 d = strrchr (p, '/'); 2460 if (d == p) 2461 p[1] = '\0'; /* check root directory */ 2462 else if (d) 2463 *d = '\0'; /* remove last directory part */ 2464 else 2465 *p = '\0'; /* check current directory */ 2466 } 2467 2468 xfree (p); 2469 2470 if (ret < 0) 2471 { 2472 /* pathconf() has a message for us. */ 2473 if (errno != 0) 2474 perror ("pathconf"); 2475 2476 /* If (errno == 0) then there is no max length. 2477 Even on error return 0 so the caller can continue. */ 2478 return 0; 2479 } 2480 2481 return ret; 2482} 2483 2484#ifdef TESTING 2485 2486const char * 2487test_subdir_p(void) 2488{ 2489 static const struct { 2490 const char *d1; 2491 const char *d2; 2492 bool result; 2493 } test_array[] = { 2494 { "/somedir", "/somedir", true }, 2495 { "/somedir", "/somedir/d2", true }, 2496 { "/somedir/d1", "/somedir", false }, 2497 }; 2498 unsigned i; 2499 2500 for (i = 0; i < countof(test_array); ++i) 2501 { 2502 bool res = subdir_p (test_array[i].d1, test_array[i].d2); 2503 2504 mu_assert ("test_subdir_p: wrong result", 2505 res == test_array[i].result); 2506 } 2507 2508 return NULL; 2509} 2510 2511const char * 2512test_dir_matches_p(void) 2513{ 2514 static struct { 2515 const char *dirlist[3]; 2516 const char *dir; 2517 bool result; 2518 } test_array[] = { 2519 { { "/somedir", "/someotherdir", NULL }, "somedir", true }, 2520 { { "/somedir", "/someotherdir", NULL }, "anotherdir", false }, 2521 { { "/somedir", "/*otherdir", NULL }, "anotherdir", true }, 2522 { { "/somedir/d1", "/someotherdir", NULL }, "somedir/d1", true }, 2523 { { "*/*d1", "/someotherdir", NULL }, "somedir/d1", true }, 2524 { { "/somedir/d1", "/someotherdir", NULL }, "d1", false }, 2525 { { "!COMPLETE", NULL, NULL }, "!COMPLETE", true }, 2526 { { "*COMPLETE", NULL, NULL }, "!COMPLETE", true }, 2527 { { "*/!COMPLETE", NULL, NULL }, "foo/!COMPLETE", true }, 2528 { { "*COMPLETE", NULL, NULL }, "foo/!COMPLETE", false }, 2529 { { "*/*COMPLETE", NULL, NULL }, "foo/!COMPLETE", true }, 2530 { { "/dir 
with spaces", NULL, NULL }, "dir with spaces", true }, 2531 { { "/dir*with*spaces", NULL, NULL }, "dir with spaces", true }, 2532 { { "/Tmp/has", NULL, NULL }, "/Tmp/has space", false }, 2533 { { "/Tmp/has", NULL, NULL }, "/Tmp/has,comma", false }, 2534 }; 2535 unsigned i; 2536 2537 for (i = 0; i < countof(test_array); ++i) 2538 { 2539 bool res = dir_matches_p (test_array[i].dirlist, test_array[i].dir); 2540 2541 mu_assert ("test_dir_matches_p: wrong result", 2542 res == test_array[i].result); 2543 } 2544 2545 return NULL; 2546} 2547 2548#endif /* TESTING */ 2549