1/* Various utility functions. 2 Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 3 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc. 4 5This file is part of GNU Wget. 6 7GNU Wget is free software; you can redistribute it and/or modify 8it under the terms of the GNU General Public License as published by 9the Free Software Foundation; either version 3 of the License, or 10(at your option) any later version. 11 12GNU Wget is distributed in the hope that it will be useful, 13but WITHOUT ANY WARRANTY; without even the implied warranty of 14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15GNU General Public License for more details. 16 17You should have received a copy of the GNU General Public License 18along with Wget. If not, see <http://www.gnu.org/licenses/>. 19 20Additional permission under GNU GPL version 3 section 7 21 22If you modify this program, or any covered work, by linking or 23combining it with the OpenSSL project's OpenSSL library (or a 24modified version of that library), containing parts covered by the 25terms of the OpenSSL or SSLeay licenses, the Free Software Foundation 26grants you additional permission to convey the resulting work. 27Corresponding Source for a non-source form of such a combination 28shall include the source code for the parts of OpenSSL used as well 29as that of the covered work. */ 30 31#include "wget.h" 32 33#include <stdio.h> 34#include <stdlib.h> 35#include <string.h> 36#include <time.h> 37#ifdef HAVE_SYS_TIME_H 38# include <sys/time.h> 39#endif 40#ifdef HAVE_UNISTD_H 41# include <unistd.h> 42#endif 43#ifdef HAVE_MMAP 44# include <sys/mman.h> 45#endif 46#ifdef HAVE_PROCESS_H 47# include <process.h> /* getpid() */ 48#endif 49#ifdef HAVE_UTIME_H 50# include <utime.h> 51#endif 52#ifdef HAVE_SYS_UTIME_H 53# include <sys/utime.h> 54#endif 55#include <errno.h> 56#include <fcntl.h> 57#include <assert.h> 58#include <stdarg.h> 59#include <locale.h> 60 61/* For TIOCGWINSZ and friends: */ 62#ifdef HAVE_SYS_IOCTL_H 63# include <sys/ioctl.h> 64#endif 65#ifdef HAVE_TERMIOS_H 66# include <termios.h> 67#endif 68 69/* Needed for Unix version of run_with_timeout. */ 70#include <signal.h> 71#include <setjmp.h> 72 73#ifndef HAVE_SIGSETJMP 74/* If sigsetjmp is a macro, configure won't pick it up. */ 75# ifdef sigsetjmp 76# define HAVE_SIGSETJMP 77# endif 78#endif 79 80#if defined HAVE_SIGSETJMP || defined HAVE_SIGBLOCK 81# define USE_SIGNAL_TIMEOUT 82#endif 83 84#include "utils.h" 85#include "hash.h" 86 87#ifdef __VMS 88#include "vms.h" 89#endif /* def __VMS */ 90 91#ifdef TESTING 92#include "test.h" 93#endif 94 95static void 96memfatal (const char *context, long attempted_size) 97{ 98 /* Make sure we don't try to store part of the log line, and thus 99 call malloc. */ 100 log_set_save_context (false); 101 102 /* We have different log outputs in different situations: 103 1) output without bytes information 104 2) output with bytes information */ 105 if (attempted_size == UNKNOWN_ATTEMPTED_SIZE) 106 { 107 logprintf (LOG_ALWAYS, 108 _("%s: %s: Failed to allocate enough memory; memory exhausted.\n"), 109 exec_name, context); 110 } 111 else 112 { 113 logprintf (LOG_ALWAYS, 114 _("%s: %s: Failed to allocate %ld bytes; memory exhausted.\n"), 115 exec_name, context, attempted_size); 116 } 117 118 exit (1); 119} 120 121/* Character property table for (re-)escaping VMS ODS5 extended file 122 names. Note that this table ignores Unicode. 123 124 ODS2 valid characters: 0-9 A-Z a-z $ - _ ~ 125 126 ODS5 Invalid characters: 127 C0 control codes (0x00 to 0x1F inclusive) 128 Asterisk (*) 129 Question mark (?) 130 131 ODS5 Invalid characters only in VMS V7.2 (which no one runs, right?): 132 Double quotation marks (") 133 Backslash (\) 134 Colon (:) 135 Left angle bracket (<) 136 Right angle bracket (>) 137 Slash (/) 138 Vertical bar (|) 139 140 Characters escaped by "^": 141 SP ! # % & ' ( ) + , . ; = @ [ ] ^ ` { } ~ 142 143 Either "^_" or "^ " is accepted as a space. Period (.) is a special 144 case. Note that un-escaped < and > can also confuse a directory 145 spec. 146 147 Characters put out as ^xx: 148 7F (DEL) 149 80-9F (C1 control characters) 150 A0 (nonbreaking space) 151 FF (Latin small letter y diaeresis) 152 153 Other cases: 154 Unicode: "^Uxxxx", where "xxxx" is four hex digits. 155 156 Property table values: 157 Normal escape: 1 158 Space: 2 159 Dot: 4 160 Hex-hex escape: 8 161 ODS2 normal: 16 162 ODS2 lower case: 32 163 Hex digit: 64 164*/ 165 166unsigned char char_prop[ 256] = { 167 168/* NUL SOH STX ETX EOT ENQ ACK BEL BS HT LF VT FF CR SO SI */ 169 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 170 171/* DLE DC1 DC2 DC3 DC4 NAK SYN ETB CAN EM SUB ESC FS GS RS US */ 172 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 173 174/* SP ! " # $ % & ' ( ) * + , - . / */ 175 2, 1, 0, 1, 16, 1, 1, 1, 1, 1, 0, 1, 1, 16, 4, 0, 176 177/* 0 1 2 3 4 5 6 7 8 9 : ; < = > ? */ 178 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 0, 1, 1, 1, 1, 1, 179 180/* @ A B C D E F G H I J K L M N O */ 181 1, 80, 80, 80, 80, 80, 80, 16, 16, 16, 16, 16, 16, 16, 16, 16, 182 183/* P Q R S T U V W X Y Z [ \ ] ^ _ */ 184 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 1, 0, 1, 1, 16, 185 186/* ` a b c d e f g h i j k l m n o */ 187 1, 96, 96, 96, 96, 96, 96, 32, 32, 32, 32, 32, 32, 32, 32, 32, 188 189/* p q r s t u v w x y z { | } ~ DEL */ 190 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 1, 0, 1, 17, 8, 191 192 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 193 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 194 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 195 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 196 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 197 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 198 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 199 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8 200}; 201 202/* Utility function: like xstrdup(), but also lowercases S. */ 203 204char * 205xstrdup_lower (const char *s) 206{ 207 char *copy = xstrdup (s); 208 char *p = copy; 209 for (; *p; p++) 210 *p = c_tolower (*p); 211 return copy; 212} 213 214/* Copy the string formed by two pointers (one on the beginning, other 215 on the char after the last char) to a new, malloc-ed location. 216 0-terminate it. */ 217char * 218strdupdelim (const char *beg, const char *end) 219{ 220 char *res = xmalloc (end - beg + 1); 221 memcpy (res, beg, end - beg); 222 res[end - beg] = '\0'; 223 return res; 224} 225 226/* Parse a string containing comma-separated elements, and return a 227 vector of char pointers with the elements. Spaces following the 228 commas are ignored. */ 229char ** 230sepstring (const char *s) 231{ 232 char **res; 233 const char *p; 234 int i = 0; 235 236 if (!s || !*s) 237 return NULL; 238 res = NULL; 239 p = s; 240 while (*s) 241 { 242 if (*s == ',') 243 { 244 res = xrealloc (res, (i + 2) * sizeof (char *)); 245 res[i] = strdupdelim (p, s); 246 res[++i] = NULL; 247 ++s; 248 /* Skip the blanks following the ','. */ 249 while (c_isspace (*s)) 250 ++s; 251 p = s; 252 } 253 else 254 ++s; 255 } 256 res = xrealloc (res, (i + 2) * sizeof (char *)); 257 res[i] = strdupdelim (p, s); 258 res[i + 1] = NULL; 259 return res; 260} 261 262/* Like sprintf, but prints into a string of sufficient size freshly 263 allocated with malloc, which is returned. If unable to print due 264 to invalid format, returns NULL. Inability to allocate needed 265 memory results in abort, as with xmalloc. This is in spirit 266 similar to the GNU/BSD extension asprintf, but somewhat easier to 267 use. 268 269 Internally the function either calls vasprintf or loops around 270 vsnprintf until the correct size is found. Since Wget also ships a 271 fallback implementation of vsnprintf, this should be portable. */ 272 273/* Constant is using for limits memory allocation for text buffer. 274 Applicable in situation when: vasprintf is not available in the system 275 and vsnprintf return -1 when long line is truncated (in old versions of 276 glibc and in other system where C99 doesn`t support) */ 277 278#define FMT_MAX_LENGTH 1048576 279 280char * 281aprintf (const char *fmt, ...) 282{ 283#if defined HAVE_VASPRINTF && !defined DEBUG_MALLOC 284 /* Use vasprintf. */ 285 int ret; 286 va_list args; 287 char *str; 288 va_start (args, fmt); 289 ret = vasprintf (&str, fmt, args); 290 va_end (args); 291 if (ret < 0 && errno == ENOMEM) 292 memfatal ("aprintf", UNKNOWN_ATTEMPTED_SIZE); /* for consistency 293 with xmalloc/xrealloc */ 294 else if (ret < 0) 295 return NULL; 296 return str; 297#else /* not HAVE_VASPRINTF */ 298 299 /* vasprintf is unavailable. snprintf into a small buffer and 300 resize it as necessary. */ 301 int size = 32; 302 char *str = xmalloc (size); 303 304 /* #### This code will infloop and eventually abort in xrealloc if 305 passed a FMT that causes snprintf to consistently return -1. */ 306 307 while (1) 308 { 309 int n; 310 va_list args; 311 312 va_start (args, fmt); 313 n = vsnprintf (str, size, fmt, args); 314 va_end (args); 315 316 /* If the printing worked, return the string. */ 317 if (n > -1 && n < size) 318 return str; 319 320 /* Else try again with a larger buffer. */ 321 if (n > -1) /* C99 */ 322 size = n + 1; /* precisely what is needed */ 323 else if (size >= FMT_MAX_LENGTH) /* We have a huge buffer, */ 324 { /* maybe we have some wrong 325 format string? */ 326 logprintf (LOG_ALWAYS, 327 _("%s: aprintf: text buffer is too big (%ld bytes), " 328 "aborting.\n"), 329 exec_name, size); /* printout a log message */ 330 abort (); /* and abort... */ 331 } 332 else 333 { 334 /* else, we continue to grow our 335 * buffer: Twice the old size. */ 336 size <<= 1; 337 } 338 str = xrealloc (str, size); 339 } 340#endif /* not HAVE_VASPRINTF */ 341} 342 343/* Concatenate the NULL-terminated list of string arguments into 344 freshly allocated space. */ 345 346char * 347concat_strings (const char *str0, ...) 348{ 349 va_list args; 350 int saved_lengths[5]; /* inspired by Apache's apr_pstrcat */ 351 char *ret, *p; 352 353 const char *next_str; 354 int total_length = 0; 355 size_t argcount; 356 357 /* Calculate the length of and allocate the resulting string. */ 358 359 argcount = 0; 360 va_start (args, str0); 361 for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *)) 362 { 363 int len = strlen (next_str); 364 if (argcount < countof (saved_lengths)) 365 saved_lengths[argcount++] = len; 366 total_length += len; 367 } 368 va_end (args); 369 p = ret = xmalloc (total_length + 1); 370 371 /* Copy the strings into the allocated space. */ 372 373 argcount = 0; 374 va_start (args, str0); 375 for (next_str = str0; next_str != NULL; next_str = va_arg (args, char *)) 376 { 377 int len; 378 if (argcount < countof (saved_lengths)) 379 len = saved_lengths[argcount++]; 380 else 381 len = strlen (next_str); 382 memcpy (p, next_str, len); 383 p += len; 384 } 385 va_end (args); 386 *p = '\0'; 387 388 return ret; 389} 390 391/* Format the provided time according to the specified format. The 392 format is a string with format elements supported by strftime. */ 393 394static char * 395fmttime (time_t t, const char *fmt) 396{ 397 static char output[32]; 398 struct tm *tm = localtime(&t); 399 if (!tm) 400 abort (); 401 if (!strftime(output, sizeof(output), fmt, tm)) 402 abort (); 403 return output; 404} 405 406/* Return pointer to a static char[] buffer in which zero-terminated 407 string-representation of TM (in form hh:mm:ss) is printed. 408 409 If TM is NULL, the current time will be used. */ 410 411char * 412time_str (time_t t) 413{ 414 return fmttime(t, "%H:%M:%S"); 415} 416 417/* Like the above, but include the date: YYYY-MM-DD hh:mm:ss. */ 418 419char * 420datetime_str (time_t t) 421{ 422 return fmttime(t, "%Y-%m-%d %H:%M:%S"); 423} 424 425/* The Windows versions of the following two functions are defined in 426 mswindows.c. On MSDOS this function should never be called. */ 427 428#ifdef __VMS 429 430void 431fork_to_background (void) 432{ 433 return; 434} 435 436#else /* def __VMS */ 437 438#if !defined(WINDOWS) && !defined(MSDOS) 439void 440fork_to_background (void) 441{ 442 pid_t pid; 443 /* Whether we arrange our own version of opt.lfilename here. */ 444 bool logfile_changed = false; 445 446 if (!opt.lfilename && (!opt.quiet || opt.server_response)) 447 { 448 /* We must create the file immediately to avoid either a race 449 condition (which arises from using unique_name and failing to 450 use fopen_excl) or lying to the user about the log file name 451 (which arises from using unique_name, printing the name, and 452 using fopen_excl later on.) */ 453 FILE *new_log_fp = unique_create (DEFAULT_LOGFILE, false, &opt.lfilename); 454 if (new_log_fp) 455 { 456 logfile_changed = true; 457 fclose (new_log_fp); 458 } 459 } 460 pid = fork (); 461 if (pid < 0) 462 { 463 /* parent, error */ 464 perror ("fork"); 465 exit (1); 466 } 467 else if (pid != 0) 468 { 469 /* parent, no error */ 470 printf (_("Continuing in background, pid %d.\n"), (int) pid); 471 if (logfile_changed) 472 printf (_("Output will be written to %s.\n"), quote (opt.lfilename)); 473 exit (0); /* #### should we use _exit()? */ 474 } 475 476 /* child: give up the privileges and keep running. */ 477 setsid (); 478 freopen ("/dev/null", "r", stdin); 479 freopen ("/dev/null", "w", stdout); 480 freopen ("/dev/null", "w", stderr); 481} 482#endif /* !WINDOWS && !MSDOS */ 483 484#endif /* def __VMS [else] */ 485 486 487/* "Touch" FILE, i.e. make its mtime ("modified time") equal the time 488 specified with TM. The atime ("access time") is set to the current 489 time. */ 490 491void 492touch (const char *file, time_t tm) 493{ 494#ifdef HAVE_STRUCT_UTIMBUF 495 struct utimbuf times; 496#else 497 struct { 498 time_t actime; 499 time_t modtime; 500 } times; 501#endif 502 times.modtime = tm; 503 times.actime = time (NULL); 504 if (utime (file, ×) == -1) 505 logprintf (LOG_NOTQUIET, "utime(%s): %s\n", file, strerror (errno)); 506} 507 508/* Checks if FILE is a symbolic link, and removes it if it is. Does 509 nothing under MS-Windows. */ 510int 511remove_link (const char *file) 512{ 513 int err = 0; 514 struct_stat st; 515 516 if (lstat (file, &st) == 0 && S_ISLNK (st.st_mode)) 517 { 518 DEBUGP (("Unlinking %s (symlink).\n", file)); 519 err = unlink (file); 520 if (err != 0) 521 logprintf (LOG_VERBOSE, _("Failed to unlink symlink %s: %s\n"), 522 quote (file), strerror (errno)); 523 } 524 return err; 525} 526 527/* Does FILENAME exist? This is quite a lousy implementation, since 528 it supplies no error codes -- only a yes-or-no answer. Thus it 529 will return that a file does not exist if, e.g., the directory is 530 unreadable. I don't mind it too much currently, though. The 531 proper way should, of course, be to have a third, error state, 532 other than true/false, but that would introduce uncalled-for 533 additional complexity to the callers. */ 534bool 535file_exists_p (const char *filename) 536{ 537#ifdef HAVE_ACCESS 538 return access (filename, F_OK) >= 0; 539#else 540 struct_stat buf; 541 return stat (filename, &buf) >= 0; 542#endif 543} 544 545/* Returns 0 if PATH is a directory, 1 otherwise (any kind of file). 546 Returns 0 on error. */ 547bool 548file_non_directory_p (const char *path) 549{ 550 struct_stat buf; 551 /* Use lstat() rather than stat() so that symbolic links pointing to 552 directories can be identified correctly. */ 553 if (lstat (path, &buf) != 0) 554 return false; 555 return S_ISDIR (buf.st_mode) ? false : true; 556} 557 558/* Return the size of file named by FILENAME, or -1 if it cannot be 559 opened or seeked into. */ 560wgint 561file_size (const char *filename) 562{ 563#if defined(HAVE_FSEEKO) && defined(HAVE_FTELLO) 564 wgint size; 565 /* We use fseek rather than stat to determine the file size because 566 that way we can also verify that the file is readable without 567 explicitly checking for permissions. Inspired by the POST patch 568 by Arnaud Wylie. */ 569 FILE *fp = fopen (filename, "rb"); 570 if (!fp) 571 return -1; 572 fseeko (fp, 0, SEEK_END); 573 size = ftello (fp); 574 fclose (fp); 575 return size; 576#else 577 struct_stat st; 578 if (stat (filename, &st) < 0) 579 return -1; 580 return st.st_size; 581#endif 582} 583 584/* 2005-02-19 SMS. 585 If no UNIQ_SEP is defined (as on VMS), have unique_name() return the 586 original name. With the VMS file systems' versioning, everything 587 should be fine, and appending ".NN" just causes trouble. 588*/ 589 590#ifdef UNIQ_SEP 591 592/* stat file names named PREFIX.1, PREFIX.2, etc., until one that 593 doesn't exist is found. Return a freshly allocated copy of the 594 unused file name. */ 595 596static char * 597unique_name_1 (const char *prefix) 598{ 599 int count = 1; 600 int plen = strlen (prefix); 601 char *template = (char *)alloca (plen + 1 + 24); 602 char *template_tail = template + plen; 603 604 memcpy (template, prefix, plen); 605 *template_tail++ = UNIQ_SEP; 606 607 do 608 number_to_string (template_tail, count++); 609 while (file_exists_p (template)); 610 611 return xstrdup (template); 612} 613 614/* Return a unique file name, based on FILE. 615 616 More precisely, if FILE doesn't exist, it is returned unmodified. 617 If not, FILE.1 is tried, then FILE.2, etc. The first FILE.<number> 618 file name that doesn't exist is returned. 619 620 2005-02-19 SMS. "." is now UNIQ_SEP, and may be different. 621 622 The resulting file is not created, only verified that it didn't 623 exist at the point in time when the function was called. 624 Therefore, where security matters, don't rely that the file created 625 by this function exists until you open it with O_EXCL or 626 equivalent. 627 628 If ALLOW_PASSTHROUGH is 0, it always returns a freshly allocated 629 string. Otherwise, it may return FILE if the file doesn't exist 630 (and therefore doesn't need changing). */ 631 632char * 633unique_name (const char *file, bool allow_passthrough) 634{ 635 /* If the FILE itself doesn't exist, return it without 636 modification. */ 637 if (!file_exists_p (file)) 638 return allow_passthrough ? (char *)file : xstrdup (file); 639 640 /* Otherwise, find a numeric suffix that results in unused file name 641 and return it. */ 642 return unique_name_1 (file); 643} 644 645#else /* def UNIQ_SEP */ 646 647/* Dummy unique_name() for VMS. Return the original name as easily as 648 possible. 649*/ 650char * 651unique_name (const char *file, bool allow_passthrough) 652{ 653 /* Return the FILE itself, without modification, irregardful. */ 654 return allow_passthrough ? (char *)file : xstrdup (file); 655} 656 657#endif /* def UNIQ_SEP [else] */ 658 659/* Create a file based on NAME, except without overwriting an existing 660 file with that name. Providing O_EXCL is correctly implemented, 661 this function does not have the race condition associated with 662 opening the file returned by unique_name. */ 663 664FILE * 665unique_create (const char *name, bool binary, char **opened_name) 666{ 667 /* unique file name, based on NAME */ 668 char *uname = unique_name (name, false); 669 FILE *fp; 670 while ((fp = fopen_excl (uname, binary)) == NULL && errno == EEXIST) 671 { 672 xfree (uname); 673 uname = unique_name (name, false); 674 } 675 if (opened_name && fp != NULL) 676 { 677 if (fp) 678 *opened_name = uname; 679 else 680 { 681 *opened_name = NULL; 682 xfree (uname); 683 } 684 } 685 else 686 xfree (uname); 687 return fp; 688} 689 690/* Open the file for writing, with the addition that the file is 691 opened "exclusively". This means that, if the file already exists, 692 this function will *fail* and errno will be set to EEXIST. If 693 BINARY is set, the file will be opened in binary mode, equivalent 694 to fopen's "wb". 695 696 If opening the file fails for any reason, including the file having 697 previously existed, this function returns NULL and sets errno 698 appropriately. */ 699 700FILE * 701fopen_excl (const char *fname, int binary) 702{ 703 int fd; 704#ifdef O_EXCL 705 706/* 2005-04-14 SMS. 707 VMS lacks O_BINARY, but makes up for it in weird and wonderful ways. 708 It also has file versions which obviate all the O_EXCL effort. 709 O_TRUNC (something of a misnomer) requests a new version. 710*/ 711# ifdef __VMS 712/* Common open() optional arguments: 713 sequential access only, access callback function. 714*/ 715# define OPEN_OPT_ARGS "fop=sqo", "acc", acc_cb, &open_id 716 717 int open_id; 718 int flags = O_WRONLY | O_CREAT | O_TRUNC; 719 720 if (binary > 1) 721 { 722 open_id = 11; 723 fd = open( fname, /* File name. */ 724 flags, /* Flags. */ 725 0777, /* Mode for default protection. */ 726 "ctx=bin,stm", /* Binary, stream access. */ 727 "rfm=stmlf", /* Stream_LF. */ 728 OPEN_OPT_ARGS); /* Access callback. */ 729 } 730 else if (binary) 731 { 732 open_id = 12; 733 fd = open( fname, /* File name. */ 734 flags, /* Flags. */ 735 0777, /* Mode for default protection. */ 736 "ctx=bin,stm", /* Binary, stream access. */ 737 "rfm=fix", /* Fixed-length, */ 738 "mrs=512", /* 512-byte records. */ 739 OPEN_OPT_ARGS); /* Access callback. */ 740 } 741 else 742 { 743 open_id = 13; 744 fd = open( fname, /* File name. */ 745 flags, /* Flags. */ 746 0777, /* Mode for default protection. 747*/ 748 "rfm=stmlf", /* Stream_LF. */ 749 OPEN_OPT_ARGS); /* Access callback. */ 750 } 751# else /* def __VMS */ 752 int flags = O_WRONLY | O_CREAT | O_EXCL; 753# ifdef O_BINARY 754 if (binary) 755 flags |= O_BINARY; 756# endif 757 fd = open (fname, flags, 0666); 758# endif /* def __VMS [else] */ 759 760 if (fd < 0) 761 return NULL; 762 return fdopen (fd, binary ? "wb" : "w"); 763#else /* not O_EXCL */ 764 /* Manually check whether the file exists. This is prone to race 765 conditions, but systems without O_EXCL haven't deserved 766 better. */ 767 if (file_exists_p (fname)) 768 { 769 errno = EEXIST; 770 return NULL; 771 } 772 return fopen (fname, binary ? "wb" : "w"); 773#endif /* not O_EXCL */ 774} 775 776/* Create DIRECTORY. If some of the pathname components of DIRECTORY 777 are missing, create them first. In case any mkdir() call fails, 778 return its error status. Returns 0 on successful completion. 779 780 The behaviour of this function should be identical to the behaviour 781 of `mkdir -p' on systems where mkdir supports the `-p' option. */ 782int 783make_directory (const char *directory) 784{ 785 int i, ret, quit = 0; 786 char *dir; 787 788 /* Make a copy of dir, to be able to write to it. Otherwise, the 789 function is unsafe if called with a read-only char *argument. */ 790 STRDUP_ALLOCA (dir, directory); 791 792 /* If the first character of dir is '/', skip it (and thus enable 793 creation of absolute-pathname directories. */ 794 for (i = (*dir == '/'); 1; ++i) 795 { 796 for (; dir[i] && dir[i] != '/'; i++) 797 ; 798 if (!dir[i]) 799 quit = 1; 800 dir[i] = '\0'; 801 /* Check whether the directory already exists. Allow creation of 802 of intermediate directories to fail, as the initial path components 803 are not necessarily directories! */ 804 if (!file_exists_p (dir)) 805 ret = mkdir (dir, 0777); 806 else 807 ret = 0; 808 if (quit) 809 break; 810 else 811 dir[i] = '/'; 812 } 813 return ret; 814} 815 816/* Merge BASE with FILE. BASE can be a directory or a file name, FILE 817 should be a file name. 818 819 file_merge("/foo/bar", "baz") => "/foo/baz" 820 file_merge("/foo/bar/", "baz") => "/foo/bar/baz" 821 file_merge("foo", "bar") => "bar" 822 823 In other words, it's a simpler and gentler version of uri_merge. */ 824 825char * 826file_merge (const char *base, const char *file) 827{ 828 char *result; 829 const char *cut = (const char *)strrchr (base, '/'); 830 831 if (!cut) 832 return xstrdup (file); 833 834 result = xmalloc (cut - base + 1 + strlen (file) + 1); 835 memcpy (result, base, cut - base); 836 result[cut - base] = '/'; 837 strcpy (result + (cut - base) + 1, file); 838 839 return result; 840} 841 842/* Like fnmatch, but performs a case-insensitive match. */ 843 844int 845fnmatch_nocase (const char *pattern, const char *string, int flags) 846{ 847#ifdef FNM_CASEFOLD 848 /* The FNM_CASEFOLD flag started as a GNU extension, but it is now 849 also present on *BSD platforms, and possibly elsewhere. */ 850 return fnmatch (pattern, string, flags | FNM_CASEFOLD); 851#else 852 /* Turn PATTERN and STRING to lower case and call fnmatch on them. */ 853 char *patcopy = (char *) alloca (strlen (pattern) + 1); 854 char *strcopy = (char *) alloca (strlen (string) + 1); 855 char *p; 856 for (p = patcopy; *pattern; pattern++, p++) 857 *p = c_tolower (*pattern); 858 *p = '\0'; 859 for (p = strcopy; *string; string++, p++) 860 *p = c_tolower (*string); 861 *p = '\0'; 862 return fnmatch (patcopy, strcopy, flags); 863#endif 864} 865 866static bool in_acclist (const char *const *, const char *, bool); 867 868/* Determine whether a file is acceptable to be followed, according to 869 lists of patterns to accept/reject. */ 870bool 871acceptable (const char *s) 872{ 873 int l = strlen (s); 874 875 while (l && s[l] != '/') 876 --l; 877 if (s[l] == '/') 878 s += (l + 1); 879 if (opt.accepts) 880 { 881 if (opt.rejects) 882 return (in_acclist ((const char *const *)opt.accepts, s, true) 883 && !in_acclist ((const char *const *)opt.rejects, s, true)); 884 else 885 return in_acclist ((const char *const *)opt.accepts, s, true); 886 } 887 else if (opt.rejects) 888 return !in_acclist ((const char *const *)opt.rejects, s, true); 889 return true; 890} 891 892/* Check if D2 is a subdirectory of D1. E.g. if D1 is `/something', subdir_p() 893 will return true if and only if D2 begins with `/something/' or is exactly 894 '/something'. */ 895bool 896subdir_p (const char *d1, const char *d2) 897{ 898 if (*d1 == '\0') 899 return true; 900 if (!opt.ignore_case) 901 for (; *d1 && *d2 && (*d1 == *d2); ++d1, ++d2) 902 ; 903 else 904 for (; *d1 && *d2 && (c_tolower (*d1) == c_tolower (*d2)); ++d1, ++d2) 905 ; 906 907 return *d1 == '\0' && (*d2 == '\0' || *d2 == '/'); 908} 909 910/* Iterate through DIRLIST (which must be NULL-terminated), and return the 911 first element that matches DIR, through wildcards or front comparison (as 912 appropriate). */ 913static bool 914dir_matches_p (char **dirlist, const char *dir) 915{ 916 char **x; 917 int (*matcher) (const char *, const char *, int) 918 = opt.ignore_case ? fnmatch_nocase : fnmatch; 919 920 for (x = dirlist; *x; x++) 921 { 922 /* Remove leading '/' */ 923 char *p = *x + (**x == '/'); 924 if (has_wildcards_p (p)) 925 { 926 if (matcher (p, dir, FNM_PATHNAME) == 0) 927 break; 928 } 929 else 930 { 931 if (subdir_p (p, dir)) 932 break; 933 } 934 } 935 936 return *x ? true : false; 937} 938 939/* Returns whether DIRECTORY is acceptable for download, wrt the 940 include/exclude lists. 941 942 The leading `/' is ignored in paths; relative and absolute paths 943 may be freely intermixed. */ 944 945bool 946accdir (const char *directory) 947{ 948 /* Remove starting '/'. */ 949 if (*directory == '/') 950 ++directory; 951 if (opt.includes) 952 { 953 if (!dir_matches_p (opt.includes, directory)) 954 return false; 955 } 956 if (opt.excludes) 957 { 958 if (dir_matches_p (opt.excludes, directory)) 959 return false; 960 } 961 return true; 962} 963 964/* Return true if STRING ends with TAIL. For instance: 965 966 match_tail ("abc", "bc", false) -> 1 967 match_tail ("abc", "ab", false) -> 0 968 match_tail ("abc", "abc", false) -> 1 969 970 If FOLD_CASE is true, the comparison will be case-insensitive. */ 971 972bool 973match_tail (const char *string, const char *tail, bool fold_case) 974{ 975 int i, j; 976 977 /* We want this to be fast, so we code two loops, one with 978 case-folding, one without. */ 979 980 if (!fold_case) 981 { 982 for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--) 983 if (string[i] != tail[j]) 984 break; 985 } 986 else 987 { 988 for (i = strlen (string), j = strlen (tail); i >= 0 && j >= 0; i--, j--) 989 if (c_tolower (string[i]) != c_tolower (tail[j])) 990 break; 991 } 992 993 /* If the tail was exhausted, the match was succesful. */ 994 if (j == -1) 995 return true; 996 else 997 return false; 998} 999 1000/* Checks whether string S matches each element of ACCEPTS. A list 1001 element are matched either with fnmatch() or match_tail(), 1002 according to whether the element contains wildcards or not. 1003 1004 If the BACKWARD is false, don't do backward comparison -- just compare 1005 them normally. */ 1006static bool 1007in_acclist (const char *const *accepts, const char *s, bool backward) 1008{ 1009 for (; *accepts; accepts++) 1010 { 1011 if (has_wildcards_p (*accepts)) 1012 { 1013 int res = opt.ignore_case 1014 ? fnmatch_nocase (*accepts, s, 0) : fnmatch (*accepts, s, 0); 1015 /* fnmatch returns 0 if the pattern *does* match the string. */ 1016 if (res == 0) 1017 return true; 1018 } 1019 else 1020 { 1021 if (backward) 1022 { 1023 if (match_tail (s, *accepts, opt.ignore_case)) 1024 return true; 1025 } 1026 else 1027 { 1028 int cmp = opt.ignore_case 1029 ? strcasecmp (s, *accepts) : strcmp (s, *accepts); 1030 if (cmp == 0) 1031 return true; 1032 } 1033 } 1034 } 1035 return false; 1036} 1037 1038/* Return the location of STR's suffix (file extension). Examples: 1039 suffix ("foo.bar") -> "bar" 1040 suffix ("foo.bar.baz") -> "baz" 1041 suffix ("/foo/bar") -> NULL 1042 suffix ("/foo.bar/baz") -> NULL */ 1043char * 1044suffix (const char *str) 1045{ 1046 int i; 1047 1048 for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--) 1049 ; 1050 1051 if (str[i++] == '.') 1052 return (char *)str + i; 1053 else 1054 return NULL; 1055} 1056 1057/* Return true if S contains globbing wildcards (`*', `?', `[' or 1058 `]'). */ 1059 1060bool 1061has_wildcards_p (const char *s) 1062{ 1063 for (; *s; s++) 1064 if (*s == '*' || *s == '?' || *s == '[' || *s == ']') 1065 return true; 1066 return false; 1067} 1068 1069/* Return true if FNAME ends with a typical HTML suffix. The 1070 following (case-insensitive) suffixes are presumed to be HTML 1071 files: 1072 1073 html 1074 htm 1075 ?html (`?' matches one character) 1076 1077 #### CAVEAT. This is not necessarily a good indication that FNAME 1078 refers to a file that contains HTML! */ 1079bool 1080has_html_suffix_p (const char *fname) 1081{ 1082 char *suf; 1083 1084 if ((suf = suffix (fname)) == NULL) 1085 return false; 1086 if (!strcasecmp (suf, "html")) 1087 return true; 1088 if (!strcasecmp (suf, "htm")) 1089 return true; 1090 if (suf[0] && !strcasecmp (suf + 1, "html")) 1091 return true; 1092 return false; 1093} 1094 1095/* Read a line from FP and return the pointer to freshly allocated 1096 storage. The storage space is obtained through malloc() and should 1097 be freed with free() when it is no longer needed. 1098 1099 The length of the line is not limited, except by available memory. 1100 The newline character at the end of line is retained. The line is 1101 terminated with a zero character. 1102 1103 After end-of-file is encountered without anything being read, NULL 1104 is returned. NULL is also returned on error. To distinguish 1105 between these two cases, use the stdio function ferror(). */ 1106 1107char * 1108read_whole_line (FILE *fp) 1109{ 1110 int length = 0; 1111 int bufsize = 82; 1112 char *line = xmalloc (bufsize); 1113 1114 while (fgets (line + length, bufsize - length, fp)) 1115 { 1116 length += strlen (line + length); 1117 if (length == 0) 1118 /* Possible for example when reading from a binary file where 1119 a line begins with \0. */ 1120 continue; 1121 1122 if (line[length - 1] == '\n') 1123 break; 1124 1125 /* fgets() guarantees to read the whole line, or to use up the 1126 space we've given it. We can double the buffer 1127 unconditionally. */ 1128 bufsize <<= 1; 1129 line = xrealloc (line, bufsize); 1130 } 1131 if (length == 0 || ferror (fp)) 1132 { 1133 xfree (line); 1134 return NULL; 1135 } 1136 if (length + 1 < bufsize) 1137 /* Relieve the memory from our exponential greediness. We say 1138 `length + 1' because the terminating \0 is not included in 1139 LENGTH. We don't need to zero-terminate the string ourselves, 1140 though, because fgets() does that. */ 1141 line = xrealloc (line, length + 1); 1142 return line; 1143} 1144 1145/* Read FILE into memory. A pointer to `struct file_memory' are 1146 returned; use struct element `content' to access file contents, and 1147 the element `length' to know the file length. `content' is *not* 1148 zero-terminated, and you should *not* read or write beyond the [0, 1149 length) range of characters. 1150 1151 After you are done with the file contents, call read_file_free to 1152 release the memory. 1153 1154 Depending on the operating system and the type of file that is 1155 being read, read_file() either mmap's the file into memory, or 1156 reads the file into the core using read(). 1157 1158 If file is named "-", fileno(stdin) is used for reading instead. 1159 If you want to read from a real file named "-", use "./-" instead. */ 1160 1161struct file_memory * 1162read_file (const char *file) 1163{ 1164 int fd; 1165 struct file_memory *fm; 1166 long size; 1167 bool inhibit_close = false; 1168 1169 /* Some magic in the finest tradition of Perl and its kin: if FILE 1170 is "-", just use stdin. */ 1171 if (HYPHENP (file)) 1172 { 1173 fd = fileno (stdin); 1174 inhibit_close = true; 1175 /* Note that we don't inhibit mmap() in this case. If stdin is 1176 redirected from a regular file, mmap() will still work. */ 1177 } 1178 else 1179 fd = open (file, O_RDONLY); 1180 if (fd < 0) 1181 return NULL; 1182 fm = xnew (struct file_memory); 1183 1184#ifdef HAVE_MMAP 1185 { 1186 struct_fstat buf; 1187 if (fstat (fd, &buf) < 0) 1188 goto mmap_lose; 1189 fm->length = buf.st_size; 1190 /* NOTE: As far as I know, the callers of this function never 1191 modify the file text. Relying on this would enable us to 1192 specify PROT_READ and MAP_SHARED for a marginal gain in 1193 efficiency, but at some cost to generality. */ 1194 fm->content = mmap (NULL, fm->length, PROT_READ | PROT_WRITE, 1195 MAP_PRIVATE, fd, 0); 1196 if (fm->content == (char *)MAP_FAILED) 1197 goto mmap_lose; 1198 if (!inhibit_close) 1199 close (fd); 1200 1201 fm->mmap_p = 1; 1202 return fm; 1203 } 1204 1205 mmap_lose: 1206 /* The most common reason why mmap() fails is that FD does not point 1207 to a plain file. However, it's also possible that mmap() doesn't 1208 work for a particular type of file. Therefore, whenever mmap() 1209 fails, we just fall back to the regular method. */ 1210#endif /* HAVE_MMAP */ 1211 1212 fm->length = 0; 1213 size = 512; /* number of bytes fm->contents can 1214 hold at any given time. */ 1215 fm->content = xmalloc (size); 1216 while (1) 1217 { 1218 wgint nread; 1219 if (fm->length > size / 2) 1220 { 1221 /* #### I'm not sure whether the whole exponential-growth 1222 thing makes sense with kernel read. On Linux at least, 1223 read() refuses to read more than 4K from a file at a 1224 single chunk anyway. But other Unixes might optimize it 1225 better, and it doesn't *hurt* anything, so I'm leaving 1226 it. */ 1227 1228 /* Normally, we grow SIZE exponentially to make the number 1229 of calls to read() and realloc() logarithmic in relation 1230 to file size. However, read() can read an amount of data 1231 smaller than requested, and it would be unreasonable to 1232 double SIZE every time *something* was read. Therefore, 1233 we double SIZE only when the length exceeds half of the 1234 entire allocated size. */ 1235 size <<= 1; 1236 fm->content = xrealloc (fm->content, size); 1237 } 1238 nread = read (fd, fm->content + fm->length, size - fm->length); 1239 if (nread > 0) 1240 /* Successful read. */ 1241 fm->length += nread; 1242 else if (nread < 0) 1243 /* Error. */ 1244 goto lose; 1245 else 1246 /* EOF */ 1247 break; 1248 } 1249 if (!inhibit_close) 1250 close (fd); 1251 if (size > fm->length && fm->length != 0) 1252 /* Due to exponential growth of fm->content, the allocated region 1253 might be much larger than what is actually needed. */ 1254 fm->content = xrealloc (fm->content, fm->length); 1255 fm->mmap_p = 0; 1256 return fm; 1257 1258 lose: 1259 if (!inhibit_close) 1260 close (fd); 1261 xfree (fm->content); 1262 xfree (fm); 1263 return NULL; 1264} 1265 1266/* Release the resources held by FM. Specifically, this calls 1267 munmap() or xfree() on fm->content, depending whether mmap or 1268 malloc/read were used to read in the file. It also frees the 1269 memory needed to hold the FM structure itself. */ 1270 1271void 1272read_file_free (struct file_memory *fm) 1273{ 1274#ifdef HAVE_MMAP 1275 if (fm->mmap_p) 1276 { 1277 munmap (fm->content, fm->length); 1278 } 1279 else 1280#endif 1281 { 1282 xfree (fm->content); 1283 } 1284 xfree (fm); 1285} 1286 1287/* Free the pointers in a NULL-terminated vector of pointers, then 1288 free the pointer itself. */ 1289void 1290free_vec (char **vec) 1291{ 1292 if (vec) 1293 { 1294 char **p = vec; 1295 while (*p) 1296 xfree (*p++); 1297 xfree (vec); 1298 } 1299} 1300 1301/* Append vector V2 to vector V1. The function frees V2 and 1302 reallocates V1 (thus you may not use the contents of neither 1303 pointer after the call). If V1 is NULL, V2 is returned. */ 1304char ** 1305merge_vecs (char **v1, char **v2) 1306{ 1307 int i, j; 1308 1309 if (!v1) 1310 return v2; 1311 if (!v2) 1312 return v1; 1313 if (!*v2) 1314 { 1315 /* To avoid j == 0 */ 1316 xfree (v2); 1317 return v1; 1318 } 1319 /* Count v1. */ 1320 for (i = 0; v1[i]; i++) 1321 ; 1322 /* Count v2. */ 1323 for (j = 0; v2[j]; j++) 1324 ; 1325 /* Reallocate v1. */ 1326 v1 = xrealloc (v1, (i + j + 1) * sizeof (char **)); 1327 memcpy (v1 + i, v2, (j + 1) * sizeof (char *)); 1328 xfree (v2); 1329 return v1; 1330} 1331 1332/* Append a freshly allocated copy of STR to VEC. If VEC is NULL, it 1333 is allocated as needed. Return the new value of the vector. */ 1334 1335char ** 1336vec_append (char **vec, const char *str) 1337{ 1338 int cnt; /* count of vector elements, including 1339 the one we're about to append */ 1340 if (vec != NULL) 1341 { 1342 for (cnt = 0; vec[cnt]; cnt++) 1343 ; 1344 ++cnt; 1345 } 1346 else 1347 cnt = 1; 1348 /* Reallocate the array to fit the new element and the NULL. */ 1349 vec = xrealloc (vec, (cnt + 1) * sizeof (char *)); 1350 /* Append a copy of STR to the vector. */ 1351 vec[cnt - 1] = xstrdup (str); 1352 vec[cnt] = NULL; 1353 return vec; 1354} 1355 1356/* Sometimes it's useful to create "sets" of strings, i.e. special 1357 hash tables where you want to store strings as keys and merely 1358 query for their existence. Here is a set of utility routines that 1359 makes that transparent. */ 1360 1361void 1362string_set_add (struct hash_table *ht, const char *s) 1363{ 1364 /* First check whether the set element already exists. If it does, 1365 do nothing so that we don't have to free() the old element and 1366 then strdup() a new one. */ 1367 if (hash_table_contains (ht, s)) 1368 return; 1369 1370 /* We use "1" as value. It provides us a useful and clear arbitrary 1371 value, and it consumes no memory -- the pointers to the same 1372 string "1" will be shared by all the key-value pairs in all `set' 1373 hash tables. */ 1374 hash_table_put (ht, xstrdup (s), "1"); 1375} 1376 1377/* Synonym for hash_table_contains... */ 1378 1379int 1380string_set_contains (struct hash_table *ht, const char *s) 1381{ 1382 return hash_table_contains (ht, s); 1383} 1384 1385/* Convert the specified string set to array. ARRAY should be large 1386 enough to hold hash_table_count(ht) char pointers. */ 1387 1388void string_set_to_array (struct hash_table *ht, char **array) 1389{ 1390 hash_table_iterator iter; 1391 for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); ) 1392 *array++ = iter.key; 1393} 1394 1395/* Free the string set. This frees both the storage allocated for 1396 keys and the actual hash table. (hash_table_destroy would only 1397 destroy the hash table.) */ 1398 1399void 1400string_set_free (struct hash_table *ht) 1401{ 1402 hash_table_iterator iter; 1403 for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); ) 1404 xfree (iter.key); 1405 hash_table_destroy (ht); 1406} 1407 1408/* Utility function: simply call xfree() on all keys and values of HT. */ 1409 1410void 1411free_keys_and_values (struct hash_table *ht) 1412{ 1413 hash_table_iterator iter; 1414 for (hash_table_iterate (ht, &iter); hash_table_iter_next (&iter); ) 1415 { 1416 xfree (iter.key); 1417 xfree (iter.value); 1418 } 1419} 1420 1421/* Get digit grouping data for thousand separors by calling 1422 localeconv(). The data includes separator string and grouping info 1423 and is cached after the first call to the function. 1424 1425 In locales that don't set a thousand separator (such as the "C" 1426 locale), this forces it to be ",". We are now only showing 1427 thousand separators in one place, so this shouldn't be a problem in 1428 practice. */ 1429 1430static void 1431get_grouping_data (const char **sep, const char **grouping) 1432{ 1433 static const char *cached_sep; 1434 static const char *cached_grouping; 1435 static bool initialized; 1436 if (!initialized) 1437 { 1438 /* Get the grouping info from the locale. */ 1439 struct lconv *lconv = localeconv (); 1440 cached_sep = lconv->thousands_sep; 1441 cached_grouping = lconv->grouping; 1442#if ! USE_NLS_PROGRESS_BAR 1443 /* We can't count column widths, so ensure that the separator 1444 * is single-byte only (let check below determine what byte). */ 1445 if (strlen(cached_sep) > 1) 1446 cached_sep = ""; 1447#endif 1448 if (!*cached_sep) 1449 { 1450 /* Many locales (such as "C" or "hr_HR") don't specify 1451 grouping, which we still want to use it for legibility. 1452 In those locales set the sep char to ',', unless that 1453 character is used for decimal point, in which case set it 1454 to ".". */ 1455 if (*lconv->decimal_point != ',') 1456 cached_sep = ","; 1457 else 1458 cached_sep = "."; 1459 cached_grouping = "\x03"; 1460 } 1461 initialized = true; 1462 } 1463 *sep = cached_sep; 1464 *grouping = cached_grouping; 1465} 1466 1467/* Return a printed representation of N with thousand separators. 1468 This should respect locale settings, with the exception of the "C" 1469 locale which mandates no separator, but we use one anyway. 1470 1471 Unfortunately, we cannot use %'d (in fact it would be %'j) to get 1472 the separators because it's too non-portable, and it's hard to test 1473 for this feature at configure time. Besides, it wouldn't display 1474 separators in the "C" locale, still used by many Unix users. */ 1475 1476const char * 1477with_thousand_seps (wgint n) 1478{ 1479 static char outbuf[48]; 1480 char *p = outbuf + sizeof outbuf; 1481 1482 /* Info received from locale */ 1483 const char *grouping, *sep; 1484 int seplen; 1485 1486 /* State information */ 1487 int i = 0, groupsize; 1488 const char *atgroup; 1489 1490 bool negative = n < 0; 1491 1492 /* Initialize grouping data. */ 1493 get_grouping_data (&sep, &grouping); 1494 seplen = strlen (sep); 1495 atgroup = grouping; 1496 groupsize = *atgroup++; 1497 1498 /* This would overflow on WGINT_MIN, but printing negative numbers 1499 is not an important goal of this fuinction. */ 1500 if (negative) 1501 n = -n; 1502 1503 /* Write the number into the buffer, backwards, inserting the 1504 separators as necessary. */ 1505 *--p = '\0'; 1506 while (1) 1507 { 1508 *--p = n % 10 + '0'; 1509 n /= 10; 1510 if (n == 0) 1511 break; 1512 /* Prepend SEP to every groupsize'd digit and get new groupsize. */ 1513 if (++i == groupsize) 1514 { 1515 if (seplen == 1) 1516 *--p = *sep; 1517 else 1518 memcpy (p -= seplen, sep, seplen); 1519 i = 0; 1520 if (*atgroup) 1521 groupsize = *atgroup++; 1522 } 1523 } 1524 if (negative) 1525 *--p = '-'; 1526 1527 return p; 1528} 1529 1530/* N, a byte quantity, is converted to a human-readable abberviated 1531 form a la sizes printed by `ls -lh'. The result is written to a 1532 static buffer, a pointer to which is returned. 1533 1534 Unlike `with_thousand_seps', this approximates to the nearest unit. 1535 Quoting GNU libit: "Most people visually process strings of 3-4 1536 digits effectively, but longer strings of digits are more prone to 1537 misinterpretation. Hence, converting to an abbreviated form 1538 usually improves readability." 1539 1540 This intentionally uses kilobyte (KB), megabyte (MB), etc. in their 1541 original computer-related meaning of "powers of 1024". We don't 1542 use the "*bibyte" names invented in 1998, and seldom used in 1543 practice. Wikipedia's entry on "binary prefix" discusses this in 1544 some detail. */ 1545 1546char * 1547human_readable (HR_NUMTYPE n) 1548{ 1549 /* These suffixes are compatible with those of GNU `ls -lh'. */ 1550 static char powers[] = 1551 { 1552 'K', /* kilobyte, 2^10 bytes */ 1553 'M', /* megabyte, 2^20 bytes */ 1554 'G', /* gigabyte, 2^30 bytes */ 1555 'T', /* terabyte, 2^40 bytes */ 1556 'P', /* petabyte, 2^50 bytes */ 1557 'E', /* exabyte, 2^60 bytes */ 1558 }; 1559 static char buf[8]; 1560 size_t i; 1561 1562 /* If the quantity is smaller than 1K, just print it. */ 1563 if (n < 1024) 1564 { 1565 snprintf (buf, sizeof (buf), "%d", (int) n); 1566 return buf; 1567 } 1568 1569 /* Loop over powers, dividing N with 1024 in each iteration. This 1570 works unchanged for all sizes of wgint, while still avoiding 1571 non-portable `long double' arithmetic. */ 1572 for (i = 0; i < countof (powers); i++) 1573 { 1574 /* At each iteration N is greater than the *subsequent* power. 1575 That way N/1024.0 produces a decimal number in the units of 1576 *this* power. */ 1577 if ((n / 1024) < 1024 || i == countof (powers) - 1) 1578 { 1579 double val = n / 1024.0; 1580 /* Print values smaller than 10 with one decimal digits, and 1581 others without any decimals. */ 1582 snprintf (buf, sizeof (buf), "%.*f%c", 1583 val < 10 ? 1 : 0, val, powers[i]); 1584 return buf; 1585 } 1586 n /= 1024; 1587 } 1588 return NULL; /* unreached */ 1589} 1590 1591/* Count the digits in the provided number. Used to allocate space 1592 when printing numbers. */ 1593 1594int 1595numdigit (wgint number) 1596{ 1597 int cnt = 1; 1598 if (number < 0) 1599 ++cnt; /* accomodate '-' */ 1600 while ((number /= 10) != 0) 1601 ++cnt; 1602 return cnt; 1603} 1604 1605#define PR(mask) *p++ = n / (mask) + '0' 1606 1607/* DIGITS_<D> is used to print a D-digit number and should be called 1608 with mask==10^(D-1). It prints n/mask (the first digit), reducing 1609 n to n%mask (the remaining digits), and calling DIGITS_<D-1>. 1610 Recursively this continues until DIGITS_1 is invoked. */ 1611 1612#define DIGITS_1(mask) PR (mask) 1613#define DIGITS_2(mask) PR (mask), n %= (mask), DIGITS_1 ((mask) / 10) 1614#define DIGITS_3(mask) PR (mask), n %= (mask), DIGITS_2 ((mask) / 10) 1615#define DIGITS_4(mask) PR (mask), n %= (mask), DIGITS_3 ((mask) / 10) 1616#define DIGITS_5(mask) PR (mask), n %= (mask), DIGITS_4 ((mask) / 10) 1617#define DIGITS_6(mask) PR (mask), n %= (mask), DIGITS_5 ((mask) / 10) 1618#define DIGITS_7(mask) PR (mask), n %= (mask), DIGITS_6 ((mask) / 10) 1619#define DIGITS_8(mask) PR (mask), n %= (mask), DIGITS_7 ((mask) / 10) 1620#define DIGITS_9(mask) PR (mask), n %= (mask), DIGITS_8 ((mask) / 10) 1621#define DIGITS_10(mask) PR (mask), n %= (mask), DIGITS_9 ((mask) / 10) 1622 1623/* DIGITS_<11-20> are only used on machines with 64-bit wgints. */ 1624 1625#define DIGITS_11(mask) PR (mask), n %= (mask), DIGITS_10 ((mask) / 10) 1626#define DIGITS_12(mask) PR (mask), n %= (mask), DIGITS_11 ((mask) / 10) 1627#define DIGITS_13(mask) PR (mask), n %= (mask), DIGITS_12 ((mask) / 10) 1628#define DIGITS_14(mask) PR (mask), n %= (mask), DIGITS_13 ((mask) / 10) 1629#define DIGITS_15(mask) PR (mask), n %= (mask), DIGITS_14 ((mask) / 10) 1630#define DIGITS_16(mask) PR (mask), n %= (mask), DIGITS_15 ((mask) / 10) 1631#define DIGITS_17(mask) PR (mask), n %= (mask), DIGITS_16 ((mask) / 10) 1632#define DIGITS_18(mask) PR (mask), n %= (mask), DIGITS_17 ((mask) / 10) 1633#define DIGITS_19(mask) PR (mask), n %= (mask), DIGITS_18 ((mask) / 10) 1634 1635/* Shorthand for casting to wgint. */ 1636#define W wgint 1637 1638/* Print NUMBER to BUFFER in base 10. This is equivalent to 1639 `sprintf(buffer, "%lld", (long long) number)', only typically much 1640 faster and portable to machines without long long. 1641 1642 The speedup may make a difference in programs that frequently 1643 convert numbers to strings. Some implementations of sprintf, 1644 particularly the one in some versions of GNU libc, have been known 1645 to be quite slow when converting integers to strings. 1646 1647 Return the pointer to the location where the terminating zero was 1648 printed. (Equivalent to calling buffer+strlen(buffer) after the 1649 function is done.) 1650 1651 BUFFER should be large enough to accept as many bytes as you expect 1652 the number to take up. On machines with 64-bit wgints the maximum 1653 needed size is 24 bytes. That includes the digits needed for the 1654 largest 64-bit number, the `-' sign in case it's negative, and the 1655 terminating '\0'. */ 1656 1657char * 1658number_to_string (char *buffer, wgint number) 1659{ 1660 char *p = buffer; 1661 wgint n = number; 1662 1663 int last_digit_char = 0; 1664 1665#if (SIZEOF_WGINT != 4) && (SIZEOF_WGINT != 8) 1666 /* We are running in a very strange environment. Leave the correct 1667 printing to sprintf. */ 1668 p += sprintf (buf, "%j", (intmax_t) (n)); 1669#else /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */ 1670 1671 if (n < 0) 1672 { 1673 if (n < -WGINT_MAX) 1674 { 1675 /* n = -n would overflow because -n would evaluate to a 1676 wgint value larger than WGINT_MAX. Need to make n 1677 smaller and handle the last digit separately. */ 1678 int last_digit = n % 10; 1679 /* The sign of n%10 is implementation-defined. */ 1680 if (last_digit < 0) 1681 last_digit_char = '0' - last_digit; 1682 else 1683 last_digit_char = '0' + last_digit; 1684 /* After n is made smaller, -n will not overflow. */ 1685 n /= 10; 1686 } 1687 1688 *p++ = '-'; 1689 n = -n; 1690 } 1691 1692 /* Use the DIGITS_ macro appropriate for N's number of digits. That 1693 way printing any N is fully open-coded without a loop or jump. 1694 (Also see description of DIGITS_*.) */ 1695 1696 if (n < 10) DIGITS_1 (1); 1697 else if (n < 100) DIGITS_2 (10); 1698 else if (n < 1000) DIGITS_3 (100); 1699 else if (n < 10000) DIGITS_4 (1000); 1700 else if (n < 100000) DIGITS_5 (10000); 1701 else if (n < 1000000) DIGITS_6 (100000); 1702 else if (n < 10000000) DIGITS_7 (1000000); 1703 else if (n < 100000000) DIGITS_8 (10000000); 1704 else if (n < 1000000000) DIGITS_9 (100000000); 1705#if SIZEOF_WGINT == 4 1706 /* wgint is 32 bits wide: no number has more than 10 digits. */ 1707 else DIGITS_10 (1000000000); 1708#else 1709 /* wgint is 64 bits wide: handle numbers with 9-19 decimal digits. 1710 Constants are constructed by compile-time multiplication to avoid 1711 dealing with different notations for 64-bit constants 1712 (nL/nLL/nI64, depending on the compiler and architecture). */ 1713 else if (n < 10*(W)1000000000) DIGITS_10 (1000000000); 1714 else if (n < 100*(W)1000000000) DIGITS_11 (10*(W)1000000000); 1715 else if (n < 1000*(W)1000000000) DIGITS_12 (100*(W)1000000000); 1716 else if (n < 10000*(W)1000000000) DIGITS_13 (1000*(W)1000000000); 1717 else if (n < 100000*(W)1000000000) DIGITS_14 (10000*(W)1000000000); 1718 else if (n < 1000000*(W)1000000000) DIGITS_15 (100000*(W)1000000000); 1719 else if (n < 10000000*(W)1000000000) DIGITS_16 (1000000*(W)1000000000); 1720 else if (n < 100000000*(W)1000000000) DIGITS_17 (10000000*(W)1000000000); 1721 else if (n < 1000000000*(W)1000000000) DIGITS_18 (100000000*(W)1000000000); 1722 else DIGITS_19 (1000000000*(W)1000000000); 1723#endif 1724 1725 if (last_digit_char) 1726 *p++ = last_digit_char; 1727 1728 *p = '\0'; 1729#endif /* (SIZEOF_WGINT == 4) || (SIZEOF_WGINT == 8) */ 1730 1731 return p; 1732} 1733 1734#undef PR 1735#undef W 1736#undef SPRINTF_WGINT 1737#undef DIGITS_1 1738#undef DIGITS_2 1739#undef DIGITS_3 1740#undef DIGITS_4 1741#undef DIGITS_5 1742#undef DIGITS_6 1743#undef DIGITS_7 1744#undef DIGITS_8 1745#undef DIGITS_9 1746#undef DIGITS_10 1747#undef DIGITS_11 1748#undef DIGITS_12 1749#undef DIGITS_13 1750#undef DIGITS_14 1751#undef DIGITS_15 1752#undef DIGITS_16 1753#undef DIGITS_17 1754#undef DIGITS_18 1755#undef DIGITS_19 1756 1757#define RING_SIZE 3 1758 1759/* Print NUMBER to a statically allocated string and return a pointer 1760 to the printed representation. 1761 1762 This function is intended to be used in conjunction with printf. 1763 It is hard to portably print wgint values: 1764 a) you cannot use printf("%ld", number) because wgint can be long 1765 long on 32-bit machines with LFS. 1766 b) you cannot use printf("%lld", number) because NUMBER could be 1767 long on 32-bit machines without LFS, or on 64-bit machines, 1768 which do not require LFS. Also, Windows doesn't support %lld. 1769 c) you cannot use printf("%j", (int_max_t) number) because not all 1770 versions of printf support "%j", the most notable being the one 1771 on Windows. 1772 d) you cannot #define WGINT_FMT to the appropriate format and use 1773 printf(WGINT_FMT, number) because that would break translations 1774 for user-visible messages, such as printf("Downloaded: %d 1775 bytes\n", number). 1776 1777 What you should use instead is printf("%s", number_to_static_string 1778 (number)). 1779 1780 CAVEAT: since the function returns pointers to static data, you 1781 must be careful to copy its result before calling it again. 1782 However, to make it more useful with printf, the function maintains 1783 an internal ring of static buffers to return. That way things like 1784 printf("%s %s", number_to_static_string (num1), 1785 number_to_static_string (num2)) work as expected. Three buffers 1786 are currently used, which means that "%s %s %s" will work, but "%s 1787 %s %s %s" won't. If you need to print more than three wgints, 1788 bump the RING_SIZE (or rethink your message.) */ 1789 1790char * 1791number_to_static_string (wgint number) 1792{ 1793 static char ring[RING_SIZE][24]; 1794 static int ringpos; 1795 char *buf = ring[ringpos]; 1796 number_to_string (buf, number); 1797 ringpos = (ringpos + 1) % RING_SIZE; 1798 return buf; 1799} 1800 1801/* Determine the width of the terminal we're running on. If that's 1802 not possible, return 0. */ 1803 1804int 1805determine_screen_width (void) 1806{ 1807 /* If there's a way to get the terminal size using POSIX 1808 tcgetattr(), somebody please tell me. */ 1809#ifdef TIOCGWINSZ 1810 int fd; 1811 struct winsize wsz; 1812 1813 if (opt.lfilename != NULL) 1814 return 0; 1815 1816 fd = fileno (stderr); 1817 if (ioctl (fd, TIOCGWINSZ, &wsz) < 0) 1818 return 0; /* most likely ENOTTY */ 1819 1820 return wsz.ws_col; 1821#elif defined(WINDOWS) 1822 CONSOLE_SCREEN_BUFFER_INFO csbi; 1823 if (!GetConsoleScreenBufferInfo (GetStdHandle (STD_ERROR_HANDLE), &csbi)) 1824 return 0; 1825 return csbi.dwSize.X; 1826#else /* neither TIOCGWINSZ nor WINDOWS */ 1827 return 0; 1828#endif /* neither TIOCGWINSZ nor WINDOWS */ 1829} 1830 1831/* Whether the rnd system (either rand or [dl]rand48) has been 1832 seeded. */ 1833static int rnd_seeded; 1834 1835/* Return a random number between 0 and MAX-1, inclusive. 1836 1837 If the system does not support lrand48 and MAX is greater than the 1838 value of RAND_MAX+1 on the system, the returned value will be in 1839 the range [0, RAND_MAX]. This may be fixed in a future release. 1840 The random number generator is seeded automatically the first time 1841 it is called. 1842 1843 This uses lrand48 where available, rand elsewhere. DO NOT use it 1844 for cryptography. It is only meant to be used in situations where 1845 quality of the random numbers returned doesn't really matter. */ 1846 1847int 1848random_number (int max) 1849{ 1850#ifdef HAVE_DRAND48 1851 if (!rnd_seeded) 1852 { 1853 srand48 ((long) time (NULL) ^ (long) getpid ()); 1854 rnd_seeded = 1; 1855 } 1856 return lrand48 () % max; 1857#else /* not HAVE_DRAND48 */ 1858 1859 double bounded; 1860 int rnd; 1861 if (!rnd_seeded) 1862 { 1863 srand ((unsigned) time (NULL) ^ (unsigned) getpid ()); 1864 rnd_seeded = 1; 1865 } 1866 rnd = rand (); 1867 1868 /* Like rand() % max, but uses the high-order bits for better 1869 randomness on architectures where rand() is implemented using a 1870 simple congruential generator. */ 1871 1872 bounded = (double) max * rnd / (RAND_MAX + 1.0); 1873 return (int) bounded; 1874 1875#endif /* not HAVE_DRAND48 */ 1876} 1877 1878/* Return a random uniformly distributed floating point number in the 1879 [0, 1) range. Uses drand48 where available, and a really lame 1880 kludge elsewhere. */ 1881 1882double 1883random_float (void) 1884{ 1885#ifdef HAVE_DRAND48 1886 if (!rnd_seeded) 1887 { 1888 srand48 ((long) time (NULL) ^ (long) getpid ()); 1889 rnd_seeded = 1; 1890 } 1891 return drand48 (); 1892#else /* not HAVE_DRAND48 */ 1893 return ( random_number (10000) / 10000.0 1894 + random_number (10000) / (10000.0 * 10000.0) 1895 + random_number (10000) / (10000.0 * 10000.0 * 10000.0) 1896 + random_number (10000) / (10000.0 * 10000.0 * 10000.0 * 10000.0)); 1897#endif /* not HAVE_DRAND48 */ 1898} 1899 1900/* Implementation of run_with_timeout, a generic timeout-forcing 1901 routine for systems with Unix-like signal handling. */ 1902 1903#ifdef USE_SIGNAL_TIMEOUT 1904# ifdef HAVE_SIGSETJMP 1905# define SETJMP(env) sigsetjmp (env, 1) 1906 1907static sigjmp_buf run_with_timeout_env; 1908 1909static void 1910abort_run_with_timeout (int sig) 1911{ 1912 assert (sig == SIGALRM); 1913 siglongjmp (run_with_timeout_env, -1); 1914} 1915# else /* not HAVE_SIGSETJMP */ 1916# define SETJMP(env) setjmp (env) 1917 1918static jmp_buf run_with_timeout_env; 1919 1920static void 1921abort_run_with_timeout (int sig) 1922{ 1923 assert (sig == SIGALRM); 1924 /* We don't have siglongjmp to preserve the set of blocked signals; 1925 if we longjumped out of the handler at this point, SIGALRM would 1926 remain blocked. We must unblock it manually. */ 1927 int mask = siggetmask (); 1928 mask &= ~sigmask (SIGALRM); 1929 sigsetmask (mask); 1930 1931 /* Now it's safe to longjump. */ 1932 longjmp (run_with_timeout_env, -1); 1933} 1934# endif /* not HAVE_SIGSETJMP */ 1935 1936/* Arrange for SIGALRM to be delivered in TIMEOUT seconds. This uses 1937 setitimer where available, alarm otherwise. 1938 1939 TIMEOUT should be non-zero. If the timeout value is so small that 1940 it would be rounded to zero, it is rounded to the least legal value 1941 instead (1us for setitimer, 1s for alarm). That ensures that 1942 SIGALRM will be delivered in all cases. */ 1943 1944static void 1945alarm_set (double timeout) 1946{ 1947#ifdef ITIMER_REAL 1948 /* Use the modern itimer interface. */ 1949 struct itimerval itv; 1950 xzero (itv); 1951 itv.it_value.tv_sec = (long) timeout; 1952 itv.it_value.tv_usec = 1000000 * (timeout - (long)timeout); 1953 if (itv.it_value.tv_sec == 0 && itv.it_value.tv_usec == 0) 1954 /* Ensure that we wait for at least the minimum interval. 1955 Specifying zero would mean "wait forever". */ 1956 itv.it_value.tv_usec = 1; 1957 setitimer (ITIMER_REAL, &itv, NULL); 1958#else /* not ITIMER_REAL */ 1959 /* Use the old alarm() interface. */ 1960 int secs = (int) timeout; 1961 if (secs == 0) 1962 /* Round TIMEOUTs smaller than 1 to 1, not to zero. This is 1963 because alarm(0) means "never deliver the alarm", i.e. "wait 1964 forever", which is not what someone who specifies a 0.5s 1965 timeout would expect. */ 1966 secs = 1; 1967 alarm (secs); 1968#endif /* not ITIMER_REAL */ 1969} 1970 1971/* Cancel the alarm set with alarm_set. */ 1972 1973static void 1974alarm_cancel (void) 1975{ 1976#ifdef ITIMER_REAL 1977 struct itimerval disable; 1978 xzero (disable); 1979 setitimer (ITIMER_REAL, &disable, NULL); 1980#else /* not ITIMER_REAL */ 1981 alarm (0); 1982#endif /* not ITIMER_REAL */ 1983} 1984 1985/* Call FUN(ARG), but don't allow it to run for more than TIMEOUT 1986 seconds. Returns true if the function was interrupted with a 1987 timeout, false otherwise. 1988 1989 This works by setting up SIGALRM to be delivered in TIMEOUT seconds 1990 using setitimer() or alarm(). The timeout is enforced by 1991 longjumping out of the SIGALRM handler. This has several 1992 advantages compared to the traditional approach of relying on 1993 signals causing system calls to exit with EINTR: 1994 1995 * The callback function is *forcibly* interrupted after the 1996 timeout expires, (almost) regardless of what it was doing and 1997 whether it was in a syscall. For example, a calculation that 1998 takes a long time is interrupted as reliably as an IO 1999 operation. 2000 2001 * It works with both SYSV and BSD signals because it doesn't 2002 depend on the default setting of SA_RESTART. 2003 2004 * It doesn't require special handler setup beyond a simple call 2005 to signal(). (It does use sigsetjmp/siglongjmp, but they're 2006 optional.) 2007 2008 The only downside is that, if FUN allocates internal resources that 2009 are normally freed prior to exit from the functions, they will be 2010 lost in case of timeout. */ 2011 2012bool 2013run_with_timeout (double timeout, void (*fun) (void *), void *arg) 2014{ 2015 int saved_errno; 2016 2017 if (timeout == 0) 2018 { 2019 fun (arg); 2020 return false; 2021 } 2022 2023 signal (SIGALRM, abort_run_with_timeout); 2024 if (SETJMP (run_with_timeout_env) != 0) 2025 { 2026 /* Longjumped out of FUN with a timeout. */ 2027 signal (SIGALRM, SIG_DFL); 2028 return true; 2029 } 2030 alarm_set (timeout); 2031 fun (arg); 2032 2033 /* Preserve errno in case alarm() or signal() modifies it. */ 2034 saved_errno = errno; 2035 alarm_cancel (); 2036 signal (SIGALRM, SIG_DFL); 2037 errno = saved_errno; 2038 2039 return false; 2040} 2041 2042#else /* not USE_SIGNAL_TIMEOUT */ 2043 2044#ifndef WINDOWS 2045/* A stub version of run_with_timeout that just calls FUN(ARG). Don't 2046 define it under Windows, because Windows has its own version of 2047 run_with_timeout that uses threads. */ 2048 2049bool 2050run_with_timeout (double timeout, void (*fun) (void *), void *arg) 2051{ 2052 fun (arg); 2053 return false; 2054} 2055#endif /* not WINDOWS */ 2056#endif /* not USE_SIGNAL_TIMEOUT */ 2057 2058#ifndef WINDOWS 2059 2060/* Sleep the specified amount of seconds. On machines without 2061 nanosleep(), this may sleep shorter if interrupted by signals. */ 2062 2063void 2064xsleep (double seconds) 2065{ 2066#ifdef HAVE_NANOSLEEP 2067 /* nanosleep is the preferred interface because it offers high 2068 accuracy and, more importantly, because it allows us to reliably 2069 restart receiving a signal such as SIGWINCH. (There was an 2070 actual Debian bug report about --limit-rate malfunctioning while 2071 the terminal was being resized.) */ 2072 struct timespec sleep, remaining; 2073 sleep.tv_sec = (long) seconds; 2074 sleep.tv_nsec = 1000000000 * (seconds - (long) seconds); 2075 while (nanosleep (&sleep, &remaining) < 0 && errno == EINTR) 2076 /* If nanosleep has been interrupted by a signal, adjust the 2077 sleeping period and return to sleep. */ 2078 sleep = remaining; 2079#elif defined(HAVE_USLEEP) 2080 /* If usleep is available, use it in preference to select. */ 2081 if (seconds >= 1) 2082 { 2083 /* On some systems, usleep cannot handle values larger than 2084 1,000,000. If the period is larger than that, use sleep 2085 first, then add usleep for subsecond accuracy. */ 2086 sleep (seconds); 2087 seconds -= (long) seconds; 2088 } 2089 usleep (seconds * 1000000); 2090#else /* fall back select */ 2091 /* Note that, although Windows supports select, it can't be used to 2092 implement sleeping because Winsock's select doesn't implement 2093 timeout when it is passed NULL pointers for all fd sets. (But it 2094 does under Cygwin, which implements Unix-compatible select.) */ 2095 struct timeval sleep; 2096 sleep.tv_sec = (long) seconds; 2097 sleep.tv_usec = 1000000 * (seconds - (long) seconds); 2098 select (0, NULL, NULL, NULL, &sleep); 2099 /* If select returns -1 and errno is EINTR, it means we were 2100 interrupted by a signal. But without knowing how long we've 2101 actually slept, we can't return to sleep. Using gettimeofday to 2102 track sleeps is slow and unreliable due to clock skew. */ 2103#endif 2104} 2105 2106#endif /* not WINDOWS */ 2107 2108/* Encode the octets in DATA of length LENGTH to base64 format, 2109 storing the result to DEST. The output will be zero-terminated, 2110 and must point to a writable buffer of at least 2111 1+BASE64_LENGTH(length) bytes. The function returns the length of 2112 the resulting base64 data, not counting the terminating zero. 2113 2114 This implementation does not emit newlines after 76 characters of 2115 base64 data. */ 2116 2117int 2118base64_encode (const void *data, int length, char *dest) 2119{ 2120 /* Conversion table. */ 2121 static const char tbl[64] = { 2122 'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P', 2123 'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f', 2124 'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v', 2125 'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/' 2126 }; 2127 /* Access bytes in DATA as unsigned char, otherwise the shifts below 2128 don't work for data with MSB set. */ 2129 const unsigned char *s = data; 2130 /* Theoretical ANSI violation when length < 3. */ 2131 const unsigned char *end = (const unsigned char *) data + length - 2; 2132 char *p = dest; 2133 2134 /* Transform the 3x8 bits to 4x6 bits, as required by base64. */ 2135 for (; s < end; s += 3) 2136 { 2137 *p++ = tbl[s[0] >> 2]; 2138 *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)]; 2139 *p++ = tbl[((s[1] & 0xf) << 2) + (s[2] >> 6)]; 2140 *p++ = tbl[s[2] & 0x3f]; 2141 } 2142 2143 /* Pad the result if necessary... */ 2144 switch (length % 3) 2145 { 2146 case 1: 2147 *p++ = tbl[s[0] >> 2]; 2148 *p++ = tbl[(s[0] & 3) << 4]; 2149 *p++ = '='; 2150 *p++ = '='; 2151 break; 2152 case 2: 2153 *p++ = tbl[s[0] >> 2]; 2154 *p++ = tbl[((s[0] & 3) << 4) + (s[1] >> 4)]; 2155 *p++ = tbl[((s[1] & 0xf) << 2)]; 2156 *p++ = '='; 2157 break; 2158 } 2159 /* ...and zero-terminate it. */ 2160 *p = '\0'; 2161 2162 return p - dest; 2163} 2164 2165/* Store in C the next non-whitespace character from the string, or \0 2166 when end of string is reached. */ 2167#define NEXT_CHAR(c, p) do { \ 2168 c = (unsigned char) *p++; \ 2169} while (c_isspace (c)) 2170 2171#define IS_ASCII(c) (((c) & 0x80) == 0) 2172 2173/* Decode data from BASE64 (a null-terminated string) into memory 2174 pointed to by DEST. DEST is assumed to be large enough to 2175 accomodate the decoded data, which is guaranteed to be no more than 2176 3/4*strlen(base64). 2177 2178 Since DEST is assumed to contain binary data, it is not 2179 NUL-terminated. The function returns the length of the data 2180 written to TO. -1 is returned in case of error caused by malformed 2181 base64 input. 2182 2183 This function originates from Free Recode. */ 2184 2185int 2186base64_decode (const char *base64, void *dest) 2187{ 2188 /* Table of base64 values for first 128 characters. Note that this 2189 assumes ASCII (but so does Wget in other places). */ 2190 static const signed char base64_char_to_value[128] = 2191 { 2192 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 0- 9 */ 2193 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 10- 19 */ 2194 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 20- 29 */ 2195 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, /* 30- 39 */ 2196 -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, /* 40- 49 */ 2197 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, /* 50- 59 */ 2198 -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, /* 60- 69 */ 2199 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, /* 70- 79 */ 2200 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, /* 80- 89 */ 2201 25, -1, -1, -1, -1, -1, -1, 26, 27, 28, /* 90- 99 */ 2202 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, /* 100-109 */ 2203 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, /* 110-119 */ 2204 49, 50, 51, -1, -1, -1, -1, -1 /* 120-127 */ 2205 }; 2206#define BASE64_CHAR_TO_VALUE(c) ((int) base64_char_to_value[c]) 2207#define IS_BASE64(c) ((IS_ASCII (c) && BASE64_CHAR_TO_VALUE (c) >= 0) || c == '=') 2208 2209 const char *p = base64; 2210 char *q = dest; 2211 2212 while (1) 2213 { 2214 unsigned char c; 2215 unsigned long value; 2216 2217 /* Process first byte of a quadruplet. */ 2218 NEXT_CHAR (c, p); 2219 if (!c) 2220 break; 2221 if (c == '=' || !IS_BASE64 (c)) 2222 return -1; /* illegal char while decoding base64 */ 2223 value = BASE64_CHAR_TO_VALUE (c) << 18; 2224 2225 /* Process second byte of a quadruplet. */ 2226 NEXT_CHAR (c, p); 2227 if (!c) 2228 return -1; /* premature EOF while decoding base64 */ 2229 if (c == '=' || !IS_BASE64 (c)) 2230 return -1; /* illegal char while decoding base64 */ 2231 value |= BASE64_CHAR_TO_VALUE (c) << 12; 2232 *q++ = value >> 16; 2233 2234 /* Process third byte of a quadruplet. */ 2235 NEXT_CHAR (c, p); 2236 if (!c) 2237 return -1; /* premature EOF while decoding base64 */ 2238 if (!IS_BASE64 (c)) 2239 return -1; /* illegal char while decoding base64 */ 2240 2241 if (c == '=') 2242 { 2243 NEXT_CHAR (c, p); 2244 if (!c) 2245 return -1; /* premature EOF while decoding base64 */ 2246 if (c != '=') 2247 return -1; /* padding `=' expected but not found */ 2248 continue; 2249 } 2250 2251 value |= BASE64_CHAR_TO_VALUE (c) << 6; 2252 *q++ = 0xff & value >> 8; 2253 2254 /* Process fourth byte of a quadruplet. */ 2255 NEXT_CHAR (c, p); 2256 if (!c) 2257 return -1; /* premature EOF while decoding base64 */ 2258 if (c == '=') 2259 continue; 2260 if (!IS_BASE64 (c)) 2261 return -1; /* illegal char while decoding base64 */ 2262 2263 value |= BASE64_CHAR_TO_VALUE (c); 2264 *q++ = 0xff & value; 2265 } 2266#undef IS_BASE64 2267#undef BASE64_CHAR_TO_VALUE 2268 2269 return q - (char *) dest; 2270} 2271 2272#undef IS_ASCII 2273#undef NEXT_CHAR 2274 2275/* Simple merge sort for use by stable_sort. Implementation courtesy 2276 Zeljko Vrba with additional debugging by Nenad Barbutov. */ 2277 2278static void 2279mergesort_internal (void *base, void *temp, size_t size, size_t from, size_t to, 2280 int (*cmpfun) (const void *, const void *)) 2281{ 2282#define ELT(array, pos) ((char *)(array) + (pos) * size) 2283 if (from < to) 2284 { 2285 size_t i, j, k; 2286 size_t mid = (to + from) / 2; 2287 mergesort_internal (base, temp, size, from, mid, cmpfun); 2288 mergesort_internal (base, temp, size, mid + 1, to, cmpfun); 2289 i = from; 2290 j = mid + 1; 2291 for (k = from; (i <= mid) && (j <= to); k++) 2292 if (cmpfun (ELT (base, i), ELT (base, j)) <= 0) 2293 memcpy (ELT (temp, k), ELT (base, i++), size); 2294 else 2295 memcpy (ELT (temp, k), ELT (base, j++), size); 2296 while (i <= mid) 2297 memcpy (ELT (temp, k++), ELT (base, i++), size); 2298 while (j <= to) 2299 memcpy (ELT (temp, k++), ELT (base, j++), size); 2300 for (k = from; k <= to; k++) 2301 memcpy (ELT (base, k), ELT (temp, k), size); 2302 } 2303#undef ELT 2304} 2305 2306/* Stable sort with interface exactly like standard library's qsort. 2307 Uses mergesort internally, allocating temporary storage with 2308 alloca. */ 2309 2310void 2311stable_sort (void *base, size_t nmemb, size_t size, 2312 int (*cmpfun) (const void *, const void *)) 2313{ 2314 if (size > 1) 2315 { 2316 void *temp = alloca (nmemb * size * sizeof (void *)); 2317 mergesort_internal (base, temp, size, 0, nmemb - 1, cmpfun); 2318 } 2319} 2320 2321/* Print a decimal number. If it is equal to or larger than ten, the 2322 number is rounded. Otherwise it is printed with one significant 2323 digit without trailing zeros and with no more than three fractional 2324 digits total. For example, 0.1 is printed as "0.1", 0.035 is 2325 printed as "0.04", 0.0091 as "0.009", and 0.0003 as simply "0". 2326 2327 This is useful for displaying durations because it provides 2328 order-of-magnitude information without unnecessary clutter -- 2329 long-running downloads are shown without the fractional part, and 2330 short ones still retain one significant digit. */ 2331 2332const char * 2333print_decimal (double number) 2334{ 2335 static char buf[32]; 2336 double n = number >= 0 ? number : -number; 2337 2338 if (n >= 9.95) 2339 /* Cut off at 9.95 because the below %.1f would round 9.96 to 2340 "10.0" instead of "10". OTOH 9.94 will print as "9.9". */ 2341 snprintf (buf, sizeof buf, "%.0f", number); 2342 else if (n >= 0.95) 2343 snprintf (buf, sizeof buf, "%.1f", number); 2344 else if (n >= 0.001) 2345 snprintf (buf, sizeof buf, "%.1g", number); 2346 else if (n >= 0.0005) 2347 /* round [0.0005, 0.001) to 0.001 */ 2348 snprintf (buf, sizeof buf, "%.3f", number); 2349 else 2350 /* print numbers close to 0 as 0, not 0.000 */ 2351 strcpy (buf, "0"); 2352 2353 return buf; 2354} 2355 2356#ifdef TESTING 2357 2358const char * 2359test_subdir_p() 2360{ 2361 int i; 2362 struct { 2363 char *d1; 2364 char *d2; 2365 bool result; 2366 } test_array[] = { 2367 { "/somedir", "/somedir", true }, 2368 { "/somedir", "/somedir/d2", true }, 2369 { "/somedir/d1", "/somedir", false }, 2370 }; 2371 2372 for (i = 0; i < countof(test_array); ++i) 2373 { 2374 bool res = subdir_p (test_array[i].d1, test_array[i].d2); 2375 2376 mu_assert ("test_subdir_p: wrong result", 2377 res == test_array[i].result); 2378 } 2379 2380 return NULL; 2381} 2382 2383const char * 2384test_dir_matches_p() 2385{ 2386 int i; 2387 struct { 2388 char *dirlist[3]; 2389 char *dir; 2390 bool result; 2391 } test_array[] = { 2392 { { "/somedir", "/someotherdir", NULL }, "somedir", true }, 2393 { { "/somedir", "/someotherdir", NULL }, "anotherdir", false }, 2394 { { "/somedir", "/*otherdir", NULL }, "anotherdir", true }, 2395 { { "/somedir/d1", "/someotherdir", NULL }, "somedir/d1", true }, 2396 { { "*/*d1", "/someotherdir", NULL }, "somedir/d1", true }, 2397 { { "/somedir/d1", "/someotherdir", NULL }, "d1", false }, 2398 { { "!COMPLETE", NULL, NULL }, "!COMPLETE", true }, 2399 { { "*COMPLETE", NULL, NULL }, "!COMPLETE", true }, 2400 { { "*/!COMPLETE", NULL, NULL }, "foo/!COMPLETE", true }, 2401 { { "*COMPLETE", NULL, NULL }, "foo/!COMPLETE", false }, 2402 { { "*/*COMPLETE", NULL, NULL }, "foo/!COMPLETE", true }, 2403 { { "/dir with spaces", NULL, NULL }, "dir with spaces", true }, 2404 { { "/dir*with*spaces", NULL, NULL }, "dir with spaces", true }, 2405 { { "/Tmp/has", NULL, NULL }, "/Tmp/has space", false }, 2406 { { "/Tmp/has", NULL, NULL }, "/Tmp/has,comma", false }, 2407 }; 2408 2409 for (i = 0; i < countof(test_array); ++i) 2410 { 2411 bool res = dir_matches_p (test_array[i].dirlist, test_array[i].dir); 2412 2413 mu_assert ("test_dir_matches_p: wrong result", 2414 res == test_array[i].result); 2415 } 2416 2417 return NULL; 2418} 2419 2420#endif /* TESTING */ 2421 2422