1/* quotearg.c - quote arguments for output 2 3 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007 Free 4 Software Foundation, Inc. 5 6 This program is free software: you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 3 of the License, or 9 (at your option) any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 18 19/* Written by Paul Eggert <eggert@twinsun.com> */ 20 21#include <config.h> 22 23#include "quotearg.h" 24 25#include "xalloc.h" 26 27#include <ctype.h> 28#include <errno.h> 29#include <limits.h> 30#include <stdbool.h> 31#include <stdlib.h> 32#include <string.h> 33#include <wchar.h> 34#include <wctype.h> 35 36#include "gettext.h" 37#define _(msgid) gettext (msgid) 38#define N_(msgid) msgid 39 40#if !HAVE_MBRTOWC 41/* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the 42 other macros are defined only for documentation and to satisfy C 43 syntax. */ 44# undef MB_CUR_MAX 45# define MB_CUR_MAX 1 46# undef mbstate_t 47# define mbstate_t int 48# define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0) 49# define iswprint(wc) isprint ((unsigned char) (wc)) 50# undef HAVE_MBSINIT 51#endif 52 53#if !defined mbsinit && !HAVE_MBSINIT 54# define mbsinit(ps) 1 55#endif 56 57#ifndef SIZE_MAX 58# define SIZE_MAX ((size_t) -1) 59#endif 60 61#define INT_BITS (sizeof (int) * CHAR_BIT) 62 63struct quoting_options 64{ 65 /* Basic quoting style. */ 66 enum quoting_style style; 67 68 /* Quote the characters indicated by this bit vector even if the 69 quoting style would not normally require them to be quoted. */ 70 unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1]; 71}; 72 73/* Names of quoting styles. */ 74char const *const quoting_style_args[] = 75{ 76 "literal", 77 "shell", 78 "shell-always", 79 "c", 80 "escape", 81 "locale", 82 "clocale", 83 0 84}; 85 86/* Correspondences to quoting style names. */ 87enum quoting_style const quoting_style_vals[] = 88{ 89 literal_quoting_style, 90 shell_quoting_style, 91 shell_always_quoting_style, 92 c_quoting_style, 93 escape_quoting_style, 94 locale_quoting_style, 95 clocale_quoting_style 96}; 97 98/* The default quoting options. */ 99static struct quoting_options default_quoting_options; 100 101/* Allocate a new set of quoting options, with contents initially identical 102 to O if O is not null, or to the default if O is null. 103 It is the caller's responsibility to free the result. */ 104struct quoting_options * 105clone_quoting_options (struct quoting_options *o) 106{ 107 int e = errno; 108 struct quoting_options *p = xmemdup (o ? o : &default_quoting_options, 109 sizeof *o); 110 errno = e; 111 return p; 112} 113 114/* Get the value of O's quoting style. If O is null, use the default. */ 115enum quoting_style 116get_quoting_style (struct quoting_options *o) 117{ 118 return (o ? o : &default_quoting_options)->style; 119} 120 121/* In O (or in the default if O is null), 122 set the value of the quoting style to S. */ 123void 124set_quoting_style (struct quoting_options *o, enum quoting_style s) 125{ 126 (o ? o : &default_quoting_options)->style = s; 127} 128 129/* In O (or in the default if O is null), 130 set the value of the quoting options for character C to I. 131 Return the old value. Currently, the only values defined for I are 132 0 (the default) and 1 (which means to quote the character even if 133 it would not otherwise be quoted). */ 134int 135set_char_quoting (struct quoting_options *o, char c, int i) 136{ 137 unsigned char uc = c; 138 unsigned int *p = 139 (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS; 140 int shift = uc % INT_BITS; 141 int r = (*p >> shift) & 1; 142 *p ^= ((i & 1) ^ r) << shift; 143 return r; 144} 145 146/* MSGID approximates a quotation mark. Return its translation if it 147 has one; otherwise, return either it or "\"", depending on S. */ 148static char const * 149gettext_quote (char const *msgid, enum quoting_style s) 150{ 151 char const *translation = _(msgid); 152 if (translation == msgid && s == clocale_quoting_style) 153 translation = "\""; 154 return translation; 155} 156 157/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of 158 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the 159 non-quoting-style part of O to control quoting. 160 Terminate the output with a null character, and return the written 161 size of the output, not counting the terminating null. 162 If BUFFERSIZE is too small to store the output string, return the 163 value that would have been returned had BUFFERSIZE been large enough. 164 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE. 165 166 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG, 167 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting 168 style specified by O, and O may not be null. */ 169 170static size_t 171quotearg_buffer_restyled (char *buffer, size_t buffersize, 172 char const *arg, size_t argsize, 173 enum quoting_style quoting_style, 174 struct quoting_options const *o) 175{ 176 size_t i; 177 size_t len = 0; 178 char const *quote_string = 0; 179 size_t quote_string_len = 0; 180 bool backslash_escapes = false; 181 bool unibyte_locale = MB_CUR_MAX == 1; 182 183#define STORE(c) \ 184 do \ 185 { \ 186 if (len < buffersize) \ 187 buffer[len] = (c); \ 188 len++; \ 189 } \ 190 while (0) 191 192 switch (quoting_style) 193 { 194 case c_quoting_style: 195 STORE ('"'); 196 backslash_escapes = true; 197 quote_string = "\""; 198 quote_string_len = 1; 199 break; 200 201 case escape_quoting_style: 202 backslash_escapes = true; 203 break; 204 205 case locale_quoting_style: 206 case clocale_quoting_style: 207 { 208 /* TRANSLATORS: 209 Get translations for open and closing quotation marks. 210 211 The message catalog should translate "`" to a left 212 quotation mark suitable for the locale, and similarly for 213 "'". If the catalog has no translation, 214 locale_quoting_style quotes `like this', and 215 clocale_quoting_style quotes "like this". 216 217 For example, an American English Unicode locale should 218 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and 219 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION 220 MARK). A British English Unicode locale should instead 221 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and 222 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. 223 224 If you don't know what to put here, please see 225 <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs> 226 and use glyphs suitable for your language. */ 227 228 char const *left = gettext_quote (N_("`"), quoting_style); 229 char const *right = gettext_quote (N_("'"), quoting_style); 230 for (quote_string = left; *quote_string; quote_string++) 231 STORE (*quote_string); 232 backslash_escapes = true; 233 quote_string = right; 234 quote_string_len = strlen (quote_string); 235 } 236 break; 237 238 case shell_always_quoting_style: 239 STORE ('\''); 240 quote_string = "'"; 241 quote_string_len = 1; 242 break; 243 244 default: 245 break; 246 } 247 248 for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++) 249 { 250 unsigned char c; 251 unsigned char esc; 252 253 if (backslash_escapes 254 && quote_string_len 255 && i + quote_string_len <= argsize 256 && memcmp (arg + i, quote_string, quote_string_len) == 0) 257 STORE ('\\'); 258 259 c = arg[i]; 260 switch (c) 261 { 262 case '\0': 263 if (backslash_escapes) 264 { 265 STORE ('\\'); 266 STORE ('0'); 267 STORE ('0'); 268 c = '0'; 269 } 270 break; 271 272 case '?': 273 switch (quoting_style) 274 { 275 case shell_quoting_style: 276 goto use_shell_always_quoting_style; 277 278 case c_quoting_style: 279 if (i + 2 < argsize && arg[i + 1] == '?') 280 switch (arg[i + 2]) 281 { 282 case '!': case '\'': 283 case '(': case ')': case '-': case '/': 284 case '<': case '=': case '>': 285 /* Escape the second '?' in what would otherwise be 286 a trigraph. */ 287 c = arg[i + 2]; 288 i += 2; 289 STORE ('?'); 290 STORE ('\\'); 291 STORE ('?'); 292 break; 293 294 default: 295 break; 296 } 297 break; 298 299 default: 300 break; 301 } 302 break; 303 304 case '\a': esc = 'a'; goto c_escape; 305 case '\b': esc = 'b'; goto c_escape; 306 case '\f': esc = 'f'; goto c_escape; 307 case '\n': esc = 'n'; goto c_and_shell_escape; 308 case '\r': esc = 'r'; goto c_and_shell_escape; 309 case '\t': esc = 't'; goto c_and_shell_escape; 310 case '\v': esc = 'v'; goto c_escape; 311 case '\\': esc = c; goto c_and_shell_escape; 312 313 c_and_shell_escape: 314 if (quoting_style == shell_quoting_style) 315 goto use_shell_always_quoting_style; 316 c_escape: 317 if (backslash_escapes) 318 { 319 c = esc; 320 goto store_escape; 321 } 322 break; 323 324 case '{': case '}': /* sometimes special if isolated */ 325 if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1)) 326 break; 327 /* Fall through. */ 328 case '#': case '~': 329 if (i != 0) 330 break; 331 /* Fall through. */ 332 case ' ': 333 case '!': /* special in bash */ 334 case '"': case '$': case '&': 335 case '(': case ')': case '*': case ';': 336 case '<': 337 case '=': /* sometimes special in 0th or (with "set -k") later args */ 338 case '>': case '[': 339 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */ 340 case '`': case '|': 341 /* A shell special character. In theory, '$' and '`' could 342 be the first bytes of multibyte characters, which means 343 we should check them with mbrtowc, but in practice this 344 doesn't happen so it's not worth worrying about. */ 345 if (quoting_style == shell_quoting_style) 346 goto use_shell_always_quoting_style; 347 break; 348 349 case '\'': 350 switch (quoting_style) 351 { 352 case shell_quoting_style: 353 goto use_shell_always_quoting_style; 354 355 case shell_always_quoting_style: 356 STORE ('\''); 357 STORE ('\\'); 358 STORE ('\''); 359 break; 360 361 default: 362 break; 363 } 364 break; 365 366 case '%': case '+': case ',': case '-': case '.': case '/': 367 case '0': case '1': case '2': case '3': case '4': case '5': 368 case '6': case '7': case '8': case '9': case ':': 369 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 370 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': 371 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 372 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': 373 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b': 374 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': 375 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': 376 case 'o': case 'p': case 'q': case 'r': case 's': case 't': 377 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': 378 /* These characters don't cause problems, no matter what the 379 quoting style is. They cannot start multibyte sequences. */ 380 break; 381 382 default: 383 /* If we have a multibyte sequence, copy it until we reach 384 its end, find an error, or come back to the initial shift 385 state. For C-like styles, if the sequence has 386 unprintable characters, escape the whole sequence, since 387 we can't easily escape single characters within it. */ 388 { 389 /* Length of multibyte sequence found so far. */ 390 size_t m; 391 392 bool printable; 393 394 if (unibyte_locale) 395 { 396 m = 1; 397 printable = isprint (c) != 0; 398 } 399 else 400 { 401 mbstate_t mbstate; 402 memset (&mbstate, 0, sizeof mbstate); 403 404 m = 0; 405 printable = true; 406 if (argsize == SIZE_MAX) 407 argsize = strlen (arg); 408 409 do 410 { 411 wchar_t w; 412 size_t bytes = mbrtowc (&w, &arg[i + m], 413 argsize - (i + m), &mbstate); 414 if (bytes == 0) 415 break; 416 else if (bytes == (size_t) -1) 417 { 418 printable = false; 419 break; 420 } 421 else if (bytes == (size_t) -2) 422 { 423 printable = false; 424 while (i + m < argsize && arg[i + m]) 425 m++; 426 break; 427 } 428 else 429 { 430 /* Work around a bug with older shells that "see" a '\' 431 that is really the 2nd byte of a multibyte character. 432 In practice the problem is limited to ASCII 433 chars >= '@' that are shell special chars. */ 434 if ('[' == 0x5b && quoting_style == shell_quoting_style) 435 { 436 size_t j; 437 for (j = 1; j < bytes; j++) 438 switch (arg[i + m + j]) 439 { 440 case '[': case '\\': case '^': 441 case '`': case '|': 442 goto use_shell_always_quoting_style; 443 444 default: 445 break; 446 } 447 } 448 449 if (! iswprint (w)) 450 printable = false; 451 m += bytes; 452 } 453 } 454 while (! mbsinit (&mbstate)); 455 } 456 457 if (1 < m || (backslash_escapes && ! printable)) 458 { 459 /* Output a multibyte sequence, or an escaped 460 unprintable unibyte character. */ 461 size_t ilim = i + m; 462 463 for (;;) 464 { 465 if (backslash_escapes && ! printable) 466 { 467 STORE ('\\'); 468 STORE ('0' + (c >> 6)); 469 STORE ('0' + ((c >> 3) & 7)); 470 c = '0' + (c & 7); 471 } 472 if (ilim <= i + 1) 473 break; 474 STORE (c); 475 c = arg[++i]; 476 } 477 478 goto store_c; 479 } 480 } 481 } 482 483 if (! (backslash_escapes 484 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS)))) 485 goto store_c; 486 487 store_escape: 488 STORE ('\\'); 489 490 store_c: 491 STORE (c); 492 } 493 494 if (i == 0 && quoting_style == shell_quoting_style) 495 goto use_shell_always_quoting_style; 496 497 if (quote_string) 498 for (; *quote_string; quote_string++) 499 STORE (*quote_string); 500 501 if (len < buffersize) 502 buffer[len] = '\0'; 503 return len; 504 505 use_shell_always_quoting_style: 506 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize, 507 shell_always_quoting_style, o); 508} 509 510/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of 511 argument ARG (of size ARGSIZE), using O to control quoting. 512 If O is null, use the default. 513 Terminate the output with a null character, and return the written 514 size of the output, not counting the terminating null. 515 If BUFFERSIZE is too small to store the output string, return the 516 value that would have been returned had BUFFERSIZE been large enough. 517 If ARGSIZE is SIZE_MAX, use the string length of the argument for 518 ARGSIZE. */ 519size_t 520quotearg_buffer (char *buffer, size_t buffersize, 521 char const *arg, size_t argsize, 522 struct quoting_options const *o) 523{ 524 struct quoting_options const *p = o ? o : &default_quoting_options; 525 int e = errno; 526 size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize, 527 p->style, p); 528 errno = e; 529 return r; 530} 531 532/* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly 533 allocated storage containing the quoted string. */ 534char * 535quotearg_alloc (char const *arg, size_t argsize, 536 struct quoting_options const *o) 537{ 538 int e = errno; 539 size_t bufsize = quotearg_buffer (0, 0, arg, argsize, o) + 1; 540 char *buf = xcharalloc (bufsize); 541 quotearg_buffer (buf, bufsize, arg, argsize, o); 542 errno = e; 543 return buf; 544} 545 546/* A storage slot with size and pointer to a value. */ 547struct slotvec 548{ 549 size_t size; 550 char *val; 551}; 552 553/* Preallocate a slot 0 buffer, so that the caller can always quote 554 one small component of a "memory exhausted" message in slot 0. */ 555static char slot0[256]; 556static unsigned int nslots = 1; 557static struct slotvec slotvec0 = {sizeof slot0, slot0}; 558static struct slotvec *slotvec = &slotvec0; 559 560void 561quotearg_free (void) 562{ 563 struct slotvec *sv = slotvec; 564 unsigned int i; 565 for (i = 1; i < nslots; i++) 566 free (sv[i].val); 567 if (sv[0].val != slot0) 568 { 569 free (sv[0].val); 570 slotvec0.size = sizeof slot0; 571 slotvec0.val = slot0; 572 } 573 if (sv != &slotvec0) 574 { 575 free (sv); 576 slotvec = &slotvec0; 577 } 578 nslots = 1; 579} 580 581/* Use storage slot N to return a quoted version of argument ARG. 582 ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a 583 null-terminated string. 584 OPTIONS specifies the quoting options. 585 The returned value points to static storage that can be 586 reused by the next call to this function with the same value of N. 587 N must be nonnegative. N is deliberately declared with type "int" 588 to allow for future extensions (using negative values). */ 589static char * 590quotearg_n_options (int n, char const *arg, size_t argsize, 591 struct quoting_options const *options) 592{ 593 int e = errno; 594 595 unsigned int n0 = n; 596 struct slotvec *sv = slotvec; 597 598 if (n < 0) 599 abort (); 600 601 if (nslots <= n0) 602 { 603 /* FIXME: technically, the type of n1 should be `unsigned int', 604 but that evokes an unsuppressible warning from gcc-4.0.1 and 605 older. If gcc ever provides an option to suppress that warning, 606 revert to the original type, so that the test in xalloc_oversized 607 is once again performed only at compile time. */ 608 size_t n1 = n0 + 1; 609 bool preallocated = (sv == &slotvec0); 610 611 if (xalloc_oversized (n1, sizeof *sv)) 612 xalloc_die (); 613 614 slotvec = sv = xrealloc (preallocated ? NULL : sv, n1 * sizeof *sv); 615 if (preallocated) 616 *sv = slotvec0; 617 memset (sv + nslots, 0, (n1 - nslots) * sizeof *sv); 618 nslots = n1; 619 } 620 621 { 622 size_t size = sv[n].size; 623 char *val = sv[n].val; 624 size_t qsize = quotearg_buffer (val, size, arg, argsize, options); 625 626 if (size <= qsize) 627 { 628 sv[n].size = size = qsize + 1; 629 if (val != slot0) 630 free (val); 631 sv[n].val = val = xcharalloc (size); 632 quotearg_buffer (val, size, arg, argsize, options); 633 } 634 635 errno = e; 636 return val; 637 } 638} 639 640char * 641quotearg_n (int n, char const *arg) 642{ 643 return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options); 644} 645 646char * 647quotearg (char const *arg) 648{ 649 return quotearg_n (0, arg); 650} 651 652/* Return quoting options for STYLE, with no extra quoting. */ 653static struct quoting_options 654quoting_options_from_style (enum quoting_style style) 655{ 656 struct quoting_options o; 657 o.style = style; 658 memset (o.quote_these_too, 0, sizeof o.quote_these_too); 659 return o; 660} 661 662char * 663quotearg_n_style (int n, enum quoting_style s, char const *arg) 664{ 665 struct quoting_options const o = quoting_options_from_style (s); 666 return quotearg_n_options (n, arg, SIZE_MAX, &o); 667} 668 669char * 670quotearg_n_style_mem (int n, enum quoting_style s, 671 char const *arg, size_t argsize) 672{ 673 struct quoting_options const o = quoting_options_from_style (s); 674 return quotearg_n_options (n, arg, argsize, &o); 675} 676 677char * 678quotearg_style (enum quoting_style s, char const *arg) 679{ 680 return quotearg_n_style (0, s, arg); 681} 682 683char * 684quotearg_char (char const *arg, char ch) 685{ 686 struct quoting_options options; 687 options = default_quoting_options; 688 set_char_quoting (&options, ch, 1); 689 return quotearg_n_options (0, arg, SIZE_MAX, &options); 690} 691 692char * 693quotearg_colon (char const *arg) 694{ 695 return quotearg_char (arg, ':'); 696} 697