/* quotearg.c revision 131555 */
1/* quotearg.c - quote arguments for output 2 Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc. 3 4 This program is free software; you can redistribute it and/or modify 5 it under the terms of the GNU General Public License as published by 6 the Free Software Foundation; either version 2, or (at your option) 7 any later version. 8 9 This program is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with this program; if not, write to the Free Software Foundation, 16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ 17 18/* Written by Paul Eggert <eggert@twinsun.com> */ 19 20#if HAVE_CONFIG_H 21# include <config.h> 22#endif 23 24#if HAVE_STDDEF_H 25# include <stddef.h> /* For the definition of size_t on windows w/MSVC. */ 26#endif 27#include <sys/types.h> 28#include <quotearg.h> 29#include <xalloc.h> 30 31#include <ctype.h> 32 33#if ENABLE_NLS 34# include <libintl.h> 35# define _(text) gettext (text) 36#else 37# define _(text) text 38#endif 39#define N_(text) text 40 41#if HAVE_LIMITS_H 42# include <limits.h> 43#endif 44#ifndef CHAR_BIT 45# define CHAR_BIT 8 46#endif 47#ifndef UCHAR_MAX 48# define UCHAR_MAX ((unsigned char) -1) 49#endif 50 51#if HAVE_C_BACKSLASH_A 52# define ALERT_CHAR '\a' 53#else 54# define ALERT_CHAR '\7' 55#endif 56 57#if HAVE_STDLIB_H 58# include <stdlib.h> 59#endif 60 61#if HAVE_STRING_H 62# include <string.h> 63#endif 64 65#if HAVE_WCHAR_H 66# include <wchar.h> 67#endif 68 69#if !HAVE_MBRTOWC 70/* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the 71 other macros are defined only for documentation and to satisfy C 72 syntax. 
*/ 73# undef MB_CUR_MAX 74# define MB_CUR_MAX 1 75# define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0) 76# define mbsinit(ps) 1 77# define iswprint(wc) ISPRINT ((unsigned char) (wc)) 78#endif 79 80#ifndef iswprint 81# if HAVE_WCTYPE_H 82# include <wctype.h> 83# endif 84# if !defined iswprint && !HAVE_ISWPRINT 85# define iswprint(wc) 1 86# endif 87#endif 88 89#define INT_BITS (sizeof (int) * CHAR_BIT) 90 91#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII)) 92# define IN_CTYPE_DOMAIN(c) 1 93#else 94# define IN_CTYPE_DOMAIN(c) isascii(c) 95#endif 96 97/* Undefine to protect against the definition in wctype.h of solaris2.6. */ 98#undef ISPRINT 99#define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c)) 100 101struct quoting_options 102{ 103 /* Basic quoting style. */ 104 enum quoting_style style; 105 106 /* Quote the characters indicated by this bit vector even if the 107 quoting style would not normally require them to be quoted. */ 108 int quote_these_too[(UCHAR_MAX / INT_BITS) + 1]; 109}; 110 111/* Names of quoting styles. */ 112char const *const quoting_style_args[] = 113{ 114 "literal", 115 "shell", 116 "shell-always", 117 "c", 118 "escape", 119 "locale", 120 "clocale", 121 0 122}; 123 124/* Correspondences to quoting style names. */ 125enum quoting_style const quoting_style_vals[] = 126{ 127 literal_quoting_style, 128 shell_quoting_style, 129 shell_always_quoting_style, 130 c_quoting_style, 131 escape_quoting_style, 132 locale_quoting_style, 133 clocale_quoting_style 134}; 135 136/* The default quoting options. */ 137static struct quoting_options default_quoting_options; 138 139/* Allocate a new set of quoting options, with contents initially identical 140 to O if O is not null, or to the default if O is null. 141 It is the caller's responsibility to free the result. 
*/ 142struct quoting_options * 143clone_quoting_options (struct quoting_options *o) 144{ 145 struct quoting_options *p 146 = (struct quoting_options *) xmalloc (sizeof (struct quoting_options)); 147 *p = *(o ? o : &default_quoting_options); 148 return p; 149} 150 151/* Get the value of O's quoting style. If O is null, use the default. */ 152enum quoting_style 153get_quoting_style (struct quoting_options *o) 154{ 155 return (o ? o : &default_quoting_options)->style; 156} 157 158/* In O (or in the default if O is null), 159 set the value of the quoting style to S. */ 160void 161set_quoting_style (struct quoting_options *o, enum quoting_style s) 162{ 163 (o ? o : &default_quoting_options)->style = s; 164} 165 166/* In O (or in the default if O is null), 167 set the value of the quoting options for character C to I. 168 Return the old value. Currently, the only values defined for I are 169 0 (the default) and 1 (which means to quote the character even if 170 it would not otherwise be quoted). */ 171int 172set_char_quoting (struct quoting_options *o, char c, int i) 173{ 174 unsigned char uc = c; 175 int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS; 176 int shift = uc % INT_BITS; 177 int r = (*p >> shift) & 1; 178 *p ^= ((i & 1) ^ r) << shift; 179 return r; 180} 181 182/* MSGID approximates a quotation mark. Return its translation if it 183 has one; otherwise, return either it or "\"", depending on S. */ 184static char const * 185gettext_quote (char const *msgid, enum quoting_style s) 186{ 187 char const *translation = _(msgid); 188 if (translation == msgid && s == clocale_quoting_style) 189 translation = "\""; 190 return translation; 191} 192 193/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of 194 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the 195 non-quoting-style part of O to control quoting. 
196 Terminate the output with a null character, and return the written 197 size of the output, not counting the terminating null. 198 If BUFFERSIZE is too small to store the output string, return the 199 value that would have been returned had BUFFERSIZE been large enough. 200 If ARGSIZE is -1, use the string length of the argument for ARGSIZE. 201 202 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG, 203 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting 204 style specified by O, and O may not be null. */ 205 206static size_t 207quotearg_buffer_restyled (char *buffer, size_t buffersize, 208 char const *arg, size_t argsize, 209 enum quoting_style quoting_style, 210 struct quoting_options const *o) 211{ 212 size_t i; 213 size_t len = 0; 214 char const *quote_string = 0; 215 size_t quote_string_len = 0; 216 int backslash_escapes = 0; 217 int unibyte_locale = MB_CUR_MAX == 1; 218 219#define STORE(c) \ 220 do \ 221 { \ 222 if (len < buffersize) \ 223 buffer[len] = (c); \ 224 len++; \ 225 } \ 226 while (0) 227 228 switch (quoting_style) 229 { 230 case c_quoting_style: 231 STORE ('"'); 232 backslash_escapes = 1; 233 quote_string = "\""; 234 quote_string_len = 1; 235 break; 236 237 case escape_quoting_style: 238 backslash_escapes = 1; 239 break; 240 241 case locale_quoting_style: 242 case clocale_quoting_style: 243 { 244 /* Get translations for open and closing quotation marks. 245 246 The message catalog should translate "`" to a left 247 quotation mark suitable for the locale, and similarly for 248 "'". If the catalog has no translation, 249 locale_quoting_style quotes `like this', and 250 clocale_quoting_style quotes "like this". 251 252 For example, an American English Unicode locale should 253 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and 254 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION 255 MARK). 
A British English Unicode locale should instead 256 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and 257 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. */ 258 259 char const *left = gettext_quote (N_("`"), quoting_style); 260 char const *right = gettext_quote (N_("'"), quoting_style); 261 for (quote_string = left; *quote_string; quote_string++) 262 STORE (*quote_string); 263 backslash_escapes = 1; 264 quote_string = right; 265 quote_string_len = strlen (quote_string); 266 } 267 break; 268 269 case shell_always_quoting_style: 270 STORE ('\''); 271 quote_string = "'"; 272 quote_string_len = 1; 273 break; 274 275 default: 276 break; 277 } 278 279 for (i = 0; ! (argsize == (size_t) -1 ? arg[i] == '\0' : i == argsize); i++) 280 { 281 unsigned char c; 282 unsigned char esc; 283 284 if (backslash_escapes 285 && quote_string_len 286 && i + quote_string_len <= argsize 287 && memcmp (arg + i, quote_string, quote_string_len) == 0) 288 STORE ('\\'); 289 290 c = arg[i]; 291 switch (c) 292 { 293 case '?': 294 switch (quoting_style) 295 { 296 case shell_quoting_style: 297 goto use_shell_always_quoting_style; 298 299 case c_quoting_style: 300 if (i + 2 < argsize && arg[i + 1] == '?') 301 switch (arg[i + 2]) 302 { 303 case '!': case '\'': 304 case '(': case ')': case '-': case '/': 305 case '<': case '=': case '>': 306 /* Escape the second '?' in what would otherwise be 307 a trigraph. 
*/ 308 i += 2; 309 c = arg[i + 2]; 310 STORE ('?'); 311 STORE ('\\'); 312 STORE ('?'); 313 break; 314 } 315 break; 316 317 default: 318 break; 319 } 320 break; 321 322 case ALERT_CHAR: esc = 'a'; goto c_escape; 323 case '\b': esc = 'b'; goto c_escape; 324 case '\f': esc = 'f'; goto c_escape; 325 case '\n': esc = 'n'; goto c_and_shell_escape; 326 case '\r': esc = 'r'; goto c_and_shell_escape; 327 case '\t': esc = 't'; goto c_and_shell_escape; 328 case '\v': esc = 'v'; goto c_escape; 329 case '\\': esc = c; goto c_and_shell_escape; 330 331 c_and_shell_escape: 332 if (quoting_style == shell_quoting_style) 333 goto use_shell_always_quoting_style; 334 c_escape: 335 if (backslash_escapes) 336 { 337 c = esc; 338 goto store_escape; 339 } 340 break; 341 342 case '#': case '~': 343 if (i != 0) 344 break; 345 /* Fall through. */ 346 case ' ': 347 case '!': /* special in bash */ 348 case '"': case '$': case '&': 349 case '(': case ')': case '*': case ';': 350 case '<': case '>': case '[': 351 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */ 352 case '`': case '|': 353 /* A shell special character. In theory, '$' and '`' could 354 be the first bytes of multibyte characters, which means 355 we should check them with mbrtowc, but in practice this 356 doesn't happen so it's not worth worrying about. 
*/ 357 if (quoting_style == shell_quoting_style) 358 goto use_shell_always_quoting_style; 359 break; 360 361 case '\'': 362 switch (quoting_style) 363 { 364 case shell_quoting_style: 365 goto use_shell_always_quoting_style; 366 367 case shell_always_quoting_style: 368 STORE ('\''); 369 STORE ('\\'); 370 STORE ('\''); 371 break; 372 373 default: 374 break; 375 } 376 break; 377 378 case '%': case '+': case ',': case '-': case '.': case '/': 379 case '0': case '1': case '2': case '3': case '4': case '5': 380 case '6': case '7': case '8': case '9': case ':': case '=': 381 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 382 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': 383 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 384 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': 385 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b': 386 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': 387 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': 388 case 'o': case 'p': case 'q': case 'r': case 's': case 't': 389 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': 390 case '{': case '}': 391 /* These characters don't cause problems, no matter what the 392 quoting style is. They cannot start multibyte sequences. */ 393 break; 394 395 default: 396 /* If we have a multibyte sequence, copy it until we reach 397 its end, find an error, or come back to the initial shift 398 state. For C-like styles, if the sequence has 399 unprintable characters, escape the whole sequence, since 400 we can't easily escape single characters within it. */ 401 { 402 /* Length of multibyte sequence found so far. 
*/ 403 size_t m; 404 405 int printable; 406 407 if (unibyte_locale) 408 { 409 m = 1; 410 printable = ISPRINT (c); 411 } 412 else 413 { 414 mbstate_t mbstate; 415 memset (&mbstate, 0, sizeof mbstate); 416 417 m = 0; 418 printable = 1; 419 if (argsize == (size_t) -1) 420 argsize = strlen (arg); 421 422 do 423 { 424 wchar_t w; 425 size_t bytes = mbrtowc (&w, &arg[i + m], 426 argsize - (i + m), &mbstate); 427 if (bytes == 0) 428 break; 429 else if (bytes == (size_t) -1) 430 { 431 printable = 0; 432 break; 433 } 434 else if (bytes == (size_t) -2) 435 { 436 printable = 0; 437 while (i + m < argsize && arg[i + m]) 438 m++; 439 break; 440 } 441 else 442 { 443 if (! iswprint (w)) 444 printable = 0; 445 m += bytes; 446 } 447 } 448 while (! mbsinit (&mbstate)); 449 } 450 451 if (1 < m || (backslash_escapes && ! printable)) 452 { 453 /* Output a multibyte sequence, or an escaped 454 unprintable unibyte character. */ 455 size_t ilim = i + m; 456 457 for (;;) 458 { 459 if (backslash_escapes && ! printable) 460 { 461 STORE ('\\'); 462 STORE ('0' + (c >> 6)); 463 STORE ('0' + ((c >> 3) & 7)); 464 c = '0' + (c & 7); 465 } 466 if (ilim <= i + 1) 467 break; 468 STORE (c); 469 c = arg[++i]; 470 } 471 472 goto store_c; 473 } 474 } 475 } 476 477 if (! (backslash_escapes 478 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS)))) 479 goto store_c; 480 481 store_escape: 482 STORE ('\\'); 483 484 store_c: 485 STORE (c); 486 } 487 488 if (quote_string) 489 for (; *quote_string; quote_string++) 490 STORE (*quote_string); 491 492 if (len < buffersize) 493 buffer[len] = '\0'; 494 return len; 495 496 use_shell_always_quoting_style: 497 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize, 498 shell_always_quoting_style, o); 499} 500 501/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of 502 argument ARG (of size ARGSIZE), using O to control quoting. 503 If O is null, use the default. 
504 Terminate the output with a null character, and return the written 505 size of the output, not counting the terminating null. 506 If BUFFERSIZE is too small to store the output string, return the 507 value that would have been returned had BUFFERSIZE been large enough. 508 If ARGSIZE is -1, use the string length of the argument for ARGSIZE. */ 509size_t 510quotearg_buffer (char *buffer, size_t buffersize, 511 char const *arg, size_t argsize, 512 struct quoting_options const *o) 513{ 514 struct quoting_options const *p = o ? o : &default_quoting_options; 515 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize, 516 p->style, p); 517} 518 519/* Use storage slot N to return a quoted version of the string ARG. 520 OPTIONS specifies the quoting options. 521 The returned value points to static storage that can be 522 reused by the next call to this function with the same value of N. 523 N must be nonnegative. N is deliberately declared with type "int" 524 to allow for future extensions (using negative values). */ 525static char * 526quotearg_n_options (int n, char const *arg, 527 struct quoting_options const *options) 528{ 529 /* Preallocate a slot 0 buffer, so that the caller can always quote 530 one small component of a "memory exhausted" message in slot 0. */ 531 static char slot0[256]; 532 static unsigned int nslots = 1; 533 struct slotvec 534 { 535 size_t size; 536 char *val; 537 }; 538 static struct slotvec slotvec0 = {sizeof slot0, slot0}; 539 static struct slotvec *slotvec = &slotvec0; 540 541 if (nslots <= n) 542 { 543 int n1 = n + 1; 544 size_t s = n1 * sizeof (struct slotvec); 545 if (! 
(0 < n1 && n1 == s / sizeof (struct slotvec))) 546 abort (); 547 if (slotvec == &slotvec0) 548 { 549 slotvec = (struct slotvec *) xmalloc (sizeof (struct slotvec)); 550 *slotvec = slotvec0; 551 } 552 slotvec = (struct slotvec *) xrealloc (slotvec, s); 553 memset (slotvec + nslots, 0, (n1 - nslots) * sizeof (struct slotvec)); 554 nslots = n; 555 } 556 557 { 558 size_t size = slotvec[n].size; 559 char *val = slotvec[n].val; 560 size_t qsize = quotearg_buffer (val, size, arg, (size_t) -1, options); 561 562 if (size <= qsize) 563 { 564 slotvec[n].size = size = qsize + 1; 565 slotvec[n].val = val = xrealloc (val == slot0 ? 0 : val, size); 566 quotearg_buffer (val, size, arg, (size_t) -1, options); 567 } 568 569 return val; 570 } 571} 572 573char * 574quotearg_n (unsigned int n, char const *arg) 575{ 576 return quotearg_n_options (n, arg, &default_quoting_options); 577} 578 579char * 580quotearg (char const *arg) 581{ 582 return quotearg_n (0, arg); 583} 584 585char * 586quotearg_n_style (unsigned int n, enum quoting_style s, char const *arg) 587{ 588 struct quoting_options o; 589 o.style = s; 590 memset (o.quote_these_too, 0, sizeof o.quote_these_too); 591 return quotearg_n_options (n, arg, &o); 592} 593 594char * 595quotearg_style (enum quoting_style s, char const *arg) 596{ 597 return quotearg_n_style (0, s, arg); 598} 599 600char * 601quotearg_char (char const *arg, char ch) 602{ 603 struct quoting_options options; 604 options = default_quoting_options; 605 set_char_quoting (&options, ch, 1); 606 return quotearg_n_options (0, arg, &options); 607} 608 609char * 610quotearg_colon (char const *arg) 611{ 612 return quotearg_char (arg, ':'); 613} 614