/* $NetBSD$ */

/* quotearg.c - quote arguments for output
   Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software Foundation,
   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */

/* Written by Paul Eggert <eggert@twinsun.com> */

#if HAVE_CONFIG_H
# include <config.h>
#endif

#if HAVE_STDDEF_H
# include <stddef.h>  /* For the definition of size_t on windows w/MSVC.  */
#endif
#include <sys/types.h>
#include <quotearg.h>
#include <xalloc.h>

#include <ctype.h>

/* _(text) yields the translation of TEXT through gettext when NLS is
   enabled and TEXT itself otherwise.  N_(text) always expands to TEXT
   unchanged; it only marks the string.  */
#if ENABLE_NLS
# include <libintl.h>
# define _(text) gettext (text)
#else
# define _(text) text
#endif
#define N_(text) text

/* Fallbacks for hosts where <limits.h> is missing or does not define
   these.  */
#if HAVE_LIMITS_H
# include <limits.h>
#endif
#ifndef CHAR_BIT
# define CHAR_BIT 8
#endif
#ifndef UCHAR_MAX
# define UCHAR_MAX ((unsigned char) -1)
#endif

/* The alert (bell) character: '\a' where the compiler understands that
   escape, the octal escape '\7' elsewhere.  */
#if HAVE_C_BACKSLASH_A
# define ALERT_CHAR '\a'
#else
# define ALERT_CHAR '\7'
#endif

#if HAVE_STDLIB_H
# include <stdlib.h>
#endif

#if HAVE_STRING_H
# include <string.h>
#endif

#if HAVE_WCHAR_H
# include <wchar.h>
#endif

#if !HAVE_MBRTOWC
/* Disable multibyte processing entirely.  Since MB_CUR_MAX is 1, the
   other macros are defined only for documentation and to satisfy C
   syntax.  */
# undef MB_CUR_MAX
# define MB_CUR_MAX 1
# define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
# define mbsinit(ps) 1
# define iswprint(wc) ISPRINT ((unsigned char) (wc))
#endif

/* If nothing above supplied iswprint, take it from <wctype.h> when that
   header exists; failing that, treat every wide character as
   printable.  */
#ifndef iswprint
# if HAVE_WCTYPE_H
#  include <wctype.h>
# endif
# if !defined iswprint && !HAVE_ISWPRINT
#  define iswprint(wc) 1
# endif
#endif

/* Number of bits in an int.  This is the word size of the
   quote_these_too bit vector below.  */
#define INT_BITS (sizeof (int) * CHAR_BIT)

/* IN_CTYPE_DOMAIN (c) is the guard ISPRINT applies before calling
   isprint: always true in the first configuration, isascii (c) in the
   other.  */
#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
# define IN_CTYPE_DOMAIN(c) 1
#else
# define IN_CTYPE_DOMAIN(c) isascii(c)
#endif

/* Undefine to protect against the definition in wctype.h of solaris2.6.   */
#undef ISPRINT
#define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))

/* A quoting style plus a per-character set of extra characters to
   quote.  */
struct quoting_options
{
  /* Basic quoting style.  */
  enum quoting_style style;

  /* Quote the characters indicated by this bit vector even if the
     quoting style would not normally require them to be quoted.
     The bit for unsigned char value UC is bit (UC % INT_BITS) of
     element (UC / INT_BITS).  */
  int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
};

/* Names of quoting styles.  The list is terminated by a null pointer.  */
char const *const quoting_style_args[] =
{
  "literal",
  "shell",
  "shell-always",
  "c",
  "escape",
  "locale",
  "clocale",
  0
};

/* Correspondences to quoting style names: entry I is the style named by
   quoting_style_args[I].  This table has no terminator.  */
enum quoting_style const quoting_style_vals[] =
{
  literal_quoting_style,
  shell_quoting_style,
  shell_always_quoting_style,
  c_quoting_style,
  escape_quoting_style,
  locale_quoting_style,
  clocale_quoting_style
};

/* The default quoting options.  Having static storage duration, the
   object starts out zero-initialized.  */
static struct quoting_options default_quoting_options;

/* Allocate a new set of quoting options, with contents initially identical
   to O if O is not null, or to the default if O is null.
   It is the caller's responsibility to free the result.
*/ 144struct quoting_options * 145clone_quoting_options (struct quoting_options *o) 146{ 147 struct quoting_options *p 148 = (struct quoting_options *) xmalloc (sizeof (struct quoting_options)); 149 *p = *(o ? o : &default_quoting_options); 150 return p; 151} 152 153/* Get the value of O's quoting style. If O is null, use the default. */ 154enum quoting_style 155get_quoting_style (struct quoting_options *o) 156{ 157 return (o ? o : &default_quoting_options)->style; 158} 159 160/* In O (or in the default if O is null), 161 set the value of the quoting style to S. */ 162void 163set_quoting_style (struct quoting_options *o, enum quoting_style s) 164{ 165 (o ? o : &default_quoting_options)->style = s; 166} 167 168/* In O (or in the default if O is null), 169 set the value of the quoting options for character C to I. 170 Return the old value. Currently, the only values defined for I are 171 0 (the default) and 1 (which means to quote the character even if 172 it would not otherwise be quoted). */ 173int 174set_char_quoting (struct quoting_options *o, char c, int i) 175{ 176 unsigned char uc = c; 177 int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS; 178 int shift = uc % INT_BITS; 179 int r = (*p >> shift) & 1; 180 *p ^= ((i & 1) ^ r) << shift; 181 return r; 182} 183 184/* MSGID approximates a quotation mark. Return its translation if it 185 has one; otherwise, return either it or "\"", depending on S. */ 186static char const * 187gettext_quote (char const *msgid, enum quoting_style s) 188{ 189 char const *translation = _(msgid); 190 if (translation == msgid && s == clocale_quoting_style) 191 translation = "\""; 192 return translation; 193} 194 195/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of 196 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the 197 non-quoting-style part of O to control quoting. 
198 Terminate the output with a null character, and return the written 199 size of the output, not counting the terminating null. 200 If BUFFERSIZE is too small to store the output string, return the 201 value that would have been returned had BUFFERSIZE been large enough. 202 If ARGSIZE is -1, use the string length of the argument for ARGSIZE. 203 204 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG, 205 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting 206 style specified by O, and O may not be null. */ 207 208static size_t 209quotearg_buffer_restyled (char *buffer, size_t buffersize, 210 char const *arg, size_t argsize, 211 enum quoting_style quoting_style, 212 struct quoting_options const *o) 213{ 214 size_t i; 215 size_t len = 0; 216 char const *quote_string = 0; 217 size_t quote_string_len = 0; 218 int backslash_escapes = 0; 219 int unibyte_locale = MB_CUR_MAX == 1; 220 221#define STORE(c) \ 222 do \ 223 { \ 224 if (len < buffersize) \ 225 buffer[len] = (c); \ 226 len++; \ 227 } \ 228 while (0) 229 230 switch (quoting_style) 231 { 232 case c_quoting_style: 233 STORE ('"'); 234 backslash_escapes = 1; 235 quote_string = "\""; 236 quote_string_len = 1; 237 break; 238 239 case escape_quoting_style: 240 backslash_escapes = 1; 241 break; 242 243 case locale_quoting_style: 244 case clocale_quoting_style: 245 { 246 /* Get translations for open and closing quotation marks. 247 248 The message catalog should translate "`" to a left 249 quotation mark suitable for the locale, and similarly for 250 "'". If the catalog has no translation, 251 locale_quoting_style quotes `like this', and 252 clocale_quoting_style quotes "like this". 253 254 For example, an American English Unicode locale should 255 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and 256 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION 257 MARK). 
A British English Unicode locale should instead 258 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and 259 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. */ 260 261 char const *left = gettext_quote (N_("`"), quoting_style); 262 char const *right = gettext_quote (N_("'"), quoting_style); 263 for (quote_string = left; *quote_string; quote_string++) 264 STORE (*quote_string); 265 backslash_escapes = 1; 266 quote_string = right; 267 quote_string_len = strlen (quote_string); 268 } 269 break; 270 271 case shell_always_quoting_style: 272 STORE ('\''); 273 quote_string = "'"; 274 quote_string_len = 1; 275 break; 276 277 default: 278 break; 279 } 280 281 for (i = 0; ! (argsize == (size_t) -1 ? arg[i] == '\0' : i == argsize); i++) 282 { 283 unsigned char c; 284 unsigned char esc; 285 286 if (backslash_escapes 287 && quote_string_len 288 && i + quote_string_len <= argsize 289 && memcmp (arg + i, quote_string, quote_string_len) == 0) 290 STORE ('\\'); 291 292 c = arg[i]; 293 switch (c) 294 { 295 case '?': 296 switch (quoting_style) 297 { 298 case shell_quoting_style: 299 goto use_shell_always_quoting_style; 300 301 case c_quoting_style: 302 if (i + 2 < argsize && arg[i + 1] == '?') 303 switch (arg[i + 2]) 304 { 305 case '!': case '\'': 306 case '(': case ')': case '-': case '/': 307 case '<': case '=': case '>': 308 /* Escape the second '?' in what would otherwise be 309 a trigraph. 
*/ 310 i += 2; 311 c = arg[i + 2]; 312 STORE ('?'); 313 STORE ('\\'); 314 STORE ('?'); 315 break; 316 } 317 break; 318 319 default: 320 break; 321 } 322 break; 323 324 case ALERT_CHAR: esc = 'a'; goto c_escape; 325 case '\b': esc = 'b'; goto c_escape; 326 case '\f': esc = 'f'; goto c_escape; 327 case '\n': esc = 'n'; goto c_and_shell_escape; 328 case '\r': esc = 'r'; goto c_and_shell_escape; 329 case '\t': esc = 't'; goto c_and_shell_escape; 330 case '\v': esc = 'v'; goto c_escape; 331 case '\\': esc = c; goto c_and_shell_escape; 332 333 c_and_shell_escape: 334 if (quoting_style == shell_quoting_style) 335 goto use_shell_always_quoting_style; 336 c_escape: 337 if (backslash_escapes) 338 { 339 c = esc; 340 goto store_escape; 341 } 342 break; 343 344 case '#': case '~': 345 if (i != 0) 346 break; 347 /* Fall through. */ 348 case ' ': 349 case '!': /* special in bash */ 350 case '"': case '$': case '&': 351 case '(': case ')': case '*': case ';': 352 case '<': case '>': case '[': 353 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */ 354 case '`': case '|': 355 /* A shell special character. In theory, '$' and '`' could 356 be the first bytes of multibyte characters, which means 357 we should check them with mbrtowc, but in practice this 358 doesn't happen so it's not worth worrying about. 
*/ 359 if (quoting_style == shell_quoting_style) 360 goto use_shell_always_quoting_style; 361 break; 362 363 case '\'': 364 switch (quoting_style) 365 { 366 case shell_quoting_style: 367 goto use_shell_always_quoting_style; 368 369 case shell_always_quoting_style: 370 STORE ('\''); 371 STORE ('\\'); 372 STORE ('\''); 373 break; 374 375 default: 376 break; 377 } 378 break; 379 380 case '%': case '+': case ',': case '-': case '.': case '/': 381 case '0': case '1': case '2': case '3': case '4': case '5': 382 case '6': case '7': case '8': case '9': case ':': case '=': 383 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 384 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': 385 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 386 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': 387 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b': 388 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': 389 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n': 390 case 'o': case 'p': case 'q': case 'r': case 's': case 't': 391 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z': 392 case '{': case '}': 393 /* These characters don't cause problems, no matter what the 394 quoting style is. They cannot start multibyte sequences. */ 395 break; 396 397 default: 398 /* If we have a multibyte sequence, copy it until we reach 399 its end, find an error, or come back to the initial shift 400 state. For C-like styles, if the sequence has 401 unprintable characters, escape the whole sequence, since 402 we can't easily escape single characters within it. */ 403 { 404 /* Length of multibyte sequence found so far. 
*/ 405 size_t m; 406 407 int printable; 408 409 if (unibyte_locale) 410 { 411 m = 1; 412 printable = ISPRINT (c); 413 } 414 else 415 { 416 mbstate_t mbstate; 417 memset (&mbstate, 0, sizeof mbstate); 418 419 m = 0; 420 printable = 1; 421 if (argsize == (size_t) -1) 422 argsize = strlen (arg); 423 424 do 425 { 426 wchar_t w; 427 size_t bytes = mbrtowc (&w, &arg[i + m], 428 argsize - (i + m), &mbstate); 429 if (bytes == 0) 430 break; 431 else if (bytes == (size_t) -1) 432 { 433 printable = 0; 434 break; 435 } 436 else if (bytes == (size_t) -2) 437 { 438 printable = 0; 439 while (i + m < argsize && arg[i + m]) 440 m++; 441 break; 442 } 443 else 444 { 445 if (! iswprint (w)) 446 printable = 0; 447 m += bytes; 448 } 449 } 450 while (! mbsinit (&mbstate)); 451 } 452 453 if (1 < m || (backslash_escapes && ! printable)) 454 { 455 /* Output a multibyte sequence, or an escaped 456 unprintable unibyte character. */ 457 size_t ilim = i + m; 458 459 for (;;) 460 { 461 if (backslash_escapes && ! printable) 462 { 463 STORE ('\\'); 464 STORE ('0' + (c >> 6)); 465 STORE ('0' + ((c >> 3) & 7)); 466 c = '0' + (c & 7); 467 } 468 if (ilim <= i + 1) 469 break; 470 STORE (c); 471 c = arg[++i]; 472 } 473 474 goto store_c; 475 } 476 } 477 } 478 479 if (! (backslash_escapes 480 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS)))) 481 goto store_c; 482 483 store_escape: 484 STORE ('\\'); 485 486 store_c: 487 STORE (c); 488 } 489 490 if (quote_string) 491 for (; *quote_string; quote_string++) 492 STORE (*quote_string); 493 494 if (len < buffersize) 495 buffer[len] = '\0'; 496 return len; 497 498 use_shell_always_quoting_style: 499 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize, 500 shell_always_quoting_style, o); 501} 502 503/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of 504 argument ARG (of size ARGSIZE), using O to control quoting. 505 If O is null, use the default. 
506 Terminate the output with a null character, and return the written 507 size of the output, not counting the terminating null. 508 If BUFFERSIZE is too small to store the output string, return the 509 value that would have been returned had BUFFERSIZE been large enough. 510 If ARGSIZE is -1, use the string length of the argument for ARGSIZE. */ 511size_t 512quotearg_buffer (char *buffer, size_t buffersize, 513 char const *arg, size_t argsize, 514 struct quoting_options const *o) 515{ 516 struct quoting_options const *p = o ? o : &default_quoting_options; 517 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize, 518 p->style, p); 519} 520 521/* Use storage slot N to return a quoted version of the string ARG. 522 OPTIONS specifies the quoting options. 523 The returned value points to static storage that can be 524 reused by the next call to this function with the same value of N. 525 N must be nonnegative. N is deliberately declared with type "int" 526 to allow for future extensions (using negative values). */ 527static char * 528quotearg_n_options (int n, char const *arg, 529 struct quoting_options const *options) 530{ 531 /* Preallocate a slot 0 buffer, so that the caller can always quote 532 one small component of a "memory exhausted" message in slot 0. */ 533 static char slot0[256]; 534 static unsigned int nslots = 1; 535 struct slotvec 536 { 537 size_t size; 538 char *val; 539 }; 540 static struct slotvec slotvec0 = {sizeof slot0, slot0}; 541 static struct slotvec *slotvec = &slotvec0; 542 543 if (nslots <= n) 544 { 545 int n1 = n + 1; 546 size_t s = n1 * sizeof (struct slotvec); 547 if (! 
(0 < n1 && n1 == s / sizeof (struct slotvec))) 548 abort (); 549 if (slotvec == &slotvec0) 550 { 551 slotvec = (struct slotvec *) xmalloc (sizeof (struct slotvec)); 552 *slotvec = slotvec0; 553 } 554 slotvec = (struct slotvec *) xrealloc (slotvec, s); 555 memset (slotvec + nslots, 0, (n1 - nslots) * sizeof (struct slotvec)); 556 nslots = n; 557 } 558 559 { 560 size_t size = slotvec[n].size; 561 char *val = slotvec[n].val; 562 size_t qsize = quotearg_buffer (val, size, arg, (size_t) -1, options); 563 564 if (size <= qsize) 565 { 566 slotvec[n].size = size = qsize + 1; 567 slotvec[n].val = val = xrealloc (val == slot0 ? 0 : val, size); 568 quotearg_buffer (val, size, arg, (size_t) -1, options); 569 } 570 571 return val; 572 } 573} 574 575char * 576quotearg_n (unsigned int n, char const *arg) 577{ 578 return quotearg_n_options (n, arg, &default_quoting_options); 579} 580 581char * 582quotearg (char const *arg) 583{ 584 return quotearg_n (0, arg); 585} 586 587char * 588quotearg_n_style (unsigned int n, enum quoting_style s, char const *arg) 589{ 590 struct quoting_options o; 591 o.style = s; 592 memset (o.quote_these_too, 0, sizeof o.quote_these_too); 593 return quotearg_n_options (n, arg, &o); 594} 595 596char * 597quotearg_style (enum quoting_style s, char const *arg) 598{ 599 return quotearg_n_style (0, s, arg); 600} 601 602char * 603quotearg_char (char const *arg, char ch) 604{ 605 struct quoting_options options; 606 options = default_quoting_options; 607 set_char_quoting (&options, ch, 1); 608 return quotearg_n_options (0, arg, &options); 609} 610 611char * 612quotearg_colon (char const *arg) 613{ 614 return quotearg_char (arg, ':'); 615} 616