quotearg.c revision 131555
1/* quotearg.c - quote arguments for output
2   Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
3
4   This program is free software; you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation; either version 2, or (at your option)
7   any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program; if not, write to the Free Software Foundation,
16   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
17
18/* Written by Paul Eggert <eggert@twinsun.com> */
19
20#if HAVE_CONFIG_H
21# include <config.h>
22#endif
23
24#if HAVE_STDDEF_H
25# include <stddef.h>  /* For the definition of size_t on windows w/MSVC.  */
26#endif
27#include <sys/types.h>
28#include <quotearg.h>
29#include <xalloc.h>
30
31#include <ctype.h>
32
33#if ENABLE_NLS
34# include <libintl.h>
35# define _(text) gettext (text)
36#else
37# define _(text) text
38#endif
39#define N_(text) text
40
41#if HAVE_LIMITS_H
42# include <limits.h>
43#endif
44#ifndef CHAR_BIT
45# define CHAR_BIT 8
46#endif
47#ifndef UCHAR_MAX
48# define UCHAR_MAX ((unsigned char) -1)
49#endif
50
51#if HAVE_C_BACKSLASH_A
52# define ALERT_CHAR '\a'
53#else
54# define ALERT_CHAR '\7'
55#endif
56
57#if HAVE_STDLIB_H
58# include <stdlib.h>
59#endif
60
61#if HAVE_STRING_H
62# include <string.h>
63#endif
64
65#if HAVE_WCHAR_H
66# include <wchar.h>
67#endif
68
#if !HAVE_MBRTOWC
/* Without mbrtowc, multibyte processing is switched off entirely by
   forcing MB_CUR_MAX to 1.  Because MB_CUR_MAX is 1, the remaining
   stand-ins serve only as documentation and to keep code written
   against the multibyte interface syntactically valid.  */
# undef MB_CUR_MAX
# define MB_CUR_MAX 1
/* Every conversion state counts as the initial state.  */
# define mbsinit(ps) 1
/* A "wide" character is just a byte; classify it as one.  */
# define iswprint(wc) ISPRINT ((unsigned char) (wc))
/* Copy one byte into *PWC; yield 0 for a null byte and 1 otherwise.  */
# define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
#endif

#ifndef iswprint
# if HAVE_WCTYPE_H
#  include <wctype.h>
# endif
/* If iswprint is available neither as a macro nor as a function,
   treat every wide character as printable.  */
# if !HAVE_ISWPRINT && !defined iswprint
#  define iswprint(wc) 1
# endif
#endif
88
/* Number of bits in an int.  The quote_these_too bit vector is packed
   into words of this width.  */
#define INT_BITS (CHAR_BIT * sizeof (int))

/* IN_CTYPE_DOMAIN (C) is nonzero if C may be handed to the <ctype.h>
   classification macros.  When the headers are not known to be
   standard and isascii is available, only values accepted by isascii
   qualify; otherwise every value does.  */
#if defined STDC_HEADERS || !(defined isascii || defined HAVE_ISASCII)
# define IN_CTYPE_DOMAIN(c) 1
#else
# define IN_CTYPE_DOMAIN(c) isascii(c)
#endif

/* Discard any earlier ISPRINT (the wctype.h of Solaris 2.6 defines
   one) before installing the definition used in this file.  */
#undef ISPRINT
#define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
100
101struct quoting_options
102{
103  /* Basic quoting style.  */
104  enum quoting_style style;
105
106  /* Quote the characters indicated by this bit vector even if the
107     quoting style would not normally require them to be quoted.  */
108  int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
109};
110
/* User-visible names of the quoting styles, terminated by a null
   pointer.  Entry K here names the style stored in entry K of
   quoting_style_vals.  */
char const *const quoting_style_args[] =
{
  "literal", "shell", "shell-always",
  "c", "escape",
  "locale", "clocale",
  0
};
123
124/* Correspondences to quoting style names.  */
125enum quoting_style const quoting_style_vals[] =
126{
127  literal_quoting_style,
128  shell_quoting_style,
129  shell_always_quoting_style,
130  c_quoting_style,
131  escape_quoting_style,
132  locale_quoting_style,
133  clocale_quoting_style
134};
135
/* The default quoting options, used by every function in this file
   that is handed a null options pointer.  Being a static object it
   starts out zero-initialized: the style is the enumerator whose value
   is 0 and no extra characters are marked for quoting.  */
static struct quoting_options default_quoting_options;
138
139/* Allocate a new set of quoting options, with contents initially identical
140   to O if O is not null, or to the default if O is null.
141   It is the caller's responsibility to free the result.  */
142struct quoting_options *
143clone_quoting_options (struct quoting_options *o)
144{
145  struct quoting_options *p
146    = (struct quoting_options *) xmalloc (sizeof (struct quoting_options));
147  *p = *(o ? o : &default_quoting_options);
148  return p;
149}
150
151/* Get the value of O's quoting style.  If O is null, use the default.  */
152enum quoting_style
153get_quoting_style (struct quoting_options *o)
154{
155  return (o ? o : &default_quoting_options)->style;
156}
157
158/* In O (or in the default if O is null),
159   set the value of the quoting style to S.  */
160void
161set_quoting_style (struct quoting_options *o, enum quoting_style s)
162{
163  (o ? o : &default_quoting_options)->style = s;
164}
165
166/* In O (or in the default if O is null),
167   set the value of the quoting options for character C to I.
168   Return the old value.  Currently, the only values defined for I are
169   0 (the default) and 1 (which means to quote the character even if
170   it would not otherwise be quoted).  */
171int
172set_char_quoting (struct quoting_options *o, char c, int i)
173{
174  unsigned char uc = c;
175  int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
176  int shift = uc % INT_BITS;
177  int r = (*p >> shift) & 1;
178  *p ^= ((i & 1) ^ r) << shift;
179  return r;
180}
181
182/* MSGID approximates a quotation mark.  Return its translation if it
183   has one; otherwise, return either it or "\"", depending on S.  */
184static char const *
185gettext_quote (char const *msgid, enum quoting_style s)
186{
187  char const *translation = _(msgid);
188  if (translation == msgid && s == clocale_quoting_style)
189    translation = "\"";
190  return translation;
191}
192
193/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
194   argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
195   non-quoting-style part of O to control quoting.
196   Terminate the output with a null character, and return the written
197   size of the output, not counting the terminating null.
198   If BUFFERSIZE is too small to store the output string, return the
199   value that would have been returned had BUFFERSIZE been large enough.
200   If ARGSIZE is -1, use the string length of the argument for ARGSIZE.
201
202   This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
203   ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
204   style specified by O, and O may not be null.  */
205
206static size_t
207quotearg_buffer_restyled (char *buffer, size_t buffersize,
208			  char const *arg, size_t argsize,
209			  enum quoting_style quoting_style,
210			  struct quoting_options const *o)
211{
212  size_t i;
213  size_t len = 0;
214  char const *quote_string = 0;
215  size_t quote_string_len = 0;
216  int backslash_escapes = 0;
217  int unibyte_locale = MB_CUR_MAX == 1;
218
219#define STORE(c) \
220    do \
221      { \
222	if (len < buffersize) \
223	  buffer[len] = (c); \
224	len++; \
225      } \
226    while (0)
227
228  switch (quoting_style)
229    {
230    case c_quoting_style:
231      STORE ('"');
232      backslash_escapes = 1;
233      quote_string = "\"";
234      quote_string_len = 1;
235      break;
236
237    case escape_quoting_style:
238      backslash_escapes = 1;
239      break;
240
241    case locale_quoting_style:
242    case clocale_quoting_style:
243      {
244	/* Get translations for open and closing quotation marks.
245
246	   The message catalog should translate "`" to a left
247	   quotation mark suitable for the locale, and similarly for
248	   "'".  If the catalog has no translation,
249	   locale_quoting_style quotes `like this', and
250	   clocale_quoting_style quotes "like this".
251
252	   For example, an American English Unicode locale should
253	   translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
254	   should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
255	   MARK).  A British English Unicode locale should instead
256	   translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
257	   U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.  */
258
259	char const *left = gettext_quote (N_("`"), quoting_style);
260	char const *right = gettext_quote (N_("'"), quoting_style);
261	for (quote_string = left; *quote_string; quote_string++)
262	  STORE (*quote_string);
263	backslash_escapes = 1;
264	quote_string = right;
265	quote_string_len = strlen (quote_string);
266      }
267      break;
268
269    case shell_always_quoting_style:
270      STORE ('\'');
271      quote_string = "'";
272      quote_string_len = 1;
273      break;
274
275    default:
276      break;
277    }
278
279  for (i = 0;  ! (argsize == (size_t) -1 ? arg[i] == '\0' : i == argsize);  i++)
280    {
281      unsigned char c;
282      unsigned char esc;
283
284      if (backslash_escapes
285	  && quote_string_len
286	  && i + quote_string_len <= argsize
287	  && memcmp (arg + i, quote_string, quote_string_len) == 0)
288	STORE ('\\');
289
290      c = arg[i];
291      switch (c)
292	{
293	case '?':
294	  switch (quoting_style)
295	    {
296	    case shell_quoting_style:
297	      goto use_shell_always_quoting_style;
298
299	    case c_quoting_style:
300	      if (i + 2 < argsize && arg[i + 1] == '?')
301		switch (arg[i + 2])
302		  {
303		  case '!': case '\'':
304		  case '(': case ')': case '-': case '/':
305		  case '<': case '=': case '>':
306		    /* Escape the second '?' in what would otherwise be
307		       a trigraph.  */
308		    i += 2;
309		    c = arg[i + 2];
310		    STORE ('?');
311		    STORE ('\\');
312		    STORE ('?');
313		    break;
314		  }
315	      break;
316
317	    default:
318	      break;
319	    }
320	  break;
321
322	case ALERT_CHAR: esc = 'a'; goto c_escape;
323	case '\b': esc = 'b'; goto c_escape;
324	case '\f': esc = 'f'; goto c_escape;
325	case '\n': esc = 'n'; goto c_and_shell_escape;
326	case '\r': esc = 'r'; goto c_and_shell_escape;
327	case '\t': esc = 't'; goto c_and_shell_escape;
328	case '\v': esc = 'v'; goto c_escape;
329	case '\\': esc = c; goto c_and_shell_escape;
330
331	c_and_shell_escape:
332	  if (quoting_style == shell_quoting_style)
333	    goto use_shell_always_quoting_style;
334	c_escape:
335	  if (backslash_escapes)
336	    {
337	      c = esc;
338	      goto store_escape;
339	    }
340	  break;
341
342	case '#': case '~':
343	  if (i != 0)
344	    break;
345	  /* Fall through.  */
346	case ' ':
347	case '!': /* special in bash */
348	case '"': case '$': case '&':
349	case '(': case ')': case '*': case ';':
350	case '<': case '>': case '[':
351	case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
352	case '`': case '|':
353	  /* A shell special character.  In theory, '$' and '`' could
354	     be the first bytes of multibyte characters, which means
355	     we should check them with mbrtowc, but in practice this
356	     doesn't happen so it's not worth worrying about.  */
357	  if (quoting_style == shell_quoting_style)
358	    goto use_shell_always_quoting_style;
359	  break;
360
361	case '\'':
362	  switch (quoting_style)
363	    {
364	    case shell_quoting_style:
365	      goto use_shell_always_quoting_style;
366
367	    case shell_always_quoting_style:
368	      STORE ('\'');
369	      STORE ('\\');
370	      STORE ('\'');
371	      break;
372
373	    default:
374	      break;
375	    }
376	  break;
377
378	case '%': case '+': case ',': case '-': case '.': case '/':
379	case '0': case '1': case '2': case '3': case '4': case '5':
380	case '6': case '7': case '8': case '9': case ':': case '=':
381	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
382	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
383	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
384	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
385	case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
386	case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
387	case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
388	case 'o': case 'p': case 'q': case 'r': case 's': case 't':
389	case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
390	case '{': case '}':
391	  /* These characters don't cause problems, no matter what the
392	     quoting style is.  They cannot start multibyte sequences.  */
393	  break;
394
395	default:
396	  /* If we have a multibyte sequence, copy it until we reach
397	     its end, find an error, or come back to the initial shift
398	     state.  For C-like styles, if the sequence has
399	     unprintable characters, escape the whole sequence, since
400	     we can't easily escape single characters within it.  */
401	  {
402	    /* Length of multibyte sequence found so far.  */
403	    size_t m;
404
405	    int printable;
406
407	    if (unibyte_locale)
408	      {
409		m = 1;
410		printable = ISPRINT (c);
411	      }
412	    else
413	      {
414		mbstate_t mbstate;
415		memset (&mbstate, 0, sizeof mbstate);
416
417		m = 0;
418		printable = 1;
419		if (argsize == (size_t) -1)
420		  argsize = strlen (arg);
421
422		do
423		  {
424		    wchar_t w;
425		    size_t bytes = mbrtowc (&w, &arg[i + m],
426					    argsize - (i + m), &mbstate);
427		    if (bytes == 0)
428		      break;
429		    else if (bytes == (size_t) -1)
430		      {
431			printable = 0;
432			break;
433		      }
434		    else if (bytes == (size_t) -2)
435		      {
436			printable = 0;
437			while (i + m < argsize && arg[i + m])
438			  m++;
439			break;
440		      }
441		    else
442		      {
443			if (! iswprint (w))
444			  printable = 0;
445			m += bytes;
446		      }
447		  }
448		while (! mbsinit (&mbstate));
449	      }
450
451	    if (1 < m || (backslash_escapes && ! printable))
452	      {
453		/* Output a multibyte sequence, or an escaped
454		   unprintable unibyte character.  */
455		size_t ilim = i + m;
456
457		for (;;)
458		  {
459		    if (backslash_escapes && ! printable)
460		      {
461			STORE ('\\');
462			STORE ('0' + (c >> 6));
463			STORE ('0' + ((c >> 3) & 7));
464			c = '0' + (c & 7);
465		      }
466		    if (ilim <= i + 1)
467		      break;
468		    STORE (c);
469		    c = arg[++i];
470		  }
471
472		goto store_c;
473	      }
474	  }
475	}
476
477      if (! (backslash_escapes
478	     && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
479	goto store_c;
480
481    store_escape:
482      STORE ('\\');
483
484    store_c:
485      STORE (c);
486    }
487
488  if (quote_string)
489    for (; *quote_string; quote_string++)
490      STORE (*quote_string);
491
492  if (len < buffersize)
493    buffer[len] = '\0';
494  return len;
495
496 use_shell_always_quoting_style:
497  return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
498				   shell_always_quoting_style, o);
499}
500
501/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
502   argument ARG (of size ARGSIZE), using O to control quoting.
503   If O is null, use the default.
504   Terminate the output with a null character, and return the written
505   size of the output, not counting the terminating null.
506   If BUFFERSIZE is too small to store the output string, return the
507   value that would have been returned had BUFFERSIZE been large enough.
508   If ARGSIZE is -1, use the string length of the argument for ARGSIZE.  */
509size_t
510quotearg_buffer (char *buffer, size_t buffersize,
511		 char const *arg, size_t argsize,
512		 struct quoting_options const *o)
513{
514  struct quoting_options const *p = o ? o : &default_quoting_options;
515  return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
516				   p->style, p);
517}
518
/* Use storage slot N to return a quoted version of the string ARG.
   OPTIONS specifies the quoting options.
   The returned value points to static storage that can be
   reused by the next call to this function with the same value of N.
   N must be nonnegative; a negative N makes the function abort.
   N is deliberately declared with type "int" to allow for future
   extensions (using negative values).

   The slot vector grows on demand to N + 1 entries, and each slot's
   buffer grows on demand to fit the quoted string.  Both are kept in
   function-local static storage.  */
static char *
quotearg_n_options (int n, char const *arg,
                    struct quoting_options const *options)
{
  /* Preallocate a slot 0 buffer, so that the caller can always quote
     one small component of a "memory exhausted" message in slot 0.  */
  static char slot0[256];

  /* Number of entries currently in SLOTVEC.  */
  static unsigned int nslots = 1;

  struct slotvec
    {
      size_t size;   /* Bytes allocated for VAL.  */
      char *val;     /* The slot's buffer, or null if not yet allocated.  */
    };
  static struct slotvec slotvec0 = {sizeof slot0, slot0};
  static struct slotvec *slotvec = &slotvec0;

  /* The comparison is made in unsigned arithmetic, so a negative N
     also lands here and is rejected by the check on N1 below.  */
  if (nslots <= (unsigned int) n)
    {
      int n1 = n + 1;
      size_t s = n1 * sizeof (struct slotvec);

      /* Reject a negative N and a byte count that overflowed.  */
      if (! (0 < n1 && n1 == s / sizeof (struct slotvec)))
        abort ();

      /* The initial one-entry vector is a static object and cannot be
         handed to xrealloc; move it to the heap first.  */
      if (slotvec == &slotvec0)
        {
          slotvec = (struct slotvec *) xmalloc (sizeof (struct slotvec));
          *slotvec = slotvec0;
        }
      slotvec = (struct slotvec *) xrealloc (slotvec, s);
      memset (slotvec + nslots, 0, (n1 - nslots) * sizeof (struct slotvec));

      /* Record the new entry count, N + 1.  Recording N here would
         make the next call for the same slot grow the vector again
         and wipe that slot, leaking its buffer.  */
      nslots = n1;
    }

  {
    size_t size = slotvec[n].size;
    char *val = slotvec[n].val;
    size_t qsize = quotearg_buffer (val, size, arg, (size_t) -1, options);

    if (size <= qsize)
      {
        /* The result did not fit: enlarge the slot's buffer to the
           exact size needed and quote again.  SLOT0 is static, so it
           is replaced rather than reallocated.  */
        slotvec[n].size = size = qsize + 1;
        slotvec[n].val = val = xrealloc (val == slot0 ? 0 : val, size);
        quotearg_buffer (val, size, arg, (size_t) -1, options);
      }

    return val;
  }
}
572
573char *
574quotearg_n (unsigned int n, char const *arg)
575{
576  return quotearg_n_options (n, arg, &default_quoting_options);
577}
578
/* Return ARG quoted according to the default quoting options, using
   storage slot 0 for the result.  */
char *
quotearg (char const *arg)
{
  char *quoted = quotearg_n (0, arg);

  return quoted;
}
584
585char *
586quotearg_n_style (unsigned int n, enum quoting_style s, char const *arg)
587{
588  struct quoting_options o;
589  o.style = s;
590  memset (o.quote_these_too, 0, sizeof o.quote_these_too);
591  return quotearg_n_options (n, arg, &o);
592}
593
594char *
595quotearg_style (enum quoting_style s, char const *arg)
596{
597  return quotearg_n_style (0, s, arg);
598}
599
600char *
601quotearg_char (char const *arg, char ch)
602{
603  struct quoting_options options;
604  options = default_quoting_options;
605  set_char_quoting (&options, ch, 1);
606  return quotearg_n_options (0, arg, &options);
607}
608
/* Return ARG quoted as quotearg_char would quote it when asked to
   mark ':' for quoting.  */
char *
quotearg_colon (char const *arg)
{
  char const colon = ':';

  return quotearg_char (arg, colon);
}
613}
614