1/* quotearg.c - quote arguments for output
2
3   Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006, 2007 Free
4   Software Foundation, Inc.
5
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 2, or (at your option)
9   any later version.
10
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program; if not, write to the Free Software Foundation,
18   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
19
20/* Written by Paul Eggert <eggert@twinsun.com> */
21
22#include <config.h>
23
24#include "quotearg.h"
25
26#include "xalloc.h"
27
28#include <ctype.h>
29#include <errno.h>
30#include <limits.h>
31#include <stdbool.h>
32#include <stdlib.h>
33#include <string.h>
34#include <wchar.h>
35#include <wctype.h>
36
37#include "gettext.h"
38#define _(msgid) gettext (msgid)
39#define N_(msgid) msgid
40
#if !HAVE_MBRTOWC
/* There is no usable mbrtowc, so switch multibyte processing off
   altogether.  Forcing MB_CUR_MAX to 1 is what actually disables it;
   the remaining definitions are here only for documentation and so
   that the multibyte code still satisfies C syntax.  */
# undef HAVE_MBSINIT
# undef MB_CUR_MAX
# define MB_CUR_MAX 1
# undef mbstate_t
# define mbstate_t int
# define iswprint(wc) isprint ((unsigned char) (wc))
# define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
#endif

/* Without a real mbsinit (function or macro), treat every conversion
   state as the initial state.  */
#if !(defined mbsinit || HAVE_MBSINIT)
# define mbsinit(ps) 1
#endif
57
/* Fallback for systems whose headers do not provide SIZE_MAX.  */
#if !defined SIZE_MAX
# define SIZE_MAX ((size_t) -1)
#endif

/* Number of bits in an int; used to pack one flag per character value
   into an array of unsigned ints.  */
#define INT_BITS (CHAR_BIT * sizeof (int))
63
64struct quoting_options
65{
66  /* Basic quoting style.  */
67  enum quoting_style style;
68
69  /* Quote the characters indicated by this bit vector even if the
70     quoting style would not normally require them to be quoted.  */
71  unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
72};
73
/* User-visible names of the quoting styles, terminated by a null
   pointer.  */
char const *const quoting_style_args[] =
{
  "literal",
  "shell", "shell-always",
  "c", "escape",
  "locale", "clocale",
  NULL
};
86
87/* Correspondences to quoting style names.  */
88enum quoting_style const quoting_style_vals[] =
89{
90  literal_quoting_style,
91  shell_quoting_style,
92  shell_always_quoting_style,
93  c_quoting_style,
94  escape_quoting_style,
95  locale_quoting_style,
96  clocale_quoting_style
97};
98
/* The default quoting options, used by the functions in this file
   whenever they are handed a null options pointer.  As a static object
   without an initializer it starts out all-zero: no characters are
   marked for extra quoting, and the style is whichever enum
   quoting_style value is zero (declared in quotearg.h -- presumably
   literal_quoting_style; confirm there).  */
static struct quoting_options default_quoting_options;
101
102/* Allocate a new set of quoting options, with contents initially identical
103   to O if O is not null, or to the default if O is null.
104   It is the caller's responsibility to free the result.  */
105struct quoting_options *
106clone_quoting_options (struct quoting_options *o)
107{
108  int e = errno;
109  struct quoting_options *p = xmemdup (o ? o : &default_quoting_options,
110				       sizeof *o);
111  errno = e;
112  return p;
113}
114
115/* Get the value of O's quoting style.  If O is null, use the default.  */
116enum quoting_style
117get_quoting_style (struct quoting_options *o)
118{
119  return (o ? o : &default_quoting_options)->style;
120}
121
122/* In O (or in the default if O is null),
123   set the value of the quoting style to S.  */
124void
125set_quoting_style (struct quoting_options *o, enum quoting_style s)
126{
127  (o ? o : &default_quoting_options)->style = s;
128}
129
130/* In O (or in the default if O is null),
131   set the value of the quoting options for character C to I.
132   Return the old value.  Currently, the only values defined for I are
133   0 (the default) and 1 (which means to quote the character even if
134   it would not otherwise be quoted).  */
135int
136set_char_quoting (struct quoting_options *o, char c, int i)
137{
138  unsigned char uc = c;
139  unsigned int *p =
140    (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
141  int shift = uc % INT_BITS;
142  int r = (*p >> shift) & 1;
143  *p ^= ((i & 1) ^ r) << shift;
144  return r;
145}
146
147/* MSGID approximates a quotation mark.  Return its translation if it
148   has one; otherwise, return either it or "\"", depending on S.  */
149static char const *
150gettext_quote (char const *msgid, enum quoting_style s)
151{
152  char const *translation = _(msgid);
153  if (translation == msgid && s == clocale_quoting_style)
154    translation = "\"";
155  return translation;
156}
157
158/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
159   argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
160   non-quoting-style part of O to control quoting.
161   Terminate the output with a null character, and return the written
162   size of the output, not counting the terminating null.
163   If BUFFERSIZE is too small to store the output string, return the
164   value that would have been returned had BUFFERSIZE been large enough.
165   If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
166
167   This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
168   ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
169   style specified by O, and O may not be null.  */
170
171static size_t
172quotearg_buffer_restyled (char *buffer, size_t buffersize,
173			  char const *arg, size_t argsize,
174			  enum quoting_style quoting_style,
175			  struct quoting_options const *o)
176{
177  size_t i;
178  size_t len = 0;
179  char const *quote_string = 0;
180  size_t quote_string_len = 0;
181  bool backslash_escapes = false;
182  bool unibyte_locale = MB_CUR_MAX == 1;
183
184#define STORE(c) \
185    do \
186      { \
187	if (len < buffersize) \
188	  buffer[len] = (c); \
189	len++; \
190      } \
191    while (0)
192
193  switch (quoting_style)
194    {
195    case c_quoting_style:
196      STORE ('"');
197      backslash_escapes = true;
198      quote_string = "\"";
199      quote_string_len = 1;
200      break;
201
202    case escape_quoting_style:
203      backslash_escapes = true;
204      break;
205
206    case locale_quoting_style:
207    case clocale_quoting_style:
208      {
209	/* TRANSLATORS:
210	   Get translations for open and closing quotation marks.
211
212	   The message catalog should translate "`" to a left
213	   quotation mark suitable for the locale, and similarly for
214	   "'".  If the catalog has no translation,
215	   locale_quoting_style quotes `like this', and
216	   clocale_quoting_style quotes "like this".
217
218	   For example, an American English Unicode locale should
219	   translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
220	   should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
221	   MARK).  A British English Unicode locale should instead
222	   translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
223	   U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
224
225	   If you don't know what to put here, please see
226	   <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs>
227	   and use glyphs suitable for your language.  */
228
229	char const *left = gettext_quote (N_("`"), quoting_style);
230	char const *right = gettext_quote (N_("'"), quoting_style);
231	for (quote_string = left; *quote_string; quote_string++)
232	  STORE (*quote_string);
233	backslash_escapes = true;
234	quote_string = right;
235	quote_string_len = strlen (quote_string);
236      }
237      break;
238
239    case shell_always_quoting_style:
240      STORE ('\'');
241      quote_string = "'";
242      quote_string_len = 1;
243      break;
244
245    default:
246      break;
247    }
248
249  for (i = 0;  ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize);  i++)
250    {
251      unsigned char c;
252      unsigned char esc;
253
254      if (backslash_escapes
255	  && quote_string_len
256	  && i + quote_string_len <= argsize
257	  && memcmp (arg + i, quote_string, quote_string_len) == 0)
258	STORE ('\\');
259
260      c = arg[i];
261      switch (c)
262	{
263	case '\0':
264	  if (backslash_escapes)
265	    {
266	      STORE ('\\');
267	      STORE ('0');
268	      STORE ('0');
269	      c = '0';
270	    }
271	  break;
272
273	case '?':
274	  switch (quoting_style)
275	    {
276	    case shell_quoting_style:
277	      goto use_shell_always_quoting_style;
278
279	    case c_quoting_style:
280	      if (i + 2 < argsize && arg[i + 1] == '?')
281		switch (arg[i + 2])
282		  {
283		  case '!': case '\'':
284		  case '(': case ')': case '-': case '/':
285		  case '<': case '=': case '>':
286		    /* Escape the second '?' in what would otherwise be
287		       a trigraph.  */
288		    c = arg[i + 2];
289		    i += 2;
290		    STORE ('?');
291		    STORE ('\\');
292		    STORE ('?');
293		    break;
294
295		  default:
296		    break;
297		  }
298	      break;
299
300	    default:
301	      break;
302	    }
303	  break;
304
305	case '\a': esc = 'a'; goto c_escape;
306	case '\b': esc = 'b'; goto c_escape;
307	case '\f': esc = 'f'; goto c_escape;
308	case '\n': esc = 'n'; goto c_and_shell_escape;
309	case '\r': esc = 'r'; goto c_and_shell_escape;
310	case '\t': esc = 't'; goto c_and_shell_escape;
311	case '\v': esc = 'v'; goto c_escape;
312	case '\\': esc = c; goto c_and_shell_escape;
313
314	c_and_shell_escape:
315	  if (quoting_style == shell_quoting_style)
316	    goto use_shell_always_quoting_style;
317	c_escape:
318	  if (backslash_escapes)
319	    {
320	      c = esc;
321	      goto store_escape;
322	    }
323	  break;
324
325	case '{': case '}': /* sometimes special if isolated */
326	  if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
327	    break;
328	  /* Fall through.  */
329	case '#': case '~':
330	  if (i != 0)
331	    break;
332	  /* Fall through.  */
333	case ' ':
334	case '!': /* special in bash */
335	case '"': case '$': case '&':
336	case '(': case ')': case '*': case ';':
337	case '<':
338	case '=': /* sometimes special in 0th or (with "set -k") later args */
339	case '>': case '[':
340	case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
341	case '`': case '|':
342	  /* A shell special character.  In theory, '$' and '`' could
343	     be the first bytes of multibyte characters, which means
344	     we should check them with mbrtowc, but in practice this
345	     doesn't happen so it's not worth worrying about.  */
346	  if (quoting_style == shell_quoting_style)
347	    goto use_shell_always_quoting_style;
348	  break;
349
350	case '\'':
351	  switch (quoting_style)
352	    {
353	    case shell_quoting_style:
354	      goto use_shell_always_quoting_style;
355
356	    case shell_always_quoting_style:
357	      STORE ('\'');
358	      STORE ('\\');
359	      STORE ('\'');
360	      break;
361
362	    default:
363	      break;
364	    }
365	  break;
366
367	case '%': case '+': case ',': case '-': case '.': case '/':
368	case '0': case '1': case '2': case '3': case '4': case '5':
369	case '6': case '7': case '8': case '9': case ':':
370	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
371	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
372	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
373	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
374	case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
375	case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
376	case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
377	case 'o': case 'p': case 'q': case 'r': case 's': case 't':
378	case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
379	  /* These characters don't cause problems, no matter what the
380	     quoting style is.  They cannot start multibyte sequences.  */
381	  break;
382
383	default:
384	  /* If we have a multibyte sequence, copy it until we reach
385	     its end, find an error, or come back to the initial shift
386	     state.  For C-like styles, if the sequence has
387	     unprintable characters, escape the whole sequence, since
388	     we can't easily escape single characters within it.  */
389	  {
390	    /* Length of multibyte sequence found so far.  */
391	    size_t m;
392
393	    bool printable;
394
395	    if (unibyte_locale)
396	      {
397		m = 1;
398		printable = isprint (c) != 0;
399	      }
400	    else
401	      {
402		mbstate_t mbstate;
403		memset (&mbstate, 0, sizeof mbstate);
404
405		m = 0;
406		printable = true;
407		if (argsize == SIZE_MAX)
408		  argsize = strlen (arg);
409
410		do
411		  {
412		    wchar_t w;
413		    size_t bytes = mbrtowc (&w, &arg[i + m],
414					    argsize - (i + m), &mbstate);
415		    if (bytes == 0)
416		      break;
417		    else if (bytes == (size_t) -1)
418		      {
419			printable = false;
420			break;
421		      }
422		    else if (bytes == (size_t) -2)
423		      {
424			printable = false;
425			while (i + m < argsize && arg[i + m])
426			  m++;
427			break;
428		      }
429		    else
430		      {
431			/* Work around a bug with older shells that "see" a '\'
432			   that is really the 2nd byte of a multibyte character.
433			   In practice the problem is limited to ASCII
434			   chars >= '@' that are shell special chars.  */
435			if ('[' == 0x5b && quoting_style == shell_quoting_style)
436			  {
437			    size_t j;
438			    for (j = 1; j < bytes; j++)
439			      switch (arg[i + m + j])
440				{
441				case '[': case '\\': case '^':
442				case '`': case '|':
443				  goto use_shell_always_quoting_style;
444
445				default:
446				  break;
447				}
448			  }
449
450			if (! iswprint (w))
451			  printable = false;
452			m += bytes;
453		      }
454		  }
455		while (! mbsinit (&mbstate));
456	      }
457
458	    if (1 < m || (backslash_escapes && ! printable))
459	      {
460		/* Output a multibyte sequence, or an escaped
461		   unprintable unibyte character.  */
462		size_t ilim = i + m;
463
464		for (;;)
465		  {
466		    if (backslash_escapes && ! printable)
467		      {
468			STORE ('\\');
469			STORE ('0' + (c >> 6));
470			STORE ('0' + ((c >> 3) & 7));
471			c = '0' + (c & 7);
472		      }
473		    if (ilim <= i + 1)
474		      break;
475		    STORE (c);
476		    c = arg[++i];
477		  }
478
479		goto store_c;
480	      }
481	  }
482	}
483
484      if (! (backslash_escapes
485	     && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
486	goto store_c;
487
488    store_escape:
489      STORE ('\\');
490
491    store_c:
492      STORE (c);
493    }
494
495  if (i == 0 && quoting_style == shell_quoting_style)
496    goto use_shell_always_quoting_style;
497
498  if (quote_string)
499    for (; *quote_string; quote_string++)
500      STORE (*quote_string);
501
502  if (len < buffersize)
503    buffer[len] = '\0';
504  return len;
505
506 use_shell_always_quoting_style:
507  return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
508				   shell_always_quoting_style, o);
509}
510
511/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
512   argument ARG (of size ARGSIZE), using O to control quoting.
513   If O is null, use the default.
514   Terminate the output with a null character, and return the written
515   size of the output, not counting the terminating null.
516   If BUFFERSIZE is too small to store the output string, return the
517   value that would have been returned had BUFFERSIZE been large enough.
518   If ARGSIZE is SIZE_MAX, use the string length of the argument for
519   ARGSIZE.  */
520size_t
521quotearg_buffer (char *buffer, size_t buffersize,
522		 char const *arg, size_t argsize,
523		 struct quoting_options const *o)
524{
525  struct quoting_options const *p = o ? o : &default_quoting_options;
526  int e = errno;
527  size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
528				       p->style, p);
529  errno = e;
530  return r;
531}
532
/* Like quotearg_buffer (..., ARG, ARGSIZE, O), except that the quoted
   string is returned in newly allocated storage of exactly the size
   it needs.  errno is left as it was on entry.  */
char *
quotearg_alloc (char const *arg, size_t argsize,
                struct quoting_options const *o)
{
  int saved_errno = errno;

  /* A first pass with an empty buffer only measures the output; add
     one for the terminating null.  */
  size_t needed = 1 + quotearg_buffer (NULL, 0, arg, argsize, o);
  char *result = xcharalloc (needed);

  quotearg_buffer (result, needed, arg, argsize, o);
  errno = saved_errno;
  return result;
}
546
/* One storage slot: a buffer together with its allocated size.  */
struct slotvec
{
  size_t size;          /* Number of bytes available at VAL.  */
  char *val;            /* The buffer itself; null if none yet.  */
};
553
554/* Preallocate a slot 0 buffer, so that the caller can always quote
555   one small component of a "memory exhausted" message in slot 0.  */
556static char slot0[256];
557static unsigned int nslots = 1;
558static struct slotvec slotvec0 = {sizeof slot0, slot0};
559static struct slotvec *slotvec = &slotvec0;
560
561void
562quotearg_free (void)
563{
564  struct slotvec *sv = slotvec;
565  unsigned int i;
566  for (i = 1; i < nslots; i++)
567    free (sv[i].val);
568  if (sv[0].val != slot0)
569    {
570      free (sv[0].val);
571      slotvec0.size = sizeof slot0;
572      slotvec0.val = slot0;
573    }
574  if (sv != &slotvec0)
575    {
576      free (sv);
577      slotvec = &slotvec0;
578    }
579  nslots = 1;
580}
581
582/* Use storage slot N to return a quoted version of argument ARG.
583   ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a
584   null-terminated string.
585   OPTIONS specifies the quoting options.
586   The returned value points to static storage that can be
587   reused by the next call to this function with the same value of N.
588   N must be nonnegative.  N is deliberately declared with type "int"
589   to allow for future extensions (using negative values).  */
590static char *
591quotearg_n_options (int n, char const *arg, size_t argsize,
592		    struct quoting_options const *options)
593{
594  int e = errno;
595
596  unsigned int n0 = n;
597  struct slotvec *sv = slotvec;
598
599  if (n < 0)
600    abort ();
601
602  if (nslots <= n0)
603    {
604      /* FIXME: technically, the type of n1 should be `unsigned int',
605	 but that evokes an unsuppressible warning from gcc-4.0.1 and
606	 older.  If gcc ever provides an option to suppress that warning,
607	 revert to the original type, so that the test in xalloc_oversized
608	 is once again performed only at compile time.  */
609      size_t n1 = n0 + 1;
610      bool preallocated = (sv == &slotvec0);
611
612      if (xalloc_oversized (n1, sizeof *sv))
613	xalloc_die ();
614
615      slotvec = sv = xrealloc (preallocated ? NULL : sv, n1 * sizeof *sv);
616      if (preallocated)
617	*sv = slotvec0;
618      memset (sv + nslots, 0, (n1 - nslots) * sizeof *sv);
619      nslots = n1;
620    }
621
622  {
623    size_t size = sv[n].size;
624    char *val = sv[n].val;
625    size_t qsize = quotearg_buffer (val, size, arg, argsize, options);
626
627    if (size <= qsize)
628      {
629	sv[n].size = size = qsize + 1;
630	if (val != slot0)
631	  free (val);
632	sv[n].val = val = xcharalloc (size);
633	quotearg_buffer (val, size, arg, argsize, options);
634      }
635
636    errno = e;
637    return val;
638  }
639}
640
641char *
642quotearg_n (int n, char const *arg)
643{
644  return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
645}
646
/* Quote the null-terminated string ARG with the default quoting
   options, returning the result in storage slot 0.  */
char *
quotearg (char const *arg)
{
  char *quoted = quotearg_n (0, arg);

  return quoted;
}
652
653/* Return quoting options for STYLE, with no extra quoting.  */
654static struct quoting_options
655quoting_options_from_style (enum quoting_style style)
656{
657  struct quoting_options o;
658  o.style = style;
659  memset (o.quote_these_too, 0, sizeof o.quote_these_too);
660  return o;
661}
662
663char *
664quotearg_n_style (int n, enum quoting_style s, char const *arg)
665{
666  struct quoting_options const o = quoting_options_from_style (s);
667  return quotearg_n_options (n, arg, SIZE_MAX, &o);
668}
669
670char *
671quotearg_n_style_mem (int n, enum quoting_style s,
672		      char const *arg, size_t argsize)
673{
674  struct quoting_options const o = quoting_options_from_style (s);
675  return quotearg_n_options (n, arg, argsize, &o);
676}
677
678char *
679quotearg_style (enum quoting_style s, char const *arg)
680{
681  return quotearg_n_style (0, s, arg);
682}
683
684char *
685quotearg_char (char const *arg, char ch)
686{
687  struct quoting_options options;
688  options = default_quoting_options;
689  set_char_quoting (&options, ch, 1);
690  return quotearg_n_options (0, arg, SIZE_MAX, &options);
691}
692
/* Quote the null-terminated string ARG with the default quoting
   options, additionally marking ':' for quoting.  The result is
   returned in storage slot 0.  */
char *
quotearg_colon (char const *arg)
{
  char const also_quote = ':';

  return quotearg_char (arg, also_quote);
}
698