1/* quotearg.c - quote arguments for output
2   Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
3
4   This program is free software; you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation; either version 2, or (at your option)
7   any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program; if not, write to the Free Software Foundation,
16   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
17
18/* Written by Paul Eggert <eggert@twinsun.com> */
19
20#if HAVE_CONFIG_H
21# include <config.h>
22#endif
23
24#if HAVE_STDDEF_H
25# include <stddef.h>  /* For the definition of size_t on windows w/MSVC.  */
26#endif
27#include <sys/types.h>
28#include <quotearg.h>
29#include <xalloc.h>
30
31#include <ctype.h>
32
33#if ENABLE_NLS
34# include <libintl.h>
35# define _(text) gettext (text)
36#else
37# define _(text) text
38#endif
39#define N_(text) text
40
41#if HAVE_LIMITS_H
42# include <limits.h>
43#endif
44#ifndef CHAR_BIT
45# define CHAR_BIT 8
46#endif
47#ifndef SIZE_MAX
48# define SIZE_MAX ((size_t) -1)
49#endif
50#ifndef UCHAR_MAX
51# define UCHAR_MAX ((unsigned char) -1)
52#endif
53#ifndef UINT_MAX
54# define UINT_MAX ((unsigned int) -1)
55#endif
56
57#if HAVE_C_BACKSLASH_A
58# define ALERT_CHAR '\a'
59#else
60# define ALERT_CHAR '\7'
61#endif
62
63#if HAVE_STDLIB_H
64# include <stdlib.h>
65#endif
66
67#if HAVE_STRING_H
68# include <string.h>
69#endif
70
71#if HAVE_WCHAR_H
72
73/* BSD/OS 4.1 wchar.h requires FILE and struct tm to be declared.  */
74# include <stdio.h>
75# include <time.h>
76
77# include <wchar.h>
78#endif
79
80#if !HAVE_MBRTOWC
81/* Disable multibyte processing entirely.  Since MB_CUR_MAX is 1, the
82   other macros are defined only for documentation and to satisfy C
83   syntax.  */
84# undef MB_CUR_MAX
85# define MB_CUR_MAX 1
86# define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
87# define mbsinit(ps) 1
88# define iswprint(wc) ISPRINT ((unsigned char) (wc))
89#endif
90
91#ifndef iswprint
92# if HAVE_WCTYPE_H
93#  include <wctype.h>
94# endif
95# if !defined iswprint && !HAVE_ISWPRINT
96#  define iswprint(wc) 1
97# endif
98#endif
99
/* Number of bits in an int.  The quote_these_too bit vector is packed
   INT_BITS flags to a word.  */
#define INT_BITS (CHAR_BIT * sizeof (int))

/* Nonzero if C may be handed to the <ctype.h> classification macros.
   When isascii is available and the headers are not known to be
   standard, only ASCII values are treated as being in the domain.  */
#if defined STDC_HEADERS || ! (defined isascii || defined HAVE_ISASCII)
# define IN_CTYPE_DOMAIN(c) 1
#else
# define IN_CTYPE_DOMAIN(c) isascii (c)
#endif

/* Drop any earlier definition first; wctype.h on solaris2.6 supplies
   its own ISPRINT.  */
#undef ISPRINT
#define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
111
112struct quoting_options
113{
114  /* Basic quoting style.  */
115  enum quoting_style style;
116
117  /* Quote the characters indicated by this bit vector even if the
118     quoting style would not normally require them to be quoted.  */
119  int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
120};
121
/* Names of the quoting styles, ending with a null pointer.  The
   entries are listed in the same order as quoting_style_vals.  */
char const *const quoting_style_args[] =
{
  "literal", "shell", "shell-always",
  "c", "escape",
  "locale", "clocale",
  0
};
134
135/* Correspondences to quoting style names.  */
136enum quoting_style const quoting_style_vals[] =
137{
138  literal_quoting_style,
139  shell_quoting_style,
140  shell_always_quoting_style,
141  c_quoting_style,
142  escape_quoting_style,
143  locale_quoting_style,
144  clocale_quoting_style
145};
146
147/* The default quoting options.  */
148static struct quoting_options default_quoting_options;
149
150/* Allocate a new set of quoting options, with contents initially identical
151   to O if O is not null, or to the default if O is null.
152   It is the caller's responsibility to free the result.  */
153struct quoting_options *
154clone_quoting_options (struct quoting_options *o)
155{
156  struct quoting_options *p
157    = (struct quoting_options *) xmalloc (sizeof (struct quoting_options));
158  *p = *(o ? o : &default_quoting_options);
159  return p;
160}
161
162/* Get the value of O's quoting style.  If O is null, use the default.  */
163enum quoting_style
164get_quoting_style (struct quoting_options *o)
165{
166  return (o ? o : &default_quoting_options)->style;
167}
168
169/* In O (or in the default if O is null),
170   set the value of the quoting style to S.  */
171void
172set_quoting_style (struct quoting_options *o, enum quoting_style s)
173{
174  (o ? o : &default_quoting_options)->style = s;
175}
176
177/* In O (or in the default if O is null),
178   set the value of the quoting options for character C to I.
179   Return the old value.  Currently, the only values defined for I are
180   0 (the default) and 1 (which means to quote the character even if
181   it would not otherwise be quoted).  */
182int
183set_char_quoting (struct quoting_options *o, char c, int i)
184{
185  unsigned char uc = c;
186  int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
187  int shift = uc % INT_BITS;
188  int r = (*p >> shift) & 1;
189  *p ^= ((i & 1) ^ r) << shift;
190  return r;
191}
192
193/* MSGID approximates a quotation mark.  Return its translation if it
194   has one; otherwise, return either it or "\"", depending on S.  */
195static char const *
196gettext_quote (char const *msgid, enum quoting_style s)
197{
198  char const *translation = _(msgid);
199  if (translation == msgid && s == clocale_quoting_style)
200    translation = "\"";
201  return translation;
202}
203
204/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
205   argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
206   non-quoting-style part of O to control quoting.
207   Terminate the output with a null character, and return the written
208   size of the output, not counting the terminating null.
209   If BUFFERSIZE is too small to store the output string, return the
210   value that would have been returned had BUFFERSIZE been large enough.
211   If ARGSIZE is -1, use the string length of the argument for ARGSIZE.
212
213   This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
214   ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
215   style specified by O, and O may not be null.  */
216
217static size_t
218quotearg_buffer_restyled (char *buffer, size_t buffersize,
219			  char const *arg, size_t argsize,
220			  enum quoting_style quoting_style,
221			  struct quoting_options const *o)
222{
223  size_t i;
224  size_t len = 0;
225  char const *quote_string = 0;
226  size_t quote_string_len = 0;
227  int backslash_escapes = 0;
228  int unibyte_locale = MB_CUR_MAX == 1;
229
230#define STORE(c) \
231    do \
232      { \
233	if (len < buffersize) \
234	  buffer[len] = (c); \
235	len++; \
236      } \
237    while (0)
238
239  switch (quoting_style)
240    {
241    case c_quoting_style:
242      STORE ('"');
243      backslash_escapes = 1;
244      quote_string = "\"";
245      quote_string_len = 1;
246      break;
247
248    case escape_quoting_style:
249      backslash_escapes = 1;
250      break;
251
252    case locale_quoting_style:
253    case clocale_quoting_style:
254      {
255	/* Get translations for open and closing quotation marks.
256
257	   The message catalog should translate "`" to a left
258	   quotation mark suitable for the locale, and similarly for
259	   "'".  If the catalog has no translation,
260	   locale_quoting_style quotes `like this', and
261	   clocale_quoting_style quotes "like this".
262
263	   For example, an American English Unicode locale should
264	   translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
265	   should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
266	   MARK).  A British English Unicode locale should instead
267	   translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
268	   U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.  */
269
270	char const *left = gettext_quote (N_("`"), quoting_style);
271	char const *right = gettext_quote (N_("'"), quoting_style);
272	for (quote_string = left; *quote_string; quote_string++)
273	  STORE (*quote_string);
274	backslash_escapes = 1;
275	quote_string = right;
276	quote_string_len = strlen (quote_string);
277      }
278      break;
279
280    case shell_always_quoting_style:
281      STORE ('\'');
282      quote_string = "'";
283      quote_string_len = 1;
284      break;
285
286    default:
287      break;
288    }
289
290  for (i = 0;  ! (argsize == (size_t) -1 ? arg[i] == '\0' : i == argsize);  i++)
291    {
292      unsigned char c;
293      unsigned char esc;
294
295      if (backslash_escapes
296	  && quote_string_len
297	  && i + quote_string_len <= argsize
298	  && memcmp (arg + i, quote_string, quote_string_len) == 0)
299	STORE ('\\');
300
301      c = arg[i];
302      switch (c)
303	{
304	case '\0':
305	  if (backslash_escapes)
306	    {
307	      STORE ('\\');
308	      STORE ('0');
309	      STORE ('0');
310	      c = '0';
311	    }
312	  break;
313
314	case '?':
315	  switch (quoting_style)
316	    {
317	    case shell_quoting_style:
318	      goto use_shell_always_quoting_style;
319
320	    case c_quoting_style:
321	      if (i + 2 < argsize && arg[i + 1] == '?')
322		switch (arg[i + 2])
323		  {
324		  case '!': case '\'':
325		  case '(': case ')': case '-': case '/':
326		  case '<': case '=': case '>':
327		    /* Escape the second '?' in what would otherwise be
328		       a trigraph.  */
329		    i += 2;
330		    c = arg[i + 2];
331		    STORE ('?');
332		    STORE ('\\');
333		    STORE ('?');
334		    break;
335		  }
336	      break;
337
338	    default:
339	      break;
340	    }
341	  break;
342
343	case ALERT_CHAR: esc = 'a'; goto c_escape;
344	case '\b': esc = 'b'; goto c_escape;
345	case '\f': esc = 'f'; goto c_escape;
346	case '\n': esc = 'n'; goto c_and_shell_escape;
347	case '\r': esc = 'r'; goto c_and_shell_escape;
348	case '\t': esc = 't'; goto c_and_shell_escape;
349	case '\v': esc = 'v'; goto c_escape;
350	case '\\': esc = c; goto c_and_shell_escape;
351
352	c_and_shell_escape:
353	  if (quoting_style == shell_quoting_style)
354	    goto use_shell_always_quoting_style;
355	c_escape:
356	  if (backslash_escapes)
357	    {
358	      c = esc;
359	      goto store_escape;
360	    }
361	  break;
362
363	case '#': case '~':
364	  if (i != 0)
365	    break;
366	  /* Fall through.  */
367	case ' ':
368	case '!': /* special in bash */
369	case '"': case '$': case '&':
370	case '(': case ')': case '*': case ';':
371	case '<': case '>': case '[':
372	case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
373	case '`': case '|':
374	  /* A shell special character.  In theory, '$' and '`' could
375	     be the first bytes of multibyte characters, which means
376	     we should check them with mbrtowc, but in practice this
377	     doesn't happen so it's not worth worrying about.  */
378	  if (quoting_style == shell_quoting_style)
379	    goto use_shell_always_quoting_style;
380	  break;
381
382	case '\'':
383	  switch (quoting_style)
384	    {
385	    case shell_quoting_style:
386	      goto use_shell_always_quoting_style;
387
388	    case shell_always_quoting_style:
389	      STORE ('\'');
390	      STORE ('\\');
391	      STORE ('\'');
392	      break;
393
394	    default:
395	      break;
396	    }
397	  break;
398
399	case '%': case '+': case ',': case '-': case '.': case '/':
400	case '0': case '1': case '2': case '3': case '4': case '5':
401	case '6': case '7': case '8': case '9': case ':': case '=':
402	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
403	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
404	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
405	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
406	case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
407	case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
408	case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
409	case 'o': case 'p': case 'q': case 'r': case 's': case 't':
410	case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
411	case '{': case '}':
412	  /* These characters don't cause problems, no matter what the
413	     quoting style is.  They cannot start multibyte sequences.  */
414	  break;
415
416	default:
417	  /* If we have a multibyte sequence, copy it until we reach
418	     its end, find an error, or come back to the initial shift
419	     state.  For C-like styles, if the sequence has
420	     unprintable characters, escape the whole sequence, since
421	     we can't easily escape single characters within it.  */
422	  {
423	    /* Length of multibyte sequence found so far.  */
424	    size_t m;
425
426	    int printable;
427
428	    if (unibyte_locale)
429	      {
430		m = 1;
431		printable = ISPRINT (c);
432	      }
433	    else
434	      {
435		mbstate_t mbstate;
436		memset (&mbstate, 0, sizeof mbstate);
437
438		m = 0;
439		printable = 1;
440		if (argsize == (size_t) -1)
441		  argsize = strlen (arg);
442
443		do
444		  {
445		    wchar_t w;
446		    size_t bytes = mbrtowc (&w, &arg[i + m],
447					    argsize - (i + m), &mbstate);
448		    if (bytes == 0)
449		      break;
450		    else if (bytes == (size_t) -1)
451		      {
452			printable = 0;
453			break;
454		      }
455		    else if (bytes == (size_t) -2)
456		      {
457			printable = 0;
458			while (i + m < argsize && arg[i + m])
459			  m++;
460			break;
461		      }
462		    else
463		      {
464			if (! iswprint (w))
465			  printable = 0;
466			m += bytes;
467		      }
468		  }
469		while (! mbsinit (&mbstate));
470	      }
471
472	    if (1 < m || (backslash_escapes && ! printable))
473	      {
474		/* Output a multibyte sequence, or an escaped
475		   unprintable unibyte character.  */
476		size_t ilim = i + m;
477
478		for (;;)
479		  {
480		    if (backslash_escapes && ! printable)
481		      {
482			STORE ('\\');
483			STORE ('0' + (c >> 6));
484			STORE ('0' + ((c >> 3) & 7));
485			c = '0' + (c & 7);
486		      }
487		    if (ilim <= i + 1)
488		      break;
489		    STORE (c);
490		    c = arg[++i];
491		  }
492
493		goto store_c;
494	      }
495	  }
496	}
497
498      if (! (backslash_escapes
499	     && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
500	goto store_c;
501
502    store_escape:
503      STORE ('\\');
504
505    store_c:
506      STORE (c);
507    }
508
509  if (quote_string)
510    for (; *quote_string; quote_string++)
511      STORE (*quote_string);
512
513  if (len < buffersize)
514    buffer[len] = '\0';
515  return len;
516
517 use_shell_always_quoting_style:
518  return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
519				   shell_always_quoting_style, o);
520}
521
522/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
523   argument ARG (of size ARGSIZE), using O to control quoting.
524   If O is null, use the default.
525   Terminate the output with a null character, and return the written
526   size of the output, not counting the terminating null.
527   If BUFFERSIZE is too small to store the output string, return the
528   value that would have been returned had BUFFERSIZE been large enough.
529   If ARGSIZE is -1, use the string length of the argument for ARGSIZE.  */
530size_t
531quotearg_buffer (char *buffer, size_t buffersize,
532		 char const *arg, size_t argsize,
533		 struct quoting_options const *o)
534{
535  struct quoting_options const *p = o ? o : &default_quoting_options;
536  return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
537				   p->style, p);
538}
539
/* Return a quoted version of ARG, held in storage slot N.
   ARG is ARGSIZE bytes long; an ARGSIZE of -1 means ARG is a
   null-terminated string.  OPTIONS specifies the quoting options.
   The result lives in static storage that the next call with the
   same N may reuse.
   N must be nonnegative; a negative N aborts.  N is deliberately of
   type "int" to leave room for future extensions using negative
   values.  */
static char *
quotearg_n_options (int n, char const *arg, size_t argsize,
                    struct quoting_options const *options)
{
  struct slotvec
    {
      size_t size;
      char *val;
    };

  /* Slot 0 starts out with a preallocated buffer, so that the caller
     can always quote one small component of a "memory exhausted"
     message in slot 0.  */
  static char slot0[256];
  static struct slotvec slotvec0 = {sizeof slot0, slot0};
  static struct slotvec *slotvec = &slotvec0;
  static unsigned int nslots = 1;

  unsigned int wanted = n;
  struct slotvec *slot;
  size_t qsize;

  if (n < 0)
    abort ();

  if (wanted >= nslots)
    {
      /* Grow the slot table so that it includes slot N.  */
      unsigned int new_nslots = wanted + 1;
      size_t nbytes = new_nslots * sizeof *slotvec;

      /* Give up if the byte count overflowed size_t.  */
      if (SIZE_MAX / UINT_MAX <= sizeof *slotvec
          && nbytes / sizeof *slotvec != new_nslots)
        xalloc_die ();

      /* The initial one-entry table is static and cannot be passed to
         xrealloc, so move it to the heap first.  */
      if (slotvec == &slotvec0)
        {
          slotvec = (struct slotvec *) xmalloc (sizeof *slotvec);
          *slotvec = slotvec0;
        }
      slotvec = (struct slotvec *) xrealloc (slotvec, nbytes);

      /* New slots start with no buffer.  */
      memset (slotvec + nslots, 0, (new_nslots - nslots) * sizeof *slotvec);
      nslots = new_nslots;
    }

  slot = &slotvec[n];
  qsize = quotearg_buffer (slot->val, slot->size, arg, argsize, options);

  if (slot->size <= qsize)
    {
      /* The buffer was too small: enlarge it and quote again.  The
         static slot0 array must not be handed to xrealloc.  */
      slot->size = qsize + 1;
      slot->val = xrealloc (slot->val == slot0 ? 0 : slot->val, slot->size);
      quotearg_buffer (slot->val, slot->size, arg, argsize, options);
    }

  return slot->val;
}
601
602char *
603quotearg_n (int n, char const *arg)
604{
605  return quotearg_n_options (n, arg, (size_t) -1, &default_quoting_options);
606}
607
/* Quote the null-terminated string ARG with the default quoting
   options, using storage slot 0.  */
char *
quotearg (char const *arg)
{
  char *quoted = quotearg_n (0, arg);
  return quoted;
}
613
614/* Return quoting options for STYLE, with no extra quoting.  */
615static struct quoting_options
616quoting_options_from_style (enum quoting_style style)
617{
618  struct quoting_options o;
619  o.style = style;
620  memset (o.quote_these_too, 0, sizeof o.quote_these_too);
621  return o;
622}
623
624char *
625quotearg_n_style (int n, enum quoting_style s, char const *arg)
626{
627  struct quoting_options const o = quoting_options_from_style (s);
628  return quotearg_n_options (n, arg, (size_t) -1, &o);
629}
630
631char *
632quotearg_n_style_mem (int n, enum quoting_style s,
633		      char const *arg, size_t argsize)
634{
635  struct quoting_options const o = quoting_options_from_style (s);
636  return quotearg_n_options (n, arg, argsize, &o);
637}
638
639char *
640quotearg_style (enum quoting_style s, char const *arg)
641{
642  return quotearg_n_style (0, s, arg);
643}
644
645char *
646quotearg_char (char const *arg, char ch)
647{
648  struct quoting_options options;
649  options = default_quoting_options;
650  set_char_quoting (&options, ch, 1);
651  return quotearg_n_options (0, arg, (size_t) -1, &options);
652}
653
/* Quote the null-terminated string ARG like quotearg_char does, with
   ':' as the additionally quoted character.  */
char *
quotearg_colon (char const *arg)
{
  char const colon = ':';
  return quotearg_char (arg, colon);
}
659