1/*	$NetBSD$	*/
2
3/* quotearg.c - quote arguments for output
4   Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
5
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 2, or (at your option)
9   any later version.
10
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program; if not, write to the Free Software Foundation,
18   Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
19
20/* Written by Paul Eggert <eggert@twinsun.com> */
21
22#if HAVE_CONFIG_H
23# include <config.h>
24#endif
25
26#if HAVE_STDDEF_H
27# include <stddef.h>  /* For the definition of size_t on windows w/MSVC.  */
28#endif
29#include <sys/types.h>
30#include <quotearg.h>
31#include <xalloc.h>
32
33#include <ctype.h>
34
35#if ENABLE_NLS
36# include <libintl.h>
37# define _(text) gettext (text)
38#else
39# define _(text) text
40#endif
41#define N_(text) text
42
43#if HAVE_LIMITS_H
44# include <limits.h>
45#endif
46#ifndef CHAR_BIT
47# define CHAR_BIT 8
48#endif
49#ifndef UCHAR_MAX
50# define UCHAR_MAX ((unsigned char) -1)
51#endif
52
53#if HAVE_C_BACKSLASH_A
54# define ALERT_CHAR '\a'
55#else
56# define ALERT_CHAR '\7'
57#endif
58
59#if HAVE_STDLIB_H
60# include <stdlib.h>
61#endif
62
63#if HAVE_STRING_H
64# include <string.h>
65#endif
66
67#if HAVE_WCHAR_H
68# include <wchar.h>
69#endif
70
71#if !HAVE_MBRTOWC
72/* Disable multibyte processing entirely.  Since MB_CUR_MAX is 1, the
73   other macros are defined only for documentation and to satisfy C
74   syntax.  */
75# undef MB_CUR_MAX
76# define MB_CUR_MAX 1
77# define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
78# define mbsinit(ps) 1
79# define iswprint(wc) ISPRINT ((unsigned char) (wc))
80#endif
81
82#ifndef iswprint
83# if HAVE_WCTYPE_H
84#  include <wctype.h>
85# endif
86# if !defined iswprint && !HAVE_ISWPRINT
87#  define iswprint(wc) 1
88# endif
89#endif
90
/* Number of bits in an int.  The quote_these_too bit vector in struct
   quoting_options is indexed in units of this many bits.  */
#define INT_BITS (sizeof (int) * CHAR_BIT)

/* IN_CTYPE_DOMAIN (C) is the guard that ISPRINT applies before calling
   isprint.  It is always 1 when STDC_HEADERS is defined, or when
   neither an isascii macro nor HAVE_ISASCII is defined; otherwise it
   is isascii (C).  */
#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
# define IN_CTYPE_DOMAIN(c) 1
#else
# define IN_CTYPE_DOMAIN(c) isascii(c)
#endif

/* Undefine to protect against the definition in wctype.h of solaris2.6.   */
#undef ISPRINT
/* Nonzero if C passes the IN_CTYPE_DOMAIN guard and isprint (C) is
   nonzero.  Callers in this file pass an unsigned char value.  */
#define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
102
/* The set of options that controls how an argument is quoted.  */
struct quoting_options
{
  /* Basic quoting style.  */
  enum quoting_style style;

  /* Quote the characters indicated by this bit vector even if the
     quoting style would not normally require them to be quoted.
     The flag for unsigned char value UC is bit (UC % INT_BITS) of
     element (UC / INT_BITS); the array is sized so that every value
     in 0 .. UCHAR_MAX has a bit.  */
  int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
};
112
/* Names of quoting styles, terminated by a null pointer.  The names
   are listed in the same order as the values in quoting_style_vals,
   so entry I here names quoting_style_vals[I].  */
char const *const quoting_style_args[] =
{
  "literal",
  "shell",
  "shell-always",
  "c",
  "escape",
  "locale",
  "clocale",
  0
};
125
/* Correspondences to quoting style names: entry I is the style named
   by quoting_style_args[I].  Unlike the name table, this array has no
   terminating sentinel.  */
enum quoting_style const quoting_style_vals[] =
{
  literal_quoting_style,
  shell_quoting_style,
  shell_always_quoting_style,
  c_quoting_style,
  escape_quoting_style,
  locale_quoting_style,
  clocale_quoting_style
};
137
/* The default quoting options, used by the functions below whenever
   they are passed a null options pointer.  The object has static
   storage and no initializer, so it starts out all zero: no extra
   characters are quoted, and the style is whichever enumerator has
   value 0 (presumably literal_quoting_style; confirm in quotearg.h).  */
static struct quoting_options default_quoting_options;
140
141/* Allocate a new set of quoting options, with contents initially identical
142   to O if O is not null, or to the default if O is null.
143   It is the caller's responsibility to free the result.  */
144struct quoting_options *
145clone_quoting_options (struct quoting_options *o)
146{
147  struct quoting_options *p
148    = (struct quoting_options *) xmalloc (sizeof (struct quoting_options));
149  *p = *(o ? o : &default_quoting_options);
150  return p;
151}
152
153/* Get the value of O's quoting style.  If O is null, use the default.  */
154enum quoting_style
155get_quoting_style (struct quoting_options *o)
156{
157  return (o ? o : &default_quoting_options)->style;
158}
159
160/* In O (or in the default if O is null),
161   set the value of the quoting style to S.  */
162void
163set_quoting_style (struct quoting_options *o, enum quoting_style s)
164{
165  (o ? o : &default_quoting_options)->style = s;
166}
167
168/* In O (or in the default if O is null),
169   set the value of the quoting options for character C to I.
170   Return the old value.  Currently, the only values defined for I are
171   0 (the default) and 1 (which means to quote the character even if
172   it would not otherwise be quoted).  */
173int
174set_char_quoting (struct quoting_options *o, char c, int i)
175{
176  unsigned char uc = c;
177  int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
178  int shift = uc % INT_BITS;
179  int r = (*p >> shift) & 1;
180  *p ^= ((i & 1) ^ r) << shift;
181  return r;
182}
183
184/* MSGID approximates a quotation mark.  Return its translation if it
185   has one; otherwise, return either it or "\"", depending on S.  */
186static char const *
187gettext_quote (char const *msgid, enum quoting_style s)
188{
189  char const *translation = _(msgid);
190  if (translation == msgid && s == clocale_quoting_style)
191    translation = "\"";
192  return translation;
193}
194
195/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
196   argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
197   non-quoting-style part of O to control quoting.
198   Terminate the output with a null character, and return the written
199   size of the output, not counting the terminating null.
200   If BUFFERSIZE is too small to store the output string, return the
201   value that would have been returned had BUFFERSIZE been large enough.
202   If ARGSIZE is -1, use the string length of the argument for ARGSIZE.
203
204   This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
205   ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
206   style specified by O, and O may not be null.  */
207
208static size_t
209quotearg_buffer_restyled (char *buffer, size_t buffersize,
210			  char const *arg, size_t argsize,
211			  enum quoting_style quoting_style,
212			  struct quoting_options const *o)
213{
214  size_t i;
215  size_t len = 0;
216  char const *quote_string = 0;
217  size_t quote_string_len = 0;
218  int backslash_escapes = 0;
219  int unibyte_locale = MB_CUR_MAX == 1;
220
221#define STORE(c) \
222    do \
223      { \
224	if (len < buffersize) \
225	  buffer[len] = (c); \
226	len++; \
227      } \
228    while (0)
229
230  switch (quoting_style)
231    {
232    case c_quoting_style:
233      STORE ('"');
234      backslash_escapes = 1;
235      quote_string = "\"";
236      quote_string_len = 1;
237      break;
238
239    case escape_quoting_style:
240      backslash_escapes = 1;
241      break;
242
243    case locale_quoting_style:
244    case clocale_quoting_style:
245      {
246	/* Get translations for open and closing quotation marks.
247
248	   The message catalog should translate "`" to a left
249	   quotation mark suitable for the locale, and similarly for
250	   "'".  If the catalog has no translation,
251	   locale_quoting_style quotes `like this', and
252	   clocale_quoting_style quotes "like this".
253
254	   For example, an American English Unicode locale should
255	   translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
256	   should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
257	   MARK).  A British English Unicode locale should instead
258	   translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
259	   U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.  */
260
261	char const *left = gettext_quote (N_("`"), quoting_style);
262	char const *right = gettext_quote (N_("'"), quoting_style);
263	for (quote_string = left; *quote_string; quote_string++)
264	  STORE (*quote_string);
265	backslash_escapes = 1;
266	quote_string = right;
267	quote_string_len = strlen (quote_string);
268      }
269      break;
270
271    case shell_always_quoting_style:
272      STORE ('\'');
273      quote_string = "'";
274      quote_string_len = 1;
275      break;
276
277    default:
278      break;
279    }
280
281  for (i = 0;  ! (argsize == (size_t) -1 ? arg[i] == '\0' : i == argsize);  i++)
282    {
283      unsigned char c;
284      unsigned char esc;
285
286      if (backslash_escapes
287	  && quote_string_len
288	  && i + quote_string_len <= argsize
289	  && memcmp (arg + i, quote_string, quote_string_len) == 0)
290	STORE ('\\');
291
292      c = arg[i];
293      switch (c)
294	{
295	case '?':
296	  switch (quoting_style)
297	    {
298	    case shell_quoting_style:
299	      goto use_shell_always_quoting_style;
300
301	    case c_quoting_style:
302	      if (i + 2 < argsize && arg[i + 1] == '?')
303		switch (arg[i + 2])
304		  {
305		  case '!': case '\'':
306		  case '(': case ')': case '-': case '/':
307		  case '<': case '=': case '>':
308		    /* Escape the second '?' in what would otherwise be
309		       a trigraph.  */
310		    i += 2;
311		    c = arg[i + 2];
312		    STORE ('?');
313		    STORE ('\\');
314		    STORE ('?');
315		    break;
316		  }
317	      break;
318
319	    default:
320	      break;
321	    }
322	  break;
323
324	case ALERT_CHAR: esc = 'a'; goto c_escape;
325	case '\b': esc = 'b'; goto c_escape;
326	case '\f': esc = 'f'; goto c_escape;
327	case '\n': esc = 'n'; goto c_and_shell_escape;
328	case '\r': esc = 'r'; goto c_and_shell_escape;
329	case '\t': esc = 't'; goto c_and_shell_escape;
330	case '\v': esc = 'v'; goto c_escape;
331	case '\\': esc = c; goto c_and_shell_escape;
332
333	c_and_shell_escape:
334	  if (quoting_style == shell_quoting_style)
335	    goto use_shell_always_quoting_style;
336	c_escape:
337	  if (backslash_escapes)
338	    {
339	      c = esc;
340	      goto store_escape;
341	    }
342	  break;
343
344	case '#': case '~':
345	  if (i != 0)
346	    break;
347	  /* Fall through.  */
348	case ' ':
349	case '!': /* special in bash */
350	case '"': case '$': case '&':
351	case '(': case ')': case '*': case ';':
352	case '<': case '>': case '[':
353	case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
354	case '`': case '|':
355	  /* A shell special character.  In theory, '$' and '`' could
356	     be the first bytes of multibyte characters, which means
357	     we should check them with mbrtowc, but in practice this
358	     doesn't happen so it's not worth worrying about.  */
359	  if (quoting_style == shell_quoting_style)
360	    goto use_shell_always_quoting_style;
361	  break;
362
363	case '\'':
364	  switch (quoting_style)
365	    {
366	    case shell_quoting_style:
367	      goto use_shell_always_quoting_style;
368
369	    case shell_always_quoting_style:
370	      STORE ('\'');
371	      STORE ('\\');
372	      STORE ('\'');
373	      break;
374
375	    default:
376	      break;
377	    }
378	  break;
379
380	case '%': case '+': case ',': case '-': case '.': case '/':
381	case '0': case '1': case '2': case '3': case '4': case '5':
382	case '6': case '7': case '8': case '9': case ':': case '=':
383	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
384	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
385	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
386	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
387	case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
388	case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
389	case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
390	case 'o': case 'p': case 'q': case 'r': case 's': case 't':
391	case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
392	case '{': case '}':
393	  /* These characters don't cause problems, no matter what the
394	     quoting style is.  They cannot start multibyte sequences.  */
395	  break;
396
397	default:
398	  /* If we have a multibyte sequence, copy it until we reach
399	     its end, find an error, or come back to the initial shift
400	     state.  For C-like styles, if the sequence has
401	     unprintable characters, escape the whole sequence, since
402	     we can't easily escape single characters within it.  */
403	  {
404	    /* Length of multibyte sequence found so far.  */
405	    size_t m;
406
407	    int printable;
408
409	    if (unibyte_locale)
410	      {
411		m = 1;
412		printable = ISPRINT (c);
413	      }
414	    else
415	      {
416		mbstate_t mbstate;
417		memset (&mbstate, 0, sizeof mbstate);
418
419		m = 0;
420		printable = 1;
421		if (argsize == (size_t) -1)
422		  argsize = strlen (arg);
423
424		do
425		  {
426		    wchar_t w;
427		    size_t bytes = mbrtowc (&w, &arg[i + m],
428					    argsize - (i + m), &mbstate);
429		    if (bytes == 0)
430		      break;
431		    else if (bytes == (size_t) -1)
432		      {
433			printable = 0;
434			break;
435		      }
436		    else if (bytes == (size_t) -2)
437		      {
438			printable = 0;
439			while (i + m < argsize && arg[i + m])
440			  m++;
441			break;
442		      }
443		    else
444		      {
445			if (! iswprint (w))
446			  printable = 0;
447			m += bytes;
448		      }
449		  }
450		while (! mbsinit (&mbstate));
451	      }
452
453	    if (1 < m || (backslash_escapes && ! printable))
454	      {
455		/* Output a multibyte sequence, or an escaped
456		   unprintable unibyte character.  */
457		size_t ilim = i + m;
458
459		for (;;)
460		  {
461		    if (backslash_escapes && ! printable)
462		      {
463			STORE ('\\');
464			STORE ('0' + (c >> 6));
465			STORE ('0' + ((c >> 3) & 7));
466			c = '0' + (c & 7);
467		      }
468		    if (ilim <= i + 1)
469		      break;
470		    STORE (c);
471		    c = arg[++i];
472		  }
473
474		goto store_c;
475	      }
476	  }
477	}
478
479      if (! (backslash_escapes
480	     && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
481	goto store_c;
482
483    store_escape:
484      STORE ('\\');
485
486    store_c:
487      STORE (c);
488    }
489
490  if (quote_string)
491    for (; *quote_string; quote_string++)
492      STORE (*quote_string);
493
494  if (len < buffersize)
495    buffer[len] = '\0';
496  return len;
497
498 use_shell_always_quoting_style:
499  return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
500				   shell_always_quoting_style, o);
501}
502
503/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
504   argument ARG (of size ARGSIZE), using O to control quoting.
505   If O is null, use the default.
506   Terminate the output with a null character, and return the written
507   size of the output, not counting the terminating null.
508   If BUFFERSIZE is too small to store the output string, return the
509   value that would have been returned had BUFFERSIZE been large enough.
510   If ARGSIZE is -1, use the string length of the argument for ARGSIZE.  */
511size_t
512quotearg_buffer (char *buffer, size_t buffersize,
513		 char const *arg, size_t argsize,
514		 struct quoting_options const *o)
515{
516  struct quoting_options const *p = o ? o : &default_quoting_options;
517  return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
518				   p->style, p);
519}
520
/* Use storage slot N to return a quoted version of the string ARG.
   OPTIONS specifies the quoting options.
   The returned value points to static storage that can be
   reused by the next call to this function with the same value of N.
   N must be nonnegative.  N is deliberately declared with type "int"
   to allow for future extensions (using negative values).

   The slot vector grows on demand to N + 1 entries, and each slot's
   buffer grows on demand to fit the quoted result.  The program is
   aborted if N is negative or if the size of the slot vector would
   overflow.  */
static char *
quotearg_n_options (int n, char const *arg,
                    struct quoting_options const *options)
{
  /* Preallocate a slot 0 buffer, so that the caller can always quote
     one small component of a "memory exhausted" message in slot 0.  */
  static char slot0[256];
  /* Number of entries currently allocated in SLOTVEC.  */
  static unsigned int nslots = 1;
  struct slotvec
    {
      size_t size;      /* Allocated size of VAL.  */
      char *val;        /* Buffer holding the slot's latest result.  */
    };
  static struct slotvec slotvec0 = {sizeof slot0, slot0};
  static struct slotvec *slotvec = &slotvec0;

  /* The comparison is done in unsigned arithmetic, so a negative N
     also takes this branch and is rejected by the check below.  */
  if (nslots <= n)
    {
      int n1 = n + 1;
      size_t s = n1 * sizeof (struct slotvec);
      if (! (0 < n1 && n1 == s / sizeof (struct slotvec)))
        abort ();
      /* The initial one-entry vector is a static object; move it to
         the heap before the first xrealloc.  */
      if (slotvec == &slotvec0)
        {
          slotvec = (struct slotvec *) xmalloc (sizeof (struct slotvec));
          *slotvec = slotvec0;
        }
      slotvec = (struct slotvec *) xrealloc (slotvec, s);
      memset (slotvec + nslots, 0, (n1 - nslots) * sizeof (struct slotvec));
      /* Record the new entry count N1, not N.  Otherwise the next
         call with the same N would take this branch again and the
         memset above would wipe slot N, leaking its buffer.  */
      nslots = n1;
    }

  {
    size_t size = slotvec[n].size;
    char *val = slotvec[n].val;
    /* First attempt: quote into whatever buffer the slot has now.  */
    size_t qsize = quotearg_buffer (val, size, arg, (size_t) -1, options);

    if (size <= qsize)
      {
        /* The result (plus its terminating null) did not fit.  Get a
           buffer of exactly the needed size and quote again.  slot0
           is static storage, so it must never be passed to
           xrealloc.  */
        slotvec[n].size = size = qsize + 1;
        slotvec[n].val = val = xrealloc (val == slot0 ? 0 : val, size);
        quotearg_buffer (val, size, arg, (size_t) -1, options);
      }

    return val;
  }
}
574
575char *
576quotearg_n (unsigned int n, char const *arg)
577{
578  return quotearg_n_options (n, arg, &default_quoting_options);
579}
580
/* Return a quoted version of ARG, quoted according to the default
   quoting options and kept in storage slot 0.  */
char *
quotearg (char const *arg)
{
  unsigned int const slot = 0;

  return quotearg_n (slot, arg);
}
586
587char *
588quotearg_n_style (unsigned int n, enum quoting_style s, char const *arg)
589{
590  struct quoting_options o;
591  o.style = s;
592  memset (o.quote_these_too, 0, sizeof o.quote_these_too);
593  return quotearg_n_options (n, arg, &o);
594}
595
596char *
597quotearg_style (enum quoting_style s, char const *arg)
598{
599  return quotearg_n_style (0, s, arg);
600}
601
602char *
603quotearg_char (char const *arg, char ch)
604{
605  struct quoting_options options;
606  options = default_quoting_options;
607  set_char_quoting (&options, ch, 1);
608  return quotearg_n_options (0, arg, &options);
609}
610
/* Return a quoted version of ARG in which ':' is additionally marked
   for quoting; see quotearg_char.  */
char *
quotearg_colon (char const *arg)
{
  char const colon = ':';

  return quotearg_char (arg, colon);
}
616