quotearg.c revision 1.2
1/* quotearg.c - quote arguments for output
2
3   Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005 Free Software
4   Foundation, Inc.
5
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 2, or (at your option)
9   any later version.
10
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program; if not, write to the Free Software Foundation,
18   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
19
20/* Written by Paul Eggert <eggert@twinsun.com> */
21
22#ifdef HAVE_CONFIG_H
23# include <config.h>
24#endif
25
26#include "quotearg.h"
27
28#include "xalloc.h"
29
30#include <ctype.h>
31#include <errno.h>
32#include <limits.h>
33#include <stdbool.h>
34#include <stdlib.h>
35#include <string.h>
36
37#include "gettext.h"
38#define _(msgid) gettext (msgid)
39#define N_(msgid) msgid
40
41#if HAVE_WCHAR_H
42
43/* BSD/OS 4.1 wchar.h requires FILE and struct tm to be declared.  */
44# include <stdio.h>
45# include <time.h>
46
47# include <wchar.h>
48#endif
49
50#if !HAVE_MBRTOWC
51/* Disable multibyte processing entirely.  Since MB_CUR_MAX is 1, the
52   other macros are defined only for documentation and to satisfy C
53   syntax.  */
54# undef MB_CUR_MAX
55# define MB_CUR_MAX 1
56# define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
57# define iswprint(wc) isprint ((unsigned char) (wc))
58# undef HAVE_MBSINIT
59#endif
60
61#if !defined mbsinit && !HAVE_MBSINIT
62# define mbsinit(ps) 1
63#endif
64
65#ifndef iswprint
66# if HAVE_WCTYPE_H
67#  include <wctype.h>
68# endif
69# if !defined iswprint && !HAVE_ISWPRINT
70#  define iswprint(wc) 1
71# endif
72#endif
73
74#ifndef SIZE_MAX
75# define SIZE_MAX ((size_t) -1)
76#endif
77
78#define INT_BITS (sizeof (int) * CHAR_BIT)
79
80struct quoting_options
81{
82  /* Basic quoting style.  */
83  enum quoting_style style;
84
85  /* Quote the characters indicated by this bit vector even if the
86     quoting style would not normally require them to be quoted.  */
87  unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
88};
89
/* User-visible names of the quoting styles, terminated by a null
   pointer.  Entry K names the style stored in quoting_style_vals[K].  */
char const *const quoting_style_args[] =
{
  "literal",
  "shell",
  "shell-always",
  "c",
  "escape",
  "locale",
  "clocale",
  NULL
};
102
103/* Correspondences to quoting style names.  */
104enum quoting_style const quoting_style_vals[] =
105{
106  literal_quoting_style,
107  shell_quoting_style,
108  shell_always_quoting_style,
109  c_quoting_style,
110  escape_quoting_style,
111  locale_quoting_style,
112  clocale_quoting_style
113};
114
115/* The default quoting options.  */
116static struct quoting_options default_quoting_options;
117
118/* Allocate a new set of quoting options, with contents initially identical
119   to O if O is not null, or to the default if O is null.
120   It is the caller's responsibility to free the result.  */
121struct quoting_options *
122clone_quoting_options (struct quoting_options *o)
123{
124  int e = errno;
125  struct quoting_options *p = xmalloc (sizeof *p);
126  *p = *(o ? o : &default_quoting_options);
127  errno = e;
128  return p;
129}
130
131/* Get the value of O's quoting style.  If O is null, use the default.  */
132enum quoting_style
133get_quoting_style (struct quoting_options *o)
134{
135  return (o ? o : &default_quoting_options)->style;
136}
137
138/* In O (or in the default if O is null),
139   set the value of the quoting style to S.  */
140void
141set_quoting_style (struct quoting_options *o, enum quoting_style s)
142{
143  (o ? o : &default_quoting_options)->style = s;
144}
145
146/* In O (or in the default if O is null),
147   set the value of the quoting options for character C to I.
148   Return the old value.  Currently, the only values defined for I are
149   0 (the default) and 1 (which means to quote the character even if
150   it would not otherwise be quoted).  */
151int
152set_char_quoting (struct quoting_options *o, char c, int i)
153{
154  unsigned char uc = c;
155  unsigned int *p =
156    (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
157  int shift = uc % INT_BITS;
158  int r = (*p >> shift) & 1;
159  *p ^= ((i & 1) ^ r) << shift;
160  return r;
161}
162
163/* MSGID approximates a quotation mark.  Return its translation if it
164   has one; otherwise, return either it or "\"", depending on S.  */
165static char const *
166gettext_quote (char const *msgid, enum quoting_style s)
167{
168  char const *translation = _(msgid);
169  if (translation == msgid && s == clocale_quoting_style)
170    translation = "\"";
171  return translation;
172}
173
174/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
175   argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
176   non-quoting-style part of O to control quoting.
177   Terminate the output with a null character, and return the written
178   size of the output, not counting the terminating null.
179   If BUFFERSIZE is too small to store the output string, return the
180   value that would have been returned had BUFFERSIZE been large enough.
181   If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
182
183   This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
184   ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
185   style specified by O, and O may not be null.  */
186
187static size_t
188quotearg_buffer_restyled (char *buffer, size_t buffersize,
189			  char const *arg, size_t argsize,
190			  enum quoting_style quoting_style,
191			  struct quoting_options const *o)
192{
193  size_t i;
194  size_t len = 0;
195  char const *quote_string = 0;
196  size_t quote_string_len = 0;
197  bool backslash_escapes = false;
198  bool unibyte_locale = MB_CUR_MAX == 1;
199
200#define STORE(c) \
201    do \
202      { \
203	if (len < buffersize) \
204	  buffer[len] = (c); \
205	len++; \
206      } \
207    while (0)
208
209  switch (quoting_style)
210    {
211    case c_quoting_style:
212      STORE ('"');
213      backslash_escapes = true;
214      quote_string = "\"";
215      quote_string_len = 1;
216      break;
217
218    case escape_quoting_style:
219      backslash_escapes = true;
220      break;
221
222    case locale_quoting_style:
223    case clocale_quoting_style:
224      {
225	/* TRANSLATORS:
226	   Get translations for open and closing quotation marks.
227
228	   The message catalog should translate "`" to a left
229	   quotation mark suitable for the locale, and similarly for
230	   "'".  If the catalog has no translation,
231	   locale_quoting_style quotes `like this', and
232	   clocale_quoting_style quotes "like this".
233
234	   For example, an American English Unicode locale should
235	   translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
236	   should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
237	   MARK).  A British English Unicode locale should instead
238	   translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
239	   U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
240
241	   If you don't know what to put here, please see
242	   <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs>
243	   and use glyphs suitable for your language.  */
244
245	char const *left = gettext_quote (N_("`"), quoting_style);
246	char const *right = gettext_quote (N_("'"), quoting_style);
247	for (quote_string = left; *quote_string; quote_string++)
248	  STORE (*quote_string);
249	backslash_escapes = true;
250	quote_string = right;
251	quote_string_len = strlen (quote_string);
252      }
253      break;
254
255    case shell_always_quoting_style:
256      STORE ('\'');
257      quote_string = "'";
258      quote_string_len = 1;
259      break;
260
261    default:
262      break;
263    }
264
265  for (i = 0;  ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize);  i++)
266    {
267      unsigned char c;
268      unsigned char esc;
269
270      if (backslash_escapes
271	  && quote_string_len
272	  && i + quote_string_len <= argsize
273	  && memcmp (arg + i, quote_string, quote_string_len) == 0)
274	STORE ('\\');
275
276      c = arg[i];
277      switch (c)
278	{
279	case '\0':
280	  if (backslash_escapes)
281	    {
282	      STORE ('\\');
283	      STORE ('0');
284	      STORE ('0');
285	      c = '0';
286	    }
287	  break;
288
289	case '?':
290	  switch (quoting_style)
291	    {
292	    case shell_quoting_style:
293	      goto use_shell_always_quoting_style;
294
295	    case c_quoting_style:
296	      if (i + 2 < argsize && arg[i + 1] == '?')
297		switch (arg[i + 2])
298		  {
299		  case '!': case '\'':
300		  case '(': case ')': case '-': case '/':
301		  case '<': case '=': case '>':
302		    /* Escape the second '?' in what would otherwise be
303		       a trigraph.  */
304		    c = arg[i + 2];
305		    i += 2;
306		    STORE ('?');
307		    STORE ('\\');
308		    STORE ('?');
309		    break;
310		  }
311	      break;
312
313	    default:
314	      break;
315	    }
316	  break;
317
318	case '\a': esc = 'a'; goto c_escape;
319	case '\b': esc = 'b'; goto c_escape;
320	case '\f': esc = 'f'; goto c_escape;
321	case '\n': esc = 'n'; goto c_and_shell_escape;
322	case '\r': esc = 'r'; goto c_and_shell_escape;
323	case '\t': esc = 't'; goto c_and_shell_escape;
324	case '\v': esc = 'v'; goto c_escape;
325	case '\\': esc = c; goto c_and_shell_escape;
326
327	c_and_shell_escape:
328	  if (quoting_style == shell_quoting_style)
329	    goto use_shell_always_quoting_style;
330	c_escape:
331	  if (backslash_escapes)
332	    {
333	      c = esc;
334	      goto store_escape;
335	    }
336	  break;
337
338	case '{': case '}': /* sometimes special if isolated */
339	  if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
340	    break;
341	  /* Fall through.  */
342	case '#': case '~':
343	  if (i != 0)
344	    break;
345	  /* Fall through.  */
346	case ' ':
347	case '!': /* special in bash */
348	case '"': case '$': case '&':
349	case '(': case ')': case '*': case ';':
350	case '<':
351	case '=': /* sometimes special in 0th or (with "set -k") later args */
352	case '>': case '[':
353	case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
354	case '`': case '|':
355	  /* A shell special character.  In theory, '$' and '`' could
356	     be the first bytes of multibyte characters, which means
357	     we should check them with mbrtowc, but in practice this
358	     doesn't happen so it's not worth worrying about.  */
359	  if (quoting_style == shell_quoting_style)
360	    goto use_shell_always_quoting_style;
361	  break;
362
363	case '\'':
364	  switch (quoting_style)
365	    {
366	    case shell_quoting_style:
367	      goto use_shell_always_quoting_style;
368
369	    case shell_always_quoting_style:
370	      STORE ('\'');
371	      STORE ('\\');
372	      STORE ('\'');
373	      break;
374
375	    default:
376	      break;
377	    }
378	  break;
379
380	case '%': case '+': case ',': case '-': case '.': case '/':
381	case '0': case '1': case '2': case '3': case '4': case '5':
382	case '6': case '7': case '8': case '9': case ':':
383	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
384	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
385	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
386	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
387	case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
388	case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
389	case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
390	case 'o': case 'p': case 'q': case 'r': case 's': case 't':
391	case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
392	  /* These characters don't cause problems, no matter what the
393	     quoting style is.  They cannot start multibyte sequences.  */
394	  break;
395
396	default:
397	  /* If we have a multibyte sequence, copy it until we reach
398	     its end, find an error, or come back to the initial shift
399	     state.  For C-like styles, if the sequence has
400	     unprintable characters, escape the whole sequence, since
401	     we can't easily escape single characters within it.  */
402	  {
403	    /* Length of multibyte sequence found so far.  */
404	    size_t m;
405
406	    bool printable;
407
408	    if (unibyte_locale)
409	      {
410		m = 1;
411		printable = isprint (c) != 0;
412	      }
413	    else
414	      {
415		mbstate_t mbstate;
416		memset (&mbstate, 0, sizeof mbstate);
417
418		m = 0;
419		printable = true;
420		if (argsize == SIZE_MAX)
421		  argsize = strlen (arg);
422
423		do
424		  {
425		    wchar_t w;
426		    size_t bytes = mbrtowc (&w, &arg[i + m],
427					    argsize - (i + m), &mbstate);
428		    if (bytes == 0)
429		      break;
430		    else if (bytes == (size_t) -1)
431		      {
432			printable = false;
433			break;
434		      }
435		    else if (bytes == (size_t) -2)
436		      {
437			printable = false;
438			while (i + m < argsize && arg[i + m])
439			  m++;
440			break;
441		      }
442		    else
443		      {
444			/* Work around a bug with older shells that "see" a '\'
445			   that is really the 2nd byte of a multibyte character.
446			   In practice the problem is limited to ASCII
447			   chars >= '@' that are shell special chars.  */
448			if ('[' == 0x5b && quoting_style == shell_quoting_style)
449			  {
450			    size_t j;
451			    for (j = 1; j < bytes; j++)
452			      switch (arg[i + m + j])
453				{
454				case '[': case '\\': case '^':
455				case '`': case '|':
456				  goto use_shell_always_quoting_style;
457				}
458			  }
459
460			if (! iswprint (w))
461			  printable = false;
462			m += bytes;
463		      }
464		  }
465		while (! mbsinit (&mbstate));
466	      }
467
468	    if (1 < m || (backslash_escapes && ! printable))
469	      {
470		/* Output a multibyte sequence, or an escaped
471		   unprintable unibyte character.  */
472		size_t ilim = i + m;
473
474		for (;;)
475		  {
476		    if (backslash_escapes && ! printable)
477		      {
478			STORE ('\\');
479			STORE ('0' + (c >> 6));
480			STORE ('0' + ((c >> 3) & 7));
481			c = '0' + (c & 7);
482		      }
483		    if (ilim <= i + 1)
484		      break;
485		    STORE (c);
486		    c = arg[++i];
487		  }
488
489		goto store_c;
490	      }
491	  }
492	}
493
494      if (! (backslash_escapes
495	     && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
496	goto store_c;
497
498    store_escape:
499      STORE ('\\');
500
501    store_c:
502      STORE (c);
503    }
504
505  if (i == 0 && quoting_style == shell_quoting_style)
506    goto use_shell_always_quoting_style;
507
508  if (quote_string)
509    for (; *quote_string; quote_string++)
510      STORE (*quote_string);
511
512  if (len < buffersize)
513    buffer[len] = '\0';
514  return len;
515
516 use_shell_always_quoting_style:
517  return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
518				   shell_always_quoting_style, o);
519}
520
521/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
522   argument ARG (of size ARGSIZE), using O to control quoting.
523   If O is null, use the default.
524   Terminate the output with a null character, and return the written
525   size of the output, not counting the terminating null.
526   If BUFFERSIZE is too small to store the output string, return the
527   value that would have been returned had BUFFERSIZE been large enough.
528   If ARGSIZE is SIZE_MAX, use the string length of the argument for
529   ARGSIZE.  */
530size_t
531quotearg_buffer (char *buffer, size_t buffersize,
532		 char const *arg, size_t argsize,
533		 struct quoting_options const *o)
534{
535  struct quoting_options const *p = o ? o : &default_quoting_options;
536  int e = errno;
537  size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
538				       p->style, p);
539  errno = e;
540  return r;
541}
542
/* Like quotearg_buffer (..., ARG, ARGSIZE, O), but return the quoted
   string in storage newly obtained from xmalloc.  errno is left
   unchanged.  */
char *
quotearg_alloc (char const *arg, size_t argsize,
                struct quoting_options const *o)
{
  int saved_errno = errno;

  /* A first pass with an empty buffer only measures the output.  */
  size_t quoted_len = quotearg_buffer (NULL, 0, arg, argsize, o);
  size_t capacity = quoted_len + 1;
  char *result = xmalloc (capacity);

  /* The second pass actually produces the quoted string.  */
  quotearg_buffer (result, capacity, arg, argsize, o);

  errno = saved_errno;
  return result;
}
556
/* Return a quoted version of argument ARG, held in storage slot N.
   ARG has size ARGSIZE, unless ARGSIZE is SIZE_MAX, in which case ARG
   is a null-terminated string.  OPTIONS gives the quoting options.
   The result lives in static storage that the next call with the same
   N may overwrite or reallocate.
   N must be nonnegative; the function aborts otherwise.  N has type
   "int" on purpose, to leave room for future extensions that use
   negative values.  errno is left unchanged.  */
static char *
quotearg_n_options (int n, char const *arg, size_t argsize,
                    struct quoting_options const *options)
{
  struct slotvec
    {
      size_t size;
      char *val;
    };

  /* Slot 0 starts out with a preallocated buffer, so that the caller
     can always quote one small component of a "memory exhausted"
     message in slot 0 without allocating.  */
  static char slot0[256];
  static struct slotvec slotvec0 = {sizeof slot0, slot0};
  static struct slotvec *slotvec = &slotvec0;
  static unsigned int nslots = 1;

  int saved_errno = errno;
  unsigned int index = n;
  struct slotvec *slot;
  char *out;
  size_t needed;

  if (n < 0)
    abort ();

  /* Grow the slot table so that slot N exists.  */
  if (index >= nslots)
    {
      unsigned int wanted = index + 1;

      /* XXX: wrong int cast to avoid gcc warning */
      if (xalloc_oversized ((int)wanted, sizeof *slotvec))
        xalloc_die ();

      /* The initial one-entry table is static and cannot be handed
         to xrealloc, so move it to the heap first.  */
      if (slotvec == &slotvec0)
        {
          slotvec = xmalloc (sizeof *slotvec);
          *slotvec = slotvec0;
        }
      slotvec = xrealloc (slotvec, wanted * sizeof *slotvec);
      memset (slotvec + nslots, 0, (wanted - nslots) * sizeof *slotvec);
      nslots = wanted;
    }

  slot = &slotvec[n];
  out = slot->val;
  needed = quotearg_buffer (out, slot->size, arg, argsize, options);

  /* If the slot's buffer cannot hold the output plus its terminating
     null, replace it with one of exactly the right size and redo the
     quoting.  */
  if (needed >= slot->size)
    {
      slot->size = needed + 1;
      if (out != slot0)
        free (out);
      out = xmalloc (slot->size);
      slot->val = out;
      quotearg_buffer (out, slot->size, arg, argsize, options);
    }

  errno = saved_errno;
  return out;
}
623
624char *
625quotearg_n (int n, char const *arg)
626{
627  return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
628}
629
/* Quote the null-terminated string ARG with the default quoting
   options, using storage slot 0.  */
char *
quotearg (char const *arg)
{
  char *quoted = quotearg_n (0, arg);

  return quoted;
}
635
636/* Return quoting options for STYLE, with no extra quoting.  */
637static struct quoting_options
638quoting_options_from_style (enum quoting_style style)
639{
640  struct quoting_options o;
641  o.style = style;
642  memset (o.quote_these_too, 0, sizeof o.quote_these_too);
643  return o;
644}
645
646char *
647quotearg_n_style (int n, enum quoting_style s, char const *arg)
648{
649  struct quoting_options const o = quoting_options_from_style (s);
650  return quotearg_n_options (n, arg, SIZE_MAX, &o);
651}
652
653char *
654quotearg_n_style_mem (int n, enum quoting_style s,
655		      char const *arg, size_t argsize)
656{
657  struct quoting_options const o = quoting_options_from_style (s);
658  return quotearg_n_options (n, arg, argsize, &o);
659}
660
661char *
662quotearg_style (enum quoting_style s, char const *arg)
663{
664  return quotearg_n_style (0, s, arg);
665}
666
667char *
668quotearg_char (char const *arg, char ch)
669{
670  struct quoting_options options;
671  options = default_quoting_options;
672  set_char_quoting (&options, ch, 1);
673  return quotearg_n_options (0, arg, SIZE_MAX, &options);
674}
675
/* Quote the null-terminated string ARG with the default quoting
   options, additionally marking ':' for quoting.  */
char *
quotearg_colon (char const *arg)
{
  char *quoted = quotearg_char (arg, ':');

  return quoted;
}
681