1/* strtrans.c - Translate and untranslate strings with ANSI-C escape
2		sequences. */
3
4/* Copyright (C) 2000
5   Free Software Foundation, Inc.
6
7   This file is part of GNU Bash, the Bourne Again SHell.
8
9   Bash is free software; you can redistribute it and/or modify it under
10   the terms of the GNU General Public License as published by the Free
11   Software Foundation; either version 2, or (at your option) any later
12   version.
13
14   Bash is distributed in the hope that it will be useful, but WITHOUT ANY
15   WARRANTY; without even the implied warranty of MERCHANTABILITY or
16   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
17   for more details.
18
19   You should have received a copy of the GNU General Public License along
20   with Bash; see the file COPYING.  If not, write to the Free Software
21   Foundation, 59 Temple Place, Suite 330, Boston, MA 02111 USA. */
22
23#include <config.h>
24
25#if defined (HAVE_UNISTD_H)
26#  include <unistd.h>
27#endif
28
29#include <bashansi.h>
30#include <stdio.h>
31#include <chartypes.h>
32
33#include "shell.h"
34
/* Force ESC to the ASCII escape character (octal 033) regardless of any
   definition picked up from the headers included above.  #undef of a name
   that is not currently defined is a no-op, so no #ifdef guard is needed. */
#undef ESC
#define ESC '\033'	/* ASCII */
39
40/* Convert STRING by expanding the escape sequences specified by the
41   ANSI C standard.  If SAWC is non-null, recognize `\c' and use that
42   as a string terminator.  If we see \c, set *SAWC to 1 before
43   returning.  LEN is the length of STRING.  If (FLAGS&1) is non-zero,
44   that we're translating a string for `echo -e', and therefore should not
45   treat a single quote as a character that may be escaped with a backslash.
46   If (FLAGS&2) is non-zero, we're expanding for the parser and want to
47   quote CTLESC and CTLNUL with CTLESC.  If (flags&4) is non-zero, we want
48   to remove the backslash before any unrecognized escape sequence. */
49char *
50ansicstr (string, len, flags, sawc, rlen)
51     char *string;
52     int len, flags, *sawc, *rlen;
53{
54  int c, temp;
55  char *ret, *r, *s;
56
57  if (string == 0 || *string == '\0')
58    return ((char *)NULL);
59
60  ret = (char *)xmalloc (2*len + 1);	/* 2*len for possible CTLESC */
61  for (r = ret, s = string; s && *s; )
62    {
63      c = *s++;
64      if (c != '\\' || *s == '\0')
65	*r++ = c;
66      else
67	{
68	  switch (c = *s++)
69	    {
70#if defined (__STDC__)
71	    case 'a': c = '\a'; break;
72	    case 'v': c = '\v'; break;
73#else
74	    case 'a': c = '\007'; break;
75	    case 'v': c = (int) 0x0B; break;
76#endif
77	    case 'b': c = '\b'; break;
78	    case 'e': case 'E':		/* ESC -- non-ANSI */
79		    if (0 == (flags & 1))
80			    c = ESC;
81		    else if (0 == (flags & 4))
82		            *r++ = '\\';
83		    break;
84	    case 'f': c = '\f'; break;
85	    case 'n': c = '\n'; break;
86	    case 'r': c = '\r'; break;
87	    case 't': c = '\t'; break;
88	    case '1': case '2': case '3':
89	    case '4': case '5': case '6':
90	    case '7':
91#if 1
92	      if (flags & 1)
93		{
94		  *r++ = '\\';
95		  break;
96		}
97	    /*FALLTHROUGH*/
98#endif
99	    case '0':
100	      /* If (FLAGS & 1), we're translating a string for echo -e (or
101		 the equivalent xpg_echo option), so we obey the SUSv3/
102		 POSIX-2001 requirement and accept 0-3 octal digits after
103		 a leading `0'. */
104	      temp = 2 + ((flags & 1) && (c == '0'));
105	      for (c -= '0'; ISOCTAL (*s) && temp--; s++)
106		c = (c * 8) + OCTVALUE (*s);
107	      c &= 0xFF;
108	      break;
109	    case 'x':			/* Hex digit -- non-ANSI */
110	      if ((flags & 2) && *s == '{')
111		{
112		  flags |= 16;		/* internal flag value */
113		  s++;
114		}
115	      /* Consume at least two hex characters */
116	      for (temp = 2, c = 0; ISXDIGIT ((unsigned char)*s) && temp--; s++)
117		c = (c * 16) + HEXVALUE (*s);
118	      /* DGK says that after a `\x{' ksh93 consumes ISXDIGIT chars
119		 until a non-xdigit or `}', so potentially more than two
120		 chars are consumed. */
121	      if (flags & 16)
122		{
123		  for ( ; ISXDIGIT ((unsigned char)*s); s++)
124		    c = (c * 16) + HEXVALUE (*s);
125		  flags &= ~16;
126		  if (*s == '}')
127		    s++;
128	        }
129	      /* \x followed by non-hex digits is passed through unchanged */
130	      else if (temp == 2)
131		{
132		  *r++ = '\\';
133		  c = 'x';
134		}
135	      c &= 0xFF;
136	      break;
137	    case '\\':
138	      break;
139	    case '\'': case '"': case '?':
140	      if (flags & 1)
141		*r++ = '\\';
142	      break;
143	    case 'c':
144	      if (sawc)
145		{
146		  *sawc = 1;
147		  *r = '\0';
148		  if (rlen)
149		    *rlen = r - ret;
150		  return ret;
151		}
152	      else if ((flags & 1) == 0 && (c = *s))
153		{
154		  s++;
155		  c = TOCTRL(c);
156		  break;
157		}
158		/*FALLTHROUGH*/
159	    default:
160		if ((flags & 4) == 0)
161		  *r++ = '\\';
162		break;
163	    }
164	  if ((flags & 2) && (c == CTLESC || c == CTLNUL))
165	    *r++ = CTLESC;
166	  *r++ = c;
167	}
168    }
169  *r = '\0';
170  if (rlen)
171    *rlen = r - ret;
172  return ret;
173}
174
175/* Take a string STR, possibly containing non-printing characters, and turn it
176   into a $'...' ANSI-C style quoted string.  Returns a new string. */
177char *
178ansic_quote (str, flags, rlen)
179     char *str;
180     int flags, *rlen;
181{
182  char *r, *ret, *s;
183  int l, rsize;
184  unsigned char c;
185
186  if (str == 0 || *str == 0)
187    return ((char *)0);
188
189  l = strlen (str);
190  rsize = 4 * l + 4;
191  r = ret = (char *)xmalloc (rsize);
192
193  *r++ = '$';
194  *r++ = '\'';
195
196  for (s = str, l = 0; *s; s++)
197    {
198      c = *s;
199      l = 1;		/* 1 == add backslash; 0 == no backslash */
200      switch (c)
201	{
202	case ESC: c = 'E'; break;
203#ifdef __STDC__
204	case '\a': c = 'a'; break;
205	case '\v': c = 'v'; break;
206#else
207	case '\007': c = 'a'; break;
208	case 0x0b: c = 'v'; break;
209#endif
210
211	case '\b': c = 'b'; break;
212	case '\f': c = 'f'; break;
213	case '\n': c = 'n'; break;
214	case '\r': c = 'r'; break;
215	case '\t': c = 't'; break;
216	case '\\':
217	case '\'':
218	  break;
219	default:
220	  if (ISPRINT (c) == 0)
221	    {
222	      *r++ = '\\';
223	      *r++ = TOCHAR ((c >> 6) & 07);
224	      *r++ = TOCHAR ((c >> 3) & 07);
225	      *r++ = TOCHAR (c & 07);
226	      continue;
227	    }
228	  l = 0;
229	  break;
230	}
231      if (l)
232	*r++ = '\\';
233      *r++ = c;
234    }
235
236  *r++ = '\'';
237  *r = '\0';
238  if (rlen)
239    *rlen = r - ret;
240  return ret;
241}
242
/* Return 1 if STRING contains at least one byte for which ISPRINT is
   false, meaning it needs $'...' quoting; return 0 otherwise, including
   when STRING is null or empty. */
int
ansic_shouldquote (string)
     const char *string;
{
  const char *p;
  unsigned char ch;

  if (string == 0)
    return 0;

  p = string;
  while ((ch = *p++) != 0)
    {
      if (ISPRINT (ch) == 0)
	return 1;
    }

  return 0;
}
260
/* $'...' ANSI-C expand the portion of STRING between indices START
   (inclusive) and END (exclusive) and return the result in newly-allocated
   memory.  The expansion is done by ansicstr with a FLAGS value of 2, so
   CTLESC and CTLNUL characters produced by escape sequences come back
   quoted with CTLESC.  If LENP is non-null, *LENP is set to the length of
   the result; use it rather than END - START.  If the selected portion is
   empty, an empty string is returned and *LENP is set to 0. */
char *
ansiexpand (string, start, end, lenp)
     char *string;
     int start, end, *lenp;
{
  char *piece, *expanded;
  int i, n;

  /* Make a NUL-terminated private copy of string[start..end-1]. */
  piece = (char *)xmalloc (end - start + 1);
  n = 0;
  for (i = start; i < end; i++)
    piece[n++] = string[i];
  piece[n] = '\0';

  /* Nothing to translate: hand back the empty copy itself. */
  if (*piece == '\0')
    {
      if (lenp)
	*lenp = 0;
      return (piece);
    }

  expanded = ansicstr (piece, n, 2, (int *)NULL, lenp);
  free (piece);
  return (expanded);
}
289