1/* braces.c -- code for doing word expansion in curly braces. */
2
3/* Copyright (C) 1987-2003 Free Software Foundation, Inc.
4
5   This file is part of GNU Bash, the Bourne Again SHell.
6
7   Bash is free software; you can redistribute it and/or modify it
8   under the terms of the GNU General Public License as published by
9   the Free Software Foundation; either version 2, or (at your option)
10   any later version.
11
12   Bash is distributed in the hope that it will be useful, but WITHOUT
13   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15   License for more details.
16
17   You should have received a copy of the GNU General Public License
18   along with Bash; see the file COPYING.  If not, write to the Free
19   Software Foundation, 59 Temple Place, Suite 330, Boston, MA 02111 USA. */
20
21/* Stuff in curly braces gets expanded before all other shell expansions. */
22
23#include "config.h"
24
25#if defined (BRACE_EXPANSION)
26
27#if defined (HAVE_UNISTD_H)
28#  ifdef _MINIX
29#    include <sys/types.h>
30#  endif
31#  include <unistd.h>
32#endif
33
34#include "bashansi.h"
35
36#if defined (SHELL)
37#  include "shell.h"
38#endif /* SHELL */
39
40#include "general.h"
41#include "shmbutil.h"
42#include "chartypes.h"
43
44#define brace_whitespace(c) (!(c) || (c) == ' ' || (c) == '\t' || (c) == '\n')
45
46#define BRACE_SEQ_SPECIFIER	".."
47
48/* Basic idea:
49
50   Segregate the text into 3 sections: preamble (stuff before an open brace),
51   postamble (stuff after the matching close brace) and amble (stuff after
52   preamble, and before postamble).  Expand amble, and then tack on the
53   expansions to preamble.  Expand postamble, and tack on the expansions to
54   the result so far.
55 */
56
57/* The character which is used to separate arguments. */
58int brace_arg_separator = ',';
59
60#if defined (__P)
61static int brace_gobbler __P((char *, size_t, int *, int));
62static char **expand_amble __P((char *, size_t, int));
63static char **expand_seqterm __P((char *, size_t));
64static char **mkseq __P((int, int, int, int));
65static char **array_concat __P((char **, char **));
66#else
67static int brace_gobbler ();
68static char **expand_amble ();
69static char **expand_seqterm ();
70static char **mkseq();
71static char **array_concat ();
72#endif
73
74#if 0
75static void
76dump_result (a)
77     char **a;
78{
79  int i;
80
81  for (i = 0; a[i]; i++)
82    printf ("dump_result: a[%d] = -%s-\n", i, a[i]);
83}
84#endif
85
86/* Return an array of strings; the brace expansion of TEXT. */
87char **
88brace_expand (text)
89     char *text;
90{
91  register int start;
92  size_t tlen;
93  char *preamble, *postamble, *amble;
94  size_t alen;
95  char **tack, **result;
96  int i, j, c, c1;
97
98  DECLARE_MBSTATE;
99
100  /* Find the text of the preamble. */
101  tlen = strlen (text);
102  i = 0;
103#if defined (CSH_BRACE_COMPAT)
104  c = brace_gobbler (text, tlen, &i, '{');	/* } */
105#else
106  /* Make sure that when we exit this loop, c == 0 or text[i] begins a
107     valid brace expansion sequence. */
108  do
109    {
110      c = brace_gobbler (text, tlen, &i, '{');	/* } */
111      c1 = c;
112      /* Verify that c begins a valid brace expansion word.  If it doesn't, we
113	 go on.  Loop stops when there are no more open braces in the word. */
114      if (c)
115	{
116	  start = j = i + 1;	/* { */
117	  c = brace_gobbler (text, tlen, &j, '}');
118	  if (c == 0)		/* it's not */
119	    {
120	      i++;
121	      c = c1;
122	      continue;
123	    }
124	  else			/* it is */
125	    {
126	      c = c1;
127	      break;
128	    }
129	}
130      else
131	break;
132    }
133  while (c);
134#endif /* !CSH_BRACE_COMPAT */
135
136  preamble = (char *)xmalloc (i + 1);
137  strncpy (preamble, text, i);
138  preamble[i] = '\0';
139
140  result = (char **)xmalloc (2 * sizeof (char *));
141  result[0] = preamble;
142  result[1] = (char *)NULL;
143
144  /* Special case.  If we never found an exciting character, then
145     the preamble is all of the text, so just return that. */
146  if (c != '{')
147    return (result);
148
149  /* Find the amble.  This is the stuff inside this set of braces. */
150  start = ++i;
151  c = brace_gobbler (text, tlen, &i, '}');
152
153  /* What if there isn't a matching close brace? */
154  if (c == 0)
155    {
156#if defined (NOTDEF)
157      /* Well, if we found an unquoted BRACE_ARG_SEPARATOR between START
158	 and I, then this should be an error.  Otherwise, it isn't. */
159      j = start;
160      while (j < i)
161	{
162	  if (text[j] == '\\')
163	    {
164	      j++;
165	      ADVANCE_CHAR (text, tlen, j);
166	      continue;
167	    }
168
169	  if (text[j] == brace_arg_separator)
170	    {	/* { */
171	      strvec_dispose (result);
172	      report_error ("no closing `%c' in %s", '}', text);
173	      throw_to_top_level ();
174	    }
175	  ADVANCE_CHAR (text, tlen, j);
176	}
177#endif
178      free (preamble);		/* Same as result[0]; see initialization. */
179      result[0] = savestring (text);
180      return (result);
181    }
182
183#if defined (SHELL)
184  amble = substring (text, start, i);
185  alen = i - start;
186#else
187  amble = (char *)xmalloc (1 + (i - start));
188  strncpy (amble, &text[start], (i - start));
189  alen = i - start;
190  amble[alen] = '\0';
191#endif
192
193#if defined (SHELL)
194  INITIALIZE_MBSTATE;
195
196  /* If the amble does not contain an unquoted BRACE_ARG_SEPARATOR, then
197     just return without doing any expansion.  */
198  j = 0;
199  while (amble[j])
200    {
201      if (amble[j] == '\\')
202	{
203	  j++;
204	  ADVANCE_CHAR (amble, alen, j);
205	  continue;
206	}
207
208      if (amble[j] == brace_arg_separator)
209	break;
210
211      ADVANCE_CHAR (amble, alen, j);
212    }
213
214  if (amble[j] == 0)
215    {
216      tack = expand_seqterm (amble, alen);
217      if (tack)
218	goto add_tack;
219      else
220	{
221	  free (amble);
222	  free (preamble);
223	  result[0] = savestring (text);
224	  return (result);
225	}
226    }
227#endif /* SHELL */
228
229  tack = expand_amble (amble, alen, 0);
230add_tack:
231  result = array_concat (result, tack);
232  free (amble);
233  strvec_dispose (tack);
234
235  postamble = text + i + 1;
236
237  tack = brace_expand (postamble);
238  result = array_concat (result, tack);
239  strvec_dispose (tack);
240
241  return (result);
242}
243
244/* Expand the text found inside of braces.  We simply try to split the
245   text at BRACE_ARG_SEPARATORs into separate strings.  We then brace
246   expand each slot which needs it, until there are no more slots which
247   need it. */
248static char **
249expand_amble (text, tlen, flags)
250     char *text;
251     size_t tlen;
252     int flags;
253{
254  char **result, **partial;
255  char *tem;
256  int start, i, c;
257
258  DECLARE_MBSTATE;
259
260  result = (char **)NULL;
261
262  start = i = 0;
263  c = 1;
264  while (c)
265    {
266      c = brace_gobbler (text, tlen, &i, brace_arg_separator);
267#if defined (SHELL)
268      tem = substring (text, start, i);
269#else
270      tem = (char *)xmalloc (1 + (i - start));
271      strncpy (tem, &text[start], (i - start));
272      tem[i- start] = '\0';
273#endif
274
275      partial = brace_expand (tem);
276
277      if (!result)
278	result = partial;
279      else
280	{
281	  register int lr, lp, j;
282
283	  lr = strvec_len (result);
284	  lp = strvec_len (partial);
285
286	  result = strvec_resize (result, lp + lr + 1);
287
288	  for (j = 0; j < lp; j++)
289	    result[lr + j] = partial[j];
290
291	  result[lr + j] = (char *)NULL;
292	  free (partial);
293	}
294      free (tem);
295      ADVANCE_CHAR (text, tlen, i);
296      start = i;
297    }
298  return (result);
299}
300
301#define ST_BAD	0
302#define ST_INT	1
303#define ST_CHAR	2
304
305static char **
306mkseq (start, end, incr, type)
307     int start, end, incr, type;
308{
309  int n, i;
310  char **result, *t;
311
312  n = abs (end - start) + 1;
313  result = strvec_create (n + 1);
314
315  if (incr == 0)
316    incr = 1;
317
318  if (start > end && incr > 0)
319    incr = -incr;
320  else if (start < end && incr < 0)
321    incr = -incr;
322
323  /* Make sure we go through the loop at least once, so {3..3} prints `3' */
324  i = 0;
325  n = start;
326  do
327    {
328#if defined (SHELL)
329      QUIT;		/* XXX - memory leak here */
330#endif
331      if (type == ST_INT)
332	result[i++] = itos (n);
333      else
334	{
335	  t = (char *)xmalloc (2);
336	  t[0] = n;
337	  t[1] = '\0';
338	  result[i++] = t;
339	}
340      if (n == end)
341        break;
342      n += incr;
343    }
344  while (1);
345
346  result[i] = (char *)0;
347  return (result);
348}
349
350static char **
351expand_seqterm (text, tlen)
352     char *text;
353     size_t tlen;
354{
355  char *t, *lhs, *rhs;
356  int i, lhs_t, rhs_t, lhs_v, rhs_v;
357  intmax_t tl, tr;
358  char **result;
359
360  t = strstr (text, BRACE_SEQ_SPECIFIER);
361  if (t == 0)
362    return ((char **)NULL);
363
364  i = t - text;		/* index of start of BRACE_SEQ_SPECIFIER */
365  lhs = substring (text, 0, i);
366  rhs = substring (text, i + sizeof(BRACE_SEQ_SPECIFIER) - 1, tlen);
367
368  if (lhs[0] == 0 || rhs[0] == 0)
369    {
370      free (lhs);
371      free (rhs);
372      return ((char **)NULL);
373    }
374
375  /* Now figure out whether LHS and RHS are integers or letters.  Both
376     sides have to match. */
377  lhs_t = (legal_number (lhs, &tl)) ? ST_INT :
378  		((ISALPHA (lhs[0]) && lhs[1] == 0) ?  ST_CHAR : ST_BAD);
379  rhs_t = (legal_number (rhs, &tr)) ? ST_INT :
380  		((ISALPHA (rhs[0]) && rhs[1] == 0) ?  ST_CHAR : ST_BAD);
381
382  if (lhs_t != rhs_t || lhs_t == ST_BAD || rhs_t == ST_BAD)
383    {
384      free (lhs);
385      free (rhs);
386      return ((char **)NULL);
387    }
388
389  /* OK, we have something.  It's either a sequence of integers, ascending
390     or descending, or a sequence or letters, ditto.  Generate the sequence,
391     put it into a string vector, and return it. */
392
393  if (lhs_t == ST_CHAR)
394    {
395      lhs_v = (unsigned char)lhs[0];
396      rhs_v = (unsigned char)rhs[0];
397    }
398  else
399    {
400      lhs_v = tl;		/* integer truncation */
401      rhs_v = tr;
402    }
403
404  result = mkseq (lhs_v, rhs_v, 1, lhs_t);
405
406  free (lhs);
407  free (rhs);
408
409  return (result);
410}
411
412/* Start at INDEX, and skip characters in TEXT. Set INDEX to the
413   index of the character matching SATISFY.  This understands about
414   quoting.  Return the character that caused us to stop searching;
415   this is either the same as SATISFY, or 0. */
416/* If SATISFY is `}', we are looking for a brace expression, so we
417   should enforce the rules that govern valid brace expansions:
418	1) to count as an arg separator, a comma or `..' has to be outside
419	   an inner set of braces.
420*/
421static int
422brace_gobbler (text, tlen, indx, satisfy)
423     char *text;
424     size_t tlen;
425     int *indx;
426     int satisfy;
427{
428  register int i, c, quoted, level, commas, pass_next;
429#if defined (SHELL)
430  int si;
431  char *t;
432#endif
433  DECLARE_MBSTATE;
434
435  level = quoted = pass_next = 0;
436#if defined (CSH_BRACE_COMPAT)
437  commas = 1;
438#else
439  commas = (satisfy == '}') ? 0 : 1;
440#endif
441
442  i = *indx;
443  while (c = text[i])
444    {
445      if (pass_next)
446	{
447	  pass_next = 0;
448	  ADVANCE_CHAR (text, tlen, i);
449	  continue;
450	}
451
452      /* A backslash escapes the next character.  This allows backslash to
453	 escape the quote character in a double-quoted string. */
454      if (c == '\\' && (quoted == 0 || quoted == '"' || quoted == '`'))
455	{
456	  pass_next = 1;
457	  i++;
458	  continue;
459	}
460
461#if defined (SHELL)
462      /* If compiling for the shell, treat ${...} like \{...} */
463      if (c == '$' && text[i+1] == '{' && quoted != '\'')		/* } */
464	{
465	  pass_next = 1;
466	  i++;
467	  if (quoted == 0)
468	    level++;
469	  continue;
470	}
471#endif
472
473      if (quoted)
474	{
475	  if (c == quoted)
476	    quoted = 0;
477	  ADVANCE_CHAR (text, tlen, i);
478	  continue;
479	}
480
481      if (c == '"' || c == '\'' || c == '`')
482	{
483	  quoted = c;
484	  i++;
485	  continue;
486	}
487
488#if defined (SHELL)
489      /* Pass new-style command substitutions through unchanged. */
490      if (c == '$' && text[i+1] == '(')			/* ) */
491	{
492	  si = i + 2;
493	  t = extract_command_subst (text, &si);
494	  i = si;
495	  free (t);
496	  i++;
497	  continue;
498	}
499#endif
500
501      if (c == satisfy && level == 0 && quoted == 0 && commas > 0)
502	{
503	  /* We ignore an open brace surrounded by whitespace, and also
504	     an open brace followed immediately by a close brace preceded
505	     by whitespace.  */
506	  if (c == '{' &&
507	      ((!i || brace_whitespace (text[i - 1])) &&
508	       (brace_whitespace (text[i + 1]) || text[i + 1] == '}')))
509	    {
510	      i++;
511	      continue;
512	    }
513
514	    break;
515	}
516
517      if (c == '{')
518	level++;
519      else if (c == '}' && level)
520	level--;
521#if !defined (CSH_BRACE_COMPAT)
522      else if (satisfy == '}' && c == brace_arg_separator && level == 0)
523	commas++;
524      else if (satisfy == '}' && STREQN (text+i, BRACE_SEQ_SPECIFIER, 2) &&
525      		text[i+2] != satisfy && level == 0)
526	commas++;
527#endif
528
529      ADVANCE_CHAR (text, tlen, i);
530    }
531
532  *indx = i;
533  return (c);
534}
535
536/* Return a new array of strings which is the result of appending each
537   string in ARR2 to each string in ARR1.  The resultant array is
538   len (arr1) * len (arr2) long.  For convenience, ARR1 (and its contents)
539   are free ()'ed.  ARR1 can be NULL, in that case, a new version of ARR2
540   is returned. */
541static char **
542array_concat (arr1, arr2)
543     char **arr1, **arr2;
544{
545  register int i, j, len, len1, len2;
546  register char **result;
547
548  if (arr1 == 0)
549    return (strvec_copy (arr2));
550
551  if (arr2 == 0)
552    return (strvec_copy (arr1));
553
554  len1 = strvec_len (arr1);
555  len2 = strvec_len (arr2);
556
557  result = (char **)xmalloc ((1 + (len1 * len2)) * sizeof (char *));
558
559  len = 0;
560  for (i = 0; i < len1; i++)
561    {
562      int strlen_1 = strlen (arr1[i]);
563
564      for (j = 0; j < len2; j++)
565	{
566	  result[len] = (char *)xmalloc (1 + strlen_1 + strlen (arr2[j]));
567	  strcpy (result[len], arr1[i]);
568	  strcpy (result[len] + strlen_1, arr2[j]);
569	  len++;
570	}
571      free (arr1[i]);
572    }
573  free (arr1);
574
575  result[len] = (char *)NULL;
576  return (result);
577}
578
579#if defined (TEST)
580#include <stdio.h>
581
582fatal_error (format, arg1, arg2)
583     char *format, *arg1, *arg2;
584{
585  report_error (format, arg1, arg2);
586  exit (1);
587}
588
589report_error (format, arg1, arg2)
590     char *format, *arg1, *arg2;
591{
592  fprintf (stderr, format, arg1, arg2);
593  fprintf (stderr, "\n");
594}
595
596main ()
597{
598  char example[256];
599
600  for (;;)
601    {
602      char **result;
603      int i;
604
605      fprintf (stderr, "brace_expand> ");
606
607      if ((!fgets (example, 256, stdin)) ||
608	  (strncmp (example, "quit", 4) == 0))
609	break;
610
611      if (strlen (example))
612	example[strlen (example) - 1] = '\0';
613
614      result = brace_expand (example);
615
616      for (i = 0; result[i]; i++)
617	printf ("%s\n", result[i]);
618
619      free_array (result);
620    }
621}
622
623/*
624 * Local variables:
625 * compile-command: "gcc -g -Bstatic -DTEST -o brace_expand braces.c general.o"
626 * end:
627 */
628
629#endif /* TEST */
630#endif /* BRACE_EXPANSION */
631