• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /netgear-WNDR4500v2-V1.0.0.60_1.0.38/ap/gpl/timemachine/gettext-0.17/gettext-tools/src/
1/* xgettext sh backend.
2   Copyright (C) 2003, 2005-2007 Free Software Foundation, Inc.
3   Written by Bruno Haible <bruno@clisp.org>, 2003.
4
5   This program is free software: you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 3 of the License, or
8   (at your option) any later version.
9
10   This program is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   You should have received a copy of the GNU General Public License
16   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
17
18#ifdef HAVE_CONFIG_H
19# include "config.h"
20#endif
21
22/* Specification.  */
23#include "x-sh.h"
24
25#include <errno.h>
26#include <limits.h>
27#include <stdbool.h>
28#include <stdio.h>
29#include <stdlib.h>
30#include <string.h>
31
32#include "message.h"
33#include "xgettext.h"
34#include "x-sh.h"
35#include "error.h"
36#include "xalloc.h"
37#include "hash.h"
38#include "gettext.h"
39
/* Shorthand for translating a message through gettext.  */
#define _(s) gettext(s)

/* Number of elements of the array A.  A must be a true array, not a pointer.
   The argument is parenthesized before being subscripted, so that any
   array-valued expression (e.g. a cast followed by a dereference) is
   measured correctly instead of being re-associated with the '[0]'.  */
#define SIZEOF(a) (sizeof(a) / sizeof((a)[0]))
43
44
45/* The sh syntax is defined in POSIX:2001, see
46     http://www.opengroup.org/onlinepubs/007904975/utilities/xcu_chap02.html
47   Summary of sh syntax:
48   - Input is broken into words, which are then subject to
49     - tilde expansion ~...
50     - command substitution `...`
51     - variable substitution $var
52     - arithmetic substitution $((...))
53     - field splitting at whitespace (IFS)
54     - wildcard pattern expansion *?
55     - quote removal
56   - Strings are enclosed in "..."; command substitution, variable
57     substitution and arithmetic substitution are performed here as well.
58   - '...' is a string without substitutions.
59   - The list of resulting words is split into commands by semicolon and
60     newline.
61   - '#' at the beginning of a word introduces a comment until end of line.
62   The parser is implemented in bash-2.05b/parse.y.  */
63
64
65/* ====================== Keyword set customization.  ====================== */
66
67/* If true extract all strings.  */
68static bool extract_all = false;
69
70static hash_table keywords;
71static bool default_keywords = true;
72
73
74void
75x_sh_extract_all ()
76{
77  extract_all = true;
78}
79
80
81void
82x_sh_keyword (const char *name)
83{
84  if (name == NULL)
85    default_keywords = false;
86  else
87    {
88      const char *end;
89      struct callshape shape;
90      const char *colon;
91
92      if (keywords.table == NULL)
93	hash_init (&keywords, 100);
94
95      split_keywordspec (name, &end, &shape);
96
97      /* The characters between name and end should form a valid C identifier.
98	 A colon means an invalid parse in split_keywordspec().  */
99      colon = strchr (name, ':');
100      if (colon == NULL || colon >= end)
101	insert_keyword_callshape (&keywords, name, end - name, &shape);
102    }
103}
104
105/* Finish initializing the keywords hash table.
106   Called after argument processing, before each file is processed.  */
107static void
108init_keywords ()
109{
110  if (default_keywords)
111    {
112      /* When adding new keywords here, also update the documentation in
113	 xgettext.texi!  */
114      x_sh_keyword ("gettext");
115      x_sh_keyword ("ngettext:1,2");
116      x_sh_keyword ("eval_gettext");
117      x_sh_keyword ("eval_ngettext:1,2");
118      default_keywords = false;
119    }
120}
121
/* Record the format-string flags of the arguments of the default
   keywords.  */
void
init_flag_table_sh ()
{
  /* The specifications are recorded in exactly this order.  */
  static const char *const flag_specs[] =
    {
      "gettext:1:pass-sh-format",
      "ngettext:1:pass-sh-format",
      "ngettext:2:pass-sh-format",
      "eval_gettext:1:sh-format",
      "eval_ngettext:1:sh-format",
      "eval_ngettext:2:sh-format"
    };
  size_t i;

  for (i = 0; i < sizeof (flag_specs) / sizeof (flag_specs[0]); i++)
    xgettext_record_flag (flag_specs[i]);
}
132
133
134/* ======================== Reading of characters.  ======================== */
135
/* Name of the file actually being read; appears in error messages about
   the input file.  */
static const char *real_file_name;

/* File name and line number under which extracted messages are
   labelled.  */
static char *logical_file_name;
static int line_number;

/* Stream from which the input is read.  */
static FILE *fp;
145
146
147/* Fetch the next character from the input file.  */
148static int
149do_getc ()
150{
151  int c = getc (fp);
152
153  if (c == EOF)
154    {
155      if (ferror (fp))
156	error (EXIT_FAILURE, errno, _("\
157error while reading \"%s\""), real_file_name);
158    }
159  else if (c == '\n')
160   line_number++;
161
162  return c;
163}
164
165/* Put back the last fetched character, not EOF.  */
166static void
167do_ungetc (int c)
168{
169  if (c == '\n')
170    line_number--;
171  ungetc (c, fp);
172}
173
174
175/* Remove backslash followed by newline from the input stream.  */
176
177static int phase1_pushback[1];
178static int phase1_pushback_length;
179
180static int
181phase1_getc ()
182{
183  int c;
184
185  if (phase1_pushback_length)
186    {
187      c = phase1_pushback[--phase1_pushback_length];
188      if (c == '\n')
189	++line_number;
190      return c;
191    }
192  for (;;)
193    {
194      c = do_getc ();
195      if (c != '\\')
196	return c;
197      c = do_getc ();
198      if (c != '\n')
199	{
200	  if (c != EOF)
201	    do_ungetc (c);
202	  return '\\';
203	}
204    }
205}
206
207/* Supports only one pushback character.  */
208static void
209phase1_ungetc (int c)
210{
211  switch (c)
212    {
213    case EOF:
214      break;
215
216    case '\n':
217      --line_number;
218      /* FALLTHROUGH */
219
220    default:
221      if (phase1_pushback_length == SIZEOF (phase1_pushback))
222	abort ();
223      phase1_pushback[phase1_pushback_length++] = c;
224      break;
225    }
226}
227
228
229/* ========================== Reading of tokens.  ========================== */
230
231
/* A token is a growable sequence of characters.  */
struct token
{
  int allocated;   /* capacity of 'chars', in characters */
  int charcount;   /* number of characters currently in use */
  char *chars;     /* the characters making up the token */
};
239
240/* Initialize a 'struct token'.  */
241static inline void
242init_token (struct token *tp)
243{
244  tp->allocated = 10;
245  tp->chars = XNMALLOC (tp->allocated, char);
246  tp->charcount = 0;
247}
248
249/* Free the memory pointed to by a 'struct token'.  */
250static inline void
251free_token (struct token *tp)
252{
253  free (tp->chars);
254}
255
256/* Ensure there is enough room in the token for one more character.  */
257static inline void
258grow_token (struct token *tp)
259{
260  if (tp->charcount == tp->allocated)
261    {
262      tp->allocated *= 2;
263      tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
264    }
265}
266
267/* Convert a struct token * to a char*.  */
268static char *
269string_of_token (const struct token *tp)
270{
271  char *str;
272  int n;
273
274  n = tp->charcount;
275  str = XNMALLOC (n + 1, char);
276  memcpy (str, tp->chars, n);
277  str[n] = '\0';
278  return str;
279}
280
281
282/* ========================= Accumulating messages ========================= */
283
284
285static message_list_ty *mlp;
286
287
288/* ========================= Accumulating comments ========================= */
289
290
/* Growable buffer holding the text of the comment line being read.  */
static char *buffer;
/* Allocated size of buffer.  */
static size_t bufmax;
/* Number of characters currently stored in buffer.  */
static size_t buflen;
294
295static inline void
296comment_start ()
297{
298  buflen = 0;
299}
300
301static inline void
302comment_add (int c)
303{
304  if (buflen >= bufmax)
305    {
306      bufmax = 2 * bufmax + 10;
307      buffer = xrealloc (buffer, bufmax);
308    }
309  buffer[buflen++] = c;
310}
311
312static inline void
313comment_line_end ()
314{
315  while (buflen >= 1
316	 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
317    --buflen;
318  if (buflen >= bufmax)
319    {
320      bufmax = 2 * bufmax + 10;
321      buffer = xrealloc (buffer, bufmax);
322    }
323  buffer[buflen] = '\0';
324  savable_comment_add (buffer);
325}
326
327
/* Line numbers used to decide whether a comment counts as immediately
   preceding a keyword: the line of the last comment seen, and the line of
   the last non-comment word seen.  */
static int last_comment_line;
static int last_non_comment_line;
332
333
334/* ========================= Debackslashification ========================== */
335
/* The variables below track how backquotes, double-quotes and single-quotes
   influence the parsing of backslashes.  The input file is read in a single
   pass while this state is kept up to date.  This is much faster than
   accumulating strings and processing them with explicit
   debackslashification, like the shell does it.  */

/* Number of currently open `...` or "`...`" constructs.
   Assumed to be <= 32.  */
static unsigned int nested_backquotes;

/* Bit mask telling which of the currently open backquote constructs are
   inside double-quotes ("`...`", bit value 1) and which are not (`...`,
   bit value 0).
   Bit position 0 designates the outermost backquotes nesting,
   bit position 1 the second-outermost backquotes nesting,
   ...
   bit position (nested_backquotes-1) the innermost backquotes nesting.  */
static unsigned int open_doublequotes_mask;

/* Whether a double-quote is currently open inside the innermost backquotes
   nesting.  */
static bool open_doublequote;

/* Whether a single-quote is currently open inside the innermost backquotes
   nesting.  */
static bool open_singlequote;

/* The character expected to close the currently open single-quote.
   Usually '\'', but can be '"' for i18n-quotes.  */
static char open_singlequote_terminator;
365
366
367/* Functions to update the state.  */
368
369static inline void
370saw_opening_backquote ()
371{
372  if (open_singlequote)
373    abort ();
374  if (open_doublequote)
375    open_doublequotes_mask |= (unsigned int) 1 << nested_backquotes;
376  nested_backquotes++;
377  open_doublequote = false;
378}
379
380static inline void
381saw_closing_backquote ()
382{
383  nested_backquotes--;
384  open_doublequote = (open_doublequotes_mask >> nested_backquotes) & 1;
385  open_doublequotes_mask &= ((unsigned int) 1 << nested_backquotes) - 1;
386  open_singlequote = false; /* just for safety */
387}
388
389static inline void
390saw_opening_doublequote ()
391{
392  if (open_singlequote || open_doublequote)
393    abort ();
394  open_doublequote = true;
395}
396
397static inline void
398saw_closing_doublequote ()
399{
400  if (open_singlequote || !open_doublequote)
401    abort ();
402  open_doublequote = false;
403}
404
405static inline void
406saw_opening_singlequote ()
407{
408  if (open_doublequote || open_singlequote)
409    abort ();
410  open_singlequote = true;
411  open_singlequote_terminator = '\'';
412}
413
414static inline void
415saw_closing_singlequote ()
416{
417  if (open_doublequote || !open_singlequote)
418    abort ();
419  open_singlequote = false;
420}
421
422
423/* ========================== Reading of commands ========================== */
424
/* Only constant strings are of interest.  All other words do not need to be
   represented precisely.  */
enum word_type
{
  t_string,     /* constant string */
  t_other,      /* other string */
  t_separator,  /* command separator: semicolon or newline */
  t_redirect,   /* redirection: one of < > >| << <<- >> <> <& >& */
  t_backquote,  /* pseudo word for a closing '`' */
  t_paren,      /* pseudo word for a closing ')' */
  t_eof         /* EOF marker */
};
437
438struct word
439{
440  enum word_type type;
441  struct token *token;		/* for t_string */
442  int line_number_at_start;	/* for t_string */
443};
444
445/* Free the memory pointed to by a 'struct word'.  */
446static inline void
447free_word (struct word *wp)
448{
449  if (wp->type == t_string)
450    {
451      free_token (wp->token);
452      free (wp->token);
453    }
454}
455
456/* Convert a t_string token to a char*.  */
457static char *
458string_of_word (const struct word *wp)
459{
460  char *str;
461  int n;
462
463  if (!(wp->type == t_string))
464    abort ();
465  n = wp->token->charcount;
466  str = XNMALLOC (n + 1, char);
467  memcpy (str, wp->token->chars, n);
468  str[n] = '\0';
469  return str;
470}
471
472
/* Whitespace recognition: true for space, tab and newline only.  */

static inline bool
is_whitespace (int c)
{
  switch (c)
    {
    case ' ':
    case '\t':
    case '\n':
      return true;
    default:
      return false;
    }
}
480
/* Operator character recognition: true for the characters
   | & ; < > ( ) only.  */

static inline bool
is_operator_start (int c)
{
  switch (c)
    {
    case '|':
    case '&':
    case ';':
    case '<':
    case '>':
    case '(':
    case ')':
      return true;
    default:
      return false;
    }
}
489
490
/* Denotation of a quoted character.
   Whether a character is quoted or unquoted matters only for the special,
   whitespace and operator characters; it makes no difference for
   alphanumeric characters, '\\' and many others.  */
#define QUOTED(c) ((c) + (UCHAR_MAX + 1))
/* Values in the 'unsigned char' range are implicitly unquoted.  Among these,
   the following are important:
     '"'         opening or closing double quote
     '\''        opening or closing single quote
     '$'         the unknown result of a dollar expansion
     '`'         does not occur - replaced with OPENING_BACKQUOTE or
                 CLOSING_BACKQUOTE
 */
#define OPENING_BACKQUOTE ('`' + 2 * (UCHAR_MAX + 1))
#define CLOSING_BACKQUOTE ('`' + 3 * (UCHAR_MAX + 1))
506
/* Pushback storage of phase 2; holds at most 2 characters.
   Two characters are pushed back only when the first of them is an 'x'; in
   all other cases a single character of pushback suffices.  */
static int phase2_pushback[2];
static int phase2_pushback_length;
512
513/* Return the next character, with backslashes removed.
514   The result is QUOTED(c) for some unsigned char c, if the next character
515   is escaped sufficiently often to make it a regular constituent character,
516   or simply an 'unsigned char' if it has its special meaning (of special,
517   whitespace or operator character), or OPENING_BACKQUOTE, CLOSING_BACKQUOTE,
518   EOF.
   Characters pushed back with phase2_ungetc are returned first, unchanged.
   The decision depends on the current quoting state (nested_backquotes,
   open_doublequotes_mask, open_doublequote, open_singlequote and
   open_singlequote_terminator), which this function only reads.
519   It's the caller's responsibility to update the state.  */
520static int
521phase2_getc ()
522{
523  int c;
524
  /* Serve pushed-back characters first.  phase2_ungetc uncounted a
     pushed-back newline, so it is counted again here.  */
525  if (phase2_pushback_length)
526    {
527      c = phase2_pushback[--phase2_pushback_length];
528      if (c == '\n')
529	++line_number;
530      return c;
531    }
532
533  c = phase1_getc ();
534  if (c == EOF)
535    return c;
  /* A single-quote keeps its special meaning unless a double-quote is open
     or it appears inside a single-quote whose terminator is another
     character.  */
536  if (c == '\'')
537    return ((open_doublequote
538	     || (open_singlequote && open_singlequote_terminator != c))
539	    ? QUOTED (c)
540	    : c);
541  if (open_singlequote)
542    {
      /* Inside a single-quote only its terminator is special.  */
543      if (c == open_singlequote_terminator)
544	return c;
545    }
546  else
547    {
      /* Outside single-quotes, '"' and '$' are returned unquoted, and a
         backquote opens or closes a nesting level depending on whether one
         is already open.  */
548      if (c == '"' || c == '$')
549	return c;
550      if (c == '`')
551	return (nested_backquotes > 0 ? CLOSING_BACKQUOTE : OPENING_BACKQUOTE);
552    }
553  if (c == '\\')
554    {
555      /* Number of debackslashification passes that are active at the
556	 current point.  */
557      unsigned int debackslahify =
558	nested_backquotes + (open_singlequote ? 0 : 1);
559      /* Normal number of backslashes that yield a single backslash in the
560	 final output.  */
561      unsigned int expected_count =
562	(unsigned int) 1 << debackslahify;
563      /* Number of backslashes found.  */
564      unsigned int count;
565
      /* Consume further backslashes, up to expected_count in total.  When
         the loop stops early, c holds the first non-backslash character.  */
566      for (count = 1; count < expected_count; count++)
567	{
568	  c = phase1_getc ();
569	  if (c != '\\')
570	    break;
571	}
572      if (count == expected_count)
573	return '\\';
574
575      /* The count of backslashes is > 0 and < expected_count, therefore the
576	 result depends on c, the first character after the backslashes.
577	 Note: The formulas below don't necessarily have a logic; they were
578	 empirically determined such that 1. the xgettext-30 test succeeds,
579	 2. the behaviour for count == 0 would correspond to the one without
580	 any backslash.  */
581      if (c == '\'')
582	{
583	  if (!open_singlequote && count > (expected_count >> 1))
584	    {
585	      phase1_ungetc (c);
586	      return '\\';
587	    }
588	  else
589	    return ((open_doublequote
590		     || (open_singlequote && open_singlequote_terminator != c))
591		    ? QUOTED (c)
592		    : c);
593	}
594      else if (c == '"')
595	{
596	  /* Each debackslashification pass converts \\ to \ and \" to ";
597	     passes corresponding to `...` drop a lone " whereas passes
598	     corresponding to "`...`" leave it alone.  Therefore, the
599	     minimum number of backslashes needed to get one double-quote
600	     in the end is  open_doublequotes_mask + 1.  */
601	  if (open_singlequote)
602	    {
603	      if (count > open_doublequotes_mask)
604		{
605		  phase1_ungetc (c);
606		  return '\\';
607		}
608	      else
609		return (open_singlequote_terminator != c ? QUOTED (c) : c);
610	    }
611	  else
612	    {
613	      if (count > open_doublequotes_mask)
614		return QUOTED (c);
615	      else
616	        /* Some of the count values <= open_doublequotes_mask are
617		   actually invalid here, but we assume a syntactically
618		   correct input file anyway.  */
619		return c;
620	    }
621	}
622      else if (c == '`')
623	{
624	  /* FIXME: This code looks fishy.  */
625	  if (count == expected_count - 1)
626	    return c;
627	  else
628	    /* Some of the count values < expected_count - 1 are
629	       actually invalid here, but we assume a syntactically
630	       correct input file anyway.  */
631	    if (nested_backquotes > 0 && !open_singlequote
632		&& count >= (expected_count >> 2))
633	      return OPENING_BACKQUOTE;
634	    else
635	      return CLOSING_BACKQUOTE;
636	}
637      else if (c == '$')
638	{
639	  if (open_singlequote)
640	    return QUOTED (c);
641	  if (count >= (expected_count >> 1))
642	    return QUOTED (c);
643	  else
644	    return c;
645	}
646      else
647	{
648	  /* When not followed by a quoting character or backslash or dollar,
649	     a backslash survives a debackslashification pass unmodified.
650	     Therefore each debackslashification pass performs a
651	       count := (count + 1) >> 1
652	     operation.  Therefore the minimum number of backslashes needed
653	     to get one backslash in the end is  (expected_count >> 1) + 1.  */
654	  if (open_doublequote || open_singlequote)
655	    {
656	      if (count > 0)
657		{
658		  phase1_ungetc (c);
659		  return '\\';
660		}
661	      else
662		return QUOTED (c);
663	    }
664	  else
665	    {
666	      if (count > (expected_count >> 1))
667		{
668		  phase1_ungetc (c);
669		  return '\\';
670		}
671	      else if (count > 0)
672		return QUOTED (c);
673	      else
674		return c;
675	    }
676	}
677    }
678
  /* Any other character is a regular constituent when inside quotes.  */
679  return (open_singlequote || open_doublequote ? QUOTED (c) : c);
680}
681
682/* Supports 2 characters of pushback.  */
683static void
684phase2_ungetc (int c)
685{
686  switch (c)
687    {
688    case EOF:
689      break;
690
691    case '\n':
692      --line_number;
693      /* FALLTHROUGH */
694
695    default:
696      if (phase2_pushback_length == SIZEOF (phase2_pushback))
697	abort ();
698      phase2_pushback[phase2_pushback_length++] = c;
699      break;
700    }
701}
702
703
704/* Context lookup table.  */
705static flag_context_list_table_ty *flag_context_list_table;
706
707
708/* Forward declaration of local functions.  */
709static enum word_type read_command_list (int looking_for,
710					 flag_context_ty outer_context);
711
712
713
714/* Read the next word.
715   'looking_for' denotes a parse terminator, either CLOSING_BACKQUOTE, ')'
716   or '\0'.
   The result is stored in *WP.  wp->type is always set.  wp->token is valid
   after return only when the type is t_string; it is then heap-allocated
   and stays with the caller (free_word releases it).
   CONTEXT is passed on to the nested command lists read while scanning the
   word and to the messages extracted from $"..." strings.  */
717static void
718read_word (struct word *wp, int looking_for, flag_context_ty context)
719{
720  int c;
721  bool all_unquoted_digits;
722
  /* Skip whitespace and comments before the word.  A newline ends this
     early, as a command separator.  */
723  do
724    {
725      c = phase2_getc ();
726      if (c == '#')
727	{
728	  /* Skip a comment up to end of line.  */
729	  last_comment_line = line_number;
730	  comment_start ();
731	  for (;;)
732	    {
733	      c = phase1_getc ();
734	      if (c == EOF || c == '\n')
735		break;
736	      /* We skip all leading white space, but not EOLs.  */
737	      if (!(buflen == 0 && (c == ' ' || c == '\t')))
738		comment_add (c);
739	    }
740	  comment_line_end ();
741	}
742      if (c == '\n')
743	{
744	  /* Comments assumed to be grouped with a message must immediately
745	     precede it, with no non-whitespace token on a line between
746	     both.  */
747	  if (last_non_comment_line > last_comment_line)
748	    savable_comment_reset ();
749	  wp->type = t_separator;
750	  return;
751	}
752    }
753  while (is_whitespace (c));
754
755  if (c == EOF)
756    {
757      wp->type = t_eof;
758      return;
759    }
760
761  if (c == '<' || c == '>')
762    {
763      /* Recognize the redirection operators < > >| << <<- >> <> <& >&
764	 But <( and >) are handled below, not here.  */
765      int c2 = phase2_getc ();
766      if (c2 != '(')
767	{
768	  if ((c == '<' ? c2 == '<' : c2 == '|') || c2 == '>' || c2 == '&')
769	    {
770	      if (c == '<' && c2 == '<')
771		{
772		  int c3 = phase2_getc ();
773		  if (c3 != '-')
774		    phase2_ungetc (c3);
775		}
776	    }
777	  else
778	    phase2_ungetc (c2);
779	  wp->type = t_redirect;
780	  return;
781	}
782      else
783	phase2_ungetc (c2);
784    }
785
  /* The terminator the caller is looking for yields a pseudo word.  */
786  if (looking_for == CLOSING_BACKQUOTE && c == CLOSING_BACKQUOTE)
787    {
788      saw_closing_backquote ();
789      wp->type = t_backquote;
790      last_non_comment_line = line_number;
791      return;
792    }
793
794  if (looking_for == ')' && c == ')')
795    {
796      wp->type = t_paren;
797      last_non_comment_line = line_number;
798      return;
799    }
800
  /* Any other operator character is a word of its own; only ';' separates
     commands.  */
801  if (is_operator_start (c))
802    {
803      wp->type = (c == ';' ? t_separator : t_other);
804      return;
805    }
806
  /* A regular word starts here.  It is assumed to be a constant string
     until a construct is seen that makes it t_other or t_redirect.  */
807  wp->type = t_string;
808  wp->token = XMALLOC (struct token);
809  init_token (wp->token);
810  wp->line_number_at_start = line_number;
811  all_unquoted_digits = true;
812
813  for (;; c = phase2_getc ())
814    {
815      if (c == EOF)
816	break;
817
818      if (all_unquoted_digits && (c == '<' || c == '>'))
819	{
820	  /* Recognize the redirection operators < > >| << <<- >> <> <& >&
821	     prefixed with a nonempty sequence of unquoted digits.  */
822	  int c2 = phase2_getc ();
823	  if ((c == '<' ? c2 == '<' : c2 == '|') || c2 == '>' || c2 == '&')
824	    {
825	      if (c == '<' && c2 == '<')
826		{
827		  int c3 = phase2_getc ();
828		  if (c3 != '-')
829		    phase2_ungetc (c3);
830		}
831	    }
832	  else
833	    phase2_ungetc (c2);
834
835	  wp->type = t_redirect;
836	  free_token (wp->token);
837	  free (wp->token);
838
839	  last_non_comment_line = line_number;
840
841	  return;
842	}
843
844      all_unquoted_digits = all_unquoted_digits && (c >= '0' && c <= '9');
845
846      if (c == '$')
847	{
848	  int c2;
849
850	  /* An unquoted dollar indicates we are not inside '...'.  */
851	  if (open_singlequote)
852	    abort ();
853	  /* After reading a dollar, we know that there is no pushed back
854	     character from an earlier lookahead.  */
855	  if (phase2_pushback_length > 0)
856	    abort ();
857	  /* Therefore we can use phase1 without interfering with phase2.
858	     We need to recognize $( outside and inside double-quotes.
859	     It would be incorrect to do
860		c2 = phase2_getc ();
861		if (c2 == '(' || c2 == QUOTED ('('))
862	     because that would also trigger for $\(.  */
863	  c2 = phase1_getc ();
864	  if (c2 == '(')
865	    {
866	      bool saved_open_doublequote;
867	      int c3;
868
869	      phase1_ungetc (c2);
870
871	      /* The entire inner command or arithmetic expression is read
872		 ignoring possible surrounding double-quotes.  */
873	      saved_open_doublequote = open_doublequote;
874	      open_doublequote = false;
875
876	      c2 = phase2_getc ();
877	      if (c2 != '(')
878		abort ();
879
880	      c3 = phase2_getc ();
881	      if (c3 == '(')
882		{
883		  /* Arithmetic expression (Bash syntax).  Skip until the
884		     matching closing parenthesis.  */
885		  unsigned int depth = 2;
886
887		  do
888		    {
889		      c = phase2_getc ();
890		      if (c == '(')
891			depth++;
892		      else if (c == ')')
893			if (--depth == 0)
894			  break;
895		    }
896		  while (c != EOF);
897		}
898	      else
899		{
900		  /* Command substitution (Bash syntax).  */
901		  phase2_ungetc (c3);
902		  read_command_list (')', context);
903		}
904
905	      open_doublequote = saved_open_doublequote;
906	    }
907	  else
908	    {
909	      phase1_ungetc (c2);
910	      c2 = phase2_getc ();
911
912	      if (c2 == '\'' && !open_singlequote)
913		{
914		  /* Bash builtin for string with ANSI-C escape sequences.  */
915		  saw_opening_singlequote ();
916		  for (;;)
917		    {
918		      c = phase2_getc ();
919		      if (c == EOF)
920			break;
921		      if (c == '\'')
922			{
923			  saw_closing_singlequote ();
924			  break;
925			}
926		      if (c == '\\')
927			{
928			  c = phase2_getc ();
929			  switch (c)
930			    {
931			    default:
			      /* Not a recognized escape: the backslash is
				 kept and the character is re-read.  */
932			      phase2_ungetc (c);
933			      c = '\\';
934			      break;
935
936			    case '\\':
937			      break;
938			    case '\'':
939			      /* Don't call saw_closing_singlequote ()
940				 here.  */
941			      break;
942
943			    case 'a':
944			      c = '\a';
945			      break;
946			    case 'b':
947			      c = '\b';
948			      break;
949			    case 'e':
950			      c = 0x1b; /* ESC */
951			      break;
952			    case 'f':
953			      c = '\f';
954			      break;
955			    case 'n':
956			      c = '\n';
957			      break;
958			    case 'r':
959			      c = '\r';
960			      break;
961			    case 't':
962			      c = '\t';
963			      break;
964			    case 'v':
965			      c = '\v';
966			      break;
967
968			    case 'x':
			      /* Hexadecimal escape: one or two hex digits.  */
969			      c = phase2_getc ();
970			      if ((c >= '0' && c <= '9')
971				  || (c >= 'A' && c <= 'F')
972				  || (c >= 'a' && c <= 'f'))
973				{
974				  int n;
975
976				  if (c >= '0' && c <= '9')
977				    n = c - '0';
978				  else if (c >= 'A' && c <= 'F')
979				    n = 10 + c - 'A';
980				  else if (c >= 'a' && c <= 'f')
981				    n = 10 + c - 'a';
982				  else
983				    abort ();
984
985				  c = phase2_getc ();
986				  if ((c >= '0' && c <= '9')
987				      || (c >= 'A' && c <= 'F')
988				      || (c >= 'a' && c <= 'f'))
989				    {
990				      if (c >= '0' && c <= '9')
991					n = n * 16 + c - '0';
992				      else if (c >= 'A' && c <= 'F')
993					n = n * 16 + 10 + c - 'A';
994				      else if (c >= 'a' && c <= 'f')
995					n = n * 16 + 10 + c - 'a';
996				      else
997					abort ();
998				    }
999				  else
1000				    phase2_ungetc (c);
1001
1002				  c = n;
1003				}
1004			      else
1005				{
				  /* No hex digit follows: this is the one
				     case that needs 2 characters of
				     pushback.  */
1006				  phase2_ungetc (c);
1007				  phase2_ungetc ('x');
1008				  c = '\\';
1009				}
1010			      break;
1011
1012			    case '0': case '1': case '2': case '3':
1013			    case '4': case '5': case '6': case '7':
			      /* Octal escape: one to three octal digits.  */
1014			      {
1015				int n = c - '0';
1016
1017				c = phase2_getc ();
1018				if (c >= '0' && c <= '7')
1019				  {
1020				    n = n * 8 + c - '0';
1021
1022				    c = phase2_getc ();
1023				    if (c >= '0' && c <= '7')
1024				      n = n * 8 + c - '0';
1025				    else
1026				      phase2_ungetc (c);
1027				  }
1028				else
1029				  phase2_ungetc (c);
1030
1031				c = n;
1032			      }
1033			      break;
1034			    }
1035			}
1036		      if (wp->type == t_string)
1037			{
1038			  grow_token (wp->token);
1039			  wp->token->chars[wp->token->charcount++] =
1040			    (unsigned char) c;
1041			}
1042		    }
1043		  /* The result is a literal string.  Don't change wp->type.  */
1044		  continue;
1045		}
1046	      else if (c2 == '"' && !open_doublequote)
1047		{
1048		  /* Bash builtin for internationalized string.  */
1049		  lex_pos_ty pos;
1050		  struct token string;
1051
		  /* The string is tracked like a single-quote whose
		     terminator is '"'.  */
1052		  saw_opening_singlequote ();
1053		  open_singlequote_terminator = '"';
1054		  pos.file_name = logical_file_name;
1055		  pos.line_number = line_number;
1056		  init_token (&string);
1057		  for (;;)
1058		    {
1059		      c = phase2_getc ();
1060		      if (c == EOF)
1061			break;
1062		      if (c == '"')
1063			{
1064			  saw_closing_singlequote ();
1065			  break;
1066			}
1067		      grow_token (&string);
1068		      string.chars[string.charcount++] = (unsigned char) c;
1069		    }
1070		  remember_a_message (mlp, NULL, string_of_token (&string),
1071				      context, &pos, savable_comment);
1072		  free_token (&string);
1073
1074		  error_with_progname = false;
1075		  error (0, 0, _("%s:%lu: warning: the syntax $\"...\" is deprecated due to security reasons; use eval_gettext instead"),
1076			 pos.file_name, (unsigned long) pos.line_number);
1077		  error_with_progname = true;
1078
1079		  /* The result at runtime is not constant. Therefore we
1080		     change wp->type.  */
1081		}
1082	      else
1083		phase2_ungetc (c2);
1084	    }
1085	  wp->type = t_other;
1086	  continue;
1087	}
1088
1089      if (c == '\'')
1090	{
1091	  if (!open_singlequote)
1092	    {
1093	      /* Handle an opening single quote.  */
1094	      saw_opening_singlequote ();
1095	    }
1096	  else
1097	    {
1098	      /* Handle a closing single quote.  */
1099	      saw_closing_singlequote ();
1100	    }
1101	  continue;
1102	}
1103
1104      if (c == '"')
1105	{
1106	  if (open_singlequote && open_singlequote_terminator == '"')
1107	    {
1108	      /* Handle a closing i18n quote.  */
1109	      saw_closing_singlequote ();
1110	    }
1111	  else if (!open_doublequote)
1112	    {
1113	      /* Handle an opening double quote.  */
1114	      saw_opening_doublequote ();
1115	    }
1116	  else
1117	    {
1118	      /* Handle a closing double quote.  */
1119	      saw_closing_doublequote ();
1120	    }
1121	  continue;
1122	}
1123
1124      if (c == OPENING_BACKQUOTE)
1125	{
1126	  /* Handle an opening backquote.  */
1127	  saw_opening_backquote ();
1128
1129	  read_command_list (CLOSING_BACKQUOTE, context);
1130
1131	  wp->type = t_other;
1132	  continue;
1133	}
      /* A closing backquote ends the word; it is pushed back after the
	 loop.  */
1134      if (c == CLOSING_BACKQUOTE)
1135	break;
1136
1137      if (c == '<' || c == '>')
1138	{
1139	  int c2;
1140
1141	  /* An unquoted c indicates we are not inside '...' nor "...".  */
1142	  if (open_singlequote || open_doublequote)
1143	    abort ();
1144
1145	  c2 = phase2_getc ();
1146	  if (c2 == '(')
1147	    {
1148	      /* Process substitution (Bash syntax).  */
1149	      read_command_list (')', context);
1150
1151	      wp->type = t_other;
1152	      continue;
1153	    }
1154	  else
1155	    phase2_ungetc (c2);
1156	}
1157
      /* Outside quotes, whitespace or an operator character ends the
	 word.  */
1158      if (!open_singlequote && !open_doublequote
1159	  && (is_whitespace (c) || is_operator_start (c)))
1160	break;
1161
1162      if (wp->type == t_string)
1163	{
1164	  grow_token (wp->token);
1165	  wp->token->chars[wp->token->charcount++] = (unsigned char) c;
1166	}
1167    }
1168
  /* The character that ended the word is not part of it.  */
1169  phase2_ungetc (c);
1170
  /* Only t_string words keep their token.  */
1171  if (wp->type != t_string)
1172    {
1173      free_token (wp->token);
1174      free (wp->token);
1175    }
1176  last_non_comment_line = line_number;
1177}
1178
1179
1180/* Read the next command.
1181   'looking_for' denotes a parse terminator, either CLOSING_BACKQUOTE, ')'
1182   or '\0'.
1183   Returns the type of the word that terminated the command.  */
1184static enum word_type
1185read_command (int looking_for, flag_context_ty outer_context)
1186{
1187  /* Read the words that make up the command.
1188     Here we completely ignore field splitting at whitespace and wildcard
1189     expansions; i.e. we assume that the source is written in such a way that
1190     every word in the program determines exactly one word in the resulting
1191     command.
1192     But we do not require that the 'gettext'/'ngettext' command is the
1193     first in the command; this is because 1. we want to allow for prefixes
1194     like "$verbose" that may expand to nothing, and 2. it's a big effort
1195     to know where a command starts in a $(for ...) or $(case ...) compound
1196     command.  */
1197  int arg = 0;			/* Current argument number.  */
1198  bool arg_of_redirect = false;	/* True right after a redirection operator.  */
1199  flag_context_list_iterator_ty context_iter;
1200  const struct callshapes *shapes = NULL;
1201  struct arglist_parser *argparser = NULL;
1202
1203  for (;;)
1204    {
1205      struct word inner;
1206      flag_context_ty inner_context;
1207
1208      if (arg == 0)
1209	inner_context = null_context;
1210      else
1211	inner_context =
1212	  inherited_context (outer_context,
1213			     flag_context_list_iterator_advance (
1214			       &context_iter));
1215
1216      read_word (&inner, looking_for, inner_context);
1217
1218      /* Recognize end of command.  */
1219      if (inner.type == t_separator
1220	  || inner.type == t_backquote || inner.type == t_paren
1221	  || inner.type == t_eof)
1222	{
1223	  if (argparser != NULL)
1224	    arglist_parser_done (argparser, arg);
1225	  return inner.type;
1226	}
1227
1228      if (extract_all)
1229	{
1230	  if (inner.type == t_string)
1231	    {
1232	      lex_pos_ty pos;
1233
1234	      pos.file_name = logical_file_name;
1235	      pos.line_number = inner.line_number_at_start;
1236	      remember_a_message (mlp, NULL, string_of_word (&inner),
1237				  inner_context, &pos, savable_comment);
1238	    }
1239	}
1240
1241      if (arg_of_redirect)
1242	{
1243	  /* Ignore arguments of redirection operators.  */
1244	  arg_of_redirect = false;
1245	}
1246      else if (inner.type == t_redirect)
1247	{
1248	  /* Ignore this word and the following one.  */
1249	  arg_of_redirect = true;
1250	}
1251      else
1252	{
1253	  if (argparser == NULL)
1254	    {
1255	      /* This is the function position.  */
1256	      arg = 0;
1257	      if (inner.type == t_string)
1258		{
1259		  char *function_name = string_of_word (&inner);
1260		  void *keyword_value;
1261
1262		  if (hash_find_entry (&keywords,
1263				       function_name, strlen (function_name),
1264				       &keyword_value)
1265		      == 0)
1266		    shapes = (const struct callshapes *) keyword_value;
1267
1268		  argparser = arglist_parser_alloc (mlp, shapes);
1269
1270		  context_iter =
1271		    flag_context_list_iterator (
1272		      flag_context_list_table_lookup (
1273			flag_context_list_table,
1274			function_name, strlen (function_name)));
1275
1276		  free (function_name);
1277		}
1278	      else
1279		context_iter = null_context_list_iterator;
1280	    }
1281	  else
1282	    {
1283	      /* These are the argument positions.  */
1284	      if (inner.type == t_string)
1285		arglist_parser_remember (argparser, arg,
1286					 string_of_word (&inner),
1287					 inner_context,
1288					 logical_file_name,
1289					 inner.line_number_at_start,
1290					 savable_comment);
1291
1292	      if (arglist_parser_decidedp (argparser, arg))
1293		{
1294		  /* Stop looking for arguments of the last function_name.  */
1295		  /* FIXME: What about context_iter?  */
1296		  arglist_parser_done (argparser, arg);
1297		  shapes = NULL;
1298		  argparser = NULL;
1299		}
1300	    }
1301
1302	  arg++;
1303	}
1304
1305      free_word (&inner);
1306    }
1307}
1308
1309
1310/* Read a list of commands.
1311   'looking_for' denotes a parse terminator, either CLOSING_BACKQUOTE, ')'
1312   or '\0'.
1313   Returns the type of the word that terminated the command list.  */
1314static enum word_type
1315read_command_list (int looking_for, flag_context_ty outer_context)
1316{
1317  for (;;)
1318    {
1319      enum word_type terminator;
1320
1321      terminator = read_command (looking_for, outer_context);
1322      if (terminator != t_separator)
1323	return terminator;
1324    }
1325}
1326
1327
1328void
1329extract_sh (FILE *f,
1330	    const char *real_filename, const char *logical_filename,
1331	    flag_context_list_table_ty *flag_table,
1332	    msgdomain_list_ty *mdlp)
1333{
1334  mlp = mdlp->item[0]->messages;
1335
1336  fp = f;
1337  real_file_name = real_filename;
1338  logical_file_name = xstrdup (logical_filename);
1339  line_number = 1;
1340
1341  last_comment_line = -1;
1342  last_non_comment_line = -1;
1343
1344  nested_backquotes = 0;
1345  open_doublequotes_mask = 0;
1346  open_doublequote = false;
1347  open_singlequote = false;
1348
1349  flag_context_list_table = flag_table;
1350
1351  init_keywords ();
1352
1353  /* Eat tokens until eof is seen.  */
1354  read_command_list ('\0', null_context);
1355
1356  fp = NULL;
1357  real_file_name = NULL;
1358  logical_file_name = NULL;
1359  line_number = 0;
1360}
1361