1/* mclex.c -- lexer for Windows mc files parser.
2   Copyright (C) 2007-2020 Free Software Foundation, Inc.
3
4   Written by Kai Tietz, Onevision.
5
6   This file is part of GNU Binutils.
7
8   This program is free software; you can redistribute it and/or modify
9   it under the terms of the GNU General Public License as published by
10   the Free Software Foundation; either version 3 of the License, or
11   (at your option) any later version.
12
13   This program is distributed in the hope that it will be useful,
14   but WITHOUT ANY WARRANTY; without even the implied warranty of
15   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16   GNU General Public License for more details.
17
18   You should have received a copy of the GNU General Public License
19   along with this program; if not, write to the Free Software
20   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
21   02110-1301, USA.  */
22
/* This is a lexer used by the Windows mc file parser.
   It basically just recognizes a bunch of keywords.  */
25
26#include "sysdep.h"
27#include "bfd.h"
28#include "bucomm.h"
29#include "libiberty.h"
30#include "safe-ctype.h"
31#include "windmc.h"
32#include "mcparse.h"
33
34#include <assert.h>
35
36/* Exported globals.  */
37bfd_boolean mclex_want_nl = FALSE;
38bfd_boolean mclex_want_line = FALSE;
39bfd_boolean mclex_want_filename = FALSE;
40
41/* Local globals.  */
42static unichar *input_stream = NULL;
43static unichar *input_stream_pos = NULL;
44static int input_line = 1;
45static const char *input_filename = NULL;
46
47void
48mc_set_content (const unichar *src)
49{
50  if (!src)
51    return;
52  input_stream = input_stream_pos = unichar_dup (src);
53}
54
55void
56mc_set_inputfile (const char *name)
57{
58  if (! name || *name == 0)
59    input_filename = "-";
60  else
61    {
62      const char *s1 = strrchr (name, '/');
63      const char *s2 = strrchr (name, '\\');
64
65      if (! s1)
66	s1 = s2;
67      if (s1 && s2 && s1 < s2)
68	s1 = s2;
69      if (! s1)
70	s1 = name;
71      else
72	s1++;
73      s1 = xstrdup (s1);
74      input_filename = s1;
75    }
76}
77
78static void
79show_msg (const char *kind, const char *msg, va_list argp)
80{
81  fprintf (stderr, "In %s at line %d: %s: ", input_filename, input_line, kind);
82  vfprintf (stderr, msg, argp);
83  fprintf (stderr, ".\n");
84}
85
/* Emit a printf-style diagnostic tagged "warning" through show_msg.  */

void
mc_warn (const char *s, ...)
{
  va_list args;

  va_start (args, s);
  show_msg ("warning", s, args);
  va_end (args);
}
94
/* Emit a printf-style diagnostic tagged "fatal" through show_msg, then
   terminate the program via xexit with status 1.  */

void
mc_fatal (const char *s, ...)
{
  va_list args;

  va_start (args, s);
  show_msg ("fatal", s, args);
  va_end (args);

  xexit (1);
}
104
105
/* Emit a printf-style diagnostic tagged "parser" through show_msg.
   Always returns 1.  */

int
yyerror (const char *s, ...)
{
  va_list args;

  va_start (args, s);
  show_msg ("parser", s, args);
  va_end (args);

  return 1;
}
115
116static unichar *
117get_diff (unichar *end, unichar *start)
118{
119  unichar *ret;
120  unichar save = *end;
121
122  *end = 0;
123  ret = unichar_dup (start);
124  *end = save;
125  return ret;
126}
127
128static rc_uint_type
129parse_digit (unichar ch)
130{
131  rc_uint_type base = 10, v = 0, c;
132
133  if (ch == '0')
134    {
135      base = 8;
136      switch (input_stream_pos[0])
137	{
138	case 'x': case 'X': base = 16; input_stream_pos++; break;
139	case 'o': case 'O': base = 8; input_stream_pos++; break;
140	case 'b': case 'B': base = 2; input_stream_pos++; break;
141	}
142    }
143  else
144    v = (rc_uint_type) (ch - '0');
145
146  while ((ch = input_stream_pos[0]) != 0)
147    {
148      if (ch >= 'A' && ch <= 'F')
149	c = (rc_uint_type) (ch - 'A') + 10;
150      else if (ch >= 'a' && ch <= 'f')
151	c = (rc_uint_type) (ch - 'a') + 10;
152      else if (ch >= '0' && ch <= '9')
153	c = (rc_uint_type) (ch - '0');
154      else
155	break;
156      v *= base;
157      v += c;
158      ++input_stream_pos;
159    }
160  if (input_stream_pos[0] == 'U' || input_stream_pos[0] == 'u')
161    input_stream_pos++;
162  if (input_stream_pos[0] == 'L' || input_stream_pos[0] == 'l')
163    input_stream_pos++;
164  if (input_stream_pos[0] == 'L' || input_stream_pos[0] == 'l')
165    input_stream_pos++;
166  return v;
167}
168
169static mc_keyword *keyword_top = NULL;
170
171const mc_keyword *
172enum_facility (int e)
173{
174  mc_keyword *h = keyword_top;
175
176  while (h != NULL)
177    {
178      while (h && strcmp (h->group_name, "facility") != 0)
179	h = h->next;
180      if (e == 0)
181	return h;
182      --e;
183      if (h)
184	h = h->next;
185    }
186  return h;
187}
188
189const mc_keyword *
190enum_severity (int e)
191{
192  mc_keyword *h = keyword_top;
193
194  while (h != NULL)
195    {
196      while (h && strcmp (h->group_name, "severity") != 0)
197	h = h->next;
198      if (e == 0)
199	return h;
200      --e;
201      if (h)
202	h = h->next;
203    }
204  return h;
205}
206
207static void
208mc_add_keyword_ascii (const char *sz, int rid, const char *grp, rc_uint_type nv, const char *sv)
209{
210  unichar *usz, *usv = NULL;
211  rc_uint_type usz_len;
212
213  unicode_from_codepage (&usz_len, &usz, sz, CP_ACP);
214  if (sv)
215    unicode_from_codepage (&usz_len, &usv, sv, CP_ACP);
216  mc_add_keyword (usz, rid, grp, nv, usv);
217}
218
219void
220mc_add_keyword (unichar *usz, int rid, const char *grp, rc_uint_type nv, unichar *sv)
221{
222  mc_keyword *p, *c, *n;
223  size_t len = unichar_len (usz);
224
225  c = keyword_top;
226  p = NULL;
227  while (c != NULL)
228    {
229      if (c->len > len)
230	break;
231      if (c->len == len)
232	{
233	  int e = memcmp (usz, c->usz, len * sizeof (unichar));
234
235	  if (e < 0)
236	    break;
237	  if (! e)
238	    {
239	      if (! strcmp (grp, "keyword") || strcmp (c->group_name, grp) != 0)
240		fatal (_("Duplicate symbol entered into keyword list."));
241	      c->rid = rid;
242	      c->nval = nv;
243	      c->sval = (!sv ? NULL : unichar_dup (sv));
244	      if (! strcmp (grp, "language"))
245		{
246		  const wind_language_t *lag = wind_find_language_by_id ((unsigned) nv);
247
248		  if (lag == NULL)
249		    fatal ("Language ident 0x%lx is not resolvable.\n", (long) nv);
250		  memcpy (&c->lang_info, lag, sizeof (*lag));
251		}
252	      return;
253	    }
254	}
255      c = (p = c)->next;
256    }
257  n = xmalloc (sizeof (mc_keyword));
258  n->next = c;
259  n->len = len;
260  n->group_name = grp;
261  n->usz = usz;
262  n->rid = rid;
263  n->nval = nv;
264  n->sval = (!sv ? NULL : unichar_dup (sv));
265  if (! strcmp (grp, "language"))
266    {
267      const wind_language_t *lag = wind_find_language_by_id ((unsigned) nv);
268      if (lag == NULL)
269	fatal ("Language ident 0x%lx is not resolvable.\n", (long) nv);
270      memcpy (&n->lang_info, lag, sizeof (*lag));
271    }
272  if (! p)
273    keyword_top = n;
274  else
275    p->next = n;
276}
277
278static int
279mc_token (const unichar *t, size_t len)
280{
281  static int was_init = 0;
282  mc_keyword *k;
283
284  if (! was_init)
285    {
286      was_init = 1;
287      mc_add_keyword_ascii ("OutputBase", MCOUTPUTBASE, "keyword", 0, NULL);
288      mc_add_keyword_ascii ("MessageIdTypedef", MCMESSAGEIDTYPEDEF, "keyword", 0, NULL);
289      mc_add_keyword_ascii ("SeverityNames", MCSEVERITYNAMES, "keyword", 0, NULL);
290      mc_add_keyword_ascii ("FacilityNames", MCFACILITYNAMES, "keyword", 0, NULL);
291      mc_add_keyword_ascii ("LanguageNames", MCLANGUAGENAMES, "keyword", 0, NULL);
292      mc_add_keyword_ascii ("MessageId", MCMESSAGEID, "keyword", 0, NULL);
293      mc_add_keyword_ascii ("Severity", MCSEVERITY, "keyword", 0, NULL);
294      mc_add_keyword_ascii ("Facility", MCFACILITY, "keyword", 0, NULL);
295      mc_add_keyword_ascii ("SymbolicName", MCSYMBOLICNAME, "keyword", 0, NULL);
296      mc_add_keyword_ascii ("Language", MCLANGUAGE, "keyword", 0, NULL);
297      mc_add_keyword_ascii ("Success", MCTOKEN, "severity", 0, NULL);
298      mc_add_keyword_ascii ("Informational", MCTOKEN, "severity", 1, NULL);
299      mc_add_keyword_ascii ("Warning", MCTOKEN, "severity", 2, NULL);
300      mc_add_keyword_ascii ("Error", MCTOKEN, "severity", 3, NULL);
301      mc_add_keyword_ascii ("System", MCTOKEN, "facility", 0xff, NULL);
302      mc_add_keyword_ascii ("Application", MCTOKEN, "facility", 0xfff, NULL);
303      mc_add_keyword_ascii ("English", MCTOKEN, "language", 0x409, "MSG00001");
304  }
305  k = keyword_top;
306  if (!len || !t || *t == 0)
307    return -1;
308  while (k != NULL)
309    {
310      if (k->len > len)
311	break;
312      if (k->len == len)
313	{
314	  if (! memcmp (k->usz, t, len * sizeof (unichar)))
315	    {
316	      if (k->rid == MCTOKEN)
317		yylval.tok = k;
318	      return k->rid;
319	    }
320	}
321      k = k->next;
322    }
323  return -1;
324}
325
326/* Skip characters in input_stream_pos up to and including a newline
327   character.  Returns non-zero if the newline was found, zero otherwise.  */
328
329static int
330skip_until_eol (void)
331{
332  while (input_stream_pos[0] != 0 && input_stream_pos[0] != '\n')
333    ++input_stream_pos;
334  if (input_stream_pos[0] == 0)
335    return 0;
336  if (input_stream_pos[0] == '\n')
337    {
338      ++input_stream_pos;
339      input_line += 1;
340    }
341  return 1;
342}
343
344int
345yylex (void)
346{
347  unichar *start_token;
348  unichar ch;
349
350  if (! input_stream_pos)
351    {
352      fatal ("Input stream not setuped.\n");
353      return -1;
354    }
355
356  if (mclex_want_line)
357    {
358      start_token = input_stream_pos;
359      if (input_stream_pos[0] == 0)
360	return -1;
361      /* PR 26082: Reject a period followed by EOF.  */
362      if (input_stream_pos[0] == '.' && input_stream_pos[1] == 0)
363	return -1;
364      if (input_stream_pos[0] == '.'
365	  && (input_stream_pos[1] == '\n'
366	      || (input_stream_pos[1] == '\r' && input_stream_pos[2] == '\n')))
367	{
368	  mclex_want_line = FALSE;
369          return skip_until_eol () ? MCENDLINE : -1;
370	}
371      if (!skip_until_eol ())
372	return -1;
373      yylval.ustr = get_diff (input_stream_pos, start_token);
374      return MCLINE;
375    }
376
377  while ((ch = input_stream_pos[0]) <= 0x20)
378    {
379      if (ch == 0)
380	return -1;
381      ++input_stream_pos;
382      if (ch == '\n')
383	input_line += 1;
384      if (mclex_want_nl && ch == '\n')
385	{
386	  mclex_want_nl = FALSE;
387	  return NL;
388	}
389    }
390  start_token = input_stream_pos;
391  ++input_stream_pos;
392  if (mclex_want_filename)
393    {
394      mclex_want_filename = FALSE;
395      if (ch == '"')
396	{
397	  start_token++;
398	  while ((ch = input_stream_pos[0]) != 0)
399	    {
400	      if (ch == '"')
401		break;
402	      ++input_stream_pos;
403	    }
404	  yylval.ustr = get_diff (input_stream_pos, start_token);
405	  if (ch == '"')
406	    ++input_stream_pos;
407	}
408      else
409	{
410	  while ((ch = input_stream_pos[0]) != 0)
411	    {
412	      if (ch <= 0x20 || ch == ')')
413		break;
414	      ++input_stream_pos;
415	    }
416	  yylval.ustr = get_diff (input_stream_pos, start_token);
417	}
418      return MCFILENAME;
419    }
420  switch (ch)
421  {
422  case ';':
423    ++start_token;
424    if (!skip_until_eol ())
425      return -1;
426    yylval.ustr = get_diff (input_stream_pos, start_token);
427    return MCCOMMENT;
428  case '=':
429    return '=';
430  case '(':
431    return '(';
432  case ')':
433    return ')';
434  case '+':
435    return '+';
436  case ':':
437    return ':';
438  case '0': case '1': case '2': case '3': case '4':
439  case '5': case '6': case '7': case '8': case '9':
440    yylval.ival = parse_digit (ch);
441    return MCNUMBER;
442  default:
443    if (ch >= 0x40)
444      {
445	int ret;
446	while (input_stream_pos[0] >= 0x40 || (input_stream_pos[0] >= '0' && input_stream_pos[0] <= '9'))
447	  ++input_stream_pos;
448	ret = mc_token (start_token, (size_t) (input_stream_pos - start_token));
449	if (ret != -1)
450	  return ret;
451	yylval.ustr = get_diff (input_stream_pos, start_token);
452	return MCIDENT;
453      }
454    yyerror ("illegal character 0x%x.", ch);
455  }
456  return -1;
457}
458