1/* mclex.c -- lexer for Windows mc files parser.
2   Copyright 2007
3   Free Software Foundation, Inc.
4
5   Written by Kai Tietz, Onevision.
6
7   This file is part of GNU Binutils.
8
9   This program is free software; you can redistribute it and/or modify
10   it under the terms of the GNU General Public License as published by
11   the Free Software Foundation; either version 2 of the License, or
12   (at your option) any later version.
13
14   This program is distributed in the hope that it will be useful,
15   but WITHOUT ANY WARRANTY; without even the implied warranty of
16   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   GNU General Public License for more details.
18
19   You should have received a copy of the GNU General Public License
20   along with this program; if not, write to the Free Software
21   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
22   02110-1301, USA.  */
23
/* This is a lexer used by the Windows mc file parser.
   It basically just recognizes a bunch of keywords.  */
26
27#include "sysdep.h"
28#include "bfd.h"
29#include "bucomm.h"
30#include "libiberty.h"
31#include "safe-ctype.h"
32#include "windmc.h"
33#include "mcparse.h"
34
35#include <assert.h>
36
37/* Exported globals.  */
38bfd_boolean mclex_want_nl = FALSE;
39bfd_boolean mclex_want_line = FALSE;
40bfd_boolean mclex_want_filename = FALSE;
41
42/* Local globals.  */
43static unichar *input_stream = NULL;
44static unichar *input_stream_pos = NULL;
45static int input_line = 1;
46static const char *input_filename = NULL;
47
48void
49mc_set_content (const unichar *src)
50{
51  if (!src)
52    return;
53  input_stream = input_stream_pos = unichar_dup (src);
54}
55
56void
57mc_set_inputfile (const char *name)
58{
59  if (! name || *name == 0)
60    input_filename = "-";
61  else
62    {
63      const char *s1 = strrchr (name, '/');
64      const char *s2 = strrchr (name, '\\');
65
66      if (! s1)
67	s1 = s2;
68      if (s1 && s2 && s1 < s2)
69	s1 = s2;
70      if (! s1)
71	s1 = name;
72      else
73	s1++;
74      s1 = xstrdup (s1);
75      input_filename = s1;
76    }
77}
78
79static void
80show_msg (const char *kind, const char *msg, va_list argp)
81{
82  fprintf (stderr, "In %s at line %d: %s: ", input_filename, input_line, kind);
83  vfprintf (stderr, msg, argp);
84  fprintf (stderr, ".\n");
85}
86
/* Emit a "warning" diagnostic.  S is a printf-style format string that
   is expanded with the remaining arguments.  */
void
mc_warn (const char *s, ...)
{
  va_list args;

  va_start (args, s);
  show_msg ("warning", s, args);
  va_end (args);
}
95
/* Emit a "fatal" diagnostic built from the printf-style format S and the
   remaining arguments, then terminate through xexit with status 1.  */
void
mc_fatal (const char *s, ...)
{
  va_list args;

  va_start (args, s);
  show_msg ("fatal", s, args);
  va_end (args);

  xexit (1);
}
105
106
/* Emit a "parser" diagnostic built from the printf-style format S and
   the remaining arguments.  Always returns 1.  */
int
yyerror (const char *s, ...)
{
  va_list args;

  va_start (args, s);
  show_msg ("parser", s, args);
  va_end (args);

  return 1;
}
116
117static unichar *
118get_diff (unichar *end, unichar *start)
119{
120  unichar *ret;
121  unichar save = *end;
122
123  *end = 0;
124  ret = unichar_dup (start);
125  *end = save;
126  return ret;
127}
128
129static rc_uint_type
130parse_digit (unichar ch)
131{
132  rc_uint_type base = 10, v = 0, c;
133
134  if (ch == '0')
135    {
136      base = 8;
137      switch (input_stream_pos[0])
138	{
139	case 'x': case 'X': base = 16; input_stream_pos++; break;
140	case 'o': case 'O': base = 8; input_stream_pos++; break;
141	case 'b': case 'B': base = 2; input_stream_pos++; break;
142	}
143    }
144  else
145    v = (rc_uint_type) (ch - '0');
146
147  while ((ch = input_stream_pos[0]) != 0)
148    {
149      if (ch >= 'A' && ch <= 'F')
150	c = (rc_uint_type) (ch - 'A') + 10;
151      else if (ch >= 'a' && ch <= 'f')
152	c = (rc_uint_type) (ch - 'a') + 10;
153      else if (ch >= '0' && ch <= '9')
154	c = (rc_uint_type) (ch - '0');
155      else
156	break;
157      v *= base;
158      v += c;
159      ++input_stream_pos;
160    }
161  if (input_stream_pos[0] == 'U' || input_stream_pos[0] == 'u')
162    input_stream_pos++;
163  if (input_stream_pos[0] == 'L' || input_stream_pos[0] == 'l')
164    input_stream_pos++;
165  if (input_stream_pos[0] == 'L' || input_stream_pos[0] == 'l')
166    input_stream_pos++;
167  return v;
168}
169
170static mc_keyword *keyword_top = NULL;
171
172const mc_keyword *
173enum_facility (int e)
174{
175  mc_keyword *h = keyword_top;
176
177  while (h != NULL)
178    {
179      while (h && strcmp (h->group_name, "facility") != 0)
180	h = h->next;
181      if (e == 0)
182	return h;
183      --e;
184      if (h)
185	h = h->next;
186    }
187  return h;
188}
189
190const mc_keyword *
191enum_severity (int e)
192{
193  mc_keyword *h = keyword_top;
194
195  while (h != NULL)
196    {
197      while (h && strcmp (h->group_name, "severity") != 0)
198	h = h->next;
199      if (e == 0)
200	return h;
201      --e;
202      if (h)
203	h = h->next;
204    }
205  return h;
206}
207
208static void
209mc_add_keyword_ascii (const char *sz, int rid, const char *grp, rc_uint_type nv, const char *sv)
210{
211  unichar *usz, *usv = NULL;
212  rc_uint_type usz_len;
213
214  unicode_from_codepage (&usz_len, &usz, sz, CP_ACP);
215  if (sv)
216    unicode_from_codepage (&usz_len, &usv, sv, CP_ACP);
217  mc_add_keyword (usz, rid, grp, nv, usv);
218}
219
220void
221mc_add_keyword (unichar *usz, int rid, const char *grp, rc_uint_type nv, unichar *sv)
222{
223  mc_keyword *p, *c, *n;
224  size_t len = unichar_len (usz);
225
226  c = keyword_top;
227  p = NULL;
228  while (c != NULL)
229    {
230      if (c->len > len)
231	break;
232      if (c->len == len)
233	{
234	  int e = memcmp (usz, c->usz, len * sizeof (unichar));
235
236	  if (e < 0)
237	    break;
238	  if (! e)
239	    {
240	      if (! strcmp (grp, "keyword") || strcmp (c->group_name, grp) != 0)
241		fatal (_("Duplicate symbol entered into keyword list."));
242	      c->rid = rid;
243	      c->nval = nv;
244	      c->sval = (!sv ? NULL : unichar_dup (sv));
245	      if (! strcmp (grp, "language"))
246		{
247		  const wind_language_t *lag = wind_find_language_by_id ((unsigned) nv);
248
249		  if (lag == NULL)
250		    fatal ("Language ident 0x%lx is not resolvable.\n", (long) nv);
251		  memcpy (&c->lang_info, lag, sizeof (*lag));
252		}
253	      return;
254	    }
255	}
256      c = (p = c)->next;
257    }
258  n = xmalloc (sizeof (mc_keyword));
259  n->next = c;
260  n->len = len;
261  n->group_name = grp;
262  n->usz = usz;
263  n->rid = rid;
264  n->nval = nv;
265  n->sval = (!sv ? NULL : unichar_dup (sv));
266  if (! strcmp (grp, "language"))
267    {
268      const wind_language_t *lag = wind_find_language_by_id ((unsigned) nv);
269      if (lag == NULL)
270	fatal ("Language ident 0x%lx is not resolvable.\n", (long) nv);
271      memcpy (&n->lang_info, lag, sizeof (*lag));
272    }
273  if (! p)
274    keyword_top = n;
275  else
276    p->next = n;
277}
278
279static int
280mc_token (const unichar *t, size_t len)
281{
282  static int was_init = 0;
283  mc_keyword *k;
284
285  if (! was_init)
286    {
287      was_init = 1;
288      mc_add_keyword_ascii ("OutputBase", MCOUTPUTBASE, "keyword", 0, NULL);
289      mc_add_keyword_ascii ("MessageIdTypedef", MCMESSAGEIDTYPEDEF, "keyword", 0, NULL);
290      mc_add_keyword_ascii ("SeverityNames", MCSEVERITYNAMES, "keyword", 0, NULL);
291      mc_add_keyword_ascii ("FacilityNames", MCFACILITYNAMES, "keyword", 0, NULL);
292      mc_add_keyword_ascii ("LanguageNames", MCLANGUAGENAMES, "keyword", 0, NULL);
293      mc_add_keyword_ascii ("MessageId", MCMESSAGEID, "keyword", 0, NULL);
294      mc_add_keyword_ascii ("Severity", MCSEVERITY, "keyword", 0, NULL);
295      mc_add_keyword_ascii ("Facility", MCFACILITY, "keyword", 0, NULL);
296      mc_add_keyword_ascii ("SymbolicName", MCSYMBOLICNAME, "keyword", 0, NULL);
297      mc_add_keyword_ascii ("Language", MCLANGUAGE, "keyword", 0, NULL);
298      mc_add_keyword_ascii ("Success", MCTOKEN, "severity", 0, NULL);
299      mc_add_keyword_ascii ("Informational", MCTOKEN, "severity", 1, NULL);
300      mc_add_keyword_ascii ("Warning", MCTOKEN, "severity", 2, NULL);
301      mc_add_keyword_ascii ("Error", MCTOKEN, "severity", 3, NULL);
302      mc_add_keyword_ascii ("System", MCTOKEN, "facility", 0xff, NULL);
303      mc_add_keyword_ascii ("Application", MCTOKEN, "facility", 0xfff, NULL);
304      mc_add_keyword_ascii ("English", MCTOKEN, "language", 0x409, "MSG00001");
305  }
306  k = keyword_top;
307  if (!len || !t || *t == 0)
308    return -1;
309  while (k != NULL)
310    {
311      if (k->len > len)
312	break;
313      if (k->len == len)
314	{
315	  if (! memcmp (k->usz, t, len * sizeof (unichar)))
316	    {
317	      if (k->rid == MCTOKEN)
318		yylval.tok = k;
319	      return k->rid;
320	    }
321	}
322      k = k->next;
323    }
324  return -1;
325}
326
327int
328yylex (void)
329{
330  unichar *start_token;
331  unichar ch;
332
333  if (! input_stream_pos)
334    {
335      fatal ("Input stream not setuped.\n");
336      return -1;
337    }
338  if (mclex_want_line)
339    {
340      start_token = input_stream_pos;
341      if (input_stream_pos[0] == '.'
342	  && (input_stream_pos[1] == '\n'
343	      || (input_stream_pos[1] == '\r' && input_stream_pos[2] == '\n')))
344      {
345	mclex_want_line = FALSE;
346	while (input_stream_pos[0] != 0 && input_stream_pos[0] != '\n')
347	  ++input_stream_pos;
348	if (input_stream_pos[0] == '\n')
349	  ++input_stream_pos;
350	return MCENDLINE;
351      }
352      while (input_stream_pos[0] != 0 && input_stream_pos[0] != '\n')
353	++input_stream_pos;
354      if (input_stream_pos[0] == '\n')
355	++input_stream_pos;
356      yylval.ustr = get_diff (input_stream_pos, start_token);
357      return MCLINE;
358    }
359  while ((ch = input_stream_pos[0]) <= 0x20)
360    {
361      if (ch == 0)
362	return -1;
363      ++input_stream_pos;
364      if (ch == '\n')
365	input_line += 1;
366      if (mclex_want_nl && ch == '\n')
367	{
368	  mclex_want_nl = FALSE;
369	  return NL;
370	}
371    }
372  start_token = input_stream_pos;
373  ++input_stream_pos;
374  if (mclex_want_filename)
375    {
376      mclex_want_filename = FALSE;
377      if (ch == '"')
378	{
379	  start_token++;
380	  while ((ch = input_stream_pos[0]) != 0)
381	    {
382	      if (ch == '"')
383		break;
384	      ++input_stream_pos;
385	    }
386	  yylval.ustr = get_diff (input_stream_pos, start_token);
387	  if (ch == '"')
388	    ++input_stream_pos;
389	}
390      else
391	{
392	  while ((ch = input_stream_pos[0]) != 0)
393	    {
394	      if (ch <= 0x20 || ch == ')')
395		break;
396	      ++input_stream_pos;
397	    }
398	  yylval.ustr = get_diff (input_stream_pos, start_token);
399	}
400      return MCFILENAME;
401    }
402  switch (ch)
403  {
404  case ';':
405    ++start_token;
406    while (input_stream_pos[0] != '\n' && input_stream_pos[0] != 0)
407      ++input_stream_pos;
408    if (input_stream_pos[0] == '\n')
409      input_stream_pos++;
410    yylval.ustr = get_diff (input_stream_pos, start_token);
411    return MCCOMMENT;
412  case '=':
413    return '=';
414  case '(':
415    return '(';
416  case ')':
417    return ')';
418  case '+':
419    return '+';
420  case ':':
421    return ':';
422  case '0': case '1': case '2': case '3': case '4':
423  case '5': case '6': case '7': case '8': case '9':
424    yylval.ival = parse_digit (ch);
425    return MCNUMBER;
426  default:
427    if (ch >= 0x40)
428      {
429	int ret;
430	while (input_stream_pos[0] >= 0x40 || (input_stream_pos[0] >= '0' && input_stream_pos[0] <= '9'))
431	  ++input_stream_pos;
432	ret = mc_token (start_token, (size_t) (input_stream_pos - start_token));
433	if (ret != -1)
434	  return ret;
435	yylval.ustr = get_diff (input_stream_pos, start_token);
436	return MCIDENT;
437      }
438    yyerror ("illegal character 0x%x.", ch);
439  }
440  return -1;
441}
442