1214571Sdim/* mclex.c -- lexer for Windows mc files parser.
2214571Sdim   Copyright 2007
3214571Sdim   Free Software Foundation, Inc.
4214571Sdim
5214571Sdim   Written by Kai Tietz, Onevision.
6214571Sdim
7214571Sdim   This file is part of GNU Binutils.
8214571Sdim
9214571Sdim   This program is free software; you can redistribute it and/or modify
10214571Sdim   it under the terms of the GNU General Public License as published by
11214571Sdim   the Free Software Foundation; either version 2 of the License, or
12214571Sdim   (at your option) any later version.
13214571Sdim
14214571Sdim   This program is distributed in the hope that it will be useful,
15214571Sdim   but WITHOUT ANY WARRANTY; without even the implied warranty of
16214571Sdim   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17214571Sdim   GNU General Public License for more details.
18214571Sdim
19214571Sdim   You should have received a copy of the GNU General Public License
20214571Sdim   along with this program; if not, write to the Free Software
21214571Sdim   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
22214571Sdim   02110-1301, USA.  */
23214571Sdim
/* This is a lexer used by the Windows mc file parser.
   It basically just recognizes a bunch of keywords.  */
26214571Sdim
27214571Sdim#include "sysdep.h"
28214571Sdim#include "bfd.h"
29214571Sdim#include "bucomm.h"
30214571Sdim#include "libiberty.h"
31214571Sdim#include "safe-ctype.h"
32214571Sdim#include "windmc.h"
33214571Sdim#include "mcparse.h"
34214571Sdim
35214571Sdim#include <assert.h>
36214571Sdim
/* Exported globals.  These flags let code outside the lexer change how
   yylex scans the input that follows.  */

/* When set, yylex returns an NL token for the next newline it skips
   (instead of silently treating it as whitespace) and clears the flag.  */
bfd_boolean mclex_want_nl = FALSE;

/* When set, yylex returns the input one raw line at a time as MCLINE
   tokens.  A line consisting of a single '.' yields MCENDLINE instead
   and clears the flag.  */
bfd_boolean mclex_want_line = FALSE;

/* When set, yylex clears the flag and reads the next token as a file
   name (either double-quoted, or running up to whitespace or ')'),
   returning it as MCFILENAME.  */
bfd_boolean mclex_want_filename = FALSE;

/* Local globals.  */

/* Start of the text installed by mc_set_content.  */
static unichar *input_stream = NULL;

/* Current read position inside input_stream.  It stays NULL until
   mc_set_content has installed some text; yylex checks for that.  */
static unichar *input_stream_pos = NULL;

/* Line number printed in diagnostics; starts at 1 and is incremented
   by yylex as it skips newlines.  */
static int input_line = 1;

/* File name printed in diagnostics; set by mc_set_inputfile.  */
static const char *input_filename = NULL;
47214571Sdim
48214571Sdimvoid
49214571Sdimmc_set_content (const unichar *src)
50214571Sdim{
51214571Sdim  if (!src)
52214571Sdim    return;
53214571Sdim  input_stream = input_stream_pos = unichar_dup (src);
54214571Sdim}
55214571Sdim
56214571Sdimvoid
57214571Sdimmc_set_inputfile (const char *name)
58214571Sdim{
59214571Sdim  if (! name || *name == 0)
60214571Sdim    input_filename = "-";
61214571Sdim  else
62214571Sdim    {
63214571Sdim      const char *s1 = strrchr (name, '/');
64214571Sdim      const char *s2 = strrchr (name, '\\');
65214571Sdim
66214571Sdim      if (! s1)
67214571Sdim	s1 = s2;
68214571Sdim      if (s1 && s2 && s1 < s2)
69214571Sdim	s1 = s2;
70214571Sdim      if (! s1)
71214571Sdim	s1 = name;
72214571Sdim      else
73214571Sdim	s1++;
74214571Sdim      s1 = xstrdup (s1);
75214571Sdim      input_filename = s1;
76214571Sdim    }
77214571Sdim}
78214571Sdim
79214571Sdimstatic void
80214571Sdimshow_msg (const char *kind, const char *msg, va_list argp)
81214571Sdim{
82214571Sdim  fprintf (stderr, "In %s at line %d: %s: ", input_filename, input_line, kind);
83214571Sdim  vfprintf (stderr, msg, argp);
84214571Sdim  fprintf (stderr, ".\n");
85214571Sdim}
86214571Sdim
/* Report a warning.  S is a printf-style format followed by its
   arguments; the message goes through show_msg with the "warning"
   label and scanning continues afterwards.  */

void
mc_warn (const char *s, ...)
{
  va_list args;

  va_start (args, s);
  show_msg ("warning", s, args);
  va_end (args);
}
95214571Sdim
/* Report a fatal error and terminate.  S is a printf-style format
   followed by its arguments; the message goes through show_msg with the
   "fatal" label, after which xexit (1) is called.  */

void
mc_fatal (const char *s, ...)
{
  va_list args;

  va_start (args, s);
  show_msg ("fatal", s, args);
  va_end (args);

  xexit (1);
}
105214571Sdim
106214571Sdim
/* Error hook for the parser.  S is a printf-style format followed by
   its arguments; the message goes through show_msg with the "parser"
   label.  Always returns 1.  */

int
yyerror (const char *s, ...)
{
  va_list args;

  va_start (args, s);
  show_msg ("parser", s, args);
  va_end (args);

  return 1;
}
116214571Sdim
117214571Sdimstatic unichar *
118214571Sdimget_diff (unichar *end, unichar *start)
119214571Sdim{
120214571Sdim  unichar *ret;
121214571Sdim  unichar save = *end;
122214571Sdim
123214571Sdim  *end = 0;
124214571Sdim  ret = unichar_dup (start);
125214571Sdim  *end = save;
126214571Sdim  return ret;
127214571Sdim}
128214571Sdim
129214571Sdimstatic rc_uint_type
130214571Sdimparse_digit (unichar ch)
131214571Sdim{
132214571Sdim  rc_uint_type base = 10, v = 0, c;
133214571Sdim
134214571Sdim  if (ch == '0')
135214571Sdim    {
136214571Sdim      base = 8;
137214571Sdim      switch (input_stream_pos[0])
138214571Sdim	{
139214571Sdim	case 'x': case 'X': base = 16; input_stream_pos++; break;
140214571Sdim	case 'o': case 'O': base = 8; input_stream_pos++; break;
141214571Sdim	case 'b': case 'B': base = 2; input_stream_pos++; break;
142214571Sdim	}
143214571Sdim    }
144214571Sdim  else
145214571Sdim    v = (rc_uint_type) (ch - '0');
146214571Sdim
147214571Sdim  while ((ch = input_stream_pos[0]) != 0)
148214571Sdim    {
149214571Sdim      if (ch >= 'A' && ch <= 'F')
150214571Sdim	c = (rc_uint_type) (ch - 'A') + 10;
151214571Sdim      else if (ch >= 'a' && ch <= 'f')
152214571Sdim	c = (rc_uint_type) (ch - 'a') + 10;
153214571Sdim      else if (ch >= '0' && ch <= '9')
154214571Sdim	c = (rc_uint_type) (ch - '0');
155214571Sdim      else
156214571Sdim	break;
157214571Sdim      v *= base;
158214571Sdim      v += c;
159214571Sdim      ++input_stream_pos;
160214571Sdim    }
161214571Sdim  if (input_stream_pos[0] == 'U' || input_stream_pos[0] == 'u')
162214571Sdim    input_stream_pos++;
163214571Sdim  if (input_stream_pos[0] == 'L' || input_stream_pos[0] == 'l')
164214571Sdim    input_stream_pos++;
165214571Sdim  if (input_stream_pos[0] == 'L' || input_stream_pos[0] == 'l')
166214571Sdim    input_stream_pos++;
167214571Sdim  return v;
168214571Sdim}
169214571Sdim
/* Head of the singly linked list of known keywords.  mc_add_keyword
   keeps the list ordered by name length and, for equal lengths, by
   memcmp order of the names; mc_token relies on the length ordering to
   stop searching early.  */
static mc_keyword *keyword_top = NULL;
171214571Sdim
172214571Sdimconst mc_keyword *
173214571Sdimenum_facility (int e)
174214571Sdim{
175214571Sdim  mc_keyword *h = keyword_top;
176214571Sdim
177214571Sdim  while (h != NULL)
178214571Sdim    {
179214571Sdim      while (h && strcmp (h->group_name, "facility") != 0)
180214571Sdim	h = h->next;
181214571Sdim      if (e == 0)
182214571Sdim	return h;
183214571Sdim      --e;
184214571Sdim      if (h)
185214571Sdim	h = h->next;
186214571Sdim    }
187214571Sdim  return h;
188214571Sdim}
189214571Sdim
190214571Sdimconst mc_keyword *
191214571Sdimenum_severity (int e)
192214571Sdim{
193214571Sdim  mc_keyword *h = keyword_top;
194214571Sdim
195214571Sdim  while (h != NULL)
196214571Sdim    {
197214571Sdim      while (h && strcmp (h->group_name, "severity") != 0)
198214571Sdim	h = h->next;
199214571Sdim      if (e == 0)
200214571Sdim	return h;
201214571Sdim      --e;
202214571Sdim      if (h)
203214571Sdim	h = h->next;
204214571Sdim    }
205214571Sdim  return h;
206214571Sdim}
207214571Sdim
208214571Sdimstatic void
209214571Sdimmc_add_keyword_ascii (const char *sz, int rid, const char *grp, rc_uint_type nv, const char *sv)
210214571Sdim{
211214571Sdim  unichar *usz, *usv = NULL;
212214571Sdim  rc_uint_type usz_len;
213214571Sdim
214214571Sdim  unicode_from_codepage (&usz_len, &usz, sz, CP_ACP);
215214571Sdim  if (sv)
216214571Sdim    unicode_from_codepage (&usz_len, &usv, sv, CP_ACP);
217214571Sdim  mc_add_keyword (usz, rid, grp, nv, usv);
218214571Sdim}
219214571Sdim
220214571Sdimvoid
221214571Sdimmc_add_keyword (unichar *usz, int rid, const char *grp, rc_uint_type nv, unichar *sv)
222214571Sdim{
223214571Sdim  mc_keyword *p, *c, *n;
224214571Sdim  size_t len = unichar_len (usz);
225214571Sdim
226214571Sdim  c = keyword_top;
227214571Sdim  p = NULL;
228214571Sdim  while (c != NULL)
229214571Sdim    {
230214571Sdim      if (c->len > len)
231214571Sdim	break;
232214571Sdim      if (c->len == len)
233214571Sdim	{
234214571Sdim	  int e = memcmp (usz, c->usz, len * sizeof (unichar));
235214571Sdim
236214571Sdim	  if (e < 0)
237214571Sdim	    break;
238214571Sdim	  if (! e)
239214571Sdim	    {
240214571Sdim	      if (! strcmp (grp, "keyword") || strcmp (c->group_name, grp) != 0)
241214571Sdim		fatal (_("Duplicate symbol entered into keyword list."));
242214571Sdim	      c->rid = rid;
243214571Sdim	      c->nval = nv;
244214571Sdim	      c->sval = (!sv ? NULL : unichar_dup (sv));
245214571Sdim	      if (! strcmp (grp, "language"))
246214571Sdim		{
247214571Sdim		  const wind_language_t *lag = wind_find_language_by_id ((unsigned) nv);
248214571Sdim
249214571Sdim		  if (lag == NULL)
250214571Sdim		    fatal ("Language ident 0x%lx is not resolvable.\n", (long) nv);
251214571Sdim		  memcpy (&c->lang_info, lag, sizeof (*lag));
252214571Sdim		}
253214571Sdim	      return;
254214571Sdim	    }
255214571Sdim	}
256214571Sdim      c = (p = c)->next;
257214571Sdim    }
258214571Sdim  n = xmalloc (sizeof (mc_keyword));
259214571Sdim  n->next = c;
260214571Sdim  n->len = len;
261214571Sdim  n->group_name = grp;
262214571Sdim  n->usz = usz;
263214571Sdim  n->rid = rid;
264214571Sdim  n->nval = nv;
265214571Sdim  n->sval = (!sv ? NULL : unichar_dup (sv));
266214571Sdim  if (! strcmp (grp, "language"))
267214571Sdim    {
268214571Sdim      const wind_language_t *lag = wind_find_language_by_id ((unsigned) nv);
269214571Sdim      if (lag == NULL)
270214571Sdim	fatal ("Language ident 0x%lx is not resolvable.\n", (long) nv);
271214571Sdim      memcpy (&n->lang_info, lag, sizeof (*lag));
272214571Sdim    }
273214571Sdim  if (! p)
274214571Sdim    keyword_top = n;
275214571Sdim  else
276214571Sdim    p->next = n;
277214571Sdim}
278214571Sdim
279214571Sdimstatic int
280214571Sdimmc_token (const unichar *t, size_t len)
281214571Sdim{
282214571Sdim  static int was_init = 0;
283214571Sdim  mc_keyword *k;
284214571Sdim
285214571Sdim  if (! was_init)
286214571Sdim    {
287214571Sdim      was_init = 1;
288214571Sdim      mc_add_keyword_ascii ("OutputBase", MCOUTPUTBASE, "keyword", 0, NULL);
289214571Sdim      mc_add_keyword_ascii ("MessageIdTypedef", MCMESSAGEIDTYPEDEF, "keyword", 0, NULL);
290214571Sdim      mc_add_keyword_ascii ("SeverityNames", MCSEVERITYNAMES, "keyword", 0, NULL);
291214571Sdim      mc_add_keyword_ascii ("FacilityNames", MCFACILITYNAMES, "keyword", 0, NULL);
292214571Sdim      mc_add_keyword_ascii ("LanguageNames", MCLANGUAGENAMES, "keyword", 0, NULL);
293214571Sdim      mc_add_keyword_ascii ("MessageId", MCMESSAGEID, "keyword", 0, NULL);
294214571Sdim      mc_add_keyword_ascii ("Severity", MCSEVERITY, "keyword", 0, NULL);
295214571Sdim      mc_add_keyword_ascii ("Facility", MCFACILITY, "keyword", 0, NULL);
296214571Sdim      mc_add_keyword_ascii ("SymbolicName", MCSYMBOLICNAME, "keyword", 0, NULL);
297214571Sdim      mc_add_keyword_ascii ("Language", MCLANGUAGE, "keyword", 0, NULL);
298214571Sdim      mc_add_keyword_ascii ("Success", MCTOKEN, "severity", 0, NULL);
299214571Sdim      mc_add_keyword_ascii ("Informational", MCTOKEN, "severity", 1, NULL);
300214571Sdim      mc_add_keyword_ascii ("Warning", MCTOKEN, "severity", 2, NULL);
301214571Sdim      mc_add_keyword_ascii ("Error", MCTOKEN, "severity", 3, NULL);
302214571Sdim      mc_add_keyword_ascii ("System", MCTOKEN, "facility", 0xff, NULL);
303214571Sdim      mc_add_keyword_ascii ("Application", MCTOKEN, "facility", 0xfff, NULL);
304214571Sdim      mc_add_keyword_ascii ("English", MCTOKEN, "language", 0x409, "MSG00001");
305214571Sdim  }
306214571Sdim  k = keyword_top;
307214571Sdim  if (!len || !t || *t == 0)
308214571Sdim    return -1;
309214571Sdim  while (k != NULL)
310214571Sdim    {
311214571Sdim      if (k->len > len)
312214571Sdim	break;
313214571Sdim      if (k->len == len)
314214571Sdim	{
315214571Sdim	  if (! memcmp (k->usz, t, len * sizeof (unichar)))
316214571Sdim	    {
317214571Sdim	      if (k->rid == MCTOKEN)
318214571Sdim		yylval.tok = k;
319214571Sdim	      return k->rid;
320214571Sdim	    }
321214571Sdim	}
322214571Sdim      k = k->next;
323214571Sdim    }
324214571Sdim  return -1;
325214571Sdim}
326214571Sdim
327214571Sdimint
328214571Sdimyylex (void)
329214571Sdim{
330214571Sdim  unichar *start_token;
331214571Sdim  unichar ch;
332214571Sdim
333214571Sdim  if (! input_stream_pos)
334214571Sdim    {
335214571Sdim      fatal ("Input stream not setuped.\n");
336214571Sdim      return -1;
337214571Sdim    }
338214571Sdim  if (mclex_want_line)
339214571Sdim    {
340214571Sdim      start_token = input_stream_pos;
341214571Sdim      if (input_stream_pos[0] == '.'
342214571Sdim	  && (input_stream_pos[1] == '\n'
343214571Sdim	      || (input_stream_pos[1] == '\r' && input_stream_pos[2] == '\n')))
344214571Sdim      {
345214571Sdim	mclex_want_line = FALSE;
346214571Sdim	while (input_stream_pos[0] != 0 && input_stream_pos[0] != '\n')
347214571Sdim	  ++input_stream_pos;
348214571Sdim	if (input_stream_pos[0] == '\n')
349214571Sdim	  ++input_stream_pos;
350214571Sdim	return MCENDLINE;
351214571Sdim      }
352214571Sdim      while (input_stream_pos[0] != 0 && input_stream_pos[0] != '\n')
353214571Sdim	++input_stream_pos;
354214571Sdim      if (input_stream_pos[0] == '\n')
355214571Sdim	++input_stream_pos;
356214571Sdim      yylval.ustr = get_diff (input_stream_pos, start_token);
357214571Sdim      return MCLINE;
358214571Sdim    }
359214571Sdim  while ((ch = input_stream_pos[0]) <= 0x20)
360214571Sdim    {
361214571Sdim      if (ch == 0)
362214571Sdim	return -1;
363214571Sdim      ++input_stream_pos;
364214571Sdim      if (ch == '\n')
365214571Sdim	input_line += 1;
366214571Sdim      if (mclex_want_nl && ch == '\n')
367214571Sdim	{
368214571Sdim	  mclex_want_nl = FALSE;
369214571Sdim	  return NL;
370214571Sdim	}
371214571Sdim    }
372214571Sdim  start_token = input_stream_pos;
373214571Sdim  ++input_stream_pos;
374214571Sdim  if (mclex_want_filename)
375214571Sdim    {
376214571Sdim      mclex_want_filename = FALSE;
377214571Sdim      if (ch == '"')
378214571Sdim	{
379214571Sdim	  start_token++;
380214571Sdim	  while ((ch = input_stream_pos[0]) != 0)
381214571Sdim	    {
382214571Sdim	      if (ch == '"')
383214571Sdim		break;
384214571Sdim	      ++input_stream_pos;
385214571Sdim	    }
386214571Sdim	  yylval.ustr = get_diff (input_stream_pos, start_token);
387214571Sdim	  if (ch == '"')
388214571Sdim	    ++input_stream_pos;
389214571Sdim	}
390214571Sdim      else
391214571Sdim	{
392214571Sdim	  while ((ch = input_stream_pos[0]) != 0)
393214571Sdim	    {
394214571Sdim	      if (ch <= 0x20 || ch == ')')
395214571Sdim		break;
396214571Sdim	      ++input_stream_pos;
397214571Sdim	    }
398214571Sdim	  yylval.ustr = get_diff (input_stream_pos, start_token);
399214571Sdim	}
400214571Sdim      return MCFILENAME;
401214571Sdim    }
402214571Sdim  switch (ch)
403214571Sdim  {
404214571Sdim  case ';':
405214571Sdim    ++start_token;
406214571Sdim    while (input_stream_pos[0] != '\n' && input_stream_pos[0] != 0)
407214571Sdim      ++input_stream_pos;
408214571Sdim    if (input_stream_pos[0] == '\n')
409214571Sdim      input_stream_pos++;
410214571Sdim    yylval.ustr = get_diff (input_stream_pos, start_token);
411214571Sdim    return MCCOMMENT;
412214571Sdim  case '=':
413214571Sdim    return '=';
414214571Sdim  case '(':
415214571Sdim    return '(';
416214571Sdim  case ')':
417214571Sdim    return ')';
418214571Sdim  case '+':
419214571Sdim    return '+';
420214571Sdim  case ':':
421214571Sdim    return ':';
422214571Sdim  case '0': case '1': case '2': case '3': case '4':
423214571Sdim  case '5': case '6': case '7': case '8': case '9':
424214571Sdim    yylval.ival = parse_digit (ch);
425214571Sdim    return MCNUMBER;
426214571Sdim  default:
427214571Sdim    if (ch >= 0x40)
428214571Sdim      {
429214571Sdim	int ret;
430214571Sdim	while (input_stream_pos[0] >= 0x40 || (input_stream_pos[0] >= '0' && input_stream_pos[0] <= '9'))
431214571Sdim	  ++input_stream_pos;
432214571Sdim	ret = mc_token (start_token, (size_t) (input_stream_pos - start_token));
433214571Sdim	if (ret != -1)
434214571Sdim	  return ret;
435214571Sdim	yylval.ustr = get_diff (input_stream_pos, start_token);
436214571Sdim	return MCIDENT;
437214571Sdim      }
438214571Sdim    yyerror ("illegal character 0x%x.", ch);
439214571Sdim  }
440214571Sdim  return -1;
441214571Sdim}
442