1214571Sdim/* mclex.c -- lexer for Windows mc files parser. 2214571Sdim Copyright 2007 3214571Sdim Free Software Foundation, Inc. 4214571Sdim 5214571Sdim Written by Kai Tietz, Onevision. 6214571Sdim 7214571Sdim This file is part of GNU Binutils. 8214571Sdim 9214571Sdim This program is free software; you can redistribute it and/or modify 10214571Sdim it under the terms of the GNU General Public License as published by 11214571Sdim the Free Software Foundation; either version 2 of the License, or 12214571Sdim (at your option) any later version. 13214571Sdim 14214571Sdim This program is distributed in the hope that it will be useful, 15214571Sdim but WITHOUT ANY WARRANTY; without even the implied warranty of 16214571Sdim MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17214571Sdim GNU General Public License for more details. 18214571Sdim 19214571Sdim You should have received a copy of the GNU General Public License 20214571Sdim along with this program; if not, write to the Free Software 21214571Sdim Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 22214571Sdim 02110-1301, USA. */ 23214571Sdim 24214571Sdim/* This is a lexer used by the Windows rc file parser. 25214571Sdim It basically just recognized a bunch of keywords. */ 26214571Sdim 27214571Sdim#include "sysdep.h" 28214571Sdim#include "bfd.h" 29214571Sdim#include "bucomm.h" 30214571Sdim#include "libiberty.h" 31214571Sdim#include "safe-ctype.h" 32214571Sdim#include "windmc.h" 33214571Sdim#include "mcparse.h" 34214571Sdim 35214571Sdim#include <assert.h> 36214571Sdim 37214571Sdim/* Exported globals. */ 38214571Sdimbfd_boolean mclex_want_nl = FALSE; 39214571Sdimbfd_boolean mclex_want_line = FALSE; 40214571Sdimbfd_boolean mclex_want_filename = FALSE; 41214571Sdim 42214571Sdim/* Local globals. */ 43214571Sdimstatic unichar *input_stream = NULL; 44214571Sdimstatic unichar *input_stream_pos = NULL; 45214571Sdimstatic int input_line = 1; 46214571Sdimstatic const char *input_filename = NULL; 47214571Sdim 48214571Sdimvoid 49214571Sdimmc_set_content (const unichar *src) 50214571Sdim{ 51214571Sdim if (!src) 52214571Sdim return; 53214571Sdim input_stream = input_stream_pos = unichar_dup (src); 54214571Sdim} 55214571Sdim 56214571Sdimvoid 57214571Sdimmc_set_inputfile (const char *name) 58214571Sdim{ 59214571Sdim if (! name || *name == 0) 60214571Sdim input_filename = "-"; 61214571Sdim else 62214571Sdim { 63214571Sdim const char *s1 = strrchr (name, '/'); 64214571Sdim const char *s2 = strrchr (name, '\\'); 65214571Sdim 66214571Sdim if (! s1) 67214571Sdim s1 = s2; 68214571Sdim if (s1 && s2 && s1 < s2) 69214571Sdim s1 = s2; 70214571Sdim if (! s1) 71214571Sdim s1 = name; 72214571Sdim else 73214571Sdim s1++; 74214571Sdim s1 = xstrdup (s1); 75214571Sdim input_filename = s1; 76214571Sdim } 77214571Sdim} 78214571Sdim 79214571Sdimstatic void 80214571Sdimshow_msg (const char *kind, const char *msg, va_list argp) 81214571Sdim{ 82214571Sdim fprintf (stderr, "In %s at line %d: %s: ", input_filename, input_line, kind); 83214571Sdim vfprintf (stderr, msg, argp); 84214571Sdim fprintf (stderr, ".\n"); 85214571Sdim} 86214571Sdim 87214571Sdimvoid 88214571Sdimmc_warn (const char *s, ...) 89214571Sdim{ 90214571Sdim va_list argp; 91214571Sdim va_start (argp, s); 92214571Sdim show_msg ("warning", s, argp); 93214571Sdim va_end (argp); 94214571Sdim} 95214571Sdim 96214571Sdimvoid 97214571Sdimmc_fatal (const char *s, ...) 98214571Sdim{ 99214571Sdim va_list argp; 100214571Sdim va_start (argp, s); 101214571Sdim show_msg ("fatal", s, argp); 102214571Sdim va_end (argp); 103214571Sdim xexit (1); 104214571Sdim} 105214571Sdim 106214571Sdim 107214571Sdimint 108214571Sdimyyerror (const char *s, ...) 109214571Sdim{ 110214571Sdim va_list argp; 111214571Sdim va_start (argp, s); 112214571Sdim show_msg ("parser", s, argp); 113214571Sdim va_end (argp); 114214571Sdim return 1; 115214571Sdim} 116214571Sdim 117214571Sdimstatic unichar * 118214571Sdimget_diff (unichar *end, unichar *start) 119214571Sdim{ 120214571Sdim unichar *ret; 121214571Sdim unichar save = *end; 122214571Sdim 123214571Sdim *end = 0; 124214571Sdim ret = unichar_dup (start); 125214571Sdim *end = save; 126214571Sdim return ret; 127214571Sdim} 128214571Sdim 129214571Sdimstatic rc_uint_type 130214571Sdimparse_digit (unichar ch) 131214571Sdim{ 132214571Sdim rc_uint_type base = 10, v = 0, c; 133214571Sdim 134214571Sdim if (ch == '0') 135214571Sdim { 136214571Sdim base = 8; 137214571Sdim switch (input_stream_pos[0]) 138214571Sdim { 139214571Sdim case 'x': case 'X': base = 16; input_stream_pos++; break; 140214571Sdim case 'o': case 'O': base = 8; input_stream_pos++; break; 141214571Sdim case 'b': case 'B': base = 2; input_stream_pos++; break; 142214571Sdim } 143214571Sdim } 144214571Sdim else 145214571Sdim v = (rc_uint_type) (ch - '0'); 146214571Sdim 147214571Sdim while ((ch = input_stream_pos[0]) != 0) 148214571Sdim { 149214571Sdim if (ch >= 'A' && ch <= 'F') 150214571Sdim c = (rc_uint_type) (ch - 'A') + 10; 151214571Sdim else if (ch >= 'a' && ch <= 'f') 152214571Sdim c = (rc_uint_type) (ch - 'a') + 10; 153214571Sdim else if (ch >= '0' && ch <= '9') 154214571Sdim c = (rc_uint_type) (ch - '0'); 155214571Sdim else 156214571Sdim break; 157214571Sdim v *= base; 158214571Sdim v += c; 159214571Sdim ++input_stream_pos; 160214571Sdim } 161214571Sdim if (input_stream_pos[0] == 'U' || input_stream_pos[0] == 'u') 162214571Sdim input_stream_pos++; 163214571Sdim if (input_stream_pos[0] == 'L' || input_stream_pos[0] == 'l') 164214571Sdim input_stream_pos++; 165214571Sdim if (input_stream_pos[0] == 'L' || input_stream_pos[0] == 'l') 166214571Sdim input_stream_pos++; 167214571Sdim return v; 168214571Sdim} 169214571Sdim 170214571Sdimstatic mc_keyword *keyword_top = NULL; 171214571Sdim 172214571Sdimconst mc_keyword * 173214571Sdimenum_facility (int e) 174214571Sdim{ 175214571Sdim mc_keyword *h = keyword_top; 176214571Sdim 177214571Sdim while (h != NULL) 178214571Sdim { 179214571Sdim while (h && strcmp (h->group_name, "facility") != 0) 180214571Sdim h = h->next; 181214571Sdim if (e == 0) 182214571Sdim return h; 183214571Sdim --e; 184214571Sdim if (h) 185214571Sdim h = h->next; 186214571Sdim } 187214571Sdim return h; 188214571Sdim} 189214571Sdim 190214571Sdimconst mc_keyword * 191214571Sdimenum_severity (int e) 192214571Sdim{ 193214571Sdim mc_keyword *h = keyword_top; 194214571Sdim 195214571Sdim while (h != NULL) 196214571Sdim { 197214571Sdim while (h && strcmp (h->group_name, "severity") != 0) 198214571Sdim h = h->next; 199214571Sdim if (e == 0) 200214571Sdim return h; 201214571Sdim --e; 202214571Sdim if (h) 203214571Sdim h = h->next; 204214571Sdim } 205214571Sdim return h; 206214571Sdim} 207214571Sdim 208214571Sdimstatic void 209214571Sdimmc_add_keyword_ascii (const char *sz, int rid, const char *grp, rc_uint_type nv, const char *sv) 210214571Sdim{ 211214571Sdim unichar *usz, *usv = NULL; 212214571Sdim rc_uint_type usz_len; 213214571Sdim 214214571Sdim unicode_from_codepage (&usz_len, &usz, sz, CP_ACP); 215214571Sdim if (sv) 216214571Sdim unicode_from_codepage (&usz_len, &usv, sv, CP_ACP); 217214571Sdim mc_add_keyword (usz, rid, grp, nv, usv); 218214571Sdim} 219214571Sdim 220214571Sdimvoid 221214571Sdimmc_add_keyword (unichar *usz, int rid, const char *grp, rc_uint_type nv, unichar *sv) 222214571Sdim{ 223214571Sdim mc_keyword *p, *c, *n; 224214571Sdim size_t len = unichar_len (usz); 225214571Sdim 226214571Sdim c = keyword_top; 227214571Sdim p = NULL; 228214571Sdim while (c != NULL) 229214571Sdim { 230214571Sdim if (c->len > len) 231214571Sdim break; 232214571Sdim if (c->len == len) 233214571Sdim { 234214571Sdim int e = memcmp (usz, c->usz, len * sizeof (unichar)); 235214571Sdim 236214571Sdim if (e < 0) 237214571Sdim break; 238214571Sdim if (! e) 239214571Sdim { 240214571Sdim if (! strcmp (grp, "keyword") || strcmp (c->group_name, grp) != 0) 241214571Sdim fatal (_("Duplicate symbol entered into keyword list.")); 242214571Sdim c->rid = rid; 243214571Sdim c->nval = nv; 244214571Sdim c->sval = (!sv ? NULL : unichar_dup (sv)); 245214571Sdim if (! strcmp (grp, "language")) 246214571Sdim { 247214571Sdim const wind_language_t *lag = wind_find_language_by_id ((unsigned) nv); 248214571Sdim 249214571Sdim if (lag == NULL) 250214571Sdim fatal ("Language ident 0x%lx is not resolvable.\n", (long) nv); 251214571Sdim memcpy (&c->lang_info, lag, sizeof (*lag)); 252214571Sdim } 253214571Sdim return; 254214571Sdim } 255214571Sdim } 256214571Sdim c = (p = c)->next; 257214571Sdim } 258214571Sdim n = xmalloc (sizeof (mc_keyword)); 259214571Sdim n->next = c; 260214571Sdim n->len = len; 261214571Sdim n->group_name = grp; 262214571Sdim n->usz = usz; 263214571Sdim n->rid = rid; 264214571Sdim n->nval = nv; 265214571Sdim n->sval = (!sv ? NULL : unichar_dup (sv)); 266214571Sdim if (! strcmp (grp, "language")) 267214571Sdim { 268214571Sdim const wind_language_t *lag = wind_find_language_by_id ((unsigned) nv); 269214571Sdim if (lag == NULL) 270214571Sdim fatal ("Language ident 0x%lx is not resolvable.\n", (long) nv); 271214571Sdim memcpy (&n->lang_info, lag, sizeof (*lag)); 272214571Sdim } 273214571Sdim if (! p) 274214571Sdim keyword_top = n; 275214571Sdim else 276214571Sdim p->next = n; 277214571Sdim} 278214571Sdim 279214571Sdimstatic int 280214571Sdimmc_token (const unichar *t, size_t len) 281214571Sdim{ 282214571Sdim static int was_init = 0; 283214571Sdim mc_keyword *k; 284214571Sdim 285214571Sdim if (! was_init) 286214571Sdim { 287214571Sdim was_init = 1; 288214571Sdim mc_add_keyword_ascii ("OutputBase", MCOUTPUTBASE, "keyword", 0, NULL); 289214571Sdim mc_add_keyword_ascii ("MessageIdTypedef", MCMESSAGEIDTYPEDEF, "keyword", 0, NULL); 290214571Sdim mc_add_keyword_ascii ("SeverityNames", MCSEVERITYNAMES, "keyword", 0, NULL); 291214571Sdim mc_add_keyword_ascii ("FacilityNames", MCFACILITYNAMES, "keyword", 0, NULL); 292214571Sdim mc_add_keyword_ascii ("LanguageNames", MCLANGUAGENAMES, "keyword", 0, NULL); 293214571Sdim mc_add_keyword_ascii ("MessageId", MCMESSAGEID, "keyword", 0, NULL); 294214571Sdim mc_add_keyword_ascii ("Severity", MCSEVERITY, "keyword", 0, NULL); 295214571Sdim mc_add_keyword_ascii ("Facility", MCFACILITY, "keyword", 0, NULL); 296214571Sdim mc_add_keyword_ascii ("SymbolicName", MCSYMBOLICNAME, "keyword", 0, NULL); 297214571Sdim mc_add_keyword_ascii ("Language", MCLANGUAGE, "keyword", 0, NULL); 298214571Sdim mc_add_keyword_ascii ("Success", MCTOKEN, "severity", 0, NULL); 299214571Sdim mc_add_keyword_ascii ("Informational", MCTOKEN, "severity", 1, NULL); 300214571Sdim mc_add_keyword_ascii ("Warning", MCTOKEN, "severity", 2, NULL); 301214571Sdim mc_add_keyword_ascii ("Error", MCTOKEN, "severity", 3, NULL); 302214571Sdim mc_add_keyword_ascii ("System", MCTOKEN, "facility", 0xff, NULL); 303214571Sdim mc_add_keyword_ascii ("Application", MCTOKEN, "facility", 0xfff, NULL); 304214571Sdim mc_add_keyword_ascii ("English", MCTOKEN, "language", 0x409, "MSG00001"); 305214571Sdim } 306214571Sdim k = keyword_top; 307214571Sdim if (!len || !t || *t == 0) 308214571Sdim return -1; 309214571Sdim while (k != NULL) 310214571Sdim { 311214571Sdim if (k->len > len) 312214571Sdim break; 313214571Sdim if (k->len == len) 314214571Sdim { 315214571Sdim if (! memcmp (k->usz, t, len * sizeof (unichar))) 316214571Sdim { 317214571Sdim if (k->rid == MCTOKEN) 318214571Sdim yylval.tok = k; 319214571Sdim return k->rid; 320214571Sdim } 321214571Sdim } 322214571Sdim k = k->next; 323214571Sdim } 324214571Sdim return -1; 325214571Sdim} 326214571Sdim 327214571Sdimint 328214571Sdimyylex (void) 329214571Sdim{ 330214571Sdim unichar *start_token; 331214571Sdim unichar ch; 332214571Sdim 333214571Sdim if (! input_stream_pos) 334214571Sdim { 335214571Sdim fatal ("Input stream not setuped.\n"); 336214571Sdim return -1; 337214571Sdim } 338214571Sdim if (mclex_want_line) 339214571Sdim { 340214571Sdim start_token = input_stream_pos; 341214571Sdim if (input_stream_pos[0] == '.' 342214571Sdim && (input_stream_pos[1] == '\n' 343214571Sdim || (input_stream_pos[1] == '\r' && input_stream_pos[2] == '\n'))) 344214571Sdim { 345214571Sdim mclex_want_line = FALSE; 346214571Sdim while (input_stream_pos[0] != 0 && input_stream_pos[0] != '\n') 347214571Sdim ++input_stream_pos; 348214571Sdim if (input_stream_pos[0] == '\n') 349214571Sdim ++input_stream_pos; 350214571Sdim return MCENDLINE; 351214571Sdim } 352214571Sdim while (input_stream_pos[0] != 0 && input_stream_pos[0] != '\n') 353214571Sdim ++input_stream_pos; 354214571Sdim if (input_stream_pos[0] == '\n') 355214571Sdim ++input_stream_pos; 356214571Sdim yylval.ustr = get_diff (input_stream_pos, start_token); 357214571Sdim return MCLINE; 358214571Sdim } 359214571Sdim while ((ch = input_stream_pos[0]) <= 0x20) 360214571Sdim { 361214571Sdim if (ch == 0) 362214571Sdim return -1; 363214571Sdim ++input_stream_pos; 364214571Sdim if (ch == '\n') 365214571Sdim input_line += 1; 366214571Sdim if (mclex_want_nl && ch == '\n') 367214571Sdim { 368214571Sdim mclex_want_nl = FALSE; 369214571Sdim return NL; 370214571Sdim } 371214571Sdim } 372214571Sdim start_token = input_stream_pos; 373214571Sdim ++input_stream_pos; 374214571Sdim if (mclex_want_filename) 375214571Sdim { 376214571Sdim mclex_want_filename = FALSE; 377214571Sdim if (ch == '"') 378214571Sdim { 379214571Sdim start_token++; 380214571Sdim while ((ch = input_stream_pos[0]) != 0) 381214571Sdim { 382214571Sdim if (ch == '"') 383214571Sdim break; 384214571Sdim ++input_stream_pos; 385214571Sdim } 386214571Sdim yylval.ustr = get_diff (input_stream_pos, start_token); 387214571Sdim if (ch == '"') 388214571Sdim ++input_stream_pos; 389214571Sdim } 390214571Sdim else 391214571Sdim { 392214571Sdim while ((ch = input_stream_pos[0]) != 0) 393214571Sdim { 394214571Sdim if (ch <= 0x20 || ch == ')') 395214571Sdim break; 396214571Sdim ++input_stream_pos; 397214571Sdim } 398214571Sdim yylval.ustr = get_diff (input_stream_pos, start_token); 399214571Sdim } 400214571Sdim return MCFILENAME; 401214571Sdim } 402214571Sdim switch (ch) 403214571Sdim { 404214571Sdim case ';': 405214571Sdim ++start_token; 406214571Sdim while (input_stream_pos[0] != '\n' && input_stream_pos[0] != 0) 407214571Sdim ++input_stream_pos; 408214571Sdim if (input_stream_pos[0] == '\n') 409214571Sdim input_stream_pos++; 410214571Sdim yylval.ustr = get_diff (input_stream_pos, start_token); 411214571Sdim return MCCOMMENT; 412214571Sdim case '=': 413214571Sdim return '='; 414214571Sdim case '(': 415214571Sdim return '('; 416214571Sdim case ')': 417214571Sdim return ')'; 418214571Sdim case '+': 419214571Sdim return '+'; 420214571Sdim case ':': 421214571Sdim return ':'; 422214571Sdim case '0': case '1': case '2': case '3': case '4': 423214571Sdim case '5': case '6': case '7': case '8': case '9': 424214571Sdim yylval.ival = parse_digit (ch); 425214571Sdim return MCNUMBER; 426214571Sdim default: 427214571Sdim if (ch >= 0x40) 428214571Sdim { 429214571Sdim int ret; 430214571Sdim while (input_stream_pos[0] >= 0x40 || (input_stream_pos[0] >= '0' && input_stream_pos[0] <= '9')) 431214571Sdim ++input_stream_pos; 432214571Sdim ret = mc_token (start_token, (size_t) (input_stream_pos - start_token)); 433214571Sdim if (ret != -1) 434214571Sdim return ret; 435214571Sdim yylval.ustr = get_diff (input_stream_pos, start_token); 436214571Sdim return MCIDENT; 437214571Sdim } 438214571Sdim yyerror ("illegal character 0x%x.", ch); 439214571Sdim } 440214571Sdim return -1; 441214571Sdim} 442