1/* xgettext YCP backend. 2 Copyright (C) 2001-2003 Free Software Foundation, Inc. 3 4 This file was written by Bruno Haible <haible@clisp.cons.org>, 2001. 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2, or (at your option) 9 any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program; if not, write to the Free Software Foundation, 18 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ 19 20#ifdef HAVE_CONFIG_H 21# include "config.h" 22#endif 23 24#include <ctype.h> 25#include <errno.h> 26#include <limits.h> 27#include <stdbool.h> 28#include <stdio.h> 29#include <stdlib.h> 30 31#include "message.h" 32#include "xgettext.h" 33#include "x-ycp.h" 34#include "error.h" 35#include "xalloc.h" 36#include "exit.h" 37#include "gettext.h" 38 39#define _(s) gettext(s) 40 41#define SIZEOF(a) (sizeof(a) / sizeof(a[0])) 42 43 44/* The YCP syntax is defined in libycp/doc/syntax.html. 45 See also libycp/src/scanner.ll. */ 46 47 48void 49init_flag_table_ycp () 50{ 51 xgettext_record_flag ("sformat:1:ycp-format"); 52 xgettext_record_flag ("y2debug:1:ycp-format"); 53 xgettext_record_flag ("y2milestone:1:ycp-format"); 54 xgettext_record_flag ("y2warning:1:ycp-format"); 55 xgettext_record_flag ("y2error:1:ycp-format"); 56 xgettext_record_flag ("y2security:1:ycp-format"); 57 xgettext_record_flag ("y2internal:1:ycp-format"); 58} 59 60 61/* ======================== Reading of characters. ======================== */ 62 63 64/* Real filename, used in error messages about the input file. */ 65static const char *real_file_name; 66 67/* Logical filename and line number, used to label the extracted messages. */ 68static char *logical_file_name; 69static int line_number; 70static int char_in_line; 71 72/* The input file stream. */ 73static FILE *fp; 74 75/* These are for tracking whether comments count as immediately before 76 keyword. */ 77static int last_comment_line; 78static int last_non_comment_line; 79 80 81/* 1. line_number handling. */ 82 83static int 84phase1_getc () 85{ 86 int c = getc (fp); 87 88 if (c == EOF) 89 { 90 if (ferror (fp)) 91 error (EXIT_FAILURE, errno, _("error while reading \"%s\""), 92 real_file_name); 93 return EOF; 94 } 95 96 if (c == '\n') 97 { 98 line_number++; 99 char_in_line = 0; 100 } 101 else 102 char_in_line++; 103 104 return c; 105} 106 107/* Supports only one pushback character. */ 108static void 109phase1_ungetc (int c) 110{ 111 if (c != EOF) 112 { 113 if (c == '\n') 114 { 115 --line_number; 116 char_in_line = INT_MAX; 117 } 118 else 119 --char_in_line; 120 121 ungetc (c, fp); 122 } 123} 124 125 126/* 2. Replace each comment that is not inside a character constant or 127 string literal with a space character. We need to remember the 128 comment for later, because it may be attached to a keyword string. 129 YCP comments can be in C comment syntax, C++ comment syntax or sh 130 comment syntax. */ 131 132static unsigned char phase2_pushback[1]; 133static int phase2_pushback_length; 134 135static int 136phase2_getc () 137{ 138 static char *buffer; 139 static size_t bufmax; 140 size_t buflen; 141 int lineno; 142 int c; 143 bool last_was_star; 144 145 if (phase2_pushback_length) 146 return phase2_pushback[--phase2_pushback_length]; 147 148 if (char_in_line == 0) 149 { 150 /* Eat whitespace, to recognize ^[\t ]*# pattern. */ 151 do 152 c = phase1_getc (); 153 while (c == '\t' || c == ' '); 154 155 if (c == '#') 156 { 157 /* sh comment. */ 158 buflen = 0; 159 lineno = line_number; 160 for (;;) 161 { 162 c = phase1_getc (); 163 if (c == '\n' || c == EOF) 164 break; 165 /* We skip all leading white space, but not EOLs. */ 166 if (!(buflen == 0 && (c == ' ' || c == '\t'))) 167 { 168 if (buflen >= bufmax) 169 { 170 bufmax = 2 * bufmax + 10; 171 buffer = xrealloc (buffer, bufmax); 172 } 173 buffer[buflen++] = c; 174 } 175 } 176 if (buflen >= bufmax) 177 { 178 bufmax = 2 * bufmax + 10; 179 buffer = xrealloc (buffer, bufmax); 180 } 181 buffer[buflen] = '\0'; 182 xgettext_comment_add (buffer); 183 last_comment_line = lineno; 184 return '\n'; 185 } 186 } 187 else 188 c = phase1_getc (); 189 190 if (c == '/') 191 { 192 c = phase1_getc (); 193 194 switch (c) 195 { 196 default: 197 phase1_ungetc (c); 198 return '/'; 199 200 case '*': 201 /* C comment. */ 202 buflen = 0; 203 lineno = line_number; 204 last_was_star = false; 205 for (;;) 206 { 207 c = phase1_getc (); 208 if (c == EOF) 209 break; 210 /* We skip all leading white space, but not EOLs. */ 211 if (buflen == 0 && (c == ' ' || c == '\t')) 212 continue; 213 if (buflen >= bufmax) 214 { 215 bufmax = 2 * bufmax + 10; 216 buffer = xrealloc (buffer, bufmax); 217 } 218 buffer[buflen++] = c; 219 switch (c) 220 { 221 case '\n': 222 --buflen; 223 while (buflen >= 1 224 && (buffer[buflen - 1] == ' ' 225 || buffer[buflen - 1] == '\t')) 226 --buflen; 227 buffer[buflen] = '\0'; 228 xgettext_comment_add (buffer); 229 buflen = 0; 230 lineno = line_number; 231 last_was_star = false; 232 continue; 233 234 case '*': 235 last_was_star = true; 236 continue; 237 238 case '/': 239 if (last_was_star) 240 { 241 buflen -= 2; 242 while (buflen >= 1 243 && (buffer[buflen - 1] == ' ' 244 || buffer[buflen - 1] == '\t')) 245 --buflen; 246 buffer[buflen] = '\0'; 247 xgettext_comment_add (buffer); 248 break; 249 } 250 /* FALLTHROUGH */ 251 252 default: 253 last_was_star = false; 254 continue; 255 } 256 break; 257 } 258 last_comment_line = lineno; 259 return ' '; 260 261 case '/': 262 /* C++ comment. */ 263 buflen = 0; 264 lineno = line_number; 265 for (;;) 266 { 267 c = phase1_getc (); 268 if (c == '\n' || c == EOF) 269 break; 270 /* We skip all leading white space, but not EOLs. */ 271 if (!(buflen == 0 && (c == ' ' || c == '\t'))) 272 { 273 if (buflen >= bufmax) 274 { 275 bufmax = 2 * bufmax + 10; 276 buffer = xrealloc (buffer, bufmax); 277 } 278 buffer[buflen++] = c; 279 } 280 } 281 if (buflen >= bufmax) 282 { 283 bufmax = 2 * bufmax + 10; 284 buffer = xrealloc (buffer, bufmax); 285 } 286 buffer[buflen] = '\0'; 287 xgettext_comment_add (buffer); 288 last_comment_line = lineno; 289 return '\n'; 290 } 291 } 292 else 293 return c; 294} 295 296/* Supports only one pushback character. */ 297static void 298phase2_ungetc (int c) 299{ 300 if (c != EOF) 301 { 302 if (phase2_pushback_length == SIZEOF (phase2_pushback)) 303 abort (); 304 phase2_pushback[phase2_pushback_length++] = c; 305 } 306} 307 308 309/* ========================== Reading of tokens. ========================== */ 310 311 312enum token_type_ty 313{ 314 token_type_eof, 315 token_type_lparen, /* ( */ 316 token_type_rparen, /* ) */ 317 token_type_comma, /* , */ 318 token_type_i18n, /* _( */ 319 token_type_string_literal, /* "abc" */ 320 token_type_symbol, /* symbol, number */ 321 token_type_other /* misc. operator */ 322}; 323typedef enum token_type_ty token_type_ty; 324 325typedef struct token_ty token_ty; 326struct token_ty 327{ 328 token_type_ty type; 329 char *string; /* for token_type_string_literal, token_type_symbol */ 330 int line_number; 331}; 332 333 334/* 7. Replace escape sequences within character strings with their 335 single character equivalents. */ 336 337#define P7_QUOTES (1000 + '"') 338 339static int 340phase7_getc () 341{ 342 int c; 343 344 for (;;) 345 { 346 /* Use phase 1, because phase 2 elides comments. */ 347 c = phase1_getc (); 348 349 if (c == '"') 350 return P7_QUOTES; 351 if (c != '\\') 352 return c; 353 c = phase1_getc (); 354 if (c != '\n') 355 switch (c) 356 { 357 case 'b': 358 return '\b'; 359 case 'f': 360 return '\f'; 361 case 'n': 362 return '\n'; 363 case 'r': 364 return '\r'; 365 case 't': 366 return '\t'; 367 368 /* FIXME: What is the octal escape syntax? 369 syntax.html says: [0] [0-7]+ 370 scanner.ll says: [0-7] [0-7] [0-7] 371 */ 372#if 0 373 case '0': case '1': case '2': case '3': 374 case '4': case '5': case '6': case '7': 375 { 376 int n, j; 377 378 n = 0; 379 for (j = 0; j < 3; ++j) 380 { 381 n = n * 8 + c - '0'; 382 c = phase1_getc (); 383 switch (c) 384 { 385 default: 386 break; 387 388 case '0': case '1': case '2': case '3': 389 case '4': case '5': case '6': case '7': 390 continue; 391 } 392 break; 393 } 394 phase1_ungetc (c); 395 return n; 396 } 397#endif 398 399 default: 400 return c; 401 } 402 } 403} 404 405 406/* Combine characters into tokens. Discard whitespace. */ 407 408static void 409x_ycp_lex (token_ty *tp) 410{ 411 static char *buffer; 412 static int bufmax; 413 int bufpos; 414 int c; 415 416 for (;;) 417 { 418 tp->line_number = line_number; 419 c = phase2_getc (); 420 421 switch (c) 422 { 423 case EOF: 424 tp->type = token_type_eof; 425 return; 426 427 case '\n': 428 if (last_non_comment_line > last_comment_line) 429 xgettext_comment_reset (); 430 /* FALLTHROUGH */ 431 case '\r': 432 case '\t': 433 case ' ': 434 /* Ignore whitespace and comments. */ 435 continue; 436 } 437 438 last_non_comment_line = tp->line_number; 439 440 switch (c) 441 { 442 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 443 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': 444 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 445 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': 446 case 'Y': case 'Z': 447 case '_': 448 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 449 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': 450 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': 451 case 's': case 't': case 'u': case 'v': case 'w': case 'x': 452 case 'y': case 'z': 453 case '0': case '1': case '2': case '3': case '4': 454 case '5': case '6': case '7': case '8': case '9': 455 /* Symbol, or part of a number. */ 456 bufpos = 0; 457 for (;;) 458 { 459 if (bufpos >= bufmax) 460 { 461 bufmax = 2 * bufmax + 10; 462 buffer = xrealloc (buffer, bufmax); 463 } 464 buffer[bufpos++] = c; 465 c = phase2_getc (); 466 switch (c) 467 { 468 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 469 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': 470 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 471 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': 472 case 'Y': case 'Z': 473 case '_': 474 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 475 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': 476 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': 477 case 's': case 't': case 'u': case 'v': case 'w': case 'x': 478 case 'y': case 'z': 479 case '0': case '1': case '2': case '3': case '4': 480 case '5': case '6': case '7': case '8': case '9': 481 continue; 482 default: 483 if (bufpos == 1 && buffer[0] == '_' && c == '(') 484 { 485 tp->type = token_type_i18n; 486 return; 487 } 488 phase2_ungetc (c); 489 break; 490 } 491 break; 492 } 493 if (bufpos >= bufmax) 494 { 495 bufmax = 2 * bufmax + 10; 496 buffer = xrealloc (buffer, bufmax); 497 } 498 buffer[bufpos] = '\0'; 499 tp->string = xstrdup (buffer); 500 tp->type = token_type_symbol; 501 return; 502 503 case '"': 504 bufpos = 0; 505 for (;;) 506 { 507 c = phase7_getc (); 508 if (c == EOF || c == P7_QUOTES) 509 break; 510 if (bufpos >= bufmax) 511 { 512 bufmax = 2 * bufmax + 10; 513 buffer = xrealloc (buffer, bufmax); 514 } 515 buffer[bufpos++] = c; 516 } 517 if (bufpos >= bufmax) 518 { 519 bufmax = 2 * bufmax + 10; 520 buffer = xrealloc (buffer, bufmax); 521 } 522 buffer[bufpos] = '\0'; 523 tp->string = xstrdup (buffer); 524 tp->type = token_type_string_literal; 525 return; 526 527 case '(': 528 tp->type = token_type_lparen; 529 return; 530 531 case ')': 532 tp->type = token_type_rparen; 533 return; 534 535 case ',': 536 tp->type = token_type_comma; 537 return; 538 539 default: 540 /* We could carefully recognize each of the 2 and 3 character 541 operators, but it is not necessary, as we only need to recognize 542 gettext invocations. Don't bother. */ 543 tp->type = token_type_other; 544 return; 545 } 546 } 547} 548 549 550/* ========================= Extracting strings. ========================== */ 551 552 553/* Context lookup table. */ 554static flag_context_list_table_ty *flag_context_list_table; 555 556 557/* The file is broken into tokens. 558 559 Normal handling: Look for 560 [A] _( [B] msgid ... ) 561 Plural handling: Look for 562 [A] _( [B] msgid [C] , [D] msgid_plural ... ) 563 At point [A]: state == 0. 564 At point [B]: state == 1, plural_mp == NULL. 565 At point [C]: state == 2, plural_mp != NULL. 566 At point [D]: state == 1, plural_mp != NULL. 567 568 We use recursion because we have to set the context according to the given 569 flags. */ 570 571 572/* Extract messages until the next balanced closing parenthesis. 573 Extracted messages are added to MLP. 574 Return true upon eof, false upon closing parenthesis. */ 575static bool 576extract_parenthesized (message_list_ty *mlp, 577 flag_context_ty outer_context, 578 flag_context_list_iterator_ty context_iter, 579 bool in_i18n) 580{ 581 int state; /* 1 or 2 inside _( ... ), otherwise 0 */ 582 message_ty *plural_mp = NULL; /* defined only when in states 1 and 2 */ 583 /* Context iterator that will be used if the next token is a '('. */ 584 flag_context_list_iterator_ty next_context_iter = 585 passthrough_context_list_iterator; 586 /* Current context. */ 587 flag_context_ty inner_context = 588 inherited_context (outer_context, 589 flag_context_list_iterator_advance (&context_iter)); 590 591 /* Start state is 0 or 1. */ 592 state = (in_i18n ? 1 : 0); 593 594 for (;;) 595 { 596 token_ty token; 597 598 x_ycp_lex (&token); 599 switch (token.type) 600 { 601 case token_type_i18n: 602 if (extract_parenthesized (mlp, inner_context, next_context_iter, 603 true)) 604 return true; 605 next_context_iter = null_context_list_iterator; 606 state = 0; 607 continue; 608 609 case token_type_string_literal: 610 if (state == 1) 611 { 612 lex_pos_ty pos; 613 pos.file_name = logical_file_name; 614 pos.line_number = token.line_number; 615 616 if (plural_mp == NULL) 617 { 618 /* Seen an msgid. */ 619 plural_mp = remember_a_message (mlp, token.string, 620 inner_context, &pos); 621 state = 2; 622 } 623 else 624 { 625 /* Seen an msgid_plural. */ 626 remember_a_message_plural (plural_mp, token.string, 627 inner_context, &pos); 628 state = 0; 629 } 630 } 631 else 632 { 633 free (token.string); 634 state = 0; 635 } 636 next_context_iter = null_context_list_iterator; 637 continue; 638 639 case token_type_symbol: 640 next_context_iter = 641 flag_context_list_iterator ( 642 flag_context_list_table_lookup ( 643 flag_context_list_table, 644 token.string, strlen (token.string))); 645 free (token.string); 646 state = 0; 647 continue; 648 649 case token_type_lparen: 650 if (extract_parenthesized (mlp, inner_context, next_context_iter, 651 false)) 652 return true; 653 next_context_iter = null_context_list_iterator; 654 state = 0; 655 continue; 656 657 case token_type_rparen: 658 return false; 659 660 case token_type_comma: 661 if (state == 2) 662 state = 1; 663 else 664 state = 0; 665 inner_context = 666 inherited_context (outer_context, 667 flag_context_list_iterator_advance ( 668 &context_iter)); 669 next_context_iter = passthrough_context_list_iterator; 670 continue; 671 672 case token_type_other: 673 next_context_iter = null_context_list_iterator; 674 state = 0; 675 continue; 676 677 case token_type_eof: 678 return true; 679 680 default: 681 abort (); 682 } 683 } 684} 685 686 687void 688extract_ycp (FILE *f, 689 const char *real_filename, const char *logical_filename, 690 flag_context_list_table_ty *flag_table, 691 msgdomain_list_ty *mdlp) 692{ 693 message_list_ty *mlp = mdlp->item[0]->messages; 694 695 fp = f; 696 real_file_name = real_filename; 697 logical_file_name = xstrdup (logical_filename); 698 line_number = 1; 699 char_in_line = 0; 700 701 last_comment_line = -1; 702 last_non_comment_line = -1; 703 704 flag_context_list_table = flag_table; 705 706 /* Eat tokens until eof is seen. When extract_parenthesized returns 707 due to an unbalanced closing parenthesis, just restart it. */ 708 while (!extract_parenthesized (mlp, null_context, null_context_list_iterator, 709 false)) 710 ; 711 712 fp = NULL; 713 real_file_name = NULL; 714 logical_file_name = NULL; 715 line_number = 0; 716 char_in_line = 0; 717} 718