1/* xgettext YCP backend. 2 Copyright (C) 2001-2003, 2005-2006 Free Software Foundation, Inc. 3 4 This file was written by Bruno Haible <haible@clisp.cons.org>, 2001. 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2, or (at your option) 9 any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program; if not, write to the Free Software Foundation, 18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ 19 20#ifdef HAVE_CONFIG_H 21# include "config.h" 22#endif 23 24#include <errno.h> 25#include <limits.h> 26#include <stdbool.h> 27#include <stdio.h> 28#include <stdlib.h> 29 30#include "message.h" 31#include "xgettext.h" 32#include "x-ycp.h" 33#include "error.h" 34#include "xalloc.h" 35#include "exit.h" 36#include "gettext.h" 37 38#define _(s) gettext(s) 39 40#define SIZEOF(a) (sizeof(a) / sizeof(a[0])) 41 42 43/* The YCP syntax is defined in libycp/doc/syntax.html. 44 See also libycp/src/scanner.ll. 45 Both are part of the yast2-core package in SuSE Linux distributions. */ 46 47 48void 49init_flag_table_ycp () 50{ 51 xgettext_record_flag ("sformat:1:ycp-format"); 52 xgettext_record_flag ("y2debug:1:ycp-format"); 53 xgettext_record_flag ("y2milestone:1:ycp-format"); 54 xgettext_record_flag ("y2warning:1:ycp-format"); 55 xgettext_record_flag ("y2error:1:ycp-format"); 56 xgettext_record_flag ("y2security:1:ycp-format"); 57 xgettext_record_flag ("y2internal:1:ycp-format"); 58} 59 60 61/* ======================== Reading of characters. ======================== */ 62 63 64/* Real filename, used in error messages about the input file. */ 65static const char *real_file_name; 66 67/* Logical filename and line number, used to label the extracted messages. */ 68static char *logical_file_name; 69static int line_number; 70static int char_in_line; 71 72/* The input file stream. */ 73static FILE *fp; 74 75/* These are for tracking whether comments count as immediately before 76 keyword. */ 77static int last_comment_line; 78static int last_non_comment_line; 79 80 81/* 1. line_number handling. */ 82 83static int 84phase1_getc () 85{ 86 int c = getc (fp); 87 88 if (c == EOF) 89 { 90 if (ferror (fp)) 91 error (EXIT_FAILURE, errno, _("error while reading \"%s\""), 92 real_file_name); 93 return EOF; 94 } 95 96 if (c == '\n') 97 { 98 line_number++; 99 char_in_line = 0; 100 } 101 else 102 char_in_line++; 103 104 return c; 105} 106 107/* Supports only one pushback character. */ 108static void 109phase1_ungetc (int c) 110{ 111 if (c != EOF) 112 { 113 if (c == '\n') 114 { 115 --line_number; 116 char_in_line = INT_MAX; 117 } 118 else 119 --char_in_line; 120 121 ungetc (c, fp); 122 } 123} 124 125 126/* 2. Replace each comment that is not inside a character constant or 127 string literal with a space character. We need to remember the 128 comment for later, because it may be attached to a keyword string. 129 YCP comments can be in C comment syntax, C++ comment syntax or sh 130 comment syntax. */ 131 132static unsigned char phase2_pushback[1]; 133static int phase2_pushback_length; 134 135static int 136phase2_getc () 137{ 138 static char *buffer; 139 static size_t bufmax; 140 size_t buflen; 141 int lineno; 142 int c; 143 bool last_was_star; 144 145 if (phase2_pushback_length) 146 return phase2_pushback[--phase2_pushback_length]; 147 148 if (char_in_line == 0) 149 { 150 /* Eat whitespace, to recognize ^[\t ]*# pattern. */ 151 do 152 c = phase1_getc (); 153 while (c == '\t' || c == ' '); 154 155 if (c == '#') 156 { 157 /* sh comment. */ 158 buflen = 0; 159 lineno = line_number; 160 for (;;) 161 { 162 c = phase1_getc (); 163 if (c == '\n' || c == EOF) 164 break; 165 /* We skip all leading white space, but not EOLs. */ 166 if (!(buflen == 0 && (c == ' ' || c == '\t'))) 167 { 168 if (buflen >= bufmax) 169 { 170 bufmax = 2 * bufmax + 10; 171 buffer = xrealloc (buffer, bufmax); 172 } 173 buffer[buflen++] = c; 174 } 175 } 176 if (buflen >= bufmax) 177 { 178 bufmax = 2 * bufmax + 10; 179 buffer = xrealloc (buffer, bufmax); 180 } 181 buffer[buflen] = '\0'; 182 savable_comment_add (buffer); 183 last_comment_line = lineno; 184 return '\n'; 185 } 186 } 187 else 188 c = phase1_getc (); 189 190 if (c == '/') 191 { 192 c = phase1_getc (); 193 194 switch (c) 195 { 196 default: 197 phase1_ungetc (c); 198 return '/'; 199 200 case '*': 201 /* C comment. */ 202 buflen = 0; 203 lineno = line_number; 204 last_was_star = false; 205 for (;;) 206 { 207 c = phase1_getc (); 208 if (c == EOF) 209 break; 210 /* We skip all leading white space, but not EOLs. */ 211 if (buflen == 0 && (c == ' ' || c == '\t')) 212 continue; 213 if (buflen >= bufmax) 214 { 215 bufmax = 2 * bufmax + 10; 216 buffer = xrealloc (buffer, bufmax); 217 } 218 buffer[buflen++] = c; 219 switch (c) 220 { 221 case '\n': 222 --buflen; 223 while (buflen >= 1 224 && (buffer[buflen - 1] == ' ' 225 || buffer[buflen - 1] == '\t')) 226 --buflen; 227 buffer[buflen] = '\0'; 228 savable_comment_add (buffer); 229 buflen = 0; 230 lineno = line_number; 231 last_was_star = false; 232 continue; 233 234 case '*': 235 last_was_star = true; 236 continue; 237 238 case '/': 239 if (last_was_star) 240 { 241 buflen -= 2; 242 while (buflen >= 1 243 && (buffer[buflen - 1] == ' ' 244 || buffer[buflen - 1] == '\t')) 245 --buflen; 246 buffer[buflen] = '\0'; 247 savable_comment_add (buffer); 248 break; 249 } 250 /* FALLTHROUGH */ 251 252 default: 253 last_was_star = false; 254 continue; 255 } 256 break; 257 } 258 last_comment_line = lineno; 259 return ' '; 260 261 case '/': 262 /* C++ comment. */ 263 buflen = 0; 264 lineno = line_number; 265 for (;;) 266 { 267 c = phase1_getc (); 268 if (c == '\n' || c == EOF) 269 break; 270 /* We skip all leading white space, but not EOLs. */ 271 if (!(buflen == 0 && (c == ' ' || c == '\t'))) 272 { 273 if (buflen >= bufmax) 274 { 275 bufmax = 2 * bufmax + 10; 276 buffer = xrealloc (buffer, bufmax); 277 } 278 buffer[buflen++] = c; 279 } 280 } 281 if (buflen >= bufmax) 282 { 283 bufmax = 2 * bufmax + 10; 284 buffer = xrealloc (buffer, bufmax); 285 } 286 buffer[buflen] = '\0'; 287 savable_comment_add (buffer); 288 last_comment_line = lineno; 289 return '\n'; 290 } 291 } 292 else 293 return c; 294} 295 296/* Supports only one pushback character. */ 297static void 298phase2_ungetc (int c) 299{ 300 if (c != EOF) 301 { 302 if (phase2_pushback_length == SIZEOF (phase2_pushback)) 303 abort (); 304 phase2_pushback[phase2_pushback_length++] = c; 305 } 306} 307 308 309/* ========================== Reading of tokens. ========================== */ 310 311 312enum token_type_ty 313{ 314 token_type_eof, 315 token_type_lparen, /* ( */ 316 token_type_rparen, /* ) */ 317 token_type_comma, /* , */ 318 token_type_i18n, /* _( */ 319 token_type_string_literal, /* "abc" */ 320 token_type_symbol, /* symbol, number */ 321 token_type_other /* misc. operator */ 322}; 323typedef enum token_type_ty token_type_ty; 324 325typedef struct token_ty token_ty; 326struct token_ty 327{ 328 token_type_ty type; 329 char *string; /* for token_type_string_literal, token_type_symbol */ 330 int line_number; 331}; 332 333 334/* 7. Replace escape sequences within character strings with their 335 single character equivalents. */ 336 337#define P7_QUOTES (1000 + '"') 338 339static int 340phase7_getc () 341{ 342 int c; 343 344 for (;;) 345 { 346 /* Use phase 1, because phase 2 elides comments. */ 347 c = phase1_getc (); 348 349 if (c == '"') 350 return P7_QUOTES; 351 if (c != '\\') 352 return c; 353 c = phase1_getc (); 354 if (c != '\n') 355 switch (c) 356 { 357 case 'b': 358 return '\b'; 359 case 'f': 360 return '\f'; 361 case 'n': 362 return '\n'; 363 case 'r': 364 return '\r'; 365 case 't': 366 return '\t'; 367 368 /* FIXME: What is the octal escape syntax? 369 syntax.html says: [0] [0-7]+ 370 scanner.ll says: [0-7] [0-7] [0-7] 371 */ 372#if 0 373 case '0': case '1': case '2': case '3': 374 case '4': case '5': case '6': case '7': 375 { 376 int n, j; 377 378 n = 0; 379 for (j = 0; j < 3; ++j) 380 { 381 n = n * 8 + c - '0'; 382 c = phase1_getc (); 383 switch (c) 384 { 385 default: 386 break; 387 388 case '0': case '1': case '2': case '3': 389 case '4': case '5': case '6': case '7': 390 continue; 391 } 392 break; 393 } 394 phase1_ungetc (c); 395 return n; 396 } 397#endif 398 399 default: 400 return c; 401 } 402 } 403} 404 405 406/* Combine characters into tokens. Discard whitespace. */ 407 408static token_ty phase5_pushback[1]; 409static int phase5_pushback_length; 410 411static void 412phase5_get (token_ty *tp) 413{ 414 static char *buffer; 415 static int bufmax; 416 int bufpos; 417 int c; 418 419 if (phase5_pushback_length) 420 { 421 *tp = phase5_pushback[--phase5_pushback_length]; 422 return; 423 } 424 for (;;) 425 { 426 tp->line_number = line_number; 427 c = phase2_getc (); 428 429 switch (c) 430 { 431 case EOF: 432 tp->type = token_type_eof; 433 return; 434 435 case '\n': 436 if (last_non_comment_line > last_comment_line) 437 savable_comment_reset (); 438 /* FALLTHROUGH */ 439 case '\r': 440 case '\t': 441 case ' ': 442 /* Ignore whitespace and comments. */ 443 continue; 444 } 445 446 last_non_comment_line = tp->line_number; 447 448 switch (c) 449 { 450 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 451 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': 452 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 453 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': 454 case 'Y': case 'Z': 455 case '_': 456 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 457 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': 458 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': 459 case 's': case 't': case 'u': case 'v': case 'w': case 'x': 460 case 'y': case 'z': 461 case '0': case '1': case '2': case '3': case '4': 462 case '5': case '6': case '7': case '8': case '9': 463 /* Symbol, or part of a number. */ 464 bufpos = 0; 465 for (;;) 466 { 467 if (bufpos >= bufmax) 468 { 469 bufmax = 2 * bufmax + 10; 470 buffer = xrealloc (buffer, bufmax); 471 } 472 buffer[bufpos++] = c; 473 c = phase2_getc (); 474 switch (c) 475 { 476 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 477 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': 478 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 479 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': 480 case 'Y': case 'Z': 481 case '_': 482 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 483 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': 484 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': 485 case 's': case 't': case 'u': case 'v': case 'w': case 'x': 486 case 'y': case 'z': 487 case '0': case '1': case '2': case '3': case '4': 488 case '5': case '6': case '7': case '8': case '9': 489 continue; 490 default: 491 if (bufpos == 1 && buffer[0] == '_' && c == '(') 492 { 493 tp->type = token_type_i18n; 494 return; 495 } 496 phase2_ungetc (c); 497 break; 498 } 499 break; 500 } 501 if (bufpos >= bufmax) 502 { 503 bufmax = 2 * bufmax + 10; 504 buffer = xrealloc (buffer, bufmax); 505 } 506 buffer[bufpos] = '\0'; 507 tp->string = xstrdup (buffer); 508 tp->type = token_type_symbol; 509 return; 510 511 case '"': 512 bufpos = 0; 513 for (;;) 514 { 515 c = phase7_getc (); 516 if (c == EOF || c == P7_QUOTES) 517 break; 518 if (bufpos >= bufmax) 519 { 520 bufmax = 2 * bufmax + 10; 521 buffer = xrealloc (buffer, bufmax); 522 } 523 buffer[bufpos++] = c; 524 } 525 if (bufpos >= bufmax) 526 { 527 bufmax = 2 * bufmax + 10; 528 buffer = xrealloc (buffer, bufmax); 529 } 530 buffer[bufpos] = '\0'; 531 tp->string = xstrdup (buffer); 532 tp->type = token_type_string_literal; 533 return; 534 535 case '(': 536 tp->type = token_type_lparen; 537 return; 538 539 case ')': 540 tp->type = token_type_rparen; 541 return; 542 543 case ',': 544 tp->type = token_type_comma; 545 return; 546 547 default: 548 /* We could carefully recognize each of the 2 and 3 character 549 operators, but it is not necessary, as we only need to recognize 550 gettext invocations. Don't bother. */ 551 tp->type = token_type_other; 552 return; 553 } 554 } 555} 556 557/* Supports only one pushback token. */ 558static void 559phase5_unget (token_ty *tp) 560{ 561 if (tp->type != token_type_eof) 562 { 563 if (phase5_pushback_length == SIZEOF (phase5_pushback)) 564 abort (); 565 phase5_pushback[phase5_pushback_length++] = *tp; 566 } 567} 568 569 570/* Concatenate adjacent string literals to form single string literals. 571 (See libycp/src/parser.yy, rule 'string' vs. terminal 'STRING'.) */ 572 573static void 574phase8_get (token_ty *tp) 575{ 576 phase5_get (tp); 577 if (tp->type != token_type_string_literal) 578 return; 579 for (;;) 580 { 581 token_ty tmp; 582 size_t len; 583 584 phase5_get (&tmp); 585 if (tmp.type != token_type_string_literal) 586 { 587 phase5_unget (&tmp); 588 return; 589 } 590 len = strlen (tp->string); 591 tp->string = xrealloc (tp->string, len + strlen (tmp.string) + 1); 592 strcpy (tp->string + len, tmp.string); 593 free (tmp.string); 594 } 595} 596 597 598/* ========================= Extracting strings. ========================== */ 599 600 601/* Context lookup table. */ 602static flag_context_list_table_ty *flag_context_list_table; 603 604 605/* The file is broken into tokens. 606 607 Normal handling: Look for 608 [A] _( [B] msgid ... ) 609 Plural handling: Look for 610 [A] _( [B] msgid [C] , [D] msgid_plural ... ) 611 At point [A]: state == 0. 612 At point [B]: state == 1, plural_mp == NULL. 613 At point [C]: state == 2, plural_mp != NULL. 614 At point [D]: state == 1, plural_mp != NULL. 615 616 We use recursion because we have to set the context according to the given 617 flags. */ 618 619 620/* Extract messages until the next balanced closing parenthesis. 621 Extracted messages are added to MLP. 622 Return true upon eof, false upon closing parenthesis. */ 623static bool 624extract_parenthesized (message_list_ty *mlp, 625 flag_context_ty outer_context, 626 flag_context_list_iterator_ty context_iter, 627 bool in_i18n) 628{ 629 int state; /* 1 or 2 inside _( ... ), otherwise 0 */ 630 message_ty *plural_mp = NULL; /* defined only when in states 1 and 2 */ 631 /* Context iterator that will be used if the next token is a '('. */ 632 flag_context_list_iterator_ty next_context_iter = 633 passthrough_context_list_iterator; 634 /* Current context. */ 635 flag_context_ty inner_context = 636 inherited_context (outer_context, 637 flag_context_list_iterator_advance (&context_iter)); 638 639 /* Start state is 0 or 1. */ 640 state = (in_i18n ? 1 : 0); 641 642 for (;;) 643 { 644 token_ty token; 645 646 if (in_i18n) 647 phase8_get (&token); 648 else 649 phase5_get (&token); 650 651 switch (token.type) 652 { 653 case token_type_i18n: 654 if (extract_parenthesized (mlp, inner_context, next_context_iter, 655 true)) 656 return true; 657 next_context_iter = null_context_list_iterator; 658 state = 0; 659 continue; 660 661 case token_type_string_literal: 662 if (state == 1) 663 { 664 lex_pos_ty pos; 665 pos.file_name = logical_file_name; 666 pos.line_number = token.line_number; 667 668 if (plural_mp == NULL) 669 { 670 /* Seen an msgid. */ 671 plural_mp = remember_a_message (mlp, NULL, token.string, 672 inner_context, &pos, 673 savable_comment); 674 state = 2; 675 } 676 else 677 { 678 /* Seen an msgid_plural. */ 679 remember_a_message_plural (plural_mp, token.string, 680 inner_context, &pos, 681 savable_comment); 682 state = 0; 683 } 684 } 685 else 686 { 687 free (token.string); 688 state = 0; 689 } 690 next_context_iter = null_context_list_iterator; 691 continue; 692 693 case token_type_symbol: 694 next_context_iter = 695 flag_context_list_iterator ( 696 flag_context_list_table_lookup ( 697 flag_context_list_table, 698 token.string, strlen (token.string))); 699 free (token.string); 700 state = 0; 701 continue; 702 703 case token_type_lparen: 704 if (extract_parenthesized (mlp, inner_context, next_context_iter, 705 false)) 706 return true; 707 next_context_iter = null_context_list_iterator; 708 state = 0; 709 continue; 710 711 case token_type_rparen: 712 return false; 713 714 case token_type_comma: 715 if (state == 2) 716 state = 1; 717 else 718 state = 0; 719 inner_context = 720 inherited_context (outer_context, 721 flag_context_list_iterator_advance ( 722 &context_iter)); 723 next_context_iter = passthrough_context_list_iterator; 724 continue; 725 726 case token_type_other: 727 next_context_iter = null_context_list_iterator; 728 state = 0; 729 continue; 730 731 case token_type_eof: 732 return true; 733 734 default: 735 abort (); 736 } 737 } 738} 739 740 741void 742extract_ycp (FILE *f, 743 const char *real_filename, const char *logical_filename, 744 flag_context_list_table_ty *flag_table, 745 msgdomain_list_ty *mdlp) 746{ 747 message_list_ty *mlp = mdlp->item[0]->messages; 748 749 fp = f; 750 real_file_name = real_filename; 751 logical_file_name = xstrdup (logical_filename); 752 line_number = 1; 753 char_in_line = 0; 754 755 last_comment_line = -1; 756 last_non_comment_line = -1; 757 758 flag_context_list_table = flag_table; 759 760 /* Eat tokens until eof is seen. When extract_parenthesized returns 761 due to an unbalanced closing parenthesis, just restart it. */ 762 while (!extract_parenthesized (mlp, null_context, null_context_list_iterator, 763 false)) 764 ; 765 766 fp = NULL; 767 real_file_name = NULL; 768 logical_file_name = NULL; 769 line_number = 0; 770 char_in_line = 0; 771} 772