1/* Reading PO files, abstract class. 2 Copyright (C) 1995-1996, 1998, 2000-2007 Free Software Foundation, Inc. 3 4 This file was written by Peter Miller <millerp@canb.auug.org.au> 5 6 This program is free software: you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 3 of the License, or 9 (at your option) any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 18 19 20#ifdef HAVE_CONFIG_H 21# include "config.h" 22#endif 23 24/* Specification. */ 25#include "read-catalog-abstract.h" 26 27#include <stdlib.h> 28#include <string.h> 29 30#include "xalloc.h" 31#include "xvasprintf.h" 32#include "po-xerror.h" 33#include "gettext.h" 34 35/* Local variables. */ 36static abstract_catalog_reader_ty *callback_arg; 37 38 39/* ========================================================================= */ 40/* Allocating and freeing instances of abstract_catalog_reader_ty. */ 41 42 43abstract_catalog_reader_ty * 44catalog_reader_alloc (abstract_catalog_reader_class_ty *method_table) 45{ 46 abstract_catalog_reader_ty *pop; 47 48 pop = (abstract_catalog_reader_ty *) xmalloc (method_table->size); 49 pop->methods = method_table; 50 if (method_table->constructor) 51 method_table->constructor (pop); 52 return pop; 53} 54 55 56void 57catalog_reader_free (abstract_catalog_reader_ty *pop) 58{ 59 if (pop->methods->destructor) 60 pop->methods->destructor (pop); 61 free (pop); 62} 63 64 65/* ========================================================================= */ 66/* Inline functions to invoke the methods. 
*/ 67 68 69static inline void 70call_parse_brief (abstract_catalog_reader_ty *pop) 71{ 72 if (pop->methods->parse_brief) 73 pop->methods->parse_brief (pop); 74} 75 76static inline void 77call_parse_debrief (abstract_catalog_reader_ty *pop) 78{ 79 if (pop->methods->parse_debrief) 80 pop->methods->parse_debrief (pop); 81} 82 83static inline void 84call_directive_domain (abstract_catalog_reader_ty *pop, char *name) 85{ 86 if (pop->methods->directive_domain) 87 pop->methods->directive_domain (pop, name); 88} 89 90static inline void 91call_directive_message (abstract_catalog_reader_ty *pop, 92 char *msgctxt, 93 char *msgid, 94 lex_pos_ty *msgid_pos, 95 char *msgid_plural, 96 char *msgstr, size_t msgstr_len, 97 lex_pos_ty *msgstr_pos, 98 char *prev_msgctxt, 99 char *prev_msgid, 100 char *prev_msgid_plural, 101 bool force_fuzzy, bool obsolete) 102{ 103 if (pop->methods->directive_message) 104 pop->methods->directive_message (pop, msgctxt, 105 msgid, msgid_pos, msgid_plural, 106 msgstr, msgstr_len, msgstr_pos, 107 prev_msgctxt, 108 prev_msgid, 109 prev_msgid_plural, 110 force_fuzzy, obsolete); 111} 112 113static inline void 114call_comment (abstract_catalog_reader_ty *pop, const char *s) 115{ 116 if (pop->methods->comment != NULL) 117 pop->methods->comment (pop, s); 118} 119 120static inline void 121call_comment_dot (abstract_catalog_reader_ty *pop, const char *s) 122{ 123 if (pop->methods->comment_dot != NULL) 124 pop->methods->comment_dot (pop, s); 125} 126 127static inline void 128call_comment_filepos (abstract_catalog_reader_ty *pop, const char *name, 129 size_t line) 130{ 131 if (pop->methods->comment_filepos) 132 pop->methods->comment_filepos (pop, name, line); 133} 134 135static inline void 136call_comment_special (abstract_catalog_reader_ty *pop, const char *s) 137{ 138 if (pop->methods->comment_special != NULL) 139 pop->methods->comment_special (pop, s); 140} 141 142 143/* ========================================================================= */ 144/* Exported 
functions. */ 145 146 147static inline void 148parse_start (abstract_catalog_reader_ty *pop) 149{ 150 /* The parse will call the po_callback_... functions (see below) 151 when the various directive are recognised. The callback_arg 152 variable is used to tell these functions which instance is to 153 have the relevant method invoked. */ 154 callback_arg = pop; 155 156 call_parse_brief (pop); 157} 158 159static inline void 160parse_end (abstract_catalog_reader_ty *pop) 161{ 162 call_parse_debrief (pop); 163 callback_arg = NULL; 164} 165 166 167void 168catalog_reader_parse (abstract_catalog_reader_ty *pop, FILE *fp, 169 const char *real_filename, const char *logical_filename, 170 catalog_input_format_ty input_syntax) 171{ 172 /* Parse the stream's content. */ 173 parse_start (pop); 174 input_syntax->parse (pop, fp, real_filename, logical_filename); 175 parse_end (pop); 176 177 if (error_message_count > 0) 178 po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, 179 /*real_filename*/ NULL, (size_t)(-1), (size_t)(-1), false, 180 xasprintf (ngettext ("found %d fatal error", 181 "found %d fatal errors", 182 error_message_count), 183 error_message_count)); 184 error_message_count = 0; 185} 186 187 188/* ========================================================================= */ 189/* Callbacks used by po-gram.y or po-lex.c, indirectly from 190 catalog_reader_parse. */ 191 192 193/* This function is called by po_gram_lex() whenever a domain directive 194 has been seen. */ 195void 196po_callback_domain (char *name) 197{ 198 /* assert(callback_arg); */ 199 call_directive_domain (callback_arg, name); 200} 201 202 203/* This function is called by po_gram_lex() whenever a message has been 204 seen. 
*/ 205void 206po_callback_message (char *msgctxt, 207 char *msgid, lex_pos_ty *msgid_pos, char *msgid_plural, 208 char *msgstr, size_t msgstr_len, lex_pos_ty *msgstr_pos, 209 char *prev_msgctxt, 210 char *prev_msgid, 211 char *prev_msgid_plural, 212 bool force_fuzzy, bool obsolete) 213{ 214 /* assert(callback_arg); */ 215 call_directive_message (callback_arg, msgctxt, 216 msgid, msgid_pos, msgid_plural, 217 msgstr, msgstr_len, msgstr_pos, 218 prev_msgctxt, prev_msgid, prev_msgid_plural, 219 force_fuzzy, obsolete); 220} 221 222 223void 224po_callback_comment (const char *s) 225{ 226 /* assert(callback_arg); */ 227 call_comment (callback_arg, s); 228} 229 230 231void 232po_callback_comment_dot (const char *s) 233{ 234 /* assert(callback_arg); */ 235 call_comment_dot (callback_arg, s); 236} 237 238 239/* This function is called by po_parse_comment_filepos(), once for each 240 filename. */ 241void 242po_callback_comment_filepos (const char *name, size_t line) 243{ 244 /* assert(callback_arg); */ 245 call_comment_filepos (callback_arg, name, line); 246} 247 248 249void 250po_callback_comment_special (const char *s) 251{ 252 /* assert(callback_arg); */ 253 call_comment_special (callback_arg, s); 254} 255 256 257/* Parse a special comment and put the result in *fuzzyp, formatp, *wrapp. */ 258void 259po_parse_comment_special (const char *s, 260 bool *fuzzyp, enum is_format formatp[NFORMATS], 261 enum is_wrap *wrapp) 262{ 263 size_t i; 264 265 *fuzzyp = false; 266 for (i = 0; i < NFORMATS; i++) 267 formatp[i] = undecided; 268 *wrapp = undecided; 269 270 while (*s != '\0') 271 { 272 const char *t; 273 274 /* Skip whitespace. */ 275 while (*s != '\0' && strchr ("\n \t\r\f\v,", *s) != NULL) 276 s++; 277 278 /* Collect a token. */ 279 t = s; 280 while (*s != '\0' && strchr ("\n \t\r\f\v,", *s) == NULL) 281 s++; 282 if (s != t) 283 { 284 size_t len = s - t; 285 286 /* Accept fuzzy flag. 
*/ 287 if (len == 5 && memcmp (t, "fuzzy", 5) == 0) 288 { 289 *fuzzyp = true; 290 continue; 291 } 292 293 /* Accept format description. */ 294 if (len >= 7 && memcmp (t + len - 7, "-format", 7) == 0) 295 { 296 const char *p; 297 size_t n; 298 enum is_format value; 299 300 p = t; 301 n = len - 7; 302 303 if (n >= 3 && memcmp (p, "no-", 3) == 0) 304 { 305 p += 3; 306 n -= 3; 307 value = no; 308 } 309 else if (n >= 9 && memcmp (p, "possible-", 9) == 0) 310 { 311 p += 9; 312 n -= 9; 313 value = possible; 314 } 315 else if (n >= 11 && memcmp (p, "impossible-", 11) == 0) 316 { 317 p += 11; 318 n -= 11; 319 value = impossible; 320 } 321 else 322 value = yes; 323 324 for (i = 0; i < NFORMATS; i++) 325 if (strlen (format_language[i]) == n 326 && memcmp (format_language[i], p, n) == 0) 327 { 328 formatp[i] = value; 329 break; 330 } 331 if (i < NFORMATS) 332 continue; 333 } 334 335 /* Accept wrap description. */ 336 if (len == 4 && memcmp (t, "wrap", 4) == 0) 337 { 338 *wrapp = yes; 339 continue; 340 } 341 if (len == 7 && memcmp (t, "no-wrap", 7) == 0) 342 { 343 *wrapp = no; 344 continue; 345 } 346 347 /* Unknown special comment marker. It may have been generated 348 from a future xgettext version. Ignore it. */ 349 } 350 } 351} 352 353 354/* Parse a GNU style file comment. 355 Syntax: an arbitrary number of 356 STRING COLON NUMBER 357 or 358 STRING 359 The latter style, without line number, occurs in PO files converted e.g. 360 from Pascal .rst files or from OpenOffice resource files. 361 Call po_callback_comment_filepos for each of them. */ 362static void 363po_parse_comment_filepos (const char *s) 364{ 365 while (*s != '\0') 366 { 367 while (*s == ' ' || *s == '\t' || *s == '\n') 368 s++; 369 if (*s != '\0') 370 { 371 const char *string_start = s; 372 373 do 374 s++; 375 while (!(*s == '\0' || *s == ' ' || *s == '\t' || *s == '\n')); 376 377 /* See if there is a COLON and NUMBER after the STRING, separated 378 through optional spaces. 
*/ 379 { 380 const char *p = s; 381 382 while (*p == ' ' || *p == '\t' || *p == '\n') 383 p++; 384 385 if (*p == ':') 386 { 387 p++; 388 389 while (*p == ' ' || *p == '\t' || *p == '\n') 390 p++; 391 392 if (*p >= '0' && *p <= '9') 393 { 394 /* Accumulate a number. */ 395 size_t n = 0; 396 397 do 398 { 399 n = n * 10 + (*p - '0'); 400 p++; 401 } 402 while (*p >= '0' && *p <= '9'); 403 404 if (*p == '\0' || *p == ' ' || *p == '\t' || *p == '\n') 405 { 406 /* Parsed a GNU style file comment with spaces. */ 407 const char *string_end = s; 408 size_t string_length = string_end - string_start; 409 char *string = XNMALLOC (string_length + 1, char); 410 411 memcpy (string, string_start, string_length); 412 string[string_length] = '\0'; 413 414 po_callback_comment_filepos (string, n); 415 416 free (string); 417 418 s = p; 419 continue; 420 } 421 } 422 } 423 } 424 425 /* See if there is a COLON at the end of STRING and a NUMBER after 426 it, separated through optional spaces. */ 427 if (s[-1] == ':') 428 { 429 const char *p = s; 430 431 while (*p == ' ' || *p == '\t' || *p == '\n') 432 p++; 433 434 if (*p >= '0' && *p <= '9') 435 { 436 /* Accumulate a number. */ 437 size_t n = 0; 438 439 do 440 { 441 n = n * 10 + (*p - '0'); 442 p++; 443 } 444 while (*p >= '0' && *p <= '9'); 445 446 if (*p == '\0' || *p == ' ' || *p == '\t' || *p == '\n') 447 { 448 /* Parsed a GNU style file comment with spaces. */ 449 const char *string_end = s - 1; 450 size_t string_length = string_end - string_start; 451 char *string = XNMALLOC (string_length + 1, char); 452 453 memcpy (string, string_start, string_length); 454 string[string_length] = '\0'; 455 456 po_callback_comment_filepos (string, n); 457 458 free (string); 459 460 s = p; 461 continue; 462 } 463 } 464 } 465 466 /* See if there is a COLON and NUMBER at the end of the STRING, 467 without separating spaces. 
*/ 468 { 469 const char *p = s; 470 471 while (p > string_start) 472 { 473 p--; 474 if (!(*p >= '0' && *p <= '9')) 475 { 476 p++; 477 break; 478 } 479 } 480 481 /* p now points to the beginning of the trailing digits segment 482 at the end of STRING. */ 483 484 if (p < s 485 && p > string_start + 1 486 && p[-1] == ':') 487 { 488 /* Parsed a GNU style file comment without spaces. */ 489 const char *string_end = p - 1; 490 491 /* Accumulate a number. */ 492 { 493 size_t n = 0; 494 495 do 496 { 497 n = n * 10 + (*p - '0'); 498 p++; 499 } 500 while (p < s); 501 502 { 503 size_t string_length = string_end - string_start; 504 char *string = XNMALLOC (string_length + 1, char); 505 506 memcpy (string, string_start, string_length); 507 string[string_length] = '\0'; 508 509 po_callback_comment_filepos (string, n); 510 511 free (string); 512 513 continue; 514 } 515 } 516 } 517 } 518 519 /* Parsed a file comment without line number. */ 520 { 521 const char *string_end = s; 522 size_t string_length = string_end - string_start; 523 char *string = XNMALLOC (string_length + 1, char); 524 525 memcpy (string, string_start, string_length); 526 string[string_length] = '\0'; 527 528 po_callback_comment_filepos (string, (size_t)(-1)); 529 530 free (string); 531 } 532 } 533 } 534} 535 536 537/* Parse a SunOS or Solaris style file comment. 538 Syntax of SunOS style: 539 FILE_KEYWORD COLON STRING COMMA LINE_KEYWORD COLON NUMBER 540 Syntax of Solaris style: 541 FILE_KEYWORD COLON STRING COMMA LINE_KEYWORD NUMBER_KEYWORD COLON NUMBER 542 where 543 FILE_KEYWORD ::= "file" | "File" 544 COLON ::= ":" 545 COMMA ::= "," 546 LINE_KEYWORD ::= "line" 547 NUMBER_KEYWORD ::= "number" 548 NUMBER ::= [0-9]+ 549 Return true if parsed, false if not a comment of this form. 
*/ 550static bool 551po_parse_comment_solaris_filepos (const char *s) 552{ 553 if (s[0] == ' ' 554 && (s[1] == 'F' || s[1] == 'f') 555 && s[2] == 'i' && s[3] == 'l' && s[4] == 'e' 556 && s[5] == ':') 557 { 558 const char *string_start; 559 const char *string_end; 560 561 { 562 const char *p = s + 6; 563 564 while (*p == ' ' || *p == '\t') 565 p++; 566 string_start = p; 567 } 568 569 for (string_end = string_start; *string_end != '\0'; string_end++) 570 { 571 const char *p = string_end; 572 573 while (*p == ' ' || *p == '\t') 574 p++; 575 576 if (*p == ',') 577 { 578 p++; 579 580 while (*p == ' ' || *p == '\t') 581 p++; 582 583 if (p[0] == 'l' && p[1] == 'i' && p[2] == 'n' && p[3] == 'e') 584 { 585 p += 4; 586 587 while (*p == ' ' || *p == '\t') 588 p++; 589 590 if (p[0] == 'n' && p[1] == 'u' && p[2] == 'm' 591 && p[3] == 'b' && p[4] == 'e' && p[5] == 'r') 592 { 593 p += 6; 594 while (*p == ' ' || *p == '\t') 595 p++; 596 } 597 598 if (*p == ':') 599 { 600 p++; 601 602 if (*p >= '0' && *p <= '9') 603 { 604 /* Accumulate a number. */ 605 size_t n = 0; 606 607 do 608 { 609 n = n * 10 + (*p - '0'); 610 p++; 611 } 612 while (*p >= '0' && *p <= '9'); 613 614 while (*p == ' ' || *p == '\t' || *p == '\n') 615 p++; 616 617 if (*p == '\0') 618 { 619 /* Parsed a Sun style file comment. */ 620 size_t string_length = string_end - string_start; 621 char *string = 622 XNMALLOC (string_length + 1, char); 623 624 memcpy (string, string_start, string_length); 625 string[string_length] = '\0'; 626 627 po_callback_comment_filepos (string, n); 628 629 free (string); 630 return true; 631 } 632 } 633 } 634 } 635 } 636 } 637 } 638 639 return false; 640} 641 642 643/* This function is called by po_gram_lex() whenever a comment is 644 seen. It analyzes the comment to see what sort it is, and then 645 dispatches it to the appropriate method: call_comment, call_comment_dot, 646 call_comment_filepos (via po_parse_comment_filepos), or 647 call_comment_special. 
*/
void
po_callback_comment_dispatcher (const char *s)
{
  /* The first character of S decides the kind of comment.  */
  switch (*s)
    {
    case '.':
      {
        const char *text = s + 1;

        /* There is usually a space before the comment.  People don't
           consider it part of the comment, therefore remove it here.  */
        if (*text == ' ')
          text++;
        po_callback_comment_dot (text);
      }
      break;

    case ':':
      /* Parse the file location string.  The appropriate callback will
         be invoked.  */
      po_parse_comment_filepos (s + 1);
      break;

    case ',':
    case '!':
      /* Get all entries in the special comment line.  */
      po_callback_comment_special (s + 1);
      break;

    default:
      /* It looks like a plain vanilla comment, but Solaris-style file
         position lines do, too.  Try to parse the lot.  If the parse
         succeeds, the appropriate callback has been invoked and nothing
         is left to do.  */
      if (!po_parse_comment_solaris_filepos (s))
        {
          /* There is usually a space before the comment.  People don't
             consider it part of the comment, therefore remove it
             here.  */
          if (*s == ' ')
            s++;
          po_callback_comment (s);
        }
      break;
    }
}