label.y revision 75584
1/* -*- C++ -*- 2 Copyright (C) 1989, 1990, 1991, 1992, 2000 Free Software Foundation, Inc. 3 Written by James Clark (jjc@jclark.com) 4 5This file is part of groff. 6 7groff is free software; you can redistribute it and/or modify it under 8the terms of the GNU General Public License as published by the Free 9Software Foundation; either version 2, or (at your option) any later 10version. 11 12groff is distributed in the hope that it will be useful, but WITHOUT ANY 13WARRANTY; without even the implied warranty of MERCHANTABILITY or 14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15for more details. 16 17You should have received a copy of the GNU General Public License along 18with groff; see the file COPYING. If not, write to the Free Software 19Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ 20 21%{ 22 23#include "refer.h" 24#include "refid.h" 25#include "ref.h" 26#include "token.h" 27 28int yylex(); 29void yyerror(const char *); 30int yyparse(); 31 32static const char *format_serial(char c, int n); 33 34struct label_info { 35 int start; 36 int length; 37 int count; 38 int total; 39 label_info(const string &); 40}; 41 42label_info *lookup_label(const string &label); 43 44struct expression { 45 enum { 46 // Does the tentative label depend on the reference? 47 CONTAINS_VARIABLE = 01, 48 CONTAINS_STAR = 02, 49 CONTAINS_FORMAT = 04, 50 CONTAINS_AT = 010 51 }; 52 virtual ~expression() { } 53 virtual void evaluate(int, const reference &, string &, 54 substring_position &) = 0; 55 virtual unsigned analyze() { return 0; } 56}; 57 58class at_expr : public expression { 59public: 60 at_expr() { } 61 void evaluate(int, const reference &, string &, substring_position &); 62 unsigned analyze() { return CONTAINS_VARIABLE|CONTAINS_AT; } 63}; 64 65class format_expr : public expression { 66 char type; 67 int width; 68 int first_number; 69public: 70 format_expr(char c, int w = 0, int f = 1) 71 : type(c), width(w), first_number(f) { } 72 void evaluate(int, const reference &, string &, substring_position &); 73 unsigned analyze() { return CONTAINS_FORMAT; } 74}; 75 76class field_expr : public expression { 77 int number; 78 char name; 79public: 80 field_expr(char nm, int num) : number(num), name(nm) { } 81 void evaluate(int, const reference &, string &, substring_position &); 82 unsigned analyze() { return CONTAINS_VARIABLE; } 83}; 84 85class literal_expr : public expression { 86 string s; 87public: 88 literal_expr(const char *ptr, int len) : s(ptr, len) { } 89 void evaluate(int, const reference &, string &, substring_position &); 90}; 91 92class unary_expr : public expression { 93protected: 94 expression *expr; 95public: 96 unary_expr(expression *e) : expr(e) { } 97 ~unary_expr() { delete expr; } 98 void evaluate(int, const reference &, string &, substring_position &) = 0; 99 unsigned analyze() { return expr ? expr->analyze() : 0; } 100}; 101 102// This caches the analysis of an expression. 103 104class analyzed_expr : public unary_expr { 105 unsigned flags; 106public: 107 analyzed_expr(expression *); 108 void evaluate(int, const reference &, string &, substring_position &); 109 unsigned analyze() { return flags; } 110}; 111 112class star_expr : public unary_expr { 113public: 114 star_expr(expression *e) : unary_expr(e) { } 115 void evaluate(int, const reference &, string &, substring_position &); 116 unsigned analyze() { 117 return ((expr ? (expr->analyze() & ~CONTAINS_VARIABLE) : 0) 118 | CONTAINS_STAR); 119 } 120}; 121 122typedef void map_func(const char *, const char *, string &); 123 124class map_expr : public unary_expr { 125 map_func *func; 126public: 127 map_expr(expression *e, map_func *f) : unary_expr(e), func(f) { } 128 void evaluate(int, const reference &, string &, substring_position &); 129}; 130 131typedef const char *extractor_func(const char *, const char *, const char **); 132 133class extractor_expr : public unary_expr { 134 int part; 135 extractor_func *func; 136public: 137 enum { BEFORE = +1, MATCH = 0, AFTER = -1 }; 138 extractor_expr(expression *e, extractor_func *f, int pt) 139 : unary_expr(e), part(pt), func(f) { } 140 void evaluate(int, const reference &, string &, substring_position &); 141}; 142 143class truncate_expr : public unary_expr { 144 int n; 145public: 146 truncate_expr(expression *e, int i) : unary_expr(e), n(i) { } 147 void evaluate(int, const reference &, string &, substring_position &); 148}; 149 150class separator_expr : public unary_expr { 151public: 152 separator_expr(expression *e) : unary_expr(e) { } 153 void evaluate(int, const reference &, string &, substring_position &); 154}; 155 156class binary_expr : public expression { 157protected: 158 expression *expr1; 159 expression *expr2; 160public: 161 binary_expr(expression *e1, expression *e2) : expr1(e1), expr2(e2) { } 162 ~binary_expr() { delete expr1; delete expr2; } 163 void evaluate(int, const reference &, string &, substring_position &) = 0; 164 unsigned analyze() { 165 return (expr1 ? expr1->analyze() : 0) | (expr2 ? expr2->analyze() : 0); 166 } 167}; 168 169class alternative_expr : public binary_expr { 170public: 171 alternative_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { } 172 void evaluate(int, const reference &, string &, substring_position &); 173}; 174 175class list_expr : public binary_expr { 176public: 177 list_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { } 178 void evaluate(int, const reference &, string &, substring_position &); 179}; 180 181class substitute_expr : public binary_expr { 182public: 183 substitute_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { } 184 void evaluate(int, const reference &, string &, substring_position &); 185}; 186 187class ternary_expr : public expression { 188protected: 189 expression *expr1; 190 expression *expr2; 191 expression *expr3; 192public: 193 ternary_expr(expression *e1, expression *e2, expression *e3) 194 : expr1(e1), expr2(e2), expr3(e3) { } 195 ~ternary_expr() { delete expr1; delete expr2; delete expr3; } 196 void evaluate(int, const reference &, string &, substring_position &) = 0; 197 unsigned analyze() { 198 return ((expr1 ? expr1->analyze() : 0) 199 | (expr2 ? expr2->analyze() : 0) 200 | (expr3 ? expr3->analyze() : 0)); 201 } 202}; 203 204class conditional_expr : public ternary_expr { 205public: 206 conditional_expr(expression *e1, expression *e2, expression *e3) 207 : ternary_expr(e1, e2, e3) { } 208 void evaluate(int, const reference &, string &, substring_position &); 209}; 210 211static expression *parsed_label = 0; 212static expression *parsed_date_label = 0; 213static expression *parsed_short_label = 0; 214 215static expression *parse_result; 216 217string literals; 218 219%} 220 221%union { 222 int num; 223 expression *expr; 224 struct { int ndigits; int val; } dig; 225 struct { int start; int len; } str; 226} 227 228/* uppercase or lowercase letter */ 229%token <num> TOKEN_LETTER 230/* literal characters */ 231%token <str> TOKEN_LITERAL 232/* digit */ 233%token <num> TOKEN_DIGIT 234 235%type <expr> conditional 236%type <expr> alternative 237%type <expr> list 238%type <expr> string 239%type <expr> substitute 240%type <expr> optional_conditional 241%type <num> number 242%type <dig> digits 243%type <num> optional_number 244%type <num> flag 245 246%% 247 248expr: 249 optional_conditional 250 { parse_result = ($1 ? new analyzed_expr($1) : 0); } 251 ; 252 253conditional: 254 alternative 255 { $$ = $1; } 256 | alternative '?' optional_conditional ':' conditional 257 { $$ = new conditional_expr($1, $3, $5); } 258 ; 259 260optional_conditional: 261 /* empty */ 262 { $$ = 0; } 263 | conditional 264 { $$ = $1; } 265 ; 266 267alternative: 268 list 269 { $$ = $1; } 270 | alternative '|' list 271 { $$ = new alternative_expr($1, $3); } 272 | alternative '&' list 273 { $$ = new conditional_expr($1, $3, 0); } 274 ; 275 276list: 277 substitute 278 { $$ = $1; } 279 | list substitute 280 { $$ = new list_expr($1, $2); } 281 ; 282 283substitute: 284 string 285 { $$ = $1; } 286 | substitute '~' string 287 { $$ = new substitute_expr($1, $3); } 288 ; 289 290string: 291 '@' 292 { $$ = new at_expr; } 293 | TOKEN_LITERAL 294 { 295 $$ = new literal_expr(literals.contents() + $1.start, 296 $1.len); 297 } 298 | TOKEN_LETTER 299 { $$ = new field_expr($1, 0); } 300 | TOKEN_LETTER number 301 { $$ = new field_expr($1, $2 - 1); } 302 | '%' TOKEN_LETTER 303 { 304 switch ($2) { 305 case 'I': 306 case 'i': 307 case 'A': 308 case 'a': 309 $$ = new format_expr($2); 310 break; 311 default: 312 command_error("unrecognized format `%1'", char($2)); 313 $$ = new format_expr('a'); 314 break; 315 } 316 } 317 318 | '%' digits 319 { 320 $$ = new format_expr('0', $2.ndigits, $2.val); 321 } 322 | string '.' flag TOKEN_LETTER optional_number 323 { 324 switch ($4) { 325 case 'l': 326 $$ = new map_expr($1, lowercase); 327 break; 328 case 'u': 329 $$ = new map_expr($1, uppercase); 330 break; 331 case 'c': 332 $$ = new map_expr($1, capitalize); 333 break; 334 case 'r': 335 $$ = new map_expr($1, reverse_name); 336 break; 337 case 'a': 338 $$ = new map_expr($1, abbreviate_name); 339 break; 340 case 'y': 341 $$ = new extractor_expr($1, find_year, $3); 342 break; 343 case 'n': 344 $$ = new extractor_expr($1, find_last_name, $3); 345 break; 346 default: 347 $$ = $1; 348 command_error("unknown function `%1'", char($4)); 349 break; 350 } 351 } 352 353 | string '+' number 354 { $$ = new truncate_expr($1, $3); } 355 | string '-' number 356 { $$ = new truncate_expr($1, -$3); } 357 | string '*' 358 { $$ = new star_expr($1); } 359 | '(' optional_conditional ')' 360 { $$ = $2; } 361 | '<' optional_conditional '>' 362 { $$ = new separator_expr($2); } 363 ; 364 365optional_number: 366 /* empty */ 367 { $$ = -1; } 368 | number 369 { $$ = $1; } 370 ; 371 372number: 373 TOKEN_DIGIT 374 { $$ = $1; } 375 | number TOKEN_DIGIT 376 { $$ = $1*10 + $2; } 377 ; 378 379digits: 380 TOKEN_DIGIT 381 { $$.ndigits = 1; $$.val = $1; } 382 | digits TOKEN_DIGIT 383 { $$.ndigits = $1.ndigits + 1; $$.val = $1.val*10 + $2; } 384 ; 385 386 387flag: 388 /* empty */ 389 { $$ = 0; } 390 | '+' 391 { $$ = 1; } 392 | '-' 393 { $$ = -1; } 394 ; 395 396%% 397 398/* bison defines const to be empty unless __STDC__ is defined, which it 399isn't under cfront */ 400 401#ifdef const 402#undef const 403#endif 404 405const char *spec_ptr; 406const char *spec_end; 407const char *spec_cur; 408 409int yylex() 410{ 411 while (spec_ptr < spec_end && csspace(*spec_ptr)) 412 spec_ptr++; 413 spec_cur = spec_ptr; 414 if (spec_ptr >= spec_end) 415 return 0; 416 unsigned char c = *spec_ptr++; 417 if (csalpha(c)) { 418 yylval.num = c; 419 return TOKEN_LETTER; 420 } 421 if (csdigit(c)) { 422 yylval.num = c - '0'; 423 return TOKEN_DIGIT; 424 } 425 if (c == '\'') { 426 yylval.str.start = literals.length(); 427 for (; spec_ptr < spec_end; spec_ptr++) { 428 if (*spec_ptr == '\'') { 429 if (++spec_ptr < spec_end && *spec_ptr == '\'') 430 literals += '\''; 431 else { 432 yylval.str.len = literals.length() - yylval.str.start; 433 return TOKEN_LITERAL; 434 } 435 } 436 else 437 literals += *spec_ptr; 438 } 439 yylval.str.len = literals.length() - yylval.str.start; 440 return TOKEN_LITERAL; 441 } 442 return c; 443} 444 445int set_label_spec(const char *label_spec) 446{ 447 spec_cur = spec_ptr = label_spec; 448 spec_end = strchr(label_spec, '\0'); 449 literals.clear(); 450 if (yyparse()) 451 return 0; 452 delete parsed_label; 453 parsed_label = parse_result; 454 return 1; 455} 456 457int set_date_label_spec(const char *label_spec) 458{ 459 spec_cur = spec_ptr = label_spec; 460 spec_end = strchr(label_spec, '\0'); 461 literals.clear(); 462 if (yyparse()) 463 return 0; 464 delete parsed_date_label; 465 parsed_date_label = parse_result; 466 return 1; 467} 468 469int set_short_label_spec(const char *label_spec) 470{ 471 spec_cur = spec_ptr = label_spec; 472 spec_end = strchr(label_spec, '\0'); 473 literals.clear(); 474 if (yyparse()) 475 return 0; 476 delete parsed_short_label; 477 parsed_short_label = parse_result; 478 return 1; 479} 480 481void yyerror(const char *message) 482{ 483 if (spec_cur < spec_end) 484 command_error("label specification %1 before `%2'", message, spec_cur); 485 else 486 command_error("label specification %1 at end of string", 487 message, spec_cur); 488} 489 490void at_expr::evaluate(int tentative, const reference &ref, 491 string &result, substring_position &) 492{ 493 if (tentative) 494 ref.canonicalize_authors(result); 495 else { 496 const char *end, *start = ref.get_authors(&end); 497 if (start) 498 result.append(start, end - start); 499 } 500} 501 502void format_expr::evaluate(int tentative, const reference &ref, 503 string &result, substring_position &) 504{ 505 if (tentative) 506 return; 507 const label_info *lp = ref.get_label_ptr(); 508 int num = lp == 0 ? ref.get_number() : lp->count; 509 if (type != '0') 510 result += format_serial(type, num + 1); 511 else { 512 const char *ptr = i_to_a(num + first_number); 513 int pad = width - strlen(ptr); 514 while (--pad >= 0) 515 result += '0'; 516 result += ptr; 517 } 518} 519 520static const char *format_serial(char c, int n) 521{ 522 assert(n > 0); 523 static char buf[128]; // more than enough. 524 switch (c) { 525 case 'i': 526 case 'I': 527 { 528 char *p = buf; 529 // troff uses z and w to represent 10000 and 5000 in Roman 530 // numerals; I can find no historical basis for this usage 531 const char *s = c == 'i' ? "zwmdclxvi" : "ZWMDCLXVI"; 532 if (n >= 40000) 533 return i_to_a(n); 534 while (n >= 10000) { 535 *p++ = s[0]; 536 n -= 10000; 537 } 538 for (int i = 1000; i > 0; i /= 10, s += 2) { 539 int m = n/i; 540 n -= m*i; 541 switch (m) { 542 case 3: 543 *p++ = s[2]; 544 /* falls through */ 545 case 2: 546 *p++ = s[2]; 547 /* falls through */ 548 case 1: 549 *p++ = s[2]; 550 break; 551 case 4: 552 *p++ = s[2]; 553 *p++ = s[1]; 554 break; 555 case 8: 556 *p++ = s[1]; 557 *p++ = s[2]; 558 *p++ = s[2]; 559 *p++ = s[2]; 560 break; 561 case 7: 562 *p++ = s[1]; 563 *p++ = s[2]; 564 *p++ = s[2]; 565 break; 566 case 6: 567 *p++ = s[1]; 568 *p++ = s[2]; 569 break; 570 case 5: 571 *p++ = s[1]; 572 break; 573 case 9: 574 *p++ = s[2]; 575 *p++ = s[0]; 576 } 577 } 578 *p = 0; 579 break; 580 } 581 case 'a': 582 case 'A': 583 { 584 char *p = buf; 585 // this is derived from troff/reg.c 586 while (n > 0) { 587 int d = n % 26; 588 if (d == 0) 589 d = 26; 590 n -= d; 591 n /= 26; 592 *p++ = c + d - 1; // ASCII dependent 593 } 594 *p-- = 0; 595 // Reverse it. 596 char *q = buf; 597 while (q < p) { 598 char temp = *q; 599 *q = *p; 600 *p = temp; 601 --p; 602 ++q; 603 } 604 break; 605 } 606 default: 607 assert(0); 608 } 609 return buf; 610} 611 612void field_expr::evaluate(int, const reference &ref, 613 string &result, substring_position &) 614{ 615 const char *end; 616 const char *start = ref.get_field(name, &end); 617 if (start) { 618 start = nth_field(number, start, &end); 619 if (start) 620 result.append(start, end - start); 621 } 622} 623 624void literal_expr::evaluate(int, const reference &, 625 string &result, substring_position &) 626{ 627 result += s; 628} 629 630analyzed_expr::analyzed_expr(expression *e) 631: unary_expr(e), flags(e ? e->analyze() : 0) 632{ 633} 634 635void analyzed_expr::evaluate(int tentative, const reference &ref, 636 string &result, substring_position &pos) 637{ 638 if (expr) 639 expr->evaluate(tentative, ref, result, pos); 640} 641 642void star_expr::evaluate(int tentative, const reference &ref, 643 string &result, substring_position &pos) 644{ 645 const label_info *lp = ref.get_label_ptr(); 646 if (!tentative 647 && (lp == 0 || lp->total > 1) 648 && expr) 649 expr->evaluate(tentative, ref, result, pos); 650} 651 652void separator_expr::evaluate(int tentative, const reference &ref, 653 string &result, substring_position &pos) 654{ 655 int start_length = result.length(); 656 int is_first = pos.start < 0; 657 if (expr) 658 expr->evaluate(tentative, ref, result, pos); 659 if (is_first) { 660 pos.start = start_length; 661 pos.length = result.length() - start_length; 662 } 663} 664 665void map_expr::evaluate(int tentative, const reference &ref, 666 string &result, substring_position &) 667{ 668 if (expr) { 669 string temp; 670 substring_position temp_pos; 671 expr->evaluate(tentative, ref, temp, temp_pos); 672 (*func)(temp.contents(), temp.contents() + temp.length(), result); 673 } 674} 675 676void extractor_expr::evaluate(int tentative, const reference &ref, 677 string &result, substring_position &) 678{ 679 if (expr) { 680 string temp; 681 substring_position temp_pos; 682 expr->evaluate(tentative, ref, temp, temp_pos); 683 const char *end, *start = (*func)(temp.contents(), 684 temp.contents() + temp.length(), 685 &end); 686 switch (part) { 687 case BEFORE: 688 if (start) 689 result.append(temp.contents(), start - temp.contents()); 690 else 691 result += temp; 692 break; 693 case MATCH: 694 if (start) 695 result.append(start, end - start); 696 break; 697 case AFTER: 698 if (start) 699 result.append(end, temp.contents() + temp.length() - end); 700 break; 701 default: 702 assert(0); 703 } 704 } 705} 706 707static void first_part(int len, const char *ptr, const char *end, 708 string &result) 709{ 710 for (;;) { 711 const char *token_start = ptr; 712 if (!get_token(&ptr, end)) 713 break; 714 const token_info *ti = lookup_token(token_start, ptr); 715 int counts = ti->sortify_non_empty(token_start, ptr); 716 if (counts && --len < 0) 717 break; 718 if (counts || ti->is_accent()) 719 result.append(token_start, ptr - token_start); 720 } 721} 722 723static void last_part(int len, const char *ptr, const char *end, 724 string &result) 725{ 726 const char *start = ptr; 727 int count = 0; 728 for (;;) { 729 const char *token_start = ptr; 730 if (!get_token(&ptr, end)) 731 break; 732 const token_info *ti = lookup_token(token_start, ptr); 733 if (ti->sortify_non_empty(token_start, ptr)) 734 count++; 735 } 736 ptr = start; 737 int skip = count - len; 738 if (skip > 0) { 739 for (;;) { 740 const char *token_start = ptr; 741 if (!get_token(&ptr, end)) 742 assert(0); 743 const token_info *ti = lookup_token(token_start, ptr); 744 if (ti->sortify_non_empty(token_start, ptr) && --skip < 0) { 745 ptr = token_start; 746 break; 747 } 748 } 749 } 750 first_part(len, ptr, end, result); 751} 752 753void truncate_expr::evaluate(int tentative, const reference &ref, 754 string &result, substring_position &) 755{ 756 if (expr) { 757 string temp; 758 substring_position temp_pos; 759 expr->evaluate(tentative, ref, temp, temp_pos); 760 const char *start = temp.contents(); 761 const char *end = start + temp.length(); 762 if (n > 0) 763 first_part(n, start, end, result); 764 else if (n < 0) 765 last_part(-n, start, end, result); 766 } 767} 768 769void alternative_expr::evaluate(int tentative, const reference &ref, 770 string &result, substring_position &pos) 771{ 772 int start_length = result.length(); 773 if (expr1) 774 expr1->evaluate(tentative, ref, result, pos); 775 if (result.length() == start_length && expr2) 776 expr2->evaluate(tentative, ref, result, pos); 777} 778 779void list_expr::evaluate(int tentative, const reference &ref, 780 string &result, substring_position &pos) 781{ 782 if (expr1) 783 expr1->evaluate(tentative, ref, result, pos); 784 if (expr2) 785 expr2->evaluate(tentative, ref, result, pos); 786} 787 788void substitute_expr::evaluate(int tentative, const reference &ref, 789 string &result, substring_position &pos) 790{ 791 int start_length = result.length(); 792 if (expr1) 793 expr1->evaluate(tentative, ref, result, pos); 794 if (result.length() > start_length && result[result.length() - 1] == '-') { 795 // ought to see if pos covers the - 796 result.set_length(result.length() - 1); 797 if (expr2) 798 expr2->evaluate(tentative, ref, result, pos); 799 } 800} 801 802void conditional_expr::evaluate(int tentative, const reference &ref, 803 string &result, substring_position &pos) 804{ 805 string temp; 806 substring_position temp_pos; 807 if (expr1) 808 expr1->evaluate(tentative, ref, temp, temp_pos); 809 if (temp.length() > 0) { 810 if (expr2) 811 expr2->evaluate(tentative, ref, result, pos); 812 } 813 else { 814 if (expr3) 815 expr3->evaluate(tentative, ref, result, pos); 816 } 817} 818 819void reference::pre_compute_label() 820{ 821 if (parsed_label != 0 822 && (parsed_label->analyze() & expression::CONTAINS_VARIABLE)) { 823 label.clear(); 824 substring_position temp_pos; 825 parsed_label->evaluate(1, *this, label, temp_pos); 826 label_ptr = lookup_label(label); 827 } 828} 829 830void reference::compute_label() 831{ 832 label.clear(); 833 if (parsed_label) 834 parsed_label->evaluate(0, *this, label, separator_pos); 835 if (short_label_flag && parsed_short_label) 836 parsed_short_label->evaluate(0, *this, short_label, short_separator_pos); 837 if (date_as_label) { 838 string new_date; 839 if (parsed_date_label) { 840 substring_position temp_pos; 841 parsed_date_label->evaluate(0, *this, new_date, temp_pos); 842 } 843 set_date(new_date); 844 } 845 if (label_ptr) 846 label_ptr->count += 1; 847} 848 849void reference::immediate_compute_label() 850{ 851 if (label_ptr) 852 label_ptr->total = 2; // force use of disambiguator 853 compute_label(); 854} 855 856int reference::merge_labels(reference **v, int n, label_type type, 857 string &result) 858{ 859 if (abbreviate_label_ranges) 860 return merge_labels_by_number(v, n, type, result); 861 else 862 return merge_labels_by_parts(v, n, type, result); 863} 864 865int reference::merge_labels_by_number(reference **v, int n, label_type type, 866 string &result) 867{ 868 if (n <= 1) 869 return 0; 870 int num = get_number(); 871 // Only merge three or more labels. 872 if (v[0]->get_number() != num + 1 873 || v[1]->get_number() != num + 2) 874 return 0; 875 int i; 876 for (i = 2; i < n; i++) 877 if (v[i]->get_number() != num + i + 1) 878 break; 879 result = get_label(type); 880 result += label_range_indicator; 881 result += v[i - 1]->get_label(type); 882 return i; 883} 884 885const substring_position &reference::get_separator_pos(label_type type) const 886{ 887 if (type == SHORT_LABEL && short_label_flag) 888 return short_separator_pos; 889 else 890 return separator_pos; 891} 892 893const string &reference::get_label(label_type type) const 894{ 895 if (type == SHORT_LABEL && short_label_flag) 896 return short_label; 897 else 898 return label; 899} 900 901int reference::merge_labels_by_parts(reference **v, int n, label_type type, 902 string &result) 903{ 904 if (n <= 0) 905 return 0; 906 const string &lb = get_label(type); 907 const substring_position &sp = get_separator_pos(type); 908 if (sp.start < 0 909 || sp.start != v[0]->get_separator_pos(type).start 910 || memcmp(lb.contents(), v[0]->get_label(type).contents(), 911 sp.start) != 0) 912 return 0; 913 result = lb; 914 int i = 0; 915 do { 916 result += separate_label_second_parts; 917 const substring_position &s = v[i]->get_separator_pos(type); 918 int sep_end_pos = s.start + s.length; 919 result.append(v[i]->get_label(type).contents() + sep_end_pos, 920 v[i]->get_label(type).length() - sep_end_pos); 921 } while (++i < n 922 && sp.start == v[i]->get_separator_pos(type).start 923 && memcmp(lb.contents(), v[i]->get_label(type).contents(), 924 sp.start) == 0); 925 return i; 926} 927 928string label_pool; 929 930label_info::label_info(const string &s) 931: start(label_pool.length()), length(s.length()), count(0), total(1) 932{ 933 label_pool += s; 934} 935 936static label_info **label_table = 0; 937static int label_table_size = 0; 938static int label_table_used = 0; 939 940label_info *lookup_label(const string &label) 941{ 942 if (label_table == 0) { 943 label_table = new label_info *[17]; 944 label_table_size = 17; 945 for (int i = 0; i < 17; i++) 946 label_table[i] = 0; 947 } 948 unsigned h = hash_string(label.contents(), label.length()) % label_table_size; 949 label_info **ptr; 950 for (ptr = label_table + h; 951 *ptr != 0; 952 (ptr == label_table) 953 ? (ptr = label_table + label_table_size - 1) 954 : ptr--) 955 if ((*ptr)->length == label.length() 956 && memcmp(label_pool.contents() + (*ptr)->start, label.contents(), 957 label.length()) == 0) { 958 (*ptr)->total += 1; 959 return *ptr; 960 } 961 label_info *result = *ptr = new label_info(label); 962 if (++label_table_used * 2 > label_table_size) { 963 // Rehash the table. 964 label_info **old_table = label_table; 965 int old_size = label_table_size; 966 label_table_size = next_size(label_table_size); 967 label_table = new label_info *[label_table_size]; 968 int i; 969 for (i = 0; i < label_table_size; i++) 970 label_table[i] = 0; 971 for (i = 0; i < old_size; i++) 972 if (old_table[i]) { 973 unsigned h = hash_string(label_pool.contents() + old_table[i]->start, 974 old_table[i]->length); 975 label_info **p; 976 for (p = label_table + (h % label_table_size); 977 *p != 0; 978 (p == label_table) 979 ? (p = label_table + label_table_size - 1) 980 : --p) 981 ; 982 *p = old_table[i]; 983 } 984 a_delete old_table; 985 } 986 return result; 987} 988 989void clear_labels() 990{ 991 for (int i = 0; i < label_table_size; i++) { 992 delete label_table[i]; 993 label_table[i] = 0; 994 } 995 label_table_used = 0; 996 label_pool.clear(); 997} 998 999static void consider_authors(reference **start, reference **end, int i); 1000 1001void compute_labels(reference **v, int n) 1002{ 1003 if (parsed_label 1004 && (parsed_label->analyze() & expression::CONTAINS_AT) 1005 && sort_fields.length() >= 2 1006 && sort_fields[0] == 'A' 1007 && sort_fields[1] == '+') 1008 consider_authors(v, v + n, 0); 1009 for (int i = 0; i < n; i++) 1010 v[i]->compute_label(); 1011} 1012 1013 1014/* A reference with a list of authors <A0,A1,...,AN> _needs_ author i 1015where 0 <= i <= N if there exists a reference with a list of authors 1016<B0,B1,...,BM> such that <A0,A1,...,AN> != <B0,B1,...,BM> and M >= i 1017and Aj = Bj for 0 <= j < i. In this case if we can't say ``A0, 1018A1,...,A(i-1) et al'' because this would match both <A0,A1,...,AN> and 1019<B0,B1,...,BM>. If a reference needs author i we only have to call 1020need_author(j) for some j >= i such that the reference also needs 1021author j. */ 1022 1023/* This function handles 2 tasks: 1024determine which authors are needed (cannot be elided with et al.); 1025determine which authors can have only last names in the labels. 1026 1027References >= start and < end have the same first i author names. 1028Also they're sorted by A+. */ 1029 1030static void consider_authors(reference **start, reference **end, int i) 1031{ 1032 if (start >= end) 1033 return; 1034 reference **p = start; 1035 if (i >= (*p)->get_nauthors()) { 1036 for (++p; p < end && i >= (*p)->get_nauthors(); p++) 1037 ; 1038 if (p < end && i > 0) { 1039 // If we have an author list <A B C> and an author list <A B C D>, 1040 // then both lists need C. 1041 for (reference **q = start; q < end; q++) 1042 (*q)->need_author(i - 1); 1043 } 1044 start = p; 1045 } 1046 while (p < end) { 1047 reference **last_name_start = p; 1048 reference **name_start = p; 1049 for (++p; 1050 p < end && i < (*p)->get_nauthors() 1051 && same_author_last_name(**last_name_start, **p, i); 1052 p++) { 1053 if (!same_author_name(**name_start, **p, i)) { 1054 consider_authors(name_start, p, i + 1); 1055 name_start = p; 1056 } 1057 } 1058 consider_authors(name_start, p, i + 1); 1059 if (last_name_start == name_start) { 1060 for (reference **q = last_name_start; q < p; q++) 1061 (*q)->set_last_name_unambiguous(i); 1062 } 1063 // If we have an author list <A B C D> and <A B C E>, then the lists 1064 // need author D and E respectively. 1065 if (name_start > start || p < end) { 1066 for (reference **q = last_name_start; q < p; q++) 1067 (*q)->need_author(i); 1068 } 1069 } 1070} 1071 1072int same_author_last_name(const reference &r1, const reference &r2, int n) 1073{ 1074 const char *ae1; 1075 const char *as1 = r1.get_sort_field(0, n, 0, &ae1); 1076 assert(as1 != 0); 1077 const char *ae2; 1078 const char *as2 = r2.get_sort_field(0, n, 0, &ae2); 1079 assert(as2 != 0); 1080 return ae1 - as1 == ae2 - as2 && memcmp(as1, as2, ae1 - as1) == 0; 1081} 1082 1083int same_author_name(const reference &r1, const reference &r2, int n) 1084{ 1085 const char *ae1; 1086 const char *as1 = r1.get_sort_field(0, n, -1, &ae1); 1087 assert(as1 != 0); 1088 const char *ae2; 1089 const char *as2 = r2.get_sort_field(0, n, -1, &ae2); 1090 assert(as2 != 0); 1091 return ae1 - as1 == ae2 - as2 && memcmp(as1, as2, ae1 - as1) == 0; 1092} 1093 1094 1095void int_set::set(int i) 1096{ 1097 assert(i >= 0); 1098 int bytei = i >> 3; 1099 if (bytei >= v.length()) { 1100 int old_length = v.length(); 1101 v.set_length(bytei + 1); 1102 for (int j = old_length; j <= bytei; j++) 1103 v[j] = 0; 1104 } 1105 v[bytei] |= 1 << (i & 7); 1106} 1107 1108int int_set::get(int i) const 1109{ 1110 assert(i >= 0); 1111 int bytei = i >> 3; 1112 return bytei >= v.length() ? 0 : (v[bytei] & (1 << (i & 7))) != 0; 1113} 1114 1115void reference::set_last_name_unambiguous(int i) 1116{ 1117 last_name_unambiguous.set(i); 1118} 1119 1120void reference::need_author(int n) 1121{ 1122 if (n > last_needed_author) 1123 last_needed_author = n; 1124} 1125 1126const char *reference::get_authors(const char **end) const 1127{ 1128 if (!computed_authors) { 1129 ((reference *)this)->computed_authors = 1; 1130 string &result = ((reference *)this)->authors; 1131 int na = get_nauthors(); 1132 result.clear(); 1133 for (int i = 0; i < na; i++) { 1134 if (last_name_unambiguous.get(i)) { 1135 const char *e, *start = get_author_last_name(i, &e); 1136 assert(start != 0); 1137 result.append(start, e - start); 1138 } 1139 else { 1140 const char *e, *start = get_author(i, &e); 1141 assert(start != 0); 1142 result.append(start, e - start); 1143 } 1144 if (i == last_needed_author 1145 && et_al.length() > 0 1146 && et_al_min_elide > 0 1147 && last_needed_author + et_al_min_elide < na 1148 && na >= et_al_min_total) { 1149 result += et_al; 1150 break; 1151 } 1152 if (i < na - 1) { 1153 if (na == 2) 1154 result += join_authors_exactly_two; 1155 else if (i < na - 2) 1156 result += join_authors_default; 1157 else 1158 result += join_authors_last_two; 1159 } 1160 } 1161 } 1162 const char *start = authors.contents(); 1163 *end = start + authors.length(); 1164 return start; 1165} 1166 1167int reference::get_nauthors() const 1168{ 1169 if (nauthors < 0) { 1170 const char *dummy; 1171 int na; 1172 for (na = 0; get_author(na, &dummy) != 0; na++) 1173 ; 1174 ((reference *)this)->nauthors = na; 1175 } 1176 return nauthors; 1177} 1178