1/* $NetBSD: label.y,v 1.1.1.4 2006/02/06 18:14:37 wiz Exp $ */ 2 3/* -*- C++ -*- 4 Copyright (C) 1989, 1990, 1991, 1992, 2000, 2004 5 Free Software Foundation, Inc. 6 Written by James Clark (jjc@jclark.com) 7 8This file is part of groff. 9 10groff is free software; you can redistribute it and/or modify it under 11the terms of the GNU General Public License as published by the Free 12Software Foundation; either version 2, or (at your option) any later 13version. 14 15groff is distributed in the hope that it will be useful, but WITHOUT ANY 16WARRANTY; without even the implied warranty of MERCHANTABILITY or 17FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 18for more details. 19 20You should have received a copy of the GNU General Public License along 21with groff; see the file COPYING. If not, write to the Free Software 22Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */ 23 24%{ 25 26#include "refer.h" 27#include "refid.h" 28#include "ref.h" 29#include "token.h" 30 31int yylex(); 32void yyerror(const char *); 33int yyparse(); 34 35static const char *format_serial(char c, int n); 36 37struct label_info { 38 int start; 39 int length; 40 int count; 41 int total; 42 label_info(const string &); 43}; 44 45label_info *lookup_label(const string &label); 46 47struct expression { 48 enum { 49 // Does the tentative label depend on the reference? 50 CONTAINS_VARIABLE = 01, 51 CONTAINS_STAR = 02, 52 CONTAINS_FORMAT = 04, 53 CONTAINS_AT = 010 54 }; 55 virtual ~expression() { } 56 virtual void evaluate(int, const reference &, string &, 57 substring_position &) = 0; 58 virtual unsigned analyze() { return 0; } 59}; 60 61class at_expr : public expression { 62public: 63 at_expr() { } 64 void evaluate(int, const reference &, string &, substring_position &); 65 unsigned analyze() { return CONTAINS_VARIABLE|CONTAINS_AT; } 66}; 67 68class format_expr : public expression { 69 char type; 70 int width; 71 int first_number; 72public: 73 format_expr(char c, int w = 0, int f = 1) 74 : type(c), width(w), first_number(f) { } 75 void evaluate(int, const reference &, string &, substring_position &); 76 unsigned analyze() { return CONTAINS_FORMAT; } 77}; 78 79class field_expr : public expression { 80 int number; 81 char name; 82public: 83 field_expr(char nm, int num) : number(num), name(nm) { } 84 void evaluate(int, const reference &, string &, substring_position &); 85 unsigned analyze() { return CONTAINS_VARIABLE; } 86}; 87 88class literal_expr : public expression { 89 string s; 90public: 91 literal_expr(const char *ptr, int len) : s(ptr, len) { } 92 void evaluate(int, const reference &, string &, substring_position &); 93}; 94 95class unary_expr : public expression { 96protected: 97 expression *expr; 98public: 99 unary_expr(expression *e) : expr(e) { } 100 ~unary_expr() { delete expr; } 101 void evaluate(int, const reference &, string &, substring_position &) = 0; 102 unsigned analyze() { return expr ? expr->analyze() : 0; } 103}; 104 105// This caches the analysis of an expression. 106 107class analyzed_expr : public unary_expr { 108 unsigned flags; 109public: 110 analyzed_expr(expression *); 111 void evaluate(int, const reference &, string &, substring_position &); 112 unsigned analyze() { return flags; } 113}; 114 115class star_expr : public unary_expr { 116public: 117 star_expr(expression *e) : unary_expr(e) { } 118 void evaluate(int, const reference &, string &, substring_position &); 119 unsigned analyze() { 120 return ((expr ? (expr->analyze() & ~CONTAINS_VARIABLE) : 0) 121 | CONTAINS_STAR); 122 } 123}; 124 125typedef void map_func(const char *, const char *, string &); 126 127class map_expr : public unary_expr { 128 map_func *func; 129public: 130 map_expr(expression *e, map_func *f) : unary_expr(e), func(f) { } 131 void evaluate(int, const reference &, string &, substring_position &); 132}; 133 134typedef const char *extractor_func(const char *, const char *, const char **); 135 136class extractor_expr : public unary_expr { 137 int part; 138 extractor_func *func; 139public: 140 enum { BEFORE = +1, MATCH = 0, AFTER = -1 }; 141 extractor_expr(expression *e, extractor_func *f, int pt) 142 : unary_expr(e), part(pt), func(f) { } 143 void evaluate(int, const reference &, string &, substring_position &); 144}; 145 146class truncate_expr : public unary_expr { 147 int n; 148public: 149 truncate_expr(expression *e, int i) : unary_expr(e), n(i) { } 150 void evaluate(int, const reference &, string &, substring_position &); 151}; 152 153class separator_expr : public unary_expr { 154public: 155 separator_expr(expression *e) : unary_expr(e) { } 156 void evaluate(int, const reference &, string &, substring_position &); 157}; 158 159class binary_expr : public expression { 160protected: 161 expression *expr1; 162 expression *expr2; 163public: 164 binary_expr(expression *e1, expression *e2) : expr1(e1), expr2(e2) { } 165 ~binary_expr() { delete expr1; delete expr2; } 166 void evaluate(int, const reference &, string &, substring_position &) = 0; 167 unsigned analyze() { 168 return (expr1 ? expr1->analyze() : 0) | (expr2 ? expr2->analyze() : 0); 169 } 170}; 171 172class alternative_expr : public binary_expr { 173public: 174 alternative_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { } 175 void evaluate(int, const reference &, string &, substring_position &); 176}; 177 178class list_expr : public binary_expr { 179public: 180 list_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { } 181 void evaluate(int, const reference &, string &, substring_position &); 182}; 183 184class substitute_expr : public binary_expr { 185public: 186 substitute_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { } 187 void evaluate(int, const reference &, string &, substring_position &); 188}; 189 190class ternary_expr : public expression { 191protected: 192 expression *expr1; 193 expression *expr2; 194 expression *expr3; 195public: 196 ternary_expr(expression *e1, expression *e2, expression *e3) 197 : expr1(e1), expr2(e2), expr3(e3) { } 198 ~ternary_expr() { delete expr1; delete expr2; delete expr3; } 199 void evaluate(int, const reference &, string &, substring_position &) = 0; 200 unsigned analyze() { 201 return ((expr1 ? expr1->analyze() : 0) 202 | (expr2 ? expr2->analyze() : 0) 203 | (expr3 ? expr3->analyze() : 0)); 204 } 205}; 206 207class conditional_expr : public ternary_expr { 208public: 209 conditional_expr(expression *e1, expression *e2, expression *e3) 210 : ternary_expr(e1, e2, e3) { } 211 void evaluate(int, const reference &, string &, substring_position &); 212}; 213 214static expression *parsed_label = 0; 215static expression *parsed_date_label = 0; 216static expression *parsed_short_label = 0; 217 218static expression *parse_result; 219 220string literals; 221 222%} 223 224%union { 225 int num; 226 expression *expr; 227 struct { int ndigits; int val; } dig; 228 struct { int start; int len; } str; 229} 230 231/* uppercase or lowercase letter */ 232%token <num> TOKEN_LETTER 233/* literal characters */ 234%token <str> TOKEN_LITERAL 235/* digit */ 236%token <num> TOKEN_DIGIT 237 238%type <expr> conditional 239%type <expr> alternative 240%type <expr> list 241%type <expr> string 242%type <expr> substitute 243%type <expr> optional_conditional 244%type <num> number 245%type <dig> digits 246%type <num> optional_number 247%type <num> flag 248 249%% 250 251expr: 252 optional_conditional 253 { parse_result = ($1 ? new analyzed_expr($1) : 0); } 254 ; 255 256conditional: 257 alternative 258 { $$ = $1; } 259 | alternative '?' optional_conditional ':' conditional 260 { $$ = new conditional_expr($1, $3, $5); } 261 ; 262 263optional_conditional: 264 /* empty */ 265 { $$ = 0; } 266 | conditional 267 { $$ = $1; } 268 ; 269 270alternative: 271 list 272 { $$ = $1; } 273 | alternative '|' list 274 { $$ = new alternative_expr($1, $3); } 275 | alternative '&' list 276 { $$ = new conditional_expr($1, $3, 0); } 277 ; 278 279list: 280 substitute 281 { $$ = $1; } 282 | list substitute 283 { $$ = new list_expr($1, $2); } 284 ; 285 286substitute: 287 string 288 { $$ = $1; } 289 | substitute '~' string 290 { $$ = new substitute_expr($1, $3); } 291 ; 292 293string: 294 '@' 295 { $$ = new at_expr; } 296 | TOKEN_LITERAL 297 { 298 $$ = new literal_expr(literals.contents() + $1.start, 299 $1.len); 300 } 301 | TOKEN_LETTER 302 { $$ = new field_expr($1, 0); } 303 | TOKEN_LETTER number 304 { $$ = new field_expr($1, $2 - 1); } 305 | '%' TOKEN_LETTER 306 { 307 switch ($2) { 308 case 'I': 309 case 'i': 310 case 'A': 311 case 'a': 312 $$ = new format_expr($2); 313 break; 314 default: 315 command_error("unrecognized format `%1'", char($2)); 316 $$ = new format_expr('a'); 317 break; 318 } 319 } 320 321 | '%' digits 322 { 323 $$ = new format_expr('0', $2.ndigits, $2.val); 324 } 325 | string '.' flag TOKEN_LETTER optional_number 326 { 327 switch ($4) { 328 case 'l': 329 $$ = new map_expr($1, lowercase); 330 break; 331 case 'u': 332 $$ = new map_expr($1, uppercase); 333 break; 334 case 'c': 335 $$ = new map_expr($1, capitalize); 336 break; 337 case 'r': 338 $$ = new map_expr($1, reverse_name); 339 break; 340 case 'a': 341 $$ = new map_expr($1, abbreviate_name); 342 break; 343 case 'y': 344 $$ = new extractor_expr($1, find_year, $3); 345 break; 346 case 'n': 347 $$ = new extractor_expr($1, find_last_name, $3); 348 break; 349 default: 350 $$ = $1; 351 command_error("unknown function `%1'", char($4)); 352 break; 353 } 354 } 355 356 | string '+' number 357 { $$ = new truncate_expr($1, $3); } 358 | string '-' number 359 { $$ = new truncate_expr($1, -$3); } 360 | string '*' 361 { $$ = new star_expr($1); } 362 | '(' optional_conditional ')' 363 { $$ = $2; } 364 | '<' optional_conditional '>' 365 { $$ = new separator_expr($2); } 366 ; 367 368optional_number: 369 /* empty */ 370 { $$ = -1; } 371 | number 372 { $$ = $1; } 373 ; 374 375number: 376 TOKEN_DIGIT 377 { $$ = $1; } 378 | number TOKEN_DIGIT 379 { $$ = $1*10 + $2; } 380 ; 381 382digits: 383 TOKEN_DIGIT 384 { $$.ndigits = 1; $$.val = $1; } 385 | digits TOKEN_DIGIT 386 { $$.ndigits = $1.ndigits + 1; $$.val = $1.val*10 + $2; } 387 ; 388 389 390flag: 391 /* empty */ 392 { $$ = 0; } 393 | '+' 394 { $$ = 1; } 395 | '-' 396 { $$ = -1; } 397 ; 398 399%% 400 401/* bison defines const to be empty unless __STDC__ is defined, which it 402isn't under cfront */ 403 404#ifdef const 405#undef const 406#endif 407 408const char *spec_ptr; 409const char *spec_end; 410const char *spec_cur; 411 412static char uppercase_array[] = { 413 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 414 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 415 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 416 'Y', 'Z', 417}; 418 419static char lowercase_array[] = { 420 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 421 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 422 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 423 'y', 'z', 424}; 425 426int yylex() 427{ 428 while (spec_ptr < spec_end && csspace(*spec_ptr)) 429 spec_ptr++; 430 spec_cur = spec_ptr; 431 if (spec_ptr >= spec_end) 432 return 0; 433 unsigned char c = *spec_ptr++; 434 if (csalpha(c)) { 435 yylval.num = c; 436 return TOKEN_LETTER; 437 } 438 if (csdigit(c)) { 439 yylval.num = c - '0'; 440 return TOKEN_DIGIT; 441 } 442 if (c == '\'') { 443 yylval.str.start = literals.length(); 444 for (; spec_ptr < spec_end; spec_ptr++) { 445 if (*spec_ptr == '\'') { 446 if (++spec_ptr < spec_end && *spec_ptr == '\'') 447 literals += '\''; 448 else { 449 yylval.str.len = literals.length() - yylval.str.start; 450 return TOKEN_LITERAL; 451 } 452 } 453 else 454 literals += *spec_ptr; 455 } 456 yylval.str.len = literals.length() - yylval.str.start; 457 return TOKEN_LITERAL; 458 } 459 return c; 460} 461 462int set_label_spec(const char *label_spec) 463{ 464 spec_cur = spec_ptr = label_spec; 465 spec_end = strchr(label_spec, '\0'); 466 literals.clear(); 467 if (yyparse()) 468 return 0; 469 delete parsed_label; 470 parsed_label = parse_result; 471 return 1; 472} 473 474int set_date_label_spec(const char *label_spec) 475{ 476 spec_cur = spec_ptr = label_spec; 477 spec_end = strchr(label_spec, '\0'); 478 literals.clear(); 479 if (yyparse()) 480 return 0; 481 delete parsed_date_label; 482 parsed_date_label = parse_result; 483 return 1; 484} 485 486int set_short_label_spec(const char *label_spec) 487{ 488 spec_cur = spec_ptr = label_spec; 489 spec_end = strchr(label_spec, '\0'); 490 literals.clear(); 491 if (yyparse()) 492 return 0; 493 delete parsed_short_label; 494 parsed_short_label = parse_result; 495 return 1; 496} 497 498void yyerror(const char *message) 499{ 500 if (spec_cur < spec_end) 501 command_error("label specification %1 before `%2'", message, spec_cur); 502 else 503 command_error("label specification %1 at end of string", 504 message, spec_cur); 505} 506 507void at_expr::evaluate(int tentative, const reference &ref, 508 string &result, substring_position &) 509{ 510 if (tentative) 511 ref.canonicalize_authors(result); 512 else { 513 const char *end, *start = ref.get_authors(&end); 514 if (start) 515 result.append(start, end - start); 516 } 517} 518 519void format_expr::evaluate(int tentative, const reference &ref, 520 string &result, substring_position &) 521{ 522 if (tentative) 523 return; 524 const label_info *lp = ref.get_label_ptr(); 525 int num = lp == 0 ? ref.get_number() : lp->count; 526 if (type != '0') 527 result += format_serial(type, num + 1); 528 else { 529 const char *ptr = i_to_a(num + first_number); 530 int pad = width - strlen(ptr); 531 while (--pad >= 0) 532 result += '0'; 533 result += ptr; 534 } 535} 536 537static const char *format_serial(char c, int n) 538{ 539 assert(n > 0); 540 static char buf[128]; // more than enough. 541 switch (c) { 542 case 'i': 543 case 'I': 544 { 545 char *p = buf; 546 // troff uses z and w to represent 10000 and 5000 in Roman 547 // numerals; I can find no historical basis for this usage 548 const char *s = c == 'i' ? "zwmdclxvi" : "ZWMDCLXVI"; 549 if (n >= 40000) 550 return i_to_a(n); 551 while (n >= 10000) { 552 *p++ = s[0]; 553 n -= 10000; 554 } 555 for (int i = 1000; i > 0; i /= 10, s += 2) { 556 int m = n/i; 557 n -= m*i; 558 switch (m) { 559 case 3: 560 *p++ = s[2]; 561 /* falls through */ 562 case 2: 563 *p++ = s[2]; 564 /* falls through */ 565 case 1: 566 *p++ = s[2]; 567 break; 568 case 4: 569 *p++ = s[2]; 570 *p++ = s[1]; 571 break; 572 case 8: 573 *p++ = s[1]; 574 *p++ = s[2]; 575 *p++ = s[2]; 576 *p++ = s[2]; 577 break; 578 case 7: 579 *p++ = s[1]; 580 *p++ = s[2]; 581 *p++ = s[2]; 582 break; 583 case 6: 584 *p++ = s[1]; 585 *p++ = s[2]; 586 break; 587 case 5: 588 *p++ = s[1]; 589 break; 590 case 9: 591 *p++ = s[2]; 592 *p++ = s[0]; 593 } 594 } 595 *p = 0; 596 break; 597 } 598 case 'a': 599 case 'A': 600 { 601 char *p = buf; 602 // this is derived from troff/reg.c 603 while (n > 0) { 604 int d = n % 26; 605 if (d == 0) 606 d = 26; 607 n -= d; 608 n /= 26; 609 *p++ = c == 'a' ? lowercase_array[d - 1] : 610 uppercase_array[d - 1]; 611 } 612 *p-- = 0; 613 // Reverse it. 614 char *q = buf; 615 while (q < p) { 616 char temp = *q; 617 *q = *p; 618 *p = temp; 619 --p; 620 ++q; 621 } 622 break; 623 } 624 default: 625 assert(0); 626 } 627 return buf; 628} 629 630void field_expr::evaluate(int, const reference &ref, 631 string &result, substring_position &) 632{ 633 const char *end; 634 const char *start = ref.get_field(name, &end); 635 if (start) { 636 start = nth_field(number, start, &end); 637 if (start) 638 result.append(start, end - start); 639 } 640} 641 642void literal_expr::evaluate(int, const reference &, 643 string &result, substring_position &) 644{ 645 result += s; 646} 647 648analyzed_expr::analyzed_expr(expression *e) 649: unary_expr(e), flags(e ? e->analyze() : 0) 650{ 651} 652 653void analyzed_expr::evaluate(int tentative, const reference &ref, 654 string &result, substring_position &pos) 655{ 656 if (expr) 657 expr->evaluate(tentative, ref, result, pos); 658} 659 660void star_expr::evaluate(int tentative, const reference &ref, 661 string &result, substring_position &pos) 662{ 663 const label_info *lp = ref.get_label_ptr(); 664 if (!tentative 665 && (lp == 0 || lp->total > 1) 666 && expr) 667 expr->evaluate(tentative, ref, result, pos); 668} 669 670void separator_expr::evaluate(int tentative, const reference &ref, 671 string &result, substring_position &pos) 672{ 673 int start_length = result.length(); 674 int is_first = pos.start < 0; 675 if (expr) 676 expr->evaluate(tentative, ref, result, pos); 677 if (is_first) { 678 pos.start = start_length; 679 pos.length = result.length() - start_length; 680 } 681} 682 683void map_expr::evaluate(int tentative, const reference &ref, 684 string &result, substring_position &) 685{ 686 if (expr) { 687 string temp; 688 substring_position temp_pos; 689 expr->evaluate(tentative, ref, temp, temp_pos); 690 (*func)(temp.contents(), temp.contents() + temp.length(), result); 691 } 692} 693 694void extractor_expr::evaluate(int tentative, const reference &ref, 695 string &result, substring_position &) 696{ 697 if (expr) { 698 string temp; 699 substring_position temp_pos; 700 expr->evaluate(tentative, ref, temp, temp_pos); 701 const char *end, *start = (*func)(temp.contents(), 702 temp.contents() + temp.length(), 703 &end); 704 switch (part) { 705 case BEFORE: 706 if (start) 707 result.append(temp.contents(), start - temp.contents()); 708 else 709 result += temp; 710 break; 711 case MATCH: 712 if (start) 713 result.append(start, end - start); 714 break; 715 case AFTER: 716 if (start) 717 result.append(end, temp.contents() + temp.length() - end); 718 break; 719 default: 720 assert(0); 721 } 722 } 723} 724 725static void first_part(int len, const char *ptr, const char *end, 726 string &result) 727{ 728 for (;;) { 729 const char *token_start = ptr; 730 if (!get_token(&ptr, end)) 731 break; 732 const token_info *ti = lookup_token(token_start, ptr); 733 int counts = ti->sortify_non_empty(token_start, ptr); 734 if (counts && --len < 0) 735 break; 736 if (counts || ti->is_accent()) 737 result.append(token_start, ptr - token_start); 738 } 739} 740 741static void last_part(int len, const char *ptr, const char *end, 742 string &result) 743{ 744 const char *start = ptr; 745 int count = 0; 746 for (;;) { 747 const char *token_start = ptr; 748 if (!get_token(&ptr, end)) 749 break; 750 const token_info *ti = lookup_token(token_start, ptr); 751 if (ti->sortify_non_empty(token_start, ptr)) 752 count++; 753 } 754 ptr = start; 755 int skip = count - len; 756 if (skip > 0) { 757 for (;;) { 758 const char *token_start = ptr; 759 if (!get_token(&ptr, end)) 760 assert(0); 761 const token_info *ti = lookup_token(token_start, ptr); 762 if (ti->sortify_non_empty(token_start, ptr) && --skip < 0) { 763 ptr = token_start; 764 break; 765 } 766 } 767 } 768 first_part(len, ptr, end, result); 769} 770 771void truncate_expr::evaluate(int tentative, const reference &ref, 772 string &result, substring_position &) 773{ 774 if (expr) { 775 string temp; 776 substring_position temp_pos; 777 expr->evaluate(tentative, ref, temp, temp_pos); 778 const char *start = temp.contents(); 779 const char *end = start + temp.length(); 780 if (n > 0) 781 first_part(n, start, end, result); 782 else if (n < 0) 783 last_part(-n, start, end, result); 784 } 785} 786 787void alternative_expr::evaluate(int tentative, const reference &ref, 788 string &result, substring_position &pos) 789{ 790 int start_length = result.length(); 791 if (expr1) 792 expr1->evaluate(tentative, ref, result, pos); 793 if (result.length() == start_length && expr2) 794 expr2->evaluate(tentative, ref, result, pos); 795} 796 797void list_expr::evaluate(int tentative, const reference &ref, 798 string &result, substring_position &pos) 799{ 800 if (expr1) 801 expr1->evaluate(tentative, ref, result, pos); 802 if (expr2) 803 expr2->evaluate(tentative, ref, result, pos); 804} 805 806void substitute_expr::evaluate(int tentative, const reference &ref, 807 string &result, substring_position &pos) 808{ 809 int start_length = result.length(); 810 if (expr1) 811 expr1->evaluate(tentative, ref, result, pos); 812 if (result.length() > start_length && result[result.length() - 1] == '-') { 813 // ought to see if pos covers the - 814 result.set_length(result.length() - 1); 815 if (expr2) 816 expr2->evaluate(tentative, ref, result, pos); 817 } 818} 819 820void conditional_expr::evaluate(int tentative, const reference &ref, 821 string &result, substring_position &pos) 822{ 823 string temp; 824 substring_position temp_pos; 825 if (expr1) 826 expr1->evaluate(tentative, ref, temp, temp_pos); 827 if (temp.length() > 0) { 828 if (expr2) 829 expr2->evaluate(tentative, ref, result, pos); 830 } 831 else { 832 if (expr3) 833 expr3->evaluate(tentative, ref, result, pos); 834 } 835} 836 837void reference::pre_compute_label() 838{ 839 if (parsed_label != 0 840 && (parsed_label->analyze() & expression::CONTAINS_VARIABLE)) { 841 label.clear(); 842 substring_position temp_pos; 843 parsed_label->evaluate(1, *this, label, temp_pos); 844 label_ptr = lookup_label(label); 845 } 846} 847 848void reference::compute_label() 849{ 850 label.clear(); 851 if (parsed_label) 852 parsed_label->evaluate(0, *this, label, separator_pos); 853 if (short_label_flag && parsed_short_label) 854 parsed_short_label->evaluate(0, *this, short_label, short_separator_pos); 855 if (date_as_label) { 856 string new_date; 857 if (parsed_date_label) { 858 substring_position temp_pos; 859 parsed_date_label->evaluate(0, *this, new_date, temp_pos); 860 } 861 set_date(new_date); 862 } 863 if (label_ptr) 864 label_ptr->count += 1; 865} 866 867void reference::immediate_compute_label() 868{ 869 if (label_ptr) 870 label_ptr->total = 2; // force use of disambiguator 871 compute_label(); 872} 873 874int reference::merge_labels(reference **v, int n, label_type type, 875 string &result) 876{ 877 if (abbreviate_label_ranges) 878 return merge_labels_by_number(v, n, type, result); 879 else 880 return merge_labels_by_parts(v, n, type, result); 881} 882 883int reference::merge_labels_by_number(reference **v, int n, label_type type, 884 string &result) 885{ 886 if (n <= 1) 887 return 0; 888 int num = get_number(); 889 // Only merge three or more labels. 890 if (v[0]->get_number() != num + 1 891 || v[1]->get_number() != num + 2) 892 return 0; 893 int i; 894 for (i = 2; i < n; i++) 895 if (v[i]->get_number() != num + i + 1) 896 break; 897 result = get_label(type); 898 result += label_range_indicator; 899 result += v[i - 1]->get_label(type); 900 return i; 901} 902 903const substring_position &reference::get_separator_pos(label_type type) const 904{ 905 if (type == SHORT_LABEL && short_label_flag) 906 return short_separator_pos; 907 else 908 return separator_pos; 909} 910 911const string &reference::get_label(label_type type) const 912{ 913 if (type == SHORT_LABEL && short_label_flag) 914 return short_label; 915 else 916 return label; 917} 918 919int reference::merge_labels_by_parts(reference **v, int n, label_type type, 920 string &result) 921{ 922 if (n <= 0) 923 return 0; 924 const string &lb = get_label(type); 925 const substring_position &sp = get_separator_pos(type); 926 if (sp.start < 0 927 || sp.start != v[0]->get_separator_pos(type).start 928 || memcmp(lb.contents(), v[0]->get_label(type).contents(), 929 sp.start) != 0) 930 return 0; 931 result = lb; 932 int i = 0; 933 do { 934 result += separate_label_second_parts; 935 const substring_position &s = v[i]->get_separator_pos(type); 936 int sep_end_pos = s.start + s.length; 937 result.append(v[i]->get_label(type).contents() + sep_end_pos, 938 v[i]->get_label(type).length() - sep_end_pos); 939 } while (++i < n 940 && sp.start == v[i]->get_separator_pos(type).start 941 && memcmp(lb.contents(), v[i]->get_label(type).contents(), 942 sp.start) == 0); 943 return i; 944} 945 946string label_pool; 947 948label_info::label_info(const string &s) 949: start(label_pool.length()), length(s.length()), count(0), total(1) 950{ 951 label_pool += s; 952} 953 954static label_info **label_table = 0; 955static int label_table_size = 0; 956static int label_table_used = 0; 957 958label_info *lookup_label(const string &label) 959{ 960 if (label_table == 0) { 961 label_table = new label_info *[17]; 962 label_table_size = 17; 963 for (int i = 0; i < 17; i++) 964 label_table[i] = 0; 965 } 966 unsigned h = hash_string(label.contents(), label.length()) % label_table_size; 967 label_info **ptr; 968 for (ptr = label_table + h; 969 *ptr != 0; 970 (ptr == label_table) 971 ? (ptr = label_table + label_table_size - 1) 972 : ptr--) 973 if ((*ptr)->length == label.length() 974 && memcmp(label_pool.contents() + (*ptr)->start, label.contents(), 975 label.length()) == 0) { 976 (*ptr)->total += 1; 977 return *ptr; 978 } 979 label_info *result = *ptr = new label_info(label); 980 if (++label_table_used * 2 > label_table_size) { 981 // Rehash the table. 982 label_info **old_table = label_table; 983 int old_size = label_table_size; 984 label_table_size = next_size(label_table_size); 985 label_table = new label_info *[label_table_size]; 986 int i; 987 for (i = 0; i < label_table_size; i++) 988 label_table[i] = 0; 989 for (i = 0; i < old_size; i++) 990 if (old_table[i]) { 991 h = hash_string(label_pool.contents() + old_table[i]->start, 992 old_table[i]->length); 993 label_info **p; 994 for (p = label_table + (h % label_table_size); 995 *p != 0; 996 (p == label_table) 997 ? (p = label_table + label_table_size - 1) 998 : --p) 999 ; 1000 *p = old_table[i]; 1001 } 1002 a_delete old_table; 1003 } 1004 return result; 1005} 1006 1007void clear_labels() 1008{ 1009 for (int i = 0; i < label_table_size; i++) { 1010 delete label_table[i]; 1011 label_table[i] = 0; 1012 } 1013 label_table_used = 0; 1014 label_pool.clear(); 1015} 1016 1017static void consider_authors(reference **start, reference **end, int i); 1018 1019void compute_labels(reference **v, int n) 1020{ 1021 if (parsed_label 1022 && (parsed_label->analyze() & expression::CONTAINS_AT) 1023 && sort_fields.length() >= 2 1024 && sort_fields[0] == 'A' 1025 && sort_fields[1] == '+') 1026 consider_authors(v, v + n, 0); 1027 for (int i = 0; i < n; i++) 1028 v[i]->compute_label(); 1029} 1030 1031 1032/* A reference with a list of authors <A0,A1,...,AN> _needs_ author i 1033where 0 <= i <= N if there exists a reference with a list of authors 1034<B0,B1,...,BM> such that <A0,A1,...,AN> != <B0,B1,...,BM> and M >= i 1035and Aj = Bj for 0 <= j < i. In this case if we can't say ``A0, 1036A1,...,A(i-1) et al'' because this would match both <A0,A1,...,AN> and 1037<B0,B1,...,BM>. If a reference needs author i we only have to call 1038need_author(j) for some j >= i such that the reference also needs 1039author j. */ 1040 1041/* This function handles 2 tasks: 1042determine which authors are needed (cannot be elided with et al.); 1043determine which authors can have only last names in the labels. 1044 1045References >= start and < end have the same first i author names. 1046Also they're sorted by A+. */ 1047 1048static void consider_authors(reference **start, reference **end, int i) 1049{ 1050 if (start >= end) 1051 return; 1052 reference **p = start; 1053 if (i >= (*p)->get_nauthors()) { 1054 for (++p; p < end && i >= (*p)->get_nauthors(); p++) 1055 ; 1056 if (p < end && i > 0) { 1057 // If we have an author list <A B C> and an author list <A B C D>, 1058 // then both lists need C. 1059 for (reference **q = start; q < end; q++) 1060 (*q)->need_author(i - 1); 1061 } 1062 start = p; 1063 } 1064 while (p < end) { 1065 reference **last_name_start = p; 1066 reference **name_start = p; 1067 for (++p; 1068 p < end && i < (*p)->get_nauthors() 1069 && same_author_last_name(**last_name_start, **p, i); 1070 p++) { 1071 if (!same_author_name(**name_start, **p, i)) { 1072 consider_authors(name_start, p, i + 1); 1073 name_start = p; 1074 } 1075 } 1076 consider_authors(name_start, p, i + 1); 1077 if (last_name_start == name_start) { 1078 for (reference **q = last_name_start; q < p; q++) 1079 (*q)->set_last_name_unambiguous(i); 1080 } 1081 // If we have an author list <A B C D> and <A B C E>, then the lists 1082 // need author D and E respectively. 1083 if (name_start > start || p < end) { 1084 for (reference **q = last_name_start; q < p; q++) 1085 (*q)->need_author(i); 1086 } 1087 } 1088} 1089 1090int same_author_last_name(const reference &r1, const reference &r2, int n) 1091{ 1092 const char *ae1; 1093 const char *as1 = r1.get_sort_field(0, n, 0, &ae1); 1094 const char *ae2; 1095 const char *as2 = r2.get_sort_field(0, n, 0, &ae2); 1096 if (!as1 && !as2) return 1; // they are the same 1097 if (!as1 || !as2) return 0; 1098 return ae1 - as1 == ae2 - as2 && memcmp(as1, as2, ae1 - as1) == 0; 1099} 1100 1101int same_author_name(const reference &r1, const reference &r2, int n) 1102{ 1103 const char *ae1; 1104 const char *as1 = r1.get_sort_field(0, n, -1, &ae1); 1105 const char *ae2; 1106 const char *as2 = r2.get_sort_field(0, n, -1, &ae2); 1107 if (!as1 && !as2) return 1; // they are the same 1108 if (!as1 || !as2) return 0; 1109 return ae1 - as1 == ae2 - as2 && memcmp(as1, as2, ae1 - as1) == 0; 1110} 1111 1112 1113void int_set::set(int i) 1114{ 1115 assert(i >= 0); 1116 int bytei = i >> 3; 1117 if (bytei >= v.length()) { 1118 int old_length = v.length(); 1119 v.set_length(bytei + 1); 1120 for (int j = old_length; j <= bytei; j++) 1121 v[j] = 0; 1122 } 1123 v[bytei] |= 1 << (i & 7); 1124} 1125 1126int int_set::get(int i) const 1127{ 1128 assert(i >= 0); 1129 int bytei = i >> 3; 1130 return bytei >= v.length() ? 0 : (v[bytei] & (1 << (i & 7))) != 0; 1131} 1132 1133void reference::set_last_name_unambiguous(int i) 1134{ 1135 last_name_unambiguous.set(i); 1136} 1137 1138void reference::need_author(int n) 1139{ 1140 if (n > last_needed_author) 1141 last_needed_author = n; 1142} 1143 1144const char *reference::get_authors(const char **end) const 1145{ 1146 if (!computed_authors) { 1147 ((reference *)this)->computed_authors = 1; 1148 string &result = ((reference *)this)->authors; 1149 int na = get_nauthors(); 1150 result.clear(); 1151 for (int i = 0; i < na; i++) { 1152 if (last_name_unambiguous.get(i)) { 1153 const char *e, *start = get_author_last_name(i, &e); 1154 assert(start != 0); 1155 result.append(start, e - start); 1156 } 1157 else { 1158 const char *e, *start = get_author(i, &e); 1159 assert(start != 0); 1160 result.append(start, e - start); 1161 } 1162 if (i == last_needed_author 1163 && et_al.length() > 0 1164 && et_al_min_elide > 0 1165 && last_needed_author + et_al_min_elide < na 1166 && na >= et_al_min_total) { 1167 result += et_al; 1168 break; 1169 } 1170 if (i < na - 1) { 1171 if (na == 2) 1172 result += join_authors_exactly_two; 1173 else if (i < na - 2) 1174 result += join_authors_default; 1175 else 1176 result += join_authors_last_two; 1177 } 1178 } 1179 } 1180 const char *start = authors.contents(); 1181 *end = start + authors.length(); 1182 return start; 1183} 1184 1185int reference::get_nauthors() const 1186{ 1187 if (nauthors < 0) { 1188 const char *dummy; 1189 int na; 1190 for (na = 0; get_author(na, &dummy) != 0; na++) 1191 ; 1192 ((reference *)this)->nauthors = na; 1193 } 1194 return nauthors; 1195} 1196