ref.cpp revision 114402
1// -*- C++ -*- 2/* Copyright (C) 1989, 1990, 1991, 1992, 2001 Free Software Foundation, Inc. 3Written by James Clark (jjc@jclark.com) 4 5This file is part of groff. 6 7groff is free software; you can redistribute it and/or modify it under 8the terms of the GNU General Public License as published by the Free 9Software Foundation; either version 2, or (at your option) any later 10version. 11 12groff is distributed in the hope that it will be useful, but WITHOUT ANY 13WARRANTY; without even the implied warranty of MERCHANTABILITY or 14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15for more details. 16 17You should have received a copy of the GNU General Public License along 18with groff; see the file COPYING. If not, write to the Free Software 19Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ 20 21#include "refer.h" 22#include "refid.h" 23#include "ref.h" 24#include "token.h" 25 26static const char *find_day(const char *, const char *, const char **); 27static int find_month(const char *start, const char *end); 28static void abbreviate_names(string &); 29 30#define DEFAULT_ARTICLES "the\000a\000an" 31 32string articles(DEFAULT_ARTICLES, sizeof(DEFAULT_ARTICLES)); 33 34// Multiple occurrences of fields are separated by FIELD_SEPARATOR. 35const char FIELD_SEPARATOR = '\0'; 36 37const char MULTI_FIELD_NAMES[] = "AE"; 38const char *AUTHOR_FIELDS = "AQ"; 39 40enum { OTHER, JOURNAL_ARTICLE, BOOK, ARTICLE_IN_BOOK, TECH_REPORT, BELL_TM }; 41 42const char *reference_types[] = { 43 "other", 44 "journal-article", 45 "book", 46 "article-in-book", 47 "tech-report", 48 "bell-tm", 49}; 50 51static string temp_fields[256]; 52 53reference::reference(const char *start, int len, reference_id *ridp) 54: h(0), merged(0), no(-1), field(0), nfields(0), label_ptr(0), 55 computed_authors(0), last_needed_author(-1), nauthors(-1) 56{ 57 int i; 58 for (i = 0; i < 256; i++) 59 field_index[i] = NULL_FIELD_INDEX; 60 if (ridp) 61 rid = *ridp; 62 if (start == 0) 63 return; 64 if (len <= 0) 65 return; 66 const char *end = start + len; 67 const char *ptr = start; 68 assert(*ptr == '%'); 69 while (ptr < end) { 70 if (ptr + 1 < end && ptr[1] != '\0' 71 && ((ptr[1] != '%' && ptr[1] == annotation_field) 72 || (ptr + 2 < end && ptr[1] == '%' && ptr[2] != '\0' 73 && discard_fields.search(ptr[2]) < 0))) { 74 if (ptr[1] == '%') 75 ptr++; 76 string &f = temp_fields[(unsigned char)ptr[1]]; 77 ptr += 2; 78 while (ptr < end && csspace(*ptr)) 79 ptr++; 80 for (;;) { 81 for (;;) { 82 if (ptr >= end) { 83 f += '\n'; 84 break; 85 } 86 f += *ptr; 87 if (*ptr++ == '\n') 88 break; 89 } 90 if (ptr >= end || *ptr == '%') 91 break; 92 } 93 } 94 else if (ptr + 1 < end && ptr[1] != '\0' && ptr[1] != '%' 95 && discard_fields.search(ptr[1]) < 0) { 96 string &f = temp_fields[(unsigned char)ptr[1]]; 97 if (f.length() > 0) { 98 if (strchr(MULTI_FIELD_NAMES, ptr[1]) != 0) 99 f += FIELD_SEPARATOR; 100 else 101 f.clear(); 102 } 103 ptr += 2; 104 if (ptr < end) { 105 if (*ptr == ' ') 106 ptr++; 107 for (;;) { 108 const char *p = ptr; 109 while (ptr < end && *ptr != '\n') 110 ptr++; 111 // strip trailing white space 112 const char *q = ptr; 113 while (q > p && q[-1] != '\n' && csspace(q[-1])) 114 q--; 115 while (p < q) 116 f += *p++; 117 if (ptr >= end) 118 break; 119 ptr++; 120 if (ptr >= end) 121 break; 122 if (*ptr == '%') 123 break; 124 f += ' '; 125 } 126 } 127 } 128 else { 129 // skip this field 130 for (;;) { 131 while (ptr < end && *ptr++ != '\n') 132 ; 133 if (ptr >= end || *ptr == '%') 134 break; 135 } 136 } 137 } 138 for (i = 0; i < 256; i++) 139 if (temp_fields[i].length() > 0) 140 nfields++; 141 field = new string[nfields]; 142 int j = 0; 143 for (i = 0; i < 256; i++) 144 if (temp_fields[i].length() > 0) { 145 field[j].move(temp_fields[i]); 146 if (abbreviate_fields.search(i) >= 0) 147 abbreviate_names(field[j]); 148 field_index[i] = j; 149 j++; 150 } 151} 152 153reference::~reference() 154{ 155 if (nfields > 0) 156 ad_delete(nfields) field; 157} 158 159// ref is the inline, this is the database ref 160 161void reference::merge(reference &ref) 162{ 163 int i; 164 for (i = 0; i < 256; i++) 165 if (field_index[i] != NULL_FIELD_INDEX) 166 temp_fields[i].move(field[field_index[i]]); 167 for (i = 0; i < 256; i++) 168 if (ref.field_index[i] != NULL_FIELD_INDEX) 169 temp_fields[i].move(ref.field[ref.field_index[i]]); 170 for (i = 0; i < 256; i++) 171 field_index[i] = NULL_FIELD_INDEX; 172 int old_nfields = nfields; 173 nfields = 0; 174 for (i = 0; i < 256; i++) 175 if (temp_fields[i].length() > 0) 176 nfields++; 177 if (nfields != old_nfields) { 178 if (old_nfields > 0) 179 ad_delete(old_nfields) field; 180 field = new string[nfields]; 181 } 182 int j = 0; 183 for (i = 0; i < 256; i++) 184 if (temp_fields[i].length() > 0) { 185 field[j].move(temp_fields[i]); 186 field_index[i] = j; 187 j++; 188 } 189 merged = 1; 190} 191 192void reference::insert_field(unsigned char c, string &s) 193{ 194 assert(s.length() > 0); 195 if (field_index[c] != NULL_FIELD_INDEX) { 196 field[field_index[c]].move(s); 197 return; 198 } 199 assert(field_index[c] == NULL_FIELD_INDEX); 200 string *old_field = field; 201 field = new string[nfields + 1]; 202 int pos = 0; 203 int i; 204 for (i = 0; i < int(c); i++) 205 if (field_index[i] != NULL_FIELD_INDEX) 206 pos++; 207 for (i = 0; i < pos; i++) 208 field[i].move(old_field[i]); 209 field[pos].move(s); 210 for (i = pos; i < nfields; i++) 211 field[i + 1].move(old_field[i]); 212 if (nfields > 0) 213 ad_delete(nfields) old_field; 214 nfields++; 215 field_index[c] = pos; 216 for (i = c + 1; i < 256; i++) 217 if (field_index[i] != NULL_FIELD_INDEX) 218 field_index[i] += 1; 219} 220 221void reference::delete_field(unsigned char c) 222{ 223 if (field_index[c] == NULL_FIELD_INDEX) 224 return; 225 string *old_field = field; 226 field = new string[nfields - 1]; 227 int i; 228 for (i = 0; i < int(field_index[c]); i++) 229 field[i].move(old_field[i]); 230 for (i = field_index[c]; i < nfields - 1; i++) 231 field[i].move(old_field[i + 1]); 232 if (nfields > 0) 233 ad_delete(nfields) old_field; 234 nfields--; 235 field_index[c] = NULL_FIELD_INDEX; 236 for (i = c + 1; i < 256; i++) 237 if (field_index[i] != NULL_FIELD_INDEX) 238 field_index[i] -= 1; 239} 240 241void reference::compute_hash_code() 242{ 243 if (!rid.is_null()) 244 h = rid.hash(); 245 else { 246 h = 0; 247 for (int i = 0; i < nfields; i++) 248 if (field[i].length() > 0) { 249 h <<= 4; 250 h ^= hash_string(field[i].contents(), field[i].length()); 251 } 252 } 253} 254 255void reference::set_number(int n) 256{ 257 no = n; 258} 259 260const char SORT_SEP = '\001'; 261const char SORT_SUB_SEP = '\002'; 262const char SORT_SUB_SUB_SEP = '\003'; 263 264// sep specifies additional word separators 265 266void sortify_words(const char *s, const char *end, const char *sep, 267 string &result) 268{ 269 int non_empty = 0; 270 int need_separator = 0; 271 for (;;) { 272 const char *token_start = s; 273 if (!get_token(&s, end)) 274 break; 275 if ((s - token_start == 1 276 && (*token_start == ' ' 277 || *token_start == '\n' 278 || (sep && *token_start != '\0' 279 && strchr(sep, *token_start) != 0))) 280 || (s - token_start == 2 281 && token_start[0] == '\\' && token_start[1] == ' ')) { 282 if (non_empty) 283 need_separator = 1; 284 } 285 else { 286 const token_info *ti = lookup_token(token_start, s); 287 if (ti->sortify_non_empty(token_start, s)) { 288 if (need_separator) { 289 result += ' '; 290 need_separator = 0; 291 } 292 ti->sortify(token_start, s, result); 293 non_empty = 1; 294 } 295 } 296 } 297} 298 299void sortify_word(const char *s, const char *end, string &result) 300{ 301 for (;;) { 302 const char *token_start = s; 303 if (!get_token(&s, end)) 304 break; 305 const token_info *ti = lookup_token(token_start, s); 306 ti->sortify(token_start, s, result); 307 } 308} 309 310void sortify_other(const char *s, int len, string &key) 311{ 312 sortify_words(s, s + len, 0, key); 313} 314 315void sortify_title(const char *s, int len, string &key) 316{ 317 const char *end = s + len; 318 for (; s < end && (*s == ' ' || *s == '\n'); s++) 319 ; 320 const char *ptr = s; 321 for (;;) { 322 const char *token_start = ptr; 323 if (!get_token(&ptr, end)) 324 break; 325 if (ptr - token_start == 1 326 && (*token_start == ' ' || *token_start == '\n')) 327 break; 328 } 329 if (ptr < end) { 330 unsigned int first_word_len = ptr - s - 1; 331 const char *ae = articles.contents() + articles.length(); 332 for (const char *a = articles.contents(); 333 a < ae; 334 a = strchr(a, '\0') + 1) 335 if (first_word_len == strlen(a)) { 336 unsigned int j; 337 for (j = 0; j < first_word_len; j++) 338 if (a[j] != cmlower(s[j])) 339 break; 340 if (j >= first_word_len) { 341 s = ptr; 342 for (; s < end && (*s == ' ' || *s == '\n'); s++) 343 ; 344 break; 345 } 346 } 347 } 348 sortify_words(s, end, 0, key); 349} 350 351void sortify_name(const char *s, int len, string &key) 352{ 353 const char *last_name_end; 354 const char *last_name = find_last_name(s, s + len, &last_name_end); 355 sortify_word(last_name, last_name_end, key); 356 key += SORT_SUB_SUB_SEP; 357 if (last_name > s) 358 sortify_words(s, last_name, ".", key); 359 key += SORT_SUB_SUB_SEP; 360 if (last_name_end < s + len) 361 sortify_words(last_name_end, s + len, ".,", key); 362} 363 364void sortify_date(const char *s, int len, string &key) 365{ 366 const char *year_end; 367 const char *year_start = find_year(s, s + len, &year_end); 368 if (!year_start) { 369 // Things without years are often `forthcoming', so it makes sense 370 // that they sort after things with explicit years. 371 key += 'A'; 372 sortify_words(s, s + len, 0, key); 373 return; 374 } 375 int n = year_end - year_start; 376 while (n < 4) { 377 key += '0'; 378 n++; 379 } 380 while (year_start < year_end) 381 key += *year_start++; 382 int m = find_month(s, s + len); 383 if (m < 0) 384 return; 385 key += 'A' + m; 386 const char *day_end; 387 const char *day_start = find_day(s, s + len, &day_end); 388 if (!day_start) 389 return; 390 if (day_end - day_start == 1) 391 key += '0'; 392 while (day_start < day_end) 393 key += *day_start++; 394} 395 396// SORT_{SUB,SUB_SUB}_SEP can creep in from use of @ in label specification. 397 398void sortify_label(const char *s, int len, string &key) 399{ 400 const char *end = s + len; 401 for (;;) { 402 const char *ptr; 403 for (ptr = s; 404 ptr < end && *ptr != SORT_SUB_SEP && *ptr != SORT_SUB_SUB_SEP; 405 ptr++) 406 ; 407 if (ptr > s) 408 sortify_words(s, ptr, 0, key); 409 s = ptr; 410 if (s >= end) 411 break; 412 key += *s++; 413 } 414} 415 416void reference::compute_sort_key() 417{ 418 if (sort_fields.length() == 0) 419 return; 420 sort_fields += '\0'; 421 const char *sf = sort_fields.contents(); 422 while (*sf != '\0') { 423 if (sf > sort_fields) 424 sort_key += SORT_SEP; 425 char f = *sf++; 426 int n = 1; 427 if (*sf == '+') { 428 n = INT_MAX; 429 sf++; 430 } 431 else if (csdigit(*sf)) { 432 char *ptr; 433 long l = strtol(sf, &ptr, 10); 434 if (l == 0 && ptr == sf) 435 ; 436 else { 437 sf = ptr; 438 if (l < 0) { 439 n = 1; 440 } 441 else { 442 n = int(l); 443 } 444 } 445 } 446 if (f == '.') 447 sortify_label(label.contents(), label.length(), sort_key); 448 else if (f == AUTHOR_FIELDS[0]) 449 sortify_authors(n, sort_key); 450 else 451 sortify_field(f, n, sort_key); 452 } 453 sort_fields.set_length(sort_fields.length() - 1); 454} 455 456void reference::sortify_authors(int n, string &result) const 457{ 458 for (const char *p = AUTHOR_FIELDS; *p != '\0'; p++) 459 if (contains_field(*p)) { 460 sortify_field(*p, n, result); 461 return; 462 } 463 sortify_field(AUTHOR_FIELDS[0], n, result); 464} 465 466void reference::canonicalize_authors(string &result) const 467{ 468 int len = result.length(); 469 sortify_authors(INT_MAX, result); 470 if (result.length() > len) 471 result += SORT_SUB_SEP; 472} 473 474void reference::sortify_field(unsigned char f, int n, string &result) const 475{ 476 typedef void (*sortify_t)(const char *, int, string &); 477 sortify_t sortifier = sortify_other; 478 switch (f) { 479 case 'A': 480 case 'E': 481 sortifier = sortify_name; 482 break; 483 case 'D': 484 sortifier = sortify_date; 485 break; 486 case 'B': 487 case 'J': 488 case 'T': 489 sortifier = sortify_title; 490 break; 491 } 492 int fi = field_index[(unsigned char)f]; 493 if (fi != NULL_FIELD_INDEX) { 494 string &str = field[fi]; 495 const char *start = str.contents(); 496 const char *end = start + str.length(); 497 for (int i = 0; i < n && start < end; i++) { 498 const char *p = start; 499 while (start < end && *start != FIELD_SEPARATOR) 500 start++; 501 if (i > 0) 502 result += SORT_SUB_SEP; 503 (*sortifier)(p, start - p, result); 504 if (start < end) 505 start++; 506 } 507 } 508} 509 510int compare_reference(const reference &r1, const reference &r2) 511{ 512 assert(r1.no >= 0); 513 assert(r2.no >= 0); 514 const char *s1 = r1.sort_key.contents(); 515 int n1 = r1.sort_key.length(); 516 const char *s2 = r2.sort_key.contents(); 517 int n2 = r2.sort_key.length(); 518 for (; n1 > 0 && n2 > 0; --n1, --n2, ++s1, ++s2) 519 if (*s1 != *s2) 520 return (int)(unsigned char)*s1 - (int)(unsigned char)*s2; 521 if (n2 > 0) 522 return -1; 523 if (n1 > 0) 524 return 1; 525 return r1.no - r2.no; 526} 527 528int same_reference(const reference &r1, const reference &r2) 529{ 530 if (!r1.rid.is_null() && r1.rid == r2.rid) 531 return 1; 532 if (r1.h != r2.h) 533 return 0; 534 if (r1.nfields != r2.nfields) 535 return 0; 536 int i = 0; 537 for (i = 0; i < 256; i++) 538 if (r1.field_index != r2.field_index) 539 return 0; 540 for (i = 0; i < r1.nfields; i++) 541 if (r1.field[i] != r2.field[i]) 542 return 0; 543 return 1; 544} 545 546const char *find_last_name(const char *start, const char *end, 547 const char **endp) 548{ 549 const char *ptr = start; 550 const char *last_word = start; 551 for (;;) { 552 const char *token_start = ptr; 553 if (!get_token(&ptr, end)) 554 break; 555 if (ptr - token_start == 1) { 556 if (*token_start == ',') { 557 *endp = token_start; 558 return last_word; 559 } 560 else if (*token_start == ' ' || *token_start == '\n') { 561 if (ptr < end && *ptr != ' ' && *ptr != '\n') 562 last_word = ptr; 563 } 564 } 565 } 566 *endp = end; 567 return last_word; 568} 569 570void abbreviate_name(const char *ptr, const char *end, string &result) 571{ 572 const char *last_name_end; 573 const char *last_name_start = find_last_name(ptr, end, &last_name_end); 574 int need_period = 0; 575 for (;;) { 576 const char *token_start = ptr; 577 if (!get_token(&ptr, last_name_start)) 578 break; 579 const token_info *ti = lookup_token(token_start, ptr); 580 if (need_period) { 581 if ((ptr - token_start == 1 && *token_start == ' ') 582 || (ptr - token_start == 2 && token_start[0] == '\\' 583 && token_start[1] == ' ')) 584 continue; 585 if (ti->is_upper()) 586 result += period_before_initial; 587 else 588 result += period_before_other; 589 need_period = 0; 590 } 591 result.append(token_start, ptr - token_start); 592 if (ti->is_upper()) { 593 const char *lower_ptr = ptr; 594 int first_token = 1; 595 for (;;) { 596 token_start = ptr; 597 if (!get_token(&ptr, last_name_start)) 598 break; 599 if ((ptr - token_start == 1 && *token_start == ' ') 600 || (ptr - token_start == 2 && token_start[0] == '\\' 601 && token_start[1] == ' ')) 602 break; 603 ti = lookup_token(token_start, ptr); 604 if (ti->is_hyphen()) { 605 const char *ptr1 = ptr; 606 if (get_token(&ptr1, last_name_start)) { 607 ti = lookup_token(ptr, ptr1); 608 if (ti->is_upper()) { 609 result += period_before_hyphen; 610 result.append(token_start, ptr1 - token_start); 611 ptr = ptr1; 612 } 613 } 614 } 615 else if (ti->is_upper()) { 616 // MacDougal -> MacD. 617 result.append(lower_ptr, ptr - lower_ptr); 618 lower_ptr = ptr; 619 first_token = 1; 620 } 621 else if (first_token && ti->is_accent()) { 622 result.append(token_start, ptr - token_start); 623 lower_ptr = ptr; 624 } 625 first_token = 0; 626 } 627 need_period = 1; 628 } 629 } 630 if (need_period) 631 result += period_before_last_name; 632 result.append(last_name_start, end - last_name_start); 633} 634 635static void abbreviate_names(string &result) 636{ 637 string str; 638 str.move(result); 639 const char *ptr = str.contents(); 640 const char *end = ptr + str.length(); 641 while (ptr < end) { 642 const char *name_end = (char *)memchr(ptr, FIELD_SEPARATOR, end - ptr); 643 if (name_end == 0) 644 name_end = end; 645 abbreviate_name(ptr, name_end, result); 646 if (name_end >= end) 647 break; 648 ptr = name_end + 1; 649 result += FIELD_SEPARATOR; 650 } 651} 652 653void reverse_name(const char *ptr, const char *name_end, string &result) 654{ 655 const char *last_name_end; 656 const char *last_name_start = find_last_name(ptr, name_end, &last_name_end); 657 result.append(last_name_start, last_name_end - last_name_start); 658 while (last_name_start > ptr 659 && (last_name_start[-1] == ' ' || last_name_start[-1] == '\n')) 660 last_name_start--; 661 if (last_name_start > ptr) { 662 result += ", "; 663 result.append(ptr, last_name_start - ptr); 664 } 665 if (last_name_end < name_end) 666 result.append(last_name_end, name_end - last_name_end); 667} 668 669void reverse_names(string &result, int n) 670{ 671 if (n <= 0) 672 return; 673 string str; 674 str.move(result); 675 const char *ptr = str.contents(); 676 const char *end = ptr + str.length(); 677 while (ptr < end) { 678 if (--n < 0) { 679 result.append(ptr, end - ptr); 680 break; 681 } 682 const char *name_end = (char *)memchr(ptr, FIELD_SEPARATOR, end - ptr); 683 if (name_end == 0) 684 name_end = end; 685 reverse_name(ptr, name_end, result); 686 if (name_end >= end) 687 break; 688 ptr = name_end + 1; 689 result += FIELD_SEPARATOR; 690 } 691} 692 693// Return number of field separators. 694 695int join_fields(string &f) 696{ 697 const char *ptr = f.contents(); 698 int len = f.length(); 699 int nfield_seps = 0; 700 int j; 701 for (j = 0; j < len; j++) 702 if (ptr[j] == FIELD_SEPARATOR) 703 nfield_seps++; 704 if (nfield_seps == 0) 705 return 0; 706 string temp; 707 int field_seps_left = nfield_seps; 708 for (j = 0; j < len; j++) { 709 if (ptr[j] == FIELD_SEPARATOR) { 710 if (nfield_seps == 1) 711 temp += join_authors_exactly_two; 712 else if (--field_seps_left == 0) 713 temp += join_authors_last_two; 714 else 715 temp += join_authors_default; 716 } 717 else 718 temp += ptr[j]; 719 } 720 f = temp; 721 return nfield_seps; 722} 723 724void uppercase(const char *start, const char *end, string &result) 725{ 726 for (;;) { 727 const char *token_start = start; 728 if (!get_token(&start, end)) 729 break; 730 const token_info *ti = lookup_token(token_start, start); 731 ti->upper_case(token_start, start, result); 732 } 733} 734 735void lowercase(const char *start, const char *end, string &result) 736{ 737 for (;;) { 738 const char *token_start = start; 739 if (!get_token(&start, end)) 740 break; 741 const token_info *ti = lookup_token(token_start, start); 742 ti->lower_case(token_start, start, result); 743 } 744} 745 746void capitalize(const char *ptr, const char *end, string &result) 747{ 748 int in_small_point_size = 0; 749 for (;;) { 750 const char *start = ptr; 751 if (!get_token(&ptr, end)) 752 break; 753 const token_info *ti = lookup_token(start, ptr); 754 const char *char_end = ptr; 755 int is_lower = ti->is_lower(); 756 if ((is_lower || ti->is_upper()) && get_token(&ptr, end)) { 757 const token_info *ti2 = lookup_token(char_end, ptr); 758 if (!ti2->is_accent()) 759 ptr = char_end; 760 } 761 if (is_lower) { 762 if (!in_small_point_size) { 763 result += "\\s-2"; 764 in_small_point_size = 1; 765 } 766 ti->upper_case(start, char_end, result); 767 result.append(char_end, ptr - char_end); 768 } 769 else { 770 if (in_small_point_size) { 771 result += "\\s+2"; 772 in_small_point_size = 0; 773 } 774 result.append(start, ptr - start); 775 } 776 } 777 if (in_small_point_size) 778 result += "\\s+2"; 779} 780 781void capitalize_field(string &str) 782{ 783 string temp; 784 capitalize(str.contents(), str.contents() + str.length(), temp); 785 str.move(temp); 786} 787 788int is_terminated(const char *ptr, const char *end) 789{ 790 const char *last_token = end; 791 for (;;) { 792 const char *p = ptr; 793 if (!get_token(&ptr, end)) 794 break; 795 last_token = p; 796 } 797 return end - last_token == 1 798 && (*last_token == '.' || *last_token == '!' || *last_token == '?'); 799} 800 801void reference::output(FILE *fp) 802{ 803 fputs(".]-\n", fp); 804 for (int i = 0; i < 256; i++) 805 if (field_index[i] != NULL_FIELD_INDEX && i != annotation_field) { 806 string &f = field[field_index[i]]; 807 if (!csdigit(i)) { 808 int j = reverse_fields.search(i); 809 if (j >= 0) { 810 int n; 811 int len = reverse_fields.length(); 812 if (++j < len && csdigit(reverse_fields[j])) { 813 n = reverse_fields[j] - '0'; 814 for (++j; j < len && csdigit(reverse_fields[j]); j++) 815 // should check for overflow 816 n = n*10 + reverse_fields[j] - '0'; 817 } 818 else 819 n = INT_MAX; 820 reverse_names(f, n); 821 } 822 } 823 int is_multiple = join_fields(f) > 0; 824 if (capitalize_fields.search(i) >= 0) 825 capitalize_field(f); 826 if (memchr(f.contents(), '\n', f.length()) == 0) { 827 fprintf(fp, ".ds [%c ", i); 828 if (f[0] == ' ' || f[0] == '\\' || f[0] == '"') 829 putc('"', fp); 830 put_string(f, fp); 831 putc('\n', fp); 832 } 833 else { 834 fprintf(fp, ".de [%c\n", i); 835 put_string(f, fp); 836 fputs("..\n", fp); 837 } 838 if (i == 'P') { 839 int multiple_pages = 0; 840 const char *s = f.contents(); 841 const char *end = f.contents() + f.length(); 842 for (;;) { 843 const char *token_start = s; 844 if (!get_token(&s, end)) 845 break; 846 const token_info *ti = lookup_token(token_start, s); 847 if (ti->is_hyphen() || ti->is_range_sep()) { 848 multiple_pages = 1; 849 break; 850 } 851 } 852 fprintf(fp, ".nr [P %d\n", multiple_pages); 853 } 854 else if (i == 'E') 855 fprintf(fp, ".nr [E %d\n", is_multiple); 856 } 857 for (const char *p = "TAO"; *p; p++) { 858 int fi = field_index[(unsigned char)*p]; 859 if (fi != NULL_FIELD_INDEX) { 860 string &f = field[fi]; 861 fprintf(fp, ".nr [%c %d\n", *p, 862 is_terminated(f.contents(), f.contents() + f.length())); 863 } 864 } 865 int t = classify(); 866 fprintf(fp, ".][ %d %s\n", t, reference_types[t]); 867 if (annotation_macro.length() > 0 && annotation_field >= 0 868 && field_index[annotation_field] != NULL_FIELD_INDEX) { 869 putc('.', fp); 870 put_string(annotation_macro, fp); 871 putc('\n', fp); 872 put_string(field[field_index[annotation_field]], fp); 873 } 874} 875 876void reference::print_sort_key_comment(FILE *fp) 877{ 878 fputs(".\\\"", fp); 879 put_string(sort_key, fp); 880 putc('\n', fp); 881} 882 883const char *find_year(const char *start, const char *end, const char **endp) 884{ 885 for (;;) { 886 while (start < end && !csdigit(*start)) 887 start++; 888 const char *ptr = start; 889 if (start == end) 890 break; 891 while (ptr < end && csdigit(*ptr)) 892 ptr++; 893 if (ptr - start == 4 || ptr - start == 3 894 || (ptr - start == 2 895 && (start[0] >= '4' || (start[0] == '3' && start[1] >= '2')))) { 896 *endp = ptr; 897 return start; 898 } 899 start = ptr; 900 } 901 return 0; 902} 903 904static const char *find_day(const char *start, const char *end, 905 const char **endp) 906{ 907 for (;;) { 908 while (start < end && !csdigit(*start)) 909 start++; 910 const char *ptr = start; 911 if (start == end) 912 break; 913 while (ptr < end && csdigit(*ptr)) 914 ptr++; 915 if ((ptr - start == 1 && start[0] != '0') 916 || (ptr - start == 2 && 917 (start[0] == '1' 918 || start[0] == '2' 919 || (start[0] == '3' && start[1] <= '1') 920 || (start[0] == '0' && start[1] != '0')))) { 921 *endp = ptr; 922 return start; 923 } 924 start = ptr; 925 } 926 return 0; 927} 928 929static int find_month(const char *start, const char *end) 930{ 931 static const char *months[] = { 932 "january", 933 "february", 934 "march", 935 "april", 936 "may", 937 "june", 938 "july", 939 "august", 940 "september", 941 "october", 942 "november", 943 "december", 944 }; 945 for (;;) { 946 while (start < end && !csalpha(*start)) 947 start++; 948 const char *ptr = start; 949 if (start == end) 950 break; 951 while (ptr < end && csalpha(*ptr)) 952 ptr++; 953 if (ptr - start >= 3) { 954 for (unsigned int i = 0; i < sizeof(months)/sizeof(months[0]); i++) { 955 const char *q = months[i]; 956 const char *p = start; 957 for (; p < ptr; p++, q++) 958 if (cmlower(*p) != *q) 959 break; 960 if (p >= ptr) 961 return i; 962 } 963 } 964 start = ptr; 965 } 966 return -1; 967} 968 969int reference::contains_field(char c) const 970{ 971 return field_index[(unsigned char)c] != NULL_FIELD_INDEX; 972} 973 974int reference::classify() 975{ 976 if (contains_field('J')) 977 return JOURNAL_ARTICLE; 978 if (contains_field('B')) 979 return ARTICLE_IN_BOOK; 980 if (contains_field('G')) 981 return TECH_REPORT; 982 if (contains_field('R')) 983 return TECH_REPORT; 984 if (contains_field('I')) 985 return BOOK; 986 if (contains_field('M')) 987 return BELL_TM; 988 return OTHER; 989} 990 991const char *reference::get_year(const char **endp) const 992{ 993 if (field_index['D'] != NULL_FIELD_INDEX) { 994 string &date = field[field_index['D']]; 995 const char *start = date.contents(); 996 const char *end = start + date.length(); 997 return find_year(start, end, endp); 998 } 999 else 1000 return 0; 1001} 1002 1003const char *reference::get_field(unsigned char c, const char **endp) const 1004{ 1005 if (field_index[c] != NULL_FIELD_INDEX) { 1006 string &f = field[field_index[c]]; 1007 const char *start = f.contents(); 1008 *endp = start + f.length(); 1009 return start; 1010 } 1011 else 1012 return 0; 1013} 1014 1015const char *reference::get_date(const char **endp) const 1016{ 1017 return get_field('D', endp); 1018} 1019 1020const char *nth_field(int i, const char *start, const char **endp) 1021{ 1022 while (--i >= 0) { 1023 start = (char *)memchr(start, FIELD_SEPARATOR, *endp - start); 1024 if (!start) 1025 return 0; 1026 start++; 1027 } 1028 const char *e = (char *)memchr(start, FIELD_SEPARATOR, *endp - start); 1029 if (e) 1030 *endp = e; 1031 return start; 1032} 1033 1034const char *reference::get_author(int i, const char **endp) const 1035{ 1036 for (const char *f = AUTHOR_FIELDS; *f != '\0'; f++) { 1037 const char *start = get_field(*f, endp); 1038 if (start) { 1039 if (strchr(MULTI_FIELD_NAMES, *f) != 0) 1040 return nth_field(i, start, endp); 1041 else if (i == 0) 1042 return start; 1043 else 1044 return 0; 1045 } 1046 } 1047 return 0; 1048} 1049 1050const char *reference::get_author_last_name(int i, const char **endp) const 1051{ 1052 for (const char *f = AUTHOR_FIELDS; *f != '\0'; f++) { 1053 const char *start = get_field(*f, endp); 1054 if (start) { 1055 if (strchr(MULTI_FIELD_NAMES, *f) != 0) { 1056 start = nth_field(i, start, endp); 1057 if (!start) 1058 return 0; 1059 } 1060 if (*f == 'A') 1061 return find_last_name(start, *endp, endp); 1062 else 1063 return start; 1064 } 1065 } 1066 return 0; 1067} 1068 1069void reference::set_date(string &d) 1070{ 1071 if (d.length() == 0) 1072 delete_field('D'); 1073 else 1074 insert_field('D', d); 1075} 1076 1077int same_year(const reference &r1, const reference &r2) 1078{ 1079 const char *ye1; 1080 const char *ys1 = r1.get_year(&ye1); 1081 const char *ye2; 1082 const char *ys2 = r2.get_year(&ye2); 1083 if (ys1 == 0) { 1084 if (ys2 == 0) 1085 return same_date(r1, r2); 1086 else 1087 return 0; 1088 } 1089 else if (ys2 == 0) 1090 return 0; 1091 else if (ye1 - ys1 != ye2 - ys2) 1092 return 0; 1093 else 1094 return memcmp(ys1, ys2, ye1 - ys1) == 0; 1095} 1096 1097int same_date(const reference &r1, const reference &r2) 1098{ 1099 const char *e1; 1100 const char *s1 = r1.get_date(&e1); 1101 const char *e2; 1102 const char *s2 = r2.get_date(&e2); 1103 if (s1 == 0) 1104 return s2 == 0; 1105 else if (s2 == 0) 1106 return 0; 1107 else if (e1 - s1 != e2 - s2) 1108 return 0; 1109 else 1110 return memcmp(s1, s2, e1 - s1) == 0; 1111} 1112 1113const char *reference::get_sort_field(int i, int si, int ssi, 1114 const char **endp) const 1115{ 1116 const char *start = sort_key.contents(); 1117 const char *end = start + sort_key.length(); 1118 if (i < 0) { 1119 *endp = end; 1120 return start; 1121 } 1122 while (--i >= 0) { 1123 start = (char *)memchr(start, SORT_SEP, end - start); 1124 if (!start) 1125 return 0; 1126 start++; 1127 } 1128 const char *e = (char *)memchr(start, SORT_SEP, end - start); 1129 if (e) 1130 end = e; 1131 if (si < 0) { 1132 *endp = end; 1133 return start; 1134 } 1135 while (--si >= 0) { 1136 start = (char *)memchr(start, SORT_SUB_SEP, end - start); 1137 if (!start) 1138 return 0; 1139 start++; 1140 } 1141 e = (char *)memchr(start, SORT_SUB_SEP, end - start); 1142 if (e) 1143 end = e; 1144 if (ssi < 0) { 1145 *endp = end; 1146 return start; 1147 } 1148 while (--ssi >= 0) { 1149 start = (char *)memchr(start, SORT_SUB_SUB_SEP, end - start); 1150 if (!start) 1151 return 0; 1152 start++; 1153 } 1154 e = (char *)memchr(start, SORT_SUB_SUB_SEP, end - start); 1155 if (e) 1156 end = e; 1157 *endp = end; 1158 return start; 1159} 1160 1161