/* -*- C++ -*-
   Copyright (C) 1989, 1990, 1991, 1992, 2000, 2004
   Free Software Foundation, Inc.
     Written by James Clark (jjc@jclark.com)

This file is part of groff.

groff is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

groff is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License along
with groff; see the file COPYING.  If not, write to the Free Software
Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */

%{

#include "refer.h"
#include "refid.h"
#include "ref.h"
#include "token.h"

int yylex();
void yyerror(const char *);
int yyparse();

static const char *format_serial(char c, int n);

// Bookkeeping for one distinct tentative label.  The label text itself
// is stored in the shared `label_pool' string, not in this struct.
struct label_info {
  int start;    // offset of the label text within label_pool
  int length;   // length of the label text
  int count;    // bumped by reference::compute_label()
  int total;    // starts at 1; bumped each time lookup_label() finds it again
  label_info(const string &);
};

label_info *lookup_label(const string &label);

// Abstract node of a parsed label expression.
struct expression {
  // Bit flags returned by analyze().
  enum {
    // Does the tentative label depend on the reference?
    CONTAINS_VARIABLE = 01,
    CONTAINS_STAR = 02,
    CONTAINS_FORMAT = 04,
    CONTAINS_AT = 010
  };
  virtual ~expression() { }
  // Append this node's text for the given reference to the string
  // argument.  The int argument is the `tentative' flag; the
  // substring_position argument is written only by separator_expr.
  virtual void evaluate(int, const reference &, string &,
                        substring_position &) = 0;
  // Return the OR of the CONTAINS_* flags that apply to this subtree.
  virtual unsigned analyze() { return 0; }
};

// `@': the reference's authors.
class at_expr : public expression {
public:
  at_expr() { }
  void evaluate(int, const reference &, string &, substring_position &);
  unsigned analyze() { return CONTAINS_VARIABLE|CONTAINS_AT; }
};

// `%a', `%A', `%i', `%I' or `%<digits>': a serial number.  For the
// digit form, `type' is '0', `width' is the number of digits written
// in the specification and `first_number' is their value.
class format_expr : public expression {
  char type;
  int width;
  int first_number;
public:
  format_expr(char c, int w = 0, int f = 1)
    : type(c), width(w), first_number(f) { }
  void evaluate(int, const reference &, string &, substring_position &);
  unsigned analyze() { return CONTAINS_FORMAT; }
};

// A field letter, optionally followed by a number selecting one part
// of that field.  `number' is zero-based (the grammar subtracts 1).
class field_expr : public expression {
  int number;
  char name;
public:
  field_expr(char nm, int num) : number(num), name(nm) { }
  void evaluate(int, const reference &, string &, substring_position &);
  unsigned analyze() { return CONTAINS_VARIABLE; }
};

// Quoted literal text; the text is copied into the node.
class literal_expr : public expression {
  string s;
public:
  literal_expr(const char *ptr, int len) : s(ptr, len) { }
  void evaluate(int, const reference &, string &, substring_position &);
};

// Base for nodes with one operand.  The operand may be null and is
// deleted together with the node.
class unary_expr : public expression {
protected:
  expression *expr;
public:
  unary_expr(expression *e) : expr(e) { }
  ~unary_expr() { delete expr; }
  void evaluate(int, const reference &, string &, substring_position &) = 0;
  unsigned analyze() { return expr ? expr->analyze() : 0; }
};

// This caches the analysis of an expression.

class analyzed_expr : public unary_expr {
  unsigned flags;
public:
  analyzed_expr(expression *);
  void evaluate(int, const reference &, string &, substring_position &);
  unsigned analyze() { return flags; }
};

// `expr*': its analysis masks out CONTAINS_VARIABLE and adds
// CONTAINS_STAR.
class star_expr : public unary_expr {
public:
  star_expr(expression *e) : unary_expr(e) { }
  void evaluate(int, const reference &, string &, substring_position &);
  unsigned analyze() {
    return ((expr ? (expr->analyze() & ~CONTAINS_VARIABLE) : 0)
            | CONTAINS_STAR);
  }
};

// A text transformation: reads [first arg, second arg) and appends the
// transformed text to the string.
typedef void map_func(const char *, const char *, string &);

// `expr.l', `.u', `.c', `.r', `.a': apply a map_func to the operand.
class map_expr : public unary_expr {
  map_func *func;
public:
  map_expr(expression *e, map_func *f) : unary_expr(e), func(f) { }
  void evaluate(int, const reference &, string &, substring_position &);
};

// A matcher: searches [first arg, second arg); returns the start of the
// match (or null) and stores the end of the match via the third argument.
typedef const char *extractor_func(const char *, const char *, const char **);

// `expr.y', `.n' with an optional `+'/`-' flag: append the text before
// the match, the match itself, or the text after it.
class extractor_expr : public unary_expr {
  int part;
  extractor_func *func;
public:
  // These values coincide with what the grammar's `flag' rule yields:
  // `+' gives 1, no flag gives 0, `-' gives -1.
  enum { BEFORE = +1, MATCH = 0, AFTER = -1 };
  extractor_expr(expression *e, extractor_func *f, int pt)
    : unary_expr(e), part(pt), func(f) { }
  void evaluate(int, const reference &, string &, substring_position &);
};

// `expr+N' (n > 0, leading part) or `expr-N' (n < 0, trailing part).
class truncate_expr : public unary_expr {
  int n;
public:
  truncate_expr(expression *e, int i) : unary_expr(e), n(i) { }
  void evaluate(int, const reference &, string &, substring_position &);
};

// `<expr>': records where the operand's text lands in the result.
class separator_expr : public unary_expr {
public:
  separator_expr(expression *e) : unary_expr(e) { }
  void evaluate(int, const reference &, string &, substring_position &);
};

// Base for nodes with two operands.  Either may be null; both are
// deleted together with the node.
class binary_expr : public expression {
protected:
  expression *expr1;
  expression *expr2;
public:
  binary_expr(expression *e1, expression *e2) : expr1(e1), expr2(e2) { }
  ~binary_expr() { delete expr1; delete expr2; }
  void evaluate(int, const reference &, string &, substring_position &) = 0;
  unsigned analyze() {
    return (expr1 ? expr1->analyze() : 0) | (expr2 ? expr2->analyze() : 0);
  }
};

// `expr1|expr2': expr2 is used only if expr1 produced nothing.
class alternative_expr : public binary_expr {
public:
  alternative_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
  void evaluate(int, const reference &, string &, substring_position &);
};

// `expr1 expr2': concatenation.
class list_expr : public binary_expr {
public:
  list_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
  void evaluate(int, const reference &, string &, substring_position &);
};

// `expr1~expr2': if expr1's text ends in `-', that `-' is replaced by
// expr2's text.
class substitute_expr : public binary_expr {
public:
  substitute_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
  void evaluate(int, const reference &, string &, substring_position &);
};

// Base for nodes with three operands.  Any may be null; all are deleted
// together with the node.
class ternary_expr : public expression {
protected:
  expression *expr1;
  expression *expr2;
  expression *expr3;
public:
  ternary_expr(expression *e1, expression *e2, expression *e3)
    : expr1(e1), expr2(e2), expr3(e3) { }
  ~ternary_expr() { delete expr1; delete expr2; delete expr3; }
  void evaluate(int, const reference &, string &, substring_position &) = 0;
  unsigned analyze() {
    return ((expr1 ? expr1->analyze() : 0)
            | (expr2 ? expr2->analyze() : 0)
            | (expr3 ? expr3->analyze() : 0));
  }
};

// `expr1?expr2:expr3'; also used for `expr1&expr2' with a null expr3.
class conditional_expr : public ternary_expr {
public:
  conditional_expr(expression *e1, expression *e2, expression *e3)
    : ternary_expr(e1, e2, e3) { }
  void evaluate(int, const reference &, string &, substring_position &);
};

// The currently installed label specifications (null if none).
static expression *parsed_label = 0;
static expression *parsed_date_label = 0;
static expression *parsed_short_label = 0;

// Set by the grammar's start rule; picked up by the set_*_label_spec()
// functions after a successful yyparse().
static expression *parse_result;

// Accumulates the text of every quoted literal seen by yylex();
// TOKEN_LITERAL values are (start, len) ranges into it.
string literals;

%}

%union {
  int num;
  expression *expr;
  struct { int ndigits; int val; } dig;
  struct { int start; int len; } str;
}

/* uppercase or lowercase letter */
%token <num> TOKEN_LETTER
/* literal characters */
%token <str> TOKEN_LITERAL
/* digit */
%token <num> TOKEN_DIGIT

%type <expr> conditional
%type <expr> alternative
%type <expr> list
%type <expr> string
%type <expr> substitute
%type <expr> optional_conditional
%type <num> number
%type <dig> digits
%type <num> optional_number
%type <num> flag

%%

expr:
        optional_conditional
                { parse_result = ($1 ? new analyzed_expr($1) : 0); }
        ;

conditional:
        alternative
                { $$ = $1; }
        | alternative '?' optional_conditional ':' conditional
                { $$ = new conditional_expr($1, $3, $5); }
        ;

optional_conditional:
        /* empty */
                { $$ = 0; }
        | conditional
                { $$ = $1; }
        ;

alternative:
        list
                { $$ = $1; }
        | alternative '|' list
                { $$ = new alternative_expr($1, $3); }
        | alternative '&' list
                { $$ = new conditional_expr($1, $3, 0); }
        ;

list:
        substitute
                { $$ = $1; }
        | list substitute
                { $$ = new list_expr($1, $2); }
        ;

substitute:
        string
                { $$ = $1; }
        | substitute '~' string
                { $$ = new substitute_expr($1, $3); }
        ;

string:
        '@'
                { $$ = new at_expr; }
        | TOKEN_LITERAL
                {
                  $$ = new literal_expr(literals.contents() + $1.start,
                                        $1.len);
                }
        | TOKEN_LETTER
                { $$ = new field_expr($1, 0); }
        | TOKEN_LETTER number
                { $$ = new field_expr($1, $2 - 1); }
        | '%' TOKEN_LETTER
                {
                  switch ($2) {
                  case 'I':
                  case 'i':
                  case 'A':
                  case 'a':
                    $$ = new format_expr($2);
                    break;
                  default:
                    /* Report the problem, then carry on as if `%a'
                       had been written. */
                    command_error("unrecognized format `%1'", char($2));
                    $$ = new format_expr('a');
                    break;
                  }
                }

        | '%' digits
                {
                  $$ = new format_expr('0', $2.ndigits, $2.val);
                }
        | string '.' flag TOKEN_LETTER optional_number
                {
                  /* The optional_number ($5) is parsed but not used
                     by any of the functions below. */
                  switch ($4) {
                  case 'l':
                    $$ = new map_expr($1, lowercase);
                    break;
                  case 'u':
                    $$ = new map_expr($1, uppercase);
                    break;
                  case 'c':
                    $$ = new map_expr($1, capitalize);
                    break;
                  case 'r':
                    $$ = new map_expr($1, reverse_name);
                    break;
                  case 'a':
                    $$ = new map_expr($1, abbreviate_name);
                    break;
                  case 'y':
                    $$ = new extractor_expr($1, find_year, $3);
                    break;
                  case 'n':
                    $$ = new extractor_expr($1, find_last_name, $3);
                    break;
                  default:
                    /* Unknown function: keep the operand unchanged. */
                    $$ = $1;
                    command_error("unknown function `%1'", char($4));
                    break;
                  }
                }

        | string '+' number
                { $$ = new truncate_expr($1, $3); }
        | string '-' number
                { $$ = new truncate_expr($1, -$3); }
        | string '*'
                { $$ = new star_expr($1); }
        | '(' optional_conditional ')'
                { $$ = $2; }
        | '<' optional_conditional '>'
                { $$ = new separator_expr($2); }
        ;

optional_number:
        /* empty */
                { $$ = -1; }
        | number
                { $$ = $1; }
        ;

number:
        TOKEN_DIGIT
                { $$ = $1; }
        | number TOKEN_DIGIT
                { $$ = $1*10 + $2; }
        ;

digits:
        TOKEN_DIGIT
                { $$.ndigits = 1; $$.val = $1; }
        | digits TOKEN_DIGIT
                { $$.ndigits = $1.ndigits + 1; $$.val = $1.val*10 + $2; }
        ;


flag:
        /* empty */
                { $$ = 0; }
        | '+'
                { $$ = 1; }
        | '-'
                { $$ = -1; }
        ;
39675584Sru 39775584Sru%% 39875584Sru 39975584Sru/* bison defines const to be empty unless __STDC__ is defined, which it 40075584Sruisn't under cfront */ 40175584Sru 40275584Sru#ifdef const 40375584Sru#undef const 40475584Sru#endif 40575584Sru 40675584Sruconst char *spec_ptr; 40775584Sruconst char *spec_end; 40875584Sruconst char *spec_cur; 40975584Sru 410151497Srustatic char uppercase_array[] = { 411151497Sru 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 412151497Sru 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 413151497Sru 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 414151497Sru 'Y', 'Z', 415151497Sru}; 416151497Sru 417151497Srustatic char lowercase_array[] = { 418151497Sru 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 419151497Sru 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 420151497Sru 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 421151497Sru 'y', 'z', 422151497Sru}; 423151497Sru 42475584Sruint yylex() 42575584Sru{ 42675584Sru while (spec_ptr < spec_end && csspace(*spec_ptr)) 42775584Sru spec_ptr++; 42875584Sru spec_cur = spec_ptr; 42975584Sru if (spec_ptr >= spec_end) 43075584Sru return 0; 43175584Sru unsigned char c = *spec_ptr++; 43275584Sru if (csalpha(c)) { 43375584Sru yylval.num = c; 43475584Sru return TOKEN_LETTER; 43575584Sru } 43675584Sru if (csdigit(c)) { 43775584Sru yylval.num = c - '0'; 43875584Sru return TOKEN_DIGIT; 43975584Sru } 44075584Sru if (c == '\'') { 44175584Sru yylval.str.start = literals.length(); 44275584Sru for (; spec_ptr < spec_end; spec_ptr++) { 44375584Sru if (*spec_ptr == '\'') { 44475584Sru if (++spec_ptr < spec_end && *spec_ptr == '\'') 44575584Sru literals += '\''; 44675584Sru else { 44775584Sru yylval.str.len = literals.length() - yylval.str.start; 44875584Sru return TOKEN_LITERAL; 44975584Sru } 45075584Sru } 45175584Sru else 45275584Sru literals += *spec_ptr; 45375584Sru } 45475584Sru yylval.str.len = literals.length() - yylval.str.start; 45575584Sru return TOKEN_LITERAL; 45675584Sru } 45775584Sru return c; 45875584Sru} 45975584Sru 46075584Sruint 
set_label_spec(const char *label_spec) 46175584Sru{ 46275584Sru spec_cur = spec_ptr = label_spec; 46375584Sru spec_end = strchr(label_spec, '\0'); 46475584Sru literals.clear(); 46575584Sru if (yyparse()) 46675584Sru return 0; 46775584Sru delete parsed_label; 46875584Sru parsed_label = parse_result; 46975584Sru return 1; 47075584Sru} 47175584Sru 47275584Sruint set_date_label_spec(const char *label_spec) 47375584Sru{ 47475584Sru spec_cur = spec_ptr = label_spec; 47575584Sru spec_end = strchr(label_spec, '\0'); 47675584Sru literals.clear(); 47775584Sru if (yyparse()) 47875584Sru return 0; 47975584Sru delete parsed_date_label; 48075584Sru parsed_date_label = parse_result; 48175584Sru return 1; 48275584Sru} 48375584Sru 48475584Sruint set_short_label_spec(const char *label_spec) 48575584Sru{ 48675584Sru spec_cur = spec_ptr = label_spec; 48775584Sru spec_end = strchr(label_spec, '\0'); 48875584Sru literals.clear(); 48975584Sru if (yyparse()) 49075584Sru return 0; 49175584Sru delete parsed_short_label; 49275584Sru parsed_short_label = parse_result; 49375584Sru return 1; 49475584Sru} 49575584Sru 49675584Sruvoid yyerror(const char *message) 49775584Sru{ 49875584Sru if (spec_cur < spec_end) 49975584Sru command_error("label specification %1 before `%2'", message, spec_cur); 50075584Sru else 50175584Sru command_error("label specification %1 at end of string", 50275584Sru message, spec_cur); 50375584Sru} 50475584Sru 50575584Sruvoid at_expr::evaluate(int tentative, const reference &ref, 50675584Sru string &result, substring_position &) 50775584Sru{ 50875584Sru if (tentative) 50975584Sru ref.canonicalize_authors(result); 51075584Sru else { 51175584Sru const char *end, *start = ref.get_authors(&end); 51275584Sru if (start) 51375584Sru result.append(start, end - start); 51475584Sru } 51575584Sru} 51675584Sru 51775584Sruvoid format_expr::evaluate(int tentative, const reference &ref, 51875584Sru string &result, substring_position &) 51975584Sru{ 52075584Sru if (tentative) 52175584Sru 
return; 52275584Sru const label_info *lp = ref.get_label_ptr(); 52375584Sru int num = lp == 0 ? ref.get_number() : lp->count; 52475584Sru if (type != '0') 52575584Sru result += format_serial(type, num + 1); 52675584Sru else { 52775584Sru const char *ptr = i_to_a(num + first_number); 52875584Sru int pad = width - strlen(ptr); 52975584Sru while (--pad >= 0) 53075584Sru result += '0'; 53175584Sru result += ptr; 53275584Sru } 53375584Sru} 53475584Sru 53575584Srustatic const char *format_serial(char c, int n) 53675584Sru{ 53775584Sru assert(n > 0); 53875584Sru static char buf[128]; // more than enough. 53975584Sru switch (c) { 54075584Sru case 'i': 54175584Sru case 'I': 54275584Sru { 54375584Sru char *p = buf; 54475584Sru // troff uses z and w to represent 10000 and 5000 in Roman 54575584Sru // numerals; I can find no historical basis for this usage 54675584Sru const char *s = c == 'i' ? "zwmdclxvi" : "ZWMDCLXVI"; 54775584Sru if (n >= 40000) 54875584Sru return i_to_a(n); 54975584Sru while (n >= 10000) { 55075584Sru *p++ = s[0]; 55175584Sru n -= 10000; 55275584Sru } 55375584Sru for (int i = 1000; i > 0; i /= 10, s += 2) { 55475584Sru int m = n/i; 55575584Sru n -= m*i; 55675584Sru switch (m) { 55775584Sru case 3: 55875584Sru *p++ = s[2]; 55975584Sru /* falls through */ 56075584Sru case 2: 56175584Sru *p++ = s[2]; 56275584Sru /* falls through */ 56375584Sru case 1: 56475584Sru *p++ = s[2]; 56575584Sru break; 56675584Sru case 4: 56775584Sru *p++ = s[2]; 56875584Sru *p++ = s[1]; 56975584Sru break; 57075584Sru case 8: 57175584Sru *p++ = s[1]; 57275584Sru *p++ = s[2]; 57375584Sru *p++ = s[2]; 57475584Sru *p++ = s[2]; 57575584Sru break; 57675584Sru case 7: 57775584Sru *p++ = s[1]; 57875584Sru *p++ = s[2]; 57975584Sru *p++ = s[2]; 58075584Sru break; 58175584Sru case 6: 58275584Sru *p++ = s[1]; 58375584Sru *p++ = s[2]; 58475584Sru break; 58575584Sru case 5: 58675584Sru *p++ = s[1]; 58775584Sru break; 58875584Sru case 9: 58975584Sru *p++ = s[2]; 59075584Sru *p++ = s[0]; 
59175584Sru } 59275584Sru } 59375584Sru *p = 0; 59475584Sru break; 59575584Sru } 59675584Sru case 'a': 59775584Sru case 'A': 59875584Sru { 59975584Sru char *p = buf; 60075584Sru // this is derived from troff/reg.c 60175584Sru while (n > 0) { 60275584Sru int d = n % 26; 60375584Sru if (d == 0) 60475584Sru d = 26; 60575584Sru n -= d; 60675584Sru n /= 26; 607151497Sru *p++ = c == 'a' ? lowercase_array[d - 1] : 608151497Sru uppercase_array[d - 1]; 60975584Sru } 61075584Sru *p-- = 0; 61175584Sru // Reverse it. 61275584Sru char *q = buf; 61375584Sru while (q < p) { 61475584Sru char temp = *q; 61575584Sru *q = *p; 61675584Sru *p = temp; 61775584Sru --p; 61875584Sru ++q; 61975584Sru } 62075584Sru break; 62175584Sru } 62275584Sru default: 62375584Sru assert(0); 62475584Sru } 62575584Sru return buf; 62675584Sru} 62775584Sru 62875584Sruvoid field_expr::evaluate(int, const reference &ref, 62975584Sru string &result, substring_position &) 63075584Sru{ 63175584Sru const char *end; 63275584Sru const char *start = ref.get_field(name, &end); 63375584Sru if (start) { 63475584Sru start = nth_field(number, start, &end); 63575584Sru if (start) 63675584Sru result.append(start, end - start); 63775584Sru } 63875584Sru} 63975584Sru 64075584Sruvoid literal_expr::evaluate(int, const reference &, 64175584Sru string &result, substring_position &) 64275584Sru{ 64375584Sru result += s; 64475584Sru} 64575584Sru 64675584Sruanalyzed_expr::analyzed_expr(expression *e) 64775584Sru: unary_expr(e), flags(e ? 
e->analyze() : 0) 64875584Sru{ 64975584Sru} 65075584Sru 65175584Sruvoid analyzed_expr::evaluate(int tentative, const reference &ref, 65275584Sru string &result, substring_position &pos) 65375584Sru{ 65475584Sru if (expr) 65575584Sru expr->evaluate(tentative, ref, result, pos); 65675584Sru} 65775584Sru 65875584Sruvoid star_expr::evaluate(int tentative, const reference &ref, 65975584Sru string &result, substring_position &pos) 66075584Sru{ 66175584Sru const label_info *lp = ref.get_label_ptr(); 66275584Sru if (!tentative 66375584Sru && (lp == 0 || lp->total > 1) 66475584Sru && expr) 66575584Sru expr->evaluate(tentative, ref, result, pos); 66675584Sru} 66775584Sru 66875584Sruvoid separator_expr::evaluate(int tentative, const reference &ref, 66975584Sru string &result, substring_position &pos) 67075584Sru{ 67175584Sru int start_length = result.length(); 67275584Sru int is_first = pos.start < 0; 67375584Sru if (expr) 67475584Sru expr->evaluate(tentative, ref, result, pos); 67575584Sru if (is_first) { 67675584Sru pos.start = start_length; 67775584Sru pos.length = result.length() - start_length; 67875584Sru } 67975584Sru} 68075584Sru 68175584Sruvoid map_expr::evaluate(int tentative, const reference &ref, 68275584Sru string &result, substring_position &) 68375584Sru{ 68475584Sru if (expr) { 68575584Sru string temp; 68675584Sru substring_position temp_pos; 68775584Sru expr->evaluate(tentative, ref, temp, temp_pos); 68875584Sru (*func)(temp.contents(), temp.contents() + temp.length(), result); 68975584Sru } 69075584Sru} 69175584Sru 69275584Sruvoid extractor_expr::evaluate(int tentative, const reference &ref, 69375584Sru string &result, substring_position &) 69475584Sru{ 69575584Sru if (expr) { 69675584Sru string temp; 69775584Sru substring_position temp_pos; 69875584Sru expr->evaluate(tentative, ref, temp, temp_pos); 69975584Sru const char *end, *start = (*func)(temp.contents(), 70075584Sru temp.contents() + temp.length(), 70175584Sru &end); 70275584Sru switch (part) { 
70375584Sru case BEFORE: 70475584Sru if (start) 70575584Sru result.append(temp.contents(), start - temp.contents()); 70675584Sru else 70775584Sru result += temp; 70875584Sru break; 70975584Sru case MATCH: 71075584Sru if (start) 71175584Sru result.append(start, end - start); 71275584Sru break; 71375584Sru case AFTER: 71475584Sru if (start) 71575584Sru result.append(end, temp.contents() + temp.length() - end); 71675584Sru break; 71775584Sru default: 71875584Sru assert(0); 71975584Sru } 72075584Sru } 72175584Sru} 72275584Sru 72375584Srustatic void first_part(int len, const char *ptr, const char *end, 72475584Sru string &result) 72575584Sru{ 72675584Sru for (;;) { 72775584Sru const char *token_start = ptr; 72875584Sru if (!get_token(&ptr, end)) 72975584Sru break; 73075584Sru const token_info *ti = lookup_token(token_start, ptr); 73175584Sru int counts = ti->sortify_non_empty(token_start, ptr); 73275584Sru if (counts && --len < 0) 73375584Sru break; 73475584Sru if (counts || ti->is_accent()) 73575584Sru result.append(token_start, ptr - token_start); 73675584Sru } 73775584Sru} 73875584Sru 73975584Srustatic void last_part(int len, const char *ptr, const char *end, 74075584Sru string &result) 74175584Sru{ 74275584Sru const char *start = ptr; 74375584Sru int count = 0; 74475584Sru for (;;) { 74575584Sru const char *token_start = ptr; 74675584Sru if (!get_token(&ptr, end)) 74775584Sru break; 74875584Sru const token_info *ti = lookup_token(token_start, ptr); 74975584Sru if (ti->sortify_non_empty(token_start, ptr)) 75075584Sru count++; 75175584Sru } 75275584Sru ptr = start; 75375584Sru int skip = count - len; 75475584Sru if (skip > 0) { 75575584Sru for (;;) { 75675584Sru const char *token_start = ptr; 75775584Sru if (!get_token(&ptr, end)) 75875584Sru assert(0); 75975584Sru const token_info *ti = lookup_token(token_start, ptr); 76075584Sru if (ti->sortify_non_empty(token_start, ptr) && --skip < 0) { 76175584Sru ptr = token_start; 76275584Sru break; 76375584Sru } 76475584Sru } 
76575584Sru } 76675584Sru first_part(len, ptr, end, result); 76775584Sru} 76875584Sru 76975584Sruvoid truncate_expr::evaluate(int tentative, const reference &ref, 77075584Sru string &result, substring_position &) 77175584Sru{ 77275584Sru if (expr) { 77375584Sru string temp; 77475584Sru substring_position temp_pos; 77575584Sru expr->evaluate(tentative, ref, temp, temp_pos); 77675584Sru const char *start = temp.contents(); 77775584Sru const char *end = start + temp.length(); 77875584Sru if (n > 0) 77975584Sru first_part(n, start, end, result); 78075584Sru else if (n < 0) 78175584Sru last_part(-n, start, end, result); 78275584Sru } 78375584Sru} 78475584Sru 78575584Sruvoid alternative_expr::evaluate(int tentative, const reference &ref, 78675584Sru string &result, substring_position &pos) 78775584Sru{ 78875584Sru int start_length = result.length(); 78975584Sru if (expr1) 79075584Sru expr1->evaluate(tentative, ref, result, pos); 79175584Sru if (result.length() == start_length && expr2) 79275584Sru expr2->evaluate(tentative, ref, result, pos); 79375584Sru} 79475584Sru 79575584Sruvoid list_expr::evaluate(int tentative, const reference &ref, 79675584Sru string &result, substring_position &pos) 79775584Sru{ 79875584Sru if (expr1) 79975584Sru expr1->evaluate(tentative, ref, result, pos); 80075584Sru if (expr2) 80175584Sru expr2->evaluate(tentative, ref, result, pos); 80275584Sru} 80375584Sru 80475584Sruvoid substitute_expr::evaluate(int tentative, const reference &ref, 80575584Sru string &result, substring_position &pos) 80675584Sru{ 80775584Sru int start_length = result.length(); 80875584Sru if (expr1) 80975584Sru expr1->evaluate(tentative, ref, result, pos); 81075584Sru if (result.length() > start_length && result[result.length() - 1] == '-') { 81175584Sru // ought to see if pos covers the - 81275584Sru result.set_length(result.length() - 1); 81375584Sru if (expr2) 81475584Sru expr2->evaluate(tentative, ref, result, pos); 81575584Sru } 81675584Sru} 81775584Sru 
81875584Sruvoid conditional_expr::evaluate(int tentative, const reference &ref, 81975584Sru string &result, substring_position &pos) 82075584Sru{ 82175584Sru string temp; 82275584Sru substring_position temp_pos; 82375584Sru if (expr1) 82475584Sru expr1->evaluate(tentative, ref, temp, temp_pos); 82575584Sru if (temp.length() > 0) { 82675584Sru if (expr2) 82775584Sru expr2->evaluate(tentative, ref, result, pos); 82875584Sru } 82975584Sru else { 83075584Sru if (expr3) 83175584Sru expr3->evaluate(tentative, ref, result, pos); 83275584Sru } 83375584Sru} 83475584Sru 83575584Sruvoid reference::pre_compute_label() 83675584Sru{ 83775584Sru if (parsed_label != 0 83875584Sru && (parsed_label->analyze() & expression::CONTAINS_VARIABLE)) { 83975584Sru label.clear(); 84075584Sru substring_position temp_pos; 84175584Sru parsed_label->evaluate(1, *this, label, temp_pos); 84275584Sru label_ptr = lookup_label(label); 84375584Sru } 84475584Sru} 84575584Sru 84675584Sruvoid reference::compute_label() 84775584Sru{ 84875584Sru label.clear(); 84975584Sru if (parsed_label) 85075584Sru parsed_label->evaluate(0, *this, label, separator_pos); 85175584Sru if (short_label_flag && parsed_short_label) 85275584Sru parsed_short_label->evaluate(0, *this, short_label, short_separator_pos); 85375584Sru if (date_as_label) { 85475584Sru string new_date; 85575584Sru if (parsed_date_label) { 85675584Sru substring_position temp_pos; 85775584Sru parsed_date_label->evaluate(0, *this, new_date, temp_pos); 85875584Sru } 85975584Sru set_date(new_date); 86075584Sru } 86175584Sru if (label_ptr) 86275584Sru label_ptr->count += 1; 86375584Sru} 86475584Sru 86575584Sruvoid reference::immediate_compute_label() 86675584Sru{ 86775584Sru if (label_ptr) 86875584Sru label_ptr->total = 2; // force use of disambiguator 86975584Sru compute_label(); 87075584Sru} 87175584Sru 87275584Sruint reference::merge_labels(reference **v, int n, label_type type, 87375584Sru string &result) 87475584Sru{ 87575584Sru if 
(abbreviate_label_ranges) 87675584Sru return merge_labels_by_number(v, n, type, result); 87775584Sru else 87875584Sru return merge_labels_by_parts(v, n, type, result); 87975584Sru} 88075584Sru 88175584Sruint reference::merge_labels_by_number(reference **v, int n, label_type type, 88275584Sru string &result) 88375584Sru{ 88475584Sru if (n <= 1) 88575584Sru return 0; 88675584Sru int num = get_number(); 88775584Sru // Only merge three or more labels. 88875584Sru if (v[0]->get_number() != num + 1 88975584Sru || v[1]->get_number() != num + 2) 89075584Sru return 0; 89175584Sru int i; 89275584Sru for (i = 2; i < n; i++) 89375584Sru if (v[i]->get_number() != num + i + 1) 89475584Sru break; 89575584Sru result = get_label(type); 89675584Sru result += label_range_indicator; 89775584Sru result += v[i - 1]->get_label(type); 89875584Sru return i; 89975584Sru} 90075584Sru 90175584Sruconst substring_position &reference::get_separator_pos(label_type type) const 90275584Sru{ 90375584Sru if (type == SHORT_LABEL && short_label_flag) 90475584Sru return short_separator_pos; 90575584Sru else 90675584Sru return separator_pos; 90775584Sru} 90875584Sru 90975584Sruconst string &reference::get_label(label_type type) const 91075584Sru{ 91175584Sru if (type == SHORT_LABEL && short_label_flag) 91275584Sru return short_label; 91375584Sru else 91475584Sru return label; 91575584Sru} 91675584Sru 91775584Sruint reference::merge_labels_by_parts(reference **v, int n, label_type type, 91875584Sru string &result) 91975584Sru{ 92075584Sru if (n <= 0) 92175584Sru return 0; 92275584Sru const string &lb = get_label(type); 92375584Sru const substring_position &sp = get_separator_pos(type); 92475584Sru if (sp.start < 0 92575584Sru || sp.start != v[0]->get_separator_pos(type).start 92675584Sru || memcmp(lb.contents(), v[0]->get_label(type).contents(), 92775584Sru sp.start) != 0) 92875584Sru return 0; 92975584Sru result = lb; 93075584Sru int i = 0; 93175584Sru do { 93275584Sru result += 
separate_label_second_parts; 93375584Sru const substring_position &s = v[i]->get_separator_pos(type); 93475584Sru int sep_end_pos = s.start + s.length; 93575584Sru result.append(v[i]->get_label(type).contents() + sep_end_pos, 93675584Sru v[i]->get_label(type).length() - sep_end_pos); 93775584Sru } while (++i < n 93875584Sru && sp.start == v[i]->get_separator_pos(type).start 93975584Sru && memcmp(lb.contents(), v[i]->get_label(type).contents(), 94075584Sru sp.start) == 0); 94175584Sru return i; 94275584Sru} 94375584Sru 94475584Srustring label_pool; 94575584Sru 94675584Srulabel_info::label_info(const string &s) 94775584Sru: start(label_pool.length()), length(s.length()), count(0), total(1) 94875584Sru{ 94975584Sru label_pool += s; 95075584Sru} 95175584Sru 95275584Srustatic label_info **label_table = 0; 95375584Srustatic int label_table_size = 0; 95475584Srustatic int label_table_used = 0; 95575584Sru 95675584Srulabel_info *lookup_label(const string &label) 95775584Sru{ 95875584Sru if (label_table == 0) { 95975584Sru label_table = new label_info *[17]; 96075584Sru label_table_size = 17; 96175584Sru for (int i = 0; i < 17; i++) 96275584Sru label_table[i] = 0; 96375584Sru } 96475584Sru unsigned h = hash_string(label.contents(), label.length()) % label_table_size; 96575584Sru label_info **ptr; 96675584Sru for (ptr = label_table + h; 96775584Sru *ptr != 0; 96875584Sru (ptr == label_table) 96975584Sru ? (ptr = label_table + label_table_size - 1) 97075584Sru : ptr--) 97175584Sru if ((*ptr)->length == label.length() 97275584Sru && memcmp(label_pool.contents() + (*ptr)->start, label.contents(), 97375584Sru label.length()) == 0) { 97475584Sru (*ptr)->total += 1; 97575584Sru return *ptr; 97675584Sru } 97775584Sru label_info *result = *ptr = new label_info(label); 97875584Sru if (++label_table_used * 2 > label_table_size) { 97975584Sru // Rehash the table. 
98075584Sru label_info **old_table = label_table; 98175584Sru int old_size = label_table_size; 98275584Sru label_table_size = next_size(label_table_size); 98375584Sru label_table = new label_info *[label_table_size]; 98475584Sru int i; 98575584Sru for (i = 0; i < label_table_size; i++) 98675584Sru label_table[i] = 0; 98775584Sru for (i = 0; i < old_size; i++) 98875584Sru if (old_table[i]) { 989151497Sru h = hash_string(label_pool.contents() + old_table[i]->start, 990151497Sru old_table[i]->length); 99175584Sru label_info **p; 99275584Sru for (p = label_table + (h % label_table_size); 99375584Sru *p != 0; 99475584Sru (p == label_table) 99575584Sru ? (p = label_table + label_table_size - 1) 99675584Sru : --p) 99775584Sru ; 99875584Sru *p = old_table[i]; 99975584Sru } 100075584Sru a_delete old_table; 100175584Sru } 100275584Sru return result; 100375584Sru} 100475584Sru 100575584Sruvoid clear_labels() 100675584Sru{ 100775584Sru for (int i = 0; i < label_table_size; i++) { 100875584Sru delete label_table[i]; 100975584Sru label_table[i] = 0; 101075584Sru } 101175584Sru label_table_used = 0; 101275584Sru label_pool.clear(); 101375584Sru} 101475584Sru 101575584Srustatic void consider_authors(reference **start, reference **end, int i); 101675584Sru 101775584Sruvoid compute_labels(reference **v, int n) 101875584Sru{ 101975584Sru if (parsed_label 102075584Sru && (parsed_label->analyze() & expression::CONTAINS_AT) 102175584Sru && sort_fields.length() >= 2 102275584Sru && sort_fields[0] == 'A' 102375584Sru && sort_fields[1] == '+') 102475584Sru consider_authors(v, v + n, 0); 102575584Sru for (int i = 0; i < n; i++) 102675584Sru v[i]->compute_label(); 102775584Sru} 102875584Sru 102975584Sru 103075584Sru/* A reference with a list of authors <A0,A1,...,AN> _needs_ author i 103175584Sruwhere 0 <= i <= N if there exists a reference with a list of authors 103275584Sru<B0,B1,...,BM> such that <A0,A1,...,AN> != <B0,B1,...,BM> and M >= i 103375584Sruand Aj = Bj for 0 <= j < i. 
In this case if we can't say ``A0, 103475584SruA1,...,A(i-1) et al'' because this would match both <A0,A1,...,AN> and 103575584Sru<B0,B1,...,BM>. If a reference needs author i we only have to call 103675584Sruneed_author(j) for some j >= i such that the reference also needs 103775584Sruauthor j. */ 103875584Sru 103975584Sru/* This function handles 2 tasks: 104075584Srudetermine which authors are needed (cannot be elided with et al.); 104175584Srudetermine which authors can have only last names in the labels. 104275584Sru 104375584SruReferences >= start and < end have the same first i author names. 104475584SruAlso they're sorted by A+. */ 104575584Sru 104675584Srustatic void consider_authors(reference **start, reference **end, int i) 104775584Sru{ 104875584Sru if (start >= end) 104975584Sru return; 105075584Sru reference **p = start; 105175584Sru if (i >= (*p)->get_nauthors()) { 105275584Sru for (++p; p < end && i >= (*p)->get_nauthors(); p++) 105375584Sru ; 105475584Sru if (p < end && i > 0) { 105575584Sru // If we have an author list <A B C> and an author list <A B C D>, 105675584Sru // then both lists need C. 
105775584Sru for (reference **q = start; q < end; q++) 105875584Sru (*q)->need_author(i - 1); 105975584Sru } 106075584Sru start = p; 106175584Sru } 106275584Sru while (p < end) { 106375584Sru reference **last_name_start = p; 106475584Sru reference **name_start = p; 106575584Sru for (++p; 106675584Sru p < end && i < (*p)->get_nauthors() 106775584Sru && same_author_last_name(**last_name_start, **p, i); 106875584Sru p++) { 106975584Sru if (!same_author_name(**name_start, **p, i)) { 107075584Sru consider_authors(name_start, p, i + 1); 107175584Sru name_start = p; 107275584Sru } 107375584Sru } 107475584Sru consider_authors(name_start, p, i + 1); 107575584Sru if (last_name_start == name_start) { 107675584Sru for (reference **q = last_name_start; q < p; q++) 107775584Sru (*q)->set_last_name_unambiguous(i); 107875584Sru } 107975584Sru // If we have an author list <A B C D> and <A B C E>, then the lists 108075584Sru // need author D and E respectively. 108175584Sru if (name_start > start || p < end) { 108275584Sru for (reference **q = last_name_start; q < p; q++) 108375584Sru (*q)->need_author(i); 108475584Sru } 108575584Sru } 108675584Sru} 108775584Sru 108875584Sruint same_author_last_name(const reference &r1, const reference &r2, int n) 108975584Sru{ 109075584Sru const char *ae1; 109175584Sru const char *as1 = r1.get_sort_field(0, n, 0, &ae1); 109275584Sru const char *ae2; 109375584Sru const char *as2 = r2.get_sort_field(0, n, 0, &ae2); 1094151497Sru if (!as1 && !as2) return 1; // they are the same 1095151497Sru if (!as1 || !as2) return 0; 109675584Sru return ae1 - as1 == ae2 - as2 && memcmp(as1, as2, ae1 - as1) == 0; 109775584Sru} 109875584Sru 109975584Sruint same_author_name(const reference &r1, const reference &r2, int n) 110075584Sru{ 110175584Sru const char *ae1; 110275584Sru const char *as1 = r1.get_sort_field(0, n, -1, &ae1); 110375584Sru const char *ae2; 110475584Sru const char *as2 = r2.get_sort_field(0, n, -1, &ae2); 1105151497Sru if (!as1 && !as2) return 1; // 
they are the same 1106151497Sru if (!as1 || !as2) return 0; 110775584Sru return ae1 - as1 == ae2 - as2 && memcmp(as1, as2, ae1 - as1) == 0; 110875584Sru} 110975584Sru 111075584Sru 111175584Sruvoid int_set::set(int i) 111275584Sru{ 111375584Sru assert(i >= 0); 111475584Sru int bytei = i >> 3; 111575584Sru if (bytei >= v.length()) { 111675584Sru int old_length = v.length(); 111775584Sru v.set_length(bytei + 1); 111875584Sru for (int j = old_length; j <= bytei; j++) 111975584Sru v[j] = 0; 112075584Sru } 112175584Sru v[bytei] |= 1 << (i & 7); 112275584Sru} 112375584Sru 112475584Sruint int_set::get(int i) const 112575584Sru{ 112675584Sru assert(i >= 0); 112775584Sru int bytei = i >> 3; 112875584Sru return bytei >= v.length() ? 0 : (v[bytei] & (1 << (i & 7))) != 0; 112975584Sru} 113075584Sru 113175584Sruvoid reference::set_last_name_unambiguous(int i) 113275584Sru{ 113375584Sru last_name_unambiguous.set(i); 113475584Sru} 113575584Sru 113675584Sruvoid reference::need_author(int n) 113775584Sru{ 113875584Sru if (n > last_needed_author) 113975584Sru last_needed_author = n; 114075584Sru} 114175584Sru 114275584Sruconst char *reference::get_authors(const char **end) const 114375584Sru{ 114475584Sru if (!computed_authors) { 114575584Sru ((reference *)this)->computed_authors = 1; 114675584Sru string &result = ((reference *)this)->authors; 114775584Sru int na = get_nauthors(); 114875584Sru result.clear(); 114975584Sru for (int i = 0; i < na; i++) { 115075584Sru if (last_name_unambiguous.get(i)) { 115175584Sru const char *e, *start = get_author_last_name(i, &e); 115275584Sru assert(start != 0); 115375584Sru result.append(start, e - start); 115475584Sru } 115575584Sru else { 115675584Sru const char *e, *start = get_author(i, &e); 115775584Sru assert(start != 0); 115875584Sru result.append(start, e - start); 115975584Sru } 116075584Sru if (i == last_needed_author 116175584Sru && et_al.length() > 0 116275584Sru && et_al_min_elide > 0 116375584Sru && last_needed_author + 
et_al_min_elide < na 116475584Sru && na >= et_al_min_total) { 116575584Sru result += et_al; 116675584Sru break; 116775584Sru } 116875584Sru if (i < na - 1) { 116975584Sru if (na == 2) 117075584Sru result += join_authors_exactly_two; 117175584Sru else if (i < na - 2) 117275584Sru result += join_authors_default; 117375584Sru else 117475584Sru result += join_authors_last_two; 117575584Sru } 117675584Sru } 117775584Sru } 117875584Sru const char *start = authors.contents(); 117975584Sru *end = start + authors.length(); 118075584Sru return start; 118175584Sru} 118275584Sru 118375584Sruint reference::get_nauthors() const 118475584Sru{ 118575584Sru if (nauthors < 0) { 118675584Sru const char *dummy; 118775584Sru int na; 118875584Sru for (na = 0; get_author(na, &dummy) != 0; na++) 118975584Sru ; 119075584Sru ((reference *)this)->nauthors = na; 119175584Sru } 119275584Sru return nauthors; 119375584Sru} 1194