175584Sru/* -*- C++ -*-
2151497Sru   Copyright (C) 1989, 1990, 1991, 1992, 2000, 2004
3151497Sru   Free Software Foundation, Inc.
475584Sru     Written by James Clark (jjc@jclark.com)
575584Sru
675584SruThis file is part of groff.
775584Sru
875584Srugroff is free software; you can redistribute it and/or modify it under
975584Sruthe terms of the GNU General Public License as published by the Free
1075584SruSoftware Foundation; either version 2, or (at your option) any later
1175584Sruversion.
1275584Sru
1375584Srugroff is distributed in the hope that it will be useful, but WITHOUT ANY
1475584SruWARRANTY; without even the implied warranty of MERCHANTABILITY or
1575584SruFITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
1675584Srufor more details.
1775584Sru
1875584SruYou should have received a copy of the GNU General Public License along
1975584Sruwith groff; see the file COPYING.  If not, write to the Free Software
20151497SruFoundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
2175584Sru
2275584Sru%{
2375584Sru
2475584Sru#include "refer.h"
2575584Sru#include "refid.h"
2675584Sru#include "ref.h"
2775584Sru#include "token.h"
2875584Sru
// Scanner called by the generated parser; defined after the grammar.
int yylex();
// Error hook called by the generated parser; reports through
// command_error().
void yyerror(const char *);
// Entry point of the generated parser.
int yyparse();

// Formats the positive number n as Roman numerals (c is 'i' or 'I') or
// as letters (c is 'a' or 'A'); defined after the grammar.
static const char *format_serial(char c, int n);
3475584Sru
// Bookkeeping record for one distinct tentative label.
struct label_info {
  // Offset of the label text within label_pool.
  int start;
  // Length of the label text.
  int length;
  // Starts at 0; incremented by reference::compute_label().
  int count;
  // Starts at 1; incremented each time lookup_label() finds the label
  // again.
  int total;
  label_info(const string &);
};

// Returns the record for label, creating it the first time it is seen.
label_info *lookup_label(const string &label);
4475584Sru
4575584Srustruct expression {
4675584Sru  enum {
4775584Sru    // Does the tentative label depend on the reference?
4875584Sru    CONTAINS_VARIABLE = 01,
4975584Sru    CONTAINS_STAR = 02,
5075584Sru    CONTAINS_FORMAT = 04,
5175584Sru    CONTAINS_AT = 010
5275584Sru  };
5375584Sru  virtual ~expression() { }
5475584Sru  virtual void evaluate(int, const reference &, string &,
5575584Sru			substring_position &) = 0;
5675584Sru  virtual unsigned analyze() { return 0; }
5775584Sru};
5875584Sru
5975584Sruclass at_expr : public expression {
6075584Srupublic:
6175584Sru  at_expr() { }
6275584Sru  void evaluate(int, const reference &, string &, substring_position &);
6375584Sru  unsigned analyze() { return CONTAINS_VARIABLE|CONTAINS_AT; }
6475584Sru};
6575584Sru
6675584Sruclass format_expr : public expression {
6775584Sru  char type;
6875584Sru  int width;
6975584Sru  int first_number;
7075584Srupublic:
7175584Sru  format_expr(char c, int w = 0, int f = 1)
7275584Sru    : type(c), width(w), first_number(f) { }
7375584Sru  void evaluate(int, const reference &, string &, substring_position &);
7475584Sru  unsigned analyze() { return CONTAINS_FORMAT; }
7575584Sru};
7675584Sru
7775584Sruclass field_expr : public expression {
7875584Sru  int number;
7975584Sru  char name;
8075584Srupublic:
8175584Sru  field_expr(char nm, int num) : number(num), name(nm) { }
8275584Sru  void evaluate(int, const reference &, string &, substring_position &);
8375584Sru  unsigned analyze() { return CONTAINS_VARIABLE; }
8475584Sru};
8575584Sru
8675584Sruclass literal_expr : public expression {
8775584Sru  string s;
8875584Srupublic:
8975584Sru  literal_expr(const char *ptr, int len) : s(ptr, len) { }
9075584Sru  void evaluate(int, const reference &, string &, substring_position &);
9175584Sru};
9275584Sru
9375584Sruclass unary_expr : public expression {
9475584Sruprotected:
9575584Sru  expression *expr;
9675584Srupublic:
9775584Sru  unary_expr(expression *e) : expr(e) { }
9875584Sru  ~unary_expr() { delete expr; }
9975584Sru  void evaluate(int, const reference &, string &, substring_position &) = 0;
10075584Sru  unsigned analyze() { return expr ? expr->analyze() : 0; }
10175584Sru};
10275584Sru
10375584Sru// This caches the analysis of an expression.
10475584Sru
10575584Sruclass analyzed_expr : public unary_expr {
10675584Sru  unsigned flags;
10775584Srupublic:
10875584Sru  analyzed_expr(expression *);
10975584Sru  void evaluate(int, const reference &, string &, substring_position &);
11075584Sru  unsigned analyze() { return flags; }
11175584Sru};
11275584Sru
11375584Sruclass star_expr : public unary_expr {
11475584Srupublic:
11575584Sru  star_expr(expression *e) : unary_expr(e) { }
11675584Sru  void evaluate(int, const reference &, string &, substring_position &);
11775584Sru  unsigned analyze() {
11875584Sru    return ((expr ? (expr->analyze() & ~CONTAINS_VARIABLE) : 0)
11975584Sru	    | CONTAINS_STAR);
12075584Sru  }
12175584Sru};
12275584Sru
12375584Srutypedef void map_func(const char *, const char *, string &);
12475584Sru
12575584Sruclass map_expr : public unary_expr {
12675584Sru  map_func *func;
12775584Srupublic:
12875584Sru  map_expr(expression *e, map_func *f) : unary_expr(e), func(f) { }
12975584Sru  void evaluate(int, const reference &, string &, substring_position &);
13075584Sru};
13175584Sru
13275584Srutypedef const char *extractor_func(const char *, const char *, const char **);
13375584Sru
13475584Sruclass extractor_expr : public unary_expr {
13575584Sru  int part;
13675584Sru  extractor_func *func;
13775584Srupublic:
13875584Sru  enum { BEFORE = +1, MATCH = 0, AFTER = -1 };
13975584Sru  extractor_expr(expression *e, extractor_func *f, int pt)
14075584Sru    : unary_expr(e), part(pt), func(f) { }
14175584Sru  void evaluate(int, const reference &, string &, substring_position &);
14275584Sru};
14375584Sru
14475584Sruclass truncate_expr : public unary_expr {
14575584Sru  int n;
14675584Srupublic:
14775584Sru  truncate_expr(expression *e, int i) : unary_expr(e), n(i) { }
14875584Sru  void evaluate(int, const reference &, string &, substring_position &);
14975584Sru};
15075584Sru
15175584Sruclass separator_expr : public unary_expr {
15275584Srupublic:
15375584Sru  separator_expr(expression *e) : unary_expr(e) { }
15475584Sru  void evaluate(int, const reference &, string &, substring_position &);
15575584Sru};
15675584Sru
15775584Sruclass binary_expr : public expression {
15875584Sruprotected:
15975584Sru  expression *expr1;
16075584Sru  expression *expr2;
16175584Srupublic:
16275584Sru  binary_expr(expression *e1, expression *e2) : expr1(e1), expr2(e2) { }
16375584Sru  ~binary_expr() { delete expr1; delete expr2; }
16475584Sru  void evaluate(int, const reference &, string &, substring_position &) = 0;
16575584Sru  unsigned analyze() {
16675584Sru    return (expr1 ? expr1->analyze() : 0) | (expr2 ? expr2->analyze() : 0);
16775584Sru  }
16875584Sru};
16975584Sru
17075584Sruclass alternative_expr : public binary_expr {
17175584Srupublic:
17275584Sru  alternative_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
17375584Sru  void evaluate(int, const reference &, string &, substring_position &);
17475584Sru};
17575584Sru
17675584Sruclass list_expr : public binary_expr {
17775584Srupublic:
17875584Sru  list_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
17975584Sru  void evaluate(int, const reference &, string &, substring_position &);
18075584Sru};
18175584Sru
18275584Sruclass substitute_expr : public binary_expr {
18375584Srupublic:
18475584Sru  substitute_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
18575584Sru  void evaluate(int, const reference &, string &, substring_position &);
18675584Sru};
18775584Sru
18875584Sruclass ternary_expr : public expression {
18975584Sruprotected:
19075584Sru  expression *expr1;
19175584Sru  expression *expr2;
19275584Sru  expression *expr3;
19375584Srupublic:
19475584Sru  ternary_expr(expression *e1, expression *e2, expression *e3)
19575584Sru    : expr1(e1), expr2(e2), expr3(e3) { }
19675584Sru  ~ternary_expr() { delete expr1; delete expr2; delete expr3; }
19775584Sru  void evaluate(int, const reference &, string &, substring_position &) = 0;
19875584Sru  unsigned analyze() {
19975584Sru    return ((expr1 ? expr1->analyze() : 0)
20075584Sru	    | (expr2 ? expr2->analyze() : 0)
20175584Sru	    | (expr3 ? expr3->analyze() : 0));
20275584Sru  }
20375584Sru};
20475584Sru
20575584Sruclass conditional_expr : public ternary_expr {
20675584Srupublic:
20775584Sru  conditional_expr(expression *e1, expression *e2, expression *e3)
20875584Sru    : ternary_expr(e1, e2, e3) { }
20975584Sru  void evaluate(int, const reference &, string &, substring_position &);
21075584Sru};
21175584Sru
// Expression trees installed by set_label_spec(),
// set_date_label_spec() and set_short_label_spec() respectively;
// null until a specification has been parsed.
static expression *parsed_label = 0;
static expression *parsed_date_label = 0;
static expression *parsed_short_label = 0;

// Tree produced by the parser; assigned by the action of the `expr'
// rule.
static expression *parse_result;

// Text of the quoted literals collected by yylex(); TOKEN_LITERAL
// values are (start, len) spans within it.
string literals;
21975584Sru
22075584Sru%}
22175584Sru
%union {
  /* character code of a letter, value of a digit or number, or flag */
  int num;
  /* node of the expression tree being built */
  expression *expr;
  /* run of digits: how many there were and their decimal value */
  struct { int ndigits; int val; } dig;
  /* quoted literal: offset and length within `literals' */
  struct { int start; int len; } str;
}
22875584Sru
/* uppercase or lowercase letter */
%token <num> TOKEN_LETTER
/* literal characters */
%token <str> TOKEN_LITERAL
/* digit */
%token <num> TOKEN_DIGIT

/* nonterminals that build expression nodes */
%type <expr> conditional
%type <expr> alternative
%type <expr> list
%type <expr> string
%type <expr> substitute
%type <expr> optional_conditional
/* numeric nonterminals */
%type <num> number
%type <dig> digits
%type <num> optional_number
%type <num> flag
24775584Sru%%
24875584Sru
/* Start symbol.  A non-empty specification is wrapped in an
   analyzed_expr; an empty one leaves parse_result null. */
expr:
	optional_conditional
		{ parse_result = ($1 ? new analyzed_expr($1) : 0); }
	;
25375584Sru
/* `a ? b : c'; the middle operand may be empty. */
conditional:
	alternative
		{ $$ = $1; }
	| alternative '?' optional_conditional ':' conditional
		{ $$ = new conditional_expr($1, $3, $5); }
	;
26075584Sru
/* A conditional, or nothing; nothing is represented by a null
   pointer. */
optional_conditional:
	/* empty */
		{ $$ = 0; }
	| conditional
		{ $$ = $1; }
	;
26775584Sru
/* `a | b' builds an alternative_expr.  `a & b' is built as a
   conditional_expr with no third operand. */
alternative:
	list
		{ $$ = $1; }
	| alternative '|' list
		{ $$ = new alternative_expr($1, $3); }
	| alternative '&' list
		{ $$ = new conditional_expr($1, $3, 0); }
	;
27675584Sru
/* One or more substitute expressions written next to each other. */
list:
	substitute
		{ $$ = $1; }
	| list substitute
		{ $$ = new list_expr($1, $2); }
	;
28375584Sru
/* `a ~ b', left associative. */
substitute:
	string
		{ $$ = $1; }
	| substitute '~' string
		{ $$ = new substitute_expr($1, $3); }
	;
29075584Sru
/* A primary expression followed by any number of postfix operators. */
string:
	/* `@' */
	'@'
		{ $$ = new at_expr; }
	/* quoted literal; its text was appended to `literals' by yylex() */
	| TOKEN_LITERAL
		{
		  $$ = new literal_expr(literals.contents() + $1.start,
					$1.len);
		}
	/* field letter without a number: index 0 */
	| TOKEN_LETTER
		{ $$ = new field_expr($1, 0); }
	/* field letter with a number: the number is stored minus one */
	| TOKEN_LETTER number
		{ $$ = new field_expr($1, $2 - 1); }
	/* `%' followed by a format letter; an unknown letter is reported
	   and treated as `a' */
	| '%' TOKEN_LETTER
		{
		  switch ($2) {
		  case 'I':
		  case 'i':
		  case 'A':
		  case 'a':
		    $$ = new format_expr($2);
		    break;
		  default:
		    command_error("unrecognized format `%1'", char($2));
		    $$ = new format_expr('a');
		    break;
		  }
		}

	/* `%' followed by digits: the digit count becomes the width and
	   the value becomes the first number */
	| '%' digits
		{
		  $$ = new format_expr('0', $2.ndigits, $2.val);
		}
	/* `.' function call with an optional `+' or `-' flag.  The
	   trailing optional_number ($5) is parsed but not used by the
	   action; an unknown function letter is reported and the operand
	   is passed through unchanged. */
	| string '.' flag TOKEN_LETTER optional_number
		{
		  switch ($4) {
		  case 'l':
		    $$ = new map_expr($1, lowercase);
		    break;
		  case 'u':
		    $$ = new map_expr($1, uppercase);
		    break;
		  case 'c':
		    $$ = new map_expr($1, capitalize);
		    break;
		  case 'r':
		    $$ = new map_expr($1, reverse_name);
		    break;
		  case 'a':
		    $$ = new map_expr($1, abbreviate_name);
		    break;
		  case 'y':
		    $$ = new extractor_expr($1, find_year, $3);
		    break;
		  case 'n':
		    $$ = new extractor_expr($1, find_last_name, $3);
		    break;
		  default:
		    $$ = $1;
		    command_error("unknown function `%1'", char($4));
		    break;
		  }
		}

	/* `+n' passes a positive count to truncate_expr, `-n' a negative
	   one */
	| string '+' number
		{ $$ = new truncate_expr($1, $3); }
	| string '-' number
		{ $$ = new truncate_expr($1, -$3); }
	| string '*'
		{ $$ = new star_expr($1); }
	/* parentheses only group; an empty pair yields a null pointer */
	| '(' optional_conditional ')'
		{ $$ = $2; }
	| '<' optional_conditional '>'
		{ $$ = new separator_expr($2); }
	;
36575584Sru
/* A number, or -1 when there is none. */
optional_number:
	/* empty */
		{ $$ = -1; }
	| number
		{ $$ = $1; }
	;
37275584Sru
/* Decimal number accumulated one digit at a time. */
number:
	TOKEN_DIGIT
		{ $$ = $1; }
	| number TOKEN_DIGIT
		{ $$ = $1*10 + $2; }
	;
37975584Sru
/* Like `number', but also counts how many digits were written. */
digits:
	TOKEN_DIGIT
		{ $$.ndigits = 1; $$.val = $1; }
	| digits TOKEN_DIGIT
		{ $$.ndigits = $1.ndigits + 1; $$.val = $1.val*10 + $2; }
	;
38675584Sru
38775584Sru
/* Optional sign before a function letter: 0 for none, 1 for `+',
   -1 for `-'. */
flag:
	/* empty */
		{ $$ = 0; }
	| '+'
		{ $$ = 1; }
	| '-'
		{ $$ = -1; }
	;
39675584Sru
39775584Sru%%
39875584Sru
39975584Sru/* bison defines const to be empty unless __STDC__ is defined, which it
40075584Sruisn't under cfront */
40175584Sru
40275584Sru#ifdef const
40375584Sru#undef const
40475584Sru#endif
40575584Sru
// Next character of the specification that yylex() will read.
const char *spec_ptr;
// The terminating null of the specification being parsed.
const char *spec_end;
// Start of the token most recently scanned by yylex(); yyerror() uses
// it to show where parsing stopped.
const char *spec_cur;
40975584Sru
// The uppercase letters in alphabetical order, indexed 0..25.  The
// table is only read, so it is declared const.
static const char uppercase_array[] = {
  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
  'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
  'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
  'Y', 'Z',
};
416151497Sru
// The lowercase letters in alphabetical order, indexed 0..25.  The
// table is only read, so it is declared const.
static const char lowercase_array[] = {
  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h',
  'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p',
  'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
  'y', 'z',
};
423151497Sru
42475584Sruint yylex()
42575584Sru{
42675584Sru  while (spec_ptr < spec_end && csspace(*spec_ptr))
42775584Sru    spec_ptr++;
42875584Sru  spec_cur = spec_ptr;
42975584Sru  if (spec_ptr >= spec_end)
43075584Sru    return 0;
43175584Sru  unsigned char c = *spec_ptr++;
43275584Sru  if (csalpha(c)) {
43375584Sru    yylval.num = c;
43475584Sru    return TOKEN_LETTER;
43575584Sru  }
43675584Sru  if (csdigit(c)) {
43775584Sru    yylval.num = c - '0';
43875584Sru    return TOKEN_DIGIT;
43975584Sru  }
44075584Sru  if (c == '\'') {
44175584Sru    yylval.str.start = literals.length();
44275584Sru    for (; spec_ptr < spec_end; spec_ptr++) {
44375584Sru      if (*spec_ptr == '\'') {
44475584Sru	if (++spec_ptr < spec_end && *spec_ptr == '\'')
44575584Sru	  literals += '\'';
44675584Sru	else {
44775584Sru	  yylval.str.len = literals.length() - yylval.str.start;
44875584Sru	  return TOKEN_LITERAL;
44975584Sru	}
45075584Sru      }
45175584Sru      else
45275584Sru	literals += *spec_ptr;
45375584Sru    }
45475584Sru    yylval.str.len = literals.length() - yylval.str.start;
45575584Sru    return TOKEN_LITERAL;
45675584Sru  }
45775584Sru  return c;
45875584Sru}
45975584Sru
46075584Sruint set_label_spec(const char *label_spec)
46175584Sru{
46275584Sru  spec_cur = spec_ptr = label_spec;
46375584Sru  spec_end = strchr(label_spec, '\0');
46475584Sru  literals.clear();
46575584Sru  if (yyparse())
46675584Sru    return 0;
46775584Sru  delete parsed_label;
46875584Sru  parsed_label = parse_result;
46975584Sru  return 1;
47075584Sru}
47175584Sru
47275584Sruint set_date_label_spec(const char *label_spec)
47375584Sru{
47475584Sru  spec_cur = spec_ptr = label_spec;
47575584Sru  spec_end = strchr(label_spec, '\0');
47675584Sru  literals.clear();
47775584Sru  if (yyparse())
47875584Sru    return 0;
47975584Sru  delete parsed_date_label;
48075584Sru  parsed_date_label = parse_result;
48175584Sru  return 1;
48275584Sru}
48375584Sru
48475584Sruint set_short_label_spec(const char *label_spec)
48575584Sru{
48675584Sru  spec_cur = spec_ptr = label_spec;
48775584Sru  spec_end = strchr(label_spec, '\0');
48875584Sru  literals.clear();
48975584Sru  if (yyparse())
49075584Sru    return 0;
49175584Sru  delete parsed_short_label;
49275584Sru  parsed_short_label = parse_result;
49375584Sru  return 1;
49475584Sru}
49575584Sru
49675584Sruvoid yyerror(const char *message)
49775584Sru{
49875584Sru  if (spec_cur < spec_end)
49975584Sru    command_error("label specification %1 before `%2'", message, spec_cur);
50075584Sru  else
50175584Sru    command_error("label specification %1 at end of string",
50275584Sru		  message, spec_cur);
50375584Sru}
50475584Sru
50575584Sruvoid at_expr::evaluate(int tentative, const reference &ref,
50675584Sru		       string &result, substring_position &)
50775584Sru{
50875584Sru  if (tentative)
50975584Sru    ref.canonicalize_authors(result);
51075584Sru  else {
51175584Sru    const char *end, *start = ref.get_authors(&end);
51275584Sru    if (start)
51375584Sru      result.append(start, end - start);
51475584Sru  }
51575584Sru}
51675584Sru
51775584Sruvoid format_expr::evaluate(int tentative, const reference &ref,
51875584Sru			   string &result, substring_position &)
51975584Sru{
52075584Sru  if (tentative)
52175584Sru    return;
52275584Sru  const label_info *lp = ref.get_label_ptr();
52375584Sru  int num = lp == 0 ? ref.get_number() : lp->count;
52475584Sru  if (type != '0')
52575584Sru    result += format_serial(type, num + 1);
52675584Sru  else {
52775584Sru    const char *ptr = i_to_a(num + first_number);
52875584Sru    int pad = width - strlen(ptr);
52975584Sru    while (--pad >= 0)
53075584Sru      result += '0';
53175584Sru    result += ptr;
53275584Sru  }
53375584Sru}
53475584Sru
53575584Srustatic const char *format_serial(char c, int n)
53675584Sru{
53775584Sru  assert(n > 0);
53875584Sru  static char buf[128]; // more than enough.
53975584Sru  switch (c) {
54075584Sru  case 'i':
54175584Sru  case 'I':
54275584Sru    {
54375584Sru      char *p = buf;
54475584Sru      // troff uses z and w to represent 10000 and 5000 in Roman
54575584Sru      // numerals; I can find no historical basis for this usage
54675584Sru      const char *s = c == 'i' ? "zwmdclxvi" : "ZWMDCLXVI";
54775584Sru      if (n >= 40000)
54875584Sru	return i_to_a(n);
54975584Sru      while (n >= 10000) {
55075584Sru	*p++ = s[0];
55175584Sru	n -= 10000;
55275584Sru      }
55375584Sru      for (int i = 1000; i > 0; i /= 10, s += 2) {
55475584Sru	int m = n/i;
55575584Sru	n -= m*i;
55675584Sru	switch (m) {
55775584Sru	case 3:
55875584Sru	  *p++ = s[2];
55975584Sru	  /* falls through */
56075584Sru	case 2:
56175584Sru	  *p++ = s[2];
56275584Sru	  /* falls through */
56375584Sru	case 1:
56475584Sru	  *p++ = s[2];
56575584Sru	  break;
56675584Sru	case 4:
56775584Sru	  *p++ = s[2];
56875584Sru	  *p++ = s[1];
56975584Sru	  break;
57075584Sru	case 8:
57175584Sru	  *p++ = s[1];
57275584Sru	  *p++ = s[2];
57375584Sru	  *p++ = s[2];
57475584Sru	  *p++ = s[2];
57575584Sru	  break;
57675584Sru	case 7:
57775584Sru	  *p++ = s[1];
57875584Sru	  *p++ = s[2];
57975584Sru	  *p++ = s[2];
58075584Sru	  break;
58175584Sru	case 6:
58275584Sru	  *p++ = s[1];
58375584Sru	  *p++ = s[2];
58475584Sru	  break;
58575584Sru	case 5:
58675584Sru	  *p++ = s[1];
58775584Sru	  break;
58875584Sru	case 9:
58975584Sru	  *p++ = s[2];
59075584Sru	  *p++ = s[0];
59175584Sru	}
59275584Sru      }
59375584Sru      *p = 0;
59475584Sru      break;
59575584Sru    }
59675584Sru  case 'a':
59775584Sru  case 'A':
59875584Sru    {
59975584Sru      char *p = buf;
60075584Sru      // this is derived from troff/reg.c
60175584Sru      while (n > 0) {
60275584Sru	int d = n % 26;
60375584Sru	if (d == 0)
60475584Sru	  d = 26;
60575584Sru	n -= d;
60675584Sru	n /= 26;
607151497Sru	*p++ = c == 'a' ? lowercase_array[d - 1] :
608151497Sru			       uppercase_array[d - 1];
60975584Sru      }
61075584Sru      *p-- = 0;
61175584Sru      // Reverse it.
61275584Sru      char *q = buf;
61375584Sru      while (q < p) {
61475584Sru	char temp = *q;
61575584Sru	*q = *p;
61675584Sru	*p = temp;
61775584Sru	--p;
61875584Sru	++q;
61975584Sru      }
62075584Sru      break;
62175584Sru    }
62275584Sru  default:
62375584Sru    assert(0);
62475584Sru  }
62575584Sru  return buf;
62675584Sru}
62775584Sru
62875584Sruvoid field_expr::evaluate(int, const reference &ref,
62975584Sru			  string &result, substring_position &)
63075584Sru{
63175584Sru  const char *end;
63275584Sru  const char *start = ref.get_field(name, &end);
63375584Sru  if (start) {
63475584Sru    start = nth_field(number, start, &end);
63575584Sru    if (start)
63675584Sru      result.append(start, end - start);
63775584Sru  }
63875584Sru}
63975584Sru
64075584Sruvoid literal_expr::evaluate(int, const reference &,
64175584Sru			    string &result, substring_position &)
64275584Sru{
64375584Sru  result += s;
64475584Sru}
64575584Sru
64675584Sruanalyzed_expr::analyzed_expr(expression *e)
64775584Sru: unary_expr(e), flags(e ? e->analyze() : 0)
64875584Sru{
64975584Sru}
65075584Sru
65175584Sruvoid analyzed_expr::evaluate(int tentative, const reference &ref,
65275584Sru			     string &result, substring_position &pos)
65375584Sru{
65475584Sru  if (expr)
65575584Sru    expr->evaluate(tentative, ref, result, pos);
65675584Sru}
65775584Sru
65875584Sruvoid star_expr::evaluate(int tentative, const reference &ref,
65975584Sru			 string &result, substring_position &pos)
66075584Sru{
66175584Sru  const label_info *lp = ref.get_label_ptr();
66275584Sru  if (!tentative
66375584Sru      && (lp == 0 || lp->total > 1)
66475584Sru      && expr)
66575584Sru    expr->evaluate(tentative, ref, result, pos);
66675584Sru}
66775584Sru
66875584Sruvoid separator_expr::evaluate(int tentative, const reference &ref,
66975584Sru			      string &result, substring_position &pos)
67075584Sru{
67175584Sru  int start_length = result.length();
67275584Sru  int is_first = pos.start < 0;
67375584Sru  if (expr)
67475584Sru    expr->evaluate(tentative, ref, result, pos);
67575584Sru  if (is_first) {
67675584Sru    pos.start = start_length;
67775584Sru    pos.length = result.length() - start_length;
67875584Sru  }
67975584Sru}
68075584Sru
68175584Sruvoid map_expr::evaluate(int tentative, const reference &ref,
68275584Sru			string &result, substring_position &)
68375584Sru{
68475584Sru  if (expr) {
68575584Sru    string temp;
68675584Sru    substring_position temp_pos;
68775584Sru    expr->evaluate(tentative, ref, temp, temp_pos);
68875584Sru    (*func)(temp.contents(), temp.contents() + temp.length(), result);
68975584Sru  }
69075584Sru}
69175584Sru
69275584Sruvoid extractor_expr::evaluate(int tentative, const reference &ref,
69375584Sru			      string &result, substring_position &)
69475584Sru{
69575584Sru  if (expr) {
69675584Sru    string temp;
69775584Sru    substring_position temp_pos;
69875584Sru    expr->evaluate(tentative, ref, temp, temp_pos);
69975584Sru    const char *end, *start = (*func)(temp.contents(),
70075584Sru				      temp.contents() + temp.length(),
70175584Sru				      &end);
70275584Sru    switch (part) {
70375584Sru    case BEFORE:
70475584Sru      if (start)
70575584Sru	result.append(temp.contents(), start - temp.contents());
70675584Sru      else
70775584Sru	result += temp;
70875584Sru      break;
70975584Sru    case MATCH:
71075584Sru      if (start)
71175584Sru	result.append(start, end - start);
71275584Sru      break;
71375584Sru    case AFTER:
71475584Sru      if (start)
71575584Sru	result.append(end, temp.contents() + temp.length() - end);
71675584Sru      break;
71775584Sru    default:
71875584Sru      assert(0);
71975584Sru    }
72075584Sru  }
72175584Sru}
72275584Sru
72375584Srustatic void first_part(int len, const char *ptr, const char *end,
72475584Sru			  string &result)
72575584Sru{
72675584Sru  for (;;) {
72775584Sru    const char *token_start = ptr;
72875584Sru    if (!get_token(&ptr, end))
72975584Sru      break;
73075584Sru    const token_info *ti = lookup_token(token_start, ptr);
73175584Sru    int counts = ti->sortify_non_empty(token_start, ptr);
73275584Sru    if (counts && --len < 0)
73375584Sru      break;
73475584Sru    if (counts || ti->is_accent())
73575584Sru      result.append(token_start, ptr - token_start);
73675584Sru  }
73775584Sru}
73875584Sru
73975584Srustatic void last_part(int len, const char *ptr, const char *end,
74075584Sru		      string &result)
74175584Sru{
74275584Sru  const char *start = ptr;
74375584Sru  int count = 0;
74475584Sru  for (;;) {
74575584Sru    const char *token_start = ptr;
74675584Sru    if (!get_token(&ptr, end))
74775584Sru      break;
74875584Sru    const token_info *ti = lookup_token(token_start, ptr);
74975584Sru    if (ti->sortify_non_empty(token_start, ptr))
75075584Sru      count++;
75175584Sru  }
75275584Sru  ptr = start;
75375584Sru  int skip = count - len;
75475584Sru  if (skip > 0) {
75575584Sru    for (;;) {
75675584Sru      const char *token_start = ptr;
75775584Sru      if (!get_token(&ptr, end))
75875584Sru	assert(0);
75975584Sru      const token_info *ti = lookup_token(token_start, ptr);
76075584Sru      if (ti->sortify_non_empty(token_start, ptr) && --skip < 0) {
76175584Sru	ptr = token_start;
76275584Sru	break;
76375584Sru      }
76475584Sru    }
76575584Sru  }
76675584Sru  first_part(len, ptr, end, result);
76775584Sru}
76875584Sru
76975584Sruvoid truncate_expr::evaluate(int tentative, const reference &ref,
77075584Sru			     string &result, substring_position &)
77175584Sru{
77275584Sru  if (expr) {
77375584Sru    string temp;
77475584Sru    substring_position temp_pos;
77575584Sru    expr->evaluate(tentative, ref, temp, temp_pos);
77675584Sru    const char *start = temp.contents();
77775584Sru    const char *end = start + temp.length();
77875584Sru    if (n > 0)
77975584Sru      first_part(n, start, end, result);
78075584Sru    else if (n < 0)
78175584Sru      last_part(-n, start, end, result);
78275584Sru  }
78375584Sru}
78475584Sru
78575584Sruvoid alternative_expr::evaluate(int tentative, const reference &ref,
78675584Sru				string &result, substring_position &pos)
78775584Sru{
78875584Sru  int start_length = result.length();
78975584Sru  if (expr1)
79075584Sru    expr1->evaluate(tentative, ref, result, pos);
79175584Sru  if (result.length() == start_length && expr2)
79275584Sru    expr2->evaluate(tentative, ref, result, pos);
79375584Sru}
79475584Sru
79575584Sruvoid list_expr::evaluate(int tentative, const reference &ref,
79675584Sru			 string &result, substring_position &pos)
79775584Sru{
79875584Sru  if (expr1)
79975584Sru    expr1->evaluate(tentative, ref, result, pos);
80075584Sru  if (expr2)
80175584Sru    expr2->evaluate(tentative, ref, result, pos);
80275584Sru}
80375584Sru
80475584Sruvoid substitute_expr::evaluate(int tentative, const reference &ref,
80575584Sru			       string &result, substring_position &pos)
80675584Sru{
80775584Sru  int start_length = result.length();
80875584Sru  if (expr1)
80975584Sru    expr1->evaluate(tentative, ref, result, pos);
81075584Sru  if (result.length() > start_length && result[result.length() - 1] == '-') {
81175584Sru    // ought to see if pos covers the -
81275584Sru    result.set_length(result.length() - 1);
81375584Sru    if (expr2)
81475584Sru      expr2->evaluate(tentative, ref, result, pos);
81575584Sru  }
81675584Sru}
81775584Sru
81875584Sruvoid conditional_expr::evaluate(int tentative, const reference &ref,
81975584Sru				string &result, substring_position &pos)
82075584Sru{
82175584Sru  string temp;
82275584Sru  substring_position temp_pos;
82375584Sru  if (expr1)
82475584Sru    expr1->evaluate(tentative, ref, temp, temp_pos);
82575584Sru  if (temp.length() > 0) {
82675584Sru    if (expr2)
82775584Sru      expr2->evaluate(tentative, ref, result, pos);
82875584Sru  }
82975584Sru  else {
83075584Sru    if (expr3)
83175584Sru      expr3->evaluate(tentative, ref, result, pos);
83275584Sru  }
83375584Sru}
83475584Sru
83575584Sruvoid reference::pre_compute_label()
83675584Sru{
83775584Sru  if (parsed_label != 0
83875584Sru      && (parsed_label->analyze() & expression::CONTAINS_VARIABLE)) {
83975584Sru    label.clear();
84075584Sru    substring_position temp_pos;
84175584Sru    parsed_label->evaluate(1, *this, label, temp_pos);
84275584Sru    label_ptr = lookup_label(label);
84375584Sru  }
84475584Sru}
84575584Sru
84675584Sruvoid reference::compute_label()
84775584Sru{
84875584Sru  label.clear();
84975584Sru  if (parsed_label)
85075584Sru    parsed_label->evaluate(0, *this, label, separator_pos);
85175584Sru  if (short_label_flag && parsed_short_label)
85275584Sru    parsed_short_label->evaluate(0, *this, short_label, short_separator_pos);
85375584Sru  if (date_as_label) {
85475584Sru    string new_date;
85575584Sru    if (parsed_date_label) {
85675584Sru      substring_position temp_pos;
85775584Sru      parsed_date_label->evaluate(0, *this, new_date, temp_pos);
85875584Sru    }
85975584Sru    set_date(new_date);
86075584Sru  }
86175584Sru  if (label_ptr)
86275584Sru    label_ptr->count += 1;
86375584Sru}
86475584Sru
86575584Sruvoid reference::immediate_compute_label()
86675584Sru{
86775584Sru  if (label_ptr)
86875584Sru    label_ptr->total = 2;	// force use of disambiguator
86975584Sru  compute_label();
87075584Sru}
87175584Sru
87275584Sruint reference::merge_labels(reference **v, int n, label_type type,
87375584Sru			    string &result)
87475584Sru{
87575584Sru  if (abbreviate_label_ranges)
87675584Sru    return merge_labels_by_number(v, n, type, result);
87775584Sru  else
87875584Sru    return merge_labels_by_parts(v, n, type, result);
87975584Sru}
88075584Sru
88175584Sruint reference::merge_labels_by_number(reference **v, int n, label_type type,
88275584Sru				      string &result)
88375584Sru{
88475584Sru  if (n <= 1)
88575584Sru    return 0;
88675584Sru  int num = get_number();
88775584Sru  // Only merge three or more labels.
88875584Sru  if (v[0]->get_number() != num + 1
88975584Sru      || v[1]->get_number() != num + 2)
89075584Sru    return 0;
89175584Sru  int i;
89275584Sru  for (i = 2; i < n; i++)
89375584Sru    if (v[i]->get_number() != num + i + 1)
89475584Sru      break;
89575584Sru  result = get_label(type);
89675584Sru  result += label_range_indicator;
89775584Sru  result += v[i - 1]->get_label(type);
89875584Sru  return i;
89975584Sru}
90075584Sru
90175584Sruconst substring_position &reference::get_separator_pos(label_type type) const
90275584Sru{
90375584Sru  if (type == SHORT_LABEL && short_label_flag)
90475584Sru    return short_separator_pos;
90575584Sru  else
90675584Sru    return separator_pos;
90775584Sru}
90875584Sru
90975584Sruconst string &reference::get_label(label_type type) const
91075584Sru{
91175584Sru  if (type == SHORT_LABEL && short_label_flag)
91275584Sru    return short_label;
91375584Sru  else
91475584Sru    return label;
91575584Sru}
91675584Sru
91775584Sruint reference::merge_labels_by_parts(reference **v, int n, label_type type,
91875584Sru				     string &result)
91975584Sru{
92075584Sru  if (n <= 0)
92175584Sru    return 0;
92275584Sru  const string &lb = get_label(type);
92375584Sru  const substring_position &sp = get_separator_pos(type);
92475584Sru  if (sp.start < 0
92575584Sru      || sp.start != v[0]->get_separator_pos(type).start
92675584Sru      || memcmp(lb.contents(), v[0]->get_label(type).contents(),
92775584Sru		sp.start) != 0)
92875584Sru    return 0;
92975584Sru  result = lb;
93075584Sru  int i = 0;
93175584Sru  do {
93275584Sru    result += separate_label_second_parts;
93375584Sru    const substring_position &s = v[i]->get_separator_pos(type);
93475584Sru    int sep_end_pos = s.start + s.length;
93575584Sru    result.append(v[i]->get_label(type).contents() + sep_end_pos,
93675584Sru		  v[i]->get_label(type).length() - sep_end_pos);
93775584Sru  } while (++i < n
93875584Sru	   && sp.start == v[i]->get_separator_pos(type).start
93975584Sru	   && memcmp(lb.contents(), v[i]->get_label(type).contents(),
94075584Sru		     sp.start) == 0);
94175584Sru  return i;
94275584Sru}
94375584Sru
94475584Srustring label_pool;
94575584Sru
94675584Srulabel_info::label_info(const string &s)
94775584Sru: start(label_pool.length()), length(s.length()), count(0), total(1)
94875584Sru{
94975584Sru  label_pool += s;
95075584Sru}
95175584Sru
95275584Srustatic label_info **label_table = 0;
95375584Srustatic int label_table_size = 0;
95475584Srustatic int label_table_used = 0;
95575584Sru
95675584Srulabel_info *lookup_label(const string &label)
95775584Sru{
95875584Sru  if (label_table == 0) {
95975584Sru    label_table = new label_info *[17];
96075584Sru    label_table_size = 17;
96175584Sru    for (int i = 0; i < 17; i++)
96275584Sru      label_table[i] = 0;
96375584Sru  }
96475584Sru  unsigned h = hash_string(label.contents(), label.length()) % label_table_size;
96575584Sru  label_info **ptr;
96675584Sru  for (ptr = label_table + h;
96775584Sru       *ptr != 0;
96875584Sru       (ptr == label_table)
96975584Sru       ? (ptr = label_table + label_table_size - 1)
97075584Sru       : ptr--)
97175584Sru    if ((*ptr)->length == label.length()
97275584Sru	&& memcmp(label_pool.contents() + (*ptr)->start, label.contents(),
97375584Sru		  label.length()) == 0) {
97475584Sru      (*ptr)->total += 1;
97575584Sru      return *ptr;
97675584Sru    }
97775584Sru  label_info *result = *ptr = new label_info(label);
97875584Sru  if (++label_table_used * 2 > label_table_size) {
97975584Sru    // Rehash the table.
98075584Sru    label_info **old_table = label_table;
98175584Sru    int old_size = label_table_size;
98275584Sru    label_table_size = next_size(label_table_size);
98375584Sru    label_table = new label_info *[label_table_size];
98475584Sru    int i;
98575584Sru    for (i = 0; i < label_table_size; i++)
98675584Sru      label_table[i] = 0;
98775584Sru    for (i = 0; i < old_size; i++)
98875584Sru      if (old_table[i]) {
989151497Sru	h = hash_string(label_pool.contents() + old_table[i]->start,
990151497Sru			old_table[i]->length);
99175584Sru	label_info **p;
99275584Sru	for (p = label_table + (h % label_table_size);
99375584Sru	     *p != 0;
99475584Sru	     (p == label_table)
99575584Sru	     ? (p = label_table + label_table_size - 1)
99675584Sru	     : --p)
99775584Sru	    ;
99875584Sru	*p = old_table[i];
99975584Sru	}
100075584Sru    a_delete old_table;
100175584Sru  }
100275584Sru  return result;
100375584Sru}
100475584Sru
100575584Sruvoid clear_labels()
100675584Sru{
100775584Sru  for (int i = 0; i < label_table_size; i++) {
100875584Sru    delete label_table[i];
100975584Sru    label_table[i] = 0;
101075584Sru  }
101175584Sru  label_table_used = 0;
101275584Sru  label_pool.clear();
101375584Sru}
101475584Sru
101575584Srustatic void consider_authors(reference **start, reference **end, int i);
101675584Sru
101775584Sruvoid compute_labels(reference **v, int n)
101875584Sru{
101975584Sru  if (parsed_label
102075584Sru      && (parsed_label->analyze() & expression::CONTAINS_AT)
102175584Sru      && sort_fields.length() >= 2
102275584Sru      && sort_fields[0] == 'A'
102375584Sru      && sort_fields[1] == '+')
102475584Sru    consider_authors(v, v + n, 0);
102575584Sru  for (int i = 0; i < n; i++)
102675584Sru    v[i]->compute_label();
102775584Sru}
102875584Sru
102975584Sru
/* A reference with a list of authors <A0,A1,...,AN> _needs_ author i
where 0 <= i <= N if there exists a reference with a list of authors
<B0,B1,...,BM> such that <A0,A1,...,AN> != <B0,B1,...,BM> and M >= i
and Aj = Bj for 0 <= j < i. In this case we can't say ``A0,
A1,...,A(i-1) et al'' because this would match both <A0,A1,...,AN> and
<B0,B1,...,BM>.  If a reference needs author i we only have to call
need_author(j) for some j >= i such that the reference also needs
author j. */
103875584Sru
103975584Sru/* This function handles 2 tasks:
104075584Srudetermine which authors are needed (cannot be elided with et al.);
104175584Srudetermine which authors can have only last names in the labels.
104275584Sru
104375584SruReferences >= start and < end have the same first i author names.
104475584SruAlso they're sorted by A+. */
104575584Sru
104675584Srustatic void consider_authors(reference **start, reference **end, int i)
104775584Sru{
104875584Sru  if (start >= end)
104975584Sru    return;
105075584Sru  reference **p = start;
105175584Sru  if (i >= (*p)->get_nauthors()) {
105275584Sru    for (++p; p < end && i >= (*p)->get_nauthors(); p++)
105375584Sru      ;
105475584Sru    if (p < end && i > 0) {
105575584Sru      // If we have an author list <A B C> and an author list <A B C D>,
105675584Sru      // then both lists need C.
105775584Sru      for (reference **q = start; q < end; q++)
105875584Sru	(*q)->need_author(i - 1);
105975584Sru    }
106075584Sru    start = p;
106175584Sru  }
106275584Sru  while (p < end) {
106375584Sru    reference **last_name_start = p;
106475584Sru    reference **name_start = p;
106575584Sru    for (++p;
106675584Sru	 p < end && i < (*p)->get_nauthors()
106775584Sru	 && same_author_last_name(**last_name_start, **p, i);
106875584Sru	 p++) {
106975584Sru      if (!same_author_name(**name_start, **p, i)) {
107075584Sru	consider_authors(name_start, p, i + 1);
107175584Sru	name_start = p;
107275584Sru      }
107375584Sru    }
107475584Sru    consider_authors(name_start, p, i + 1);
107575584Sru    if (last_name_start == name_start) {
107675584Sru      for (reference **q = last_name_start; q < p; q++)
107775584Sru	(*q)->set_last_name_unambiguous(i);
107875584Sru    }
107975584Sru    // If we have an author list <A B C D> and <A B C E>, then the lists
108075584Sru    // need author D and E respectively.
108175584Sru    if (name_start > start || p < end) {
108275584Sru      for (reference **q = last_name_start; q < p; q++)
108375584Sru	(*q)->need_author(i);
108475584Sru    }
108575584Sru  }
108675584Sru}
108775584Sru
108875584Sruint same_author_last_name(const reference &r1, const reference &r2, int n)
108975584Sru{
109075584Sru  const char *ae1;
109175584Sru  const char *as1 = r1.get_sort_field(0, n, 0, &ae1);
109275584Sru  const char *ae2;
109375584Sru  const char *as2 = r2.get_sort_field(0, n, 0, &ae2);
1094151497Sru  if (!as1 && !as2) return 1;	// they are the same
1095151497Sru  if (!as1 || !as2) return 0;
109675584Sru  return ae1 - as1 == ae2 - as2 && memcmp(as1, as2, ae1 - as1) == 0;
109775584Sru}
109875584Sru
109975584Sruint same_author_name(const reference &r1, const reference &r2, int n)
110075584Sru{
110175584Sru  const char *ae1;
110275584Sru  const char *as1 = r1.get_sort_field(0, n, -1, &ae1);
110375584Sru  const char *ae2;
110475584Sru  const char *as2 = r2.get_sort_field(0, n, -1, &ae2);
1105151497Sru  if (!as1 && !as2) return 1;	// they are the same
1106151497Sru  if (!as1 || !as2) return 0;
110775584Sru  return ae1 - as1 == ae2 - as2 && memcmp(as1, as2, ae1 - as1) == 0;
110875584Sru}
110975584Sru
111075584Sru
111175584Sruvoid int_set::set(int i)
111275584Sru{
111375584Sru  assert(i >= 0);
111475584Sru  int bytei = i >> 3;
111575584Sru  if (bytei >= v.length()) {
111675584Sru    int old_length = v.length();
111775584Sru    v.set_length(bytei + 1);
111875584Sru    for (int j = old_length; j <= bytei; j++)
111975584Sru      v[j] = 0;
112075584Sru  }
112175584Sru  v[bytei] |= 1 << (i & 7);
112275584Sru}
112375584Sru
112475584Sruint int_set::get(int i) const
112575584Sru{
112675584Sru  assert(i >= 0);
112775584Sru  int bytei = i >> 3;
112875584Sru  return bytei >= v.length() ? 0 : (v[bytei] & (1 << (i & 7))) != 0;
112975584Sru}
113075584Sru
113175584Sruvoid reference::set_last_name_unambiguous(int i)
113275584Sru{
113375584Sru  last_name_unambiguous.set(i);
113475584Sru}
113575584Sru
113675584Sruvoid reference::need_author(int n)
113775584Sru{
113875584Sru  if (n > last_needed_author)
113975584Sru    last_needed_author = n;
114075584Sru}
114175584Sru
114275584Sruconst char *reference::get_authors(const char **end) const
114375584Sru{
114475584Sru  if (!computed_authors) {
114575584Sru    ((reference *)this)->computed_authors = 1;
114675584Sru    string &result = ((reference *)this)->authors;
114775584Sru    int na = get_nauthors();
114875584Sru    result.clear();
114975584Sru    for (int i = 0; i < na; i++) {
115075584Sru      if (last_name_unambiguous.get(i)) {
115175584Sru	const char *e, *start = get_author_last_name(i, &e);
115275584Sru	assert(start != 0);
115375584Sru	result.append(start, e - start);
115475584Sru      }
115575584Sru      else {
115675584Sru	const char *e, *start = get_author(i, &e);
115775584Sru	assert(start != 0);
115875584Sru	result.append(start, e - start);
115975584Sru      }
116075584Sru      if (i == last_needed_author
116175584Sru	  && et_al.length() > 0
116275584Sru	  && et_al_min_elide > 0
116375584Sru	  && last_needed_author + et_al_min_elide < na
116475584Sru	  && na >= et_al_min_total) {
116575584Sru	result += et_al;
116675584Sru	break;
116775584Sru      }
116875584Sru      if (i < na - 1) {
116975584Sru	if (na == 2)
117075584Sru	  result += join_authors_exactly_two;
117175584Sru	else if (i < na - 2)
117275584Sru	  result += join_authors_default;
117375584Sru	else
117475584Sru	  result += join_authors_last_two;
117575584Sru      }
117675584Sru    }
117775584Sru  }
117875584Sru  const char *start = authors.contents();
117975584Sru  *end = start + authors.length();
118075584Sru  return start;
118175584Sru}
118275584Sru
118375584Sruint reference::get_nauthors() const
118475584Sru{
118575584Sru  if (nauthors < 0) {
118675584Sru    const char *dummy;
118775584Sru    int na;
118875584Sru    for (na = 0; get_author(na, &dummy) != 0; na++)
118975584Sru      ;
119075584Sru    ((reference *)this)->nauthors = na;
119175584Sru  }
119275584Sru  return nauthors;
119375584Sru}
1194