number.cpp revision 151497
185587Sobrien// -*- C++ -*-
285587Sobrien/* Copyright (C) 1989, 1990, 1991, 1992, 2001, 2002, 2004
385587Sobrien   Free Software Foundation, Inc.
485587Sobrien     Written by James Clark (jjc@jclark.com)
585587Sobrien
685587SobrienThis file is part of groff.
785587Sobrien
885587Sobriengroff is free software; you can redistribute it and/or modify it under
985587Sobrienthe terms of the GNU General Public License as published by the Free
1085587SobrienSoftware Foundation; either version 2, or (at your option) any later
1185587Sobrienversion.
1285587Sobrien
1385587Sobriengroff is distributed in the hope that it will be useful, but WITHOUT ANY
1485587SobrienWARRANTY; without even the implied warranty of MERCHANTABILITY or
1585587SobrienFITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
1685587Sobrienfor more details.
1785587Sobrien
1885587SobrienYou should have received a copy of the GNU General Public License along
1985587Sobrienwith groff; see the file COPYING.  If not, write to the Free Software
2085587SobrienFoundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
2185587Sobrien
2285587Sobrien
2385587Sobrien#include "troff.h"
2485587Sobrien#include "hvunits.h"
2585587Sobrien#include "stringclass.h"
2685587Sobrien#include "mtsm.h"
2785587Sobrien#include "env.h"
2885587Sobrien#include "token.h"
2985587Sobrien#include "div.h"
3085587Sobrien
3185587Sobrienvunits V0;
3285587Sobrienhunits H0;
3385587Sobrien
3485587Sobrienint hresolution = 1;
3585587Sobrienint vresolution = 1;
3685587Sobrienint units_per_inch;
3785587Sobrienint sizescale;
3885587Sobrien
3985587Sobrienstatic int parse_expr(units *v, int scale_indicator,
4085587Sobrien		      int parenthesised, int rigid = 0);
4185587Sobrienstatic int start_number();
4285587Sobrien
4385587Sobrienint get_vunits(vunits *res, unsigned char si)
4485587Sobrien{
4585587Sobrien  if (!start_number())
4685587Sobrien    return 0;
4785587Sobrien  units x;
4885587Sobrien  if (parse_expr(&x, si, 0)) {
4985587Sobrien    *res = vunits(x);
5085587Sobrien    return 1;
5185587Sobrien  }
5285587Sobrien  else
5385587Sobrien    return 0;
5485587Sobrien}
5585587Sobrien
5685587Sobrienint get_hunits(hunits *res, unsigned char si)
5785587Sobrien{
5885587Sobrien  if (!start_number())
5985587Sobrien    return 0;
6085587Sobrien  units x;
6185587Sobrien  if (parse_expr(&x, si, 0)) {
6285587Sobrien    *res = hunits(x);
6385587Sobrien    return 1;
6485587Sobrien  }
6585587Sobrien  else
6685587Sobrien    return 0;
6785587Sobrien}
6885587Sobrien
6985587Sobrien// for \B
7085587Sobrien
7185587Sobrienint get_number_rigidly(units *res, unsigned char si)
7285587Sobrien{
7385587Sobrien  if (!start_number())
7485587Sobrien    return 0;
7585587Sobrien  units x;
7685587Sobrien  if (parse_expr(&x, si, 0, 1)) {
7785587Sobrien    *res = x;
7885587Sobrien    return 1;
7985587Sobrien  }
8085587Sobrien  else
8185587Sobrien    return 0;
8285587Sobrien}
8385587Sobrien
8485587Sobrienint get_number(units *res, unsigned char si)
8585587Sobrien{
8685587Sobrien  if (!start_number())
8785587Sobrien    return 0;
8885587Sobrien  units x;
8985587Sobrien  if (parse_expr(&x, si, 0)) {
9085587Sobrien    *res = x;
9185587Sobrien    return 1;
9285587Sobrien  }
9385587Sobrien  else
9485587Sobrien    return 0;
9585587Sobrien}
9685587Sobrien
9785587Sobrienint get_integer(int *res)
9885587Sobrien{
9985587Sobrien  if (!start_number())
10085587Sobrien    return 0;
10185587Sobrien  units x;
10285587Sobrien  if (parse_expr(&x, 0, 0)) {
10385587Sobrien    *res = x;
10485587Sobrien    return 1;
10585587Sobrien  }
10685587Sobrien  else
10785587Sobrien    return 0;
10885587Sobrien}
10985587Sobrien
11085587Sobrienenum incr_number_result { BAD, ABSOLUTE, INCREMENT, DECREMENT };
11185587Sobrien
11285587Sobrienstatic incr_number_result get_incr_number(units *res, unsigned char);
11385587Sobrien
11485587Sobrienint get_vunits(vunits *res, unsigned char si, vunits prev_value)
11585587Sobrien{
11685587Sobrien  units v;
11785587Sobrien  switch (get_incr_number(&v, si)) {
11885587Sobrien  case BAD:
11985587Sobrien    return 0;
12085587Sobrien  case ABSOLUTE:
12185587Sobrien    *res = v;
12285587Sobrien    break;
12385587Sobrien  case INCREMENT:
12485587Sobrien    *res = prev_value + v;
12585587Sobrien    break;
12685587Sobrien  case DECREMENT:
12785587Sobrien    *res = prev_value - v;
12885587Sobrien    break;
12985587Sobrien  default:
13085587Sobrien    assert(0);
13185587Sobrien  }
13285587Sobrien  return 1;
13385587Sobrien}
13485587Sobrien
13585587Sobrienint get_hunits(hunits *res, unsigned char si, hunits prev_value)
13685587Sobrien{
13785587Sobrien  units v;
13885587Sobrien  switch (get_incr_number(&v, si)) {
13985587Sobrien  case BAD:
14085587Sobrien    return 0;
14185587Sobrien  case ABSOLUTE:
14285587Sobrien    *res = v;
14385587Sobrien    break;
14485587Sobrien  case INCREMENT:
14585587Sobrien    *res = prev_value + v;
14685587Sobrien    break;
14785587Sobrien  case DECREMENT:
14885587Sobrien    *res = prev_value - v;
14985587Sobrien    break;
15085587Sobrien  default:
15185587Sobrien    assert(0);
15285587Sobrien  }
15385587Sobrien  return 1;
15485587Sobrien}
15585587Sobrien
15685587Sobrienint get_number(units *res, unsigned char si, units prev_value)
15785587Sobrien{
15885587Sobrien  units v;
15985587Sobrien  switch (get_incr_number(&v, si)) {
16085587Sobrien  case BAD:
16185587Sobrien    return 0;
16285587Sobrien  case ABSOLUTE:
16385587Sobrien    *res = v;
16485587Sobrien    break;
16585587Sobrien  case INCREMENT:
16685587Sobrien    *res = prev_value + v;
16785587Sobrien    break;
16885587Sobrien  case DECREMENT:
16985587Sobrien    *res = prev_value - v;
17085587Sobrien    break;
17185587Sobrien  default:
17285587Sobrien    assert(0);
17385587Sobrien  }
17485587Sobrien  return 1;
17585587Sobrien}
17685587Sobrien
17785587Sobrienint get_integer(int *res, int prev_value)
17885587Sobrien{
17985587Sobrien  units v;
18085587Sobrien  switch (get_incr_number(&v, 0)) {
18185587Sobrien  case BAD:
18285587Sobrien    return 0;
18385587Sobrien  case ABSOLUTE:
18485587Sobrien    *res = v;
18585587Sobrien    break;
18685587Sobrien  case INCREMENT:
18785587Sobrien    *res = prev_value + int(v);
18885587Sobrien    break;
18985587Sobrien  case DECREMENT:
19085587Sobrien    *res = prev_value - int(v);
19185587Sobrien    break;
19285587Sobrien  default:
19385587Sobrien    assert(0);
19485587Sobrien  }
19585587Sobrien  return 1;
19685587Sobrien}
19785587Sobrien
19885587Sobrien
19985587Sobrienstatic incr_number_result get_incr_number(units *res, unsigned char si)
20085587Sobrien{
20185587Sobrien  if (!start_number())
20285587Sobrien    return BAD;
20385587Sobrien  incr_number_result result = ABSOLUTE;
20485587Sobrien  if (tok.ch() == '+') {
20585587Sobrien    tok.next();
20685587Sobrien    result = INCREMENT;
20785587Sobrien  }
20885587Sobrien  else if (tok.ch() == '-') {
20985587Sobrien    tok.next();
21085587Sobrien    result = DECREMENT;
21185587Sobrien  }
21285587Sobrien  if (parse_expr(res, si, 0))
21385587Sobrien    return result;
21485587Sobrien  else
21585587Sobrien    return BAD;
21685587Sobrien}
21785587Sobrien
21885587Sobrienstatic int start_number()
21985587Sobrien{
22085587Sobrien  while (tok.space())
22185587Sobrien    tok.next();
22285587Sobrien  if (tok.newline()) {
22385587Sobrien    warning(WARN_MISSING, "missing number");
22485587Sobrien    return 0;
22585587Sobrien  }
22685587Sobrien  if (tok.tab()) {
22785587Sobrien    warning(WARN_TAB, "tab character where number expected");
22885587Sobrien    return 0;
22985587Sobrien  }
23085587Sobrien  if (tok.right_brace()) {
23185587Sobrien    warning(WARN_RIGHT_BRACE, "`\\}' where number expected");
232    return 0;
233  }
234  return 1;
235}
236
237enum { OP_LEQ = 'L', OP_GEQ = 'G', OP_MAX = 'X', OP_MIN = 'N' };
238
239#define SCALE_INDICATOR_CHARS "icfPmnpuvMsz"
240
241static int parse_term(units *v, int scale_indicator,
242		      int parenthesised, int rigid);
243
244static int parse_expr(units *v, int scale_indicator,
245		      int parenthesised, int rigid)
246{
247  int result = parse_term(v, scale_indicator, parenthesised, rigid);
248  while (result) {
249    if (parenthesised)
250      tok.skip();
251    int op = tok.ch();
252    switch (op) {
253    case '+':
254    case '-':
255    case '/':
256    case '*':
257    case '%':
258    case ':':
259    case '&':
260      tok.next();
261      break;
262    case '>':
263      tok.next();
264      if (tok.ch() == '=') {
265	tok.next();
266	op = OP_GEQ;
267      }
268      else if (tok.ch() == '?') {
269	tok.next();
270	op = OP_MAX;
271      }
272      break;
273    case '<':
274      tok.next();
275      if (tok.ch() == '=') {
276	tok.next();
277	op = OP_LEQ;
278      }
279      else if (tok.ch() == '?') {
280	tok.next();
281	op = OP_MIN;
282      }
283      break;
284    case '=':
285      tok.next();
286      if (tok.ch() == '=')
287	tok.next();
288      break;
289    default:
290      return result;
291    }
292    units v2;
293    if (!parse_term(&v2, scale_indicator, parenthesised, rigid))
294      return 0;
295    int overflow = 0;
296    switch (op) {
297    case '<':
298      *v = *v < v2;
299      break;
300    case '>':
301      *v = *v > v2;
302      break;
303    case OP_LEQ:
304      *v = *v <= v2;
305      break;
306    case OP_GEQ:
307      *v = *v >= v2;
308      break;
309    case OP_MIN:
310      if (*v > v2)
311	*v = v2;
312      break;
313    case OP_MAX:
314      if (*v < v2)
315	*v = v2;
316      break;
317    case '=':
318      *v = *v == v2;
319      break;
320    case '&':
321      *v = *v > 0 && v2 > 0;
322      break;
323    case ':':
324      *v = *v > 0 || v2 > 0;
325      break;
326    case '+':
327      if (v2 < 0) {
328	if (*v < INT_MIN - v2)
329	  overflow = 1;
330      }
331      else if (v2 > 0) {
332	if (*v > INT_MAX - v2)
333	  overflow = 1;
334      }
335      if (overflow) {
336	error("addition overflow");
337	return 0;
338      }
339      *v += v2;
340      break;
341    case '-':
342      if (v2 < 0) {
343	if (*v > INT_MAX + v2)
344	  overflow = 1;
345      }
346      else if (v2 > 0) {
347	if (*v < INT_MIN + v2)
348	  overflow = 1;
349      }
350      if (overflow) {
351	error("subtraction overflow");
352	return 0;
353      }
354      *v -= v2;
355      break;
356    case '*':
357      if (v2 < 0) {
358	if (*v > 0) {
359	  if (*v > -(unsigned)INT_MIN / -(unsigned)v2)
360	    overflow = 1;
361	}
362	else if (-(unsigned)*v > INT_MAX / -(unsigned)v2)
363	  overflow = 1;
364      }
365      else if (v2 > 0) {
366	if (*v > 0) {
367	  if (*v > INT_MAX / v2)
368	    overflow = 1;
369	}
370	else if (-(unsigned)*v > -(unsigned)INT_MIN / v2)
371	  overflow = 1;
372      }
373      if (overflow) {
374	error("multiplication overflow");
375	return 0;
376      }
377      *v *= v2;
378      break;
379    case '/':
380      if (v2 == 0) {
381	error("division by zero");
382	return 0;
383      }
384      *v /= v2;
385      break;
386    case '%':
387      if (v2 == 0) {
388	error("modulus by zero");
389	return 0;
390      }
391      *v %= v2;
392      break;
393    default:
394      assert(0);
395    }
396  }
397  return result;
398}
399
400static int parse_term(units *v, int scale_indicator,
401		      int parenthesised, int rigid)
402{
403  int negative = 0;
404  for (;;)
405    if (parenthesised && tok.space())
406      tok.next();
407    else if (tok.ch() == '+')
408      tok.next();
409    else if (tok.ch() == '-') {
410      tok.next();
411      negative = !negative;
412    }
413    else
414      break;
415  unsigned char c = tok.ch();
416  switch (c) {
417  case '|':
418    // | is not restricted to the outermost level
419    // tbl uses this
420    tok.next();
421    if (!parse_term(v, scale_indicator, parenthesised, rigid))
422      return 0;
423    int tem;
424    tem = (scale_indicator == 'v'
425	   ? curdiv->get_vertical_position().to_units()
426	   : curenv->get_input_line_position().to_units());
427    if (tem >= 0) {
428      if (*v < INT_MIN + tem) {
429	error("numeric overflow");
430	return 0;
431      }
432    }
433    else {
434      if (*v > INT_MAX + tem) {
435	error("numeric overflow");
436	return 0;
437      }
438    }
439    *v -= tem;
440    if (negative) {
441      if (*v == INT_MIN) {
442	error("numeric overflow");
443	return 0;
444      }
445      *v = -*v;
446    }
447    return 1;
448  case '(':
449    tok.next();
450    c = tok.ch();
451    if (c == ')') {
452      if (rigid)
453	return 0;
454      warning(WARN_SYNTAX, "empty parentheses");
455      tok.next();
456      *v = 0;
457      return 1;
458    }
459    else if (c != 0 && strchr(SCALE_INDICATOR_CHARS, c) != 0) {
460      tok.next();
461      if (tok.ch() == ';') {
462	tok.next();
463	scale_indicator = c;
464      }
465      else {
466	error("expected `;' after scale-indicator (got %1)",
467	      tok.description());
468	return 0;
469      }
470    }
471    else if (c == ';') {
472      scale_indicator = 0;
473      tok.next();
474    }
475    if (!parse_expr(v, scale_indicator, 1, rigid))
476      return 0;
477    tok.skip();
478    if (tok.ch() != ')') {
479      if (rigid)
480	return 0;
481      warning(WARN_SYNTAX, "missing `)' (got %1)", tok.description());
482    }
483    else
484      tok.next();
485    if (negative) {
486      if (*v == INT_MIN) {
487	error("numeric overflow");
488	return 0;
489      }
490      *v = -*v;
491    }
492    return 1;
493  case '.':
494    *v = 0;
495    break;
496  case '0':
497  case '1':
498  case '2':
499  case '3':
500  case '4':
501  case '5':
502  case '6':
503  case '7':
504  case '8':
505  case '9':
506    *v = 0;
507    do {
508      if (*v > INT_MAX/10) {
509	error("numeric overflow");
510	return 0;
511      }
512      *v *= 10;
513      if (*v > INT_MAX - (int(c) - '0')) {
514	error("numeric overflow");
515	return 0;
516      }
517      *v += c - '0';
518      tok.next();
519      c = tok.ch();
520    } while (csdigit(c));
521    break;
522  case '/':
523  case '*':
524  case '%':
525  case ':':
526  case '&':
527  case '>':
528  case '<':
529  case '=':
530    warning(WARN_SYNTAX, "empty left operand");
531    *v = 0;
532    return rigid ? 0 : 1;
533  default:
534    warning(WARN_NUMBER, "numeric expression expected (got %1)",
535	    tok.description());
536    return 0;
537  }
538  int divisor = 1;
539  if (tok.ch() == '.') {
540    tok.next();
541    for (;;) {
542      c = tok.ch();
543      if (!csdigit(c))
544	break;
545      // we may multiply the divisor by 254 later on
546      if (divisor <= INT_MAX/2540 && *v <= (INT_MAX - 9)/10) {
547	*v *= 10;
548	*v += c - '0';
549	divisor *= 10;
550      }
551      tok.next();
552    }
553  }
554  int si = scale_indicator;
555  int do_next = 0;
556  if ((c = tok.ch()) != 0 && strchr(SCALE_INDICATOR_CHARS, c) != 0) {
557    switch (scale_indicator) {
558    case 'z':
559      if (c != 'u' && c != 'z') {
560	warning(WARN_SCALE,
561		"only `z' and `u' scale indicators valid in this context");
562	break;
563      }
564      si = c;
565      break;
566    case 0:
567      warning(WARN_SCALE, "scale indicator invalid in this context");
568      break;
569    case 'u':
570      si = c;
571      break;
572    default:
573      if (c == 'z') {
574	warning(WARN_SCALE, "`z' scale indicator invalid in this context");
575	break;
576      }
577      si = c;
578      break;
579    }
580    // Don't do tok.next() here because the next token might be \s, which
581    // would affect the interpretation of m.
582    do_next = 1;
583  }
584  switch (si) {
585  case 'i':
586    *v = scale(*v, units_per_inch, divisor);
587    break;
588  case 'c':
589    *v = scale(*v, units_per_inch*100, divisor*254);
590    break;
591  case 0:
592  case 'u':
593    if (divisor != 1)
594      *v /= divisor;
595    break;
596  case 'f':
597    *v = scale(*v, 65536, divisor);
598    break;
599  case 'p':
600    *v = scale(*v, units_per_inch, divisor*72);
601    break;
602  case 'P':
603    *v = scale(*v, units_per_inch, divisor*6);
604    break;
605  case 'm':
606    {
607      // Convert to hunits so that with -Tascii `m' behaves as in nroff.
608      hunits em = curenv->get_size();
609      *v = scale(*v, em.is_zero() ? hresolution : em.to_units(), divisor);
610    }
611    break;
612  case 'M':
613    {
614      hunits em = curenv->get_size();
615      *v = scale(*v, em.is_zero() ? hresolution : em.to_units(), divisor*100);
616    }
617    break;
618  case 'n':
619    {
620      // Convert to hunits so that with -Tascii `n' behaves as in nroff.
621      hunits en = curenv->get_size()/2;
622      *v = scale(*v, en.is_zero() ? hresolution : en.to_units(), divisor);
623    }
624    break;
625  case 'v':
626    *v = scale(*v, curenv->get_vertical_spacing().to_units(), divisor);
627    break;
628  case 's':
629    while (divisor > INT_MAX/(sizescale*72)) {
630      divisor /= 10;
631      *v /= 10;
632    }
633    *v = scale(*v, units_per_inch, divisor*sizescale*72);
634    break;
635  case 'z':
636    *v = scale(*v, sizescale, divisor);
637    break;
638  default:
639    assert(0);
640  }
641  if (do_next)
642    tok.next();
643  if (negative) {
644    if (*v == INT_MIN) {
645      error("numeric overflow");
646      return 0;
647    }
648    *v = -*v;
649  }
650  return 1;
651}
652
653units scale(units n, units x, units y)
654{
655  assert(x >= 0 && y > 0);
656  if (x == 0)
657    return 0;
658  if (n >= 0) {
659    if (n <= INT_MAX/x)
660      return (n*x)/y;
661  }
662  else {
663    if (-(unsigned)n <= -(unsigned)INT_MIN/x)
664      return (n*x)/y;
665  }
666  double res = n*double(x)/double(y);
667  if (res > INT_MAX) {
668    error("numeric overflow");
669    return INT_MAX;
670  }
671  else if (res < INT_MIN) {
672    error("numeric overflow");
673    return INT_MIN;
674  }
675  return int(res);
676}
677
678vunits::vunits(units x)
679{
680  // don't depend on the rounding direction for division of negative integers
681  if (vresolution == 1)
682    n = x;
683  else
684    n = (x < 0
685	 ? -((-x + vresolution/2 - 1)/vresolution)
686	 : (x + vresolution/2 - 1)/vresolution);
687}
688
689hunits::hunits(units x)
690{
691  // don't depend on the rounding direction for division of negative integers
692  if (hresolution == 1)
693    n = x;
694  else
695    n = (x < 0
696	 ? -((-x + hresolution/2 - 1)/hresolution)
697	 : (x + hresolution/2 - 1)/hresolution);
698}
699