1/*	Id: term.c,v 1.280 2019/01/15 12:16:18 schwarze Exp  */
2/*
3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2010-2019 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18#include "config.h"
19
20#include <sys/types.h>
21
22#include <assert.h>
23#include <ctype.h>
24#include <stdint.h>
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28
29#include "mandoc.h"
30#include "mandoc_aux.h"
31#include "out.h"
32#include "term.h"
33#include "main.h"
34
/*
 * Prototypes of the file-local helper functions.
 * All of them are defined further down in this file.
 */
static	size_t		 cond_width(const struct termp *, int, int *);
static	void		 adjbuf(struct termp_col *, size_t);
static	void		 bufferc(struct termp *, char);
static	void		 encode(struct termp *, const char *, size_t);
static	void		 encode1(struct termp *, int);
static	void		 endline(struct termp *);
static	void		 term_field(struct termp *, size_t, size_t,
				size_t, size_t);
static	void		 term_fill(struct termp *, size_t *, size_t *,
				size_t);
45
46
47void
48term_setcol(struct termp *p, size_t maxtcol)
49{
50	if (maxtcol > p->maxtcol) {
51		p->tcols = mandoc_recallocarray(p->tcols,
52		    p->maxtcol, maxtcol, sizeof(*p->tcols));
53		p->maxtcol = maxtcol;
54	}
55	p->lasttcol = maxtcol - 1;
56	p->tcol = p->tcols;
57}
58
59void
60term_free(struct termp *p)
61{
62	for (p->tcol = p->tcols; p->tcol < p->tcols + p->maxtcol; p->tcol++)
63		free(p->tcol->buf);
64	free(p->tcols);
65	free(p->fontq);
66	free(p);
67}
68
69void
70term_begin(struct termp *p, term_margin head,
71		term_margin foot, const struct roff_meta *arg)
72{
73
74	p->headf = head;
75	p->footf = foot;
76	p->argf = arg;
77	(*p->begin)(p);
78}
79
80void
81term_end(struct termp *p)
82{
83
84	(*p->end)(p);
85}
86
/*
 * Flush a chunk of text.  By default, break the output line each time
 * the right margin is reached, and continue output on the next line
 * at the same offset as the chunk itself.  By default, also break the
 * output line at the end of the chunk.  There are many flags modifying
 * this behaviour, see the comments in the body of the function.
 *
 * Once all text of the current column has been printed, the buffer
 * positions of the column and p->col are reset to zero, p->minbl is
 * set to p->trailspace, and the BACKAFTER, BACKBEFORE, and NOPAD
 * flags are cleared.  In MULTICOL mode, the function returns right
 * after printing one output line if more text remains, leaving that
 * text and all of the above state untouched, and it never calls
 * endline() itself.
 */
void
term_flushln(struct termp *p)
{
	size_t	 vbl;      /* Number of blanks to prepend to the output. */
	size_t	 vbr;      /* Actual visual position of the end of field. */
	size_t	 vfield;   /* Desired visual field width. */
	size_t	 vtarget;  /* Desired visual position of the right margin. */
	size_t	 ic;       /* Character position in the input buffer. */
	size_t	 nbr;      /* Number of characters to print in this field. */

	/*
	 * Normally, start writing at the left margin, but with the
	 * NOPAD flag, start writing at the current position instead.
	 * If the current position is already beyond the left margin,
	 * no padding is needed either.
	 */

	vbl = (p->flags & TERMP_NOPAD) || p->tcol->offset < p->viscol ?
	    0 : p->tcol->offset - p->viscol;

	/* Never prepend fewer blanks than a non-zero p->minbl demands. */

	if (p->minbl && vbl < p->minbl)
		vbl = p->minbl;

	/*
	 * Outside multi-column mode, always start reading
	 * at the beginning of the column buffer.
	 */

	if ((p->flags & TERMP_MULTICOL) == 0)
		p->tcol->col = 0;

	/* Loop over output lines. */

	for (;;) {

		/*
		 * Room left between the position where the text will
		 * start (p->viscol + vbl) and the right margin of the
		 * column; zero if that position is already at or
		 * beyond the margin.
		 */

		vfield = p->tcol->rmargin > p->viscol + vbl ?
		    p->tcol->rmargin - p->viscol - vbl : 0;

		/*
		 * Normally, break the line at the right margin
		 * of the field, but with the NOBREAK flag, only
		 * break it at the max right margin of the screen,
		 * and with the BRNEVER flag, never break it at all.
		 */

		vtarget = p->flags & TERMP_BRNEVER ? SIZE_MAX :
		    (p->flags & TERMP_NOBREAK) == 0 ? vfield :
		    p->maxrmargin > p->viscol + vbl ?
		    p->maxrmargin - p->viscol - vbl : 0;

		/*
		 * Figure out how much text will fit in the field.
		 * If there is whitespace only, print nothing.
		 */

		term_fill(p, &nbr, &vbr, vtarget);
		if (nbr == 0)
			break;

		/*
		 * With the CENTER or RIGHT flag, increase the indentation
		 * to center the text between the left and right margins
		 * or to adjust it to the right margin, respectively.
		 */

		if (vbr < vtarget) {
			if (p->flags & TERMP_CENTER)
				vbl += (vtarget - vbr) / 2;
			else if (p->flags & TERMP_RIGHT)
				vbl += vtarget - vbr;
		}

		/* Finally, print the field content. */

		term_field(p, vbl, nbr, vbr, vtarget);

		/*
		 * If there is no text left in the field, exit the loop.
		 * If the BRTRSP flag is set, consider trailing
		 * whitespace significant when deciding whether
		 * the field fits or not.
		 * The scan below skips tabs, blanks, newline markers,
		 * and break markers; it stops at the first other
		 * character, leaving ic short of lastcol in that case.
		 */

		for (ic = p->tcol->col; ic < p->tcol->lastcol; ic++) {
			switch (p->tcol->buf[ic]) {
			case '\t':
				if (p->flags & TERMP_BRTRSP)
					vbr = term_tab_next(vbr);
				continue;
			case ' ':
				if (p->flags & TERMP_BRTRSP)
					vbr += (*p->width)(p, ' ');
				continue;
			case '\n':
			case ASCII_BREAK:
				continue;
			default:
				break;
			}
			break;
		}
		if (ic == p->tcol->lastcol)
			break;

		/*
		 * At the location of an automatic line break, input
		 * space characters are consumed by the line break.
		 */

		while (p->tcol->col < p->tcol->lastcol &&
		    p->tcol->buf[p->tcol->col] == ' ')
			p->tcol->col++;

		/*
		 * In multi-column mode, leave the rest of the text
		 * in the buffer to be handled by a subsequent
		 * invocation, such that the other columns of the
		 * table can be handled first.
		 * In single-column mode, simply break the line.
		 */

		if (p->flags & TERMP_MULTICOL)
			return;

		endline(p);
		p->viscol = 0;

		/*
		 * Normally, start the next line at the same indentation
		 * as this one, but with the BRIND flag, start it at the
		 * right margin instead.  This is used together with
		 * NOBREAK for the tags in various kinds of tagged lists.
		 */

		vbl = p->flags & TERMP_BRIND ?
		    p->tcol->rmargin : p->tcol->offset;
	}

	/* Reset output state in preparation for the next field. */

	p->col = p->tcol->col = p->tcol->lastcol = 0;
	p->minbl = p->trailspace;
	p->flags &= ~(TERMP_BACKAFTER | TERMP_BACKBEFORE | TERMP_NOPAD);

	/* In multi-column mode, ending the line is left to the caller. */

	if (p->flags & TERMP_MULTICOL)
		return;

	/*
	 * The HANG flag means that the next field
	 * always follows on the same line.
	 * The NOBREAK flag means that the next field
	 * follows on the same line unless the field was overrun.
	 * Normally, break the line at the end of each field.
	 * The overrun test uses the values of vbr and vfield
	 * from the last iteration of the loop above and adds
	 * the width of p->trailspace blanks to the text width.
	 */

	if ((p->flags & TERMP_HANG) == 0 &&
	    ((p->flags & TERMP_NOBREAK) == 0 ||
	     vbr + term_len(p, p->trailspace) > vfield))
		endline(p);
}
245
/*
 * Store the number of input characters to print in this field in *nbr
 * and their total visual width to print in *vbr.
 * If there is only whitespace in the field, both remain zero.
 * The desired visual width of the field is provided by vtarget.
 * If the first word is longer, the field will be overrun.
 *
 * Note that *nbr is a position in p->tcol->buf, namely the index just
 * after the last character to print, counted from the start of the
 * buffer rather than from p->tcol->col where scanning starts.
 *
 * As a side effect, ASCII_HYPH and ASCII_NBRSP markers encountered
 * while scanning are replaced in the buffer by '-' and ' '.
 */
static void
term_fill(struct termp *p, size_t *nbr, size_t *vbr, size_t vtarget)
{
	size_t	 ic;        /* Character position in the input buffer. */
	size_t	 vis;       /* Visual position of the current character. */
	size_t	 vn;        /* Visual position of the next character. */
	int	 breakline; /* Break at the end of this word. */
	int	 graph;     /* Last character was non-blank. */

	*nbr = *vbr = vis = 0;
	breakline = graph = 0;
	for (ic = p->tcol->col; ic < p->tcol->lastcol; ic++) {
		switch (p->tcol->buf[ic]) {
		case '\b':  /* Escape \o (overstrike) or backspace markup. */
			/* Step back by the width of the preceding character. */
			assert(ic > 0);
			vis -= (*p->width)(p, p->tcol->buf[ic - 1]);
			continue;

		case '\t':  /* Normal ASCII whitespace. */
		case ' ':
		case ASCII_BREAK:  /* Escape \: (breakpoint). */
			/* Find out where the next character would start. */
			switch (p->tcol->buf[ic]) {
			case '\t':
				vn = term_tab_next(vis);
				break;
			case ' ':
				vn = vis + (*p->width)(p, ' ');
				break;
			case ASCII_BREAK:
				vn = vis;
				break;
			default:
				abort();
			}
			/*
			 * Can break at the end of a word.
			 * This break leaves the switch, and the break
			 * after the switch then ends the loop.
			 */
			if (breakline || vn > vtarget)
				break;
			/*
			 * The word that just ended fits:
			 * remember its end as a possible break point.
			 */
			if (graph) {
				*nbr = ic;
				*vbr = vis;
				graph = 0;
			}
			vis = vn;
			continue;

		case '\n':  /* Escape \p (break at the end of the word). */
			breakline = 1;
			continue;

		case ASCII_HYPH:  /* Breakable hyphen. */
			graph = 1;
			/*
			 * We are about to decide whether to break the
			 * line or not, so we no longer need this hyphen
			 * to be marked as breakable.  Put back a real
			 * hyphen such that we get the correct width.
			 */
			p->tcol->buf[ic] = '-';
			vis += (*p->width)(p, '-');
			if (vis > vtarget) {
				/*
				 * The hyphen does not fit.  Step past it
				 * such that the final check below sees it
				 * as part of the word, and end the loop.
				 */
				ic++;
				break;
			}
			/* The hyphen fits: the line may be broken after it. */
			*nbr = ic + 1;
			*vbr = vis;
			continue;

		case ASCII_NBRSP:  /* Non-breakable space. */
			p->tcol->buf[ic] = ' ';
			/* FALLTHROUGH */
		default:  /* Printable character. */
			graph = 1;
			vis += (*p->width)(p, p->tcol->buf[ic]);
			/*
			 * The field is overrun, but an earlier break
			 * point was recorded: keep those values.
			 */
			if (vis > vtarget && *nbr > 0)
				return;
			continue;
		}
		/* Only reached by a break out of the switch above. */
		break;
	}

	/*
	 * If the last word extends to the end of the field without any
	 * trailing whitespace, the loop could not check yet whether it
	 * can remain on this line.  So do the check now.
	 * If no break point was found at all (*nbr == 0),
	 * accept the word even though it overruns the field.
	 */

	if (graph && (vis <= vtarget || *nbr == 0)) {
		*nbr = ic;
		*vbr = vis;
	}
}
344
/*
 * Print the contents of one field
 * with an indentation of	 vbl	  visual columns,
 * an input string length of	 nbr	  characters,
 * an output width of		 vbr	  visual columns,
 * and a desired field width of	 vtarget  visual columns.
 *
 * Printing starts at buffer position p->tcol->col and stops just
 * before buffer position nbr; afterwards, p->tcol->col is set to nbr.
 * The arguments vbr and vtarget are not used in the function body.
 * p->viscol is advanced by the blanks and characters actually written.
 */
static void
term_field(struct termp *p, size_t vbl, size_t nbr, size_t vbr, size_t vtarget)
{
	size_t	 ic;	/* Character position in the input buffer. */
	size_t	 vis;	/* Visual position of the current character. */
	size_t	 dv;	/* Visual width of the current character. */
	size_t	 vn;	/* Visual position of the next character. */

	vis = 0;
	for (ic = p->tcol->col; ic < nbr; ic++) {

		/*
		 * To avoid the printing of trailing whitespace,
		 * do not print whitespace right away, only count it.
		 * The pending amount is accumulated in vbl.
		 */

		switch (p->tcol->buf[ic]) {
		case '\n':
		case ASCII_BREAK:
			/* Markers without any width: nothing to print. */
			continue;
		case '\t':
			vn = term_tab_next(vis);
			vbl += vn - vis;
			vis = vn;
			continue;
		case ' ':
		case ASCII_NBRSP:
			dv = (*p->width)(p, ' ');
			vbl += dv;
			vis += dv;
			continue;
		default:
			break;
		}

		/*
		 * We found a non-blank character to print,
		 * so write preceding white space now.
		 */

		if (vbl > 0) {
			(*p->advance)(p, vbl);
			p->viscol += vbl;
			vbl = 0;
		}

		/*
		 * Print the character and adjust the visual position.
		 * A backspace moves the position back by the width
		 * of the character stored just before it in the buffer.
		 */

		(*p->letter)(p, p->tcol->buf[ic]);
		if (p->tcol->buf[ic] == '\b') {
			dv = (*p->width)(p, p->tcol->buf[ic - 1]);
			p->viscol -= dv;
			vis -= dv;
		} else {
			dv = (*p->width)(p, p->tcol->buf[ic]);
			p->viscol += dv;
			vis += dv;
		}
	}

	/* Everything up to nbr has been consumed. */

	p->tcol->col = nbr;
}
413
414static void
415endline(struct termp *p)
416{
417	if ((p->flags & (TERMP_NEWMC | TERMP_ENDMC)) == TERMP_ENDMC) {
418		p->mc = NULL;
419		p->flags &= ~TERMP_ENDMC;
420	}
421	if (p->mc != NULL) {
422		if (p->viscol && p->maxrmargin >= p->viscol)
423			(*p->advance)(p, p->maxrmargin - p->viscol + 1);
424		p->flags |= TERMP_NOBUF | TERMP_NOSPACE;
425		term_word(p, p->mc);
426		p->flags &= ~(TERMP_NOBUF | TERMP_NEWMC);
427	}
428	p->viscol = 0;
429	p->minbl = 0;
430	(*p->endline)(p);
431}
432
433/*
434 * A newline only breaks an existing line; it won't assert vertical
435 * space.  All data in the output buffer is flushed prior to the newline
436 * assertion.
437 */
438void
439term_newln(struct termp *p)
440{
441
442	p->flags |= TERMP_NOSPACE;
443	if (p->tcol->lastcol || p->viscol)
444		term_flushln(p);
445}
446
447/*
448 * Asserts a vertical space (a full, empty line-break between lines).
449 * Note that if used twice, this will cause two blank spaces and so on.
450 * All data in the output buffer is flushed prior to the newline
451 * assertion.
452 */
453void
454term_vspace(struct termp *p)
455{
456
457	term_newln(p);
458	p->viscol = 0;
459	p->minbl = 0;
460	if (0 < p->skipvsp)
461		p->skipvsp--;
462	else
463		(*p->endline)(p);
464}
465
466/* Swap current and previous font; for \fP and .ft P */
467void
468term_fontlast(struct termp *p)
469{
470	enum termfont	 f;
471
472	f = p->fontl;
473	p->fontl = p->fontq[p->fonti];
474	p->fontq[p->fonti] = f;
475}
476
477/* Set font, save current, discard previous; for \f, .ft, .B etc. */
478void
479term_fontrepl(struct termp *p, enum termfont f)
480{
481
482	p->fontl = p->fontq[p->fonti];
483	p->fontq[p->fonti] = f;
484}
485
486/* Set font, save previous. */
487void
488term_fontpush(struct termp *p, enum termfont f)
489{
490
491	p->fontl = p->fontq[p->fonti];
492	if (++p->fonti == p->fontsz) {
493		p->fontsz += 8;
494		p->fontq = mandoc_reallocarray(p->fontq,
495		    p->fontsz, sizeof(*p->fontq));
496	}
497	p->fontq[p->fonti] = f;
498}
499
500/* Flush to make the saved pointer current again. */
501void
502term_fontpopq(struct termp *p, int i)
503{
504
505	assert(i >= 0);
506	if (p->fonti > i)
507		p->fonti = i;
508}
509
510/* Pop one font off the stack. */
511void
512term_fontpop(struct termp *p)
513{
514
515	assert(p->fonti);
516	p->fonti--;
517}
518
/*
 * Handle pwords, partial words, which may be either a single word or a
 * phrase that cannot be broken down (such as a literal string).  This
 * handles word styling.
 *
 * Unless the NOBUF flag is set, the word is first separated from the
 * preceding one as requested by the spacing flags.  Then the word is
 * copied to the output buffer piece by piece with encode() and
 * encode1(), interpreting the escape sequences found in it.
 * The NBRWORD flag is cleared at the end.
 */
void
term_word(struct termp *p, const char *word)
{
	struct roffsu	 su;
	const char	 nbrsp[2] = { ASCII_NBRSP, 0 };
	const char	*seq, *cp;
	int		 sz, uc;
	size_t		 csz, lsz, ssz;
	enum mandoc_esc	 esc;

	/*
	 * Insert inter-word spacing: normally one blank, two after
	 * the SENTENCE flag, a non-breaking space with the KEEP flag,
	 * and nothing with the NOSPACE flag.  Then update the flags
	 * for the next word.
	 */

	if ((p->flags & TERMP_NOBUF) == 0) {
		if ((p->flags & TERMP_NOSPACE) == 0) {
			if ((p->flags & TERMP_KEEP) == 0) {
				bufferc(p, ' ');
				if (p->flags & TERMP_SENTENCE)
					bufferc(p, ' ');
			} else
				bufferc(p, ASCII_NBRSP);
		}
		if (p->flags & TERMP_PREKEEP)
			p->flags |= TERMP_KEEP;
		if (p->flags & TERMP_NONOSPACE)
			p->flags |= TERMP_NOSPACE;
		else
			p->flags &= ~TERMP_NOSPACE;
		p->flags &= ~(TERMP_SENTENCE | TERMP_NONEWLINE);
		p->skipvsp = 0;
	}

	while ('\0' != *word) {

		/*
		 * Copy plain text up to the next backslash.
		 * With the NBRWORD flag, blanks inside the word
		 * are replaced by non-breaking spaces one by one.
		 */

		if ('\\' != *word) {
			if (TERMP_NBRWORD & p->flags) {
				if (' ' == *word) {
					encode(p, nbrsp, 1);
					word++;
					continue;
				}
				ssz = strcspn(word, "\\ ");
			} else
				ssz = strcspn(word, "\\");
			encode(p, word, ssz);
			word += (int)ssz;
			continue;
		}

		/*
		 * Parse the escape sequence following the backslash.
		 * Cases ending in "break" set uc and share the common
		 * handling after the switch; cases ending in "continue"
		 * are complete and go on with the rest of the word.
		 */

		word++;
		esc = mandoc_escape(&word, &seq, &sz);
		switch (esc) {
		case ESCAPE_UNICODE:
			uc = mchars_num2uc(seq + 1, sz - 1);
			break;
		case ESCAPE_NUMBERED:
			uc = mchars_num2char(seq, sz);
			if (uc < 0)
				continue;
			break;
		case ESCAPE_SPECIAL:
			/* ASCII output uses a string, otherwise a codepoint. */
			if (p->enc == TERMENC_ASCII) {
				cp = mchars_spec2str(seq, sz, &ssz);
				if (cp != NULL)
					encode(p, cp, ssz);
			} else {
				uc = mchars_spec2cp(seq, sz);
				if (uc > 0)
					encode1(p, uc);
			}
			continue;
		case ESCAPE_UNDEF:
			uc = *seq;
			break;
		case ESCAPE_FONTBOLD:
			term_fontrepl(p, TERMFONT_BOLD);
			continue;
		case ESCAPE_FONTITALIC:
			term_fontrepl(p, TERMFONT_UNDER);
			continue;
		case ESCAPE_FONTBI:
			term_fontrepl(p, TERMFONT_BI);
			continue;
		case ESCAPE_FONT:
		case ESCAPE_FONTCW:
		case ESCAPE_FONTROMAN:
			term_fontrepl(p, TERMFONT_NONE);
			continue;
		case ESCAPE_FONTPREV:
			term_fontlast(p);
			continue;
		case ESCAPE_BREAK:
			bufferc(p, '\n');
			continue;
		case ESCAPE_NOSPACE:
			/*
			 * Cancel a pending BACKAFTER request, or else,
			 * at the end of the word, suppress the space
			 * and newline that would follow the word.
			 */
			if (p->flags & TERMP_BACKAFTER)
				p->flags &= ~TERMP_BACKAFTER;
			else if (*word == '\0')
				p->flags |= (TERMP_NOSPACE | TERMP_NONEWLINE);
			continue;
		case ESCAPE_DEVICE:
			/* Print a name depending on output type and encoding. */
			if (p->type == TERMTYPE_PDF)
				encode(p, "pdf", 3);
			else if (p->type == TERMTYPE_PS)
				encode(p, "ps", 2);
			else if (p->enc == TERMENC_ASCII)
				encode(p, "ascii", 5);
			else
				encode(p, "utf8", 4);
			continue;
		case ESCAPE_HORIZ:
			/*
			 * Horizontal motion.  With a leading '|', start
			 * from the negated current buffer position, then
			 * add the distance converted by term_hen().
			 */
			if (*seq == '|') {
				seq++;
				uc = -p->col;
			} else
				uc = 0;
			if (a2roffsu(seq, &su, SCALE_EM) == NULL)
				continue;
			uc += term_hen(p, &su);
			/*
			 * Move forward by appending non-breaking spaces.
			 * Move backward by reducing p->col; if that is
			 * not sufficient, reduce p->ti and the column
			 * offset by the remainder, but not below an
			 * offset of zero.
			 */
			if (uc > 0)
				while (uc-- > 0)
					bufferc(p, ASCII_NBRSP);
			else if (p->col > (size_t)(-uc))
				p->col += uc;
			else {
				uc += p->col;
				p->col = 0;
				if (p->tcol->offset > (size_t)(-uc)) {
					p->ti += uc;
					p->tcol->offset += uc;
				} else {
					p->ti -= p->tcol->offset;
					p->tcol->offset = 0;
				}
			}
			continue;
		case ESCAPE_HLINE:
			/*
			 * Horizontal line: repeat one character to fill
			 * a width of lsz.  If the requested length is
			 * not positive, fill the distance from the
			 * column offset to the right margin instead.
			 */
			if ((cp = a2roffsu(seq, &su, SCALE_EM)) == NULL)
				continue;
			uc = term_hen(p, &su);
			if (uc <= 0) {
				if (p->tcol->rmargin <= p->tcol->offset)
					continue;
				lsz = p->tcol->rmargin - p->tcol->offset;
			} else
				lsz = uc;
			/*
			 * Select the fill character from what follows
			 * the length.  If that is the same byte as the
			 * one right before the sequence (presumably the
			 * closing delimiter, meaning no fill character
			 * was given - TODO confirm), use the default.
			 */
			if (*cp == seq[-1])
				uc = -1;
			else if (*cp == '\\') {
				seq = cp + 1;
				esc = mandoc_escape(&seq, &cp, &sz);
				switch (esc) {
				case ESCAPE_UNICODE:
					uc = mchars_num2uc(cp + 1, sz - 1);
					break;
				case ESCAPE_NUMBERED:
					uc = mchars_num2char(cp, sz);
					break;
				case ESCAPE_SPECIAL:
					uc = mchars_spec2cp(cp, sz);
					break;
				case ESCAPE_UNDEF:
					/*
					 * NOTE(review): in this nested call,
					 * cp plays the role seq plays in the
					 * outer switch, where ESCAPE_UNDEF
					 * reads *seq; confirm that reading
					 * *seq rather than *cp is intended.
					 */
					uc = *seq;
					break;
				default:
					uc = -1;
					break;
				}
			} else
				uc = *cp;
			/* Fall back to '_' for control and invalid values. */
			if (uc < 0x20 || (uc > 0x7E && uc < 0xA0))
				uc = '_';
			/* csz is the visual width of one fill character. */
			if (p->enc == TERMENC_ASCII) {
				cp = ascii_uc2str(uc);
				csz = term_strlen(p, cp);
				ssz = strlen(cp);
			} else
				csz = (*p->width)(p, uc);
			/* Repeat as long as another copy still fits. */
			while (lsz >= csz) {
				if (p->enc == TERMENC_ASCII)
					encode(p, cp, ssz);
				else
					encode1(p, uc);
				lsz -= csz;
			}
			continue;
		case ESCAPE_SKIPCHAR:
			p->flags |= TERMP_BACKAFTER;
			continue;
		case ESCAPE_OVERSTRIKE:
			/*
			 * Encode the characters of the sequence one by
			 * one, requesting a backspace between them such
			 * that they are printed on top of each other.
			 * Nested escape sequences are skipped.
			 */
			cp = seq + sz;
			while (seq < cp) {
				if (*seq == '\\') {
					mandoc_escape(&seq, NULL, NULL);
					continue;
				}
				encode1(p, *seq++);
				if (seq < cp) {
					if (p->flags & TERMP_BACKBEFORE)
						p->flags |= TERMP_BACKAFTER;
					else
						p->flags |= TERMP_BACKBEFORE;
				}
			}
			/* Trim trailing backspace/blank pair. */
			if (p->tcol->lastcol > 2 &&
			    (p->tcol->buf[p->tcol->lastcol - 1] == ' ' ||
			     p->tcol->buf[p->tcol->lastcol - 1] == '\t'))
				p->tcol->lastcol -= 2;
			if (p->col > p->tcol->lastcol)
				p->col = p->tcol->lastcol;
			continue;
		default:
			/* All other escape sequences produce no output. */
			continue;
		}

		/*
		 * Common handling for Unicode and numbered
		 * character escape sequences.
		 * ASCII output uses the string from ascii_uc2str();
		 * otherwise, control characters other than the tab
		 * and values from 0x7F to 0x9F are replaced by U+FFFD.
		 */

		if (p->enc == TERMENC_ASCII) {
			cp = ascii_uc2str(uc);
			encode(p, cp, strlen(cp));
		} else {
			if ((uc < 0x20 && uc != 0x09) ||
			    (uc > 0x7E && uc < 0xA0))
				uc = 0xFFFD;
			encode1(p, uc);
		}
	}
	p->flags &= ~TERMP_NBRWORD;
}
753
754static void
755adjbuf(struct termp_col *c, size_t sz)
756{
757	if (c->maxcols == 0)
758		c->maxcols = 1024;
759	while (c->maxcols <= sz)
760		c->maxcols <<= 2;
761	c->buf = mandoc_reallocarray(c->buf, c->maxcols, sizeof(*c->buf));
762}
763
764static void
765bufferc(struct termp *p, char c)
766{
767	if (p->flags & TERMP_NOBUF) {
768		(*p->letter)(p, c);
769		return;
770	}
771	if (p->col + 1 >= p->tcol->maxcols)
772		adjbuf(p->tcol, p->col + 1);
773	if (p->tcol->lastcol <= p->col || (c != ' ' && c != ASCII_NBRSP))
774		p->tcol->buf[p->col] = c;
775	if (p->tcol->lastcol < ++p->col)
776		p->tcol->lastcol = p->col;
777}
778
779/*
780 * See encode().
781 * Do this for a single (probably unicode) value.
782 * Does not check for non-decorated glyphs.
783 */
784static void
785encode1(struct termp *p, int c)
786{
787	enum termfont	  f;
788
789	if (p->flags & TERMP_NOBUF) {
790		(*p->letter)(p, c);
791		return;
792	}
793
794	if (p->col + 7 >= p->tcol->maxcols)
795		adjbuf(p->tcol, p->col + 7);
796
797	f = (c == ASCII_HYPH || c > 127 || isgraph(c)) ?
798	    p->fontq[p->fonti] : TERMFONT_NONE;
799
800	if (p->flags & TERMP_BACKBEFORE) {
801		if (p->tcol->buf[p->col - 1] == ' ' ||
802		    p->tcol->buf[p->col - 1] == '\t')
803			p->col--;
804		else
805			p->tcol->buf[p->col++] = '\b';
806		p->flags &= ~TERMP_BACKBEFORE;
807	}
808	if (f == TERMFONT_UNDER || f == TERMFONT_BI) {
809		p->tcol->buf[p->col++] = '_';
810		p->tcol->buf[p->col++] = '\b';
811	}
812	if (f == TERMFONT_BOLD || f == TERMFONT_BI) {
813		if (c == ASCII_HYPH)
814			p->tcol->buf[p->col++] = '-';
815		else
816			p->tcol->buf[p->col++] = c;
817		p->tcol->buf[p->col++] = '\b';
818	}
819	if (p->tcol->lastcol <= p->col || (c != ' ' && c != ASCII_NBRSP))
820		p->tcol->buf[p->col] = c;
821	if (p->tcol->lastcol < ++p->col)
822		p->tcol->lastcol = p->col;
823	if (p->flags & TERMP_BACKAFTER) {
824		p->flags |= TERMP_BACKBEFORE;
825		p->flags &= ~TERMP_BACKAFTER;
826	}
827}
828
829static void
830encode(struct termp *p, const char *word, size_t sz)
831{
832	size_t		  i;
833
834	if (p->flags & TERMP_NOBUF) {
835		for (i = 0; i < sz; i++)
836			(*p->letter)(p, word[i]);
837		return;
838	}
839
840	if (p->col + 2 + (sz * 5) >= p->tcol->maxcols)
841		adjbuf(p->tcol, p->col + 2 + (sz * 5));
842
843	for (i = 0; i < sz; i++) {
844		if (ASCII_HYPH == word[i] ||
845		    isgraph((unsigned char)word[i]))
846			encode1(p, word[i]);
847		else {
848			if (p->tcol->lastcol <= p->col ||
849			    (word[i] != ' ' && word[i] != ASCII_NBRSP))
850				p->tcol->buf[p->col] = word[i];
851			p->col++;
852
853			/*
854			 * Postpone the effect of \z while handling
855			 * an overstrike sequence from ascii_uc2str().
856			 */
857
858			if (word[i] == '\b' &&
859			    (p->flags & TERMP_BACKBEFORE)) {
860				p->flags &= ~TERMP_BACKBEFORE;
861				p->flags |= TERMP_BACKAFTER;
862			}
863		}
864	}
865	if (p->tcol->lastcol < p->col)
866		p->tcol->lastcol = p->col;
867}
868
869void
870term_setwidth(struct termp *p, const char *wstr)
871{
872	struct roffsu	 su;
873	int		 iop, width;
874
875	iop = 0;
876	width = 0;
877	if (NULL != wstr) {
878		switch (*wstr) {
879		case '+':
880			iop = 1;
881			wstr++;
882			break;
883		case '-':
884			iop = -1;
885			wstr++;
886			break;
887		default:
888			break;
889		}
890		if (a2roffsu(wstr, &su, SCALE_MAX) != NULL)
891			width = term_hspan(p, &su);
892		else
893			iop = 0;
894	}
895	(*p->setwidth)(p, iop, width);
896}
897
898size_t
899term_len(const struct termp *p, size_t sz)
900{
901
902	return (*p->width)(p, ' ') * sz;
903}
904
905static size_t
906cond_width(const struct termp *p, int c, int *skip)
907{
908
909	if (*skip) {
910		(*skip) = 0;
911		return 0;
912	} else
913		return (*p->width)(p, c);
914}
915
/*
 * Return the total visual width of the string cp as reported by the
 * width callback, taking escape sequences and the special characters
 * ASCII_NBRSP, ASCII_HYPH, and ASCII_BREAK into account.
 */
size_t
term_strlen(const struct termp *p, const char *cp)
{
	size_t		 sz, rsz, i;
	int		 ssz, skip, uc;
	const char	*seq, *rhs;
	enum mandoc_esc	 esc;
	/* Characters that end a run of ordinary text. */
	static const char rej[] = { '\\', ASCII_NBRSP, ASCII_HYPH,
			ASCII_BREAK, '\0' };

	/*
	 * Account for escaped sequences within string length
	 * calculations.  This follows the logic in term_word() as we
	 * must calculate the width of produced strings.
	 */

	sz = 0;
	skip = 0;	/* Set by \z: the next character has no width. */
	while ('\0' != *cp) {

		/* Add up the widths of a run of ordinary characters. */

		rsz = strcspn(cp, rej);
		for (i = 0; i < rsz; i++)
			sz += cond_width(p, *cp++, &skip);

		/* Handle the special character that ended the run. */

		switch (*cp) {
		case '\\':
			cp++;
			rhs = NULL;
			esc = mandoc_escape(&cp, &seq, &ssz);

			/*
			 * Cases ending in "break" either set uc to a
			 * single character or set rhs and rsz to a
			 * replacement string and go on below the switch.
			 * Cases ending in "continue" are complete and
			 * proceed with the rest of the input string.
			 */

			switch (esc) {
			case ESCAPE_UNICODE:
				uc = mchars_num2uc(seq + 1, ssz - 1);
				break;
			case ESCAPE_NUMBERED:
				uc = mchars_num2char(seq, ssz);
				if (uc < 0)
					continue;
				break;
			case ESCAPE_SPECIAL:
				if (p->enc == TERMENC_ASCII) {
					rhs = mchars_spec2str(seq, ssz, &rsz);
					if (rhs != NULL)
						break;
				} else {
					uc = mchars_spec2cp(seq, ssz);
					if (uc > 0)
						sz += cond_width(p, uc, &skip);
				}
				continue;
			case ESCAPE_UNDEF:
				uc = *seq;
				break;
			case ESCAPE_DEVICE:
				/* Same names as printed by term_word(). */
				if (p->type == TERMTYPE_PDF) {
					rhs = "pdf";
					rsz = 3;
				} else if (p->type == TERMTYPE_PS) {
					rhs = "ps";
					rsz = 2;
				} else if (p->enc == TERMENC_ASCII) {
					rhs = "ascii";
					rsz = 5;
				} else {
					rhs = "utf8";
					rsz = 4;
				}
				break;
			case ESCAPE_SKIPCHAR:
				skip = 1;
				continue;
			case ESCAPE_OVERSTRIKE:
				/*
				 * Overstruck characters share one position:
				 * count the width of the widest one only.
				 * Nested escape sequences are skipped.
				 */
				rsz = 0;
				rhs = seq + ssz;
				while (seq < rhs) {
					if (*seq == '\\') {
						mandoc_escape(&seq, NULL, NULL);
						continue;
					}
					i = (*p->width)(p, *seq++);
					if (rsz < i)
						rsz = i;
				}
				sz += rsz;
				continue;
			default:
				/* Other escape sequences have no width. */
				continue;
			}

			/*
			 * Common handling for Unicode and numbered
			 * character escape sequences.
			 * ASCII output uses the replacement string
			 * from ascii_uc2str(); otherwise, add the
			 * width of the single character and be done.
			 */

			if (rhs == NULL) {
				if (p->enc == TERMENC_ASCII) {
					rhs = ascii_uc2str(uc);
					rsz = strlen(rhs);
				} else {
					if ((uc < 0x20 && uc != 0x09) ||
					    (uc > 0x7E && uc < 0xA0))
						uc = 0xFFFD;
					sz += cond_width(p, uc, &skip);
					continue;
				}
			}

			/*
			 * A pending skip request consumes
			 * the whole replacement string.
			 */

			if (skip) {
				skip = 0;
				break;
			}

			/*
			 * Common handling for all escape sequences
			 * printing more than one character.
			 */

			for (i = 0; i < rsz; i++)
				sz += (*p->width)(p, *rhs++);
			break;
		case ASCII_NBRSP:
			sz += cond_width(p, ' ', &skip);
			cp++;
			break;
		case ASCII_HYPH:
			sz += cond_width(p, '-', &skip);
			cp++;
			break;
		default:
			break;
		}
	}

	return sz;
}
1049
1050int
1051term_vspan(const struct termp *p, const struct roffsu *su)
1052{
1053	double		 r;
1054	int		 ri;
1055
1056	switch (su->unit) {
1057	case SCALE_BU:
1058		r = su->scale / 40.0;
1059		break;
1060	case SCALE_CM:
1061		r = su->scale * 6.0 / 2.54;
1062		break;
1063	case SCALE_FS:
1064		r = su->scale * 65536.0 / 40.0;
1065		break;
1066	case SCALE_IN:
1067		r = su->scale * 6.0;
1068		break;
1069	case SCALE_MM:
1070		r = su->scale * 0.006;
1071		break;
1072	case SCALE_PC:
1073		r = su->scale;
1074		break;
1075	case SCALE_PT:
1076		r = su->scale / 12.0;
1077		break;
1078	case SCALE_EN:
1079	case SCALE_EM:
1080		r = su->scale * 0.6;
1081		break;
1082	case SCALE_VS:
1083		r = su->scale;
1084		break;
1085	default:
1086		abort();
1087	}
1088	ri = r > 0.0 ? r + 0.4995 : r - 0.4995;
1089	return ri < 66 ? ri : 1;
1090}
1091
1092/*
1093 * Convert a scaling width to basic units, rounding towards 0.
1094 */
1095int
1096term_hspan(const struct termp *p, const struct roffsu *su)
1097{
1098
1099	return (*p->hspan)(p, su);
1100}
1101
1102/*
1103 * Convert a scaling width to basic units, rounding to closest.
1104 */
1105int
1106term_hen(const struct termp *p, const struct roffsu *su)
1107{
1108	int bu;
1109
1110	if ((bu = (*p->hspan)(p, su)) >= 0)
1111		return (bu + 11) / 24;
1112	else
1113		return -((-bu + 11) / 24);
1114}
1115