1/*	$Id: html.c,v 1.255 2019/04/30 15:53:00 schwarze Exp $ */
2/*
3 * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011-2015, 2017-2019 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18#include "config.h"
19
20#include <sys/types.h>
21#include <sys/stat.h>
22
23#include <assert.h>
24#include <ctype.h>
25#include <stdarg.h>
26#include <stddef.h>
27#include <stdio.h>
28#include <stdint.h>
29#include <stdlib.h>
30#include <string.h>
31#include <unistd.h>
32
33#include "mandoc_aux.h"
34#include "mandoc_ohash.h"
35#include "mandoc.h"
36#include "roff.h"
37#include "out.h"
38#include "html.h"
39#include "manconf.h"
40#include "main.h"
41
/*
 * Static description of one HTML element type:
 * the element name and the formatting flags that
 * print_otag() and print_ctag() consult when printing it.
 */
struct	htmldata {
	const char	 *name;		/* Element name printed in the tag. */
	int		  flags;	/* Bitwise OR of the HTML_* bits below. */
/* print_otag() does not push the element onto the stack of open tags. */
#define	HTML_NOSTACK	 (1 << 0)
/* print_otag() prints a '/' right before the '>' of the opening tag. */
#define	HTML_AUTOCLOSE	 (1 << 1)
/* End the current output line before the opening tag. */
#define	HTML_NLBEFORE	 (1 << 2)
/* End the output line right after the opening tag. */
#define	HTML_NLBEGIN	 (1 << 3)
/* End the current output line before the closing tag. */
#define	HTML_NLEND	 (1 << 4)
/* End the output line right after the closing tag. */
#define	HTML_NLAFTER	 (1 << 5)
#define	HTML_NLAROUND	 (HTML_NLBEFORE | HTML_NLAFTER)
#define	HTML_NLINSIDE	 (HTML_NLBEGIN | HTML_NLEND)
#define	HTML_NLALL	 (HTML_NLAROUND | HTML_NLINSIDE)
/* h->indent is incremented on opening and decremented on closing. */
#define	HTML_INDENT	 (1 << 6)
/* h->noindent is incremented on opening and decremented on closing. */
#define	HTML_NOINDENT	 (1 << 7)
};
57
/*
 * Names and formatting flags of all supported elements.
 * print_otag() and print_ctag() index this table with the tag value,
 * i.e. htmltags[tag].
 * NOTE(review): the entry order presumably has to match the
 * declaration order of enum htmltag in html.h, which is not visible
 * here -- confirm before adding, removing, or reordering entries.
 */
static	const struct htmldata htmltags[TAG_MAX] = {
	{"html",	HTML_NLALL},
	{"head",	HTML_NLALL | HTML_INDENT},
	{"body",	HTML_NLALL},
	{"meta",	HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
	{"title",	HTML_NLAROUND},
	{"div",		HTML_NLAROUND},
	{"div",		0},
	{"section",	HTML_NLALL},
	{"h1",		HTML_NLAROUND},
	{"h2",		HTML_NLAROUND},
	{"span",	0},
	{"link",	HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
	{"br",		HTML_NOSTACK | HTML_AUTOCLOSE | HTML_NLALL},
	{"a",		0},
	{"table",	HTML_NLALL | HTML_INDENT},
	{"tr",		HTML_NLALL | HTML_INDENT},
	{"td",		HTML_NLAROUND},
	{"li",		HTML_NLAROUND | HTML_INDENT},
	{"ul",		HTML_NLALL | HTML_INDENT},
	{"ol",		HTML_NLALL | HTML_INDENT},
	{"dl",		HTML_NLALL | HTML_INDENT},
	{"dt",		HTML_NLAROUND},
	{"dd",		HTML_NLAROUND | HTML_INDENT},
	{"p",		HTML_NLAROUND | HTML_INDENT},
	{"pre",		HTML_NLALL | HTML_NOINDENT},
	{"var",		0},
	{"cite",	0},
	{"b",		0},
	{"i",		0},
	{"code",	0},
	{"small",	0},
	{"style",	HTML_NLALL | HTML_INDENT},
	{"math",	HTML_NLALL | HTML_INDENT},
	{"mrow",	0},
	{"mi",		0},
	{"mn",		0},
	{"mo",		0},
	{"msup",	0},
	{"msub",	0},
	{"msubsup",	0},
	{"mfrac",	0},
	{"msqrt",	0},
	{"mfenced",	0},
	{"mtable",	0},
	{"mtr",		0},
	{"mtd",		0},
	{"munderover",	0},
	{"munder",	0},
	{"mover",	0},
};
109
/*
 * Avoid duplicate HTML id= attributes.
 * The table is initialized in html_alloc() and html_reset(),
 * filled by html_make_id() when called with unique != 0,
 * and its entries are freed in html_reset_internal().
 */
static	struct ohash	 id_unique;

/* Cleanup shared by html_reset() and html_free(). */
static	void	 html_reset_internal(struct html *);
/* Low level output functions implementing line breaking. */
static	void	 print_byte(struct html *, char);
static	void	 print_endword(struct html *);
static	void	 print_indent(struct html *);
static	void	 print_word(struct html *, const char *);

/* Helpers for closing tags, encoding text, and printing attributes. */
static	void	 print_ctag(struct html *, struct tag *);
static	int	 print_escape(struct html *, char);
static	int	 print_encode(struct html *, const char *, const char *, int);
static	void	 print_href(struct html *, const char *, const char *, int);
static	void	 print_metaf(struct html *);
124
125
126void *
127html_alloc(const struct manoutput *outopts)
128{
129	struct html	*h;
130
131	h = mandoc_calloc(1, sizeof(struct html));
132
133	h->tag = NULL;
134	h->style = outopts->style;
135	if ((h->base_man1 = outopts->man) == NULL)
136		h->base_man2 = NULL;
137	else if ((h->base_man2 = strchr(h->base_man1, ';')) != NULL)
138		*h->base_man2++ = '\0';
139	h->base_includes = outopts->includes;
140	if (outopts->fragment)
141		h->oflags |= HTML_FRAGMENT;
142	if (outopts->toc)
143		h->oflags |= HTML_TOC;
144
145	mandoc_ohash_init(&id_unique, 4, 0);
146
147	return h;
148}
149
150static void
151html_reset_internal(struct html *h)
152{
153	struct tag	*tag;
154	char		*cp;
155	unsigned int	 slot;
156
157	while ((tag = h->tag) != NULL) {
158		h->tag = tag->next;
159		free(tag);
160	}
161	cp = ohash_first(&id_unique, &slot);
162	while (cp != NULL) {
163		free(cp);
164		cp = ohash_next(&id_unique, &slot);
165	}
166	ohash_delete(&id_unique);
167}
168
169void
170html_reset(void *p)
171{
172	html_reset_internal(p);
173	mandoc_ohash_init(&id_unique, 4, 0);
174}
175
/*
 * Discard the per-document state of the formatter p
 * and release the formatter itself.
 */
void
html_free(void *p)
{
	struct html	*h = p;

	html_reset_internal(h);
	free(h);
}
182
183void
184print_gen_head(struct html *h)
185{
186	struct tag	*t;
187
188	print_otag(h, TAG_META, "?", "charset", "utf-8");
189	if (h->style != NULL) {
190		print_otag(h, TAG_LINK, "?h??", "rel", "stylesheet",
191		    h->style, "type", "text/css", "media", "all");
192		return;
193	}
194
195	/*
196	 * Print a minimal embedded style sheet.
197	 */
198
199	t = print_otag(h, TAG_STYLE, "");
200	print_text(h, "table.head, table.foot { width: 100%; }");
201	print_endline(h);
202	print_text(h, "td.head-rtitle, td.foot-os { text-align: right; }");
203	print_endline(h);
204	print_text(h, "td.head-vol { text-align: center; }");
205	print_endline(h);
206	print_text(h, "div.Pp { margin: 1ex 0ex; }");
207	print_endline(h);
208	print_text(h, "div.Nd, div.Bf, div.Op { display: inline; }");
209	print_endline(h);
210	print_text(h, "span.Pa, span.Ad { font-style: italic; }");
211	print_endline(h);
212	print_text(h, "span.Ms { font-weight: bold; }");
213	print_endline(h);
214	print_text(h, "dl.Bl-diag ");
215	print_byte(h, '>');
216	print_text(h, " dt { font-weight: bold; }");
217	print_endline(h);
218	print_text(h, "code.Nm, code.Fl, code.Cm, code.Ic, "
219	    "code.In, code.Fd, code.Fn,");
220	print_endline(h);
221	print_text(h, "code.Cd { font-weight: bold; "
222	    "font-family: inherit; }");
223	print_tagq(h, t);
224}
225
226int
227html_setfont(struct html *h, enum mandoc_esc font)
228{
229	switch (font) {
230	case ESCAPE_FONTPREV:
231		font = h->metal;
232		break;
233	case ESCAPE_FONTITALIC:
234	case ESCAPE_FONTBOLD:
235	case ESCAPE_FONTBI:
236	case ESCAPE_FONTCW:
237	case ESCAPE_FONTROMAN:
238		break;
239	case ESCAPE_FONT:
240		font = ESCAPE_FONTROMAN;
241		break;
242	default:
243		return 0;
244	}
245	h->metal = h->metac;
246	h->metac = font;
247	return 1;
248}
249
250static void
251print_metaf(struct html *h)
252{
253	if (h->metaf) {
254		print_tagq(h, h->metaf);
255		h->metaf = NULL;
256	}
257	switch (h->metac) {
258	case ESCAPE_FONTITALIC:
259		h->metaf = print_otag(h, TAG_I, "");
260		break;
261	case ESCAPE_FONTBOLD:
262		h->metaf = print_otag(h, TAG_B, "");
263		break;
264	case ESCAPE_FONTBI:
265		h->metaf = print_otag(h, TAG_B, "");
266		print_otag(h, TAG_I, "");
267		break;
268	case ESCAPE_FONTCW:
269		h->metaf = print_otag(h, TAG_SPAN, "c", "Li");
270		break;
271	default:
272		break;
273	}
274}
275
276void
277html_close_paragraph(struct html *h)
278{
279	struct tag	*t;
280
281	for (t = h->tag; t != NULL && t->closed == 0; t = t->next) {
282		switch(t->tag) {
283		case TAG_P:
284		case TAG_PRE:
285			print_tagq(h, t);
286			break;
287		case TAG_A:
288			print_tagq(h, t);
289			continue;
290		default:
291			continue;
292		}
293		break;
294	}
295}
296
297/*
298 * ROFF_nf switches to no-fill mode, ROFF_fi to fill mode.
299 * TOKEN_NONE does not switch.  The old mode is returned.
300 */
301enum roff_tok
302html_fillmode(struct html *h, enum roff_tok want)
303{
304	struct tag	*t;
305	enum roff_tok	 had;
306
307	for (t = h->tag; t != NULL; t = t->next)
308		if (t->tag == TAG_PRE)
309			break;
310
311	had = t == NULL ? ROFF_fi : ROFF_nf;
312
313	if (want != had) {
314		switch (want) {
315		case ROFF_fi:
316			print_tagq(h, t);
317			break;
318		case ROFF_nf:
319			html_close_paragraph(h);
320			print_otag(h, TAG_PRE, "");
321			break;
322		case TOKEN_NONE:
323			break;
324		default:
325			abort();
326		}
327	}
328	return had;
329}
330
331char *
332html_make_id(const struct roff_node *n, int unique)
333{
334	const struct roff_node	*nch;
335	char			*buf, *bufs, *cp;
336	unsigned int		 slot;
337	int			 suffix;
338
339	for (nch = n->child; nch != NULL; nch = nch->next)
340		if (nch->type != ROFFT_TEXT)
341			return NULL;
342
343	buf = NULL;
344	deroff(&buf, n);
345	if (buf == NULL)
346		return NULL;
347
348	/*
349	 * In ID attributes, only use ASCII characters that are
350	 * permitted in URL-fragment strings according to the
351	 * explicit list at:
352	 * https://url.spec.whatwg.org/#url-fragment-string
353	 */
354
355	for (cp = buf; *cp != '\0'; cp++)
356		if (isalnum((unsigned char)*cp) == 0 &&
357		    strchr("!$&'()*+,-./:;=?@_~", *cp) == NULL)
358			*cp = '_';
359
360	if (unique == 0)
361		return buf;
362
363	/* Avoid duplicate HTML id= attributes. */
364
365	bufs = NULL;
366	suffix = 1;
367	slot = ohash_qlookup(&id_unique, buf);
368	cp = ohash_find(&id_unique, slot);
369	if (cp != NULL) {
370		while (cp != NULL) {
371			free(bufs);
372			if (++suffix > 127) {
373				free(buf);
374				return NULL;
375			}
376			mandoc_asprintf(&bufs, "%s_%d", buf, suffix);
377			slot = ohash_qlookup(&id_unique, bufs);
378			cp = ohash_find(&id_unique, slot);
379		}
380		free(buf);
381		buf = bufs;
382	}
383	ohash_insert(&id_unique, slot, buf);
384	return buf;
385}
386
387static int
388print_escape(struct html *h, char c)
389{
390
391	switch (c) {
392	case '<':
393		print_word(h, "&lt;");
394		break;
395	case '>':
396		print_word(h, "&gt;");
397		break;
398	case '&':
399		print_word(h, "&amp;");
400		break;
401	case '"':
402		print_word(h, "&quot;");
403		break;
404	case ASCII_NBRSP:
405		print_word(h, "&nbsp;");
406		break;
407	case ASCII_HYPH:
408		print_byte(h, '-');
409		break;
410	case ASCII_BREAK:
411		break;
412	default:
413		return 0;
414	}
415	return 1;
416}
417
/*
 * Print the bytes from p up to but excluding pend, or up to the
 * terminating NUL byte if pend is NULL, encoding characters that
 * are special in HTML and interpreting roff escape sequences.
 * If norecurse is non-zero, font escapes are consumed
 * without changing the font.
 * Returns 1 if the last ESCAPE_NOSPACE escape seen was directly
 * followed by a NUL byte, or 0 otherwise.
 */
static int
print_encode(struct html *h, const char *p, const char *pend, int norecurse)
{
	char		 numbuf[16];
	const char	*seq;
	size_t		 sz;
	int		 c, len, breakline, nospace;
	enum mandoc_esc	 esc;
	/* Bytes that end a run of characters printed verbatim. */
	static const char rejs[10] = { ' ', '\\', '<', '>', '&', '"',
		ASCII_NBRSP, ASCII_HYPH, ASCII_BREAK, '\0' };

	if (pend == NULL)
		pend = strchr(p, '\0');

	breakline = 0;
	nospace = 0;

	while (p < pend) {
		/* A pending skip request swallows one non-escape byte. */
		if (HTML_SKIPCHAR & h->flags && '\\' != *p) {
			h->flags &= ~HTML_SKIPCHAR;
			p++;
			continue;
		}

		/* Copy ordinary bytes up to the next special one. */
		for (sz = strcspn(p, rejs); sz-- && p < pend; p++)
			print_byte(h, *p);

		/*
		 * After a break escape, emit the <br/> at the next
		 * blank or at the end of the input and swallow
		 * the blanks following it.
		 */
		if (breakline &&
		    (p >= pend || *p == ' ' || *p == ASCII_NBRSP)) {
			print_otag(h, TAG_BR, "");
			breakline = 0;
			while (p < pend && (*p == ' ' || *p == ASCII_NBRSP))
				p++;
			continue;
		}

		if (p >= pend)
			break;

		if (*p == ' ') {
			print_endword(h);
			p++;
			continue;
		}

		/* Special characters other than the backslash. */
		if (print_escape(h, *p++))
			continue;

		/* Parse the escape sequence following the backslash. */
		esc = mandoc_escape(&p, &seq, &len);
		switch (esc) {
		case ESCAPE_FONT:
		case ESCAPE_FONTPREV:
		case ESCAPE_FONTBOLD:
		case ESCAPE_FONTITALIC:
		case ESCAPE_FONTBI:
		case ESCAPE_FONTCW:
		case ESCAPE_FONTROMAN:
			if (0 == norecurse) {
				/* No whitespace before the font tags. */
				h->flags |= HTML_NOSPACE;
				if (html_setfont(h, esc))
					print_metaf(h);
				h->flags &= ~HTML_NOSPACE;
			}
			continue;
		case ESCAPE_SKIPCHAR:
			h->flags |= HTML_SKIPCHAR;
			continue;
		case ESCAPE_ERROR:
			continue;
		default:
			break;
		}

		/* A pending skip request swallows this escape. */
		if (h->flags & HTML_SKIPCHAR) {
			h->flags &= ~HTML_SKIPCHAR;
			continue;
		}

		/* Translate the escape sequence to a codepoint c. */
		switch (esc) {
		case ESCAPE_UNICODE:
			/* Skip past "u" header. */
			c = mchars_num2uc(seq + 1, len - 1);
			break;
		case ESCAPE_NUMBERED:
			c = mchars_num2char(seq, len);
			if (c < 0)
				continue;
			break;
		case ESCAPE_SPECIAL:
			c = mchars_spec2cp(seq, len);
			if (c <= 0)
				continue;
			break;
		case ESCAPE_UNDEF:
			c = *seq;
			break;
		case ESCAPE_DEVICE:
			print_word(h, "html");
			continue;
		case ESCAPE_BREAK:
			breakline = 1;
			continue;
		case ESCAPE_NOSPACE:
			if ('\0' == *p)
				nospace = 1;
			continue;
		case ESCAPE_OVERSTRIKE:
			if (len == 0)
				continue;
			/* Only the last character is printed. */
			c = seq[len - 1];
			break;
		default:
			continue;
		}
		/*
		 * Replace values below 0x20 except the tab
		 * and values from 0x7F to 0x9F with U+FFFD.
		 */
		if ((c < 0x20 && c != 0x09) ||
		    (c > 0x7E && c < 0xA0))
			c = 0xFFFD;
		/* Non-ASCII is printed as a numeric character reference. */
		if (c > 0x7E) {
			(void)snprintf(numbuf, sizeof(numbuf), "&#x%.4X;", c);
			print_word(h, numbuf);
		} else if (print_escape(h, c) == 0)
			print_byte(h, c);
	}

	return nospace;
}
544
545static void
546print_href(struct html *h, const char *name, const char *sec, int man)
547{
548	struct stat	 sb;
549	const char	*p, *pp;
550	char		*filename;
551
552	if (man) {
553		pp = h->base_man1;
554		if (h->base_man2 != NULL) {
555			mandoc_asprintf(&filename, "%s.%s", name, sec);
556			if (stat(filename, &sb) == -1)
557				pp = h->base_man2;
558			free(filename);
559		}
560	} else
561		pp = h->base_includes;
562
563	while ((p = strchr(pp, '%')) != NULL) {
564		print_encode(h, pp, p, 1);
565		if (man && p[1] == 'S') {
566			if (sec == NULL)
567				print_byte(h, '1');
568			else
569				print_encode(h, sec, NULL, 1);
570		} else if ((man && p[1] == 'N') ||
571		    (man == 0 && p[1] == 'I'))
572			print_encode(h, name, NULL, 1);
573		else
574			print_encode(h, p, p + 2, 1);
575		pp = p + 2;
576	}
577	if (*pp != '\0')
578		print_encode(h, pp, NULL, 1);
579}
580
581struct tag *
582print_otag(struct html *h, enum htmltag tag, const char *fmt, ...)
583{
584	va_list		 ap;
585	struct tag	*t;
586	const char	*attr;
587	char		*arg1, *arg2;
588	int		 style_written, tflags;
589
590	tflags = htmltags[tag].flags;
591
592	/* Push this tag onto the stack of open scopes. */
593
594	if ((tflags & HTML_NOSTACK) == 0) {
595		t = mandoc_malloc(sizeof(struct tag));
596		t->tag = tag;
597		t->next = h->tag;
598		t->refcnt = 0;
599		t->closed = 0;
600		h->tag = t;
601	} else
602		t = NULL;
603
604	if (tflags & HTML_NLBEFORE)
605		print_endline(h);
606	if (h->col == 0)
607		print_indent(h);
608	else if ((h->flags & HTML_NOSPACE) == 0) {
609		if (h->flags & HTML_KEEP)
610			print_word(h, "&#x00A0;");
611		else {
612			if (h->flags & HTML_PREKEEP)
613				h->flags |= HTML_KEEP;
614			print_endword(h);
615		}
616	}
617
618	if ( ! (h->flags & HTML_NONOSPACE))
619		h->flags &= ~HTML_NOSPACE;
620	else
621		h->flags |= HTML_NOSPACE;
622
623	/* Print out the tag name and attributes. */
624
625	print_byte(h, '<');
626	print_word(h, htmltags[tag].name);
627
628	va_start(ap, fmt);
629
630	while (*fmt != '\0' && *fmt != 's') {
631
632		/* Parse attributes and arguments. */
633
634		arg1 = va_arg(ap, char *);
635		arg2 = NULL;
636		switch (*fmt++) {
637		case 'c':
638			attr = "class";
639			break;
640		case 'h':
641			attr = "href";
642			break;
643		case 'i':
644			attr = "id";
645			break;
646		case '?':
647			attr = arg1;
648			arg1 = va_arg(ap, char *);
649			break;
650		default:
651			abort();
652		}
653		if (*fmt == 'M')
654			arg2 = va_arg(ap, char *);
655		if (arg1 == NULL)
656			continue;
657
658		/* Print the attributes. */
659
660		print_byte(h, ' ');
661		print_word(h, attr);
662		print_byte(h, '=');
663		print_byte(h, '"');
664		switch (*fmt) {
665		case 'I':
666			print_href(h, arg1, NULL, 0);
667			fmt++;
668			break;
669		case 'M':
670			print_href(h, arg1, arg2, 1);
671			fmt++;
672			break;
673		case 'R':
674			print_byte(h, '#');
675			print_encode(h, arg1, NULL, 1);
676			fmt++;
677			break;
678		default:
679			print_encode(h, arg1, NULL, 1);
680			break;
681		}
682		print_byte(h, '"');
683	}
684
685	style_written = 0;
686	while (*fmt++ == 's') {
687		arg1 = va_arg(ap, char *);
688		arg2 = va_arg(ap, char *);
689		if (arg2 == NULL)
690			continue;
691		print_byte(h, ' ');
692		if (style_written == 0) {
693			print_word(h, "style=\"");
694			style_written = 1;
695		}
696		print_word(h, arg1);
697		print_byte(h, ':');
698		print_byte(h, ' ');
699		print_word(h, arg2);
700		print_byte(h, ';');
701	}
702	if (style_written)
703		print_byte(h, '"');
704
705	va_end(ap);
706
707	/* Accommodate for "well-formed" singleton escaping. */
708
709	if (HTML_AUTOCLOSE & htmltags[tag].flags)
710		print_byte(h, '/');
711
712	print_byte(h, '>');
713
714	if (tflags & HTML_NLBEGIN)
715		print_endline(h);
716	else
717		h->flags |= HTML_NOSPACE;
718
719	if (tflags & HTML_INDENT)
720		h->indent++;
721	if (tflags & HTML_NOINDENT)
722		h->noindent++;
723
724	return t;
725}
726
727static void
728print_ctag(struct html *h, struct tag *tag)
729{
730	int	 tflags;
731
732	if (tag->closed == 0) {
733		tag->closed = 1;
734		if (tag == h->metaf)
735			h->metaf = NULL;
736		if (tag == h->tblt)
737			h->tblt = NULL;
738
739		tflags = htmltags[tag->tag].flags;
740		if (tflags & HTML_INDENT)
741			h->indent--;
742		if (tflags & HTML_NOINDENT)
743			h->noindent--;
744		if (tflags & HTML_NLEND)
745			print_endline(h);
746		print_indent(h);
747		print_byte(h, '<');
748		print_byte(h, '/');
749		print_word(h, htmltags[tag->tag].name);
750		print_byte(h, '>');
751		if (tflags & HTML_NLAFTER)
752			print_endline(h);
753	}
754	if (tag->refcnt == 0) {
755		h->tag = tag->next;
756		free(tag);
757	}
758}
759
/*
 * Print the document type declaration on a line of its own.
 */
void
print_gen_decls(struct html *h)
{
	static const char	 doctype[] = "<!DOCTYPE html>";

	print_word(h, doctype);
	print_endline(h);
}
766
767void
768print_gen_comment(struct html *h, struct roff_node *n)
769{
770	int	 wantblank;
771
772	print_word(h, "<!-- This is an automatically generated file."
773	    "  Do not edit.");
774	h->indent = 1;
775	wantblank = 0;
776	while (n != NULL && n->type == ROFFT_COMMENT) {
777		if (strstr(n->string, "-->") == NULL &&
778		    (wantblank || *n->string != '\0')) {
779			print_endline(h);
780			print_indent(h);
781			print_word(h, n->string);
782			wantblank = *n->string != '\0';
783		}
784		n = n->next;
785	}
786	if (wantblank)
787		print_endline(h);
788	print_word(h, " -->");
789	print_endline(h);
790	h->indent = 0;
791}
792
793void
794print_text(struct html *h, const char *word)
795{
796	if (h->col && (h->flags & HTML_NOSPACE) == 0) {
797		if ( ! (HTML_KEEP & h->flags)) {
798			if (HTML_PREKEEP & h->flags)
799				h->flags |= HTML_KEEP;
800			print_endword(h);
801		} else
802			print_word(h, "&#x00A0;");
803	}
804
805	assert(h->metaf == NULL);
806	print_metaf(h);
807	print_indent(h);
808	if ( ! print_encode(h, word, NULL, 0)) {
809		if ( ! (h->flags & HTML_NONOSPACE))
810			h->flags &= ~HTML_NOSPACE;
811		h->flags &= ~HTML_NONEWLINE;
812	} else
813		h->flags |= HTML_NOSPACE | HTML_NONEWLINE;
814
815	if (h->metaf != NULL) {
816		print_tagq(h, h->metaf);
817		h->metaf = NULL;
818	}
819
820	h->flags &= ~HTML_IGNDELIM;
821}
822
823void
824print_tagq(struct html *h, const struct tag *until)
825{
826	struct tag	*this, *next;
827
828	for (this = h->tag; this != NULL; this = next) {
829		next = this == until ? NULL : this->next;
830		print_ctag(h, this);
831	}
832}
833
834/*
835 * Close out all open elements up to but excluding suntil.
836 * Note that a paragraph just inside stays open together with it
837 * because paragraphs include subsequent phrasing content.
838 */
839void
840print_stagq(struct html *h, const struct tag *suntil)
841{
842	struct tag	*this, *next;
843
844	for (this = h->tag; this != NULL; this = next) {
845		next = this->next;
846		if (this == suntil || (next == suntil &&
847		    (this->tag == TAG_P || this->tag == TAG_PRE)))
848			break;
849		print_ctag(h, this);
850	}
851}
852
853
854/***********************************************************************
855 * Low level output functions.
856 * They implement line breaking using a short static buffer.
857 ***********************************************************************/
858
859/*
860 * Buffer one HTML output byte.
861 * If the buffer is full, flush and deactivate it and start a new line.
862 * If the buffer is inactive, print directly.
863 */
864static void
865print_byte(struct html *h, char c)
866{
867	if ((h->flags & HTML_BUFFER) == 0) {
868		putchar(c);
869		h->col++;
870		return;
871	}
872
873	if (h->col + h->bufcol < sizeof(h->buf)) {
874		h->buf[h->bufcol++] = c;
875		return;
876	}
877
878	putchar('\n');
879	h->col = 0;
880	print_indent(h);
881	putchar(' ');
882	putchar(' ');
883	fwrite(h->buf, h->bufcol, 1, stdout);
884	putchar(c);
885	h->col = (h->indent + 1) * 2 + h->bufcol + 1;
886	h->bufcol = 0;
887	h->flags &= ~HTML_BUFFER;
888}
889
890/*
891 * If something was printed on the current output line, end it.
892 * Not to be called right after print_indent().
893 */
894void
895print_endline(struct html *h)
896{
897	if (h->col == 0)
898		return;
899
900	if (h->bufcol) {
901		putchar(' ');
902		fwrite(h->buf, h->bufcol, 1, stdout);
903		h->bufcol = 0;
904	}
905	putchar('\n');
906	h->col = 0;
907	h->flags |= HTML_NOSPACE;
908	h->flags &= ~HTML_BUFFER;
909}
910
911/*
912 * Flush the HTML output buffer.
913 * If it is inactive, activate it.
914 */
915static void
916print_endword(struct html *h)
917{
918	if (h->noindent) {
919		print_byte(h, ' ');
920		return;
921	}
922
923	if ((h->flags & HTML_BUFFER) == 0) {
924		h->col++;
925		h->flags |= HTML_BUFFER;
926	} else if (h->bufcol) {
927		putchar(' ');
928		fwrite(h->buf, h->bufcol, 1, stdout);
929		h->col += h->bufcol + 1;
930	}
931	h->bufcol = 0;
932}
933
934/*
935 * If at the beginning of a new output line,
936 * perform indentation and mark the line as containing output.
937 * Make sure to really produce some output right afterwards,
938 * but do not use print_otag() for producing it.
939 */
940static void
941print_indent(struct html *h)
942{
943	size_t	 i;
944
945	if (h->col)
946		return;
947
948	if (h->noindent == 0) {
949		h->col = h->indent * 2;
950		for (i = 0; i < h->col; i++)
951			putchar(' ');
952	}
953	h->flags &= ~HTML_NOSPACE;
954}
955
/*
 * Pass a NUL-terminated string to print_byte() byte by byte,
 * such that it is printed or buffered
 * depending on the current HTML output buffer state.
 */
static void
print_word(struct html *h, const char *cp)
{
	for (; *cp != '\0'; cp++)
		print_byte(h, *cp);
}
966