1/* $OpenBSD: tag.c,v 1.38 2023/11/24 04:48:02 schwarze Exp $ */
2/*
3 * Copyright (c) 2015, 2016, 2018, 2019, 2020, 2022, 2023
4 *               Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * Functions to tag syntax tree nodes.
19 * For internal use by mandoc(1) validation modules only.
20 */
21#include <sys/types.h>
22
23#include <assert.h>
24#include <limits.h>
25#include <stddef.h>
26#include <stdint.h>
27#include <stdio.h>
28#include <stdlib.h>
29#include <string.h>
30
31#include "mandoc_aux.h"
32#include "mandoc_ohash.h"
33#include "mandoc.h"
34#include "roff.h"
35#include "mdoc.h"
36#include "roff_int.h"
37#include "tag.h"
38
/*
 * One hash table entry per distinct tag string.
 * The entry is allocated with extra room after the fixed members
 * so that the NUL-terminated tag can be stored in the trailing
 * flexible array member, which also serves as the hash key.
 */
struct tag_entry {
	struct roff_node	**nodes;	/* nodes carrying this tag */
	size_t			  maxnodes;	/* slots allocated in nodes */
	size_t			  nnodes;	/* slots in use in nodes */
	int			  prio;		/* priority of the stored nodes */
	char			  s[];		/* the tag string (hash key) */
};
46
47static void		 tag_move_href(struct roff_man *,
48				struct roff_node *, const char *);
49static void		 tag_move_id(struct roff_node *);
50
51static struct ohash	 tag_data;
52
53
54/*
55 * Set up the ohash table to collect nodes
56 * where various marked-up terms are documented.
57 */
58void
59tag_alloc(void)
60{
61	mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s));
62}
63
64void
65tag_free(void)
66{
67	struct tag_entry	*entry;
68	unsigned int		 slot;
69
70	if (tag_data.info.free == NULL)
71		return;
72	entry = ohash_first(&tag_data, &slot);
73	while (entry != NULL) {
74		free(entry->nodes);
75		free(entry);
76		entry = ohash_next(&tag_data, &slot);
77	}
78	ohash_delete(&tag_data);
79	tag_data.info.free = NULL;
80}
81
82/*
83 * Set a node where a term is defined,
84 * unless the term is already defined at a lower priority.
85 */
86void
87tag_put(const char *s, int prio, struct roff_node *n)
88{
89	struct tag_entry	*entry;
90	struct roff_node	*nold;
91	const char		*se, *src;
92	char			*cpy;
93	size_t			 len;
94	unsigned int		 slot;
95	int			 changed;
96
97	assert(prio <= TAG_FALLBACK);
98
99	/*
100	 * If the node is already tagged, the existing tag is
101	 * explicit and we are now about to add an implicit tag.
102	 * Don't do that; just skip implicit tagging if the author
103	 * specified an explicit tag.
104	 */
105
106	if (n->flags & NODE_ID)
107		return;
108
109	/* Determine the implicit tag. */
110
111	changed = 1;
112	if (s == NULL) {
113		if (n->child == NULL || n->child->type != ROFFT_TEXT)
114			return;
115		s = n->child->string;
116		switch (s[0]) {
117		case '-':
118			s++;
119			break;
120		case '\\':
121			switch (s[1]) {
122			case '&':
123			case '-':
124			case 'e':
125				s += 2;
126				break;
127			default:
128				return;
129			}
130			break;
131		default:
132			changed = 0;
133			break;
134		}
135	}
136
137	/*
138	 * Translate \- and ASCII_HYPH to plain '-'.
139	 * Skip whitespace and escapes and whatever follows,
140	 * and if there is any, downgrade the priority.
141	 */
142
143	cpy = mandoc_malloc(strlen(s) + 1);
144	for (src = s, len = 0; *src != '\0'; src++, len++) {
145		switch (*src) {
146		case '\t':
147		case ' ':
148			changed = 1;
149			break;
150		case ASCII_HYPH:
151			cpy[len] = '-';
152			changed = 1;
153			continue;
154		case '\\':
155			if (src[1] != '-')
156				break;
157			src++;
158			changed = 1;
159			/* FALLTHROUGH */
160		default:
161			cpy[len] = *src;
162			continue;
163		}
164		break;
165	}
166	if (len == 0)
167		goto out;
168	cpy[len] = '\0';
169
170	if (*src != '\0' && prio < TAG_WEAK)
171		prio = TAG_WEAK;
172
173	s = cpy;
174	se = cpy + len;
175	slot = ohash_qlookupi(&tag_data, s, &se);
176	entry = ohash_find(&tag_data, slot);
177
178	/* Build a new entry. */
179
180	if (entry == NULL) {
181		entry = mandoc_malloc(sizeof(*entry) + len + 1);
182		memcpy(entry->s, s, len + 1);
183		entry->nodes = NULL;
184		entry->maxnodes = entry->nnodes = 0;
185		ohash_insert(&tag_data, slot, entry);
186	}
187
188	/*
189	 * Lower priority numbers take precedence.
190	 * If a better entry is already present, ignore the new one.
191	 */
192
193	else if (entry->prio < prio)
194		goto out;
195
196	/*
197	 * If the existing entry is worse, clear it.
198	 * In addition, a tag with priority TAG_FALLBACK
199	 * is only used if the tag occurs exactly once.
200	 */
201
202	else if (entry->prio > prio || prio == TAG_FALLBACK) {
203		while (entry->nnodes > 0) {
204			nold = entry->nodes[--entry->nnodes];
205			nold->flags &= ~NODE_ID;
206			free(nold->tag);
207			nold->tag = NULL;
208		}
209		if (prio == TAG_FALLBACK) {
210			entry->prio = TAG_DELETE;
211			goto out;
212		}
213	}
214
215	/* Remember the new node. */
216
217	if (entry->maxnodes == entry->nnodes) {
218		entry->maxnodes += 4;
219		entry->nodes = mandoc_reallocarray(entry->nodes,
220		    entry->maxnodes, sizeof(*entry->nodes));
221	}
222	entry->nodes[entry->nnodes++] = n;
223	entry->prio = prio;
224	n->flags |= NODE_ID;
225	if (changed) {
226		assert(n->tag == NULL);
227		n->tag = mandoc_strndup(s, len);
228	}
229
230 out:
231	free(cpy);
232}
233
234int
235tag_exists(const char *tag)
236{
237	return ohash_find(&tag_data, ohash_qlookup(&tag_data, tag)) != NULL;
238}
239
240/*
241 * For in-line elements, move the link target
242 * to the enclosing paragraph when appropriate.
243 */
244static void
245tag_move_id(struct roff_node *n)
246{
247	struct roff_node *np;
248
249	np = n;
250	for (;;) {
251		if (np->prev != NULL)
252			np = np->prev;
253		else if ((np = np->parent) == NULL)
254			return;
255		switch (np->tok) {
256		case MDOC_It:
257			switch (np->parent->parent->norm->Bl.type) {
258			case LIST_column:
259				/* Target the ROFFT_BLOCK = <tr>. */
260				np = np->parent;
261				break;
262			case LIST_diag:
263			case LIST_hang:
264			case LIST_inset:
265			case LIST_ohang:
266			case LIST_tag:
267				/* Target the ROFFT_HEAD = <dt>. */
268				np = np->parent->head;
269				break;
270			default:
271				/* Target the ROFF_BODY = <li>. */
272				break;
273			}
274			/* FALLTHROUGH */
275		case MDOC_Pp:	/* Target the ROFFT_ELEM = <p>. */
276			if (np->tag == NULL) {
277				np->tag = mandoc_strdup(n->tag == NULL ?
278				    n->child->string : n->tag);
279				np->flags |= NODE_ID;
280				n->flags &= ~NODE_ID;
281			}
282			return;
283		case MDOC_Sh:
284		case MDOC_Ss:
285		case MDOC_Bd:
286		case MDOC_Bl:
287		case MDOC_D1:
288		case MDOC_Dl:
289		case MDOC_Rs:
290			/* Do not move past major blocks. */
291			return;
292		default:
293			/*
294			 * Move past in-line content and partial
295			 * blocks, for example .It Xo or .It Bq Er.
296			 */
297			break;
298		}
299	}
300}
301
302/*
303 * When a paragraph is tagged and starts with text,
304 * move the permalink to the first few words.
305 */
306static void
307tag_move_href(struct roff_man *man, struct roff_node *n, const char *tag)
308{
309	char	*cp;
310
311	if (n == NULL || n->type != ROFFT_TEXT ||
312	    *n->string == '\0' || *n->string == ' ')
313		return;
314
315	cp = n->string;
316	while (cp != NULL && cp - n->string < 5)
317		cp = strchr(cp + 1, ' ');
318
319	/* If the first text node is longer, split it. */
320
321	if (cp != NULL && cp[1] != '\0') {
322		man->last = n;
323		man->next = ROFF_NEXT_SIBLING;
324		roff_word_alloc(man, n->line,
325		    n->pos + (cp - n->string), cp + 1);
326		man->last->flags = n->flags & ~NODE_LINE;
327		*cp = '\0';
328	}
329
330	assert(n->tag == NULL);
331	n->tag = mandoc_strdup(tag);
332	n->flags |= NODE_HREF;
333}
334
335/*
336 * When all tags have been set, decide where to put
337 * the associated permalinks, and maybe move some tags
338 * to the beginning of the respective paragraphs.
339 */
340void
341tag_postprocess(struct roff_man *man, struct roff_node *n)
342{
343	if (n->flags & NODE_ID) {
344		switch (n->tok) {
345		case MDOC_Pp:
346			tag_move_href(man, n->next, n->tag);
347			break;
348		case MDOC_Bd:
349		case MDOC_D1:
350		case MDOC_Dl:
351			tag_move_href(man, n->child, n->tag);
352			break;
353		case MDOC_Bl:
354			/* XXX No permalink for now. */
355			break;
356		default:
357			if (n->type == ROFFT_ELEM || n->tok == MDOC_Fo)
358				tag_move_id(n);
359			if (n->tok != MDOC_Tg)
360				n->flags |= NODE_HREF;
361			else if ((n->flags & NODE_ID) == 0) {
362				n->flags |= NODE_NOPRT;
363				free(n->tag);
364				n->tag = NULL;
365			}
366			break;
367		}
368	}
369	for (n = n->child; n != NULL; n = n->next)
370		tag_postprocess(man, n);
371}
372