1/* $OpenBSD: tag.c,v 1.38 2023/11/24 04:48:02 schwarze Exp $ */ 2/* 3 * Copyright (c) 2015, 2016, 2018, 2019, 2020, 2022, 2023 4 * Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 * 18 * Functions to tag syntax tree nodes. 19 * For internal use by mandoc(1) validation modules only. 20 */ 21#include <sys/types.h> 22 23#include <assert.h> 24#include <limits.h> 25#include <stddef.h> 26#include <stdint.h> 27#include <stdio.h> 28#include <stdlib.h> 29#include <string.h> 30 31#include "mandoc_aux.h" 32#include "mandoc_ohash.h" 33#include "mandoc.h" 34#include "roff.h" 35#include "mdoc.h" 36#include "roff_int.h" 37#include "tag.h" 38 39struct tag_entry { 40 struct roff_node **nodes; 41 size_t maxnodes; 42 size_t nnodes; 43 int prio; 44 char s[]; 45}; 46 47static void tag_move_href(struct roff_man *, 48 struct roff_node *, const char *); 49static void tag_move_id(struct roff_node *); 50 51static struct ohash tag_data; 52 53 54/* 55 * Set up the ohash table to collect nodes 56 * where various marked-up terms are documented. 57 */ 58void 59tag_alloc(void) 60{ 61 mandoc_ohash_init(&tag_data, 4, offsetof(struct tag_entry, s)); 62} 63 64void 65tag_free(void) 66{ 67 struct tag_entry *entry; 68 unsigned int slot; 69 70 if (tag_data.info.free == NULL) 71 return; 72 entry = ohash_first(&tag_data, &slot); 73 while (entry != NULL) { 74 free(entry->nodes); 75 free(entry); 76 entry = ohash_next(&tag_data, &slot); 77 } 78 ohash_delete(&tag_data); 79 tag_data.info.free = NULL; 80} 81 82/* 83 * Set a node where a term is defined, 84 * unless the term is already defined at a lower priority. 85 */ 86void 87tag_put(const char *s, int prio, struct roff_node *n) 88{ 89 struct tag_entry *entry; 90 struct roff_node *nold; 91 const char *se, *src; 92 char *cpy; 93 size_t len; 94 unsigned int slot; 95 int changed; 96 97 assert(prio <= TAG_FALLBACK); 98 99 /* 100 * If the node is already tagged, the existing tag is 101 * explicit and we are now about to add an implicit tag. 102 * Don't do that; just skip implicit tagging if the author 103 * specified an explicit tag. 104 */ 105 106 if (n->flags & NODE_ID) 107 return; 108 109 /* Determine the implicit tag. */ 110 111 changed = 1; 112 if (s == NULL) { 113 if (n->child == NULL || n->child->type != ROFFT_TEXT) 114 return; 115 s = n->child->string; 116 switch (s[0]) { 117 case '-': 118 s++; 119 break; 120 case '\\': 121 switch (s[1]) { 122 case '&': 123 case '-': 124 case 'e': 125 s += 2; 126 break; 127 default: 128 return; 129 } 130 break; 131 default: 132 changed = 0; 133 break; 134 } 135 } 136 137 /* 138 * Translate \- and ASCII_HYPH to plain '-'. 139 * Skip whitespace and escapes and whatever follows, 140 * and if there is any, downgrade the priority. 141 */ 142 143 cpy = mandoc_malloc(strlen(s) + 1); 144 for (src = s, len = 0; *src != '\0'; src++, len++) { 145 switch (*src) { 146 case '\t': 147 case ' ': 148 changed = 1; 149 break; 150 case ASCII_HYPH: 151 cpy[len] = '-'; 152 changed = 1; 153 continue; 154 case '\\': 155 if (src[1] != '-') 156 break; 157 src++; 158 changed = 1; 159 /* FALLTHROUGH */ 160 default: 161 cpy[len] = *src; 162 continue; 163 } 164 break; 165 } 166 if (len == 0) 167 goto out; 168 cpy[len] = '\0'; 169 170 if (*src != '\0' && prio < TAG_WEAK) 171 prio = TAG_WEAK; 172 173 s = cpy; 174 se = cpy + len; 175 slot = ohash_qlookupi(&tag_data, s, &se); 176 entry = ohash_find(&tag_data, slot); 177 178 /* Build a new entry. */ 179 180 if (entry == NULL) { 181 entry = mandoc_malloc(sizeof(*entry) + len + 1); 182 memcpy(entry->s, s, len + 1); 183 entry->nodes = NULL; 184 entry->maxnodes = entry->nnodes = 0; 185 ohash_insert(&tag_data, slot, entry); 186 } 187 188 /* 189 * Lower priority numbers take precedence. 190 * If a better entry is already present, ignore the new one. 191 */ 192 193 else if (entry->prio < prio) 194 goto out; 195 196 /* 197 * If the existing entry is worse, clear it. 198 * In addition, a tag with priority TAG_FALLBACK 199 * is only used if the tag occurs exactly once. 200 */ 201 202 else if (entry->prio > prio || prio == TAG_FALLBACK) { 203 while (entry->nnodes > 0) { 204 nold = entry->nodes[--entry->nnodes]; 205 nold->flags &= ~NODE_ID; 206 free(nold->tag); 207 nold->tag = NULL; 208 } 209 if (prio == TAG_FALLBACK) { 210 entry->prio = TAG_DELETE; 211 goto out; 212 } 213 } 214 215 /* Remember the new node. */ 216 217 if (entry->maxnodes == entry->nnodes) { 218 entry->maxnodes += 4; 219 entry->nodes = mandoc_reallocarray(entry->nodes, 220 entry->maxnodes, sizeof(*entry->nodes)); 221 } 222 entry->nodes[entry->nnodes++] = n; 223 entry->prio = prio; 224 n->flags |= NODE_ID; 225 if (changed) { 226 assert(n->tag == NULL); 227 n->tag = mandoc_strndup(s, len); 228 } 229 230 out: 231 free(cpy); 232} 233 234int 235tag_exists(const char *tag) 236{ 237 return ohash_find(&tag_data, ohash_qlookup(&tag_data, tag)) != NULL; 238} 239 240/* 241 * For in-line elements, move the link target 242 * to the enclosing paragraph when appropriate. 243 */ 244static void 245tag_move_id(struct roff_node *n) 246{ 247 struct roff_node *np; 248 249 np = n; 250 for (;;) { 251 if (np->prev != NULL) 252 np = np->prev; 253 else if ((np = np->parent) == NULL) 254 return; 255 switch (np->tok) { 256 case MDOC_It: 257 switch (np->parent->parent->norm->Bl.type) { 258 case LIST_column: 259 /* Target the ROFFT_BLOCK = <tr>. */ 260 np = np->parent; 261 break; 262 case LIST_diag: 263 case LIST_hang: 264 case LIST_inset: 265 case LIST_ohang: 266 case LIST_tag: 267 /* Target the ROFFT_HEAD = <dt>. */ 268 np = np->parent->head; 269 break; 270 default: 271 /* Target the ROFF_BODY = <li>. */ 272 break; 273 } 274 /* FALLTHROUGH */ 275 case MDOC_Pp: /* Target the ROFFT_ELEM = <p>. */ 276 if (np->tag == NULL) { 277 np->tag = mandoc_strdup(n->tag == NULL ? 278 n->child->string : n->tag); 279 np->flags |= NODE_ID; 280 n->flags &= ~NODE_ID; 281 } 282 return; 283 case MDOC_Sh: 284 case MDOC_Ss: 285 case MDOC_Bd: 286 case MDOC_Bl: 287 case MDOC_D1: 288 case MDOC_Dl: 289 case MDOC_Rs: 290 /* Do not move past major blocks. */ 291 return; 292 default: 293 /* 294 * Move past in-line content and partial 295 * blocks, for example .It Xo or .It Bq Er. 296 */ 297 break; 298 } 299 } 300} 301 302/* 303 * When a paragraph is tagged and starts with text, 304 * move the permalink to the first few words. 305 */ 306static void 307tag_move_href(struct roff_man *man, struct roff_node *n, const char *tag) 308{ 309 char *cp; 310 311 if (n == NULL || n->type != ROFFT_TEXT || 312 *n->string == '\0' || *n->string == ' ') 313 return; 314 315 cp = n->string; 316 while (cp != NULL && cp - n->string < 5) 317 cp = strchr(cp + 1, ' '); 318 319 /* If the first text node is longer, split it. */ 320 321 if (cp != NULL && cp[1] != '\0') { 322 man->last = n; 323 man->next = ROFF_NEXT_SIBLING; 324 roff_word_alloc(man, n->line, 325 n->pos + (cp - n->string), cp + 1); 326 man->last->flags = n->flags & ~NODE_LINE; 327 *cp = '\0'; 328 } 329 330 assert(n->tag == NULL); 331 n->tag = mandoc_strdup(tag); 332 n->flags |= NODE_HREF; 333} 334 335/* 336 * When all tags have been set, decide where to put 337 * the associated permalinks, and maybe move some tags 338 * to the beginning of the respective paragraphs. 339 */ 340void 341tag_postprocess(struct roff_man *man, struct roff_node *n) 342{ 343 if (n->flags & NODE_ID) { 344 switch (n->tok) { 345 case MDOC_Pp: 346 tag_move_href(man, n->next, n->tag); 347 break; 348 case MDOC_Bd: 349 case MDOC_D1: 350 case MDOC_Dl: 351 tag_move_href(man, n->child, n->tag); 352 break; 353 case MDOC_Bl: 354 /* XXX No permalink for now. */ 355 break; 356 default: 357 if (n->type == ROFFT_ELEM || n->tok == MDOC_Fo) 358 tag_move_id(n); 359 if (n->tok != MDOC_Tg) 360 n->flags |= NODE_HREF; 361 else if ((n->flags & NODE_ID) == 0) { 362 n->flags |= NODE_NOPRT; 363 free(n->tag); 364 n->tag = NULL; 365 } 366 break; 367 } 368 } 369 for (n = n->child; n != NULL; n = n->next) 370 tag_postprocess(man, n); 371} 372