chars.c revision 274880
1/*	$Id: chars.c,v 1.58 2014/07/23 15:00:08 schwarze Exp $ */
2/*
3 * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18#ifdef HAVE_CONFIG_H
19#include "config.h"
20#endif
21
22#include <assert.h>
23#include <ctype.h>
24#include <stdlib.h>
25#include <string.h>
26
27#include "mandoc.h"
28#include "mandoc_aux.h"
29#include "libmandoc.h"
30
/*
 * Inclusive bounds on the first byte of a lookup key: 32 (space)
 * through 126 ('~').  The hash table has one bucket per value in
 * this range; the bucket index is the first byte minus PRINT_LO.
 */
#define	PRINT_LO	 32
#define	PRINT_HI	 126
33
/*
 * One entry of the special character table.
 * The member order is significant: the CHAR() macro initializes
 * entries positionally.
 */
struct	ln {
	struct ln	 *next;		/* next entry in the same hash chain */
	const char	 *code;		/* key matched by find() */
	const char	 *ascii;	/* string handed out by mchars_spec2str() */
	int		  unicode;	/* value handed out by mchars_spec2cp() */
};
40
41#define	LINES_MAX	  330
42
43#define CHAR(in, ch, code) \
44	{ NULL, (in), (ch), (code) },
45
46#define	CHAR_TBL_START	  static struct ln lines[LINES_MAX] = {
47#define	CHAR_TBL_END	  };
48
49#include "chars.in"
50
/*
 * Handle for the character table as seen by callers.
 * Created by mchars_alloc(), destroyed by mchars_free().
 */
struct	mchars {
	/* Array of chain heads, one per value in PRINT_LO..PRINT_HI. */
	struct ln	**htab;
};
54
/* Look up the sz-byte key at p; defined at the end of this file. */
static	const struct ln	 *find(const struct mchars *tab, const char *p, size_t sz);
57
58
59void
60mchars_free(struct mchars *arg)
61{
62
63	free(arg->htab);
64	free(arg);
65}
66
67struct mchars *
68mchars_alloc(void)
69{
70	struct mchars	 *tab;
71	struct ln	**htab;
72	struct ln	 *pp;
73	int		  i, hash;
74
75	/*
76	 * Constructs a very basic chaining hashtable.  The hash routine
77	 * is simply the integral value of the first character.
78	 * Subsequent entries are chained in the order they're processed.
79	 */
80
81	tab = mandoc_malloc(sizeof(struct mchars));
82	htab = mandoc_calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln *));
83
84	for (i = 0; i < LINES_MAX; i++) {
85		hash = (int)lines[i].code[0] - PRINT_LO;
86
87		if (NULL == (pp = htab[hash])) {
88			htab[hash] = &lines[i];
89			continue;
90		}
91
92		for ( ; pp->next; pp = pp->next)
93			/* Scan ahead. */ ;
94		pp->next = &lines[i];
95	}
96
97	tab->htab = htab;
98	return(tab);
99}
100
101int
102mchars_spec2cp(const struct mchars *arg, const char *p, size_t sz)
103{
104	const struct ln	*ln;
105
106	ln = find(arg, p, sz);
107	if (NULL == ln)
108		return(-1);
109	return(ln->unicode);
110}
111
/*
 * Convert the sz-byte decimal number at p to a single character.
 * Returns '\0' unless the parsed value lies in 1..255 and
 * isprint() accepts it.
 */
char
mchars_num2char(const char *p, size_t sz)
{
	int	  num;

	num = mandoc_strntoi(p, sz, 10);

	/* Covers a negative result from the parser, zero and overlong values. */
	if (num <= 0 || num >= 256)
		return('\0');

	if ( ! isprint(num))
		return('\0');

	return(num);
}
122
/*
 * Convert the sz-byte hexadecimal number at p to a Unicode
 * code point.  Returns '\0' (zero) if the parser reports a
 * negative value or if the number is not an accepted code point.
 */
int
mchars_num2uc(const char *p, size_t sz)
{
	int	 i;

	if ((i = mandoc_strntoi(p, sz, 16)) < 0)
		return('\0');

	/*
	 * Security warning:
	 * Never extend the range of accepted characters
	 * to overlap with the ASCII range, 0x00-0x7F
	 * without re-auditing the callers of this function.
	 * Some callers might rely on the fact that we never
	 * return ASCII characters for their escaping decisions.
	 *
	 * Note that 0x80 itself is rejected as well.
	 */

	if (i <= 0x80 || i > 0x10FFFF)
		return('\0');

	/*
	 * The surrogate range is reserved for UTF-16 and contains
	 * no characters; encoding such a value would produce
	 * ill-formed output.
	 */

	if (i >= 0xD800 && i <= 0xDFFF)
		return('\0');

	return(i);
}
144
145const char *
146mchars_spec2str(const struct mchars *arg,
147		const char *p, size_t sz, size_t *rsz)
148{
149	const struct ln	*ln;
150
151	ln = find(arg, p, sz);
152	if (NULL == ln) {
153		*rsz = 1;
154		return(NULL);
155	}
156
157	*rsz = strlen(ln->ascii);
158	return(ln->ascii);
159}
160
161static const struct ln *
162find(const struct mchars *tab, const char *p, size_t sz)
163{
164	const struct ln	 *pp;
165	int		  hash;
166
167	assert(p);
168
169	if (0 == sz || p[0] < PRINT_LO || p[0] > PRINT_HI)
170		return(NULL);
171
172	hash = (int)p[0] - PRINT_LO;
173
174	for (pp = tab->htab[hash]; pp; pp = pp->next)
175		if (0 == strncmp(pp->code, p, sz) &&
176		    '\0' == pp->code[(int)sz])
177			return(pp);
178
179	return(NULL);
180}
181