/* chars.c revision 275432 */
1275432Sbapt/* $Id: chars.c,v 1.65 2014/10/29 00:17:43 schwarze Exp $ */ 2241675Suqs/* 3241675Suqs * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4275432Sbapt * Copyright (c) 2011, 2014 Ingo Schwarze <schwarze@openbsd.org> 5241675Suqs * 6241675Suqs * Permission to use, copy, modify, and distribute this software for any 7241675Suqs * purpose with or without fee is hereby granted, provided that the above 8241675Suqs * copyright notice and this permission notice appear in all copies. 9241675Suqs * 10241675Suqs * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11241675Suqs * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12241675Suqs * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13241675Suqs * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14241675Suqs * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15241675Suqs * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16241675Suqs * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
17241675Suqs */ 18241675Suqs#include "config.h" 19241675Suqs 20275432Sbapt#include <sys/types.h> 21275432Sbapt 22241675Suqs#include <assert.h> 23241675Suqs#include <ctype.h> 24241675Suqs#include <stdlib.h> 25241675Suqs#include <string.h> 26241675Suqs 27241675Suqs#include "mandoc.h" 28274880Sbapt#include "mandoc_aux.h" 29241675Suqs#include "libmandoc.h" 30241675Suqs 31241675Suqs#define PRINT_HI 126 32241675Suqs#define PRINT_LO 32 33241675Suqs 34241675Suqsstruct ln { 35241675Suqs struct ln *next; 36241675Suqs const char *code; 37241675Suqs const char *ascii; 38241675Suqs int unicode; 39241675Suqs}; 40241675Suqs 41274880Sbapt#define LINES_MAX 330 42241675Suqs 43241675Suqs#define CHAR(in, ch, code) \ 44241675Suqs { NULL, (in), (ch), (code) }, 45241675Suqs 46241675Suqs#define CHAR_TBL_START static struct ln lines[LINES_MAX] = { 47241675Suqs#define CHAR_TBL_END }; 48241675Suqs 49241675Suqs#include "chars.in" 50241675Suqs 51241675Suqsstruct mchars { 52241675Suqs struct ln **htab; 53241675Suqs}; 54241675Suqs 55274880Sbaptstatic const struct ln *find(const struct mchars *, 56241675Suqs const char *, size_t); 57241675Suqs 58274880Sbapt 59241675Suqsvoid 60241675Suqsmchars_free(struct mchars *arg) 61241675Suqs{ 62241675Suqs 63241675Suqs free(arg->htab); 64241675Suqs free(arg); 65241675Suqs} 66241675Suqs 67241675Suqsstruct mchars * 68241675Suqsmchars_alloc(void) 69241675Suqs{ 70241675Suqs struct mchars *tab; 71241675Suqs struct ln **htab; 72241675Suqs struct ln *pp; 73241675Suqs int i, hash; 74241675Suqs 75241675Suqs /* 76241675Suqs * Constructs a very basic chaining hashtable. The hash routine 77241675Suqs * is simply the integral value of the first character. 78241675Suqs * Subsequent entries are chained in the order they're processed. 
79241675Suqs */ 80241675Suqs 81241675Suqs tab = mandoc_malloc(sizeof(struct mchars)); 82261344Suqs htab = mandoc_calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln *)); 83241675Suqs 84241675Suqs for (i = 0; i < LINES_MAX; i++) { 85241675Suqs hash = (int)lines[i].code[0] - PRINT_LO; 86241675Suqs 87241675Suqs if (NULL == (pp = htab[hash])) { 88241675Suqs htab[hash] = &lines[i]; 89241675Suqs continue; 90241675Suqs } 91241675Suqs 92241675Suqs for ( ; pp->next; pp = pp->next) 93241675Suqs /* Scan ahead. */ ; 94241675Suqs pp->next = &lines[i]; 95241675Suqs } 96241675Suqs 97241675Suqs tab->htab = htab; 98241675Suqs return(tab); 99241675Suqs} 100241675Suqs 101241675Suqsint 102241675Suqsmchars_spec2cp(const struct mchars *arg, const char *p, size_t sz) 103241675Suqs{ 104241675Suqs const struct ln *ln; 105241675Suqs 106241675Suqs ln = find(arg, p, sz); 107275432Sbapt return(ln != NULL ? ln->unicode : sz == 1 ? (unsigned char)*p : -1); 108241675Suqs} 109241675Suqs 110275432Sbaptint 111241675Suqsmchars_num2char(const char *p, size_t sz) 112241675Suqs{ 113274880Sbapt int i; 114241675Suqs 115275432Sbapt i = mandoc_strntoi(p, sz, 10); 116275432Sbapt return(i >= 0 && i < 256 ? i : -1); 117241675Suqs} 118241675Suqs 119241675Suqsint 120241675Suqsmchars_num2uc(const char *p, size_t sz) 121241675Suqs{ 122274880Sbapt int i; 123241675Suqs 124275432Sbapt i = mandoc_strntoi(p, sz, 16); 125275432Sbapt assert(i >= 0 && i <= 0x10FFFF); 126275432Sbapt return(i); 127241675Suqs} 128241675Suqs 129241675Suqsconst char * 130274880Sbaptmchars_spec2str(const struct mchars *arg, 131241675Suqs const char *p, size_t sz, size_t *rsz) 132241675Suqs{ 133241675Suqs const struct ln *ln; 134241675Suqs 135241675Suqs ln = find(arg, p, sz); 136275432Sbapt if (ln == NULL) { 137241675Suqs *rsz = 1; 138275432Sbapt return(sz == 1 ? 
p : NULL); 139241675Suqs } 140241675Suqs 141241675Suqs *rsz = strlen(ln->ascii); 142241675Suqs return(ln->ascii); 143241675Suqs} 144241675Suqs 145275432Sbaptconst char * 146275432Sbaptmchars_uc2str(int uc) 147275432Sbapt{ 148275432Sbapt int i; 149275432Sbapt 150275432Sbapt for (i = 0; i < LINES_MAX; i++) 151275432Sbapt if (uc == lines[i].unicode) 152275432Sbapt return(lines[i].ascii); 153275432Sbapt return("<?>"); 154275432Sbapt} 155275432Sbapt 156241675Suqsstatic const struct ln * 157241675Suqsfind(const struct mchars *tab, const char *p, size_t sz) 158241675Suqs{ 159241675Suqs const struct ln *pp; 160241675Suqs int hash; 161241675Suqs 162241675Suqs assert(p); 163241675Suqs 164241675Suqs if (0 == sz || p[0] < PRINT_LO || p[0] > PRINT_HI) 165241675Suqs return(NULL); 166241675Suqs 167241675Suqs hash = (int)p[0] - PRINT_LO; 168241675Suqs 169241675Suqs for (pp = tab->htab[hash]; pp; pp = pp->next) 170274880Sbapt if (0 == strncmp(pp->code, p, sz) && 171274880Sbapt '\0' == pp->code[(int)sz]) 172241675Suqs return(pp); 173241675Suqs 174241675Suqs return(NULL); 175241675Suqs} 176