1241675Suqs/* $Id: chars.c,v 1.52 2011/11/08 00:15:23 kristaps Exp $ */ 2241675Suqs/* 3241675Suqs * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4241675Suqs * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> 5241675Suqs * 6241675Suqs * Permission to use, copy, modify, and distribute this software for any 7241675Suqs * purpose with or without fee is hereby granted, provided that the above 8241675Suqs * copyright notice and this permission notice appear in all copies. 9241675Suqs * 10241675Suqs * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11241675Suqs * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12241675Suqs * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13241675Suqs * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14241675Suqs * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15241675Suqs * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16241675Suqs * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17241675Suqs */ 18241675Suqs#ifdef HAVE_CONFIG_H 19241675Suqs#include "config.h" 20241675Suqs#endif 21241675Suqs 22241675Suqs#include <assert.h> 23241675Suqs#include <ctype.h> 24241675Suqs#include <stdlib.h> 25241675Suqs#include <string.h> 26241675Suqs 27241675Suqs#include "mandoc.h" 28241675Suqs#include "libmandoc.h" 29241675Suqs 30241675Suqs#define PRINT_HI 126 31241675Suqs#define PRINT_LO 32 32241675Suqs 33241675Suqsstruct ln { 34241675Suqs struct ln *next; 35241675Suqs const char *code; 36241675Suqs const char *ascii; 37241675Suqs int unicode; 38241675Suqs}; 39241675Suqs 40241675Suqs#define LINES_MAX 328 41241675Suqs 42241675Suqs#define CHAR(in, ch, code) \ 43241675Suqs { NULL, (in), (ch), (code) }, 44241675Suqs 45241675Suqs#define CHAR_TBL_START static struct ln lines[LINES_MAX] = { 46241675Suqs#define CHAR_TBL_END }; 47241675Suqs 48241675Suqs#include "chars.in" 49241675Suqs 50241675Suqsstruct mchars { 51241675Suqs struct ln **htab; 52241675Suqs}; 53241675Suqs 54241675Suqsstatic const struct ln *find(const struct mchars *, 55241675Suqs const char *, size_t); 56241675Suqs 57241675Suqsvoid 58241675Suqsmchars_free(struct mchars *arg) 59241675Suqs{ 60241675Suqs 61241675Suqs free(arg->htab); 62241675Suqs free(arg); 63241675Suqs} 64241675Suqs 65241675Suqsstruct mchars * 66241675Suqsmchars_alloc(void) 67241675Suqs{ 68241675Suqs struct mchars *tab; 69241675Suqs struct ln **htab; 70241675Suqs struct ln *pp; 71241675Suqs int i, hash; 72241675Suqs 73241675Suqs /* 74241675Suqs * Constructs a very basic chaining hashtable. The hash routine 75241675Suqs * is simply the integral value of the first character. 76241675Suqs * Subsequent entries are chained in the order they're processed. 77241675Suqs */ 78241675Suqs 79241675Suqs tab = mandoc_malloc(sizeof(struct mchars)); 80241675Suqs htab = mandoc_calloc(PRINT_HI - PRINT_LO + 1, sizeof(struct ln **)); 81241675Suqs 82241675Suqs for (i = 0; i < LINES_MAX; i++) { 83241675Suqs hash = (int)lines[i].code[0] - PRINT_LO; 84241675Suqs 85241675Suqs if (NULL == (pp = htab[hash])) { 86241675Suqs htab[hash] = &lines[i]; 87241675Suqs continue; 88241675Suqs } 89241675Suqs 90241675Suqs for ( ; pp->next; pp = pp->next) 91241675Suqs /* Scan ahead. */ ; 92241675Suqs pp->next = &lines[i]; 93241675Suqs } 94241675Suqs 95241675Suqs tab->htab = htab; 96241675Suqs return(tab); 97241675Suqs} 98241675Suqs 99241675Suqsint 100241675Suqsmchars_spec2cp(const struct mchars *arg, const char *p, size_t sz) 101241675Suqs{ 102241675Suqs const struct ln *ln; 103241675Suqs 104241675Suqs ln = find(arg, p, sz); 105241675Suqs if (NULL == ln) 106241675Suqs return(-1); 107241675Suqs return(ln->unicode); 108241675Suqs} 109241675Suqs 110241675Suqschar 111241675Suqsmchars_num2char(const char *p, size_t sz) 112241675Suqs{ 113241675Suqs int i; 114241675Suqs 115241675Suqs if ((i = mandoc_strntoi(p, sz, 10)) < 0) 116241675Suqs return('\0'); 117241675Suqs return(i > 0 && i < 256 && isprint(i) ? 118241675Suqs /* LINTED */ i : '\0'); 119241675Suqs} 120241675Suqs 121241675Suqsint 122241675Suqsmchars_num2uc(const char *p, size_t sz) 123241675Suqs{ 124241675Suqs int i; 125241675Suqs 126241675Suqs if ((i = mandoc_strntoi(p, sz, 16)) < 0) 127241675Suqs return('\0'); 128241675Suqs /* FIXME: make sure we're not in a bogus range. */ 129241675Suqs return(i > 0x80 && i <= 0x10FFFF ? i : '\0'); 130241675Suqs} 131241675Suqs 132241675Suqsconst char * 133241675Suqsmchars_spec2str(const struct mchars *arg, 134241675Suqs const char *p, size_t sz, size_t *rsz) 135241675Suqs{ 136241675Suqs const struct ln *ln; 137241675Suqs 138241675Suqs ln = find(arg, p, sz); 139241675Suqs if (NULL == ln) { 140241675Suqs *rsz = 1; 141241675Suqs return(NULL); 142241675Suqs } 143241675Suqs 144241675Suqs *rsz = strlen(ln->ascii); 145241675Suqs return(ln->ascii); 146241675Suqs} 147241675Suqs 148241675Suqsstatic const struct ln * 149241675Suqsfind(const struct mchars *tab, const char *p, size_t sz) 150241675Suqs{ 151241675Suqs const struct ln *pp; 152241675Suqs int hash; 153241675Suqs 154241675Suqs assert(p); 155241675Suqs 156241675Suqs if (0 == sz || p[0] < PRINT_LO || p[0] > PRINT_HI) 157241675Suqs return(NULL); 158241675Suqs 159241675Suqs hash = (int)p[0] - PRINT_LO; 160241675Suqs 161241675Suqs for (pp = tab->htab[hash]; pp; pp = pp->next) 162241675Suqs if (0 == strncmp(pp->code, p, sz) && 163241675Suqs '\0' == pp->code[(int)sz]) 164241675Suqs return(pp); 165241675Suqs 166241675Suqs return(NULL); 167241675Suqs} 168