cmap.c revision 131846
1231990Smp/*- 259243Sobrien * Copyright (c) 2004 Tim J. Robbins. 359243Sobrien * All rights reserved. 459243Sobrien * 559243Sobrien * Redistribution and use in source and binary forms, with or without 659243Sobrien * modification, are permitted provided that the following conditions 759243Sobrien * are met: 859243Sobrien * 1. Redistributions of source code must retain the above copyright 959243Sobrien * notice, this list of conditions and the following disclaimer. 1059243Sobrien * 2. Redistributions in binary form must reproduce the above copyright 1159243Sobrien * notice, this list of conditions and the following disclaimer in the 1259243Sobrien * documentation and/or other materials provided with the distribution. 1359243Sobrien * 1459243Sobrien * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 1559243Sobrien * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1659243Sobrien * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 1759243Sobrien * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18100616Smp * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 1959243Sobrien * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2059243Sobrien * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2159243Sobrien * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2259243Sobrien * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2359243Sobrien * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2459243Sobrien * SUCH DAMAGE. 2559243Sobrien */ 2659243Sobrien/* 2759243Sobrien * "Character map" ADT. Stores mappings between pairs of characters in a 2859243Sobrien * splay tree, with a lookup table cache to simplify looking up the first 2959243Sobrien * bunch of characters (which are presumably more common than others). 3059243Sobrien */ 3159243Sobrien 3259243Sobrien#include <sys/cdefs.h> 3359243Sobrien__FBSDID("$FreeBSD: head/usr.bin/tr/cmap.c 131846 2004-07-09 02:08:07Z tjr $"); 3459243Sobrien 3559243Sobrien#include <assert.h> 36231990Smp#include <limits.h> 3759243Sobrien#include <stdbool.h> 3859243Sobrien#include <stdlib.h> 3959243Sobrien#include <wchar.h> 4069408Sache#include "cmap.h" 4159243Sobrien 4269408Sachestatic struct cmapnode *cmap_splay(struct cmapnode *, wint_t); 4359243Sobrien 4459243Sobrien/* 4559243Sobrien * cmap_alloc -- 4659243Sobrien * Allocate a character map. 4759243Sobrien */ 4859243Sobrienstruct cmap * 4959243Sobriencmap_alloc(void) 5059243Sobrien{ 5159243Sobrien struct cmap *cm; 5259243Sobrien 53231990Smp cm = malloc(sizeof(*cm)); 5459243Sobrien if (cm == NULL) 55145479Smp return (NULL); 5659243Sobrien cm->cm_root = NULL; 5759243Sobrien cm->cm_def = CM_DEF_SELF; 5859243Sobrien cm->cm_havecache = false; 59167465Smp cm->cm_min = cm->cm_max = 0; 6059243Sobrien return (cm); 61167465Smp} 62167465Smp 63167465Smp/* 6459243Sobrien * cmap_add -- 6559243Sobrien * Add a mapping from "from" to "to" to the map. 6659243Sobrien */ 6759243Sobrienbool 6859243Sobriencmap_add(struct cmap *cm, wint_t from, wint_t to) 6959243Sobrien{ 7059243Sobrien struct cmapnode *cmn, *ncmn; 7159243Sobrien 7259243Sobrien cm->cm_havecache = false; 7359243Sobrien 7459243Sobrien if (cm->cm_root == NULL) { 7559243Sobrien cmn = malloc(sizeof(*cmn)); 7659243Sobrien if (cmn == NULL) 7759243Sobrien return (false); 7859243Sobrien cmn->cmn_from = from; 7959243Sobrien cmn->cmn_to = to; 8059243Sobrien cmn->cmn_left = cmn->cmn_right = NULL; 8159243Sobrien cm->cm_root = cmn; 8259243Sobrien cm->cm_min = cm->cm_max = from; 8359243Sobrien return (true); 8459243Sobrien } 8559243Sobrien 8659243Sobrien cmn = cm->cm_root = cmap_splay(cm->cm_root, from); 87167465Smp 88167465Smp if (cmn->cmn_from == from) { 8959243Sobrien cmn->cmn_to = to; 90145479Smp return (true); 91167465Smp } 92167465Smp 9359243Sobrien ncmn = malloc(sizeof(*ncmn)); 94167465Smp if (ncmn == NULL) 95167465Smp return (false); 9659243Sobrien ncmn->cmn_from = from; 9759243Sobrien ncmn->cmn_to = to; 9859243Sobrien if (from < cmn->cmn_from) { 9959243Sobrien ncmn->cmn_left = cmn->cmn_left; 10059243Sobrien ncmn->cmn_right = cmn; 10159243Sobrien cmn->cmn_left = NULL; 10259243Sobrien } else { 10359243Sobrien ncmn->cmn_right = cmn->cmn_right; 10459243Sobrien ncmn->cmn_left = cmn; 10559243Sobrien cmn->cmn_right = NULL; 10659243Sobrien } 10759243Sobrien if (from < cm->cm_min) 10869408Sache cm->cm_min = from; 10959243Sobrien if (from > cm->cm_max) 11059243Sobrien cm->cm_max = from; 11159243Sobrien cm->cm_root = ncmn; 11259243Sobrien 11359243Sobrien return (true); 11459243Sobrien} 11559243Sobrien 11659243Sobrien/* 11759243Sobrien * cmap_lookup_hard -- 11859243Sobrien * Look up the mapping for a character using the cache. 11959243Sobrien */ 12059243Sobrienwint_t 12169408Sachecmap_lookup_hard(struct cmap *cm, wint_t ch) 12259243Sobrien{ 12359243Sobrien 12459243Sobrien if (cm->cm_root != NULL) { 12559243Sobrien cm->cm_root = cmap_splay(cm->cm_root, ch); 12659243Sobrien if (cm->cm_root->cmn_from == ch) 12759243Sobrien return (cm->cm_root->cmn_to); 12859243Sobrien } 12959243Sobrien return (cm->cm_def == CM_DEF_SELF ? ch : cm->cm_def); 130167465Smp} 13159243Sobrien 13259243Sobrien/* 13359243Sobrien * cmap_cache -- 134167465Smp * Update the cache. 135167465Smp */ 136167465Smpvoid 137167465Smpcmap_cache(struct cmap *cm) 13859243Sobrien{ 139167465Smp wint_t ch; 140167465Smp 141167465Smp for (ch = 0; ch < CM_CACHE_SIZE; ch++) 142167465Smp cm->cm_cache[ch] = cmap_lookup_hard(cm, ch); 143167465Smp 144167465Smp cm->cm_havecache = true; 145167465Smp} 146167465Smp 147167465Smp/* 148167465Smp * cmap_default -- 149167465Smp * Change the value that characters without mappings map to, and 150167465Smp * return the old value. The special character value CM_MAP_SELF 151167465Smp * means characters map to themselves. 152167465Smp */ 153167465Smpwint_t 154167465Smpcmap_default(struct cmap *cm, wint_t def) 155167465Smp{ 156167465Smp wint_t old; 157167465Smp 15859243Sobrien old = cm->cm_def; 159167465Smp cm->cm_def = def; 16059243Sobrien cm->cm_havecache = false; 161167465Smp return (old); 16259243Sobrien} 163167465Smp 164167465Smpstatic struct cmapnode * 16569408Sachecmap_splay(struct cmapnode *t, wint_t ch) 166167465Smp{ 16769408Sache struct cmapnode N, *l, *r, *y; 168167465Smp 16959243Sobrien /* 17059243Sobrien * Based on public domain code from Sleator. 17159243Sobrien */ 17259243Sobrien 17359243Sobrien assert(t != NULL); 17459243Sobrien 175167465Smp N.cmn_left = N.cmn_right = NULL; 17659243Sobrien l = r = &N; 17759243Sobrien for (;;) { 17859243Sobrien if (ch < t->cmn_from) { 17959243Sobrien if (t->cmn_left != NULL && 18059243Sobrien ch < t->cmn_left->cmn_from) { 18159243Sobrien y = t->cmn_left; 18259243Sobrien t->cmn_left = y->cmn_right; 18359243Sobrien y->cmn_right = t; 18459243Sobrien t = y; 18559243Sobrien } 18659243Sobrien if (t->cmn_left == NULL) 18759243Sobrien break; 18859243Sobrien r->cmn_left = t; 18959243Sobrien r = t; 19059243Sobrien t = t->cmn_left; 19159243Sobrien } else if (ch > t->cmn_from) { 19259243Sobrien if (t->cmn_right != NULL && 193167465Smp ch > t->cmn_right->cmn_from) { 194167465Smp y = t->cmn_right; 19559243Sobrien t->cmn_right = y->cmn_left; 19659243Sobrien y->cmn_left = t; 19759243Sobrien t = y; 19859243Sobrien } 19959243Sobrien if (t->cmn_right == NULL) 20059243Sobrien break; 20159243Sobrien l->cmn_right = t; 20259243Sobrien l = t; 20359243Sobrien t = t->cmn_right; 20459243Sobrien } else 20559243Sobrien break; 20659243Sobrien } 20759243Sobrien l->cmn_right = t->cmn_left; 20859243Sobrien r->cmn_left = t->cmn_right; 20959243Sobrien t->cmn_left = N.cmn_right; 21059243Sobrien t->cmn_right = N.cmn_left; 21159243Sobrien return (t); 21259243Sobrien} 21359243Sobrien