cmap.c revision 131846
1231990Smp/*-
259243Sobrien * Copyright (c) 2004 Tim J. Robbins.
359243Sobrien * All rights reserved.
459243Sobrien *
559243Sobrien * Redistribution and use in source and binary forms, with or without
659243Sobrien * modification, are permitted provided that the following conditions
759243Sobrien * are met:
859243Sobrien * 1. Redistributions of source code must retain the above copyright
959243Sobrien *    notice, this list of conditions and the following disclaimer.
1059243Sobrien * 2. Redistributions in binary form must reproduce the above copyright
1159243Sobrien *    notice, this list of conditions and the following disclaimer in the
1259243Sobrien *    documentation and/or other materials provided with the distribution.
1359243Sobrien *
1459243Sobrien * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1559243Sobrien * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1659243Sobrien * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1759243Sobrien * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18100616Smp * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
1959243Sobrien * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2059243Sobrien * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2159243Sobrien * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2259243Sobrien * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2359243Sobrien * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2459243Sobrien * SUCH DAMAGE.
2559243Sobrien */
2659243Sobrien/*
2759243Sobrien * "Character map" ADT. Stores mappings between pairs of characters in a
2859243Sobrien * splay tree, with a lookup table cache to simplify looking up the first
2959243Sobrien * bunch of characters (which are presumably more common than others).
3059243Sobrien */
3159243Sobrien
3259243Sobrien#include <sys/cdefs.h>
3359243Sobrien__FBSDID("$FreeBSD: head/usr.bin/tr/cmap.c 131846 2004-07-09 02:08:07Z tjr $");
3459243Sobrien
3559243Sobrien#include <assert.h>
36231990Smp#include <limits.h>
3759243Sobrien#include <stdbool.h>
3859243Sobrien#include <stdlib.h>
3959243Sobrien#include <wchar.h>
4069408Sache#include "cmap.h"
4159243Sobrien
4269408Sachestatic struct cmapnode *cmap_splay(struct cmapnode *, wint_t);
4359243Sobrien
4459243Sobrien/*
4559243Sobrien * cmap_alloc --
4659243Sobrien *	Allocate a character map.
4759243Sobrien */
4859243Sobrienstruct cmap *
4959243Sobriencmap_alloc(void)
5059243Sobrien{
5159243Sobrien	struct cmap *cm;
5259243Sobrien
53231990Smp	cm = malloc(sizeof(*cm));
5459243Sobrien	if (cm == NULL)
55145479Smp		return (NULL);
5659243Sobrien	cm->cm_root = NULL;
5759243Sobrien	cm->cm_def = CM_DEF_SELF;
5859243Sobrien	cm->cm_havecache = false;
59167465Smp	cm->cm_min = cm->cm_max = 0;
6059243Sobrien	return (cm);
61167465Smp}
62167465Smp
63167465Smp/*
6459243Sobrien * cmap_add --
6559243Sobrien *	Add a mapping from "from" to "to" to the map.
6659243Sobrien */
6759243Sobrienbool
6859243Sobriencmap_add(struct cmap *cm, wint_t from, wint_t to)
6959243Sobrien{
7059243Sobrien	struct cmapnode *cmn, *ncmn;
7159243Sobrien
7259243Sobrien	cm->cm_havecache = false;
7359243Sobrien
7459243Sobrien	if (cm->cm_root == NULL) {
7559243Sobrien		cmn = malloc(sizeof(*cmn));
7659243Sobrien		if (cmn == NULL)
7759243Sobrien			return (false);
7859243Sobrien		cmn->cmn_from = from;
7959243Sobrien		cmn->cmn_to = to;
8059243Sobrien		cmn->cmn_left = cmn->cmn_right = NULL;
8159243Sobrien		cm->cm_root = cmn;
8259243Sobrien		cm->cm_min = cm->cm_max = from;
8359243Sobrien		return (true);
8459243Sobrien	}
8559243Sobrien
8659243Sobrien	cmn = cm->cm_root = cmap_splay(cm->cm_root, from);
87167465Smp
88167465Smp	if (cmn->cmn_from == from) {
8959243Sobrien		cmn->cmn_to = to;
90145479Smp		return (true);
91167465Smp	}
92167465Smp
9359243Sobrien	ncmn = malloc(sizeof(*ncmn));
94167465Smp	if (ncmn == NULL)
95167465Smp		return (false);
9659243Sobrien	ncmn->cmn_from = from;
9759243Sobrien	ncmn->cmn_to = to;
9859243Sobrien	if (from < cmn->cmn_from) {
9959243Sobrien		ncmn->cmn_left = cmn->cmn_left;
10059243Sobrien		ncmn->cmn_right = cmn;
10159243Sobrien		cmn->cmn_left = NULL;
10259243Sobrien	} else {
10359243Sobrien		ncmn->cmn_right = cmn->cmn_right;
10459243Sobrien		ncmn->cmn_left = cmn;
10559243Sobrien		cmn->cmn_right = NULL;
10659243Sobrien	}
10759243Sobrien	if (from < cm->cm_min)
10869408Sache		cm->cm_min = from;
10959243Sobrien	if (from > cm->cm_max)
11059243Sobrien		cm->cm_max = from;
11159243Sobrien        cm->cm_root = ncmn;
11259243Sobrien
11359243Sobrien	return (true);
11459243Sobrien}
11559243Sobrien
11659243Sobrien/*
11759243Sobrien * cmap_lookup_hard --
11859243Sobrien *	Look up the mapping for a character using the cache.
11959243Sobrien */
12059243Sobrienwint_t
12169408Sachecmap_lookup_hard(struct cmap *cm, wint_t ch)
12259243Sobrien{
12359243Sobrien
12459243Sobrien	if (cm->cm_root != NULL) {
12559243Sobrien		cm->cm_root = cmap_splay(cm->cm_root, ch);
12659243Sobrien		if (cm->cm_root->cmn_from == ch)
12759243Sobrien			return (cm->cm_root->cmn_to);
12859243Sobrien	}
12959243Sobrien	return (cm->cm_def == CM_DEF_SELF ? ch : cm->cm_def);
130167465Smp}
13159243Sobrien
13259243Sobrien/*
13359243Sobrien * cmap_cache --
134167465Smp *	Update the cache.
135167465Smp */
136167465Smpvoid
137167465Smpcmap_cache(struct cmap *cm)
13859243Sobrien{
139167465Smp	wint_t ch;
140167465Smp
141167465Smp	for (ch = 0; ch < CM_CACHE_SIZE; ch++)
142167465Smp		cm->cm_cache[ch] = cmap_lookup_hard(cm, ch);
143167465Smp
144167465Smp	cm->cm_havecache = true;
145167465Smp}
146167465Smp
147167465Smp/*
148167465Smp * cmap_default --
149167465Smp *	Change the value that characters without mappings map to, and
150167465Smp *	return the old value. The special character value CM_MAP_SELF
151167465Smp *	means characters map to themselves.
152167465Smp */
153167465Smpwint_t
154167465Smpcmap_default(struct cmap *cm, wint_t def)
155167465Smp{
156167465Smp	wint_t old;
157167465Smp
15859243Sobrien	old = cm->cm_def;
159167465Smp	cm->cm_def = def;
16059243Sobrien	cm->cm_havecache = false;
161167465Smp	return (old);
16259243Sobrien}
163167465Smp
164167465Smpstatic struct cmapnode *
16569408Sachecmap_splay(struct cmapnode *t, wint_t ch)
166167465Smp{
16769408Sache	struct cmapnode N, *l, *r, *y;
168167465Smp
16959243Sobrien	/*
17059243Sobrien	 * Based on public domain code from Sleator.
17159243Sobrien	 */
17259243Sobrien
17359243Sobrien	assert(t != NULL);
17459243Sobrien
175167465Smp	N.cmn_left = N.cmn_right = NULL;
17659243Sobrien	l = r = &N;
17759243Sobrien	for (;;) {
17859243Sobrien		if (ch < t->cmn_from) {
17959243Sobrien			if (t->cmn_left != NULL &&
18059243Sobrien			    ch < t->cmn_left->cmn_from) {
18159243Sobrien				y = t->cmn_left;
18259243Sobrien				t->cmn_left = y->cmn_right;
18359243Sobrien				y->cmn_right = t;
18459243Sobrien				t = y;
18559243Sobrien			}
18659243Sobrien			if (t->cmn_left == NULL)
18759243Sobrien				break;
18859243Sobrien			r->cmn_left = t;
18959243Sobrien			r = t;
19059243Sobrien			t = t->cmn_left;
19159243Sobrien		} else if (ch > t->cmn_from) {
19259243Sobrien			if (t->cmn_right != NULL &&
193167465Smp			    ch > t->cmn_right->cmn_from) {
194167465Smp				y = t->cmn_right;
19559243Sobrien				t->cmn_right = y->cmn_left;
19659243Sobrien				y->cmn_left = t;
19759243Sobrien				t = y;
19859243Sobrien			}
19959243Sobrien			if (t->cmn_right == NULL)
20059243Sobrien				break;
20159243Sobrien			l->cmn_right = t;
20259243Sobrien			l = t;
20359243Sobrien			t = t->cmn_right;
20459243Sobrien		} else
20559243Sobrien			break;
20659243Sobrien	}
20759243Sobrien	l->cmn_right = t->cmn_left;
20859243Sobrien	r->cmn_left = t->cmn_right;
20959243Sobrien	t->cmn_left = N.cmn_right;
21059243Sobrien	t->cmn_right = N.cmn_left;
21159243Sobrien	return (t);
21259243Sobrien}
21359243Sobrien