1219019Sgabor/* $FreeBSD$ */ 2219019Sgabor/* $NetBSD: citrus_csmapper.c,v 1.10 2009/01/11 02:46:24 christos Exp $ */ 3219019Sgabor 4219019Sgabor/*- 5219019Sgabor * Copyright (c)2003 Citrus Project, 6219019Sgabor * All rights reserved. 7219019Sgabor * 8219019Sgabor * Redistribution and use in source and binary forms, with or without 9219019Sgabor * modification, are permitted provided that the following conditions 10219019Sgabor * are met: 11219019Sgabor * 1. Redistributions of source code must retain the above copyright 12219019Sgabor * notice, this list of conditions and the following disclaimer. 13219019Sgabor * 2. Redistributions in binary form must reproduce the above copyright 14219019Sgabor * notice, this list of conditions and the following disclaimer in the 15219019Sgabor * documentation and/or other materials provided with the distribution. 16219019Sgabor * 17219019Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18219019Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19219019Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20219019Sgabor * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21219019Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22219019Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23219019Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24219019Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25219019Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26219019Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27219019Sgabor * SUCH DAMAGE. 28219019Sgabor */ 29219019Sgabor 30219019Sgabor#include <sys/cdefs.h> 31219019Sgabor#include <sys/endian.h> 32219019Sgabor#include <sys/types.h> 33219019Sgabor#include <sys/queue.h> 34219019Sgabor 35219019Sgabor#include <assert.h> 36219019Sgabor#include <errno.h> 37219019Sgabor#include <limits.h> 38219019Sgabor#include <paths.h> 39219019Sgabor#include <stdio.h> 40219019Sgabor#include <stdlib.h> 41219019Sgabor#include <string.h> 42219019Sgabor 43219019Sgabor#include "citrus_namespace.h" 44219019Sgabor#include "citrus_types.h" 45219019Sgabor#include "citrus_bcs.h" 46219019Sgabor#include "citrus_region.h" 47219019Sgabor#include "citrus_lock.h" 48219019Sgabor#include "citrus_memstream.h" 49219019Sgabor#include "citrus_mmap.h" 50219019Sgabor#include "citrus_module.h" 51219019Sgabor#include "citrus_hash.h" 52219019Sgabor#include "citrus_mapper.h" 53219019Sgabor#include "citrus_csmapper.h" 54219019Sgabor#include "citrus_pivot_file.h" 55219019Sgabor#include "citrus_db.h" 56219019Sgabor#include "citrus_db_hash.h" 57219019Sgabor#include "citrus_lookup.h" 58219019Sgabor 59219019Sgaborstatic struct _citrus_mapper_area *maparea = NULL; 60219019Sgabor 61252584Speterstatic pthread_rwlock_t ma_lock = PTHREAD_RWLOCK_INITIALIZER; 62252584Speter 63219019Sgabor#define CS_ALIAS _PATH_CSMAPPER "/charset.alias" 64219019Sgabor#define CS_PIVOT _PATH_CSMAPPER "/charset.pivot" 65219019Sgabor 66219019Sgabor 67219019Sgabor/* ---------------------------------------------------------------------- */ 68219019Sgabor 69219019Sgaborstatic int 70219019Sgaborget32(struct _region *r, uint32_t *rval) 71219019Sgabor{ 72219019Sgabor 73219019Sgabor if (_region_size(r) != 4) 74219019Sgabor return (EFTYPE); 75219019Sgabor 76219019Sgabor memcpy(rval, _region_head(r), (size_t)4); 77219019Sgabor *rval = be32toh(*rval); 78219019Sgabor 79219019Sgabor return (0); 80219019Sgabor} 81219019Sgabor 82219019Sgaborstatic int 83219019Sgaboropen_subdb(struct _citrus_db **subdb, struct _citrus_db *db, const char *src) 84219019Sgabor{ 85219019Sgabor struct _region r; 86219019Sgabor int ret; 87219019Sgabor 88219019Sgabor ret = _db_lookup_by_s(db, src, &r, NULL); 89219019Sgabor if (ret) 90219019Sgabor return (ret); 91219019Sgabor ret = _db_open(subdb, &r, _CITRUS_PIVOT_SUB_MAGIC, _db_hash_std, NULL); 92219019Sgabor if (ret) 93219019Sgabor return (ret); 94219019Sgabor 95219019Sgabor return (0); 96219019Sgabor} 97219019Sgabor 98219019Sgabor 99219019Sgabor#define NO_SUCH_FILE EOPNOTSUPP 100219019Sgaborstatic int 101219019Sgaborfind_best_pivot_pvdb(const char *src, const char *dst, char *pivot, 102219019Sgabor size_t pvlen, unsigned long *rnorm) 103219019Sgabor{ 104219019Sgabor struct _citrus_db *db1, *db2, *db3; 105219019Sgabor struct _region fr, r1, r2; 106219019Sgabor char buf[LINE_MAX]; 107219019Sgabor uint32_t val32; 108219019Sgabor unsigned long norm; 109219019Sgabor int i, num, ret; 110219019Sgabor 111219019Sgabor ret = _map_file(&fr, CS_PIVOT ".pvdb"); 112219019Sgabor if (ret) { 113219019Sgabor if (ret == ENOENT) 114219019Sgabor ret = NO_SUCH_FILE; 115219019Sgabor return (ret); 116219019Sgabor } 117219019Sgabor ret = _db_open(&db1, &fr, _CITRUS_PIVOT_MAGIC, _db_hash_std, NULL); 118219019Sgabor if (ret) 119219019Sgabor goto quit1; 120219019Sgabor ret = open_subdb(&db2, db1, src); 121219019Sgabor if (ret) 122219019Sgabor goto quit2; 123219019Sgabor 124219019Sgabor num = _db_get_num_entries(db2); 125219019Sgabor *rnorm = ULONG_MAX; 126219019Sgabor for (i = 0; i < num; i++) { 127219019Sgabor /* iterate each pivot */ 128219019Sgabor ret = _db_get_entry(db2, i, &r1, &r2); 129219019Sgabor if (ret) 130219019Sgabor goto quit3; 131219019Sgabor /* r1:pivot name, r2:norm among src and pivot */ 132219019Sgabor ret = get32(&r2, &val32); 133219019Sgabor if (ret) 134219019Sgabor goto quit3; 135219019Sgabor norm = val32; 136219019Sgabor snprintf(buf, sizeof(buf), "%.*s", 137219019Sgabor (int)_region_size(&r1), (char *)_region_head(&r1)); 138219019Sgabor /* buf: pivot name */ 139219019Sgabor ret = open_subdb(&db3, db1, buf); 140219019Sgabor if (ret) 141219019Sgabor goto quit3; 142219019Sgabor if (_db_lookup_by_s(db3, dst, &r2, NULL) != 0) 143219019Sgabor goto quit4; 144219019Sgabor /* r2: norm among pivot and dst */ 145219019Sgabor ret = get32(&r2, &val32); 146219019Sgabor if (ret) 147219019Sgabor goto quit4; 148219019Sgabor norm += val32; 149219019Sgabor /* judge minimum norm */ 150219019Sgabor if (norm < *rnorm) { 151219019Sgabor *rnorm = norm; 152219019Sgabor strlcpy(pivot, buf, pvlen); 153219019Sgabor } 154219019Sgaborquit4: 155219019Sgabor _db_close(db3); 156219019Sgabor if (ret) 157219019Sgabor goto quit3; 158219019Sgabor } 159219019Sgaborquit3: 160219019Sgabor _db_close(db2); 161219019Sgaborquit2: 162219019Sgabor _db_close(db1); 163219019Sgaborquit1: 164219019Sgabor _unmap_file(&fr); 165219019Sgabor if (ret) 166219019Sgabor return (ret); 167219019Sgabor 168219019Sgabor if (*rnorm == ULONG_MAX) 169219019Sgabor return (ENOENT); 170219019Sgabor 171219019Sgabor return (0); 172219019Sgabor} 173219019Sgabor 174219019Sgabor/* ---------------------------------------------------------------------- */ 175219019Sgabor 176219019Sgaborstruct zone { 177219019Sgabor const char *begin, *end; 178219019Sgabor}; 179219019Sgabor 180219019Sgaborstruct parse_arg { 181219019Sgabor char dst[PATH_MAX]; 182219019Sgabor unsigned long norm; 183219019Sgabor}; 184219019Sgabor 185219019Sgaborstatic int 186219019Sgaborparse_line(struct parse_arg *pa, struct _region *r) 187219019Sgabor{ 188219019Sgabor struct zone z1, z2; 189219019Sgabor char buf[20]; 190219019Sgabor size_t len; 191219019Sgabor 192219019Sgabor len = _region_size(r); 193219019Sgabor z1.begin = _bcs_skip_ws_len(_region_head(r), &len); 194219019Sgabor if (len == 0) 195219019Sgabor return (EFTYPE); 196219019Sgabor z1.end = _bcs_skip_nonws_len(z1.begin, &len); 197219019Sgabor if (len == 0) 198219019Sgabor return (EFTYPE); 199219019Sgabor z2.begin = _bcs_skip_ws_len(z1.end, &len); 200219019Sgabor if (len == 0) 201219019Sgabor return (EFTYPE); 202219019Sgabor z2.end = _bcs_skip_nonws_len(z2.begin, &len); 203219019Sgabor 204219019Sgabor /* z1 : dst name, z2 : norm */ 205219019Sgabor snprintf(pa->dst, sizeof(pa->dst), 206219019Sgabor "%.*s", (int)(z1.end-z1.begin), z1.begin); 207219019Sgabor snprintf(buf, sizeof(buf), 208219019Sgabor "%.*s", (int)(z2.end-z2.begin), z2.begin); 209219019Sgabor pa->norm = _bcs_strtoul(buf, NULL, 0); 210219019Sgabor 211219019Sgabor return (0); 212219019Sgabor} 213219019Sgabor 214219019Sgaborstatic int 215219019Sgaborfind_dst(struct parse_arg *pasrc, const char *dst) 216219019Sgabor{ 217219019Sgabor struct _lookup *cl; 218219019Sgabor struct parse_arg padst; 219219019Sgabor struct _region data; 220219019Sgabor int ret; 221219019Sgabor 222219019Sgabor ret = _lookup_seq_open(&cl, CS_PIVOT, _LOOKUP_CASE_IGNORE); 223219019Sgabor if (ret) 224219019Sgabor return (ret); 225219019Sgabor 226219019Sgabor ret = _lookup_seq_lookup(cl, pasrc->dst, &data); 227219019Sgabor while (ret == 0) { 228219019Sgabor ret = parse_line(&padst, &data); 229219019Sgabor if (ret) 230219019Sgabor break; 231219019Sgabor if (strcmp(dst, padst.dst) == 0) { 232219019Sgabor pasrc->norm += padst.norm; 233219019Sgabor break; 234219019Sgabor } 235219019Sgabor ret = _lookup_seq_next(cl, NULL, &data); 236219019Sgabor } 237219019Sgabor _lookup_seq_close(cl); 238219019Sgabor 239219019Sgabor return (ret); 240219019Sgabor} 241219019Sgabor 242219019Sgaborstatic int 243219019Sgaborfind_best_pivot_lookup(const char *src, const char *dst, char *pivot, 244219019Sgabor size_t pvlen, unsigned long *rnorm) 245219019Sgabor{ 246219019Sgabor struct _lookup *cl; 247219019Sgabor struct _region data; 248219019Sgabor struct parse_arg pa; 249219019Sgabor char pivot_min[PATH_MAX]; 250219019Sgabor unsigned long norm_min; 251219019Sgabor int ret; 252219019Sgabor 253219019Sgabor ret = _lookup_seq_open(&cl, CS_PIVOT, _LOOKUP_CASE_IGNORE); 254219019Sgabor if (ret) 255219019Sgabor return (ret); 256219019Sgabor 257219019Sgabor norm_min = ULONG_MAX; 258219019Sgabor 259219019Sgabor /* find pivot code */ 260219019Sgabor ret = _lookup_seq_lookup(cl, src, &data); 261219019Sgabor while (ret == 0) { 262219019Sgabor ret = parse_line(&pa, &data); 263219019Sgabor if (ret) 264219019Sgabor break; 265219019Sgabor ret = find_dst(&pa, dst); 266219019Sgabor if (ret) 267219019Sgabor break; 268219019Sgabor if (pa.norm < norm_min) { 269219019Sgabor norm_min = pa.norm; 270219019Sgabor strlcpy(pivot_min, pa.dst, sizeof(pivot_min)); 271219019Sgabor } 272219019Sgabor ret = _lookup_seq_next(cl, NULL, &data); 273219019Sgabor } 274219019Sgabor _lookup_seq_close(cl); 275219019Sgabor 276219019Sgabor if (ret != ENOENT) 277219019Sgabor return (ret); 278219019Sgabor if (norm_min == ULONG_MAX) 279219019Sgabor return (ENOENT); 280219019Sgabor strlcpy(pivot, pivot_min, pvlen); 281219019Sgabor if (rnorm) 282219019Sgabor *rnorm = norm_min; 283219019Sgabor 284219019Sgabor return (0); 285219019Sgabor} 286219019Sgabor 287219019Sgaborstatic int 288219019Sgaborfind_best_pivot(const char *src, const char *dst, char *pivot, size_t pvlen, 289219019Sgabor unsigned long *rnorm) 290219019Sgabor{ 291219019Sgabor int ret; 292219019Sgabor 293219019Sgabor ret = find_best_pivot_pvdb(src, dst, pivot, pvlen, rnorm); 294219019Sgabor if (ret == NO_SUCH_FILE) 295219019Sgabor ret = find_best_pivot_lookup(src, dst, pivot, pvlen, rnorm); 296219019Sgabor 297219019Sgabor return (ret); 298219019Sgabor} 299219019Sgabor 300219019Sgaborstatic __inline int 301219019Sgaboropen_serial_mapper(struct _citrus_mapper_area *__restrict ma, 302219019Sgabor struct _citrus_mapper * __restrict * __restrict rcm, 303219019Sgabor const char *src, const char *pivot, const char *dst) 304219019Sgabor{ 305219019Sgabor char buf[PATH_MAX]; 306219019Sgabor 307219019Sgabor snprintf(buf, sizeof(buf), "%s/%s,%s/%s", src, pivot, pivot, dst); 308219019Sgabor 309219019Sgabor return (_mapper_open_direct(ma, rcm, "mapper_serial", buf)); 310219019Sgabor} 311219019Sgabor 312219019Sgaborstatic struct _citrus_csmapper *csm_none = NULL; 313219019Sgaborstatic int 314219019Sgaborget_none(struct _citrus_mapper_area *__restrict ma, 315219019Sgabor struct _citrus_csmapper *__restrict *__restrict rcsm) 316219019Sgabor{ 317219019Sgabor int ret; 318219019Sgabor 319252584Speter WLOCK(&ma_lock); 320219019Sgabor if (csm_none) { 321219019Sgabor *rcsm = csm_none; 322219019Sgabor ret = 0; 323219019Sgabor goto quit; 324219019Sgabor } 325219019Sgabor 326219019Sgabor ret = _mapper_open_direct(ma, &csm_none, "mapper_none", ""); 327219019Sgabor if (ret) 328219019Sgabor goto quit; 329219019Sgabor _mapper_set_persistent(csm_none); 330219019Sgabor 331219019Sgabor *rcsm = csm_none; 332219019Sgabor ret = 0; 333219019Sgaborquit: 334252584Speter UNLOCK(&ma_lock); 335219019Sgabor return (ret); 336219019Sgabor} 337219019Sgabor 338219019Sgaborint 339219019Sgabor_citrus_csmapper_open(struct _citrus_csmapper * __restrict * __restrict rcsm, 340219019Sgabor const char * __restrict src, const char * __restrict dst, uint32_t flags, 341219019Sgabor unsigned long *rnorm) 342219019Sgabor{ 343219019Sgabor const char *realsrc, *realdst; 344219019Sgabor char buf1[PATH_MAX], buf2[PATH_MAX], key[PATH_MAX], pivot[PATH_MAX]; 345219019Sgabor unsigned long norm; 346219019Sgabor int ret; 347219019Sgabor 348219019Sgabor norm = 0; 349219019Sgabor 350219019Sgabor ret = _citrus_mapper_create_area(&maparea, _PATH_CSMAPPER); 351219019Sgabor if (ret) 352219019Sgabor return (ret); 353219019Sgabor 354219019Sgabor realsrc = _lookup_alias(CS_ALIAS, src, buf1, sizeof(buf1), 355219019Sgabor _LOOKUP_CASE_IGNORE); 356219019Sgabor realdst = _lookup_alias(CS_ALIAS, dst, buf2, sizeof(buf2), 357219019Sgabor _LOOKUP_CASE_IGNORE); 358219019Sgabor if (!strcmp(realsrc, realdst)) { 359219019Sgabor ret = get_none(maparea, rcsm); 360219019Sgabor if (ret == 0 && rnorm != NULL) 361219019Sgabor *rnorm = 0; 362219019Sgabor return (ret); 363219019Sgabor } 364219019Sgabor 365219019Sgabor snprintf(key, sizeof(key), "%s/%s", realsrc, realdst); 366219019Sgabor 367219019Sgabor ret = _mapper_open(maparea, rcsm, key); 368219019Sgabor if (ret == 0) { 369219019Sgabor if (rnorm != NULL) 370219019Sgabor *rnorm = 0; 371219019Sgabor return (0); 372219019Sgabor } 373219019Sgabor if (ret != ENOENT || (flags & _CSMAPPER_F_PREVENT_PIVOT)!=0) 374219019Sgabor return (ret); 375219019Sgabor 376219019Sgabor ret = find_best_pivot(realsrc, realdst, pivot, sizeof(pivot), &norm); 377219019Sgabor if (ret) 378219019Sgabor return (ret); 379219019Sgabor 380219019Sgabor ret = open_serial_mapper(maparea, rcsm, realsrc, pivot, realdst); 381219019Sgabor if (ret == 0 && rnorm != NULL) 382219019Sgabor *rnorm = norm; 383219019Sgabor 384219019Sgabor return (ret); 385219019Sgabor} 386