1219019Sgabor/* $FreeBSD$ */ 2219019Sgabor/* $NetBSD: citrus_csmapper.c,v 1.10 2009/01/11 02:46:24 christos Exp $ */ 3219019Sgabor 4219019Sgabor/*- 5219019Sgabor * Copyright (c)2003 Citrus Project, 6219019Sgabor * All rights reserved. 7219019Sgabor * 8219019Sgabor * Redistribution and use in source and binary forms, with or without 9219019Sgabor * modification, are permitted provided that the following conditions 10219019Sgabor * are met: 11219019Sgabor * 1. Redistributions of source code must retain the above copyright 12219019Sgabor * notice, this list of conditions and the following disclaimer. 13219019Sgabor * 2. Redistributions in binary form must reproduce the above copyright 14219019Sgabor * notice, this list of conditions and the following disclaimer in the 15219019Sgabor * documentation and/or other materials provided with the distribution. 16219019Sgabor * 17219019Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18219019Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19219019Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20219019Sgabor * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21219019Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22219019Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23219019Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24219019Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25219019Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26219019Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27219019Sgabor * SUCH DAMAGE. 28219019Sgabor */ 29219019Sgabor 30219019Sgabor#include <sys/cdefs.h> 31219019Sgabor#include <sys/endian.h> 32219019Sgabor#include <sys/types.h> 33219019Sgabor#include <sys/queue.h> 34219019Sgabor 35219019Sgabor#include <assert.h> 36219019Sgabor#include <errno.h> 37219019Sgabor#include <limits.h> 38219019Sgabor#include <paths.h> 39219019Sgabor#include <stdio.h> 40219019Sgabor#include <stdlib.h> 41219019Sgabor#include <string.h> 42219019Sgabor 43219019Sgabor#include "citrus_namespace.h" 44219019Sgabor#include "citrus_types.h" 45219019Sgabor#include "citrus_bcs.h" 46219019Sgabor#include "citrus_region.h" 47219019Sgabor#include "citrus_lock.h" 48219019Sgabor#include "citrus_memstream.h" 49219019Sgabor#include "citrus_mmap.h" 50219019Sgabor#include "citrus_module.h" 51219019Sgabor#include "citrus_hash.h" 52219019Sgabor#include "citrus_mapper.h" 53219019Sgabor#include "citrus_csmapper.h" 54219019Sgabor#include "citrus_pivot_file.h" 55219019Sgabor#include "citrus_db.h" 56219019Sgabor#include "citrus_db_hash.h" 57219019Sgabor#include "citrus_lookup.h" 58219019Sgabor 59219019Sgaborstatic struct _citrus_mapper_area *maparea = NULL; 60219019Sgabor 61219019Sgabor#define CS_ALIAS _PATH_CSMAPPER "/charset.alias" 62219019Sgabor#define CS_PIVOT _PATH_CSMAPPER "/charset.pivot" 63219019Sgabor 64219019Sgabor 65219019Sgabor/* ---------------------------------------------------------------------- */ 66219019Sgabor 67219019Sgaborstatic int 68219019Sgaborget32(struct _region *r, uint32_t *rval) 69219019Sgabor{ 70219019Sgabor 71219019Sgabor if (_region_size(r) != 4) 72219019Sgabor return (EFTYPE); 73219019Sgabor 74219019Sgabor memcpy(rval, _region_head(r), (size_t)4); 75219019Sgabor *rval = be32toh(*rval); 76219019Sgabor 77219019Sgabor return (0); 78219019Sgabor} 79219019Sgabor 80219019Sgaborstatic int 81219019Sgaboropen_subdb(struct _citrus_db **subdb, struct _citrus_db *db, const char *src) 82219019Sgabor{ 83219019Sgabor struct _region r; 84219019Sgabor int ret; 85219019Sgabor 86219019Sgabor ret = _db_lookup_by_s(db, src, &r, NULL); 87219019Sgabor if (ret) 88219019Sgabor return (ret); 89219019Sgabor ret = _db_open(subdb, &r, _CITRUS_PIVOT_SUB_MAGIC, _db_hash_std, NULL); 90219019Sgabor if (ret) 91219019Sgabor return (ret); 92219019Sgabor 93219019Sgabor return (0); 94219019Sgabor} 95219019Sgabor 96219019Sgabor 97219019Sgabor#define NO_SUCH_FILE EOPNOTSUPP 98219019Sgaborstatic int 99219019Sgaborfind_best_pivot_pvdb(const char *src, const char *dst, char *pivot, 100219019Sgabor size_t pvlen, unsigned long *rnorm) 101219019Sgabor{ 102219019Sgabor struct _citrus_db *db1, *db2, *db3; 103219019Sgabor struct _region fr, r1, r2; 104219019Sgabor char buf[LINE_MAX]; 105219019Sgabor uint32_t val32; 106219019Sgabor unsigned long norm; 107219019Sgabor int i, num, ret; 108219019Sgabor 109219019Sgabor ret = _map_file(&fr, CS_PIVOT ".pvdb"); 110219019Sgabor if (ret) { 111219019Sgabor if (ret == ENOENT) 112219019Sgabor ret = NO_SUCH_FILE; 113219019Sgabor return (ret); 114219019Sgabor } 115219019Sgabor ret = _db_open(&db1, &fr, _CITRUS_PIVOT_MAGIC, _db_hash_std, NULL); 116219019Sgabor if (ret) 117219019Sgabor goto quit1; 118219019Sgabor ret = open_subdb(&db2, db1, src); 119219019Sgabor if (ret) 120219019Sgabor goto quit2; 121219019Sgabor 122219019Sgabor num = _db_get_num_entries(db2); 123219019Sgabor *rnorm = ULONG_MAX; 124219019Sgabor for (i = 0; i < num; i++) { 125219019Sgabor /* iterate each pivot */ 126219019Sgabor ret = _db_get_entry(db2, i, &r1, &r2); 127219019Sgabor if (ret) 128219019Sgabor goto quit3; 129219019Sgabor /* r1:pivot name, r2:norm among src and pivot */ 130219019Sgabor ret = get32(&r2, &val32); 131219019Sgabor if (ret) 132219019Sgabor goto quit3; 133219019Sgabor norm = val32; 134219019Sgabor snprintf(buf, sizeof(buf), "%.*s", 135219019Sgabor (int)_region_size(&r1), (char *)_region_head(&r1)); 136219019Sgabor /* buf: pivot name */ 137219019Sgabor ret = open_subdb(&db3, db1, buf); 138219019Sgabor if (ret) 139219019Sgabor goto quit3; 140219019Sgabor if (_db_lookup_by_s(db3, dst, &r2, NULL) != 0) 141219019Sgabor goto quit4; 142219019Sgabor /* r2: norm among pivot and dst */ 143219019Sgabor ret = get32(&r2, &val32); 144219019Sgabor if (ret) 145219019Sgabor goto quit4; 146219019Sgabor norm += val32; 147219019Sgabor /* judge minimum norm */ 148219019Sgabor if (norm < *rnorm) { 149219019Sgabor *rnorm = norm; 150219019Sgabor strlcpy(pivot, buf, pvlen); 151219019Sgabor } 152219019Sgaborquit4: 153219019Sgabor _db_close(db3); 154219019Sgabor if (ret) 155219019Sgabor goto quit3; 156219019Sgabor } 157219019Sgaborquit3: 158219019Sgabor _db_close(db2); 159219019Sgaborquit2: 160219019Sgabor _db_close(db1); 161219019Sgaborquit1: 162219019Sgabor _unmap_file(&fr); 163219019Sgabor if (ret) 164219019Sgabor return (ret); 165219019Sgabor 166219019Sgabor if (*rnorm == ULONG_MAX) 167219019Sgabor return (ENOENT); 168219019Sgabor 169219019Sgabor return (0); 170219019Sgabor} 171219019Sgabor 172219019Sgabor/* ---------------------------------------------------------------------- */ 173219019Sgabor 174219019Sgaborstruct zone { 175219019Sgabor const char *begin, *end; 176219019Sgabor}; 177219019Sgabor 178219019Sgaborstruct parse_arg { 179219019Sgabor char dst[PATH_MAX]; 180219019Sgabor unsigned long norm; 181219019Sgabor}; 182219019Sgabor 183219019Sgaborstatic int 184219019Sgaborparse_line(struct parse_arg *pa, struct _region *r) 185219019Sgabor{ 186219019Sgabor struct zone z1, z2; 187219019Sgabor char buf[20]; 188219019Sgabor size_t len; 189219019Sgabor 190219019Sgabor len = _region_size(r); 191219019Sgabor z1.begin = _bcs_skip_ws_len(_region_head(r), &len); 192219019Sgabor if (len == 0) 193219019Sgabor return (EFTYPE); 194219019Sgabor z1.end = _bcs_skip_nonws_len(z1.begin, &len); 195219019Sgabor if (len == 0) 196219019Sgabor return (EFTYPE); 197219019Sgabor z2.begin = _bcs_skip_ws_len(z1.end, &len); 198219019Sgabor if (len == 0) 199219019Sgabor return (EFTYPE); 200219019Sgabor z2.end = _bcs_skip_nonws_len(z2.begin, &len); 201219019Sgabor 202219019Sgabor /* z1 : dst name, z2 : norm */ 203219019Sgabor snprintf(pa->dst, sizeof(pa->dst), 204219019Sgabor "%.*s", (int)(z1.end-z1.begin), z1.begin); 205219019Sgabor snprintf(buf, sizeof(buf), 206219019Sgabor "%.*s", (int)(z2.end-z2.begin), z2.begin); 207219019Sgabor pa->norm = _bcs_strtoul(buf, NULL, 0); 208219019Sgabor 209219019Sgabor return (0); 210219019Sgabor} 211219019Sgabor 212219019Sgaborstatic int 213219019Sgaborfind_dst(struct parse_arg *pasrc, const char *dst) 214219019Sgabor{ 215219019Sgabor struct _lookup *cl; 216219019Sgabor struct parse_arg padst; 217219019Sgabor struct _region data; 218219019Sgabor int ret; 219219019Sgabor 220219019Sgabor ret = _lookup_seq_open(&cl, CS_PIVOT, _LOOKUP_CASE_IGNORE); 221219019Sgabor if (ret) 222219019Sgabor return (ret); 223219019Sgabor 224219019Sgabor ret = _lookup_seq_lookup(cl, pasrc->dst, &data); 225219019Sgabor while (ret == 0) { 226219019Sgabor ret = parse_line(&padst, &data); 227219019Sgabor if (ret) 228219019Sgabor break; 229219019Sgabor if (strcmp(dst, padst.dst) == 0) { 230219019Sgabor pasrc->norm += padst.norm; 231219019Sgabor break; 232219019Sgabor } 233219019Sgabor ret = _lookup_seq_next(cl, NULL, &data); 234219019Sgabor } 235219019Sgabor _lookup_seq_close(cl); 236219019Sgabor 237219019Sgabor return (ret); 238219019Sgabor} 239219019Sgabor 240219019Sgaborstatic int 241219019Sgaborfind_best_pivot_lookup(const char *src, const char *dst, char *pivot, 242219019Sgabor size_t pvlen, unsigned long *rnorm) 243219019Sgabor{ 244219019Sgabor struct _lookup *cl; 245219019Sgabor struct _region data; 246219019Sgabor struct parse_arg pa; 247219019Sgabor char pivot_min[PATH_MAX]; 248219019Sgabor unsigned long norm_min; 249219019Sgabor int ret; 250219019Sgabor 251219019Sgabor ret = _lookup_seq_open(&cl, CS_PIVOT, _LOOKUP_CASE_IGNORE); 252219019Sgabor if (ret) 253219019Sgabor return (ret); 254219019Sgabor 255219019Sgabor norm_min = ULONG_MAX; 256219019Sgabor 257219019Sgabor /* find pivot code */ 258219019Sgabor ret = _lookup_seq_lookup(cl, src, &data); 259219019Sgabor while (ret == 0) { 260219019Sgabor ret = parse_line(&pa, &data); 261219019Sgabor if (ret) 262219019Sgabor break; 263219019Sgabor ret = find_dst(&pa, dst); 264219019Sgabor if (ret) 265219019Sgabor break; 266219019Sgabor if (pa.norm < norm_min) { 267219019Sgabor norm_min = pa.norm; 268219019Sgabor strlcpy(pivot_min, pa.dst, sizeof(pivot_min)); 269219019Sgabor } 270219019Sgabor ret = _lookup_seq_next(cl, NULL, &data); 271219019Sgabor } 272219019Sgabor _lookup_seq_close(cl); 273219019Sgabor 274219019Sgabor if (ret != ENOENT) 275219019Sgabor return (ret); 276219019Sgabor if (norm_min == ULONG_MAX) 277219019Sgabor return (ENOENT); 278219019Sgabor strlcpy(pivot, pivot_min, pvlen); 279219019Sgabor if (rnorm) 280219019Sgabor *rnorm = norm_min; 281219019Sgabor 282219019Sgabor return (0); 283219019Sgabor} 284219019Sgabor 285219019Sgaborstatic int 286219019Sgaborfind_best_pivot(const char *src, const char *dst, char *pivot, size_t pvlen, 287219019Sgabor unsigned long *rnorm) 288219019Sgabor{ 289219019Sgabor int ret; 290219019Sgabor 291219019Sgabor ret = find_best_pivot_pvdb(src, dst, pivot, pvlen, rnorm); 292219019Sgabor if (ret == NO_SUCH_FILE) 293219019Sgabor ret = find_best_pivot_lookup(src, dst, pivot, pvlen, rnorm); 294219019Sgabor 295219019Sgabor return (ret); 296219019Sgabor} 297219019Sgabor 298219019Sgaborstatic __inline int 299219019Sgaboropen_serial_mapper(struct _citrus_mapper_area *__restrict ma, 300219019Sgabor struct _citrus_mapper * __restrict * __restrict rcm, 301219019Sgabor const char *src, const char *pivot, const char *dst) 302219019Sgabor{ 303219019Sgabor char buf[PATH_MAX]; 304219019Sgabor 305219019Sgabor snprintf(buf, sizeof(buf), "%s/%s,%s/%s", src, pivot, pivot, dst); 306219019Sgabor 307219019Sgabor return (_mapper_open_direct(ma, rcm, "mapper_serial", buf)); 308219019Sgabor} 309219019Sgabor 310219019Sgaborstatic struct _citrus_csmapper *csm_none = NULL; 311219019Sgaborstatic int 312219019Sgaborget_none(struct _citrus_mapper_area *__restrict ma, 313219019Sgabor struct _citrus_csmapper *__restrict *__restrict rcsm) 314219019Sgabor{ 315219019Sgabor int ret; 316219019Sgabor 317219019Sgabor WLOCK; 318219019Sgabor if (csm_none) { 319219019Sgabor *rcsm = csm_none; 320219019Sgabor ret = 0; 321219019Sgabor goto quit; 322219019Sgabor } 323219019Sgabor 324219019Sgabor ret = _mapper_open_direct(ma, &csm_none, "mapper_none", ""); 325219019Sgabor if (ret) 326219019Sgabor goto quit; 327219019Sgabor _mapper_set_persistent(csm_none); 328219019Sgabor 329219019Sgabor *rcsm = csm_none; 330219019Sgabor ret = 0; 331219019Sgaborquit: 332219019Sgabor UNLOCK; 333219019Sgabor return (ret); 334219019Sgabor} 335219019Sgabor 336219019Sgaborint 337219019Sgabor_citrus_csmapper_open(struct _citrus_csmapper * __restrict * __restrict rcsm, 338219019Sgabor const char * __restrict src, const char * __restrict dst, uint32_t flags, 339219019Sgabor unsigned long *rnorm) 340219019Sgabor{ 341219019Sgabor const char *realsrc, *realdst; 342219019Sgabor char buf1[PATH_MAX], buf2[PATH_MAX], key[PATH_MAX], pivot[PATH_MAX]; 343219019Sgabor unsigned long norm; 344219019Sgabor int ret; 345219019Sgabor 346219019Sgabor norm = 0; 347219019Sgabor 348219019Sgabor ret = _citrus_mapper_create_area(&maparea, _PATH_CSMAPPER); 349219019Sgabor if (ret) 350219019Sgabor return (ret); 351219019Sgabor 352219019Sgabor realsrc = _lookup_alias(CS_ALIAS, src, buf1, sizeof(buf1), 353219019Sgabor _LOOKUP_CASE_IGNORE); 354219019Sgabor realdst = _lookup_alias(CS_ALIAS, dst, buf2, sizeof(buf2), 355219019Sgabor _LOOKUP_CASE_IGNORE); 356219019Sgabor if (!strcmp(realsrc, realdst)) { 357219019Sgabor ret = get_none(maparea, rcsm); 358219019Sgabor if (ret == 0 && rnorm != NULL) 359219019Sgabor *rnorm = 0; 360219019Sgabor return (ret); 361219019Sgabor } 362219019Sgabor 363219019Sgabor snprintf(key, sizeof(key), "%s/%s", realsrc, realdst); 364219019Sgabor 365219019Sgabor ret = _mapper_open(maparea, rcsm, key); 366219019Sgabor if (ret == 0) { 367219019Sgabor if (rnorm != NULL) 368219019Sgabor *rnorm = 0; 369219019Sgabor return (0); 370219019Sgabor } 371219019Sgabor if (ret != ENOENT || (flags & _CSMAPPER_F_PREVENT_PIVOT)!=0) 372219019Sgabor return (ret); 373219019Sgabor 374219019Sgabor ret = find_best_pivot(realsrc, realdst, pivot, sizeof(pivot), &norm); 375219019Sgabor if (ret) 376219019Sgabor return (ret); 377219019Sgabor 378219019Sgabor ret = open_serial_mapper(maparea, rcsm, realsrc, pivot, realdst); 379219019Sgabor if (ret == 0 && rnorm != NULL) 380219019Sgabor *rnorm = norm; 381219019Sgabor 382219019Sgabor return (ret); 383219019Sgabor} 384