1/* Id: unicode.c,v 1.9 2015/07/19 13:20:37 ragge Exp */ 2/* $NetBSD: unicode.c,v 1.1.1.2 2016/02/09 20:29:20 plunky Exp $ */ 3/* 4 * Copyright (c) 2014 Eric Olson <ejolson@renomath.org> 5 * Some rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 21 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 22 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 24 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 26 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 28 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32#include <stdio.h> 33#include <ctype.h> 34#include "pass1.h" 35#include "manifest.h" 36#include "unicode.h" 37 38/* 39 * decode 32-bit code point from UTF-8 40 * move pointer 41 */ 42long 43u82cp(char **q) 44{ 45 unsigned char *t = (unsigned char *)*q; 46 unsigned long c, r; 47 int i, sz; 48 49 if (*t == '\\') 50 c = esccon((char **)&t); 51 else 52 c = *t++; 53 54 /* always eat the first value */ 55 *q = (char *)t; 56 57 if (c > 0x7F) { 58 if ((c & 0xE0) == 0xC0) { 59 sz = 2; 60 r = c & 0x1F; 61 } else if ((c & 0xF0) == 0xE0) { 62 sz = 3; 63 r = c & 0x0F; 64 } else if ((c & 0xF8) == 0xF0) { 65 sz = 4; 66 r = c & 0x07; 67 } else if ((c & 0xFC) == 0xF8) { 68 sz = 5; 69 r = c & 0x03; 70 } else if ((c & 0xFE) == 0xFC) { 71 sz = 6; 72 r = c & 0x01; 73 } else { 74 u8error("invalid utf-8 prefix"); 75 return 0xFFFFUL; 76 } 77 78 for (i = 1; i < sz; i++) { 79 if (*t == '\\') 80 c = esccon((char **)&t); 81 else 82 c = *t++; 83 84 if ((c & 0xC0) == 0x80) { 85 r = (r << 6) + (c & 0x3F); 86 } else { 87 u8error("utf-8 encoding %d bytes too short", sz - i); 88 return 0xFFFFUL; 89 } 90 } 91 92 *q = (char *)t; 93 } else { 94 r = c; 95 } 96 97 return r; 98} 99 100/* 101 * Create UTF-16 from unicode number. 102 * Expects s to point to two words. 103 */ 104void 105cp2u16(long num, unsigned short *s) 106{ 107 s[0] = s[1] = 0; 108 if (num <= 0xd7ff || (num >= 0xe000 && num <= 0xffffL)) { 109 *s = num; 110 } else if (num >= 0x010000L && num <= 0x10ffffL) { 111 num -= 0x010000L; 112 s[0] = ((num >> 10) + 0xd800); 113 s[1] = ((num & 0x3ff) + 0xdc00); 114 } else if (num > 0x10ffffL) 115 werror("illegal UTF-16 value"); 116} 117