efichar.c revision 329175
1/*-
2 * Copyright (c) 2010 Marcel Moolenaar
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27#include <sys/cdefs.h>
28__FBSDID("$FreeBSD: stable/11/stand/efi/libefi/efichar.c 329175 2018-02-12 17:44:35Z kevans $");
29
30#include <sys/types.h>
31#include <errno.h>
32#ifdef _STANDALONE
33#include <stand.h>
34#else
35#include <stddef.h>
36#include <stdlib.h>
37#include <string.h>
38#include <unistd.h>
39#include <sys/efi.h>
40#include <machine/efi.h>
41#endif
42
43#include "efichar.h"
44
45int
46ucs2len(const efi_char *str)
47{
48	int i;
49
50	i = 0;
51	while (*str++)
52		i++;
53	return (i);
54}
55
56/*
57 * If nm were converted to utf8, what what would strlen
58 * return on the resulting string?
59 */
60static size_t
61utf8_len_of_ucs2(const efi_char *nm)
62{
63	size_t len;
64	efi_char c;
65
66	len = 0;
67	while (*nm) {
68		c = *nm++;
69		if (c > 0x7ff)
70			len += 3;
71		else if (c > 0x7f)
72			len += 2;
73		else
74			len++;
75	}
76
77	return (len);
78}
79
80int
81ucs2_to_utf8(const efi_char *nm, char **name)
82{
83	size_t len, sz;
84	efi_char c;
85	char *cp;
86	int freeit = *name == NULL;
87
88	sz = utf8_len_of_ucs2(nm) + 1;
89	len = 0;
90	if (*name != NULL)
91		cp = *name;
92	else
93		cp = *name = malloc(sz);
94	if (*name == NULL)
95		return (ENOMEM);
96
97	while (*nm) {
98		c = *nm++;
99		if (c > 0x7ff) {
100			if (len++ < sz)
101				*cp++ = (char)(0xE0 | (c >> 12));
102			if (len++ < sz)
103				*cp++ = (char)(0x80 | ((c >> 6) & 0x3f));
104			if (len++ < sz)
105				*cp++ = (char)(0x80 | (c & 0x3f));
106		} else if (c > 0x7f) {
107			if (len++ < sz)
108				*cp++ = (char)(0xC0 | ((c >> 6) & 0x1f));
109			if (len++ < sz)
110				*cp++ = (char)(0x80 | (c & 0x3f));
111		} else {
112			if (len++ < sz)
113				*cp++ = (char)(c & 0x7f);
114		}
115	}
116
117	if (len >= sz) {
118		/* Absent bugs, we'll never return EOVERFLOW */
119		if (freeit)
120			free(*name);
121		return (EOVERFLOW);
122	}
123	*cp++ = '\0';
124
125	return (0);
126}
127
128int
129utf8_to_ucs2(const char *name, efi_char **nmp, size_t *len)
130{
131	efi_char *nm;
132	size_t sz;
133	uint32_t ucs4;
134	int c, bytes;
135	int freeit = *nmp == NULL;
136
137	sz = strlen(name) * 2 + 2;
138	if (*nmp == NULL)
139		*nmp = malloc(sz);
140	nm = *nmp;
141	*len = sz;
142
143	ucs4 = 0;
144	bytes = 0;
145	while (sz > 1 && *name != '\0') {
146		c = *name++;
147		/*
148		 * Conditionalize on the two major character types:
149		 * initial and followup characters.
150		 */
151		if ((c & 0xc0) != 0x80) {
152			/* Initial characters. */
153			if (bytes != 0) {
154				if (freeit)
155					free(nm);
156				return (EILSEQ);
157			}
158			if ((c & 0xf8) == 0xf0) {
159				ucs4 = c & 0x07;
160				bytes = 3;
161			} else if ((c & 0xf0) == 0xe0) {
162				ucs4 = c & 0x0f;
163				bytes = 2;
164			} else if ((c & 0xe0) == 0xc0) {
165				ucs4 = c & 0x1f;
166				bytes = 1;
167			} else {
168				ucs4 = c & 0x7f;
169				bytes = 0;
170			}
171		} else {
172			/* Followup characters. */
173			if (bytes > 0) {
174				ucs4 = (ucs4 << 6) + (c & 0x3f);
175				bytes--;
176			} else if (bytes == 0) {
177				if (freeit)
178					free(nm);
179				return (EILSEQ);
180			}
181		}
182		if (bytes == 0) {
183			if (ucs4 > 0xffff) {
184				if (freeit)
185					free(nm);
186				return (EILSEQ);
187			}
188			*nm++ = (efi_char)ucs4;
189			sz -= 2;
190		}
191	}
192	if (sz < 2) {
193		if (freeit)
194			free(nm);
195		return (EDOOFUS);
196	}
197	sz -= 2;
198	*nm = 0;
199	*len -= sz;
200	return (0);
201}
202