kiconv.h revision 5206:34f0b41fc3c5
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#ifndef _SYS_KICONV_H
27#define	_SYS_KICONV_H
28
29#pragma ident	"%Z%%M%	%I%	%E% SMI"
30
31#ifdef __cplusplus
32extern "C" {
33#endif
34
35#include <sys/types.h>
36
37#ifdef	_KERNEL
38
/*
 * One entry of a supported code name table: a (name, id) pair.
 *
 * Every supported fromcode/tocode value is saved as one of these.  The id
 * values of a fromcode and a tocode are what is used to find out the
 * corresponding code conversion.
 */
typedef struct {
	char		*name;		/* Code name. */
	size_t		id;		/* Id paired with the name. */
} kiconv_code_list_t;
48
/*
 * One entry per unique kiconv code conversion.
 *
 * A conversion is identified by its tocode and fromcode ids and has a
 * corresponding module id and internal function pointers to open(),
 * kiconv(), close(), and kiconvstr().
 */
typedef struct {
	/* Identification of the conversion and of the module providing it. */
	uint16_t	tid;		/* tocode id. */
	uint16_t	fid;		/* fromcode id. */
	uint16_t	mid;		/* module id. */

	/* Internal entry points of the conversion. */
	void		*(*open)(void);
	size_t		(*kiconv)(void *, char **, size_t *,
			char **, size_t *, int *);
	int		(*close)(void *);
	size_t		(*kiconvstr)(char *, size_t *,
			char *, size_t *, int, int *);
} kiconv_conv_list_t;
65
66/*
67 * Each module id has a corresponding module name that is used to load
68 * the module as needed and a reference counter.
69 */
70typedef struct {
71	char		*name;
72	uint_t		refcount;
73} kiconv_mod_list_t;
74
/*
 * kiconv_ops_t and kiconv_module_info_t are the two data structures used
 * to transfer information on the supported kiconv code conversions from a
 * module to the framework.  A kiconv_ops_t carries a tocode name, a
 * fromcode name, and the four function pointers that go with them.
 *
 * Details can be found in the kiconv_ops(9S) and kiconv_module_info(9S)
 * man pages at PSARC/2007/173.
 */
typedef struct {
	/* Code names of the conversion. */
	char		*tocode;
	char		*fromcode;

	/* Functions supplied for the conversion. */
	void		*(*kiconv_open)(void);
	size_t		(*kiconv)(void *, char **, size_t *,
			char **, size_t *, int *);
	int		(*kiconv_close)(void *);
	size_t		(*kiconvstr)(char *, size_t *,
			char *, size_t *, int, int *);
} kiconv_ops_t;
92
93typedef struct kiconv_mod_info {
94	char		*module_name;
95	size_t		kiconv_num_convs;
96	kiconv_ops_t	*kiconv_ops_tbl;
97	size_t		kiconv_num_aliases;
98	char		**aliases;
99	char		**canonicals;
100	int		nowait;
101} kiconv_module_info_t;
102
/*
 * The kiconv code conversion descriptor data structure.  kiconv_t is the
 * pointer-to-descriptor type.
 */
typedef struct {
	void		*handle;	/* Handle from the actual open(). */
	size_t		id;		/* Index to the conv_list[]. */
} kiconv_data_t;

typedef kiconv_data_t	*kiconv_t;
108
/*
 * Common conversion state data structure.  kiconv_state_t is the
 * pointer-to-state type.
 */
typedef struct {
	uint8_t		id;
	/* NOTE(review): presumably a "BOM already handled" flag; confirm. */
	uint8_t		bom_processed;
} kiconv_state_data_t;

typedef kiconv_state_data_t	*kiconv_state_t;
114
115/* Common component types for possible code conversion mapping tables. */
116typedef struct {
117	uchar_t		u8[3];
118} kiconv_to_utf8_tbl_comp_t;
119
/*
 * Mapping table component made of two bit-fields declared on uint32_t:
 * a 24-bit u8 and an 8-bit sb.
 * NOTE(review): presumably u8 is a packed UTF-8 byte sequence and sb the
 * single-byte value it maps to; confirm against the tables that use it.
 */
typedef struct {
	uint32_t	u8:24;		/* 24-bit field. */
	uint32_t	sb:8;		/* 8-bit field. */
} kiconv_to_sb_tbl_comp_t;
124
/*
 * Upper bound on the length of any codeset name or alias name.  The value
 * chosen should be plenty big enough.
 */
#define	KICONV_MAX_CODENAME_LEN		63
130
/*
 * Nonzero if c is one of the characters that do not exist in the
 * normalized code names: '-', '_', '.', or '@'.
 *
 * The argument appears four times in the expansion and may be evaluated
 * up to four times, so it should not have side effects.
 */
#define	KICONV_SKIPPABLE_CHAR(c)	\
	((c) == '-' ||			\
	(c) == '_' ||			\
	(c) == '.' ||			\
	(c) == '@')
134
/*
 * When we encounter non-identical characters, we map them, as iconv(3C)
 * does, into one of the following replacement characters, chosen based on
 * the current target tocode.
 *
 * 0xefbfbd is the UTF-8 byte sequence (0xef 0xbf 0xbd) of the Unicode
 * scalar value U+FFFD.
 */
#define	KICONV_ASCII_REPLACEMENT_CHAR	('?')
#define	KICONV_UTF8_REPLACEMENT_CHAR	(0xefbfbd)
144
/*
 * Numeric ids for kiconv modules.
 *
 * KICONV_MAX_MODULE_ID is defined as the last (highest) module id listed
 * here.
 */
#define	KICONV_EMBEDDED			(0)
#define	KICONV_MODULE_ID_JA		(1)
#define	KICONV_MODULE_ID_SC		(2)
#define	KICONV_MODULE_ID_KO		(3)
#define	KICONV_MODULE_ID_TC		(4)
#define	KICONV_MODULE_ID_EMEA		(5)

#define	KICONV_MAX_MODULE_ID		KICONV_MODULE_ID_EMEA
154
155/* Functions used in kiconv conversion and module management. */
156extern void	kiconv_init();
157extern int	kiconv_register_module(kiconv_module_info_t *);
158extern int	kiconv_unregister_module(kiconv_module_info_t *);
159extern size_t	kiconv_module_ref_count(size_t);
160
161#endif	/* _KERNEL */
162
163#ifdef __cplusplus
164}
165#endif
166
167#endif /* _SYS_KICONV_H */
168