1/* 2 Unix SMB/CIFS implementation. 3 charset defines 4 Copyright (C) Andrew Tridgell 2001 5 Copyright (C) Jelmer Vernooij 2002 6 7 This program is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 3 of the License, or 10 (at your option) any later version. 11 12 This program is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with this program. If not, see <http://www.gnu.org/licenses/>. 19*/ 20 21/* This is a public header file that is installed as part of Samba. 22 * If you remove any functions or change their signature, update 23 * the so version number. */ 24 25#ifndef __CHARSET_H__ 26#define __CHARSET_H__ 27 28#include <talloc.h> 29 30/* this defines the charset types used in samba */ 31typedef enum {CH_UTF16LE=0, CH_UTF16=0, CH_UNIX, CH_DISPLAY, CH_DOS, CH_UTF8, CH_UTF16BE, CH_UTF16MUNGED} charset_t; 32 33#define NUM_CHARSETS 7 34 35/* 36 * SMB UCS2 (16-bit unicode) internal type. 37 * smb_ucs2_t is *always* in little endian format. 38 */ 39 40typedef uint16_t smb_ucs2_t; 41 42/* 43 * for each charset we have a function that pulls from that charset to 44 * a ucs2 buffer, and a function that pushes to a ucs2 buffer 45 * */ 46 47struct charset_functions { 48 const char *name; 49 size_t (*pull)(void *, const char **inbuf, size_t *inbytesleft, 50 char **outbuf, size_t *outbytesleft); 51 size_t (*push)(void *, const char **inbuf, size_t *inbytesleft, 52 char **outbuf, size_t *outbytesleft); 53 struct charset_functions *prev, *next; 54}; 55 56/* this type is used for manipulating unicode codepoints */ 57typedef uint32_t codepoint_t; 58 59#define INVALID_CODEPOINT ((codepoint_t)-1) 60 61/* 62 * This is auxiliary struct used by source/script/gen-8-bit-gap.sh script 63 * during generation of an encoding table for charset module 64 * */ 65 66struct charset_gap_table { 67 uint16_t start; 68 uint16_t end; 69 int32_t idx; 70}; 71 72 73/* generic iconv conversion structure */ 74typedef struct smb_iconv_s { 75 size_t (*direct)(void *cd, const char **inbuf, size_t *inbytesleft, 76 char **outbuf, size_t *outbytesleft); 77 size_t (*pull)(void *cd, const char **inbuf, size_t *inbytesleft, 78 char **outbuf, size_t *outbytesleft); 79 size_t (*push)(void *cd, const char **inbuf, size_t *inbytesleft, 80 char **outbuf, size_t *outbytesleft); 81 void *cd_direct, *cd_pull, *cd_push; 82 char *from_name, *to_name; 83} *smb_iconv_t; 84 85/* string manipulation flags */ 86#define STR_TERMINATE 1 87#define STR_UPPER 2 88#define STR_ASCII 4 89#define STR_UNICODE 8 90#define STR_NOALIGN 16 91#define STR_NO_RANGE_CHECK 32 92#define STR_LEN8BIT 64 93#define STR_TERMINATE_ASCII 128 /* only terminate if ascii */ 94#define STR_LEN_NOTERM 256 /* the length field is the unterminated length */ 95 96struct loadparm_context; 97struct smb_iconv_convenience; 98 99/* replace some string functions with multi-byte 100 versions */ 101#define strlower(s) strlower_m(s) 102#define strupper(s) strupper_m(s) 103 104char *strchr_m(const char *s, char c); 105size_t strlen_m_term(const char *s); 106size_t strlen_m_term_null(const char *s); 107size_t strlen_m(const char *s); 108char *alpha_strcpy(char *dest, const char *src, const char *other_safe_chars, size_t maxlength); 109void string_replace_m(char *s, char oldc, char newc); 110bool strcsequal_m(const char *s1,const char *s2); 111bool strequal_m(const char *s1, const char *s2); 112int strncasecmp_m(const char *s1, const char *s2, size_t n); 113bool next_token(const char **ptr,char *buff, const char *sep, size_t bufsize); 114int strcasecmp_m(const char *s1, const char *s2); 115size_t count_chars_m(const char *s, char c); 116void strupper_m(char *s); 117void strlower_m(char *s); 118char *strupper_talloc(TALLOC_CTX *ctx, const char *src); 119char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src); 120char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n); 121char *strlower_talloc(TALLOC_CTX *ctx, const char *src); 122bool strhasupper(const char *string); 123bool strhaslower(const char *string); 124char *strrchr_m(const char *s, char c); 125char *strchr_m(const char *s, char c); 126 127bool push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size); 128bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src, size_t *converted_size); 129bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size); 130bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size); 131bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src, size_t *converted_size); 132bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size); 133ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags); 134ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags); 135 136bool convert_string_talloc(TALLOC_CTX *ctx, 137 charset_t from, charset_t to, 138 void const *src, size_t srclen, 139 void *dest, size_t *converted_size, 140 bool allow_badcharcnv); 141 142size_t convert_string(charset_t from, charset_t to, 143 void const *src, size_t srclen, 144 void *dest, size_t destlen, bool allow_badcharcnv); 145 146ssize_t iconv_talloc(TALLOC_CTX *mem_ctx, 147 smb_iconv_t cd, 148 void const *src, size_t srclen, 149 void *dest); 150 151extern struct smb_iconv_convenience *global_iconv_convenience; 152 153codepoint_t next_codepoint(const char *str, size_t *size); 154ssize_t push_codepoint(char *str, codepoint_t c); 155 156/* codepoints */ 157codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic, 158 const char *str, size_t *size); 159ssize_t push_codepoint_convenience(struct smb_iconv_convenience *ic, 160 char *str, codepoint_t c); 161codepoint_t toupper_m(codepoint_t val); 162codepoint_t tolower_m(codepoint_t val); 163int codepoint_cmpi(codepoint_t c1, codepoint_t c2); 164 165/* Iconv convenience functions */ 166struct smb_iconv_convenience *smb_iconv_convenience_init(TALLOC_CTX *mem_ctx, 167 const char *dos_charset, 168 const char *unix_charset, 169 bool native_iconv); 170 171bool convert_string_convenience(struct smb_iconv_convenience *ic, 172 charset_t from, charset_t to, 173 void const *src, size_t srclen, 174 void *dest, size_t destlen, size_t *converted_size, 175 bool allow_badcharcnv); 176bool convert_string_talloc_convenience(TALLOC_CTX *ctx, 177 struct smb_iconv_convenience *ic, 178 charset_t from, charset_t to, 179 void const *src, size_t srclen, 180 void *dest, size_t *converted_size, bool allow_badcharcnv); 181/* iconv */ 182smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode); 183int smb_iconv_close(smb_iconv_t cd); 184size_t smb_iconv(smb_iconv_t cd, 185 const char **inbuf, size_t *inbytesleft, 186 char **outbuf, size_t *outbytesleft); 187smb_iconv_t smb_iconv_open_ex(TALLOC_CTX *mem_ctx, const char *tocode, 188 const char *fromcode, bool native_iconv); 189 190void load_case_tables(void); 191bool charset_register_backend(const void *_funcs); 192 193/* 194 * Define stub for charset module which implements 8-bit encoding with gaps. 195 * Encoding tables for such module should be produced from glibc's CHARMAPs 196 * using script source/script/gen-8bit-gap.sh 197 * CHARSETNAME is CAPITALIZED charset name 198 * 199 * */ 200#define SMB_GENERATE_CHARSET_MODULE_8_BIT_GAP(CHARSETNAME) \ 201static size_t CHARSETNAME ## _push(void *cd, const char **inbuf, size_t *inbytesleft, \ 202 char **outbuf, size_t *outbytesleft) \ 203{ \ 204 while (*inbytesleft >= 2 && *outbytesleft >= 1) { \ 205 int i; \ 206 int done = 0; \ 207 \ 208 uint16 ch = SVAL(*inbuf,0); \ 209 \ 210 for (i=0; from_idx[i].start != 0xffff; i++) { \ 211 if ((from_idx[i].start <= ch) && (from_idx[i].end >= ch)) { \ 212 ((unsigned char*)(*outbuf))[0] = from_ucs2[from_idx[i].idx+ch]; \ 213 (*inbytesleft) -= 2; \ 214 (*outbytesleft) -= 1; \ 215 (*inbuf) += 2; \ 216 (*outbuf) += 1; \ 217 done = 1; \ 218 break; \ 219 } \ 220 } \ 221 if (!done) { \ 222 errno = EINVAL; \ 223 return -1; \ 224 } \ 225 \ 226 } \ 227 \ 228 if (*inbytesleft == 1) { \ 229 errno = EINVAL; \ 230 return -1; \ 231 } \ 232 \ 233 if (*inbytesleft > 1) { \ 234 errno = E2BIG; \ 235 return -1; \ 236 } \ 237 \ 238 return 0; \ 239} \ 240 \ 241static size_t CHARSETNAME ## _pull(void *cd, const char **inbuf, size_t *inbytesleft, \ 242 char **outbuf, size_t *outbytesleft) \ 243{ \ 244 while (*inbytesleft >= 1 && *outbytesleft >= 2) { \ 245 SSVAL(*outbuf, 0, to_ucs2[((unsigned char*)(*inbuf))[0]]); \ 246 (*inbytesleft) -= 1; \ 247 (*outbytesleft) -= 2; \ 248 (*inbuf) += 1; \ 249 (*outbuf) += 2; \ 250 } \ 251 \ 252 if (*inbytesleft > 0) { \ 253 errno = E2BIG; \ 254 return -1; \ 255 } \ 256 \ 257 return 0; \ 258} \ 259 \ 260struct charset_functions CHARSETNAME ## _functions = \ 261 {#CHARSETNAME, CHARSETNAME ## _pull, CHARSETNAME ## _push}; \ 262 \ 263NTSTATUS charset_ ## CHARSETNAME ## _init(void); \ 264NTSTATUS charset_ ## CHARSETNAME ## _init(void) \ 265{ \ 266 return smb_register_charset(& CHARSETNAME ## _functions); \ 267} \ 268 269 270#endif /* __CHARSET_H__ */ 271