1/*
2   Unix SMB/CIFS implementation.
3   Character set conversion Extensions
4   Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5   Copyright (C) Andrew Tridgell 2001
6   Copyright (C) Simo Sorce 2001
7   Copyright (C) Martin Pool 2003
8
9   This program is free software; you can redistribute it and/or modify
10   it under the terms of the GNU General Public License as published by
11   the Free Software Foundation; either version 2 of the License, or
12   (at your option) any later version.
13
14   This program is distributed in the hope that it will be useful,
15   but WITHOUT ANY WARRANTY; without even the implied warranty of
16   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   GNU General Public License for more details.
18
19   You should have received a copy of the GNU General Public License
20   along with this program; if not, write to the Free Software
21   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22
23*/
24#include "includes.h"
25
/*
 * Substitute character emitted in place of input that cannot be
 * converted.  Hard-wired for now; it could become a configuration
 * parameter if anyone ever needs that. JRA.
 */

char lp_failed_convert_char(void)
{
	static const char substitute = '_';

	return substitute;
}
32
33/**
34 * @file
35 *
36 * @brief Character-set conversion routines built on our iconv.
37 *
38 * @note Samba's internal character set (at least in the 3.0 series)
39 * is always the same as the one for the Unix filesystem.  It is
40 * <b>not</b> necessarily UTF-8 and may be different on machines that
41 * need i18n filenames to be compatible with Unix software.  It does
42 * have to be a superset of ASCII.  All multibyte sequences must start
43 * with a byte with the high bit set.
44 *
45 * @sa lib/iconv.c
46 */
47
48
/*
 * Cache of iconv descriptors, indexed as conv_handles[from][to].
 * Entries are filled in by init_iconv(); the conversion routines in this
 * file treat both 0 and (smb_iconv_t)-1 as "conversion not available".
 */
static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
/* When True, conversion failures are not reported through DEBUG(). */
static BOOL conv_silent; /* Should we do a debug if the conversion fails ? */
51
52/**
53 * Return the name of a charset to give to iconv().
54 **/
55static const char *charset_name(charset_t ch)
56{
57	const char *ret = NULL;
58
59	if (ch == CH_UCS2) ret = "UTF-16LE";
60	else if (ch == CH_UNIX) ret = lp_unix_charset();
61	else if (ch == CH_DOS) ret = lp_dos_charset();
62	else if (ch == CH_DISPLAY) ret = lp_display_charset();
63	else if (ch == CH_UTF8) ret = "UTF8";
64
65#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
66	if (ret && !strcmp(ret, "LOCALE")) {
67		const char *ln = NULL;
68
69#ifdef HAVE_SETLOCALE
70		setlocale(LC_ALL, "");
71#endif
72		ln = nl_langinfo(CODESET);
73		if (ln) {
74			/* Check whether the charset name is supported
75			   by iconv */
76			smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
77			if (handle == (smb_iconv_t) -1) {
78				DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
79				ln = NULL;
80			} else {
81				DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
82				smb_iconv_close(handle);
83			}
84		}
85		ret = ln;
86	}
87#ifdef HAVE_SETLOCALE
88	/* We set back the locale to C to get ASCII-compatible toupper/lower functions.
89	   For now we do not need any other POSIX localisations anyway. When we should
90	   really need localized string functions one day we need to write our own
91	   ascii_tolower etc.
92	*/
93	setlocale(LC_ALL, "C");
94 #endif
95
96#endif
97
98	if (!ret || !*ret) ret = "ASCII";
99	return ret;
100}
101
102void lazy_initialize_conv(void)
103{
104	static int initialized = False;
105
106	if (!initialized) {
107		initialized = True;
108		load_case_tables();
109		init_iconv();
110	}
111}
112
113/**
114 * Initialize iconv conversion descriptors.
115 *
116 * This is called the first time it is needed, and also called again
117 * every time the configuration is reloaded, because the charset or
118 * codepage might have changed.
119 **/
120void init_iconv(void)
121{
122	int c1, c2;
123	BOOL did_reload = False;
124
125	/* so that charset_name() works we need to get the UNIX<->UCS2 going
126	   first */
127	if (!conv_handles[CH_UNIX][CH_UCS2])
128		conv_handles[CH_UNIX][CH_UCS2] = smb_iconv_open(charset_name(CH_UCS2), "ASCII");
129
130	if (!conv_handles[CH_UCS2][CH_UNIX])
131		conv_handles[CH_UCS2][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UCS2));
132
133	for (c1=0;c1<NUM_CHARSETS;c1++) {
134		for (c2=0;c2<NUM_CHARSETS;c2++) {
135			const char *n1 = charset_name((charset_t)c1);
136			const char *n2 = charset_name((charset_t)c2);
137			if (conv_handles[c1][c2] &&
138			    strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
139			    strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
140				continue;
141
142			did_reload = True;
143
144			if (conv_handles[c1][c2])
145				smb_iconv_close(conv_handles[c1][c2]);
146
147			conv_handles[c1][c2] = smb_iconv_open(n2,n1);
148			if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
149				DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
150					 charset_name((charset_t)c1), charset_name((charset_t)c2)));
151				if (c1 != CH_UCS2) {
152					n1 = "ASCII";
153				}
154				if (c2 != CH_UCS2) {
155					n2 = "ASCII";
156				}
157				DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
158					n1, n2 ));
159				conv_handles[c1][c2] = smb_iconv_open(n2,n1);
160				if (!conv_handles[c1][c2]) {
161					DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
162					smb_panic("init_iconv: conv_handle initialization failed.");
163				}
164			}
165		}
166	}
167
168	if (did_reload) {
169		/* XXX: Does this really get called every time the dos
170		 * codepage changes? */
171		/* XXX: Is the did_reload test too strict? */
172		conv_silent = True;
173		init_doschar_table();
174		init_valid_table();
175		conv_silent = False;
176	}
177}
178
179/**
180 * Convert string from one encoding to another, making error checking etc
181 * Slow path version - uses (slow) iconv.
182 *
183 * @param src pointer to source string (multibyte or singlebyte)
184 * @param srclen length of the source string in bytes
185 * @param dest pointer to destination string (multibyte or singlebyte)
186 * @param destlen maximal length allowed for string
187 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
188 * @returns the number of bytes occupied in the destination
189 *
190 * Ensure the srclen contains the terminating zero.
191 *
192 **/
193
194static size_t convert_string_internal(charset_t from, charset_t to,
195		      void const *src, size_t srclen,
196		      void *dest, size_t destlen, BOOL allow_bad_conv)
197{
198	size_t i_len, o_len;
199	size_t retval;
200	const char* inbuf = (const char*)src;
201	char* outbuf = (char*)dest;
202	smb_iconv_t descriptor;
203
204	lazy_initialize_conv();
205
206	descriptor = conv_handles[from][to];
207
208	if (srclen == (size_t)-1) {
209		if (from == CH_UCS2) {
210			srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
211		} else {
212			srclen = strlen((const char *)src)+1;
213		}
214	}
215
216
217	if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
218		if (!conv_silent)
219			DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
220		return (size_t)-1;
221	}
222
223	i_len=srclen;
224	o_len=destlen;
225
226 again:
227
228	retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
229	if(retval==(size_t)-1) {
230	    	const char *reason="unknown error";
231		switch(errno) {
232			case EINVAL:
233				reason="Incomplete multibyte sequence";
234				if (!conv_silent)
235					DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
236				if (allow_bad_conv)
237					goto use_as_is;
238				break;
239			case E2BIG:
240				reason="No more room";
241				if (!conv_silent) {
242					if (from == CH_UNIX) {
243						DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
244							charset_name(from), charset_name(to),
245							(unsigned int)srclen, (unsigned int)destlen, (const char *)src));
246					} else {
247						DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
248							charset_name(from), charset_name(to),
249							(unsigned int)srclen, (unsigned int)destlen));
250					}
251				}
252				break;
253			case EILSEQ:
254				reason="Illegal multibyte sequence";
255				if (!conv_silent)
256					DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
257				if (allow_bad_conv)
258					goto use_as_is;
259				break;
260			default:
261				if (!conv_silent)
262					DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
263				break;
264		}
265		/* smb_panic(reason); */
266	}
267	return destlen-o_len;
268
269 use_as_is:
270
271	/*
272	 * Conversion not supported. This is actually an error, but there are so
273	 * many misconfigured iconv systems and smb.conf's out there we can't just
274	 * fail. Do a very bad conversion instead.... JRA.
275	 */
276
277	{
278		if (o_len == 0 || i_len == 0)
279			return destlen - o_len;
280
281		if (from == CH_UCS2 && to != CH_UCS2) {
282			/* Can't convert from ucs2 to multibyte. Replace with the default fail char. */
283			if (i_len < 2)
284				return destlen - o_len;
285			if (i_len >= 2) {
286				*outbuf = lp_failed_convert_char();
287
288				outbuf++;
289				o_len--;
290
291				inbuf += 2;
292				i_len -= 2;
293			}
294
295			if (o_len == 0 || i_len == 0)
296				return destlen - o_len;
297
298			/* Keep trying with the next char... */
299			goto again;
300
301		} else if (from != CH_UCS2 && to == CH_UCS2) {
302			/* Can't convert to ucs2 - just widen by adding the default fail char then zero. */
303			if (o_len < 2)
304				return destlen - o_len;
305
306			outbuf[0] = lp_failed_convert_char();
307			outbuf[1] = '\0';
308
309			inbuf++;
310			i_len--;
311
312			outbuf += 2;
313			o_len -= 2;
314
315			if (o_len == 0 || i_len == 0)
316				return destlen - o_len;
317
318			/* Keep trying with the next char... */
319			goto again;
320
321		} else if (from != CH_UCS2 && to != CH_UCS2) {
322			/* Failed multibyte to multibyte. Just copy the default fail char and
323				try again. */
324			outbuf[0] = lp_failed_convert_char();
325
326			inbuf++;
327			i_len--;
328
329			outbuf++;
330			o_len--;
331
332			if (o_len == 0 || i_len == 0)
333				return destlen - o_len;
334
335			/* Keep trying with the next char... */
336			goto again;
337
338		} else {
339			/* Keep compiler happy.... */
340			return destlen - o_len;
341		}
342	}
343}
344
345/**
346 * Convert string from one encoding to another, making error checking etc
347 * Fast path version - handles ASCII first.
348 *
349 * @param src pointer to source string (multibyte or singlebyte)
350 * @param srclen length of the source string in bytes, or -1 for nul terminated.
351 * @param dest pointer to destination string (multibyte or singlebyte)
352 * @param destlen maximal length allowed for string - *NEVER* -1.
353 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
354 * @returns the number of bytes occupied in the destination
355 *
356 * Ensure the srclen contains the terminating zero.
357 *
358 * This function has been hand-tuned to provide a fast path.
359 * Don't change unless you really know what you are doing. JRA.
360 **/
361
362size_t convert_string(charset_t from, charset_t to,
363		      void const *src, size_t srclen,
364		      void *dest, size_t destlen, BOOL allow_bad_conv)
365{
366	/*
367	 * NB. We deliberately don't do a strlen here if srclen == -1.
368	 * This is very expensive over millions of calls and is taken
369	 * care of in the slow path in convert_string_internal. JRA.
370	 */
371
372#ifdef DEVELOPER
373	SMB_ASSERT(destlen != (size_t)-1);
374#endif
375
376	if (srclen == 0)
377		return 0;
378
379	if (from != CH_UCS2 && to != CH_UCS2) {
380		const unsigned char *p = (const unsigned char *)src;
381		unsigned char *q = (unsigned char *)dest;
382		size_t slen = srclen;
383		size_t dlen = destlen;
384		unsigned char lastp = '\0';
385		size_t retval = 0;
386
387		/* If all characters are ascii, fast path here. */
388		while (slen && dlen) {
389			if ((lastp = *p) <= 0x7f) {
390				*q++ = *p++;
391				if (slen != (size_t)-1) {
392					slen--;
393				}
394				dlen--;
395				retval++;
396				if (!lastp)
397					break;
398			} else {
399#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
400				goto general_case;
401#else
402				return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
403#endif
404			}
405		}
406		if (!dlen) {
407			/* Even if we fast path we should note if we ran out of room. */
408			if (((slen != (size_t)-1) && slen) ||
409					((slen == (size_t)-1) && lastp)) {
410				errno = E2BIG;
411			}
412		}
413		return retval;
414	} else if (from == CH_UCS2 && to != CH_UCS2) {
415		const unsigned char *p = (const unsigned char *)src;
416		unsigned char *q = (unsigned char *)dest;
417		size_t retval = 0;
418		size_t slen = srclen;
419		size_t dlen = destlen;
420		unsigned char lastp = '\0';
421
422		/* If all characters are ascii, fast path here. */
423		while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
424			if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
425				*q++ = *p;
426				if (slen != (size_t)-1) {
427					slen -= 2;
428				}
429				p += 2;
430				dlen--;
431				retval++;
432				if (!lastp)
433					break;
434			} else {
435#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
436				goto general_case;
437#else
438				return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
439#endif
440			}
441		}
442		if (!dlen) {
443			/* Even if we fast path we should note if we ran out of room. */
444			if (((slen != (size_t)-1) && slen) ||
445					((slen == (size_t)-1) && lastp)) {
446				errno = E2BIG;
447			}
448		}
449		return retval;
450	} else if (from != CH_UCS2 && to == CH_UCS2) {
451		const unsigned char *p = (const unsigned char *)src;
452		unsigned char *q = (unsigned char *)dest;
453		size_t retval = 0;
454		size_t slen = srclen;
455		size_t dlen = destlen;
456		unsigned char lastp = '\0';
457
458		/* If all characters are ascii, fast path here. */
459		while (slen && (dlen >= 2)) {
460			if ((lastp = *p) <= 0x7F) {
461				*q++ = *p++;
462				*q++ = '\0';
463				if (slen != (size_t)-1) {
464					slen--;
465				}
466				dlen -= 2;
467				retval += 2;
468				if (!lastp)
469					break;
470			} else {
471#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
472				goto general_case;
473#else
474				return retval + convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
475#endif
476			}
477		}
478		if (!dlen) {
479			/* Even if we fast path we should note if we ran out of room. */
480			if (((slen != (size_t)-1) && slen) ||
481					((slen == (size_t)-1) && lastp)) {
482				errno = E2BIG;
483			}
484		}
485		return retval;
486	}
487
488#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
489  general_case:
490#endif
491	return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
492}
493
494/**
495 * Convert between character sets, allocating a new buffer for the result.
496 *
497 * @param ctx TALLOC_CTX to use to allocate with. If NULL use malloc.
498 * @param srclen length of source buffer.
499 * @param dest always set at least to NULL
500 * @note -1 is not accepted for srclen.
501 *
502 * @returns Size in bytes of the converted string; or -1 in case of error.
503 *
504 * Ensure the srclen contains the terminating zero.
505 *
506 * I hate the goto's in this function. It's embarressing.....
507 * There has to be a cleaner way to do this. JRA.
508 **/
509
510size_t convert_string_allocate(TALLOC_CTX *ctx, charset_t from, charset_t to,
511			       void const *src, size_t srclen, void **dest, BOOL allow_bad_conv)
512{
513	size_t i_len, o_len, destlen = MAX(srclen, 512);
514	size_t retval;
515	const char *inbuf = (const char *)src;
516	char *outbuf = NULL, *ob = NULL;
517	smb_iconv_t descriptor;
518
519	*dest = NULL;
520
521	if (src == NULL || srclen == (size_t)-1)
522		return (size_t)-1;
523	if (srclen == 0)
524		return 0;
525
526	lazy_initialize_conv();
527
528	descriptor = conv_handles[from][to];
529
530	if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
531		if (!conv_silent)
532			DEBUG(0,("convert_string_allocate: Conversion not supported.\n"));
533		return (size_t)-1;
534	}
535
536  convert:
537
538	if ((destlen*2) < destlen) {
539		/* wrapped ! abort. */
540		if (!conv_silent)
541			DEBUG(0, ("convert_string_allocate: destlen wrapped !\n"));
542		if (!ctx)
543			SAFE_FREE(outbuf);
544		return (size_t)-1;
545	} else {
546		destlen = destlen * 2;
547	}
548
549	if (ctx)
550		ob = (char *)TALLOC_REALLOC(ctx, ob, destlen);
551	else
552		ob = (char *)SMB_REALLOC(ob, destlen);
553
554	if (!ob) {
555		DEBUG(0, ("convert_string_allocate: realloc failed!\n"));
556		if (!ctx)
557			SAFE_FREE(outbuf);
558		return (size_t)-1;
559	} else {
560		outbuf = ob;
561	}
562	i_len = srclen;
563	o_len = destlen;
564
565 again:
566
567	retval = smb_iconv(descriptor,
568			   &inbuf, &i_len,
569			   &outbuf, &o_len);
570	if(retval == (size_t)-1) 		{
571	    	const char *reason="unknown error";
572		switch(errno) {
573			case EINVAL:
574				reason="Incomplete multibyte sequence";
575				if (!conv_silent)
576					DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
577				if (allow_bad_conv)
578					goto use_as_is;
579				break;
580			case E2BIG:
581				goto convert;
582			case EILSEQ:
583				reason="Illegal multibyte sequence";
584				if (!conv_silent)
585					DEBUG(3,("convert_string_allocate: Conversion error: %s(%s)\n",reason,inbuf));
586				if (allow_bad_conv)
587					goto use_as_is;
588				break;
589		}
590		if (!conv_silent)
591			DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
592		/* smb_panic(reason); */
593		return (size_t)-1;
594	}
595
596  out:
597
598	destlen = destlen - o_len;
599	if (ctx)
600		*dest = (char *)TALLOC_REALLOC(ctx,ob,destlen);
601	else
602		*dest = (char *)SMB_REALLOC(ob,destlen);
603	if (destlen && !*dest) {
604		DEBUG(0, ("convert_string_allocate: out of memory!\n"));
605		if (!ctx)
606			SAFE_FREE(ob);
607		return (size_t)-1;
608	}
609
610	return destlen;
611
612 use_as_is:
613
614	/*
615	 * Conversion not supported. This is actually an error, but there are so
616	 * many misconfigured iconv systems and smb.conf's out there we can't just
617	 * fail. Do a very bad conversion instead.... JRA.
618	 */
619
620	{
621		if (o_len == 0 || i_len == 0)
622			goto out;
623
624		if (from == CH_UCS2 && to != CH_UCS2) {
625			/* Can't convert from ucs2 to multibyte. Just use the default fail char. */
626			if (i_len < 2)
627				goto out;
628
629			if (i_len >= 2) {
630				*outbuf = lp_failed_convert_char();
631
632				outbuf++;
633				o_len--;
634
635				inbuf += 2;
636				i_len -= 2;
637			}
638
639			if (o_len == 0 || i_len == 0)
640				goto out;
641
642			/* Keep trying with the next char... */
643			goto again;
644
645		} else if (from != CH_UCS2 && to == CH_UCS2) {
646			/* Can't convert to ucs2 - just widen by adding the default fail char then zero. */
647			if (o_len < 2)
648				goto out;
649
650			outbuf[0] = lp_failed_convert_char();
651			outbuf[1] = '\0';
652
653			inbuf++;
654			i_len--;
655
656			outbuf += 2;
657			o_len -= 2;
658
659			if (o_len == 0 || i_len == 0)
660				goto out;
661
662			/* Keep trying with the next char... */
663			goto again;
664
665		} else if (from != CH_UCS2 && to != CH_UCS2) {
666			/* Failed multibyte to multibyte. Just copy the default fail char and
667				try again. */
668			outbuf[0] = lp_failed_convert_char();
669
670			inbuf++;
671			i_len--;
672
673			outbuf++;
674			o_len--;
675
676			if (o_len == 0 || i_len == 0)
677				goto out;
678
679			/* Keep trying with the next char... */
680			goto again;
681
682		} else {
683			/* Keep compiler happy.... */
684			goto out;
685		}
686	}
687}
688
689/**
690 * Convert between character sets, allocating a new buffer using talloc for the result.
691 *
692 * @param srclen length of source buffer.
693 * @param dest always set at least to NULL
694 * @note -1 is not accepted for srclen.
695 *
696 * @returns Size in bytes of the converted string; or -1 in case of error.
697 **/
698static size_t convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
699		      		void const *src, size_t srclen, void **dest, BOOL allow_bad_conv)
700{
701	size_t dest_len;
702
703	*dest = NULL;
704	dest_len=convert_string_allocate(ctx, from, to, src, srclen, dest, allow_bad_conv);
705	if (dest_len == (size_t)-1)
706		return (size_t)-1;
707	if (*dest == NULL)
708		return (size_t)-1;
709	return dest_len;
710}
711
712size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
713{
714	size_t size;
715	smb_ucs2_t *buffer;
716
717	size = push_ucs2_allocate(&buffer, src);
718	if (size == (size_t)-1) {
719		smb_panic("failed to create UCS2 buffer");
720	}
721	if (!strupper_w(buffer) && (dest == src)) {
722		free(buffer);
723		return srclen;
724	}
725
726	size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen, True);
727	free(buffer);
728	return size;
729}
730
731/**
732 strdup() a unix string to upper case.
733 Max size is pstring.
734**/
735
736char *strdup_upper(const char *s)
737{
738	pstring out_buffer;
739	const unsigned char *p = (const unsigned char *)s;
740	unsigned char *q = (unsigned char *)out_buffer;
741
742	/* this is quite a common operation, so we want it to be
743	   fast. We optimise for the ascii case, knowing that all our
744	   supported multi-byte character sets are ascii-compatible
745	   (ie. they match for the first 128 chars) */
746
747	while (1) {
748		if (*p & 0x80)
749			break;
750		*q++ = toupper(*p);
751		if (!*p)
752			break;
753		p++;
754		if (p - ( const unsigned char *)s >= sizeof(pstring))
755			break;
756	}
757
758	if (*p) {
759		/* MB case. */
760		size_t size;
761		wpstring buffer;
762		size = convert_string(CH_UNIX, CH_UCS2, s, -1, buffer, sizeof(buffer), True);
763		if (size == (size_t)-1) {
764			return NULL;
765		}
766
767		strupper_w(buffer);
768
769		size = convert_string(CH_UCS2, CH_UNIX, buffer, -1, out_buffer, sizeof(out_buffer), True);
770		if (size == (size_t)-1) {
771			return NULL;
772		}
773	}
774
775	return SMB_STRDUP(out_buffer);
776}
777
778size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
779{
780	size_t size;
781	smb_ucs2_t *buffer = NULL;
782
783	size = convert_string_allocate(NULL, CH_UNIX, CH_UCS2, src, srclen,
784				       (void **) &buffer, True);
785	if (size == (size_t)-1 || !buffer) {
786		smb_panic("failed to create UCS2 buffer");
787	}
788	if (!strlower_w(buffer) && (dest == src)) {
789		SAFE_FREE(buffer);
790		return srclen;
791	}
792	size = convert_string(CH_UCS2, CH_UNIX, buffer, size, dest, destlen, True);
793	SAFE_FREE(buffer);
794	return size;
795}
796
797/**
798 strdup() a unix string to lower case.
799**/
800
801char *strdup_lower(const char *s)
802{
803	size_t size;
804	smb_ucs2_t *buffer = NULL;
805	char *out_buffer;
806
807	size = push_ucs2_allocate(&buffer, s);
808	if (size == -1 || !buffer) {
809		return NULL;
810	}
811
812	strlower_w(buffer);
813
814	size = pull_ucs2_allocate(&out_buffer, buffer);
815	SAFE_FREE(buffer);
816
817	if (size == (size_t)-1) {
818		return NULL;
819	}
820
821	return out_buffer;
822}
823
824static size_t ucs2_align(const void *base_ptr, const void *p, int flags)
825{
826	if (flags & (STR_NOALIGN|STR_ASCII))
827		return 0;
828	return PTR_DIFF(p, base_ptr) & 1;
829}
830
831
832/**
833 * Copy a string from a char* unix src to a dos codepage string destination.
834 *
835 * @return the number of bytes occupied by the string in the destination.
836 *
837 * @param flags can include
838 * <dl>
839 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
840 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
841 * </dl>
842 *
843 * @param dest_len the maximum length in bytes allowed in the
844 * destination.  If @p dest_len is -1 then no maximum is used.
845 **/
846size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
847{
848	size_t src_len = strlen(src);
849	pstring tmpbuf;
850
851	/* treat a pstring as "unlimited" length */
852	if (dest_len == (size_t)-1)
853		dest_len = sizeof(pstring);
854
855	if (flags & STR_UPPER) {
856		pstrcpy(tmpbuf, src);
857		strupper_m(tmpbuf);
858		src = tmpbuf;
859	}
860
861	if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
862		src_len++;
863
864	return convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
865}
866
867size_t push_ascii_fstring(void *dest, const char *src)
868{
869	return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
870}
871
872size_t push_ascii_pstring(void *dest, const char *src)
873{
874	return push_ascii(dest, src, sizeof(pstring), STR_TERMINATE);
875}
876
877/********************************************************************
878 Push an nstring - ensure null terminated. Written by
879 moriyama@miraclelinux.com (MORIYAMA Masayuki).
880********************************************************************/
881
882size_t push_ascii_nstring(void *dest, const char *src)
883{
884	size_t i, buffer_len, dest_len;
885	smb_ucs2_t *buffer;
886
887	conv_silent = True;
888	buffer_len = push_ucs2_allocate(&buffer, src);
889	if (buffer_len == (size_t)-1) {
890		smb_panic("failed to create UCS2 buffer");
891	}
892
893	/* We're using buffer_len below to count ucs2 characters, not bytes. */
894	buffer_len /= sizeof(smb_ucs2_t);
895
896	dest_len = 0;
897	for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
898		unsigned char mb[10];
899		/* Convert one smb_ucs2_t character at a time. */
900		size_t mb_len = convert_string(CH_UCS2, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
901		if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
902			memcpy((char *)dest + dest_len, mb, mb_len);
903			dest_len += mb_len;
904		} else {
905			errno = E2BIG;
906			break;
907		}
908	}
909	((char *)dest)[dest_len] = '\0';
910
911	SAFE_FREE(buffer);
912	conv_silent = False;
913	return dest_len;
914}
915
916/**
917 * Copy a string from a dos codepage source to a unix char* destination.
918 *
919 * The resulting string in "dest" is always null terminated.
920 *
921 * @param flags can have:
922 * <dl>
923 * <dt>STR_TERMINATE</dt>
924 * <dd>STR_TERMINATE means the string in @p src
925 * is null terminated, and src_len is ignored.</dd>
926 * </dl>
927 *
928 * @param src_len is the length of the source area in bytes.
929 * @returns the number of bytes occupied by the string in @p src.
930 **/
931size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
932{
933	size_t ret;
934
935	if (dest_len == (size_t)-1)
936		dest_len = sizeof(pstring);
937
938	if (flags & STR_TERMINATE) {
939		if (src_len == (size_t)-1) {
940			src_len = strlen(src) + 1;
941		} else {
942			size_t len = strnlen(src, src_len);
943			if (len < src_len)
944				len++;
945			src_len = len;
946		}
947	}
948
949	ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
950	if (ret == (size_t)-1) {
951		dest_len = 0;
952	}
953
954	if (dest_len)
955		dest[MIN(ret, dest_len-1)] = 0;
956	else
957		dest[0] = 0;
958
959	return src_len;
960}
961
962size_t pull_ascii_pstring(char *dest, const void *src)
963{
964	return pull_ascii(dest, src, sizeof(pstring), -1, STR_TERMINATE);
965}
966
967size_t pull_ascii_fstring(char *dest, const void *src)
968{
969	return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
970}
971
972/* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
973
974size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
975{
976	return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
977}
978
979/**
980 * Copy a string from a char* src to a unicode destination.
981 *
982 * @returns the number of bytes occupied by the string in the destination.
983 *
984 * @param flags can have:
985 *
986 * <dl>
987 * <dt>STR_TERMINATE <dd>means include the null termination.
988 * <dt>STR_UPPER     <dd>means uppercase in the destination.
989 * <dt>STR_NOALIGN   <dd>means don't do alignment.
990 * </dl>
991 *
992 * @param dest_len is the maximum length allowed in the
993 * destination. If dest_len is -1 then no maxiumum is used.
994 **/
995
996size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
997{
998	size_t len=0;
999	size_t src_len;
1000	size_t ret;
1001
1002	/* treat a pstring as "unlimited" length */
1003	if (dest_len == (size_t)-1)
1004		dest_len = sizeof(pstring);
1005
1006	if (flags & STR_TERMINATE)
1007		src_len = (size_t)-1;
1008	else
1009		src_len = strlen(src);
1010
1011	if (ucs2_align(base_ptr, dest, flags)) {
1012		*(char *)dest = 0;
1013		dest = (void *)((char *)dest + 1);
1014		if (dest_len)
1015			dest_len--;
1016		len++;
1017	}
1018
1019	/* ucs2 is always a multiple of 2 bytes */
1020	dest_len &= ~1;
1021
1022	ret =  convert_string(CH_UNIX, CH_UCS2, src, src_len, dest, dest_len, True);
1023	if (ret == (size_t)-1) {
1024		return 0;
1025	}
1026
1027	len += ret;
1028
1029	if (flags & STR_UPPER) {
1030		smb_ucs2_t *dest_ucs2 = dest;
1031		size_t i;
1032		for (i = 0; i < (dest_len / 2) && dest_ucs2[i]; i++) {
1033			smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1034			if (v != dest_ucs2[i]) {
1035				dest_ucs2[i] = v;
1036			}
1037		}
1038	}
1039
1040	return len;
1041}
1042
1043
1044/**
1045 * Copy a string from a unix char* src to a UCS2 destination,
1046 * allocating a buffer using talloc().
1047 *
1048 * @param dest always set at least to NULL
1049 *
1050 * @returns The number of bytes occupied by the string in the destination
1051 *         or -1 in case of error.
1052 **/
1053size_t push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src)
1054{
1055	size_t src_len = strlen(src)+1;
1056
1057	*dest = NULL;
1058	return convert_string_talloc(ctx, CH_UNIX, CH_UCS2, src, src_len, (void **)dest, True);
1059}
1060
1061
1062/**
1063 * Copy a string from a unix char* src to a UCS2 destination, allocating a buffer
1064 *
1065 * @param dest always set at least to NULL
1066 *
1067 * @returns The number of bytes occupied by the string in the destination
1068 *         or -1 in case of error.
1069 **/
1070
1071size_t push_ucs2_allocate(smb_ucs2_t **dest, const char *src)
1072{
1073	size_t src_len = strlen(src)+1;
1074
1075	*dest = NULL;
1076	return convert_string_allocate(NULL, CH_UNIX, CH_UCS2, src, src_len, (void **)dest, True);
1077}
1078
1079/**
1080 Copy a string from a char* src to a UTF-8 destination.
1081 Return the number of bytes occupied by the string in the destination
1082 Flags can have:
1083  STR_TERMINATE means include the null termination
1084  STR_UPPER     means uppercase in the destination
1085 dest_len is the maximum length allowed in the destination. If dest_len
1086 is -1 then no maxiumum is used.
1087**/
1088
1089static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1090{
1091	size_t src_len = strlen(src);
1092	pstring tmpbuf;
1093
1094	/* treat a pstring as "unlimited" length */
1095	if (dest_len == (size_t)-1)
1096		dest_len = sizeof(pstring);
1097
1098	if (flags & STR_UPPER) {
1099		pstrcpy(tmpbuf, src);
1100		strupper_m(tmpbuf);
1101		src = tmpbuf;
1102	}
1103
1104	if (flags & STR_TERMINATE)
1105		src_len++;
1106
1107	return convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1108}
1109
1110size_t push_utf8_fstring(void *dest, const char *src)
1111{
1112	return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1113}
1114
1115/**
1116 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1117 *
1118 * @param dest always set at least to NULL
1119 *
1120 * @returns The number of bytes occupied by the string in the destination
1121 **/
1122
1123size_t push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1124{
1125	size_t src_len = strlen(src)+1;
1126
1127	*dest = NULL;
1128	return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len, (void**)dest, True);
1129}
1130
1131/**
1132 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer
1133 *
1134 * @param dest always set at least to NULL
1135 *
1136 * @returns The number of bytes occupied by the string in the destination
1137 **/
1138
1139size_t push_utf8_allocate(char **dest, const char *src)
1140{
1141	size_t src_len = strlen(src)+1;
1142
1143	*dest = NULL;
1144	return convert_string_allocate(NULL, CH_UNIX, CH_UTF8, src, src_len, (void **)dest, True);
1145}
1146
1147/**
1148 Copy a string from a ucs2 source to a unix char* destination.
1149 Flags can have:
1150  STR_TERMINATE means the string in src is null terminated.
1151  STR_NOALIGN   means don't try to align.
1152 if STR_TERMINATE is set then src_len is ignored if it is -1.
1153 src_len is the length of the source area in bytes
1154 Return the number of bytes occupied by the string in src.
1155 The resulting string in "dest" is always null terminated.
1156**/
1157
1158size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1159{
1160	size_t ret;
1161
1162	if (dest_len == (size_t)-1)
1163		dest_len = sizeof(pstring);
1164
1165	if (ucs2_align(base_ptr, src, flags)) {
1166		src = (const void *)((const char *)src + 1);
1167		if (src_len != (size_t)-1)
1168			src_len--;
1169	}
1170
1171	if (flags & STR_TERMINATE) {
1172		/* src_len -1 is the default for null terminated strings. */
1173		if (src_len != (size_t)-1) {
1174			size_t len = strnlen_w(src, src_len/2);
1175			if (len < src_len/2)
1176				len++;
1177			src_len = len*2;
1178		}
1179	}
1180
1181	/* ucs2 is always a multiple of 2 bytes */
1182	if (src_len != (size_t)-1)
1183		src_len &= ~1;
1184
1185	ret = convert_string(CH_UCS2, CH_UNIX, src, src_len, dest, dest_len, True);
1186	if (ret == (size_t)-1) {
1187		return 0;
1188	}
1189
1190	if (src_len == (size_t)-1)
1191		src_len = ret*2;
1192
1193	if (dest_len)
1194		dest[MIN(ret, dest_len-1)] = 0;
1195	else
1196		dest[0] = 0;
1197
1198	return src_len;
1199}
1200
1201size_t pull_ucs2_pstring(char *dest, const void *src)
1202{
1203	return pull_ucs2(NULL, dest, src, sizeof(pstring), -1, STR_TERMINATE);
1204}
1205
1206size_t pull_ucs2_fstring(char *dest, const void *src)
1207{
1208	return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1209}
1210
1211/**
1212 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1213 *
1214 * @param dest always set at least to NULL
1215 *
1216 * @returns The number of bytes occupied by the string in the destination
1217 **/
1218
1219size_t pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src)
1220{
1221	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1222	*dest = NULL;
1223	return convert_string_talloc(ctx, CH_UCS2, CH_UNIX, src, src_len, (void **)dest, True);
1224}
1225
1226/**
1227 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer
1228 *
1229 * @param dest always set at least to NULL
1230 *
1231 * @returns The number of bytes occupied by the string in the destination
1232 **/
1233
1234size_t pull_ucs2_allocate(char **dest, const smb_ucs2_t *src)
1235{
1236	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1237	*dest = NULL;
1238	return convert_string_allocate(NULL, CH_UCS2, CH_UNIX, src, src_len, (void **)dest, True);
1239}
1240
1241/**
1242 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1243 *
1244 * @param dest always set at least to NULL
1245 *
1246 * @returns The number of bytes occupied by the string in the destination
1247 **/
1248
1249size_t pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src)
1250{
1251	size_t src_len = strlen(src)+1;
1252	*dest = NULL;
1253	return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1254}
1255
1256/**
1257 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer
1258 *
1259 * @param dest always set at least to NULL
1260 *
1261 * @returns The number of bytes occupied by the string in the destination
1262 **/
1263
1264size_t pull_utf8_allocate(char **dest, const char *src)
1265{
1266	size_t src_len = strlen(src)+1;
1267	*dest = NULL;
1268	return convert_string_allocate(NULL, CH_UTF8, CH_UNIX, src, src_len, (void **)dest, True);
1269}
1270
1271/**
1272 Copy a string from a char* src to a unicode or ascii
1273 dos codepage destination choosing unicode or ascii based on the
1274 flags in the SMB buffer starting at base_ptr.
1275 Return the number of bytes occupied by the string in the destination.
1276 flags can have:
1277  STR_TERMINATE means include the null termination.
1278  STR_UPPER     means uppercase in the destination.
1279  STR_ASCII     use ascii even with unicode packet.
1280  STR_NOALIGN   means don't do alignment.
1281 dest_len is the maximum length allowed in the destination. If dest_len
1282 is -1 then no maxiumum is used.
1283**/
1284
1285size_t push_string_fn(const char *function, unsigned int line, const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1286{
1287#ifdef DEVELOPER
1288	/* We really need to zero fill here, not clobber
1289	 * region, as we want to ensure that valgrind thinks
1290	 * all of the outgoing buffer has been written to
1291	 * so a send() or write() won't trap an error.
1292	 * JRA.
1293	 */
1294#if 0
1295	if (dest_len != (size_t)-1)
1296		clobber_region(function, line, dest, dest_len);
1297#else
1298	if (dest_len != (size_t)-1)
1299		memset(dest, '\0', dest_len);
1300#endif
1301#endif
1302
1303	if (!(flags & STR_ASCII) && \
1304	    ((flags & STR_UNICODE || \
1305	      (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1306		return push_ucs2(base_ptr, dest, src, dest_len, flags);
1307	}
1308	return push_ascii(dest, src, dest_len, flags);
1309}
1310
1311
1312/**
1313 Copy a string from a unicode or ascii source (depending on
1314 the packet flags) to a char* destination.
1315 Flags can have:
1316  STR_TERMINATE means the string in src is null terminated.
1317  STR_UNICODE   means to force as unicode.
1318  STR_ASCII     use ascii even with unicode packet.
1319  STR_NOALIGN   means don't do alignment.
1320 if STR_TERMINATE is set then src_len is ignored is it is -1
1321 src_len is the length of the source area in bytes.
1322 Return the number of bytes occupied by the string in src.
1323 The resulting string in "dest" is always null terminated.
1324**/
1325
1326size_t pull_string_fn(const char *function, unsigned int line, const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1327{
1328#ifdef DEVELOPER
1329	if (dest_len != (size_t)-1)
1330		clobber_region(function, line, dest, dest_len);
1331#endif
1332
1333	if (!(flags & STR_ASCII) && \
1334	    ((flags & STR_UNICODE || \
1335	      (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1336		return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1337	}
1338	return pull_ascii(dest, src, dest_len, src_len, flags);
1339}
1340
1341size_t align_string(const void *base_ptr, const char *p, int flags)
1342{
1343	if (!(flags & STR_ASCII) && \
1344	    ((flags & STR_UNICODE || \
1345	      (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1346		return ucs2_align(base_ptr, p, flags);
1347	}
1348	return 0;
1349}
1350
1351/****************************************************************
1352 Calculate the size (in bytes) of the next multibyte character in
1353 our internal character set. Note that p must be pointing to a
1354 valid mb char, not within one.
1355****************************************************************/
1356
1357size_t next_mb_char_size(const char *s)
1358{
1359	size_t i;
1360
1361	if (!(*s & 0x80))
1362		return 1; /* ascii. */
1363
1364	conv_silent = True;
1365	for ( i = 1; i <=4; i++ ) {
1366		smb_ucs2_t uc;
1367		if (convert_string(CH_UNIX, CH_UCS2, s, i, &uc, 2, False) == 2) {
1368#if 0 /* JRATEST */
1369			DEBUG(10,("next_mb_char_size: size %u at string %s\n",
1370				(unsigned int)i, s));
1371#endif
1372			conv_silent = False;
1373			return i;
1374		}
1375	}
1376	/* We're hosed - we don't know how big this is... */
1377	DEBUG(10,("next_mb_char_size: unknown size at string %s\n", s));
1378	conv_silent = False;
1379	return 1;
1380}
1381