1/*
2   Unix SMB/CIFS implementation.
3   Character set conversion Extensions
4   Copyright (C) Igor Vergeichik <iverg@mail.ru> 2001
5   Copyright (C) Andrew Tridgell 2001
6   Copyright (C) Simo Sorce 2001
7   Copyright (C) Martin Pool 2003
8
9   This program is free software; you can redistribute it and/or modify
10   it under the terms of the GNU General Public License as published by
11   the Free Software Foundation; either version 3 of the License, or
12   (at your option) any later version.
13
14   This program is distributed in the hope that it will be useful,
15   but WITHOUT ANY WARRANTY; without even the implied warranty of
16   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   GNU General Public License for more details.
18
19   You should have received a copy of the GNU General Public License
20   along with this program.  If not, see <http://www.gnu.org/licenses/>.
21
22*/
23#include "includes.h"
24
/**
 * Return the character substituted for input that cannot be converted.
 *
 * The value is fixed for now. We can parameterize this if someone
 * complains.... JRA.
 **/
char lp_failed_convert_char(void)
{
	const char replacement = '_';

	return replacement;
}
31
32/**
33 * @file
34 *
35 * @brief Character-set conversion routines built on our iconv.
36 *
37 * @note Samba's internal character set (at least in the 3.0 series)
38 * is always the same as the one for the Unix filesystem.  It is
39 * <b>not</b> necessarily UTF-8 and may be different on machines that
40 * need i18n filenames to be compatible with Unix software.  It does
41 * have to be a superset of ASCII.  All multibyte sequences must start
42 * with a byte with the high bit set.
43 *
44 * @sa lib/iconv.c
45 */
46
47
48static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];
49static bool conv_silent; /* Should we do a debug if the conversion fails ? */
50static bool initialized;
51
52/**
53 * Return the name of a charset to give to iconv().
54 **/
55static const char *charset_name(charset_t ch)
56{
57	const char *ret;
58
59	switch (ch) {
60	case CH_UTF16LE:
61		ret = "UTF-16LE";
62		break;
63	case CH_UTF16BE:
64		ret = "UTF-16BE";
65		break;
66	case CH_UNIX:
67		ret = lp_unix_charset();
68		break;
69	case CH_DOS:
70		ret = lp_dos_charset();
71		break;
72	case CH_DISPLAY:
73		ret = lp_display_charset();
74		break;
75	case CH_UTF8:
76		ret = "UTF8";
77		break;
78	default:
79		ret = NULL;
80	}
81
82#if defined(HAVE_NL_LANGINFO) && defined(CODESET)
83	if (ret && !strcmp(ret, "LOCALE")) {
84		const char *ln = NULL;
85
86#ifdef HAVE_SETLOCALE
87		setlocale(LC_ALL, "");
88#endif
89		ln = nl_langinfo(CODESET);
90		if (ln) {
91			/* Check whether the charset name is supported
92			   by iconv */
93			smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");
94			if (handle == (smb_iconv_t) -1) {
95				DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));
96				ln = NULL;
97			} else {
98				DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));
99				smb_iconv_close(handle);
100			}
101		}
102		ret = ln;
103	}
104#endif
105
106	if (!ret || !*ret) ret = "ASCII";
107	return ret;
108}
109
110void lazy_initialize_conv(void)
111{
112	if (!initialized) {
113		load_case_tables();
114		init_iconv();
115		initialized = true;
116	}
117}
118
119/**
120 * Destroy global objects allocated by init_iconv()
121 **/
122void gfree_charcnv(void)
123{
124	int c1, c2;
125
126	for (c1=0;c1<NUM_CHARSETS;c1++) {
127		for (c2=0;c2<NUM_CHARSETS;c2++) {
128			if ( conv_handles[c1][c2] ) {
129				smb_iconv_close( conv_handles[c1][c2] );
130				conv_handles[c1][c2] = 0;
131			}
132		}
133	}
134	initialized = false;
135}
136
137/**
138 * Initialize iconv conversion descriptors.
139 *
140 * This is called the first time it is needed, and also called again
141 * every time the configuration is reloaded, because the charset or
142 * codepage might have changed.
143 **/
144void init_iconv(void)
145{
146	int c1, c2;
147	bool did_reload = False;
148
149	/* so that charset_name() works we need to get the UNIX<->UCS2 going
150	   first */
151	if (!conv_handles[CH_UNIX][CH_UTF16LE])
152		conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII");
153
154	if (!conv_handles[CH_UTF16LE][CH_UNIX])
155		conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE));
156
157	for (c1=0;c1<NUM_CHARSETS;c1++) {
158		for (c2=0;c2<NUM_CHARSETS;c2++) {
159			const char *n1 = charset_name((charset_t)c1);
160			const char *n2 = charset_name((charset_t)c2);
161			if (conv_handles[c1][c2] &&
162			    strcmp(n1, conv_handles[c1][c2]->from_name) == 0 &&
163			    strcmp(n2, conv_handles[c1][c2]->to_name) == 0)
164				continue;
165
166			did_reload = True;
167
168			if (conv_handles[c1][c2])
169				smb_iconv_close(conv_handles[c1][c2]);
170
171			conv_handles[c1][c2] = smb_iconv_open(n2,n1);
172			if (conv_handles[c1][c2] == (smb_iconv_t)-1) {
173				DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n",
174					 charset_name((charset_t)c1), charset_name((charset_t)c2)));
175				if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) {
176					n1 = "ASCII";
177				}
178				if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) {
179					n2 = "ASCII";
180				}
181				DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n",
182					n1, n2 ));
183				conv_handles[c1][c2] = smb_iconv_open(n2,n1);
184				if (!conv_handles[c1][c2]) {
185					DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2));
186					smb_panic("init_iconv: conv_handle initialization failed");
187				}
188			}
189		}
190	}
191
192	if (did_reload) {
193		/* XXX: Does this really get called every time the dos
194		 * codepage changes? */
195		/* XXX: Is the did_reload test too strict? */
196		conv_silent = True;
197		init_valid_table();
198		conv_silent = False;
199	}
200}
201
202/**
203 * Convert string from one encoding to another, making error checking etc
204 * Slow path version - uses (slow) iconv.
205 *
206 * @param src pointer to source string (multibyte or singlebyte)
207 * @param srclen length of the source string in bytes
208 * @param dest pointer to destination string (multibyte or singlebyte)
209 * @param destlen maximal length allowed for string
210 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
211 * @returns the number of bytes occupied in the destination
212 *
213 * Ensure the srclen contains the terminating zero.
214 *
215 **/
216
217static size_t convert_string_internal(charset_t from, charset_t to,
218		      void const *src, size_t srclen,
219		      void *dest, size_t destlen, bool allow_bad_conv)
220{
221	size_t i_len, o_len;
222	size_t retval;
223	const char* inbuf = (const char*)src;
224	char* outbuf = (char*)dest;
225	smb_iconv_t descriptor;
226
227	lazy_initialize_conv();
228
229	descriptor = conv_handles[from][to];
230
231	if (srclen == (size_t)-1) {
232		if (from == CH_UTF16LE || from == CH_UTF16BE) {
233			srclen = (strlen_w((const smb_ucs2_t *)src)+1) * 2;
234		} else {
235			srclen = strlen((const char *)src)+1;
236		}
237	}
238
239
240	if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
241		if (!conv_silent)
242			DEBUG(0,("convert_string_internal: Conversion not supported.\n"));
243		return (size_t)-1;
244	}
245
246	i_len=srclen;
247	o_len=destlen;
248
249 again:
250
251	retval = smb_iconv(descriptor, &inbuf, &i_len, &outbuf, &o_len);
252	if(retval==(size_t)-1) {
253	    	const char *reason="unknown error";
254		switch(errno) {
255			case EINVAL:
256				reason="Incomplete multibyte sequence";
257				if (!conv_silent)
258					DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
259				if (allow_bad_conv)
260					goto use_as_is;
261				return (size_t)-1;
262			case E2BIG:
263				reason="No more room";
264				if (!conv_silent) {
265					if (from == CH_UNIX) {
266						DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n",
267							charset_name(from), charset_name(to),
268							(unsigned int)srclen, (unsigned int)destlen, (const char *)src));
269					} else {
270						DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n",
271							charset_name(from), charset_name(to),
272							(unsigned int)srclen, (unsigned int)destlen));
273					}
274				}
275				break;
276			case EILSEQ:
277				reason="Illegal multibyte sequence";
278				if (!conv_silent)
279					DEBUG(3,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
280				if (allow_bad_conv)
281					goto use_as_is;
282
283				return (size_t)-1;
284			default:
285				if (!conv_silent)
286					DEBUG(0,("convert_string_internal: Conversion error: %s(%s)\n",reason,inbuf));
287				return (size_t)-1;
288		}
289		/* smb_panic(reason); */
290	}
291	return destlen-o_len;
292
293 use_as_is:
294
295	/*
296	 * Conversion not supported. This is actually an error, but there are so
297	 * many misconfigured iconv systems and smb.conf's out there we can't just
298	 * fail. Do a very bad conversion instead.... JRA.
299	 */
300
301	{
302		if (o_len == 0 || i_len == 0)
303			return destlen - o_len;
304
305		if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
306				((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
307			/* Can't convert from utf16 any endian to multibyte.
308			   Replace with the default fail char.
309			*/
310			if (i_len < 2)
311				return destlen - o_len;
312			if (i_len >= 2) {
313				*outbuf = lp_failed_convert_char();
314
315				outbuf++;
316				o_len--;
317
318				inbuf += 2;
319				i_len -= 2;
320			}
321
322			if (o_len == 0 || i_len == 0)
323				return destlen - o_len;
324
325			/* Keep trying with the next char... */
326			goto again;
327
328		} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
329			/* Can't convert to UTF16LE - just widen by adding the
330			   default fail char then zero.
331			*/
332			if (o_len < 2)
333				return destlen - o_len;
334
335			outbuf[0] = lp_failed_convert_char();
336			outbuf[1] = '\0';
337
338			inbuf++;
339			i_len--;
340
341			outbuf += 2;
342			o_len -= 2;
343
344			if (o_len == 0 || i_len == 0)
345				return destlen - o_len;
346
347			/* Keep trying with the next char... */
348			goto again;
349
350		} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
351				to != CH_UTF16LE && to != CH_UTF16BE) {
352			/* Failed multibyte to multibyte. Just copy the default fail char and
353				try again. */
354			outbuf[0] = lp_failed_convert_char();
355
356			inbuf++;
357			i_len--;
358
359			outbuf++;
360			o_len--;
361
362			if (o_len == 0 || i_len == 0)
363				return destlen - o_len;
364
365			/* Keep trying with the next char... */
366			goto again;
367
368		} else {
369			/* Keep compiler happy.... */
370			return destlen - o_len;
371		}
372	}
373}
374
375/**
376 * Convert string from one encoding to another, making error checking etc
377 * Fast path version - handles ASCII first.
378 *
379 * @param src pointer to source string (multibyte or singlebyte)
380 * @param srclen length of the source string in bytes, or -1 for nul terminated.
381 * @param dest pointer to destination string (multibyte or singlebyte)
382 * @param destlen maximal length allowed for string - *NEVER* -1.
383 * @param allow_bad_conv determines if a "best effort" conversion is acceptable (never returns errors)
384 * @returns the number of bytes occupied in the destination
385 *
386 * Ensure the srclen contains the terminating zero.
387 *
388 * This function has been hand-tuned to provide a fast path.
389 * Don't change unless you really know what you are doing. JRA.
390 **/
391
392size_t convert_string(charset_t from, charset_t to,
393		      void const *src, size_t srclen,
394		      void *dest, size_t destlen, bool allow_bad_conv)
395{
396	/*
397	 * NB. We deliberately don't do a strlen here if srclen == -1.
398	 * This is very expensive over millions of calls and is taken
399	 * care of in the slow path in convert_string_internal. JRA.
400	 */
401
402#ifdef DEVELOPER
403	SMB_ASSERT(destlen != (size_t)-1);
404#endif
405
406	if (srclen == 0)
407		return 0;
408
409	if (from != CH_UTF16LE && from != CH_UTF16BE && to != CH_UTF16LE && to != CH_UTF16BE) {
410		const unsigned char *p = (const unsigned char *)src;
411		unsigned char *q = (unsigned char *)dest;
412		size_t slen = srclen;
413		size_t dlen = destlen;
414		unsigned char lastp = '\0';
415		size_t retval = 0;
416
417		/* If all characters are ascii, fast path here. */
418		while (slen && dlen) {
419			if ((lastp = *p) <= 0x7f) {
420				*q++ = *p++;
421				if (slen != (size_t)-1) {
422					slen--;
423				}
424				dlen--;
425				retval++;
426				if (!lastp)
427					break;
428			} else {
429#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
430				goto general_case;
431#else
432				size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
433				if (ret == (size_t)-1) {
434					return ret;
435				}
436				return retval + ret;
437#endif
438			}
439		}
440		if (!dlen) {
441			/* Even if we fast path we should note if we ran out of room. */
442			if (((slen != (size_t)-1) && slen) ||
443					((slen == (size_t)-1) && lastp)) {
444				errno = E2BIG;
445			}
446		}
447		return retval;
448	} else if (from == CH_UTF16LE && to != CH_UTF16LE) {
449		const unsigned char *p = (const unsigned char *)src;
450		unsigned char *q = (unsigned char *)dest;
451		size_t retval = 0;
452		size_t slen = srclen;
453		size_t dlen = destlen;
454		unsigned char lastp = '\0';
455
456		/* If all characters are ascii, fast path here. */
457		while (((slen == (size_t)-1) || (slen >= 2)) && dlen) {
458			if (((lastp = *p) <= 0x7f) && (p[1] == 0)) {
459				*q++ = *p;
460				if (slen != (size_t)-1) {
461					slen -= 2;
462				}
463				p += 2;
464				dlen--;
465				retval++;
466				if (!lastp)
467					break;
468			} else {
469#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
470				goto general_case;
471#else
472				size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
473				if (ret == (size_t)-1) {
474					return ret;
475				}
476				return retval + ret;
477#endif
478			}
479		}
480		if (!dlen) {
481			/* Even if we fast path we should note if we ran out of room. */
482			if (((slen != (size_t)-1) && slen) ||
483					((slen == (size_t)-1) && lastp)) {
484				errno = E2BIG;
485			}
486		}
487		return retval;
488	} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
489		const unsigned char *p = (const unsigned char *)src;
490		unsigned char *q = (unsigned char *)dest;
491		size_t retval = 0;
492		size_t slen = srclen;
493		size_t dlen = destlen;
494		unsigned char lastp = '\0';
495
496		/* If all characters are ascii, fast path here. */
497		while (slen && (dlen >= 2)) {
498			if ((lastp = *p) <= 0x7F) {
499				*q++ = *p++;
500				*q++ = '\0';
501				if (slen != (size_t)-1) {
502					slen--;
503				}
504				dlen -= 2;
505				retval += 2;
506				if (!lastp)
507					break;
508			} else {
509#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
510				goto general_case;
511#else
512				size_t ret = convert_string_internal(from, to, p, slen, q, dlen, allow_bad_conv);
513				if (ret == (size_t)-1) {
514					return ret;
515				}
516				return retval + ret;
517#endif
518			}
519		}
520		if (!dlen) {
521			/* Even if we fast path we should note if we ran out of room. */
522			if (((slen != (size_t)-1) && slen) ||
523					((slen == (size_t)-1) && lastp)) {
524				errno = E2BIG;
525			}
526		}
527		return retval;
528	}
529
530#ifdef BROKEN_UNICODE_COMPOSE_CHARACTERS
531  general_case:
532#endif
533	return convert_string_internal(from, to, src, srclen, dest, destlen, allow_bad_conv);
534}
535
536/**
537 * Convert between character sets, allocating a new buffer using talloc for the result.
538 *
539 * @param srclen length of source buffer.
540 * @param dest always set at least to NULL
541 * @parm converted_size set to the number of bytes occupied by the string in
542 * the destination on success.
543 * @note -1 is not accepted for srclen.
544 *
545 * @return true if new buffer was correctly allocated, and string was
546 * converted.
547 *
548 * Ensure the srclen contains the terminating zero.
549 *
550 * I hate the goto's in this function. It's embarressing.....
551 * There has to be a cleaner way to do this. JRA.
552 */
553bool convert_string_talloc(TALLOC_CTX *ctx, charset_t from, charset_t to,
554			   void const *src, size_t srclen, void *dst,
555			   size_t *converted_size, bool allow_bad_conv)
556
557{
558	size_t i_len, o_len, destlen = (srclen * 3) / 2;
559	size_t retval;
560	const char *inbuf = (const char *)src;
561	char *outbuf = NULL, *ob = NULL;
562	smb_iconv_t descriptor;
563	void **dest = (void **)dst;
564
565	*dest = NULL;
566
567	if (!converted_size) {
568		errno = EINVAL;
569		return false;
570	}
571
572	if (src == NULL || srclen == (size_t)-1) {
573		errno = EINVAL;
574		return false;
575	}
576	if (srclen == 0) {
577		ob = talloc_strdup(ctx, "");
578		if (ob == NULL) {
579			errno = ENOMEM;
580			return false;
581		}
582		*dest = ob;
583		*converted_size = 0;
584		return true;
585	}
586
587	lazy_initialize_conv();
588
589	descriptor = conv_handles[from][to];
590
591	if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
592		if (!conv_silent)
593			DEBUG(0,("convert_string_talloc: Conversion not supported.\n"));
594		errno = EOPNOTSUPP;
595		return false;
596	}
597
598  convert:
599
600	/* +2 is for ucs2 null termination. */
601	if ((destlen*2)+2 < destlen) {
602		/* wrapped ! abort. */
603		if (!conv_silent)
604			DEBUG(0, ("convert_string_talloc: destlen wrapped !\n"));
605		TALLOC_FREE(outbuf);
606		errno = EOPNOTSUPP;
607		return false;
608	} else {
609		destlen = destlen * 2;
610	}
611
612	/* +2 is for ucs2 null termination. */
613	ob = (char *)TALLOC_REALLOC(ctx, ob, destlen + 2);
614
615	if (!ob) {
616		DEBUG(0, ("convert_string_talloc: realloc failed!\n"));
617		errno = ENOMEM;
618		return false;
619	}
620	outbuf = ob;
621	i_len = srclen;
622	o_len = destlen;
623
624 again:
625
626	retval = smb_iconv(descriptor,
627			   &inbuf, &i_len,
628			   &outbuf, &o_len);
629	if(retval == (size_t)-1) 		{
630	    	const char *reason="unknown error";
631		switch(errno) {
632			case EINVAL:
633				reason="Incomplete multibyte sequence";
634				if (!conv_silent)
635					DEBUG(3,("convert_string_talloc: Conversion error: %s(%s)\n",reason,inbuf));
636				if (allow_bad_conv)
637					goto use_as_is;
638				break;
639			case E2BIG:
640				goto convert;
641			case EILSEQ:
642				reason="Illegal multibyte sequence";
643				if (!conv_silent)
644					DEBUG(3,("convert_string_talloc: Conversion error: %s(%s)\n",reason,inbuf));
645				if (allow_bad_conv)
646					goto use_as_is;
647				break;
648		}
649		if (!conv_silent)
650			DEBUG(0,("Conversion error: %s(%s)\n",reason,inbuf));
651		/* smb_panic(reason); */
652		TALLOC_FREE(ob);
653		return false;
654	}
655
656  out:
657
658	destlen = destlen - o_len;
659	/* Don't shrink unless we're reclaiming a lot of
660	 * space. This is in the hot codepath and these
661	 * reallocs *cost*. JRA.
662	 */
663	if (o_len > 1024) {
664		/* We're shrinking here so we know the +2 is safe from wrap. */
665		ob = (char *)TALLOC_REALLOC(ctx,ob,destlen + 2);
666	}
667
668	if (destlen && !ob) {
669		DEBUG(0, ("convert_string_talloc: out of memory!\n"));
670		errno = ENOMEM;
671		return false;
672	}
673
674	*dest = ob;
675
676	/* Must ucs2 null terminate in the extra space we allocated. */
677	ob[destlen] = '\0';
678	ob[destlen+1] = '\0';
679
680	*converted_size = destlen;
681	return true;
682
683 use_as_is:
684
685	/*
686	 * Conversion not supported. This is actually an error, but there are so
687	 * many misconfigured iconv systems and smb.conf's out there we can't just
688	 * fail. Do a very bad conversion instead.... JRA.
689	 */
690
691	{
692		if (o_len == 0 || i_len == 0)
693			goto out;
694
695		if (((from == CH_UTF16LE)||(from == CH_UTF16BE)) &&
696				((to != CH_UTF16LE)||(to != CH_UTF16BE))) {
697			/* Can't convert from utf16 any endian to multibyte.
698			   Replace with the default fail char.
699			*/
700
701			if (i_len < 2)
702				goto out;
703
704			if (i_len >= 2) {
705				*outbuf = lp_failed_convert_char();
706
707				outbuf++;
708				o_len--;
709
710				inbuf += 2;
711				i_len -= 2;
712			}
713
714			if (o_len == 0 || i_len == 0)
715				goto out;
716
717			/* Keep trying with the next char... */
718			goto again;
719
720		} else if (from != CH_UTF16LE && from != CH_UTF16BE && to == CH_UTF16LE) {
721			/* Can't convert to UTF16LE - just widen by adding the
722			   default fail char then zero.
723			*/
724			if (o_len < 2)
725				goto out;
726
727			outbuf[0] = lp_failed_convert_char();
728			outbuf[1] = '\0';
729
730			inbuf++;
731			i_len--;
732
733			outbuf += 2;
734			o_len -= 2;
735
736			if (o_len == 0 || i_len == 0)
737				goto out;
738
739			/* Keep trying with the next char... */
740			goto again;
741
742		} else if (from != CH_UTF16LE && from != CH_UTF16BE &&
743				to != CH_UTF16LE && to != CH_UTF16BE) {
744			/* Failed multibyte to multibyte. Just copy the default fail char and
745			   try again. */
746			outbuf[0] = lp_failed_convert_char();
747
748			inbuf++;
749			i_len--;
750
751			outbuf++;
752			o_len--;
753
754			if (o_len == 0 || i_len == 0)
755				goto out;
756
757			/* Keep trying with the next char... */
758			goto again;
759
760		} else {
761			/* Keep compiler happy.... */
762			goto out;
763		}
764	}
765}
766
767size_t unix_strupper(const char *src, size_t srclen, char *dest, size_t destlen)
768{
769	size_t size;
770	smb_ucs2_t *buffer;
771
772	if (!push_ucs2_talloc(talloc_tos(), &buffer, src, &size)) {
773		return (size_t)-1;
774	}
775
776	if (!strupper_w(buffer) && (dest == src)) {
777		TALLOC_FREE(buffer);
778		return srclen;
779	}
780
781	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
782	TALLOC_FREE(buffer);
783	return size;
784}
785
786/**
787 talloc_strdup() a unix string to upper case.
788**/
789
790char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *s)
791{
792	char *out_buffer = talloc_strdup(ctx,s);
793	const unsigned char *p = (const unsigned char *)s;
794	unsigned char *q = (unsigned char *)out_buffer;
795
796	if (!q) {
797		return NULL;
798	}
799
800	/* this is quite a common operation, so we want it to be
801	   fast. We optimise for the ascii case, knowing that all our
802	   supported multi-byte character sets are ascii-compatible
803	   (ie. they match for the first 128 chars) */
804
805	while (*p) {
806		if (*p & 0x80)
807			break;
808		*q++ = toupper_ascii_fast(*p);
809		p++;
810	}
811
812	if (*p) {
813		/* MB case. */
814		size_t converted_size, converted_size2;
815		smb_ucs2_t *ubuf = NULL;
816
817		/* We're not using the ascii buffer above. */
818		TALLOC_FREE(out_buffer);
819
820		if (!convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, s,
821					   strlen(s)+1, (void *)&ubuf,
822					   &converted_size, True))
823		{
824			return NULL;
825		}
826
827		strupper_w(ubuf);
828
829		if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, ubuf,
830					   converted_size, (void *)&out_buffer,
831					   &converted_size2, True))
832		{
833			TALLOC_FREE(ubuf);
834			return NULL;
835		}
836
837		/* Don't need the intermediate buffer
838 		 * anymore.
839 		 */
840		TALLOC_FREE(ubuf);
841	}
842
843	return out_buffer;
844}
845
846char *strupper_talloc(TALLOC_CTX *ctx, const char *s) {
847	return talloc_strdup_upper(ctx, s);
848}
849
850
851size_t unix_strlower(const char *src, size_t srclen, char *dest, size_t destlen)
852{
853	size_t size;
854	smb_ucs2_t *buffer = NULL;
855
856	if (!convert_string_talloc(talloc_tos(), CH_UNIX, CH_UTF16LE, src, srclen,
857				   (void **)(void *)&buffer, &size,
858				   True))
859	{
860		smb_panic("failed to create UCS2 buffer");
861	}
862	if (!strlower_w(buffer) && (dest == src)) {
863		TALLOC_FREE(buffer);
864		return srclen;
865	}
866	size = convert_string(CH_UTF16LE, CH_UNIX, buffer, size, dest, destlen, True);
867	TALLOC_FREE(buffer);
868	return size;
869}
870
871
872char *talloc_strdup_lower(TALLOC_CTX *ctx, const char *s)
873{
874	size_t converted_size;
875	smb_ucs2_t *buffer = NULL;
876	char *out_buffer;
877
878	if (!push_ucs2_talloc(ctx, &buffer, s, &converted_size)) {
879		return NULL;
880	}
881
882	strlower_w(buffer);
883
884	if (!pull_ucs2_talloc(ctx, &out_buffer, buffer, &converted_size)) {
885		TALLOC_FREE(buffer);
886		return NULL;
887	}
888
889	TALLOC_FREE(buffer);
890
891	return out_buffer;
892}
893
894char *strlower_talloc(TALLOC_CTX *ctx, const char *s) {
895	return talloc_strdup_lower(ctx, s);
896}
897
898size_t ucs2_align(const void *base_ptr, const void *p, int flags)
899{
900	if (flags & (STR_NOALIGN|STR_ASCII))
901		return 0;
902	return PTR_DIFF(p, base_ptr) & 1;
903}
904
905
906/**
907 * Copy a string from a char* unix src to a dos codepage string destination.
908 *
909 * @return the number of bytes occupied by the string in the destination.
910 *
911 * @param flags can include
912 * <dl>
913 * <dt>STR_TERMINATE</dt> <dd>means include the null termination</dd>
914 * <dt>STR_UPPER</dt> <dd>means uppercase in the destination</dd>
915 * </dl>
916 *
917 * @param dest_len the maximum length in bytes allowed in the
918 * destination.
919 **/
920size_t push_ascii(void *dest, const char *src, size_t dest_len, int flags)
921{
922	size_t src_len = strlen(src);
923	char *tmpbuf = NULL;
924	size_t ret;
925
926	/* No longer allow a length of -1. */
927	if (dest_len == (size_t)-1) {
928		smb_panic("push_ascii - dest_len == -1");
929	}
930
931	if (flags & STR_UPPER) {
932		tmpbuf = SMB_STRDUP(src);
933		if (!tmpbuf) {
934			smb_panic("malloc fail");
935		}
936		strupper_m(tmpbuf);
937		src = tmpbuf;
938	}
939
940	if (flags & (STR_TERMINATE | STR_TERMINATE_ASCII)) {
941		src_len++;
942	}
943
944	ret = convert_string(CH_UNIX, CH_DOS, src, src_len, dest, dest_len, True);
945	if (ret == (size_t)-1 &&
946			(flags & (STR_TERMINATE | STR_TERMINATE_ASCII))
947			&& dest_len > 0) {
948		((char *)dest)[0] = '\0';
949	}
950	SAFE_FREE(tmpbuf);
951	return ret;
952}
953
954size_t push_ascii_fstring(void *dest, const char *src)
955{
956	return push_ascii(dest, src, sizeof(fstring), STR_TERMINATE);
957}
958
959/********************************************************************
960 Push an nstring - ensure null terminated. Written by
961 moriyama@miraclelinux.com (MORIYAMA Masayuki).
962********************************************************************/
963
964size_t push_ascii_nstring(void *dest, const char *src)
965{
966	size_t i, buffer_len, dest_len;
967	smb_ucs2_t *buffer;
968
969	conv_silent = True;
970	if (!push_ucs2_talloc(talloc_tos(), &buffer, src, &buffer_len)) {
971		smb_panic("failed to create UCS2 buffer");
972	}
973
974	/* We're using buffer_len below to count ucs2 characters, not bytes. */
975	buffer_len /= sizeof(smb_ucs2_t);
976
977	dest_len = 0;
978	for (i = 0; buffer[i] != 0 && (i < buffer_len); i++) {
979		unsigned char mb[10];
980		/* Convert one smb_ucs2_t character at a time. */
981		size_t mb_len = convert_string(CH_UTF16LE, CH_DOS, buffer+i, sizeof(smb_ucs2_t), mb, sizeof(mb), False);
982		if ((mb_len != (size_t)-1) && (dest_len + mb_len <= MAX_NETBIOSNAME_LEN - 1)) {
983			memcpy((char *)dest + dest_len, mb, mb_len);
984			dest_len += mb_len;
985		} else {
986			errno = E2BIG;
987			break;
988		}
989	}
990	((char *)dest)[dest_len] = '\0';
991
992	conv_silent = False;
993	TALLOC_FREE(buffer);
994	return dest_len;
995}
996
997/********************************************************************
998 Push and malloc an ascii string. src and dest null terminated.
999********************************************************************/
1000
1001bool push_ascii_talloc(TALLOC_CTX *mem_ctx, char **dest, const char *src, size_t *converted_size)
1002{
1003	size_t src_len = strlen(src)+1;
1004
1005	*dest = NULL;
1006	return convert_string_talloc(mem_ctx, CH_UNIX, CH_DOS, src, src_len,
1007				     (void **)dest, converted_size, True);
1008}
1009
1010/**
1011 * Copy a string from a dos codepage source to a unix char* destination.
1012 *
1013 * The resulting string in "dest" is always null terminated.
1014 *
1015 * @param flags can have:
1016 * <dl>
1017 * <dt>STR_TERMINATE</dt>
1018 * <dd>STR_TERMINATE means the string in @p src
1019 * is null terminated, and src_len is ignored.</dd>
1020 * </dl>
1021 *
1022 * @param src_len is the length of the source area in bytes.
1023 * @returns the number of bytes occupied by the string in @p src.
1024 **/
1025size_t pull_ascii(char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1026{
1027	size_t ret;
1028
1029	if (dest_len == (size_t)-1) {
1030		/* No longer allow dest_len of -1. */
1031		smb_panic("pull_ascii - invalid dest_len of -1");
1032	}
1033
1034	if (flags & STR_TERMINATE) {
1035		if (src_len == (size_t)-1) {
1036			src_len = strlen((const char *)src) + 1;
1037		} else {
1038			size_t len = strnlen((const char *)src, src_len);
1039			if (len < src_len)
1040				len++;
1041			src_len = len;
1042		}
1043	}
1044
1045	ret = convert_string(CH_DOS, CH_UNIX, src, src_len, dest, dest_len, True);
1046	if (ret == (size_t)-1) {
1047		ret = 0;
1048		dest_len = 0;
1049	}
1050
1051	if (dest_len && ret) {
1052		/* Did we already process the terminating zero ? */
1053		if (dest[MIN(ret-1, dest_len-1)] != 0) {
1054			dest[MIN(ret, dest_len-1)] = 0;
1055		}
1056	} else  {
1057		dest[0] = 0;
1058	}
1059
1060	return src_len;
1061}
1062
1063/**
1064 * Copy a string from a dos codepage source to a unix char* destination.
1065 * Talloc version.
1066 *
1067 * The resulting string in "dest" is always null terminated.
1068 *
1069 * @param flags can have:
1070 * <dl>
1071 * <dt>STR_TERMINATE</dt>
1072 * <dd>STR_TERMINATE means the string in @p src
1073 * is null terminated, and src_len is ignored.</dd>
1074 * </dl>
1075 *
1076 * @param src_len is the length of the source area in bytes.
1077 * @returns the number of bytes occupied by the string in @p src.
1078 **/
1079
1080static size_t pull_ascii_base_talloc(TALLOC_CTX *ctx,
1081				     char **ppdest,
1082				     const void *src,
1083				     size_t src_len,
1084				     int flags)
1085{
1086	char *dest = NULL;
1087	size_t dest_len;
1088
1089	*ppdest = NULL;
1090
1091	if (!src_len) {
1092		return 0;
1093	}
1094
1095	if (flags & STR_TERMINATE) {
1096		if (src_len == (size_t)-1) {
1097			src_len = strlen((const char *)src) + 1;
1098		} else {
1099			size_t len = strnlen((const char *)src, src_len);
1100			if (len < src_len)
1101				len++;
1102			src_len = len;
1103		}
1104		/* Ensure we don't use an insane length from the client. */
1105		if (src_len >= 1024*1024) {
1106			char *msg = talloc_asprintf(ctx,
1107					"Bad src length (%u) in "
1108					"pull_ascii_base_talloc",
1109					(unsigned int)src_len);
1110			smb_panic(msg);
1111		}
1112	} else {
1113		/* Can't have an unlimited length
1114 		 * non STR_TERMINATE'd.
1115 		 */
1116		if (src_len == (size_t)-1) {
1117			errno = EINVAL;
1118			return 0;
1119		}
1120	}
1121
1122	/* src_len != -1 here. */
1123
1124	if (!convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len, &dest,
1125				     &dest_len, True)) {
1126		dest_len = 0;
1127	}
1128
1129	if (dest_len && dest) {
1130		/* Did we already process the terminating zero ? */
1131		if (dest[dest_len-1] != 0) {
1132			size_t size = talloc_get_size(dest);
1133			/* Have we got space to append the '\0' ? */
1134			if (size <= dest_len) {
1135				/* No, realloc. */
1136				dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1137						dest_len+1);
1138				if (!dest) {
1139					/* talloc fail. */
1140					dest_len = (size_t)-1;
1141					return 0;
1142				}
1143			}
1144			/* Yay - space ! */
1145			dest[dest_len] = '\0';
1146			dest_len++;
1147		}
1148	} else if (dest) {
1149		dest[0] = 0;
1150	}
1151
1152	*ppdest = dest;
1153	return src_len;
1154}
1155
1156size_t pull_ascii_fstring(char *dest, const void *src)
1157{
1158	return pull_ascii(dest, src, sizeof(fstring), -1, STR_TERMINATE);
1159}
1160
1161/* When pulling an nstring it can expand into a larger size (dos cp -> utf8). Cope with this. */
1162
1163size_t pull_ascii_nstring(char *dest, size_t dest_len, const void *src)
1164{
1165	return pull_ascii(dest, src, dest_len, sizeof(nstring)-1, STR_TERMINATE);
1166}
1167
1168/**
1169 * Copy a string from a char* src to a unicode destination.
1170 *
1171 * @returns the number of bytes occupied by the string in the destination.
1172 *
1173 * @param flags can have:
1174 *
1175 * <dl>
1176 * <dt>STR_TERMINATE <dd>means include the null termination.
1177 * <dt>STR_UPPER     <dd>means uppercase in the destination.
1178 * <dt>STR_NOALIGN   <dd>means don't do alignment.
1179 * </dl>
1180 *
1181 * @param dest_len is the maximum length allowed in the
1182 * destination.
1183 **/
1184
1185size_t push_ucs2(const void *base_ptr, void *dest, const char *src, size_t dest_len, int flags)
1186{
1187	size_t len=0;
1188	size_t src_len;
1189	size_t ret;
1190
1191	if (dest_len == (size_t)-1) {
1192		/* No longer allow dest_len of -1. */
1193		smb_panic("push_ucs2 - invalid dest_len of -1");
1194	}
1195
1196	if (flags & STR_TERMINATE)
1197		src_len = (size_t)-1;
1198	else
1199		src_len = strlen(src);
1200
1201	if (ucs2_align(base_ptr, dest, flags)) {
1202		*(char *)dest = 0;
1203		dest = (void *)((char *)dest + 1);
1204		if (dest_len)
1205			dest_len--;
1206		len++;
1207	}
1208
1209	/* ucs2 is always a multiple of 2 bytes */
1210	dest_len &= ~1;
1211
1212	ret =  convert_string(CH_UNIX, CH_UTF16LE, src, src_len, dest, dest_len, True);
1213	if (ret == (size_t)-1) {
1214		if ((flags & STR_TERMINATE) &&
1215				dest &&
1216				dest_len) {
1217			*(char *)dest = 0;
1218		}
1219		return len;
1220	}
1221
1222	len += ret;
1223
1224	if (flags & STR_UPPER) {
1225		smb_ucs2_t *dest_ucs2 = (smb_ucs2_t *)dest;
1226		size_t i;
1227
1228		/* We check for i < (ret / 2) below as the dest string isn't null
1229		   terminated if STR_TERMINATE isn't set. */
1230
1231		for (i = 0; i < (ret / 2) && i < (dest_len / 2) && dest_ucs2[i]; i++) {
1232			smb_ucs2_t v = toupper_w(dest_ucs2[i]);
1233			if (v != dest_ucs2[i]) {
1234				dest_ucs2[i] = v;
1235			}
1236		}
1237	}
1238
1239	return len;
1240}
1241
1242
1243/**
1244 * Copy a string from a unix char* src to a UCS2 destination,
1245 * allocating a buffer using talloc().
1246 *
1247 * @param dest always set at least to NULL
1248 * @parm converted_size set to the number of bytes occupied by the string in
1249 * the destination on success.
1250 *
1251 * @return true if new buffer was correctly allocated, and string was
1252 * converted.
1253 **/
1254bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src,
1255		      size_t *converted_size)
1256{
1257	size_t src_len = strlen(src)+1;
1258
1259	*dest = NULL;
1260	return convert_string_talloc(ctx, CH_UNIX, CH_UTF16LE, src, src_len,
1261				     (void **)dest, converted_size, True);
1262}
1263
1264
1265/**
1266 Copy a string from a char* src to a UTF-8 destination.
1267 Return the number of bytes occupied by the string in the destination
1268 Flags can have:
1269  STR_TERMINATE means include the null termination
1270  STR_UPPER     means uppercase in the destination
1271 dest_len is the maximum length allowed in the destination. If dest_len
1272 is -1 then no maxiumum is used.
1273**/
1274
1275static size_t push_utf8(void *dest, const char *src, size_t dest_len, int flags)
1276{
1277	size_t src_len = 0;
1278	size_t ret;
1279	char *tmpbuf = NULL;
1280
1281	if (dest_len == (size_t)-1) {
1282		/* No longer allow dest_len of -1. */
1283		smb_panic("push_utf8 - invalid dest_len of -1");
1284	}
1285
1286	if (flags & STR_UPPER) {
1287		tmpbuf = strupper_talloc(talloc_tos(), src);
1288		if (!tmpbuf) {
1289			return (size_t)-1;
1290		}
1291		src = tmpbuf;
1292		src_len = strlen(src);
1293	}
1294
1295	src_len = strlen(src);
1296	if (flags & STR_TERMINATE) {
1297		src_len++;
1298	}
1299
1300	ret = convert_string(CH_UNIX, CH_UTF8, src, src_len, dest, dest_len, True);
1301	TALLOC_FREE(tmpbuf);
1302	return ret;
1303}
1304
1305size_t push_utf8_fstring(void *dest, const char *src)
1306{
1307	return push_utf8(dest, src, sizeof(fstring), STR_TERMINATE);
1308}
1309
1310/**
1311 * Copy a string from a unix char* src to a UTF-8 destination, allocating a buffer using talloc
1312 *
1313 * @param dest always set at least to NULL
1314 * @parm converted_size set to the number of bytes occupied by the string in
1315 * the destination on success.
1316 *
1317 * @return true if new buffer was correctly allocated, and string was
1318 * converted.
1319 **/
1320
1321bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1322		      size_t *converted_size)
1323{
1324	size_t src_len = strlen(src)+1;
1325
1326	*dest = NULL;
1327	return convert_string_talloc(ctx, CH_UNIX, CH_UTF8, src, src_len,
1328				     (void**)dest, converted_size, True);
1329}
1330
1331/**
1332 Copy a string from a ucs2 source to a unix char* destination.
1333 Flags can have:
1334  STR_TERMINATE means the string in src is null terminated.
1335  STR_NOALIGN   means don't try to align.
1336 if STR_TERMINATE is set then src_len is ignored if it is -1.
1337 src_len is the length of the source area in bytes
1338 Return the number of bytes occupied by the string in src.
1339 The resulting string in "dest" is always null terminated.
1340**/
1341
1342size_t pull_ucs2(const void *base_ptr, char *dest, const void *src, size_t dest_len, size_t src_len, int flags)
1343{
1344	size_t ret;
1345
1346	if (dest_len == (size_t)-1) {
1347		/* No longer allow dest_len of -1. */
1348		smb_panic("pull_ucs2 - invalid dest_len of -1");
1349	}
1350
1351	if (!src_len) {
1352		if (dest && dest_len > 0) {
1353			dest[0] = '\0';
1354		}
1355		return 0;
1356	}
1357
1358	if (ucs2_align(base_ptr, src, flags)) {
1359		src = (const void *)((const char *)src + 1);
1360		if (src_len != (size_t)-1)
1361			src_len--;
1362	}
1363
1364	if (flags & STR_TERMINATE) {
1365		/* src_len -1 is the default for null terminated strings. */
1366		if (src_len != (size_t)-1) {
1367			size_t len = strnlen_w((const smb_ucs2_t *)src,
1368						src_len/2);
1369			if (len < src_len/2)
1370				len++;
1371			src_len = len*2;
1372		}
1373	}
1374
1375	/* ucs2 is always a multiple of 2 bytes */
1376	if (src_len != (size_t)-1)
1377		src_len &= ~1;
1378
1379	ret = convert_string(CH_UTF16LE, CH_UNIX, src, src_len, dest, dest_len, True);
1380	if (ret == (size_t)-1) {
1381		ret = 0;
1382		dest_len = 0;
1383	}
1384
1385	if (src_len == (size_t)-1)
1386		src_len = ret*2;
1387
1388	if (dest_len && ret) {
1389		/* Did we already process the terminating zero ? */
1390		if (dest[MIN(ret-1, dest_len-1)] != 0) {
1391			dest[MIN(ret, dest_len-1)] = 0;
1392		}
1393	} else {
1394		dest[0] = 0;
1395	}
1396
1397	return src_len;
1398}
1399
1400/**
1401 Copy a string from a ucs2 source to a unix char* destination.
1402 Talloc version with a base pointer.
1403 Uses malloc if TALLOC_CTX is NULL (this is a bad interface and
1404 needs fixing. JRA).
1405 Flags can have:
1406  STR_TERMINATE means the string in src is null terminated.
1407  STR_NOALIGN   means don't try to align.
1408 if STR_TERMINATE is set then src_len is ignored if it is -1.
1409 src_len is the length of the source area in bytes
1410 Return the number of bytes occupied by the string in src.
1411 The resulting string in "dest" is always null terminated.
1412**/
1413
1414size_t pull_ucs2_base_talloc(TALLOC_CTX *ctx,
1415			const void *base_ptr,
1416			char **ppdest,
1417			const void *src,
1418			size_t src_len,
1419			int flags)
1420{
1421	char *dest;
1422	size_t dest_len;
1423
1424	*ppdest = NULL;
1425
1426#ifdef DEVELOPER
1427	/* Ensure we never use the braindead "malloc" varient. */
1428	if (ctx == NULL) {
1429		smb_panic("NULL talloc CTX in pull_ucs2_base_talloc\n");
1430	}
1431#endif
1432
1433	if (!src_len) {
1434		return 0;
1435	}
1436
1437	if (ucs2_align(base_ptr, src, flags)) {
1438		src = (const void *)((const char *)src + 1);
1439		if (src_len != (size_t)-1)
1440			src_len--;
1441	}
1442
1443	if (flags & STR_TERMINATE) {
1444		/* src_len -1 is the default for null terminated strings. */
1445		if (src_len != (size_t)-1) {
1446			size_t len = strnlen_w((const smb_ucs2_t *)src,
1447						src_len/2);
1448			if (len < src_len/2)
1449				len++;
1450			src_len = len*2;
1451		} else {
1452			/*
1453			 * src_len == -1 - alloc interface won't take this
1454			 * so we must calculate.
1455			 */
1456			src_len = (strlen_w((const smb_ucs2_t *)src)+1)*sizeof(smb_ucs2_t);
1457		}
1458		/* Ensure we don't use an insane length from the client. */
1459		if (src_len >= 1024*1024) {
1460			smb_panic("Bad src length in pull_ucs2_base_talloc\n");
1461		}
1462	} else {
1463		/* Can't have an unlimited length
1464		 * non STR_TERMINATE'd.
1465		 */
1466		if (src_len == (size_t)-1) {
1467			errno = EINVAL;
1468			return 0;
1469		}
1470	}
1471
1472	/* src_len != -1 here. */
1473
1474	/* ucs2 is always a multiple of 2 bytes */
1475	src_len &= ~1;
1476
1477	if (!convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1478				   (void *)&dest, &dest_len, True)) {
1479		dest_len = 0;
1480	}
1481
1482	if (dest_len) {
1483		/* Did we already process the terminating zero ? */
1484		if (dest[dest_len-1] != 0) {
1485			size_t size = talloc_get_size(dest);
1486			/* Have we got space to append the '\0' ? */
1487			if (size <= dest_len) {
1488				/* No, realloc. */
1489				dest = TALLOC_REALLOC_ARRAY(ctx, dest, char,
1490						dest_len+1);
1491				if (!dest) {
1492					/* talloc fail. */
1493					dest_len = (size_t)-1;
1494					return 0;
1495				}
1496			}
1497			/* Yay - space ! */
1498			dest[dest_len] = '\0';
1499			dest_len++;
1500		}
1501	} else if (dest) {
1502		dest[0] = 0;
1503	}
1504
1505	*ppdest = dest;
1506	return src_len;
1507}
1508
1509size_t pull_ucs2_fstring(char *dest, const void *src)
1510{
1511	return pull_ucs2(NULL, dest, src, sizeof(fstring), -1, STR_TERMINATE);
1512}
1513
1514/**
1515 * Copy a string from a UCS2 src to a unix char * destination, allocating a buffer using talloc
1516 *
1517 * @param dest always set at least to NULL
1518 * @parm converted_size set to the number of bytes occupied by the string in
1519 * the destination on success.
1520 *
1521 * @return true if new buffer was correctly allocated, and string was
1522 * converted.
1523 **/
1524
1525bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src,
1526		      size_t *converted_size)
1527{
1528	size_t src_len = (strlen_w(src)+1) * sizeof(smb_ucs2_t);
1529
1530	*dest = NULL;
1531	return convert_string_talloc(ctx, CH_UTF16LE, CH_UNIX, src, src_len,
1532				     (void **)dest, converted_size, True);
1533}
1534
1535/**
1536 * Copy a string from a UTF-8 src to a unix char * destination, allocating a buffer using talloc
1537 *
1538 * @param dest always set at least to NULL
1539 * @parm converted_size set to the number of bytes occupied by the string in
1540 * the destination on success.
1541 *
1542 * @return true if new buffer was correctly allocated, and string was
1543 * converted.
1544 **/
1545
1546bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1547		      size_t *converted_size)
1548{
1549	size_t src_len = strlen(src)+1;
1550
1551	*dest = NULL;
1552	return convert_string_talloc(ctx, CH_UTF8, CH_UNIX, src, src_len,
1553				     (void **)dest, converted_size, True);
1554}
1555
1556
1557/**
1558 * Copy a string from a DOS src to a unix char * destination, allocating a buffer using talloc
1559 *
1560 * @param dest always set at least to NULL
1561 * @parm converted_size set to the number of bytes occupied by the string in
1562 * the destination on success.
1563 *
1564 * @return true if new buffer was correctly allocated, and string was
1565 * converted.
1566 **/
1567
1568bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src,
1569		       size_t *converted_size)
1570{
1571	size_t src_len = strlen(src)+1;
1572
1573	*dest = NULL;
1574	return convert_string_talloc(ctx, CH_DOS, CH_UNIX, src, src_len,
1575				     (void **)dest, converted_size, True);
1576}
1577
1578/**
1579 Copy a string from a char* src to a unicode or ascii
1580 dos codepage destination choosing unicode or ascii based on the
1581 flags supplied
1582 Return the number of bytes occupied by the string in the destination.
1583 flags can have:
1584  STR_TERMINATE means include the null termination.
1585  STR_UPPER     means uppercase in the destination.
1586  STR_ASCII     use ascii even with unicode packet.
1587  STR_NOALIGN   means don't do alignment.
1588 dest_len is the maximum length allowed in the destination. If dest_len
1589 is -1 then no maxiumum is used.
1590**/
1591
1592size_t push_string_check_fn(const char *function, unsigned int line,
1593			    void *dest, const char *src,
1594			    size_t dest_len, int flags)
1595{
1596#ifdef DEVELOPER
1597	/* We really need to zero fill here, not clobber
1598	 * region, as we want to ensure that valgrind thinks
1599	 * all of the outgoing buffer has been written to
1600	 * so a send() or write() won't trap an error.
1601	 * JRA.
1602	 */
1603#if 0
1604	clobber_region(function, line, dest, dest_len);
1605#else
1606	memset(dest, '\0', dest_len);
1607#endif
1608#endif
1609
1610	if (!(flags & STR_ASCII) && (flags & STR_UNICODE)) {
1611		return push_ucs2(NULL, dest, src, dest_len, flags);
1612	}
1613	return push_ascii(dest, src, dest_len, flags);
1614}
1615
1616
1617/**
1618 Copy a string from a char* src to a unicode or ascii
1619 dos codepage destination choosing unicode or ascii based on the
1620 flags in the SMB buffer starting at base_ptr.
1621 Return the number of bytes occupied by the string in the destination.
1622 flags can have:
1623  STR_TERMINATE means include the null termination.
1624  STR_UPPER     means uppercase in the destination.
1625  STR_ASCII     use ascii even with unicode packet.
1626  STR_NOALIGN   means don't do alignment.
1627 dest_len is the maximum length allowed in the destination. If dest_len
1628 is -1 then no maxiumum is used.
1629**/
1630
1631size_t push_string_base(const char *function, unsigned int line,
1632			const char *base, uint16 flags2,
1633			void *dest, const char *src,
1634			size_t dest_len, int flags)
1635{
1636#ifdef DEVELOPER
1637	/* We really need to zero fill here, not clobber
1638	 * region, as we want to ensure that valgrind thinks
1639	 * all of the outgoing buffer has been written to
1640	 * so a send() or write() won't trap an error.
1641	 * JRA.
1642	 */
1643#if 0
1644	clobber_region(function, line, dest, dest_len);
1645#else
1646	memset(dest, '\0', dest_len);
1647#endif
1648#endif
1649
1650	if (!(flags & STR_ASCII) && \
1651	    ((flags & STR_UNICODE || \
1652	      (flags2 & FLAGS2_UNICODE_STRINGS)))) {
1653		return push_ucs2(base, dest, src, dest_len, flags);
1654	}
1655	return push_ascii(dest, src, dest_len, flags);
1656}
1657
1658/**
1659 Copy a string from a char* src to a unicode or ascii
1660 dos codepage destination choosing unicode or ascii based on the
1661 flags supplied
1662 Return the number of bytes occupied by the string in the destination.
1663 flags can have:
1664  STR_TERMINATE means include the null termination.
1665  STR_UPPER     means uppercase in the destination.
1666  STR_ASCII     use ascii even with unicode packet.
1667  STR_NOALIGN   means don't do alignment.
1668 dest_len is the maximum length allowed in the destination. If dest_len
1669 is -1 then no maxiumum is used.
1670**/
1671
1672ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags)
1673{
1674	size_t ret;
1675#ifdef DEVELOPER
1676	/* We really need to zero fill here, not clobber
1677	 * region, as we want to ensure that valgrind thinks
1678	 * all of the outgoing buffer has been written to
1679	 * so a send() or write() won't trap an error.
1680	 * JRA.
1681	 */
1682	memset(dest, '\0', dest_len);
1683#endif
1684
1685	if (!(flags & STR_ASCII) && \
1686	    (flags & STR_UNICODE)) {
1687		ret = push_ucs2(NULL, dest, src, dest_len, flags);
1688	} else {
1689		ret = push_ascii(dest, src, dest_len, flags);
1690	}
1691	if (ret == (size_t)-1) {
1692		return -1;
1693	}
1694	return ret;
1695}
1696
1697/**
1698 Copy a string from a unicode or ascii source (depending on
1699 the packet flags) to a char* destination.
1700 Flags can have:
1701  STR_TERMINATE means the string in src is null terminated.
1702  STR_UNICODE   means to force as unicode.
1703  STR_ASCII     use ascii even with unicode packet.
1704  STR_NOALIGN   means don't do alignment.
1705 if STR_TERMINATE is set then src_len is ignored is it is -1
1706 src_len is the length of the source area in bytes.
1707 Return the number of bytes occupied by the string in src.
1708 The resulting string in "dest" is always null terminated.
1709**/
1710
1711size_t pull_string_fn(const char *function,
1712			unsigned int line,
1713			const void *base_ptr,
1714			uint16 smb_flags2,
1715			char *dest,
1716			const void *src,
1717			size_t dest_len,
1718			size_t src_len,
1719			int flags)
1720{
1721#ifdef DEVELOPER
1722	clobber_region(function, line, dest, dest_len);
1723#endif
1724
1725	if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1726		smb_panic("No base ptr to get flg2 and neither ASCII nor "
1727			  "UNICODE defined");
1728	}
1729
1730	if (!(flags & STR_ASCII) && \
1731	    ((flags & STR_UNICODE || \
1732	      (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1733		return pull_ucs2(base_ptr, dest, src, dest_len, src_len, flags);
1734	}
1735	return pull_ascii(dest, src, dest_len, src_len, flags);
1736}
1737
1738/**
1739 Copy a string from a unicode or ascii source (depending on
1740 the packet flags) to a char* destination.
1741 Variant that uses talloc.
1742 Flags can have:
1743  STR_TERMINATE means the string in src is null terminated.
1744  STR_UNICODE   means to force as unicode.
1745  STR_ASCII     use ascii even with unicode packet.
1746  STR_NOALIGN   means don't do alignment.
1747 if STR_TERMINATE is set then src_len is ignored is it is -1
1748 src_len is the length of the source area in bytes.
1749 Return the number of bytes occupied by the string in src.
1750 The resulting string in "dest" is always null terminated.
1751**/
1752
1753size_t pull_string_talloc_fn(const char *function,
1754			unsigned int line,
1755			TALLOC_CTX *ctx,
1756			const void *base_ptr,
1757			uint16 smb_flags2,
1758			char **ppdest,
1759			const void *src,
1760			size_t src_len,
1761			int flags)
1762{
1763	if ((base_ptr == NULL) && ((flags & (STR_ASCII|STR_UNICODE)) == 0)) {
1764		smb_panic("No base ptr to get flg2 and neither ASCII nor "
1765			  "UNICODE defined");
1766	}
1767
1768	if (!(flags & STR_ASCII) && \
1769	    ((flags & STR_UNICODE || \
1770	      (smb_flags2 & FLAGS2_UNICODE_STRINGS)))) {
1771		return pull_ucs2_base_talloc(ctx,
1772					base_ptr,
1773					ppdest,
1774					src,
1775					src_len,
1776					flags);
1777	}
1778	return pull_ascii_base_talloc(ctx,
1779					ppdest,
1780					src,
1781					src_len,
1782					flags);
1783}
1784
1785
1786size_t align_string(const void *base_ptr, const char *p, int flags)
1787{
1788	if (!(flags & STR_ASCII) && \
1789	    ((flags & STR_UNICODE || \
1790	      (SVAL(base_ptr, smb_flg2) & FLAGS2_UNICODE_STRINGS)))) {
1791		return ucs2_align(base_ptr, p, flags);
1792	}
1793	return 0;
1794}
1795
1796/*
1797  Return the unicode codepoint for the next multi-byte CH_UNIX character
1798  in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.
1799
1800  Also return the number of bytes consumed (which tells the caller
1801  how many bytes to skip to get to the next CH_UNIX character).
1802
1803  Return INVALID_CODEPOINT if the next character cannot be converted.
1804*/
1805
1806codepoint_t next_codepoint(const char *str, size_t *size)
1807{
1808	/* It cannot occupy more than 4 bytes in UTF16 format */
1809	uint8_t buf[4];
1810	smb_iconv_t descriptor;
1811	size_t ilen_orig;
1812	size_t ilen;
1813	size_t olen;
1814	char *outbuf;
1815
1816	if ((str[0] & 0x80) == 0) {
1817		*size = 1;
1818		return (codepoint_t)str[0];
1819	}
1820
1821	/* We assume that no multi-byte character can take
1822	   more than 5 bytes. This is OK as we only
1823	   support codepoints up to 1M */
1824
1825	ilen_orig = strnlen(str, 5);
1826	ilen = ilen_orig;
1827
1828        lazy_initialize_conv();
1829
1830        descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1831	if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1832		*size = 1;
1833		return INVALID_CODEPOINT;
1834	}
1835
1836	/* This looks a little strange, but it is needed to cope
1837	   with codepoints above 64k which are encoded as per RFC2781. */
1838	olen = 2;
1839	outbuf = (char *)buf;
1840	smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);
1841	if (olen == 2) {
1842		/* We failed to convert to a 2 byte character.
1843		   See if we can convert to a 4 UTF16-LE byte char encoding.
1844		*/
1845		olen = 4;
1846		outbuf = (char *)buf;
1847		smb_iconv(descriptor,  &str, &ilen, &outbuf, &olen);
1848		if (olen == 4) {
1849			/* We didn't convert any bytes */
1850			*size = 1;
1851			return INVALID_CODEPOINT;
1852		}
1853		olen = 4 - olen;
1854	} else {
1855		olen = 2 - olen;
1856	}
1857
1858	*size = ilen_orig - ilen;
1859
1860	if (olen == 2) {
1861		/* 2 byte, UTF16-LE encoded value. */
1862		return (codepoint_t)SVAL(buf, 0);
1863	}
1864	if (olen == 4) {
1865		/* Decode a 4 byte UTF16-LE character manually.
1866		   See RFC2871 for the encoding machanism.
1867		*/
1868		codepoint_t w1 = SVAL(buf,0) & ~0xD800;
1869		codepoint_t w2 = SVAL(buf,2) & ~0xDC00;
1870
1871		return (codepoint_t)0x10000 +
1872				(w1 << 10) + w2;
1873	}
1874
1875	/* no other length is valid */
1876	return INVALID_CODEPOINT;
1877}
1878
1879/*
1880  push a single codepoint into a CH_UNIX string the target string must
1881  be able to hold the full character, which is guaranteed if it is at
1882  least 5 bytes in size. The caller may pass less than 5 bytes if they
1883  are sure the character will fit (for example, you can assume that
1884  uppercase/lowercase of a character will not add more than 1 byte)
1885
1886  return the number of bytes occupied by the CH_UNIX character, or
1887  -1 on failure
1888*/
1889_PUBLIC_ ssize_t push_codepoint(char *str, codepoint_t c)
1890{
1891	smb_iconv_t descriptor;
1892	uint8_t buf[4];
1893	size_t ilen, olen;
1894	const char *inbuf;
1895
1896	if (c < 128) {
1897		*str = c;
1898		return 1;
1899	}
1900
1901	lazy_initialize_conv();
1902
1903	descriptor = conv_handles[CH_UNIX][CH_UTF16LE];
1904	if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {
1905		return -1;
1906	}
1907
1908	if (c < 0x10000) {
1909		ilen = 2;
1910		olen = 5;
1911		inbuf = (char *)buf;
1912		SSVAL(buf, 0, c);
1913		smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
1914		if (ilen != 0) {
1915			return -1;
1916		}
1917		return 5 - olen;
1918	}
1919
1920	c -= 0x10000;
1921
1922	buf[0] = (c>>10) & 0xFF;
1923	buf[1] = (c>>18) | 0xd8;
1924	buf[2] = c & 0xFF;
1925	buf[3] = ((c>>8) & 0x3) | 0xdc;
1926
1927	ilen = 4;
1928	olen = 5;
1929	inbuf = (char *)buf;
1930
1931	smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);
1932	if (ilen != 0) {
1933		return -1;
1934	}
1935	return 5 - olen;
1936}
1937
1938
1939