1/* -*- c-file-style: "python"; indent-tabs-mode: nil; -*-
2
3   Python wrapper for Samba tdb pack/unpack functions
4   Copyright (C) Martin Pool 2002, 2003
5
6
7   NOTE PYTHON STYLE GUIDE
8   http://www.python.org/peps/pep-0007.html
9
10
11   This program is free software; you can redistribute it and/or modify
12   it under the terms of the GNU General Public License as published by
13   the Free Software Foundation; either version 2 of the License, or
14   (at your option) any later version.
15
16   This program is distributed in the hope that it will be useful,
17   but WITHOUT ANY WARRANTY; without even the implied warranty of
18   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19   GNU General Public License for more details.
20
21   You should have received a copy of the GNU General Public License
22   along with this program; if not, write to the Free Software
23   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24*/
25
26#include "Python.h"
27
28/* This symbol is used in both config.h and Python.h which causes an
29   annoying compiler warning. */
30
31#ifdef HAVE_FSTAT
32#undef HAVE_FSTAT
33#endif
34
35/* This module is supposed to be standalone, however for portability
36   it would be good to use the FUNCTION_MACRO preprocessor define. */
37
38#include "include/config.h"
39
40#ifdef HAVE_FUNCTION_MACRO
41#define FUNCTION_MACRO  (__FUNCTION__)
42#else
43#define FUNCTION_MACRO  (__FILE__)
44#endif
45
46static PyObject * pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list);
47static PyObject * pytdbpack_str(char ch,
48				PyObject *val_iter, PyObject *packed_list,
49				const char *encoding);
50static PyObject * pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list);
51
52static PyObject *pytdbunpack_item(char, char **pbuf, int *plen, PyObject *);
53
54static PyObject *pytdbpack_data(const char *format_str,
55				     PyObject *val_seq,
56				     PyObject *val_list);
57
58static PyObject *
59pytdbunpack_string(char **pbuf, int *plen, const char *encoding);
60
61static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf);
62
63
64static PyObject *pytdbpack_bad_type(char ch,
65				    const char *expected,
66				    PyObject *val_obj);
67
/* Module-level docstring, passed to Py_InitModule3.  The list of format
   codes below is kept in step with the dispatch in pytdbpack_data and
   pytdbunpack_item: 'p' is the numeric "pointer" code, 'P' is a string. */
static const char * pytdbpack_docstring =
"Convert between Python values and Samba binary encodings.\n"
"\n"
"This module is conceptually similar to the standard 'struct' module, but it\n"
"uses both a different binary format and a different description string.\n"
"\n"
"Samba's encoding is based on that used inside DCE-RPC and SMB: a\n"
"little-endian, unpadded, non-self-describing binary format.  It is intended\n"
"that these functions be as similar as possible to the routines in Samba's\n"
"tdb/tdbutil module, with appropriate adjustments for Python datatypes.\n"
"\n"
"Python strings are used to specify the format of data to be packed or\n"
"unpacked.\n"
"\n"
"String encodings are implied by the database format: they may be either DOS\n"
"codepage (currently hardcoded to 850), or Unix codepage (currently hardcoded\n"
"to be the same as the default Python encoding).\n"
"\n"
"tdbpack format strings:\n"
"\n"
"    'f': NUL-terminated string in codepage iso8859-1\n"
"   \n"
"    'P': same as 'f'\n"
"\n"
"    'F': NUL-terminated string in iso-8859-1 (not currently accepted by\n"
"         pack() or unpack(), which raise ValueError for it)\n"
"\n"
"    'd':  4 byte little-endian unsigned number\n"
"\n"
"    'w':  2 byte little-endian unsigned number\n"
"\n"
"    'p': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is\n"
"          really just an \"exists\" or \"does not exist\" flag.  It is packed\n"
"          and unpacked as a 4 byte little-endian number, like 'd'.\n"
"    \n"
"    'B': 4-byte LE length, followed by that many bytes of binary data.\n"
"         Corresponds to a Python integer giving the length, followed by a byte\n"
"         string of the appropriate length.\n"
"\n"
"    '$': Special flag indicating that the preceding format code should be\n"
"         repeated while data remains.  This is only supported for unpacking.\n"
"\n"
"    Every code corresponds to a single Python object, except 'B' which\n"
"    corresponds to two values (length and contents), and '$', which produces\n"
"    however many make sense.\n";
112
/* Docstring attached to the "pack" entry of the module's method table. */
static const char pytdbpack_doc[] =
	"pack(format, values) -> buffer\n"
	"Pack Python objects into Samba binary format according to format string.\n"
	"\n"
	"arguments:\n"
	"    format -- string of tdbpack format characters\n"
	"    values -- sequence of value objects corresponding 1:1 to format characters\n"
	"\n"
	"returns:\n"
	"    buffer -- string containing packed data\n"
	"\n"
	"raises:\n"
	"    IndexError -- if there are too few values for the format\n"
	"    ValueError -- if any of the format characters is illegal\n"
	"    TypeError  -- if the format is not a string, or values is not a sequence,\n"
	"        or any of the values is of the wrong type for the corresponding\n"
	"        format character\n"
	"\n"
	"notes:\n"
	"    For historical reasons, it is not an error to pass more values than are consumed\n"
	"    by the format.\n";
134
135
/* Docstring attached to the "unpack" entry of the module's method table. */
static const char pytdbunpack_doc[] =
	"unpack(format, buffer) -> (values, rest)\n"
	"Unpack Samba binary data according to format string.\n"
	"\n"
	"arguments:\n"
	"    format -- string of tdbpack characters\n"
	"    buffer -- string of packed binary data\n"
	"\n"
	"returns:\n"
	"    2-tuple of:\n"
	"        values -- sequence of values corresponding 1:1 to format characters\n"
	"        rest -- string containing data that was not decoded, or '' if the\n"
	"            whole string was consumed\n"
	"\n"
	"raises:\n"
	"    IndexError -- if there is insufficient data in the buffer for the\n"
	"        format (or if the data is corrupt and contains a variable-length\n"
	"        field extending past the end)\n"
	"    ValueError -- if any of the format characters is illegal\n"
	"\n"
	"notes:\n"
	"    Because unconsumed data is returned, you can feed it back in to the\n"
	"    unpacker to extract further fields.  Alternatively, if you wish to modify\n"
	"    some fields near the start of the data, you may be able to save time by\n"
	"    only unpacking and repacking the necessary part.\n";
161
162
/* Codec name for strings stored in the DOS codepage. */
char const *pytdb_dos_encoding = "cp850";

/* Codec name for strings stored in the Unix codepage.  It is deliberately
   NULL: the Samba default encoding *must* be the same as the Python default
   encoding, so no explicit codec is named. */
char const *pytdb_unix_encoding = NULL;
168
169
170/*
171  * Pack objects to bytes.
172  *
173  * All objects are first individually encoded onto a list, and then the list
174  * of strings is concatenated.  This is faster than concatenating strings,
175  * and reasonably simple to code.
176  */
177static PyObject *
178pytdbpack(PyObject *self,
179	       PyObject *args)
180{
181	char *format_str;
182	PyObject *val_seq, *val_iter = NULL,
183		*packed_list = NULL, *packed_str = NULL,
184		*empty_str = NULL;
185
186	/* TODO: Test passing wrong types or too many arguments */
187	if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
188		return NULL;
189
190	if (!(val_iter = PyObject_GetIter(val_seq)))
191		goto out;
192
193	/* Create list to hold strings until we're done, then join them all. */
194	if (!(packed_list = PyList_New(0)))
195		goto out;
196
197	if (!pytdbpack_data(format_str, val_iter, packed_list))
198		goto out;
199
200	/* this function is not officially documented but it works */
201	if (!(empty_str = PyString_InternFromString("")))
202		goto out;
203
204	packed_str = _PyString_Join(empty_str, packed_list);
205
206  out:
207	Py_XDECREF(empty_str);
208	Py_XDECREF(val_iter);
209	Py_XDECREF(packed_list);
210
211	return packed_str;
212}
213
214
215/*
216  Pack data according to FORMAT_STR from the elements of VAL_SEQ into
217  PACKED_BUF.
218
219  The string has already been checked out, so we know that VAL_SEQ is large
220  enough to hold the packed data, and that there are enough value items.
221  (However, their types may not have been thoroughly checked yet.)
222
223  In addition, val_seq is a Python Fast sequence.
224
225  Returns NULL for error (with exception set), or None.
226*/
227PyObject *
228pytdbpack_data(const char *format_str,
229		    PyObject *val_iter,
230		    PyObject *packed_list)
231{
232	int format_i, val_i = 0;
233
234	for (format_i = 0, val_i = 0; format_str[format_i]; format_i++) {
235		char ch = format_str[format_i];
236
237		switch (ch) {
238			/* dispatch to the appropriate packer for this type,
239			   which should pull things off the iterator, and
240			   append them to the packed_list */
241		case 'w':
242		case 'd':
243		case 'p':
244			if (!(packed_list = pytdbpack_number(ch, val_iter, packed_list)))
245				return NULL;
246			break;
247
248		case 'f':
249		case 'P':
250			if (!(packed_list = pytdbpack_str(ch, val_iter, packed_list, pytdb_unix_encoding)))
251				return NULL;
252			break;
253
254		case 'B':
255			if (!(packed_list = pytdbpack_buffer(val_iter, packed_list)))
256				return NULL;
257			break;
258
259		default:
260			PyErr_Format(PyExc_ValueError,
261				     "%s: format character '%c' is not supported",
262				     FUNCTION_MACRO, ch);
263			return NULL;
264		}
265	}
266
267	return packed_list;
268}
269
270
271static PyObject *
272pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list)
273{
274	unsigned long val_long;
275	PyObject *val_obj = NULL, *long_obj = NULL, *result_obj = NULL;
276	PyObject *new_list = NULL;
277	unsigned char pack_buf[4];
278
279	if (!(val_obj = PyIter_Next(val_iter)))
280		goto out;
281
282	if (!(long_obj = PyNumber_Long(val_obj))) {
283		pytdbpack_bad_type(ch, "Number", val_obj);
284		goto out;
285	}
286
287	val_long = PyLong_AsUnsignedLong(long_obj);
288	pack_le_uint32(val_long, pack_buf);
289
290	/* pack as 32-bit; if just packing a 'w' 16-bit word then only take
291	   the first two bytes. */
292
293	if (!(result_obj = PyString_FromStringAndSize(pack_buf, ch == 'w' ? 2 : 4)))
294		goto out;
295
296	if (PyList_Append(packed_list, result_obj) != -1)
297		new_list = packed_list;
298
299  out:
300	Py_XDECREF(val_obj);
301	Py_XDECREF(long_obj);
302	Py_XDECREF(result_obj);
303
304	return new_list;
305}
306
307
308/*
309 * Take one string from the iterator val_iter, convert it to 8-bit, and return
310 * it.
311 *
312 * If the input is neither a string nor Unicode, an exception is raised.
313 *
314 * If the input is Unicode, then it is converted to the appropriate encoding.
315 *
316 * If the input is a String, and encoding is not null, then it is converted to
317 * Unicode using the default decoding method, and then converted to the
318 * encoding.  If the encoding is NULL, then the string is written out as-is --
319 * this is used when the default Python encoding is the same as the Samba
320 * encoding.
321 *
322 * I hope this approach avoids being too fragile w.r.t. being passed either
323 * Unicode or String objects.
324 */
325static PyObject *
326pytdbpack_str(char ch,
327	      PyObject *val_iter, PyObject *packed_list, const char *encoding)
328{
329	PyObject *val_obj = NULL;
330	PyObject *unicode_obj = NULL;
331	PyObject *coded_str = NULL;
332	PyObject *nul_str = NULL;
333	PyObject *new_list = NULL;
334
335	if (!(val_obj = PyIter_Next(val_iter)))
336		goto out;
337
338	if (PyUnicode_Check(val_obj)) {
339		if (!(coded_str = PyUnicode_AsEncodedString(val_obj, encoding, NULL)))
340			goto out;
341	}
342	else if (PyString_Check(val_obj) && !encoding) {
343		/* For efficiency, we assume that the Python interpreter has
344		   the same default string encoding as Samba's native string
345		   encoding.  On the PSA, both are always 8859-1. */
346		coded_str = val_obj;
347		Py_INCREF(coded_str);
348	}
349	else if (PyString_Check(val_obj)) {
350		/* String, but needs to be converted */
351		if (!(unicode_obj = PyString_AsDecodedObject(val_obj, NULL, NULL)))
352			goto out;
353		if (!(coded_str = PyUnicode_AsEncodedString(unicode_obj, encoding, NULL)))
354			goto out;
355	}
356	else {
357		pytdbpack_bad_type(ch, "String or Unicode", val_obj);
358		goto out;
359	}
360
361	if (!nul_str)
362		/* this is constant and often-used; hold it forever */
363		if (!(nul_str = PyString_FromStringAndSize("", 1)))
364			goto out;
365
366	if ((PyList_Append(packed_list, coded_str) != -1)
367	    && (PyList_Append(packed_list, nul_str) != -1))
368		new_list = packed_list;
369
370  out:
371	Py_XDECREF(val_obj);
372	Py_XDECREF(unicode_obj);
373	Py_XDECREF(coded_str);
374
375	return new_list;
376}
377
378
379/*
380 * Pack (LENGTH, BUFFER) pair onto the list.
381 *
382 * The buffer must already be a String, not Unicode, because it contains 8-bit
383 * untranslated data.  In some cases it will actually be UTF_16_LE data.
384 */
385static PyObject *
386pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list)
387{
388	PyObject *val_obj;
389	PyObject *new_list = NULL;
390
391	/* pull off integer and stick onto list */
392	if (!(packed_list = pytdbpack_number('d', val_iter, packed_list)))
393		return NULL;
394
395	/* this assumes that the string is the right length; the old code did
396	   the same. */
397	if (!(val_obj = PyIter_Next(val_iter)))
398		return NULL;
399
400	if (!PyString_Check(val_obj)) {
401		pytdbpack_bad_type('B', "String", val_obj);
402		goto out;
403	}
404
405	if (PyList_Append(packed_list, val_obj) != -1)
406		new_list = packed_list;
407
408  out:
409	Py_XDECREF(val_obj);
410	return new_list;
411}
412
413
414static PyObject *pytdbpack_bad_type(char ch,
415				    const char *expected,
416				    PyObject *val_obj)
417{
418	PyObject *r = PyObject_Repr(val_obj);
419	if (!r)
420		return NULL;
421	PyErr_Format(PyExc_TypeError,
422		     "tdbpack: format '%c' requires %s, not %s",
423		     ch, expected, PyString_AS_STRING(r));
424	Py_DECREF(r);
425	return val_obj;
426}
427
428
/*
  Store the low 32 bits of VAL_LONG into PBUF[0..3], least significant byte
  first.

  The bytes are extracted with shifts and masks rather than by copying the
  in-memory representation, so the output does not depend on the byte order
  of the host.  glib and Samba have quicker macros for this, but plain libc
  does not, and it is probably not a big deal.
*/
static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf)
{
	int byte_i;

	for (byte_i = 0; byte_i < 4; byte_i++)
		pbuf[byte_i] = (unsigned char) ((val_long >> (8 * byte_i)) & 0xff);
}
442
443
/* Copy LEN bytes from FROM to the position *PBUF points at, then advance
   *PBUF past the bytes just written. */
static void pack_bytes(long len, const char *from,
		       unsigned char **pbuf)
{
	unsigned char *dest = *pbuf;

	memcpy(dest, from, len);
	*pbuf = dest + len;
}
450
451
452
453static PyObject *
454pytdbunpack(PyObject *self,
455		 PyObject *args)
456{
457	char *format_str, *packed_str, *ppacked;
458	PyObject *val_list = NULL, *ret_tuple = NULL;
459	PyObject *rest_string = NULL;
460	int format_len, packed_len;
461	char last_format = '#';	/* invalid */
462	int i;
463
464	/* get arguments */
465	if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
466		return NULL;
467
468	format_len = strlen(format_str);
469
470	/* Allocate list to hold results.  Initially empty, and we append
471	   results as we go along. */
472	val_list = PyList_New(0);
473	if (!val_list)
474		goto failed;
475	ret_tuple = PyTuple_New(2);
476	if (!ret_tuple)
477		goto failed;
478
479	/* For every object, unpack.  */
480	for (ppacked = packed_str, i = 0; i < format_len && format_str[i] != '$'; i++) {
481		last_format = format_str[i];
482		/* packed_len is reduced in place */
483		if (!pytdbunpack_item(format_str[i], &ppacked, &packed_len, val_list))
484			goto failed;
485	}
486
487	/* If the last character was '$', keep going until out of space */
488	if (format_str[i] == '$') {
489		if (i == 0) {
490			PyErr_Format(PyExc_ValueError,
491				     "%s: '$' may not be first character in format",
492				     FUNCTION_MACRO);
493			return NULL;
494		}
495		while (packed_len > 0)
496			if (!pytdbunpack_item(last_format, &ppacked, &packed_len, val_list))
497				goto failed;
498	}
499
500	/* save leftovers for next time */
501	rest_string = PyString_FromStringAndSize(ppacked, packed_len);
502	if (!rest_string)
503		goto failed;
504
505	/* return (values, rest) tuple; give up references to them */
506	PyTuple_SET_ITEM(ret_tuple, 0, val_list);
507	val_list = NULL;
508	PyTuple_SET_ITEM(ret_tuple, 1, rest_string);
509	val_list = NULL;
510	return ret_tuple;
511
512  failed:
513	/* handle failure: deallocate anything.  XDECREF forms handle NULL
514	   pointers for objects that haven't been allocated yet. */
515	Py_XDECREF(val_list);
516	Py_XDECREF(ret_tuple);
517	Py_XDECREF(rest_string);
518	return NULL;
519}
520
521
522static void
523pytdbunpack_err_too_short(void)
524{
525	PyErr_Format(PyExc_IndexError,
526		     "%s: data too short for unpack format", FUNCTION_MACRO);
527}
528
529
530static PyObject *
531pytdbunpack_uint32(char **pbuf, int *plen)
532{
533	unsigned long v;
534	unsigned char *b;
535
536	if (*plen < 4) {
537		pytdbunpack_err_too_short();
538		return NULL;
539	}
540
541	b = *pbuf;
542	v = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
543
544	(*pbuf) += 4;
545	(*plen) -= 4;
546
547	return PyLong_FromUnsignedLong(v);
548}
549
550
551static PyObject *pytdbunpack_int16(char **pbuf, int *plen)
552{
553	long v;
554	unsigned char *b;
555
556	if (*plen < 2) {
557		pytdbunpack_err_too_short();
558		return NULL;
559	}
560
561	b = *pbuf;
562	v = b[0] | b[1]<<8;
563
564	(*pbuf) += 2;
565	(*plen) -= 2;
566
567	return PyInt_FromLong(v);
568}
569
570
571static PyObject *
572pytdbunpack_string(char **pbuf, int *plen, const char *encoding)
573{
574	int len;
575	char *nul_ptr, *start;
576
577	start = *pbuf;
578
579	nul_ptr = memchr(start, '\0', *plen);
580	if (!nul_ptr) {
581		pytdbunpack_err_too_short();
582		return NULL;
583	}
584
585	len = nul_ptr - start;
586
587	*pbuf += len + 1;	/* skip \0 */
588	*plen -= len + 1;
589
590	return PyString_Decode(start, len, encoding, NULL);
591}
592
593
594static PyObject *
595pytdbunpack_buffer(char **pbuf, int *plen, PyObject *val_list)
596{
597	/* first get 32-bit len */
598	long slen;
599	unsigned char *b;
600	unsigned char *start;
601	PyObject *str_obj = NULL, *len_obj = NULL;
602
603	if (*plen < 4) {
604		pytdbunpack_err_too_short();
605		return NULL;
606	}
607
608	b = *pbuf;
609	slen = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
610
611	if (slen < 0) { /* surely you jest */
612		PyErr_Format(PyExc_ValueError,
613			     "%s: buffer seems to have negative length", FUNCTION_MACRO);
614		return NULL;
615	}
616
617	(*pbuf) += 4;
618	(*plen) -= 4;
619	start = *pbuf;
620
621	if (*plen < slen) {
622		PyErr_Format(PyExc_IndexError,
623			     "%s: not enough data to unpack buffer: "
624			     "need %d bytes, have %d", FUNCTION_MACRO,
625			     (int) slen, *plen);
626		return NULL;
627	}
628
629	(*pbuf) += slen;
630	(*plen) -= slen;
631
632	if (!(len_obj = PyInt_FromLong(slen)))
633		goto failed;
634
635	if (PyList_Append(val_list, len_obj) == -1)
636		goto failed;
637
638	if (!(str_obj = PyString_FromStringAndSize(start, slen)))
639		goto failed;
640
641	if (PyList_Append(val_list, str_obj) == -1)
642		goto failed;
643
644	Py_DECREF(len_obj);
645	Py_DECREF(str_obj);
646
647	return val_list;
648
649  failed:
650	Py_XDECREF(len_obj);	/* handles NULL */
651	Py_XDECREF(str_obj);
652	return NULL;
653}
654
655
656/* Unpack a single field from packed data, according to format character CH.
657   Remaining data is at *PBUF, of *PLEN.
658
659   *PBUF is advanced, and *PLEN reduced to reflect the amount of data that has
660   been consumed.
661
662   Returns a reference to None, or NULL for failure.
663*/
664static PyObject *pytdbunpack_item(char ch,
665				  char **pbuf,
666				  int *plen,
667				  PyObject *val_list)
668{
669	PyObject *unpacked;
670
671	if (ch == 'w') {	/* 16-bit int */
672		unpacked = pytdbunpack_int16(pbuf, plen);
673	}
674	else if (ch == 'd' || ch == 'p') { /* 32-bit int */
675		/* pointers can just come through as integers */
676		unpacked = pytdbunpack_uint32(pbuf, plen);
677	}
678	else if (ch == 'f' || ch == 'P') { /* nul-term string  */
679		unpacked = pytdbunpack_string(pbuf, plen, pytdb_unix_encoding);
680	}
681	else if (ch == 'B') { /* length, buffer */
682		return pytdbunpack_buffer(pbuf, plen, val_list);
683	}
684	else {
685		PyErr_Format(PyExc_ValueError,
686			     "%s: format character '%c' is not supported",
687                             FUNCTION_MACRO, ch);
688
689		return NULL;
690	}
691
692	/* otherwise OK */
693	if (!unpacked)
694		return NULL;
695
696	if (PyList_Append(val_list, unpacked) == -1)
697		val_list = NULL;
698
699	/* PyList_Append takes a new reference to the inserted object.
700	   Therefore, we no longer need the original reference. */
701	Py_DECREF(unpacked);
702
703	return val_list;
704}
705
706
707
708
709
710
711static PyMethodDef pytdbpack_methods[] = {
712	{ "pack", pytdbpack, METH_VARARGS, (char *) pytdbpack_doc },
713	{ "unpack", pytdbunpack, METH_VARARGS, (char *) pytdbunpack_doc },
714};
715
716DL_EXPORT(void)
717inittdbpack(void)
718{
719	Py_InitModule3("tdbpack", pytdbpack_methods,
720		       (char *) pytdbpack_docstring);
721}
722