1/* base64.c -- routines to encode/decode base64 data */
2/* $OpenLDAP$ */
3/* This work is part of OpenLDAP Software <http://www.openldap.org/>.
4 *
5 * Copyright 1998-2011 The OpenLDAP Foundation.
6 * Portions Copyright 1998-2003 Kurt D. Zeilenga.
7 * Portions Copyright 1995 IBM Corporation.
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted only as authorized by the OpenLDAP
12 * Public License.
13 *
14 * A copy of this license is available in the file LICENSE in the
15 * top-level directory of the distribution or, alternatively, at
16 * <http://www.OpenLDAP.org/license.html>.
17 */
18/* Portions Copyright (c) 1996, 1998 by Internet Software Consortium.
19 *
20 * Permission to use, copy, modify, and distribute this software for any
21 * purpose with or without fee is hereby granted, provided that the above
22 * copyright notice and this permission notice appear in all copies.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
25 * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
26 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
27 * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
28 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
29 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
30 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
31 * SOFTWARE.
32 */
/* This work is based upon the Base64 routines (developed by IBM) found
 * in the Berkeley Internet Name Domain (BIND) software as distributed
 * by ISC.  They were adapted for inclusion in OpenLDAP Software by
 * Kurt D. Zeilenga.
 */
37
38#include "portable.h"
39
40#include <ac/assert.h>
41#include <ac/stdlib.h>
42#include <ac/ctype.h>
43#include <ac/string.h>
44
45/* include socket.h to get sys/types.h and/or winsock2.h */
46#include <ac/socket.h>
47
48#include "lutil.h"
49
/*
 * The base64 alphabet, indexed by 6-bit value: 'A'-'Z' are 0-25,
 * 'a'-'z' are 26-51, '0'-'9' are 52-61, then '+' (62) and '/' (63).
 * The array also carries the usual terminating '\0', so it can be
 * searched with strchr().
 */
static const char Base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";

/* Character used to pad a short final quantum out to four characters. */
static const char Pad64 = '=';
53
54/* (From RFC1521 and draft-ietf-dnssec-secext-03.txt)
55   The following encoding technique is taken from RFC 1521 by Borenstein
56   and Freed.  It is reproduced here in a slightly edited form for
57   convenience.
58
59   A 65-character subset of US-ASCII is used, enabling 6 bits to be
60   represented per printable character. (The extra 65th character, "=",
61   is used to signify a special processing function.)
62
63   The encoding process represents 24-bit groups of input bits as output
64   strings of 4 encoded characters. Proceeding from left to right, a
65   24-bit input group is formed by concatenating 3 8-bit input groups.
66   These 24 bits are then treated as 4 concatenated 6-bit groups, each
67   of which is translated into a single digit in the base64 alphabet.
68
69   Each 6-bit group is used as an index into an array of 64 printable
70   characters. The character referenced by the index is placed in the
71   output string.
72
73                         Table 1: The Base64 Alphabet
74
75      Value Encoding  Value Encoding  Value Encoding  Value Encoding
76          0 A            17 R            34 i            51 z
77          1 B            18 S            35 j            52 0
78          2 C            19 T            36 k            53 1
79          3 D            20 U            37 l            54 2
80          4 E            21 V            38 m            55 3
81          5 F            22 W            39 n            56 4
82          6 G            23 X            40 o            57 5
83          7 H            24 Y            41 p            58 6
84          8 I            25 Z            42 q            59 7
85          9 J            26 a            43 r            60 8
86         10 K            27 b            44 s            61 9
87         11 L            28 c            45 t            62 +
88         12 M            29 d            46 u            63 /
89         13 N            30 e            47 v
90         14 O            31 f            48 w         (pad) =
91         15 P            32 g            49 x
92         16 Q            33 h            50 y
93
94   Special processing is performed if fewer than 24 bits are available
95   at the end of the data being encoded.  A full encoding quantum is
96   always completed at the end of a quantity.  When fewer than 24 input
97   bits are available in an input group, zero bits are added (on the
98   right) to form an integral number of 6-bit groups.  Padding at the
99   end of the data is performed using the '=' character.
100
   Since all base64 input is an integral number of octets, only the
   following cases can arise:
104
105       (1) the final quantum of encoding input is an integral
106           multiple of 24 bits; here, the final unit of encoded
107	   output will be an integral multiple of 4 characters
108	   with no "=" padding,
109       (2) the final quantum of encoding input is exactly 8 bits;
110           here, the final unit of encoded output will be two
111	   characters followed by two "=" padding characters, or
112       (3) the final quantum of encoding input is exactly 16 bits;
113           here, the final unit of encoded output will be three
114	   characters followed by one "=" padding character.
115   */
116
117int
118lutil_b64_ntop(
119	u_char const *src,
120	size_t srclength,
121	char *target,
122	size_t targsize)
123{
124	size_t datalength = 0;
125	u_char input[3];
126	u_char output[4];
127	size_t i;
128
129	while (2 < srclength) {
130		input[0] = *src++;
131		input[1] = *src++;
132		input[2] = *src++;
133		srclength -= 3;
134
135		output[0] = input[0] >> 2;
136		output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4);
137		output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6);
138		output[3] = input[2] & 0x3f;
139		assert(output[0] < 64);
140		assert(output[1] < 64);
141		assert(output[2] < 64);
142		assert(output[3] < 64);
143
144		if (datalength + 4 > targsize)
145			return (-1);
146		target[datalength++] = Base64[output[0]];
147		target[datalength++] = Base64[output[1]];
148		target[datalength++] = Base64[output[2]];
149		target[datalength++] = Base64[output[3]];
150	}
151
152	/* Now we worry about padding. */
153	if (0 != srclength) {
154		/* Get what's left. */
155		input[0] = input[1] = input[2] = '\0';
156		for (i = 0; i < srclength; i++)
157			input[i] = *src++;
158
159		output[0] = input[0] >> 2;
160		output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4);
161		output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6);
162		assert(output[0] < 64);
163		assert(output[1] < 64);
164		assert(output[2] < 64);
165
166		if (datalength + 4 > targsize)
167			return (-1);
168		target[datalength++] = Base64[output[0]];
169		target[datalength++] = Base64[output[1]];
170		if (srclength == 1)
171			target[datalength++] = Pad64;
172		else
173			target[datalength++] = Base64[output[2]];
174		target[datalength++] = Pad64;
175	}
176	if (datalength >= targsize)
177		return (-1);
178	target[datalength] = '\0';	/* Returned value doesn't count \0. */
179	return (datalength);
180}
181
182/* skips all whitespace anywhere.
183   converts characters, four at a time, starting at (or after)
184   src from base - 64 numbers into three 8 bit bytes in the target area.
185   it returns the number of data bytes stored at the target, or -1 on error.
186 */
187
188int
189lutil_b64_pton(
190	char const *src,
191	u_char *target,
192	size_t targsize)
193{
194	int tarindex, state, ch;
195	char *pos;
196
197	state = 0;
198	tarindex = 0;
199
200	while ((ch = *src++) != '\0') {
201		if (isascii(ch) && isspace(ch))	/* Skip whitespace anywhere. */
202			continue;
203
204		if (ch == Pad64)
205			break;
206
207		pos = strchr(Base64, ch);
208		if (pos == 0) 		/* A non-base64 character. */
209			return (-1);
210
211		switch (state) {
212		case 0:
213			if (target) {
214				if ((size_t)tarindex >= targsize)
215					return (-1);
216				target[tarindex] = (pos - Base64) << 2;
217			}
218			state = 1;
219			break;
220		case 1:
221			if (target) {
222				if ((size_t)tarindex + 1 >= targsize)
223					return (-1);
224				target[tarindex]   |=  (pos - Base64) >> 4;
225				target[tarindex+1]  = ((pos - Base64) & 0x0f)
226							<< 4 ;
227			}
228			tarindex++;
229			state = 2;
230			break;
231		case 2:
232			if (target) {
233				if ((size_t)tarindex + 1 >= targsize)
234					return (-1);
235				target[tarindex]   |=  (pos - Base64) >> 2;
236				target[tarindex+1]  = ((pos - Base64) & 0x03)
237							<< 6;
238			}
239			tarindex++;
240			state = 3;
241			break;
242		case 3:
243			if (target) {
244				if ((size_t)tarindex >= targsize)
245					return (-1);
246				target[tarindex] |= (pos - Base64);
247			}
248			tarindex++;
249			state = 0;
250			break;
251		default:
252			abort();
253		}
254	}
255
256	/*
257	 * We are done decoding Base-64 chars.  Let's see if we ended
258	 * on a byte boundary, and/or with erroneous trailing characters.
259	 */
260
261	if (ch == Pad64) {		/* We got a pad char. */
262		ch = *src++;		/* Skip it, get next. */
263		switch (state) {
264		case 0:		/* Invalid = in first position */
265		case 1:		/* Invalid = in second position */
266			return (-1);
267
268		case 2:		/* Valid, means one byte of info */
269			/* Skip any number of spaces. */
270			for ((void)NULL; ch != '\0'; ch = *src++)
271				if (! (isascii(ch) && isspace(ch)))
272					break;
273			/* Make sure there is another trailing = sign. */
274			if (ch != Pad64)
275				return (-1);
276			ch = *src++;		/* Skip the = */
277			/* Fall through to "single trailing =" case. */
278			/* FALLTHROUGH */
279
280		case 3:		/* Valid, means two bytes of info */
281			/*
282			 * We know this char is an =.  Is there anything but
283			 * whitespace after it?
284			 */
285			for ((void)NULL; ch != '\0'; ch = *src++)
286				if (! (isascii(ch) && isspace(ch)))
287					return (-1);
288
289			/*
290			 * Now make sure for cases 2 and 3 that the "extra"
291			 * bits that slopped past the last full byte were
292			 * zeros.  If we don't check them, they become a
293			 * subliminal channel.
294			 */
295			if (target && target[tarindex] != 0)
296				return (-1);
297		}
298	} else {
299		/*
300		 * We ended by seeing the end of the string.  Make sure we
301		 * have no partial bytes lying around.
302		 */
303		if (state != 0)
304			return (-1);
305	}
306
307	return (tarindex);
308}
309