1/*********************************************************************\
2
3MODULE NAME:    b64.c
4
5AUTHOR:         Bob Trower 08/04/01
6
7PROJECT:        Crypt Data Packaging
8
9COPYRIGHT:      Copyright (c) Trantor Standard Systems Inc., 2001
10
11NOTE:           This source code may be used as you wish, subject to
12                the MIT license.  See the LICENCE section below.
13
14DESCRIPTION:
15                This little utility implements the Base64
16                Content-Transfer-Encoding standard described in
17                RFC1113 (http://www.faqs.org/rfcs/rfc1113.html).
18
19                This is the coding scheme used by MIME to allow
20                binary data to be transferred by SMTP mail.
21
22                Groups of 3 bytes from a binary stream are coded as
23                groups of 4 bytes in a text stream.
24
25                The input stream is 'padded' with zeros to create
26                an input that is an even multiple of 3.
27
28                A special character ('=') is used to denote padding so
29                that the stream can be decoded back to its exact size.
30
31                Encoded output is formatted in lines which should
32                be a maximum of 72 characters to conform to the
33                specification.  This program defaults to 72 characters,
34                but will allow more or less through the use of a
35                switch.  The program enforces a minimum line size
36                of 4 characters.
37
38                Example encoding:
39
40                The stream 'ABCD' is 32 bits long.  It is mapped as
41                follows:
42
43                ABCD
44
45                 A (65)     B (66)     C (67)     D (68)   (None) (None)
46                01000001   01000010   01000011   01000100
47
48                16 (Q)  20 (U)  9 (J)   3 (D)    17 (R) 0 (A)  NA (=) NA (=)
49                010000  010100  001001  000011   010001 000000 000000 000000
50
51
52                QUJDRA==
53
54                Decoding is the process in reverse.  A 'decode' lookup
55                table has been created to avoid string scans.
56
57DESIGN GOALS:	Specifically:
58		Code is a stand-alone utility to perform base64
59		encoding/decoding. It should be genuinely useful
60		when the need arises and it meets a need that is
61		likely to occur for some users.
62		Code acts as sample code to show the author's
63		design and coding style.
64
65		Generally:
66		This program is designed to survive:
67		Everything you need is in a single source file.
68		It compiles cleanly using a vanilla ANSI C compiler.
69		It does its job correctly with a minimum of fuss.
70		The code is not overly clever, not overly simplistic
71		and not overly verbose.
72		Access is 'cut and paste' from a web page.
73		Terms of use are reasonable.
74
75VALIDATION:     Non-trivial code is never without errors.  This
76                file likely has some problems, since it has only
77                been tested by the author.  It is expected with most
78                source code that there is a period of 'burn-in' when
79                problems are identified and corrected.  That being
80                said, it is possible to have 'reasonably correct'
81                code by following a regime of unit test that covers
82                the most likely cases and regression testing prior
83                to release.  This has been done with this code and
84                it has a good probability of performing as expected.
85
86                Unit Test Cases:
87
88                case 0:empty file:
89                    CASE0.DAT  ->  ->
90                    (Zero length target file created
91                    on both encode and decode.)
92
93                case 1:One input character:
94                    CASE1.DAT A -> QQ== -> A
95
96                case 2:Two input characters:
                    CASE2.DAT AB -> QUI= -> AB
98
99                case 3:Three input characters:
100                    CASE3.DAT ABC -> QUJD -> ABC
101
102                case 4:Four input characters:
103                    case4.dat ABCD -> QUJDRA== -> ABCD
104
105                case 5:All chars from 0 to ff, linesize set to 50:
106
107                    AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIj
108                    JCUmJygpKissLS4vMDEyMzQ1Njc4OTo7PD0+P0BBQkNERUZH
109                    SElKS0xNTk9QUVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWpr
110                    bG1ub3BxcnN0dXZ3eHl6e3x9fn+AgYKDhIWGh4iJiouMjY6P
111                    kJGSk5SVlpeYmZqbnJ2en6ChoqOkpaanqKmqq6ytrq+wsbKz
112                    tLW2t7i5uru8vb6/wMHCw8TFxsfIycrLzM3Oz9DR0tPU1dbX
113                    2Nna29zd3t/g4eLj5OXm5+jp6uvs7e7v8PHy8/T19vf4+fr7
114                    /P3+/w==
115
116                case 6:Mime Block from e-mail:
117                    (Data same as test case 5)
118
119                case 7: Large files:
120                    Tested 28 MB file in/out.
121
122                case 8: Random Binary Integrity:
123                    This binary program (b64.exe) was encoded to base64,
124                    back to binary and then executed.
125
126                case 9 Stress:
127                    All files in a working directory encoded/decoded
128                    and compared with file comparison utility to
129                    ensure that multiple runs do not cause problems
130                    such as exhausting file handles, tmp storage, etc.
131
132                -------------
133
134                Syntax, operation and failure:
135                    All options/switches tested.  Performs as
136                    expected.
137
138                case 10:
139                    No Args -- Shows Usage Screen
140                    Return Code 1 (Invalid Syntax)
141                case 11:
142                    One Arg (invalid) -- Shows Usage Screen
143                    Return Code 1 (Invalid Syntax)
144                case 12:
145                    One Arg Help (-?) -- Shows detailed Usage Screen.
146                    Return Code 0 (Success -- help request is valid).
147                case 13:
148                    One Arg Help (-h) -- Shows detailed Usage Screen.
149                    Return Code 0 (Success -- help request is valid).
150                case 14:
151                    One Arg (valid) -- Uses stdin/stdout (filter)
                    Return Code 0 (Success)
153                case 15:
154                    Two Args (invalid file) -- shows system error.
155                    Return Code 2 (File Error)
156                case 16:
157                    Encode non-existent file -- shows system error.
158                    Return Code 2 (File Error)
159                case 17:
160                    Out of disk space -- shows system error.
161                    Return Code 3 (File I/O Error)
162
163                -------------
164
165                Compile/Regression test:
166                    gcc compiled binary under Cygwin
167                    Microsoft Visual Studio under Windows 2000
168                    Microsoft Version 6.0 C under Windows 2000
169
170DEPENDENCIES:   None
171
172LICENCE:        Copyright (c) 2001 Bob Trower, Trantor Standard Systems Inc.
173
174                Permission is hereby granted, free of charge, to any person
175                obtaining a copy of this software and associated
176                documentation files (the "Software"), to deal in the
177                Software without restriction, including without limitation
178                the rights to use, copy, modify, merge, publish, distribute,
179                sublicense, and/or sell copies of the Software, and to
180                permit persons to whom the Software is furnished to do so,
181                subject to the following conditions:
182
183                The above copyright notice and this permission notice shall
184                be included in all copies or substantial portions of the
185                Software.
186
187                THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
188                KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
189                WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
190                PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS
191                OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
192                OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
193                OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
194                SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
195
196VERSION HISTORY:
197                Bob Trower 08/04/01 -- Create Version 0.00.00B
198
199\******************************************************************* */
200
201#include <inttypes.h>
202#include <stdio.h>
203#include <stdlib.h>
204
205#include "b64.h"
206
/*
** Encoding alphabet as described in RFC1113: the character at index n
** represents the 6-bit value n (0..63).
*/
static const char cb64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"	/*  0 .. 25 */
	"abcdefghijklmnopqrstuvwxyz"	/* 26 .. 51 */
	"0123456789+/";			/* 52 .. 63 */
211
/*
** Decoding table (created by author), indexed by (character - 43) for
** the characters '+' (43) through 'z' (122).
**
** An entry of '$' marks a character that is not part of the base64
** alphabet.  Any other entry holds (6-bit value + 62), so the decoder
** subtracts 61 and then 1 to recover the value.
*/
static const char cd64[] =
	"|$$$}"				/* '+' through '/' */
	"rstuvwxyz{"			/* '0' through '9' */
	"$$$$$$$"			/* ':' through '@' */
	">?@ABCDEFGHIJKLMNOPQRSTUVW"	/* 'A' through 'Z' */
	"$$$$$$"			/* '[' through '`' */
	"XYZ[\\]^_`abcdefghijklmnopq";	/* 'a' through 'z' */
216
217/*
218** encodeblock
219**
220** encode 3 8-bit binary bytes as 4 '6-bit' characters
221*/
222static void
223encodeblock(uint8_t *wordin, uint8_t *wordout, int wordlen)
224{
225	wordout[0] = cb64[(unsigned)wordin[0] >> 2];
226	wordout[1] = cb64[((unsigned)(wordin[0] & 0x03) << 4) | ((unsigned)(wordin[1] & 0xf0) >> 4)];
227	wordout[2] = (uint8_t)(wordlen > 1) ?
228		cb64[((unsigned)(wordin[1] & 0x0f) << 2) | ((unsigned)(wordin[2] & 0xc0) >> 6)] : '=';
229	wordout[3] = (uint8_t)(wordlen > 2) ? cb64[wordin[2] & 0x3f] : '=';
230}
231
/*
** encode
**
** base64 encode `insize' bytes from `in' into the buffer `vp', which
** has room for `outsize' bytes.  Input is consumed in groups of 3
** bytes, each producing a block of 4 output characters; a short final
** group is zero-filled and marked with '=' padding by encodeblock.
**
** If `linesize' is greater than zero, a CRLF pair is written after
** every (linesize / 4) blocks (after every block when that quotient is
** zero) and after the final block.  No terminating NUL is written.
**
** Nothing is read beyond in[insize - 1] and nothing is written beyond
** the `outsize' bytes of `vp': encoding stops early, on a block or
** line-break boundary, as soon as the next item would not fit.
**
** Returns the number of bytes stored in `vp', or 0 if `in' or `vp' is
** NULL.
*/
int
netpgpv_b64encode(const char *in, const size_t insize, void *vp, size_t outsize, int linesize)
{
	unsigned	 i;
	uint8_t		 wordout[4];
	uint8_t		 wordin[3];
	size_t		 consumed;
	size_t		 written;
	char		*out = vp;
	int              blocksout;
	int              wordlen;

	if (in == NULL || vp == NULL) {
		return 0;
	}
	blocksout = 0;
	consumed = 0;
	written = 0;
	while (consumed < insize) {
		/* a block is always 4 characters; never emit a partial one */
		if (outsize - written < sizeof(wordout)) {
			break;
		}
		/* gather up to 3 bytes, checking the bound before each read */
		for (wordlen = 0, i = 0; i < sizeof(wordin); i++) {
			if (consumed < insize) {
				wordin[i] = (uint8_t)in[consumed++];
				wordlen++;
			} else {
				wordin[i] = 0x0;
			}
		}
		encodeblock(wordin, wordout, wordlen);
		for (i = 0; i < sizeof(wordout); i++) {
			out[written++] = (char)wordout[i];
		}
		blocksout++;
		/* end the line when it is full or when the input is used up */
		if (linesize > 0 &&
		    (blocksout >= (int)(linesize / sizeof(wordout)) ||
		    consumed >= insize)) {
			if (outsize - written < 2) {
				break;
			}
			out[written++] = '\r';
			out[written++] = '\n';
			blocksout = 0;
		}
	}
	return (int)written;
}
282
/*
** decodeblock
**
** Pack four 6-bit values (one per element of wordin) into three 8-bit
** bytes in wordout.
*/
static void
decodeblock(uint8_t wordin[4], uint8_t wordout[3])
{
	const unsigned	s0 = wordin[0];
	const unsigned	s1 = wordin[1];
	const unsigned	s2 = wordin[2];
	const unsigned	s3 = wordin[3];

	/* all of s0, then the top 2 bits of s1 */
	wordout[0] = (uint8_t)((s0 << 2) | (s1 >> 4));
	/* low 4 bits of s1, then the top 4 bits of s2 */
	wordout[1] = (uint8_t)((s1 << 4) | (s2 >> 2));
	/* low 2 bits of s2, then s3 */
	wordout[2] = (uint8_t)(((s2 << 6) & 0xc0) | s3);
}
295
296/*
297** decode
298**
299** decode a base64 encoded stream discarding padding, line breaks and noise
300*/
301int
302netpgpv_b64decode(const char *in, const size_t insize, void *vp, size_t outsize)
303{
304	const char	*inp;
305	unsigned	 wordlen;
306	unsigned	 i;
307	uint8_t    	 wordout[3];
308	uint8_t    	 wordin[4];
309	uint8_t    	 v;
310	char		*out = vp;
311	char		*outp;
312
313	if (in == NULL || vp == NULL) {
314		return 0;
315	}
316	for (inp = in, outp = out ; (size_t)(inp - in) < insize && (size_t)(outp - out) < outsize ; ) {
317		for (wordlen = 0, i = 0 ; i < sizeof(wordin) && (size_t)(inp - in) < insize ; i++) {
318			/* get a single character */
319			for (v = 0; (size_t)(inp - in) <= insize && v == 0 ; ) {
320				if (*inp == '\r' && *(inp + 1) == '\n') {
321					inp += 2;
322				} else {
323					v = (uint8_t) *inp++;
324					v = (uint8_t) ((v < 43 || v > 122) ? 0 : cd64[v - 43]);
325					if (v) {
326						v = (uint8_t) ((v == '$') ? 0 : v - 61);
327					}
328				}
329			}
330			/* perhaps 0x0 pad */
331			if ((size_t)(inp - in) <= insize) {
332				wordlen += 1;
333				if (v) {
334					wordin[i] = (uint8_t) (v - 1);
335				}
336			} else {
337				wordin[i] = 0x0;
338			}
339		}
340		if (wordlen > 0) {
341			decodeblock(wordin, wordout);
342			for (i = 0; i < wordlen - 1 ; i++) {
343				*outp++ = wordout[i];
344			}
345		}
346	}
347	return (int)(outp - out);
348}
349
/*
** encsize
**
** Return an upper bound on the number of characters needed to base64
** encode n bytes of input: (4 * n) / 3, rounded down, plus 4.
**
** The CRLF pairs that netpgpv_b64encode adds when its linesize argument
** is greater than zero are not included.  The result is only meaningful
** while it fits in an int.
*/
int
netpgpv_b64_encsize(unsigned n)
{
	/*
	 * Same value as ((4 * n) / 3) + 4, but split into whole groups of 3
	 * and a remainder so that 4 * n is never formed and cannot wrap.
	 */
	return (int)((n / 3) * 4 + ((n % 3) * 4) / 3 + 4);
}
356