1/* crypto/evp/encode.c */
2/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
3 * All rights reserved.
4 *
5 * This package is an SSL implementation written
6 * by Eric Young (eay@cryptsoft.com).
7 * The implementation was written so as to conform with Netscapes SSL.
8 *
9 * This library is free for commercial and non-commercial use as long as
10 * the following conditions are aheared to.  The following conditions
11 * apply to all code found in this distribution, be it the RC4, RSA,
12 * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
13 * included with this distribution is covered by the same copyright terms
14 * except that the holder is Tim Hudson (tjh@cryptsoft.com).
15 *
16 * Copyright remains Eric Young's, and as such any Copyright notices in
17 * the code are not to be removed.
18 * If this package is used in a product, Eric Young should be given attribution
19 * as the author of the parts of the library used.
20 * This can be in the form of a textual message at program startup or
21 * in documentation (online or textual) provided with the package.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the copyright
27 *    notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 *    notice, this list of conditions and the following disclaimer in the
30 *    documentation and/or other materials provided with the distribution.
31 * 3. All advertising materials mentioning features or use of this software
32 *    must display the following acknowledgement:
33 *    "This product includes cryptographic software written by
34 *     Eric Young (eay@cryptsoft.com)"
35 *    The word 'cryptographic' can be left out if the rouines from the library
36 *    being used are not cryptographic related :-).
37 * 4. If you include any Windows specific code (or a derivative thereof) from
38 *    the apps directory (application code) you must include an acknowledgement:
39 *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
40 *
41 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
42 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
43 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
44 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
45 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
46 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
47 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
48 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
49 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
50 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
51 * SUCH DAMAGE.
52 *
53 * The licence and distribution terms for any publically available version or
54 * derivative of this code cannot be changed.  i.e. this code cannot simply be
55 * copied and put under another distribution licence
56 * [including the GNU Public Licence.]
57 */
58
59#include <stdio.h>
60#include <limits.h>
61#include "cryptlib.h"
62#include <openssl/evp.h>
63
/*
 * Forward declaration: maps one input character to its 6-bit base64 value or
 * to one of the B64_* sentinel codes. The definition appears later in this
 * file.
 */
static unsigned char conv_ascii2bin(unsigned char a);
#ifndef CHARSET_EBCDIC
/*
 * Map the low six bits of (a) to the matching character of the base64
 * alphabet held in data_bin2ascii[].
 */
# define conv_bin2ascii(a)       (data_bin2ascii[(a)&0x3f])
#else
/*
 * We assume that PEM encoded files are EBCDIC files (i.e., printable text
 * files). Convert them here while decoding. When encoding, output is EBCDIC
 * (text) format again. (No need for conversion in the conv_bin2ascii macro,
 * as the underlying textstring data_bin2ascii[] is already EBCDIC)
 *
 * Consequently this definition is identical to the non-EBCDIC one above.
 */
# define conv_bin2ascii(a)       (data_bin2ascii[(a)&0x3f])
#endif
76
/*-
 * Output layout produced by the encoder:
 * 64 char lines
 * pad input with 0
 * left over chars are set to =
 * 1 byte  => xx==
 * 2 bytes => xxx=
 * 3 bytes => xxxx
 *
 * BIN_PER_LINE    = 48: raw input bytes that fill one 64-character line.
 * CHUNKS_PER_LINE = 16: four-character groups per line.
 * CHAR_PER_LINE   = 65: 64 characters plus one (presumably the newline).
 * NOTE(review): none of these three macros is referenced in the visible part
 * of this file; EVP_EncodeInit hard-codes 48 instead of BIN_PER_LINE.
 */
#define BIN_PER_LINE    (64/4*3)
#define CHUNKS_PER_LINE (64/4)
#define CHAR_PER_LINE   (64+1)

/*
 * The base64 alphabet, indexed by 6-bit value (0..63). The array has 65
 * elements so that the string literal's terminating NUL fits.
 */
static const unsigned char data_bin2ascii[65] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ\
abcdefghijklmnopqrstuvwxyz0123456789+/";
91
/*-
 * Sentinel codes returned by conv_ascii2bin() for characters that are not
 * part of the base64 alphabet. Real base64 digits map to 0x00..0x3F, so every
 * sentinel has bit 0x80 set.
 *
 * 0xF0 is an EOLN
 * 0xF1 is ignored, but the next character needs to be 0xF0 (for \r\n
 *      processing).
 * 0xF2 is EOF
 * 0xE0 is ignored at the start of a line (white space).
 * 0xFF is an error
 */

#define B64_EOLN                0xF0
#define B64_CR                  0xF1
#define B64_EOF                 0xF2
#define B64_WS                  0xE0
#define B64_ERROR               0xFF
/*
 * True when (a) is in 0xE0..0xE3 or 0xF0..0xF3, which covers B64_WS,
 * B64_EOLN, B64_CR and B64_EOF. It is false for B64_ERROR (0xFF) and for the
 * 6-bit values 0x00..0x3F.
 */
#define B64_NOT_BASE64(a)       (((a)|0x13) == 0xF3)
/*
 * Negation of B64_NOT_BASE64. Note that this is also true for B64_ERROR, so
 * callers must test for B64_ERROR separately before relying on it.
 */
#define B64_BASE64(a)           !B64_NOT_BASE64(a)
107
/*
 * Decoding table indexed by 7-bit ASCII code; each row below covers eight
 * consecutive codes. Entries are either a 6-bit base64 value (0x00..0x3F) or
 * one of the B64_* sentinel codes:
 *   - 'A'-'Z' => 0x00..0x19, 'a'-'z' => 0x1A..0x33, '0'-'9' => 0x34..0x3D,
 *     '+' => 0x3E, '/' => 0x3F
 *   - '=' (0x3D) => 0x00, so padding decodes as zero bits
 *   - TAB (0x09) and SPACE (0x20) => 0xE0 (B64_WS)
 *   - LF (0x0A) => 0xF0 (B64_EOLN), CR (0x0D) => 0xF1 (B64_CR)
 *   - '-' (0x2D) => 0xF2 (B64_EOF); presumably so that a PEM "-----END" line
 *     terminates the content -- TODO confirm against callers
 *   - everything else => 0xFF (B64_ERROR)
 */
static const unsigned char data_ascii2bin[128] = {
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xE0, 0xF0, 0xFF, 0xFF, 0xF1, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xE0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0xFF, 0xFF, 0x3E, 0xFF, 0xF2, 0xFF, 0x3F,
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B,
    0x3C, 0x3D, 0xFF, 0xFF, 0xFF, 0x00, 0xFF, 0xFF,
    0xFF, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E,
    0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    0x17, 0x18, 0x19, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    0xFF, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20,
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30,
    0x31, 0x32, 0x33, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
};
126
127#ifndef CHARSET_EBCDIC
128static unsigned char conv_ascii2bin(unsigned char a)
129{
130    if (a & 0x80)
131        return B64_ERROR;
132    return data_ascii2bin[a];
133}
134#else
135static unsigned char conv_ascii2bin(unsigned char a)
136{
137    a = os_toascii[a];
138    if (a & 0x80)
139        return B64_ERROR;
140    return data_ascii2bin[a];
141}
142#endif
143
144void EVP_EncodeInit(EVP_ENCODE_CTX *ctx)
145{
146    ctx->length = 48;
147    ctx->num = 0;
148    ctx->line_num = 0;
149}
150
151void EVP_EncodeUpdate(EVP_ENCODE_CTX *ctx, unsigned char *out, int *outl,
152                      const unsigned char *in, int inl)
153{
154    int i, j;
155    size_t total = 0;
156
157    *outl = 0;
158    if (inl <= 0)
159        return;
160    OPENSSL_assert(ctx->length <= (int)sizeof(ctx->enc_data));
161    if (ctx->length - ctx->num > inl) {
162        memcpy(&(ctx->enc_data[ctx->num]), in, inl);
163        ctx->num += inl;
164        return;
165    }
166    if (ctx->num != 0) {
167        i = ctx->length - ctx->num;
168        memcpy(&(ctx->enc_data[ctx->num]), in, i);
169        in += i;
170        inl -= i;
171        j = EVP_EncodeBlock(out, ctx->enc_data, ctx->length);
172        ctx->num = 0;
173        out += j;
174        *(out++) = '\n';
175        *out = '\0';
176        total = j + 1;
177    }
178    while (inl >= ctx->length && total <= INT_MAX) {
179        j = EVP_EncodeBlock(out, in, ctx->length);
180        in += ctx->length;
181        inl -= ctx->length;
182        out += j;
183        *(out++) = '\n';
184        *out = '\0';
185        total += j + 1;
186    }
187    if (total > INT_MAX) {
188        /* Too much output data! */
189        *outl = 0;
190        return;
191    }
192    if (inl != 0)
193        memcpy(&(ctx->enc_data[0]), in, inl);
194    ctx->num = inl;
195    *outl = total;
196}
197
198void EVP_EncodeFinal(EVP_ENCODE_CTX *ctx, unsigned char *out, int *outl)
199{
200    unsigned int ret = 0;
201
202    if (ctx->num != 0) {
203        ret = EVP_EncodeBlock(out, ctx->enc_data, ctx->num);
204        out[ret++] = '\n';
205        out[ret] = '\0';
206        ctx->num = 0;
207    }
208    *outl = ret;
209}
210
/*
 * Base64-encode dlen bytes from f into t as one unbroken run (no newlines).
 *
 * Every three input bytes become four alphabet characters. A trailing group
 * of one or two bytes is zero-filled and completed with "==" or "="
 * respectively. A terminating NUL is always stored after the output but is
 * not included in the return value.
 *
 * Returns the number of characters written, which is 4 for every started
 * group of three input bytes, or 0 when dlen <= 0.
 */
int EVP_EncodeBlock(unsigned char *t, const unsigned char *f, int dlen)
{
    int remaining = dlen;
    int written = 0;
    unsigned long bits;

    /* Whole 3-byte groups. */
    while (remaining >= 3) {
        bits = (((unsigned long)f[0]) << 16L) |
               (((unsigned long)f[1]) << 8L) | f[2];
        t[0] = conv_bin2ascii(bits >> 18L);
        t[1] = conv_bin2ascii(bits >> 12L);
        t[2] = conv_bin2ascii(bits >> 6L);
        t[3] = conv_bin2ascii(bits);
        t += 4;
        f += 3;
        written += 4;
        remaining -= 3;
    }

    /* Final short group of one or two bytes, padded with '='. */
    if (remaining > 0) {
        bits = ((unsigned long)f[0]) << 16L;
        if (remaining == 2)
            bits |= ((unsigned long)f[1] << 8L);

        t[0] = conv_bin2ascii(bits >> 18L);
        t[1] = conv_bin2ascii(bits >> 12L);
        t[2] = (remaining == 1) ? '=' : conv_bin2ascii(bits >> 6L);
        t[3] = '=';
        t += 4;
        written += 4;
    }

    *t = '\0';
    return written;
}
241
242void EVP_DecodeInit(EVP_ENCODE_CTX *ctx)
243{
244    /* Only ctx->num is used during decoding. */
245    ctx->num = 0;
246    ctx->length = 0;
247    ctx->line_num = 0;
248    ctx->expect_nl = 0;
249}
250
/*-
 * Decode inl characters of base64 text from in, writing the decoded bytes to
 * out and storing their count in *outl. Characters that do not yet form a
 * decodable block are kept in ctx->enc_data (ctx->num of them) for the next
 * call.
 *
 * Return value:
 * -1 for error
 *  0 for last line (end of content detected)
 *  1 otherwise (no end of content detected; more input may follow)
 *
 * Note: even though EVP_DecodeUpdate attempts to detect and report end of
 * content, the context doesn't currently remember it and will accept more data
 * in the next call. Therefore, the caller is responsible for checking and
 * rejecting a 0 return value in the middle of content.
 *
 * Note: even though EVP_DecodeUpdate has historically tried to detect end of
 * content based on line length, this has never worked properly. Therefore,
 * we now return 0 when one of the following is true:
 *   - Padding or B64_EOF was detected and the last block is complete.
 *   - Input has zero-length.
 * -1 is returned if:
 *   - Invalid characters are detected.
 *   - There is extra trailing padding, or data after padding.
 *   - B64_EOF is detected after an incomplete base64 block.
 *
 * Note: *outl and ctx->num are updated on every path, including errors, so
 * bytes decoded before an error was hit are still reported in *outl.
 */
int EVP_DecodeUpdate(EVP_ENCODE_CTX *ctx, unsigned char *out, int *outl,
                     const unsigned char *in, int inl)
{
    int seof = 0, eof = 0, rv = -1, ret = 0, i, v, tmp, n, decoded_len;
    unsigned char *d;

    /* n counts the characters already buffered in d by earlier calls. */
    n = ctx->num;
    d = ctx->enc_data;

    /*
     * Re-derive the padding count from the buffered data, since eof is not
     * stored in the context between calls.
     */
    if (n > 0 && d[n - 1] == '=') {
        eof++;
        if (n > 1 && d[n - 2] == '=')
            eof++;
    }

    /* Legacy behaviour: an empty input chunk signals end of input. */
    if (inl == 0) {
        rv = 0;
        goto end;
    }

    for (i = 0; i < inl; i++) {
        tmp = *(in++);
        v = conv_ascii2bin(tmp);
        if (v == B64_ERROR) {
            rv = -1;
            goto end;
        }

        if (tmp == '=') {
            eof++;
        } else if (eof > 0 && B64_BASE64(v)) {
            /* More data after padding. */
            rv = -1;
            goto end;
        }

        /* At most two padding characters are allowed. */
        if (eof > 2) {
            rv = -1;
            goto end;
        }

        /* B64_EOF ends the content; skip whatever input follows it. */
        if (v == B64_EOF) {
            seof = 1;
            goto tail;
        }

        /*
         * Only save valid base64 characters. The raw character (tmp) is
         * stored, not its decoded value; EVP_DecodeBlock converts it again.
         */
        if (B64_BASE64(v)) {
            if (n >= 64) {
                /*
                 * We increment n once per loop, and empty the buffer as soon as
                 * we reach 64 characters, so this can only happen if someone's
                 * manually messed with the ctx. Refuse to write any more data.
                 */
                rv = -1;
                goto end;
            }
            OPENSSL_assert(n < (int)sizeof(ctx->enc_data));
            d[n++] = tmp;
        }

        /*
         * A full 64-character line is decoded at once. '=' decodes as zero
         * bits, so the eof padding characters are subtracted from the number
         * of bytes reported and from the output advance.
         */
        if (n == 64) {
            decoded_len = EVP_DecodeBlock(out, d, n);
            n = 0;
            if (decoded_len < 0 || eof > decoded_len) {
                rv = -1;
                goto end;
            }
            ret += decoded_len - eof;
            out += decoded_len - eof;
        }
    }

    /*
     * Legacy behaviour: if the current line is a full base64-block (i.e., has
     * 0 mod 4 base64 characters), it is processed immediately. We keep this
     * behaviour as applications may not be calling EVP_DecodeFinal properly.
     */
tail:
    if (n > 0) {
        if ((n & 3) == 0) {
            decoded_len = EVP_DecodeBlock(out, d, n);
            n = 0;
            if (decoded_len < 0 || eof > decoded_len) {
                rv = -1;
                goto end;
            }
            /* out is not advanced here because nothing is written after this. */
            ret += (decoded_len - eof);
        } else if (seof) {
            /* EOF in the middle of a base64 block. */
            rv = -1;
            goto end;
        }
    }

    /*
     * End of content (0) if B64_EOF was seen, or if padding was seen and the
     * buffer has been fully flushed; otherwise more data is expected (1).
     */
    rv = seof || (n == 0 && eof) ? 0 : 1;
end:
    /* Legacy behaviour. This should probably rather be zeroed on error. */
    *outl = ret;
    ctx->num = n;
    return (rv);
}
374
375int EVP_DecodeBlock(unsigned char *t, const unsigned char *f, int n)
376{
377    int i, ret = 0, a, b, c, d;
378    unsigned long l;
379
380    /* trim white space from the start of the line. */
381    while ((conv_ascii2bin(*f) == B64_WS) && (n > 0)) {
382        f++;
383        n--;
384    }
385
386    /*
387     * strip off stuff at the end of the line ascii2bin values B64_WS,
388     * B64_EOLN, B64_EOLN and B64_EOF
389     */
390    while ((n > 3) && (B64_NOT_BASE64(conv_ascii2bin(f[n - 1]))))
391        n--;
392
393    if (n % 4 != 0)
394        return (-1);
395
396    for (i = 0; i < n; i += 4) {
397        a = conv_ascii2bin(*(f++));
398        b = conv_ascii2bin(*(f++));
399        c = conv_ascii2bin(*(f++));
400        d = conv_ascii2bin(*(f++));
401        if ((a & 0x80) || (b & 0x80) || (c & 0x80) || (d & 0x80))
402            return (-1);
403        l = ((((unsigned long)a) << 18L) |
404             (((unsigned long)b) << 12L) |
405             (((unsigned long)c) << 6L) | (((unsigned long)d)));
406        *(t++) = (unsigned char)(l >> 16L) & 0xff;
407        *(t++) = (unsigned char)(l >> 8L) & 0xff;
408        *(t++) = (unsigned char)(l) & 0xff;
409        ret += 3;
410    }
411    return (ret);
412}
413
414int EVP_DecodeFinal(EVP_ENCODE_CTX *ctx, unsigned char *out, int *outl)
415{
416    int i;
417
418    *outl = 0;
419    if (ctx->num != 0) {
420        i = EVP_DecodeBlock(out, ctx->enc_data, ctx->num);
421        if (i < 0)
422            return (-1);
423        ctx->num = 0;
424        *outl = i;
425        return (1);
426    } else
427        return (1);
428}
429
/*
 * Disabled code: this is only compiled when the macro "undef" is defined.
 *
 * Scans len characters of buf, skipping leading white space, and checks that
 * each group of four characters converts to values below 0x40. Returns -1
 * for empty input, input that is all white space, or a group containing a
 * character that is not a base64 digit or '='. Otherwise it returns either a
 * running count (num) or 1, depending on what follows the last full group.
 *
 * NOTE(review): the intended contract is unclear from the code alone, and the
 * body looks buggy in two places (flagged inline). Confirm before enabling.
 */
#ifdef undef
int EVP_DecodeValid(unsigned char *buf, int len)
{
    int i, num = 0, bad = 0;

    if (len == 0)
        return (-1);
    /* Skip leading white space; fail if nothing else is left. */
    while (conv_ascii2bin(*buf) == B64_WS) {
        buf++;
        len--;
        if (len == 0)
            return (-1);
    }

    for (i = len; i >= 4; i -= 4) {
        if ((conv_ascii2bin(buf[0]) >= 0x40) ||
            (conv_ascii2bin(buf[1]) >= 0x40) ||
            (conv_ascii2bin(buf[2]) >= 0x40) ||
            (conv_ascii2bin(buf[3]) >= 0x40))
            return (-1);
        buf += 4;
        /*
         * NOTE(review): buf has already been advanced, so buf[2] and buf[3]
         * refer to the NEXT group (and may lie past the end of the input).
         * Presumably the padding check was meant for the group just
         * validated -- confirm.
         */
        num += 1 + (buf[2] != '=') + (buf[3] != '=');
    }
    if ((i == 1) && (conv_ascii2bin(buf[0]) == B64_EOLN))
        return (num);
    /*
     * NOTE(review): both halves of this condition test buf[0]; the second
     * was presumably meant to look at buf[1] -- confirm.
     */
    if ((i == 2) && (conv_ascii2bin(buf[0]) == B64_EOLN) &&
        (conv_ascii2bin(buf[0]) == B64_EOLN))
        return (num);
    return (1);
}
#endif
461