1/* base32.c -- Encode binary data using printable characters.
2   Copyright (C) 1999-2001, 2004-2006, 2009-2014 Free Software Foundation, Inc.
3
4   This program is free software; you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation; either version 3, or (at your option)
7   any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program; if not, see <http://www.gnu.org/licenses/>.  */
16
17/* Adapted from Simon Josefsson's base64 code by Gijs van Tulder.
18 *
19 * See also RFC 4648 <http://www.ietf.org/rfc/rfc4648.txt>.
20 *
21 * Be careful with error checking.  Here is how you would typically
22 * use these functions:
23 *
24 * bool ok = base32_decode_alloc (in, inlen, &out, &outlen);
25 * if (!ok)
26 *   FAIL: input was not valid base32
27 * if (out == NULL)
28 *   FAIL: memory allocation error
29 * OK: data in OUT/OUTLEN
30 *
31 * size_t outlen = base32_encode_alloc (in, inlen, &out);
32 * if (out == NULL && outlen == 0 && inlen != 0)
33 *   FAIL: input too long
34 * if (out == NULL)
35 *   FAIL: memory allocation error
36 * OK: data in OUT/OUTLEN.
37 *
38 */
39
40#include <config.h>
41
42/* Get prototype. */
43#include "base32.h"
44
45/* Get malloc. */
46#include <stdlib.h>
47
48/* Get UCHAR_MAX. */
49#include <limits.h>
50
51#include <string.h>
52
53/* C89 compliant way to cast 'char' to 'unsigned char'. */
54static unsigned char
55to_uchar (char ch)
56{
57  return ch;
58}
59
60/* Base32 encode IN array of size INLEN into OUT array of size OUTLEN.
61   If OUTLEN is less than BASE32_LENGTH(INLEN), write as many bytes as
62   possible.  If OUTLEN is larger than BASE32_LENGTH(INLEN), also zero
63   terminate the output buffer. */
64void
65base32_encode (const char *restrict in, size_t inlen,
66               char *restrict out, size_t outlen)
67{
68  static const char b32str[32] =
69    "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567";
70
71  while (inlen && outlen)
72    {
73      *out++ = b32str[(to_uchar (in[0]) >> 3) & 0x1f];
74      if (!--outlen)
75        break;
76      *out++ = b32str[((to_uchar (in[0]) << 2)
77                       + (--inlen ? to_uchar (in[1]) >> 6 : 0))
78                      & 0x1f];
79      if (!--outlen)
80        break;
81      *out++ =
82        (inlen
83         ? b32str[(to_uchar (in[1]) >> 1) & 0x1f]
84         : '=');
85      if (!--outlen)
86        break;
87      *out++ =
88        (inlen
89         ? b32str[((to_uchar (in[1]) << 4)
90                   + (--inlen ? to_uchar (in[2]) >> 4 : 0))
91                  & 0x1f]
92         : '=');
93      if (!--outlen)
94        break;
95      *out++ =
96        (inlen
97         ? b32str[((to_uchar (in[2]) << 1)
98                   + (--inlen ? to_uchar (in[3]) >> 7 : 0))
99                  & 0x1f]
100         : '=');
101      if (!--outlen)
102        break;
103      *out++ =
104        (inlen
105         ? b32str[(to_uchar (in[3]) >> 2) & 0x1f]
106         : '=');
107      if (!--outlen)
108        break;
109      *out++ =
110        (inlen
111         ? b32str[((to_uchar (in[3]) << 3)
112                   + (--inlen ? to_uchar (in[4]) >> 5 : 0))
113                  & 0x1f]
114         : '=');
115      if (!--outlen)
116        break;
117      *out++ = inlen ? b32str[to_uchar (in[4]) & 0x1f] : '=';
118      if (!--outlen)
119        break;
120      if (inlen)
121        inlen--;
122      if (inlen)
123        in += 5;
124    }
125
126  if (outlen)
127    *out = '\0';
128}
129
130/* Allocate a buffer and store zero terminated base32 encoded data
131   from array IN of size INLEN, returning BASE32_LENGTH(INLEN), i.e.,
132   the length of the encoded data, excluding the terminating zero.  On
133   return, the OUT variable will hold a pointer to newly allocated
134   memory that must be deallocated by the caller.  If output string
135   length would overflow, 0 is returned and OUT is set to NULL.  If
136   memory allocation failed, OUT is set to NULL, and the return value
137   indicates length of the requested memory block, i.e.,
138   BASE32_LENGTH(inlen) + 1. */
139size_t
140base32_encode_alloc (const char *in, size_t inlen, char **out)
141{
142  size_t outlen = 1 + BASE32_LENGTH (inlen);
143
144  /* Check for overflow in outlen computation.
145   *
146   * If there is no overflow, outlen >= inlen.
147   *
148   * TODO Is this a sufficient check?  (See the notes in base64.c.)
149   */
150  if (inlen > outlen)
151    {
152      *out = NULL;
153      return 0;
154    }
155
156  *out = malloc (outlen);
157  if (!*out)
158    return outlen;
159
160  base32_encode (in, inlen, *out, outlen);
161
162  return outlen - 1;
163}
164
165/* With this approach this file works independent of the charset used
166   (think EBCDIC).  However, it does assume that the characters in the
167   Base32 alphabet (A-Z2-7) are encoded in 0..255.  POSIX
168   1003.1-2001 require that char and unsigned char are 8-bit
169   quantities, though, taking care of that problem.  But this may be a
170   potential problem on non-POSIX C99 platforms.
171
172   IBM C V6 for AIX mishandles "#define B32(x) ...'x'...", so use "_"
173   as the formal parameter rather than "x".  */
174#define B32(_)                                  \
175  ((_) == 'A' ? 0                               \
176   : (_) == 'B' ? 1                             \
177   : (_) == 'C' ? 2                             \
178   : (_) == 'D' ? 3                             \
179   : (_) == 'E' ? 4                             \
180   : (_) == 'F' ? 5                             \
181   : (_) == 'G' ? 6                             \
182   : (_) == 'H' ? 7                             \
183   : (_) == 'I' ? 8                             \
184   : (_) == 'J' ? 9                             \
185   : (_) == 'K' ? 10                            \
186   : (_) == 'L' ? 11                            \
187   : (_) == 'M' ? 12                            \
188   : (_) == 'N' ? 13                            \
189   : (_) == 'O' ? 14                            \
190   : (_) == 'P' ? 15                            \
191   : (_) == 'Q' ? 16                            \
192   : (_) == 'R' ? 17                            \
193   : (_) == 'S' ? 18                            \
194   : (_) == 'T' ? 19                            \
195   : (_) == 'U' ? 20                            \
196   : (_) == 'V' ? 21                            \
197   : (_) == 'W' ? 22                            \
198   : (_) == 'X' ? 23                            \
199   : (_) == 'Y' ? 24                            \
200   : (_) == 'Z' ? 25                            \
201   : (_) == '2' ? 26                            \
202   : (_) == '3' ? 27                            \
203   : (_) == '4' ? 28                            \
204   : (_) == '5' ? 29                            \
205   : (_) == '6' ? 30                            \
206   : (_) == '7' ? 31                            \
207   : -1)
208
209static const signed char b32[0x100] = {
210  B32 (0), B32 (1), B32 (2), B32 (3),
211  B32 (4), B32 (5), B32 (6), B32 (7),
212  B32 (8), B32 (9), B32 (10), B32 (11),
213  B32 (12), B32 (13), B32 (14), B32 (15),
214  B32 (16), B32 (17), B32 (18), B32 (19),
215  B32 (20), B32 (21), B32 (22), B32 (23),
216  B32 (24), B32 (25), B32 (26), B32 (27),
217  B32 (28), B32 (29), B32 (30), B32 (31),
218  B32 (32), B32 (33), B32 (34), B32 (35),
219  B32 (36), B32 (37), B32 (38), B32 (39),
220  B32 (40), B32 (41), B32 (42), B32 (43),
221  B32 (44), B32 (45), B32 (46), B32 (47),
222  B32 (48), B32 (49), B32 (50), B32 (51),
223  B32 (52), B32 (53), B32 (54), B32 (55),
224  B32 (56), B32 (57), B32 (58), B32 (59),
225  B32 (60), B32 (61), B32 (62), B32 (63),
226  B32 (32), B32 (65), B32 (66), B32 (67),
227  B32 (68), B32 (69), B32 (70), B32 (71),
228  B32 (72), B32 (73), B32 (74), B32 (75),
229  B32 (76), B32 (77), B32 (78), B32 (79),
230  B32 (80), B32 (81), B32 (82), B32 (83),
231  B32 (84), B32 (85), B32 (86), B32 (87),
232  B32 (88), B32 (89), B32 (90), B32 (91),
233  B32 (92), B32 (93), B32 (94), B32 (95),
234  B32 (96), B32 (97), B32 (98), B32 (99),
235  B32 (100), B32 (101), B32 (102), B32 (103),
236  B32 (104), B32 (105), B32 (106), B32 (107),
237  B32 (108), B32 (109), B32 (110), B32 (111),
238  B32 (112), B32 (113), B32 (114), B32 (115),
239  B32 (116), B32 (117), B32 (118), B32 (119),
240  B32 (120), B32 (121), B32 (122), B32 (123),
241  B32 (124), B32 (125), B32 (126), B32 (127),
242  B32 (128), B32 (129), B32 (130), B32 (131),
243  B32 (132), B32 (133), B32 (134), B32 (135),
244  B32 (136), B32 (137), B32 (138), B32 (139),
245  B32 (140), B32 (141), B32 (142), B32 (143),
246  B32 (144), B32 (145), B32 (146), B32 (147),
247  B32 (148), B32 (149), B32 (150), B32 (151),
248  B32 (152), B32 (153), B32 (154), B32 (155),
249  B32 (156), B32 (157), B32 (158), B32 (159),
250  B32 (160), B32 (161), B32 (162), B32 (163),
251  B32 (132), B32 (165), B32 (166), B32 (167),
252  B32 (168), B32 (169), B32 (170), B32 (171),
253  B32 (172), B32 (173), B32 (174), B32 (175),
254  B32 (176), B32 (177), B32 (178), B32 (179),
255  B32 (180), B32 (181), B32 (182), B32 (183),
256  B32 (184), B32 (185), B32 (186), B32 (187),
257  B32 (188), B32 (189), B32 (190), B32 (191),
258  B32 (192), B32 (193), B32 (194), B32 (195),
259  B32 (196), B32 (197), B32 (198), B32 (199),
260  B32 (200), B32 (201), B32 (202), B32 (203),
261  B32 (204), B32 (205), B32 (206), B32 (207),
262  B32 (208), B32 (209), B32 (210), B32 (211),
263  B32 (212), B32 (213), B32 (214), B32 (215),
264  B32 (216), B32 (217), B32 (218), B32 (219),
265  B32 (220), B32 (221), B32 (222), B32 (223),
266  B32 (224), B32 (225), B32 (226), B32 (227),
267  B32 (228), B32 (229), B32 (230), B32 (231),
268  B32 (232), B32 (233), B32 (234), B32 (235),
269  B32 (236), B32 (237), B32 (238), B32 (239),
270  B32 (240), B32 (241), B32 (242), B32 (243),
271  B32 (244), B32 (245), B32 (246), B32 (247),
272  B32 (248), B32 (249), B32 (250), B32 (251),
273  B32 (252), B32 (253), B32 (254), B32 (255)
274};
275
276#if UCHAR_MAX == 255
277# define uchar_in_range(c) true
278#else
279# define uchar_in_range(c) ((c) <= 255)
280#endif
281
282/* Return true if CH is a character from the Base32 alphabet, and
283   false otherwise.  Note that '=' is padding and not considered to be
284   part of the alphabet.  */
285bool
286isbase32 (char ch)
287{
288  return uchar_in_range (to_uchar (ch)) && 0 <= b32[to_uchar (ch)];
289}
290
291/* Initialize decode-context buffer, CTX.  */
292void
293base32_decode_ctx_init (struct base32_decode_context *ctx)
294{
295  ctx->i = 0;
296}
297
298/* If CTX->i is 0 or 8, there are eight or more bytes in [*IN..IN_END), and
299   none of those eight is a newline, then return *IN.  Otherwise, copy up to
300   4 - CTX->i non-newline bytes from that range into CTX->buf, starting at
301   index CTX->i and setting CTX->i to reflect the number of bytes copied,
302   and return CTX->buf.  In either case, advance *IN to point to the byte
303   after the last one processed, and set *N_NON_NEWLINE to the number of
304   verified non-newline bytes accessible through the returned pointer.  */
305static char *
306get_8 (struct base32_decode_context *ctx,
307       char const *restrict *in, char const *restrict in_end,
308       size_t *n_non_newline)
309{
310  if (ctx->i == 8)
311    ctx->i = 0;
312
313  if (ctx->i == 0)
314    {
315      char const *t = *in;
316      if (8 <= in_end - *in && memchr (t, '\n', 8) == NULL)
317        {
318          /* This is the common case: no newline.  */
319          *in += 8;
320          *n_non_newline = 8;
321          return (char *) t;
322        }
323    }
324
325  {
326    /* Copy non-newline bytes into BUF.  */
327    char const *p = *in;
328    while (p < in_end)
329      {
330        char c = *p++;
331        if (c != '\n')
332          {
333            ctx->buf[ctx->i++] = c;
334            if (ctx->i == 8)
335              break;
336          }
337      }
338
339    *in = p;
340    *n_non_newline = ctx->i;
341    return ctx->buf;
342  }
343}
344
345#define return_false                            \
346  do                                            \
347    {                                           \
348      *outp = out;                              \
349      return false;                             \
350    }                                           \
351  while (false)
352
353/* Decode eight bytes of base32-encoded data, IN, of length INLEN
354   into the output buffer, *OUT, of size *OUTLEN bytes.  Return true if
355   decoding is successful, false otherwise.  If *OUTLEN is too small,
356   as many bytes as possible are written to *OUT.  On return, advance
357   *OUT to point to the byte after the last one written, and decrement
358   *OUTLEN to reflect the number of bytes remaining in *OUT.  */
359static bool
360decode_8 (char const *restrict in, size_t inlen,
361          char *restrict *outp, size_t *outleft)
362{
363  char *out = *outp;
364  if (inlen < 8)
365    return false;
366
367  if (!isbase32 (in[0]) || !isbase32 (in[1]) )
368    return false;
369
370  if (*outleft)
371    {
372      *out++ = ((b32[to_uchar (in[0])] << 3)
373                | (b32[to_uchar (in[1])] >> 2));
374      --*outleft;
375    }
376
377  if (in[2] == '=')
378    {
379      if (in[3] != '=' || in[4] != '=' || in[5] != '='
380          || in[6] != '=' || in[7] != '=')
381        return_false;
382    }
383  else
384    {
385      if (!isbase32 (in[2]) || !isbase32 (in[3]))
386        return_false;
387
388      if (*outleft)
389        {
390          *out++ = ((b32[to_uchar (in[1])] << 6)
391                    | (b32[to_uchar (in[2])] << 1)
392                    | (b32[to_uchar (in[3])] >> 4));
393          --*outleft;
394        }
395
396      if (in[4] == '=')
397        {
398          if (in[5] != '=' || in[6] != '=' || in[7] != '=')
399            return_false;
400        }
401      else
402        {
403          if (!isbase32 (in[4]))
404            return_false;
405
406          if (*outleft)
407            {
408              *out++ = ((b32[to_uchar (in[3])] << 4)
409                        | (b32[to_uchar (in[4])] >> 1));
410              --*outleft;
411            }
412
413          if (in[5] == '=')
414            {
415              if (in[6] != '=' || in[7] != '=')
416                return_false;
417            }
418          else
419            {
420              if (!isbase32 (in[5]) || !isbase32 (in[6]))
421                return_false;
422
423              if (*outleft)
424                {
425                  *out++ = ((b32[to_uchar (in[4])] << 7)
426                            | (b32[to_uchar (in[5])] << 2)
427                            | (b32[to_uchar (in[6])] >> 3));
428                  --*outleft;
429                }
430
431              if (in[7] != '=')
432                {
433                  if (!isbase32 (in[7]))
434                    return_false;
435
436                  if (*outleft)
437                    {
438                      *out++ = ((b32[to_uchar (in[6])] << 5)
439                                | (b32[to_uchar (in[7])]));
440                      --*outleft;
441                    }
442                }
443            }
444        }
445    }
446
447  *outp = out;
448  return true;
449}
450
451/* Decode base32-encoded input array IN of length INLEN to output array
452   OUT that can hold *OUTLEN bytes.  The input data may be interspersed
453   with newlines.  Return true if decoding was successful, i.e. if the
454   input was valid base32 data, false otherwise.  If *OUTLEN is too
455   small, as many bytes as possible will be written to OUT.  On return,
456   *OUTLEN holds the length of decoded bytes in OUT.  Note that as soon
457   as any non-alphabet, non-newline character is encountered, decoding
458   is stopped and false is returned.  If INLEN is zero, then process
459   only whatever data is stored in CTX.
460
461   Initially, CTX must have been initialized via base32_decode_ctx_init.
462   Subsequent calls to this function must reuse whatever state is recorded
463   in that buffer.  It is necessary for when a octuple of base32 input
464   bytes spans two input buffers.
465
466   If CTX is NULL then newlines are treated as garbage and the input
467   buffer is processed as a unit.  */
468
469bool
470base32_decode_ctx (struct base32_decode_context *ctx,
471                   const char *restrict in, size_t inlen,
472                   char *restrict out, size_t *outlen)
473{
474  size_t outleft = *outlen;
475  bool ignore_newlines = ctx != NULL;
476  bool flush_ctx = false;
477  unsigned int ctx_i = 0;
478
479  if (ignore_newlines)
480    {
481      ctx_i = ctx->i;
482      flush_ctx = inlen == 0;
483    }
484
485
486  while (true)
487    {
488      size_t outleft_save = outleft;
489      if (ctx_i == 0 && !flush_ctx)
490        {
491          while (true)
492            {
493              /* Save a copy of outleft, in case we need to re-parse this
494                 block of four bytes.  */
495              outleft_save = outleft;
496              if (!decode_8 (in, inlen, &out, &outleft))
497                break;
498
499              in += 8;
500              inlen -= 8;
501            }
502        }
503
504      if (inlen == 0 && !flush_ctx)
505        break;
506
507      /* Handle the common case of 72-byte wrapped lines.
508         This also handles any other multiple-of-8-byte wrapping.  */
509      if (inlen && *in == '\n' && ignore_newlines)
510        {
511          ++in;
512          --inlen;
513          continue;
514        }
515
516      /* Restore OUT and OUTLEFT.  */
517      out -= outleft_save - outleft;
518      outleft = outleft_save;
519
520      {
521        char const *in_end = in + inlen;
522        char const *non_nl;
523
524        if (ignore_newlines)
525          non_nl = get_8 (ctx, &in, in_end, &inlen);
526        else
527          non_nl = in;  /* Might have nl in this case. */
528
529        /* If the input is empty or consists solely of newlines (0 non-newlines),
530           then we're done.  Likewise if there are fewer than 8 bytes when not
531           flushing context and not treating newlines as garbage.  */
532        if (inlen == 0 || (inlen < 8 && !flush_ctx && ignore_newlines))
533          {
534            inlen = 0;
535            break;
536          }
537        if (!decode_8 (non_nl, inlen, &out, &outleft))
538          break;
539
540        inlen = in_end - in;
541      }
542    }
543
544  *outlen -= outleft;
545
546  return inlen == 0;
547}
548
549/* Allocate an output buffer in *OUT, and decode the base32 encoded
550   data stored in IN of size INLEN to the *OUT buffer.  On return, the
551   size of the decoded data is stored in *OUTLEN.  OUTLEN may be NULL,
552   if the caller is not interested in the decoded length.  *OUT may be
553   NULL to indicate an out of memory error, in which case *OUTLEN
554   contains the size of the memory block needed.  The function returns
555   true on successful decoding and memory allocation errors.  (Use the
556   *OUT and *OUTLEN parameters to differentiate between successful
557   decoding and memory error.)  The function returns false if the
558   input was invalid, in which case *OUT is NULL and *OUTLEN is
559   undefined. */
560bool
561base32_decode_alloc_ctx (struct base32_decode_context *ctx,
562                         const char *in, size_t inlen, char **out,
563                         size_t *outlen)
564{
565  /* This may allocate a few bytes too many, depending on input,
566     but it's not worth the extra CPU time to compute the exact size.
567     The exact size is 5 * inlen / 8, minus one or more bytes if the
568     input is padded with one or more "=".
569     Dividing before multiplying avoids the possibility of overflow.  */
570  size_t needlen = 5 * (inlen / 8) + 5;
571
572  *out = malloc (needlen);
573  if (!*out)
574    return true;
575
576  if (!base32_decode_ctx (ctx, in, inlen, *out, &needlen))
577    {
578      free (*out);
579      *out = NULL;
580      return false;
581    }
582
583  if (outlen)
584    *outlen = needlen;
585
586  return true;
587}
588