1/*
2 * Copyright 2020, Data61
3 * Commonwealth Scientific and Industrial Research Organisation (CSIRO)
4 * ABN 41 687 119 230.
5 *
6 * This software may be distributed and modified according to the terms of
7 * the BSD 2-Clause license. Note that NO WARRANTY is provided.
8 * See "LICENSE_BSD2.txt" for details.
9 *
10 * @TAG(DATA61_BSD)
11 */
12
13#pragma once
14
15#include <stdint.h>
16#include <string.h>
17#include <stdbool.h>
18#include <utils/arith.h>
19#include <utils/base64.h>
20
21/*
22 * Streaming base64 CBOR encoder
23 *
24 * This implementation is intended to allow structured data to be
25 * streamed out via a serial connection in a manner that minimises the
26 * number of actual bytes that must be written to the output.
27 *
28 * Data is streamed to an output as base64 encoded CBOR which can then
29 * be extracted from a serial log and decoded offline.
30 */
31
32typedef struct {
33    base64_t streamer;
34} cbor64_t;
35
/*
 * Major types of CBOR items
 *
 * Floats, simple values and the break marker all share major type 7.
 */
typedef enum {
    /* Integers */
    CBOR64_MT_UNSIGNED_INT = 0,
    CBOR64_MT_NEGATIVE_INT = 1,
    /* Strings */
    CBOR64_MT_BYTE_STRING = 2,
    CBOR64_MT_UTF8_STRING = 3,
    /* Containers */
    CBOR64_MT_ARRAY = 4,
    CBOR64_MT_MAP = 5,
    /* Tag describing the following item */
    CBOR64_MT_TAG = 6,
    /* Three names for the same major type */
    CBOR64_MT_FLOAT = 7,
    CBOR64_MT_SIMPLE = 7,
    CBOR64_MT_BREAK = 7,
} cbor64_mt_t;
49
/*
 * Additional information identifiers
 *
 * Several names deliberately share a value: the meaning of an identifier
 * depends on what kind of item it is used with.
 */
typedef enum {
    /* Integer literals are the values strictly below this limit */
    CBOR64_AI_INT_LITERAL_MAX = 24,

    /* Sizes of numeric values */
    CBOR64_AI_UINT8_T = 24,
    CBOR64_AI_UINT16_T = 25,
    CBOR64_AI_UINT32_T = 26,
    CBOR64_AI_UINT64_T = 27,

    /* The simple value is given in the next bytes */
    CBOR64_AI_SIMPLE_BYTE = 24,

    /* Sizes of IEEE 754 floats */
    CBOR64_AI_FLOAT16_T = 25, /* Half-precision */
    CBOR64_AI_FLOAT32_T = 26, /* Single-precision */
    CBOR64_AI_FLOAT64_T = 27, /* Double-precision */

    /* Length specifier for arrays/maps of unknown length */
    CBOR64_AI_INDEFINITE_LENGTH = 31,
} cbor64_ai_t;
68
/* Simple values that can be sent with cbor64_send_simple */
typedef enum {
    /* The two boolean values */
    CBOR64_SIMPLE_FALSE = 20,
    CBOR64_SIMPLE_TRUE = 21,
    /* The null value */
    CBOR64_SIMPLE_NULL = 22,
    /* The undefined value */
    CBOR64_SIMPLE_UNDEFINED = 23,
} cbor64_simple_t;
79
/*
 * Tags
 *
 * A tag is sent (see cbor64_tag) immediately before the item it
 * describes. The values below are the registered CBOR tag numbers.
 */
typedef enum {
    /* Semantic descriptors */

    /* Date & time (encoded as UTF-8 string) */
    CBOR64_TAG_DATETIME_UTF8 = 0,
    /* Date & time encoded relative to an epoch */
    CBOR64_TAG_DATETIME_EPOCH = 1,
    /* Big integers (encoded as bytes) */
    CBOR64_TAG_POSITIVE_BIGNUM = 2,
    CBOR64_TAG_NEGATIVE_BIGNUM = 3,
    /* Decimal fraction (encoded as array of 2 integers (base 10 exponent, mantissa)) */
    CBOR64_TAG_DECIMAL_FRACTION = 4,
    /*
     * Big float (encoded as array of 2 integers (base 2 exponent, mantissa))
     *
     * This is tag 5; tag 4 is the decimal fraction above.
     */
    CBOR64_TAG_BIG_FLOAT = 5,

    /* Encoding hints */

    /* Encode byte string children as base64url */
    CBOR64_TAG_ENCODE_BASE64URL = 21,
    /* Encode byte string children as base64 */
    CBOR64_TAG_ENCODE_BASE64 = 22,
    /* Encode byte string children as base16 */
    CBOR64_TAG_ENCODE_BASE16 = 23,
    /* Byte string encodes CBOR item */
    CBOR64_TAG_ENCODE_CBOR = 24,

    /* UTF-8 String descriptors */

    /* String is a URI */
    CBOR64_TAG_UTF8_URI = 32,
    /* String is a base64url */
    CBOR64_TAG_UTF8_BASE64URL = 33,
    /* String is a base64 */
    CBOR64_TAG_UTF8_BASE64 = 34,
    /* String is a PCRE/ECMA262 regular expression */
    CBOR64_TAG_UTF8_RE = 35,
    /* String MIME message */
    CBOR64_TAG_UTF8_MIME = 36,

    /* Shared values */

    /* A value that may later be referenced */
    CBOR64_TAG_SHAREABLE = 28,
    /* A reference to a previously shared value */
    CBOR64_TAG_SHARED_VALUE = 29,

    /* String references */

    /* A reference to a previously tagged string */
    CBOR64_TAG_STRING_REF = 25,
    /* A domain containing string references */
    CBOR64_TAG_STRING_REF_DOMAIN = 256,

    /* Self-described CBOR (magic bytes) */
    CBOR64_TAG_SELF_DESCRIBED = 55799,
} cbor64_tag_t;
137
138/*
139 * Inline implementation
140 * =====================
141 */
142
143/* Generate the initial byte indicating the type of the following data */
144int cbor64_initial_byte(base64_t *streamer, cbor64_mt_t type, uint8_t data);
145
146/* Send a break byte to terminate indefinite-length item */
147int cbor64_send_break(base64_t *streamer);
148
149/* This sends a numeric item to the streamer using big-endian encoding */
150int cbor64_send_item(base64_t *streamer, cbor64_mt_t type, uint64_t number);
151
152/* Send a type array of bytes (UTF8 or bytes) */
153int cbor64_send_typed_bytes(base64_t *streamer, cbor64_mt_t type, unsigned char *buffer, size_t length);
154
155/* Send a simple value in one or two bytes */
156int cbor64_send_simple(base64_t *streamer, cbor64_simple_t value);
157
158/*
159 * External API
160 * ============
161 */
162
163
164/*
165 * Send a tag for the following item
166 *
167 * A tag is a single item describing the next item in the stream. It
168 * can denote some particular semantic meaning for the subsequent item
169 * or that the item is to be encoded in some particular manner when
170 * translated to JSON (see cbor64_tag_t).
171 */
172static inline int cbor64_tag(base64_t *streamer, cbor64_tag_t tag)
173{
174    return cbor64_send_item(streamer, CBOR64_MT_TAG, tag);
175}
176
177/*
178 * Simple types
179 * ------------
180 */
181
182/* Send a boolean value */
183static inline int cbor64_bool(base64_t *streamer, int boolean)
184{
185    uint8_t value = CBOR64_SIMPLE_FALSE;
186    if (boolean) {
187        value = CBOR64_SIMPLE_TRUE;
188    }
189    return cbor64_send_simple(streamer, value);
190}
191
192/* Send a null */
193static inline int cbor64_null(base64_t *streamer)
194{
195    return cbor64_send_simple(streamer, CBOR64_SIMPLE_NULL);
196}
197
198/* Send an undefined */
199static inline int cbor64_undefined(base64_t *streamer)
200{
201    return cbor64_send_simple(streamer, CBOR64_SIMPLE_UNDEFINED);
202}
203
204/*
205 * Integer types
206 * -------------
207 */
208
209/* Send an unsigned integer value */
210static inline int cbor64_uint(base64_t *streamer, uint64_t number)
211{
212    return cbor64_send_item(streamer, CBOR64_MT_UNSIGNED_INT, number);
213}
214
215/* Send a signed integer value */
216static inline int cbor64_int(base64_t *streamer, int64_t number)
217{
218    cbor64_mt_t type = CBOR64_MT_UNSIGNED_INT;
219    if (number < 0) {
220        type = CBOR64_MT_NEGATIVE_INT;
221        number = (-1) - number;
222    }
223
224    return cbor64_send_item(streamer, type, number);
225}
226
227/*
228 * IEEE 754 Float types
229 * --------------------
230 */
231
232/* Send a single-precision float value */
233int cbor64_float(base64_t *streamer, float number);
234
235/* Send a double-precision float value */
236int cbor64_double(base64_t *streamer, double number);
237
238/*
239 * Byte arrays
240 * -----------
241 *
242 * The following functions describe 3 kinds of byte array:
243 *  - Raw bytes (bytes)
244 *  - C strings that are not guaranteed to be UTF8 (string)
245 *  - UTF-8 C strings (utf8)
246 *
247 * Each has a function that will stream a single array along with its
248 * size which can be used directly. Additionally, a series of 'chunks'
249 * can be sent without the need to know the number of chunks. A series
250 * of chunks must start with a call to 'cbor64_<kind>_chunks_start' and
 * finish with a call to 'cbor64_<kind>_chunks_end' with only calls to
252 * the corresponding 'cbor64_<kind>' in-between.
253 *
254 * For example:
255 *
256 *     cbor64_utf8_chunks_start(streamer);
257 *     cbor64_utf8(streamer, "Hello,");
258 *     cbor64_utf8(streamer, "world!");
259 *     cbor64_utf8_chunks_end(streamer);
260 */
261
262/* send an array of bytes */
263static inline int cbor64_bytes(base64_t *streamer, unsigned char *buffer, size_t length)
264{
265    return cbor64_send_typed_bytes(streamer, CBOR64_MT_BYTE_STRING, buffer, length);
266}
267
268/* Start chunked bytes */
269static inline int cbor64_byte_chunks_start(base64_t *streamer)
270{
271    return cbor64_send_item(streamer, CBOR64_MT_BYTE_STRING, CBOR64_AI_INDEFINITE_LENGTH);
272}
273
274/* End chunked string */
275static inline int cbor64_byte_chunks_end(base64_t *streamer)
276{
277    return cbor64_send_break(streamer);
278}
279
280/* Send a non-UTF-8 string */
281static inline int cbor64_string(base64_t *streamer, char *text)
282{
283    return cbor64_bytes(streamer, text, strlen(text));
284}
285
286/* Start chunked string */
287static inline int cbor64_string_chunks_start(base64_t *streamer)
288{
289    return cbor64_send_item(streamer, CBOR64_MT_BYTE_STRING, CBOR64_AI_INDEFINITE_LENGTH);
290}
291
292/* End chunked string */
293static inline int cbor64_string_chunks_end(base64_t *streamer)
294{
295    return cbor64_send_break(streamer);
296}
297
298/* Send a UTF-8 string */
299static inline int cbor64_utf8(base64_t *streamer, char *text)
300{
301    return cbor64_send_typed_bytes(streamer, CBOR64_MT_UTF8_STRING, text, strlen(text));
302}
303
304/* Start chunked UTF-8 string */
305static inline int cbor64_utf8_chunks_start(base64_t *streamer)
306{
307    return cbor64_send_item(streamer, CBOR64_MT_UTF8_STRING, CBOR64_AI_INDEFINITE_LENGTH);
308}
309
310/* End chunked UTF-8 string */
311static inline int cbor64_utf8_chunks_end(base64_t *streamer)
312{
313    return cbor64_send_break(streamer);
314}
315
316/*
317 * Arrays
318 * ------
319 *
320 * Arrays are a series of items. An array of known length need only
321 * start with a call to 'cbor64_array_length'.
322 *
323 *     cbor64_array_length(streamer, 2);
324 *     cbor64_uint(streamer, 12);
325 *     cbor64_uint(streamer, 28);
326 *
327 * If the length is unknown, the array can be started with
328 * 'cbor64_array_start' and completed with a call to 'cbor64_array_end'.
329 *
330 *     cbor64_array_start(streamer);
331 *     cbor64_uint(streamer, 15);
332 *     cbor64_uint(streamer, 10538);
333 *     cbor64_array_end(streamer);
334 */
335
336/* Start an array of unknown length */
337static inline int cbor64_array_start(base64_t *streamer)
338{
339    return cbor64_initial_byte(streamer, CBOR64_MT_ARRAY, CBOR64_AI_INDEFINITE_LENGTH);
340}
341
342/* End an array of unknown length */
343static inline int cbor64_array_end(base64_t *streamer)
344{
345    return cbor64_send_break(streamer);
346}
347
348/* Start an array of known length */
349static inline int cbor64_array_length(base64_t *streamer, uint64_t length)
350{
351    return cbor64_send_item(streamer, CBOR64_MT_ARRAY, length);
352}
353
354/*
355 * Maps
356 * ----
357 *
358 * Maps are a series of key-value pairs. The keys may be of any type.
359 *
360 * A map of known length need only start with a call to
361 * 'cbor64_map_length'.
362 *
363 *     cbor64_map_length(streamer, 2);
364 *     cbor64_utf8(streamer, "x");
365 *     cbor64_uint(streamer, 48);
366 *     cbor64_utf8(streamer, "y");
367 *     cbor64_uint(streamer, 97);
368 *
369 * If the length is unknown, the map can be started with
370 * 'cbor64_map_start' and completed with a call to 'cbor64_map_end'.
371 *
372 *     cbor64_map_start(streamer);
373 *     cbor64_utf8(streamer, "x");
374 *     cbor64_uint(streamer, 48);
375 *     cbor64_utf8(streamer, "y");
376 *     cbor64_uint(streamer, 97);
377 *     cbor64_map_end(streamer);
378 */
379
380/* Start a map of unknown length */
381static inline int cbor64_map_start(base64_t *streamer)
382{
383    return cbor64_initial_byte(streamer, CBOR64_MT_MAP, CBOR64_AI_INDEFINITE_LENGTH);
384}
385
386/* End a map of unknown length */
387static inline int cbor64_map_end(base64_t *streamer)
388{
389    return cbor64_send_break(streamer);
390}
391
392/* Start a map of known length */
393static inline int cbor64_map_length(base64_t *streamer, uint64_t length)
394{
395    return cbor64_send_item(streamer, CBOR64_MT_MAP, length);
396}
397
398/*
399 * String reference domains
400 * ========================
401 *
402 * String reference domains allow reduced encoding of strings by only
403 * emitting each encoded string once and then using tagged numeric
404 * references to previous occurrences of strings.
405 *
 * The current implementation is suboptimal but avoids allocation by
 * using a static allocation of the strings used.
408 *
409 * Within a string reference domain, all strings must be emitted using
410 * 'cbor64_string_ref' or 'cbor64_utf8_ref' emitter. To emit a sized
411 * byte array or data containing strings not in the domain, you can
412 * create a new null domain that contains no references.
413 *
414 * Using shared values
415 * -------------------
416 *
417 * If the tooling used does not support string reference domains but
418 * does support shared values, this can be used to implement similar
419 * semantics, however only one domain using shared values can exist in a
420 * dataset.
421 */
422
/*
 * State of a string reference domain: the strings that may be emitted
 * and how many of them have been emitted so far.
 */
typedef struct {
    /* NULL-terminated array of strings supplied when the domain is created */
    char **strings;
    /* Count of emitted strings; reset to 0 when the domain is created */
    size_t emitted;
    /* Use shared values rather than string references */
    bool shared_values;
} cbor64_domain_t;
430
431/* Start a new domain with no inner string references */
432static inline int cbor64_null_domain(base64_t *streamer)
433{
434    return cbor64_tag(streamer, CBOR64_TAG_STRING_REF_DOMAIN);
435}
436
437/*
438 * Create a new string reference domain
439 *
440 * The provided array of strings must not be used again within this
441 * domain in a nested fashion.
442 *
443 * The array of strings must be terminated with a NULL.
444 */
445static inline int cbor64_string_ref_domain(base64_t *streamer, char **strings, cbor64_domain_t *domain)
446{
447    domain->strings = strings;
448    domain->emitted = 0;
449    domain->shared_values = false;
450
451    return cbor64_tag(streamer, CBOR64_TAG_STRING_REF_DOMAIN);
452}
453
454/*
455 * Create a new shared value domain
456 *
457 * There must be no more than one shared value domain in an output.
458 *
459 * The provided array of strings must not be used again within this
460 * domain in a nested fashion.
461 *
462 * The array of strings must be terminated with a NULL.
463 */
464static inline void cbor64_shared_value_domain(char **strings, cbor64_domain_t *domain)
465{
466    domain->strings = strings;
467    domain->emitted = 0;
468    domain->shared_values = true;
469}
470
471/*
472 * Emit a string reference
473 */
474int cbor64_string_ref(base64_t *streamer, cbor64_domain_t *domain, char *string);
475
476/*
477 * Emit a utf8 reference
478 */
479int cbor64_utf8_ref(base64_t *streamer, cbor64_domain_t *domain, char *string);
480