1/* 2 * Copyright 2020, Data61 3 * Commonwealth Scientific and Industrial Research Organisation (CSIRO) 4 * ABN 41 687 119 230. 5 * 6 * This software may be distributed and modified according to the terms of 7 * the BSD 2-Clause license. Note that NO WARRANTY is provided. 8 * See "LICENSE_BSD2.txt" for details. 9 * 10 * @TAG(DATA61_BSD) 11 */ 12 13#pragma once 14 15#include <stdint.h> 16#include <string.h> 17#include <stdbool.h> 18#include <utils/arith.h> 19#include <utils/base64.h> 20 21/* 22 * Streaming base64 CBOR encoder 23 * 24 * This implementation is intended to allow structured data to be 25 * streamed out via a serial connection in a manner that minimises the 26 * number of actual bytes that must be written to the output. 27 * 28 * Data is streamed to an output as base64 encoded CBOR which can then 29 * be extracted from a serial log and decoded offline. 30 */ 31 32typedef struct { 33 base64_t streamer; 34} cbor64_t; 35 36/* Major types of CBOR items */ 37typedef enum { 38 CBOR64_MT_UNSIGNED_INT = 0, 39 CBOR64_MT_NEGATIVE_INT = 1, 40 CBOR64_MT_BYTE_STRING = 2, 41 CBOR64_MT_UTF8_STRING = 3, 42 CBOR64_MT_ARRAY = 4, 43 CBOR64_MT_MAP = 5, 44 CBOR64_MT_TAG = 6, 45 CBOR64_MT_FLOAT = 7, 46 CBOR64_MT_SIMPLE = 7, 47 CBOR64_MT_BREAK = 7, 48} cbor64_mt_t; 49 50/* Additional information identifiers */ 51typedef enum { 52 /* Values below 24 are integer literals */ 53 CBOR64_AI_INT_LITERAL_MAX = 24, 54 /* Numeric value sizes */ 55 CBOR64_AI_UINT8_T = 24, 56 CBOR64_AI_UINT16_T = 25, 57 CBOR64_AI_UINT32_T = 26, 58 CBOR64_AI_UINT64_T = 27, 59 /* Simple value indicated in next bytes */ 60 CBOR64_AI_SIMPLE_BYTE = 24, 61 /* Float sizes */ 62 CBOR64_AI_FLOAT16_T = 25, /* IEEE 754 Half-precision */ 63 CBOR64_AI_FLOAT32_T = 26, /* IEEE 754 Single-precision */ 64 CBOR64_AI_FLOAT64_T = 27, /* IEEE 754 Double-precision */ 65 /* Array/map length specifier */ 66 CBOR64_AI_INDEFINITE_LENGTH = 31, 67} cbor64_ai_t; 68 69/* Simple values */ 70typedef enum { 71 /* Boolean */ 72 CBOR64_SIMPLE_FALSE = 20, 73 CBOR64_SIMPLE_TRUE = 21, 74 /* Null */ 75 CBOR64_SIMPLE_NULL = 22, 76 /* Undefined */ 77 CBOR64_SIMPLE_UNDEFINED = 23, 78} cbor64_simple_t; 79 80/* tags */ 81typedef enum { 82 /* Semantic descriptors */ 83 84 /* Date & time (encoded as UTF-8 string) */ 85 CBOR64_TAG_DATETIME_UTF8 = 0, 86 /* Date & time encoded relative to an epoch */ 87 CBOR64_TAG_DATETIME_EPOCH = 1, 88 /* Big integers (encoded as bytes) */ 89 CBOR64_TAG_POSITIVE_BIGNUM = 2, 90 CBOR64_TAG_NEGATIVE_BIGNUM = 3, 91 /* Decimal fraction (encoded as array 2 integers (mantissa, base 10 scale)) */ 92 CBOR64_TAG_DECIMAL_FRACTION = 4, 93 /* Big float (encoded as array 2 integers (mantissa, base 2 scale)) */ 94 CBOR64_TAG_BIG_FLOAT = 4, 95 96 /* Encoding hints */ 97 98 /* Encode byte string children as base64url */ 99 CBOR64_TAG_ENCODE_BASE64URL = 21, 100 /* Encode byte string children as base64 */ 101 CBOR64_TAG_ENCODE_BASE64 = 22, 102 /* Encode byte string children as base16 */ 103 CBOR64_TAG_ENCODE_BASE16 = 23, 104 /* Byte string encodes CBOR item */ 105 CBOR64_TAG_ENCODE_CBOR = 24, 106 107 /* UTF-8 String descriptors */ 108 109 /* String is a URI */ 110 CBOR64_TAG_UTF8_URI = 32, 111 /* String is a base64url */ 112 CBOR64_TAG_UTF8_BASE64URL = 33, 113 /* String is a base64 */ 114 CBOR64_TAG_UTF8_BASE64 = 34, 115 /* String is a PCRE/ECMA262 regular expression */ 116 CBOR64_TAG_UTF8_RE = 35, 117 /* String MIME message */ 118 CBOR64_TAG_UTF8_MIME = 36, 119 120 /* Shared values */ 121 122 /* A value that may later be referenced */ 123 CBOR64_TAG_SHAREABLE = 28, 124 /* A reference to a previously shared value */ 125 CBOR64_TAG_SHARED_VALUE = 29, 126 127 /* String referneces */ 128 129 /* A reference to a previously tagged string */ 130 CBOR64_TAG_STRING_REF = 25, 131 /* A domain containing string references */ 132 CBOR64_TAG_STRING_REF_DOMAIN = 256, 133 134 /* Self-described CBOR (magic bytes) */ 135 CBOR64_TAG_SELF_DESCRIBED = 55799, 136} cbor64_tag_t; 137 138/* 139 * Inline implementation 140 * ===================== 141 */ 142 143/* Generate the initial byte indicating the type of the following data */ 144int cbor64_initial_byte(base64_t *streamer, cbor64_mt_t type, uint8_t data); 145 146/* Send a break byte to terminate indefinite-length item */ 147int cbor64_send_break(base64_t *streamer); 148 149/* This sends a numeric item to the streamer using big-endian encoding */ 150int cbor64_send_item(base64_t *streamer, cbor64_mt_t type, uint64_t number); 151 152/* Send a type array of bytes (UTF8 or bytes) */ 153int cbor64_send_typed_bytes(base64_t *streamer, cbor64_mt_t type, unsigned char *buffer, size_t length); 154 155/* Send a simple value in one or two bytes */ 156int cbor64_send_simple(base64_t *streamer, cbor64_simple_t value); 157 158/* 159 * External API 160 * ============ 161 */ 162 163 164/* 165 * Send a tag for the following item 166 * 167 * A tag is a single item describing the next item in the stream. It 168 * can denote some particular semantic meaning for the subsequent item 169 * or that the item is to be encoded in some particular manner when 170 * translated to JSON (see cbor64_tag_t). 171 */ 172static inline int cbor64_tag(base64_t *streamer, cbor64_tag_t tag) 173{ 174 return cbor64_send_item(streamer, CBOR64_MT_TAG, tag); 175} 176 177/* 178 * Simple types 179 * ------------ 180 */ 181 182/* Send a boolean value */ 183static inline int cbor64_bool(base64_t *streamer, int boolean) 184{ 185 uint8_t value = CBOR64_SIMPLE_FALSE; 186 if (boolean) { 187 value = CBOR64_SIMPLE_TRUE; 188 } 189 return cbor64_send_simple(streamer, value); 190} 191 192/* Send a null */ 193static inline int cbor64_null(base64_t *streamer) 194{ 195 return cbor64_send_simple(streamer, CBOR64_SIMPLE_NULL); 196} 197 198/* Send an undefined */ 199static inline int cbor64_undefined(base64_t *streamer) 200{ 201 return cbor64_send_simple(streamer, CBOR64_SIMPLE_UNDEFINED); 202} 203 204/* 205 * Integer types 206 * ------------- 207 */ 208 209/* Send an unsigned integer value */ 210static inline int cbor64_uint(base64_t *streamer, uint64_t number) 211{ 212 return cbor64_send_item(streamer, CBOR64_MT_UNSIGNED_INT, number); 213} 214 215/* Send a signed integer value */ 216static inline int cbor64_int(base64_t *streamer, int64_t number) 217{ 218 cbor64_mt_t type = CBOR64_MT_UNSIGNED_INT; 219 if (number < 0) { 220 type = CBOR64_MT_NEGATIVE_INT; 221 number = (-1) - number; 222 } 223 224 return cbor64_send_item(streamer, type, number); 225} 226 227/* 228 * IEEE 754 Float types 229 * -------------------- 230 */ 231 232/* Send a single-precision float value */ 233int cbor64_float(base64_t *streamer, float number); 234 235/* Send a double-precision float value */ 236int cbor64_double(base64_t *streamer, double number); 237 238/* 239 * Byte arrays 240 * ----------- 241 * 242 * The following functions describe 3 kinds of byte array: 243 * - Raw bytes (bytes) 244 * - C strings that are not guaranteed to be UTF8 (string) 245 * - UTF-8 C strings (utf8) 246 * 247 * Each has a function that will stream a single array along with its 248 * size which can be used directly. Additionally, a series of 'chunks' 249 * can be sent without the need to know the number of chunks. A series 250 * of chunks must start with a call to 'cbor64_<kind>_chunks_start' and 251 * finish with a call to 'cbor64_<kind>_chunks_start' with only calls to 252 * the corresponding 'cbor64_<kind>' in-between. 253 * 254 * For example: 255 * 256 * cbor64_utf8_chunks_start(streamer); 257 * cbor64_utf8(streamer, "Hello,"); 258 * cbor64_utf8(streamer, "world!"); 259 * cbor64_utf8_chunks_end(streamer); 260 */ 261 262/* send an array of bytes */ 263static inline int cbor64_bytes(base64_t *streamer, unsigned char *buffer, size_t length) 264{ 265 return cbor64_send_typed_bytes(streamer, CBOR64_MT_BYTE_STRING, buffer, length); 266} 267 268/* Start chunked bytes */ 269static inline int cbor64_byte_chunks_start(base64_t *streamer) 270{ 271 return cbor64_send_item(streamer, CBOR64_MT_BYTE_STRING, CBOR64_AI_INDEFINITE_LENGTH); 272} 273 274/* End chunked string */ 275static inline int cbor64_byte_chunks_end(base64_t *streamer) 276{ 277 return cbor64_send_break(streamer); 278} 279 280/* Send a non-UTF-8 string */ 281static inline int cbor64_string(base64_t *streamer, char *text) 282{ 283 return cbor64_bytes(streamer, text, strlen(text)); 284} 285 286/* Start chunked string */ 287static inline int cbor64_string_chunks_start(base64_t *streamer) 288{ 289 return cbor64_send_item(streamer, CBOR64_MT_BYTE_STRING, CBOR64_AI_INDEFINITE_LENGTH); 290} 291 292/* End chunked string */ 293static inline int cbor64_string_chunks_end(base64_t *streamer) 294{ 295 return cbor64_send_break(streamer); 296} 297 298/* Send a UTF-8 string */ 299static inline int cbor64_utf8(base64_t *streamer, char *text) 300{ 301 return cbor64_send_typed_bytes(streamer, CBOR64_MT_UTF8_STRING, text, strlen(text)); 302} 303 304/* Start chunked UTF-8 string */ 305static inline int cbor64_utf8_chunks_start(base64_t *streamer) 306{ 307 return cbor64_send_item(streamer, CBOR64_MT_UTF8_STRING, CBOR64_AI_INDEFINITE_LENGTH); 308} 309 310/* End chunked UTF-8 string */ 311static inline int cbor64_utf8_chunks_end(base64_t *streamer) 312{ 313 return cbor64_send_break(streamer); 314} 315 316/* 317 * Arrays 318 * ------ 319 * 320 * Arrays are a series of items. An array of known length need only 321 * start with a call to 'cbor64_array_length'. 322 * 323 * cbor64_array_length(streamer, 2); 324 * cbor64_uint(streamer, 12); 325 * cbor64_uint(streamer, 28); 326 * 327 * If the length is unknown, the array can be started with 328 * 'cbor64_array_start' and completed with a call to 'cbor64_array_end'. 329 * 330 * cbor64_array_start(streamer); 331 * cbor64_uint(streamer, 15); 332 * cbor64_uint(streamer, 10538); 333 * cbor64_array_end(streamer); 334 */ 335 336/* Start an array of unknown length */ 337static inline int cbor64_array_start(base64_t *streamer) 338{ 339 return cbor64_initial_byte(streamer, CBOR64_MT_ARRAY, CBOR64_AI_INDEFINITE_LENGTH); 340} 341 342/* End an array of unknown length */ 343static inline int cbor64_array_end(base64_t *streamer) 344{ 345 return cbor64_send_break(streamer); 346} 347 348/* Start an array of known length */ 349static inline int cbor64_array_length(base64_t *streamer, uint64_t length) 350{ 351 return cbor64_send_item(streamer, CBOR64_MT_ARRAY, length); 352} 353 354/* 355 * Maps 356 * ---- 357 * 358 * Maps are a series of key-value pairs. The keys may be of any type. 359 * 360 * A map of known length need only start with a call to 361 * 'cbor64_map_length'. 362 * 363 * cbor64_map_length(streamer, 2); 364 * cbor64_utf8(streamer, "x"); 365 * cbor64_uint(streamer, 48); 366 * cbor64_utf8(streamer, "y"); 367 * cbor64_uint(streamer, 97); 368 * 369 * If the length is unknown, the map can be started with 370 * 'cbor64_map_start' and completed with a call to 'cbor64_map_end'. 371 * 372 * cbor64_map_start(streamer); 373 * cbor64_utf8(streamer, "x"); 374 * cbor64_uint(streamer, 48); 375 * cbor64_utf8(streamer, "y"); 376 * cbor64_uint(streamer, 97); 377 * cbor64_map_end(streamer); 378 */ 379 380/* Start a map of unknown length */ 381static inline int cbor64_map_start(base64_t *streamer) 382{ 383 return cbor64_initial_byte(streamer, CBOR64_MT_MAP, CBOR64_AI_INDEFINITE_LENGTH); 384} 385 386/* End a map of unknown length */ 387static inline int cbor64_map_end(base64_t *streamer) 388{ 389 return cbor64_send_break(streamer); 390} 391 392/* Start a map of known length */ 393static inline int cbor64_map_length(base64_t *streamer, uint64_t length) 394{ 395 return cbor64_send_item(streamer, CBOR64_MT_MAP, length); 396} 397 398/* 399 * String reference domains 400 * ======================== 401 * 402 * String reference domains allow reduced encoding of strings by only 403 * emitting each encoded string once and then using tagged numeric 404 * references to previous occurrences of strings. 405 * 406 * The current implementation is suboptimal but avoid allocation by 407 * using a static allocation of the strings used. 408 * 409 * Within a string reference domain, all strings must be emitted using 410 * 'cbor64_string_ref' or 'cbor64_utf8_ref' emitter. To emit a sized 411 * byte array or data containing strings not in the domain, you can 412 * create a new null domain that contains no references. 413 * 414 * Using shared values 415 * ------------------- 416 * 417 * If the tooling used does not support string reference domains but 418 * does support shared values, this can be used to implement similar 419 * semantics, however only one domain using shared values can exist in a 420 * dataset. 421 */ 422 423/* Tracks the strings which have already been emitted and their index. */ 424typedef struct { 425 char **strings; 426 size_t emitted; 427 /* Use shared values rather than string references */ 428 bool shared_values; 429} cbor64_domain_t; 430 431/* Start a new domain with no inner string references */ 432static inline int cbor64_null_domain(base64_t *streamer) 433{ 434 return cbor64_tag(streamer, CBOR64_TAG_STRING_REF_DOMAIN); 435} 436 437/* 438 * Create a new string reference domain 439 * 440 * The provided array of strings must not be used again within this 441 * domain in a nested fashion. 442 * 443 * The array of strings must be terminated with a NULL. 444 */ 445static inline int cbor64_string_ref_domain(base64_t *streamer, char **strings, cbor64_domain_t *domain) 446{ 447 domain->strings = strings; 448 domain->emitted = 0; 449 domain->shared_values = false; 450 451 return cbor64_tag(streamer, CBOR64_TAG_STRING_REF_DOMAIN); 452} 453 454/* 455 * Create a new shared value domain 456 * 457 * There must be no more than one shared value domain in an output. 458 * 459 * The provided array of strings must not be used again within this 460 * domain in a nested fashion. 461 * 462 * The array of strings must be terminated with a NULL. 463 */ 464static inline void cbor64_shared_value_domain(char **strings, cbor64_domain_t *domain) 465{ 466 domain->strings = strings; 467 domain->emitted = 0; 468 domain->shared_values = true; 469} 470 471/* 472 * Emit a string reference 473 */ 474int cbor64_string_ref(base64_t *streamer, cbor64_domain_t *domain, char *string); 475 476/* 477 * Emit a utf8 reference 478 */ 479int cbor64_utf8_ref(base64_t *streamer, cbor64_domain_t *domain, char *string); 480