1/*
2 * Copyright (c) 2014-2020 Pavel Kalvoda <me@pavelkalvoda.com>
3 *
4 * libcbor is free software; you can redistribute it and/or modify
5 * it under the terms of the MIT license. See LICENSE for details.
6 */
7
8#include "unicode.h"
9#include <stdint.h>
10
/* Decoder states that callers of _cbor_unicode_decode test for. */
enum {
  /* A complete code point has just been decoded (also the start state). */
  UTF8_ACCEPT = 0,
  /* The input seen so far cannot be valid UTF-8. */
  UTF8_REJECT = 1
};
13
/*
 * Lookup table driving the byte-at-a-time UTF-8 decoder in this file.
 *
 * Entries 0..255 map an input byte to its character class (values 0..11).
 * Entries from 256 onwards hold the state transitions: the successor of
 * `state` on a byte of class `type` is stored at 256 + state * 16 + type.
 */
static const uint8_t utf8d[] = {
    /* ---- byte -> character class ---- */
    /* 00..0f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 10..1f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 20..2f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 30..3f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 40..4f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 50..5f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 60..6f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 70..7f */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 80..8f */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 90..9f */ 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
    /* a0..af */ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    /* b0..bf */ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    /* c0..cf */ 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* d0..df */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* e0..ef */ 10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3,
    /* f0..ff */ 11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,

    /* ---- (state, class) -> next state, 16 entries per state ---- */
    /* s0 */ 0, 1, 2, 3, 5, 8, 7, 1, 1, 1, 4, 6, 1, 1, 1, 1,
    /* s1 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* s2 */ 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1,
    /* s3 */ 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1,
    /* s4 */ 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,
    /* s5 */ 1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1,
    /* s6 */ 1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1,
    /* s7 */ 1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1,
    /* s8 */ 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
};
55
56/* Copyright of this function: (c) 2008-2009 Bjoern Hoehrmann
57 * <bjoern@hoehrmann.de> */
58/* See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. */
59uint32_t _cbor_unicode_decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
60  uint32_t type = utf8d[byte];
61
62  *codep = (*state != UTF8_ACCEPT) ? (byte & 0x3fu) | (*codep << 6)
63                                   : (0xff >> type) & (byte);
64
65  *state = utf8d[256 + *state * 16 + type];
66  return *state;
67}
68
69size_t _cbor_unicode_codepoint_count(cbor_data source, size_t source_length,
70                                     struct _cbor_unicode_status* status) {
71  *status =
72      (struct _cbor_unicode_status){.location = 0, .status = _CBOR_UNICODE_OK};
73  uint32_t codepoint, state = UTF8_ACCEPT, res;
74  size_t pos = 0, count = 0;
75
76  for (; pos < source_length; pos++) {
77    res = _cbor_unicode_decode(&state, &codepoint, source[pos]);
78
79    if (res == UTF8_ACCEPT) {
80      count++;
81    } else if (res == UTF8_REJECT) {
82      goto error;
83    }
84  }
85
86  /* Unfinished multibyte codepoint */
87  if (state != UTF8_ACCEPT) goto error;
88
89  return count;
90
91error:
92  *status = (struct _cbor_unicode_status){.location = pos,
93                                          .status = _CBOR_UNICODE_BADCP};
94  return 0;
95}
96