1/*
2 * Copyright (c) 2014-2019 Pavel Kalvoda <me@pavelkalvoda.com>
3 *
4 * libcbor is free software; you can redistribute it and/or modify
5 * it under the terms of the MIT license. See LICENSE for details.
6 */
7
8#include "unicode.h"
9
/* Decoder states that callers compare against. The numeric values are the
 * ones stored in the transition section of the lookup table, so they must not
 * change: 0 means a complete, valid sequence has just been consumed, 1 means
 * the input is malformed. */
enum { UTF8_ACCEPT = 0, UTF8_REJECT = 1 };
12
/*
 * Lookup data for the UTF-8 decoding automaton, kept in one flat array:
 *
 *   - entries 0..255 map every possible byte value to a character class
 *     (values 0..11);
 *   - entries 256..399 are the transitions, 16 slots per state for states
 *     0..8, so the successor of state s on class c is
 *     utf8d[256 + s * 16 + c]. Slots 12..15 of each row are padding.
 */
static const uint8_t utf8d[] = {
    /* Byte value -> character class, 16 bytes per row. */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00..0f */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10..1f */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 20..2f */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 30..3f */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 40..4f */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 50..5f */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 60..6f */
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 70..7f */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 80..8f */
    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, /* 90..9f */
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, /* a0..af */
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, /* b0..bf */
    8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* c0..cf */
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* d0..df */
    10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, /* e0..ef */
    11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, /* f0..ff */

    /* (state, class) -> next state, one 16-entry row per state. */
    0, 1, 2, 3, 5, 8, 7, 1, 1, 1, 4, 6, 1, 1, 1, 1, /* s0 */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* s1 */
    1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, /* s2 */
    1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, /* s3 */
    1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, /* s4 */
    1, 2, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, /* s5 */
    1, 1, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, /* s6 */
    1, 3, 1, 1, 1, 1, 1, 3, 1, 3, 1, 1, 1, 1, 1, 1, /* s7 */
    1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* s8 */
};
54
55/* Copyright of this function: (c) 2008-2009 Bjoern Hoehrmann
56 * <bjoern@hoehrmann.de> */
57/* See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details. */
58uint32_t _cbor_unicode_decode(uint32_t* state, uint32_t* codep, uint32_t byte) {
59  uint32_t type = utf8d[byte];
60
61  *codep = (*state != UTF8_ACCEPT) ? (byte & 0x3fu) | (*codep << 6)
62                                   : (0xff >> type) & (byte);
63
64  *state = utf8d[256 + *state * 16 + type];
65  return *state;
66}
67
68size_t _cbor_unicode_codepoint_count(cbor_data source, size_t source_length,
69                                     struct _cbor_unicode_status* status) {
70  *status =
71      (struct _cbor_unicode_status){.location = 0, .status = _CBOR_UNICODE_OK};
72  uint32_t codepoint, state = UTF8_ACCEPT, res;
73  size_t pos = 0, count = 0;
74
75  for (; pos < source_length; pos++) {
76    res = _cbor_unicode_decode(&state, &codepoint, source[pos]);
77
78    if (res == UTF8_ACCEPT) {
79      count++;
80    } else if (res == UTF8_REJECT) {
81      goto error;
82    }
83  }
84
85  /* Unfinished multibyte codepoint */
86  if (state != UTF8_ACCEPT) goto error;
87
88  return count;
89
90error:
91  *status = (struct _cbor_unicode_status){.location = pos,
92                                          .status = _CBOR_UNICODE_BADCP};
93  return -1;
94}
95