1/*
2 * Copyright 2020, Data61, CSIRO (ABN 41 687 119 230)
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 */
6
7/* This program was written by a recovering C programmer. It likely has many
8 * things that will make C++ programmers cringe. FFTF.
9 */
10
11#include <cassert>
12#include <cwchar>
13#include <error.h>
14#include <errno.h>
15#include <fstream>
16#include <getopt.h>
17#include <iostream>
18#include <locale>
19#include <map>
20
21using namespace std;
22
/* Report a fatal error and terminate. Takes a printf-style format string and
 * its arguments and hands them to error(3) with the current errno: the message
 * is printed to stderr, followed by the errno description when errno is
 * non-zero, and the process then exits with status -1.
 */
#define die(...) error(-1, errno, __VA_ARGS__)
24
25#include "tables.hpp" /* generated */
26
27static int to_ascii(wifstream &in, wofstream &out) {
28    assert(in.is_open());
29    assert(out.is_open());
30
31    wchar_t c;
32    while (in.get(c)) {
33
34        const auto t = unicode_to_ascii.find(c);
35        if (t == unicode_to_ascii.end())
36            out << c;
37        else
38            out << t->second;
39
40    }
41
42    return 0;
43}
44
45static int to_unicode(wifstream &in, wofstream &out) {
46    assert(in.is_open());
47    assert(out.is_open());
48
49    wchar_t buffer[ASCII_SEQ_MAX + 1];
50    unsigned int index = 0;
51
52    wchar_t c;
53    while (in.get(c)) {
54
55        buffer[index++] = c;
56
57        if ((index == 1 && buffer[0] != '\\') ||
58            (index == 2 && buffer[1] != '<') ||
59            (index == 3 && buffer[2] == '^')) {
60            /* Dump the buffer. */
61            buffer[index] = '\0';
62            out << buffer;
63            index = 0;
64        } else if (buffer[index - 1] == '>') {
65            buffer[index] = '\0';
66            const auto t = ascii_to_unicode.find(buffer);
67            if (t == ascii_to_unicode.end())
68                die("unrecognised ASCII sequence \"%.*ls\"", (int)index,
69                    buffer);
70            else
71                out << t->second;
72            index = 0;
73        } else if (index == sizeof(buffer) - 1) {
74            die("too large ASCII sequence \"%.*ls...\" in source", (int)index,
75                buffer);
76        }
77    }
78
79    if (index > 0) {
80        /* There is some pending text in the buffer. */
81        buffer[index] = '\0';
82        out << buffer;
83    }
84
85    return 0;
86}
87
/* Settings collected from the command line: the input and output streams and
 * the direction of the conversion.
 *
 * No destructor is declared on purpose. The file streams close themselves when
 * destroyed, and leaving the special members implicit keeps the class movable.
 */
class Options {
    public:
        /* Conversion direction. */
        enum Mode {
            TO_UNICODE = 0,
            TO_ASCII,
        };

        std::wifstream input;
        std::wofstream output;

        /* Defaults to TO_UNICODE until a mode is selected explicitly. */
        Mode mode = TO_UNICODE;
};
104
105static int parse_args(int argc, char **argv, Options &options) {
106    while (true) {
107        static struct option opts[] = {
108            {"input", required_argument, 0, 'i'},
109            {"output", required_argument, 0, 'o'},
110            {"to-ascii", no_argument, 0, 'a'},
111            {"to-unicode", no_argument, 0, 'u'},
112            {0, 0, 0, 0},
113        };
114        int index = 0;
115
116        int c = getopt_long(argc, argv, "ai:o:u", opts, &index);
117
118        if (c == -1)
119            break;
120
121        switch (c) {
122            case 'a':
123                options.mode = Options::TO_ASCII;
124                break;
125
126            case 'i':
127                if (options.input.is_open())
128                    options.input.close();
129                options.input.open(optarg);
130                if (!options.input.is_open())
131                    die("failed to open %s", optarg);
132                break;
133
134            case 'o':
135                if (options.output.is_open())
136                    options.output.close();
137                options.output.open(optarg);
138                if (!options.output.is_open())
139                    die("failed to open %s", optarg);
140                break;
141
142            case 'u':
143                options.mode = Options::TO_UNICODE;
144                break;
145
146            default:
147                return -1;
148        }
149    }
150
151    if (!options.input.is_open())
152        options.input.open("/dev/stdin");
153    if (!options.output.is_open())
154        options.output.open("/dev/stdout");
155
156    return 0;
157}
158
159int main(int argc, char **argv) {
160
161    /* Switch to the user's native locale, which hopefully supports UTF-8. */
162    locale::global(locale(""));
163
164    Options options;
165
166    if (parse_args(argc, argv, options) != 0)
167        return -1;
168
169    switch (options.mode) {
170
171        case Options::TO_ASCII:
172            return to_ascii(options.input, options.output);
173
174        case Options::TO_UNICODE:
175            return to_unicode(options.input, options.output);
176
177        default:
178            assert(!"invalid mode?");
179    }
180
181    return 0;
182}
183