1/*
2 * Copyright 2015, NICTA
3 *
4 * This software may be distributed and modified according to the terms of
5 * the BSD 2-Clause license. Note that NO WARRANTY is provided.
6 * See "LICENSE_BSD2.txt" for details.
7 *
8 * @TAG(NICTA_BSD)
9 *
10 */
11
12/* This program was written by a recovering C programmer. It likely has many
13 * things that will make C++ programmers cringe. FFTF.
14 */
15
16#include <cassert>
17#include <cwchar>
18#include <error.h>
19#include <errno.h>
20#include <fstream>
21#include <getopt.h>
22#include <iostream>
23#include <locale>
24#include <map>
25
26using namespace std;
27
/* Report an error through error() from <error.h>, forwarding a printf-style
 * format string and its arguments together with a status of -1 and the
 * current value of errno. Per the glibc documentation a non-zero status makes
 * error() terminate the program, so callers treat die() as not returning.
 * The `args...` parameter form is the GNU named-variadic macro extension. */
#define die(args...) error(-1, errno, args)
29
30#include "tables.hpp" /* generated */
31
32static int to_ascii(wifstream &in, wofstream &out) {
33    assert(in.is_open());
34    assert(out.is_open());
35
36    wchar_t c;
37    while (in.get(c)) {
38
39        const auto t = unicode_to_ascii.find(c);
40        if (t == unicode_to_ascii.end())
41            out << c;
42        else
43            out << t->second;
44
45    }
46
47    return 0;
48}
49
50static int to_unicode(wifstream &in, wofstream &out) {
51    assert(in.is_open());
52    assert(out.is_open());
53
54    wchar_t buffer[ASCII_SEQ_MAX + 1];
55    unsigned int index = 0;
56
57    wchar_t c;
58    while (in.get(c)) {
59
60        buffer[index++] = c;
61
62        if ((index == 1 && buffer[0] != '\\') ||
63            (index == 2 && buffer[1] != '<') ||
64            (index == 3 && buffer[2] == '^')) {
65            /* Dump the buffer. */
66            buffer[index] = '\0';
67            out << buffer;
68            index = 0;
69        } else if (buffer[index - 1] == '>') {
70            buffer[index] = '\0';
71            const auto t = ascii_to_unicode.find(buffer);
72            if (t == ascii_to_unicode.end())
73                die("unrecognised ASCII sequence \"%.*ls\"", (int)index,
74                    buffer);
75            else
76                out << t->second;
77            index = 0;
78        } else if (index == sizeof(buffer) - 1) {
79            die("too large ASCII sequence \"%.*ls...\" in source", (int)index,
80                buffer);
81        }
82    }
83
84    if (index > 0) {
85        /* There is some pending text in the buffer. */
86        buffer[index] = '\0';
87        out << buffer;
88    }
89
90    return 0;
91}
92
/* Run-time configuration filled in by parse_args() and consumed by main().
 *
 * No destructor is declared: the file streams close themselves when they are
 * destroyed, so an explicit close() adds nothing and would only suppress the
 * implicitly generated move operations.
 */
class Options {
    public:
        /* Stream the text to convert is read from. */
        std::wifstream input;

        /* Stream the converted text is written to. */
        std::wofstream output;

        /* Direction of the conversion; TO_UNICODE unless changed. */
        enum {
            TO_UNICODE = 0,
            TO_ASCII,
        } mode = TO_UNICODE;
};
109
110static int parse_args(int argc, char **argv, Options &options) {
111    while (true) {
112        static struct option opts[] = {
113            {"input", required_argument, 0, 'i'},
114            {"output", required_argument, 0, 'o'},
115            {"to-ascii", no_argument, 0, 'a'},
116            {"to-unicode", no_argument, 0, 'u'},
117            {0, 0, 0, 0},
118        };
119        int index = 0;
120
121        int c = getopt_long(argc, argv, "ai:o:u", opts, &index);
122
123        if (c == -1)
124            break;
125
126        switch (c) {
127            case 'a':
128                options.mode = Options::TO_ASCII;
129                break;
130
131            case 'i':
132                if (options.input.is_open())
133                    options.input.close();
134                options.input.open(optarg);
135                if (!options.input.is_open())
136                    die("failed to open %s", optarg);
137                break;
138
139            case 'o':
140                if (options.output.is_open())
141                    options.output.close();
142                options.output.open(optarg);
143                if (!options.output.is_open())
144                    die("failed to open %s", optarg);
145                break;
146
147            case 'u':
148                options.mode = Options::TO_UNICODE;
149                break;
150
151            default:
152                return -1;
153        }
154    }
155
156    if (!options.input.is_open())
157        options.input.open("/dev/stdin");
158    if (!options.output.is_open())
159        options.output.open("/dev/stdout");
160
161    return 0;
162}
163
164int main(int argc, char **argv) {
165
166    /* Switch to the user's native locale, which hopefully supports UTF-8. */
167    locale::global(locale(""));
168
169    Options options;
170
171    if (parse_args(argc, argv, options) != 0)
172        return -1;
173
174    switch (options.mode) {
175
176        case Options::TO_ASCII:
177            return to_ascii(options.input, options.output);
178
179        case Options::TO_UNICODE:
180            return to_unicode(options.input, options.output);
181
182        default:
183            assert(!"invalid mode?");
184    }
185
186    return 0;
187}
188