1/* 2 * Copyright 2020, Data61, CSIRO (ABN 41 687 119 230) 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 */ 6 7/* This program was written by a recovering C programmer. It likely has many 8 * things that will make C++ programmers cringe. FFTF. 9 */ 10 11#include <cassert> 12#include <cwchar> 13#include <error.h> 14#include <errno.h> 15#include <fstream> 16#include <getopt.h> 17#include <iostream> 18#include <locale> 19#include <map> 20 21using namespace std; 22 23#define die(args...) error(-1, errno, args) 24 25#include "tables.hpp" /* generated */ 26 27static int to_ascii(wifstream &in, wofstream &out) { 28 assert(in.is_open()); 29 assert(out.is_open()); 30 31 wchar_t c; 32 while (in.get(c)) { 33 34 const auto t = unicode_to_ascii.find(c); 35 if (t == unicode_to_ascii.end()) 36 out << c; 37 else 38 out << t->second; 39 40 } 41 42 return 0; 43} 44 45static int to_unicode(wifstream &in, wofstream &out) { 46 assert(in.is_open()); 47 assert(out.is_open()); 48 49 wchar_t buffer[ASCII_SEQ_MAX + 1]; 50 unsigned int index = 0; 51 52 wchar_t c; 53 while (in.get(c)) { 54 55 buffer[index++] = c; 56 57 if ((index == 1 && buffer[0] != '\\') || 58 (index == 2 && buffer[1] != '<') || 59 (index == 3 && buffer[2] == '^')) { 60 /* Dump the buffer. */ 61 buffer[index] = '\0'; 62 out << buffer; 63 index = 0; 64 } else if (buffer[index - 1] == '>') { 65 buffer[index] = '\0'; 66 const auto t = ascii_to_unicode.find(buffer); 67 if (t == ascii_to_unicode.end()) 68 die("unrecognised ASCII sequence \"%.*ls\"", (int)index, 69 buffer); 70 else 71 out << t->second; 72 index = 0; 73 } else if (index == sizeof(buffer) - 1) { 74 die("too large ASCII sequence \"%.*ls...\" in source", (int)index, 75 buffer); 76 } 77 } 78 79 if (index > 0) { 80 /* There is some pending text in the buffer. */ 81 buffer[index] = '\0'; 82 out << buffer; 83 } 84 85 return 0; 86} 87 88class Options { 89 public: 90 wifstream input; 91 wofstream output; 92 enum { 93 TO_UNICODE = 0, 94 TO_ASCII, 95 } mode; 96 97 Options() : mode(TO_UNICODE) {} 98 99 ~Options() { 100 input.close(); 101 output.close(); 102 } 103}; 104 105static int parse_args(int argc, char **argv, Options &options) { 106 while (true) { 107 static struct option opts[] = { 108 {"input", required_argument, 0, 'i'}, 109 {"output", required_argument, 0, 'o'}, 110 {"to-ascii", no_argument, 0, 'a'}, 111 {"to-unicode", no_argument, 0, 'u'}, 112 {0, 0, 0, 0}, 113 }; 114 int index = 0; 115 116 int c = getopt_long(argc, argv, "ai:o:u", opts, &index); 117 118 if (c == -1) 119 break; 120 121 switch (c) { 122 case 'a': 123 options.mode = Options::TO_ASCII; 124 break; 125 126 case 'i': 127 if (options.input.is_open()) 128 options.input.close(); 129 options.input.open(optarg); 130 if (!options.input.is_open()) 131 die("failed to open %s", optarg); 132 break; 133 134 case 'o': 135 if (options.output.is_open()) 136 options.output.close(); 137 options.output.open(optarg); 138 if (!options.output.is_open()) 139 die("failed to open %s", optarg); 140 break; 141 142 case 'u': 143 options.mode = Options::TO_UNICODE; 144 break; 145 146 default: 147 return -1; 148 } 149 } 150 151 if (!options.input.is_open()) 152 options.input.open("/dev/stdin"); 153 if (!options.output.is_open()) 154 options.output.open("/dev/stdout"); 155 156 return 0; 157} 158 159int main(int argc, char **argv) { 160 161 /* Switch to the user's native locale, which hopefully supports UTF-8. */ 162 locale::global(locale("")); 163 164 Options options; 165 166 if (parse_args(argc, argv, options) != 0) 167 return -1; 168 169 switch (options.mode) { 170 171 case Options::TO_ASCII: 172 return to_ascii(options.input, options.output); 173 174 case Options::TO_UNICODE: 175 return to_unicode(options.input, options.output); 176 177 default: 178 assert(!"invalid mode?"); 179 } 180 181 return 0; 182} 183