1// CODYlib -*- mode:c++ -*- 2// Copyright (C) 2020 Nathan Sidwell, nathan@acm.org 3// License: Apache v2.0 4 5// Cody 6#include "internal.hh" 7// C++ 8#include <algorithm> 9// C 10#include <cstring> 11// OS 12#include <unistd.h> 13#include <cerrno> 14 15// MessageBuffer code 16 17// Lines consist of words and end with a NEWLINE (0xa) char 18// Whitespace characters are TAB (0x9) and SPACE (0x20) 19// Words consist of non-whitespace chars separated by whitespace. 20// Multiple lines in one transaction are indicated by ending non-final 21// lines with a SEMICOLON (0x3b) word, immediately before the NEWLINE 22// Continuations with ; preceding it 23// Words matching regexp [-+_/%.a-zA-Z0-9]+ need no quoting. 24// Quoting with '...' 25// Anything outside of [-+_/%.a-zA-Z0-9] needs quoting 26// Anything outside of <= <space> or DEL or \' or \\ needs escaping. 27// Escapes are \\, \', \n, \t, \_, everything else as \<hex><hex>? 28// Spaces separate words, UTF8 encoding for non-ascii chars 29 30namespace Cody { 31namespace Detail { 32 33static const char CONTINUE = S2C(u8";"); 34 35void MessageBuffer::BeginLine () 36{ 37 if (!buffer.empty ()) 38 { 39 // Terminate the previous line with a continuation 40 buffer.reserve (buffer.size () + 3); 41 buffer.push_back (S2C(u8" ")); 42 buffer.push_back (CONTINUE); 43 buffer.push_back (S2C(u8"\n")); 44 } 45 lastBol = buffer.size (); 46} 47 48// QUOTE means 'maybe quote', we search it for quote-needing chars 49 50void MessageBuffer::Append (char const *str, bool quote, size_t len) 51{ 52 if (len == ~size_t (0)) 53 len = strlen (str); 54 55 if (!len && !quote) 56 return; 57 58 // We want to quote characters outside of [-+_A-Za-z0-9/%.], anything 59 // that could remotely be shell-active. UTF8 encoding for non-ascii. 60 if (quote && len) 61 { 62 quote = false; 63 // Scan looking for quote-needing characters. We could just 64 // append until we find one, but that's probably confusing 65 for (size_t ix = len; ix--;) 66 { 67 unsigned char c = (unsigned char)str[ix]; 68 if (!((c >= S2C(u8"a") && c <= S2C(u8"z")) 69 || (c >= S2C(u8"A") && c <= S2C(u8"Z")) 70 || (c >= S2C(u8"0") && c <= S2C(u8"9")) 71 || c == S2C(u8"-") || c == S2C(u8"+") || c == S2C(u8"_") 72 || c == S2C(u8"/") || c == S2C(u8"%") || c == S2C(u8"."))) 73 { 74 quote = true; 75 break; 76 } 77 } 78 } 79 80 // Maximal length of appended string 81 buffer.reserve (buffer.size () + len * (quote ? 3 : 1) + 2); 82 83 if (quote) 84 buffer.push_back (S2C(u8"'")); 85 86 for (auto *end = str + len; str != end;) 87 { 88 auto *e = end; 89 90 if (quote) 91 // Look for next escape-needing char. More relaxed than 92 // the earlier needs-quoting check. 93 for (e = str; e != end; ++e) 94 { 95 unsigned char c = (unsigned char)*e; 96 if (c < S2C(u8" ") || c == 0x7f 97 || c == S2C(u8"\\") || c == S2C(u8"'")) 98 break; 99 } 100 buffer.insert (buffer.end (), str, e); 101 str = e; 102 103 if (str == end) 104 break; 105 106 buffer.push_back (S2C(u8"\\")); 107 switch (unsigned char c = (unsigned char)*str++) 108 { 109 case S2C(u8"\t"): 110 c = S2C(u8"t"); 111 goto append; 112 113 case S2C(u8"\n"): 114 c = S2C(u8"n"); 115 goto append; 116 117 case S2C(u8"'"): 118 case S2C(u8"\\"): 119 append: 120 buffer.push_back (c); 121 break; 122 123 default: 124 // Full-on escape. Use 2 lower-case hex chars 125 for (unsigned shift = 8; shift;) 126 { 127 shift -= 4; 128 129 char nibble = (c >> shift) & 0xf; 130 nibble += S2C(u8"0"); 131 if (nibble > S2C(u8"9")) 132 nibble += S2C(u8"a") - (S2C(u8"9") + 1); 133 buffer.push_back (nibble); 134 } 135 } 136 } 137 138 if (quote) 139 buffer.push_back (S2C(u8"'")); 140} 141 142void MessageBuffer::Append (char c) 143{ 144 buffer.push_back (c); 145} 146 147void MessageBuffer::AppendInteger (unsigned u) 148{ 149 // Sigh, even though std::to_string is C++11, we support building on 150 // gcc 4.8, which is a C++11 compiler lacking std::to_string. so 151 // have something horrible. 152 std::string v (20, 0); 153 size_t len = snprintf (const_cast<char *> (v.data ()), v.size (), "%u", u); 154 v.erase (len); 155 156 AppendWord (v); 157} 158 159int MessageBuffer::Write (int fd) noexcept 160{ 161 size_t limit = buffer.size () - lastBol; 162 ssize_t count = write (fd, &buffer.data ()[lastBol], limit); 163 164 int err = 0; 165 if (count < 0) 166 err = errno; 167 else 168 { 169 lastBol += count; 170 if (size_t (count) != limit) 171 err = EAGAIN; 172 } 173 174 if (err != EAGAIN && err != EINTR) 175 { 176 // Reset for next message 177 buffer.clear (); 178 lastBol = 0; 179 } 180 181 return err; 182} 183 184int MessageBuffer::Read (int fd) noexcept 185{ 186 constexpr size_t blockSize = 200; 187 188 size_t lwm = buffer.size (); 189 size_t hwm = buffer.capacity (); 190 if (hwm - lwm < blockSize / 2) 191 hwm += blockSize; 192 buffer.resize (hwm); 193 194 auto iter = buffer.begin () + lwm; 195 ssize_t count = read (fd, &*iter, hwm - lwm); 196 buffer.resize (lwm + (count >= 0 ? count : 0)); 197 198 if (count < 0) 199 return errno; 200 201 if (!count) 202 // End of file 203 return -1; 204 205 bool more = true; 206 for (;;) 207 { 208 auto newline = std::find (iter, buffer.end (), S2C(u8"\n")); 209 if (newline == buffer.end ()) 210 break; 211 more = newline != buffer.begin () && newline[-1] == CONTINUE; 212 iter = newline + 1; 213 214 if (iter == buffer.end ()) 215 break; 216 217 if (!more) 218 { 219 // There is no continuation, but there are chars after the 220 // newline. Truncate the buffer and return an error 221 buffer.resize (iter - buffer.begin ()); 222 return EINVAL; 223 } 224 } 225 226 return more ? EAGAIN : 0; 227} 228 229int MessageBuffer::Lex (std::vector<std::string> &result) 230{ 231 result.clear (); 232 233 if (IsAtEnd ()) 234 return ENOENT; 235 236 Assert (buffer.back () == S2C(u8"\n")); 237 238 auto iter = buffer.begin () + lastBol; 239 240 for (std::string *word = nullptr;;) 241 { 242 char c = *iter; 243 244 ++iter; 245 if (c == S2C(u8" ") || c == S2C(u8"\t")) 246 { 247 word = nullptr; 248 continue; 249 } 250 251 if (c == S2C(u8"\n")) 252 break; 253 254 if (c == CONTINUE) 255 { 256 // Line continuation 257 if (word || *iter != S2C(u8"\n")) 258 goto malformed; 259 ++iter; 260 break; 261 } 262 263 if (c <= S2C(u8" ") || c >= 0x7f) 264 goto malformed; 265 266 if (!word) 267 { 268 result.emplace_back (); 269 word = &result.back (); 270 } 271 272 if (c == S2C(u8"'")) 273 { 274 // Quoted word 275 for (;;) 276 { 277 c = *iter; 278 279 if (c == S2C(u8"\n")) 280 { 281 malformed:; 282 result.clear (); 283 iter = std::find (iter, buffer.end (), S2C(u8"\n")); 284 auto back = iter; 285 if (back[-1] == CONTINUE && back[-2] == S2C(u8" ")) 286 // Smells like a line continuation 287 back -= 2; 288 result.emplace_back (&buffer[lastBol], 289 back - buffer.begin () - lastBol); 290 ++iter; 291 lastBol = iter - buffer.begin (); 292 return EINVAL; 293 } 294 295 if (c < S2C(u8" ") || c >= 0x7f) 296 goto malformed; 297 298 ++iter; 299 if (c == S2C(u8"'")) 300 break; 301 302 if (c == S2C(u8"\\")) 303 // escape 304 switch (c = *iter) 305 { 306 case S2C(u8"\\"): 307 case S2C(u8"'"): 308 ++iter; 309 break; 310 311 case S2C(u8"n"): 312 c = S2C(u8"\n"); 313 ++iter; 314 break; 315 316 case S2C(u8"_"): 317 // We used to escape SPACE as \_, so accept that 318 c = S2C(u8" "); 319 ++iter; 320 break; 321 322 case S2C(u8"t"): 323 c = S2C(u8"\t"); 324 ++iter; 325 break; 326 327 default: 328 { 329 unsigned v = 0; 330 for (unsigned nibble = 0; nibble != 2; nibble++) 331 { 332 c = *iter; 333 if (c < S2C(u8"0")) 334 { 335 if (!nibble) 336 goto malformed; 337 break; 338 } 339 else if (c <= S2C(u8"9")) 340 c -= S2C(u8"0"); 341 else if (c < S2C(u8"a")) 342 { 343 if (!nibble) 344 goto malformed; 345 break; 346 } 347 else if (c <= S2C(u8"f")) 348 c -= S2C(u8"a") - 10; 349 else 350 { 351 if (!nibble) 352 goto malformed; 353 break; 354 } 355 ++iter; 356 v = (v << 4) | c; 357 } 358 c = v; 359 } 360 } 361 word->push_back (c); 362 } 363 } 364 else 365 // Unquoted character 366 word->push_back (c); 367 } 368 lastBol = iter - buffer.begin (); 369 if (result.empty ()) 370 return ENOENT; 371 372 return 0; 373} 374 375void MessageBuffer::LexedLine (std::string &str) 376{ 377 if (lastBol) 378 { 379 size_t pos = lastBol - 1; 380 for (; pos; pos--) 381 if (buffer[pos-1] == S2C(u8"\n")) 382 break; 383 384 size_t end = lastBol - 1; 385 if (buffer[end-1] == CONTINUE && buffer[end-2] == S2C(u8" ")) 386 // Strip line continuation 387 end -= 2; 388 str.append (&buffer[pos], end - pos); 389 } 390} 391} // Detail 392} // Cody 393