1#include <unistd.h> 2#include "lt_types.h" 3 4// Return number of bytes in the UTF-8 sequence which begins with a given byte. 5int wchar_len(byte b) { 6 if ((b & 0xE0) == 0xC0) return 2; 7 if ((b & 0xF0) == 0xE0) return 3; 8 if ((b & 0xF8) == 0xF0) return 4; 9 return 1; 10} 11 12void store_wchar(byte** p, wchar ch) { 13 if (ch < 0x80) { 14 *(*p)++ = (char) ch; 15 } else if (ch < 0x800) { 16 *(*p)++ = (byte) (0xC0 | ((ch >> 6) & 0x1F)); 17 *(*p)++ = (byte) (0x80 | (ch & 0x3F)); 18 } else if (ch < 0x10000) { 19 *(*p)++ = (byte) (0xE0 | ((ch >> 12) & 0x0F)); 20 *(*p)++ = (byte) (0x80 | ((ch >> 6) & 0x3F)); 21 *(*p)++ = (byte) (0x80 | (ch & 0x3F)); 22 } else { 23 *(*p)++ = (byte) (0xF0 | ((ch >> 18) & 0x07)); 24 *(*p)++ = (byte) (0x80 | ((ch >> 12) & 0x3F)); 25 *(*p)++ = (byte) (0x80 | ((ch >> 6) & 0x3F)); 26 *(*p)++ = (byte) (0x80 | (ch & 0x3F)); 27 } 28} 29 30wchar load_wchar(const byte** p) { 31 wchar ch; 32 switch (wchar_len(**p)) { 33 default: 34 ch = *(*p)++ & 0xFF; 35 break; 36 case 2: 37 ch = (*(*p)++ & 0x1F) << 6; 38 ch |= *(*p)++ & 0x3F; 39 break; 40 case 3: 41 ch = (*(*p)++ & 0x0F) << 12; 42 ch |= (*(*p)++ & 0x3F) << 6; 43 ch |= (*(*p)++ & 0x3F); 44 break; 45 case 4: 46 ch = (*(*p)++ & 0x07) << 18; 47 ch |= (*(*p)++ & 0x3F) << 12; 48 ch |= (*(*p)++ & 0x3F) << 6; 49 ch |= (*(*p)++ & 0x3F); 50 break; 51 } 52 return ch; 53} 54 55wchar read_wchar(int fd) { 56 byte cbuf[UNICODE_MAX_BYTES]; 57 int n = read(fd, &cbuf[0], 1); 58 if (n <= 0) 59 return 0; 60 int len = wchar_len(cbuf[0]); 61 int i; 62 for (i = 1; i < len; ++i) { 63 int n = read(fd, &cbuf[i], 1); 64 if (n != 1) return 0; 65 } 66 const byte* cp = cbuf; 67 wchar ch = load_wchar(&cp); 68 // assert(cp-cbuf == len); 69 return ch; 70} 71