/* ct.c revision 104349 */
/*
 * Extract the "charset" parameter from a MIME Content-Type value such as
 *
 *     text/xml; charset="iso-8859-1"
 *
 * Only the "text" and "application" top-level types are examined; any
 * other type yields an empty charset.
 */

#include <stdio.h>
#include <string.h>

/* Size in bytes of the buffer passed to getXMLCharset(), NUL included. */
#define CHARSET_MAX 41

/*
 * Scan the next token of a Content-Type value.
 *
 * *pp is the scan position; it is advanced as input is consumed.
 *
 * Token kinds and where *pp is left on return:
 *   - atom: a run of ordinary characters; *pp points at the character
 *     that ended it (whitespace, '(', ';', '/', '=', '"' or the
 *     terminating NUL), which is not consumed;
 *   - quoted string: the returned pointer addresses the opening '"' and
 *     *pp points just past the closing '"';
 *   - one of the single-character delimiters ';', '/', '=': *pp points
 *     just past it.
 *
 * Parenthesised comments (which may nest) and whitespace between tokens
 * are skipped.  A backslash causes the following character to be taken
 * literally.
 *
 * Returns the start of the token, or NULL when the input is exhausted,
 * a quoted string is unterminated, a backslash is the last character,
 * or an unmatched ')' is met.
 */
static const char *
getTok(const char **pp)
{
  /*
   * Order matters: every value above `init` is a comment nesting depth
   * (inComment == depth 1), so '(' increments and ')' decrements.
   */
  enum { inAtom, inString, init, inComment };
  int state = init;
  const char *tokStart = NULL;

  for (;;) {
    switch (**pp) {
    case '\0':
      /*
       * End of input also terminates a pending atom.  The NUL is not
       * consumed, so the next call reports end of input.
       */
      if (state == inAtom)
        return tokStart;
      return NULL;
    case ' ':
    case '\r':
    case '\t':
    case '\n':
      if (state == inAtom)
        return tokStart;
      break;
    case '(':
      if (state == inAtom)
        return tokStart;
      if (state != inString)
        state++;
      break;
    case ')':
      if (state > init)
        --state;
      else if (state != inString)
        return NULL;
      break;
    case ';':
    case '/':
    case '=':
      if (state == inAtom)
        return tokStart;
      if (state == init)
        return (*pp)++;
      break;
    case '\\':
      /* Skip the escaped character; the shared increment below steps over it. */
      ++*pp;
      if (**pp == '\0')
        return NULL;
      break;
    case '"':
      switch (state) {
      case inString:
        ++*pp;
        return tokStart;
      case inAtom:
        return tokStart;
      case init:
        tokStart = *pp;
        state = inString;
        break;
      default:
        /* Inside a comment: a quote is ordinary comment text. */
        break;
      }
      break;
    default:
      if (state == init) {
        tokStart = *pp;
        state = inAtom;
      }
      break;
    }
    ++*pp;
  }
  /* not reached */
}

/*
 * Compare the token [start, end) with `key`, ignoring ASCII case.
 *
 * key must be lowercase ASCII.
 *
 * Returns 1 when the token has exactly the length of `key` and every
 * character equals the key character or its uppercase form; returns 0
 * otherwise, including when start is NULL.
 */
static int
matchkey(const char *start, const char *end, const char *key)
{
  if (!start)
    return 0;

  for (; start != end; start++, key++) {
    /* Token longer than the key: stop before stepping past the NUL. */
    if (*key == '\0')
      return 0;
    if (*start == *key)
      continue;
    if (*key >= 'a' && *key <= 'z' && *start == *key - 'a' + 'A')
      continue;
    return 0;
  }
  return *key == '\0';
}

/*
 * Parse the Content-Type value `buf` and store its charset in `charset`.
 *
 * buf      NUL-terminated Content-Type value.
 * charset  output buffer of at least CHARSET_MAX bytes; always left
 *          NUL-terminated.
 *
 * Result:
 *   - top-level type "text": charset defaults to "us-ascii";
 *   - top-level type "application": charset defaults to "";
 *   - any other type: charset is "" and parameters are not examined;
 *   - a "charset=value" parameter replaces the default.  A quoted value
 *     is copied without the quotes and with backslash escapes removed.
 *   - a quoted value that does not fit in CHARSET_MAX - 1 characters
 *     clears the result; an unquoted value that does not fit stops the
 *     scan and leaves whatever was stored so far.
 *
 * An unquoted value ends the scan; after a quoted value the remaining
 * parameters are still examined, so a later charset parameter wins.
 */
void
getXMLCharset(const char *buf, char *charset)
{
  const char *next = buf;
  const char *p;

  charset[0] = '\0';

  p = getTok(&next);
  if (matchkey(p, next, "text"))
    strcpy(charset, "us-ascii");
  else if (!matchkey(p, next, "application"))
    return;

  p = getTok(&next);
  if (!p || *p != '/')
    return;

  /* The subtype (e.g. "xml") is consumed; it does not affect the result. */
  (void)getTok(&next);

  p = getTok(&next);
  while (p) {
    if (*p != ';') {
      p = getTok(&next);
      continue;
    }

    /* After ';' expect: charset '=' value.  Anything else is re-examined. */
    p = getTok(&next);
    if (!matchkey(p, next, "charset"))
      continue;
    p = getTok(&next);
    if (!p || *p != '=')
      continue;
    p = getTok(&next);
    if (!p)
      break;

    if (*p == '"') {
      /* p is the opening quote and next - 1 the closing quote. */
      const char *closing = next - 1;
      char *s = charset;

      while (++p != closing) {
        if (*p == '\\')
          ++p;
        if (s == charset + CHARSET_MAX - 1) {
          /* Too long: report no charset rather than a truncated one. */
          s = charset;
          break;
        }
        *s++ = *p;
      }
      *s = '\0';

      /* Resume from the token that follows the quoted string. */
      p = getTok(&next);
    }
    else {
      char *s = charset;

      if (next - p > CHARSET_MAX - 1)
        break;
      while (p != next)
        *s++ = *p++;
      *s = '\0';
      break;
    }
  }
}

/*
 * Command-line driver: prints the charset found in argv[1].
 * Define CT_NO_MAIN to leave it out when this file is built into
 * another program.
 */
#ifndef CT_NO_MAIN
int
main(int argc, char **argv)
{
  char buf[CHARSET_MAX];

  if (argc < 2) {
    fprintf(stderr, "usage: %s content-type\n",
            (argc > 0 && argv[0]) ? argv[0] : "ct");
    return 1;
  }
  getXMLCharset(argv[1], buf);
  printf("charset = \"%s\"\n", buf);
  return 0;
}
#endif /* !CT_NO_MAIN */