/*
                            __  __            _
                         ___\ \/ /_ __   __ _| |_
                        / _ \\  /| '_ \ / _` | __|
                       |  __//  \| |_) | (_| | |_
                        \___/_/\_\ .__/ \__,_|\__|
                                 |_| XML parser

   Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
   Copyright (c) 2000-2017 Expat development team
   Licensed under the MIT license:

   Permission is  hereby granted,  free of charge,  to any  person obtaining
   a  copy  of  this  software   and  associated  documentation  files  (the
   "Software"),  to  deal in  the  Software  without restriction,  including
   without  limitation the  rights  to use,  copy,  modify, merge,  publish,
   distribute, sublicense, and/or sell copies of the Software, and to permit
   persons  to whom  the Software  is  furnished to  do so,  subject to  the
   following conditions:

   The above copyright  notice and this permission notice  shall be included
   in all copies or substantial portions of the Software.

   THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
   EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
   NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
   DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
   USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

#include <stdio.h>
#include <string.h>

/* Size in bytes of the buffer passed to getXMLCharset(), including the
   terminating NUL. */
#define CHARSET_MAX 41

/*
 * Scan the next token of a header value starting at *pp.
 *
 * Tokens are:
 *   - a single delimiter character (';', '/' or '='); *pp is left just
 *     past it;
 *   - a double-quoted string; the returned pointer addresses the opening
 *     quote and *pp is left just past the closing quote;
 *   - an atom (any other run of characters); *pp is left on the character
 *     that terminated it (whitespace, '(', a delimiter, '"' or the end of
 *     the input).
 * Whitespace and parenthesised comments, which may nest, are skipped.  A
 * backslash makes the following character be passed over unexamined.
 *
 * Returns a pointer to the first character of the token, or NULL when no
 * token is available: end of input outside an atom (including inside an
 * unterminated string or comment), a ')' with no matching '(', or a
 * backslash as the last character.
 */
static const char *
getTok(const char **pp) {
  /* States above inComment count nested comments: inComment + 1 means two
     levels deep, and so on. */
  enum { inAtom, inString, init, inComment };
  int state = init;
  const char *tokStart = NULL;
  for (;;) {
    switch (**pp) {
    case '\0':
      /* An atom may legitimately run up to the end of the input. */
      if (state == inAtom)
        return tokStart;
      return NULL;
    case ' ':
    case '\r':
    case '\t':
    case '\n':
      if (state == inAtom)
        return tokStart;
      break;
    case '(':
      if (state == inAtom)
        return tokStart;
      /* Enter a comment, or go one nesting level deeper. */
      if (state != inString)
        state++;
      break;
    case ')':
      if (state > init)
        --state; /* leave one comment level */
      else if (state != inString)
        return NULL; /* unbalanced ')' */
      break;
    case ';':
    case '/':
    case '=':
      if (state == inAtom)
        return tokStart;
      if (state == init)
        return (*pp)++;
      break;
    case '\\':
      /* Step over the escaped character so that it is never interpreted. */
      ++*pp;
      if (**pp == '\0')
        return NULL;
      break;
    case '"':
      switch (state) {
      case inString:
        ++*pp;
        return tokStart;
      case inAtom:
        return tokStart;
      case init:
        tokStart = *pp;
        state = inString;
        break;
      default:
        /* Inside a comment a quote is ordinary text. */
        break;
      }
      break;
    default:
      if (state == init) {
        tokStart = *pp;
        state = inAtom;
      }
      break;
    }
    ++*pp;
  }
  /* not reached */
}

/*
 * Compare the token [start, end) with key, ignoring the case of the token.
 *
 * key must be lowercase ASCII.  Returns 1 when the token has exactly the
 * length of key and every character equals the key character or its
 * uppercase form; returns 0 otherwise, including when start is NULL.
 */
static int
matchkey(const char *start, const char *end, const char *key) {
  if (! start)
    return 0;
  for (; start != end; start++, key++) {
    /* The token is longer than key.  Stop here so that key is never read
       past its terminator. */
    if (*key == '\0')
      return 0;
    if (*start != *key && *start != 'A' + (*key - 'a'))
      return 0;
  }
  return *key == '\0';
}

/*
 * Extract the charset parameter from a Content-Type style header value.
 *
 * buf      NUL-terminated text of the form
 *          "type/subtype; name=value; ...".  A NULL buf is treated as an
 *          empty header.
 * charset  output buffer of at least CHARSET_MAX bytes; it always receives
 *          a NUL-terminated string.
 *
 * The result is empty unless the type is "text" or "application"
 * (compared case-insensitively) and is followed by '/'.  For "text" the
 * result defaults to "us-ascii".  The value of the first "charset"
 * parameter found replaces that default; a quoted value is unquoted and its
 * backslash escapes are removed.  An unquoted value longer than
 * CHARSET_MAX - 1 characters is ignored, leaving the default in place; a
 * quoted value that does not fit yields an empty result.
 */
void
getXMLCharset(const char *buf, char *charset) {
  const char *next, *p;
  char *s;

  charset[0] = '\0';
  if (! buf)
    return;
  next = buf;
  p = getTok(&next);
  if (matchkey(p, next, "text"))
    strcpy(charset, "us-ascii");
  else if (! matchkey(p, next, "application"))
    return;
  p = getTok(&next);
  if (! p || *p != '/')
    return;
  /* Skip the subtype token; any subtype is accepted. */
  (void)getTok(&next);

  p = getTok(&next);
  while (p) {
    if (*p != ';') {
      /* Not the start of a parameter: keep scanning. */
      p = getTok(&next);
      continue;
    }
    p = getTok(&next);
    if (! matchkey(p, next, "charset"))
      continue;
    p = getTok(&next);
    if (! p || *p != '=')
      continue;
    p = getTok(&next);
    if (! p)
      break;

    s = charset;
    if (*p == '"') {
      /* p addresses the opening quote and next - 1 the closing quote. */
      while (++p != next - 1) {
        if (*p == '\\')
          ++p; /* copy the escaped character itself */
        if (s == charset + CHARSET_MAX - 1) {
          /* Too long: report no charset rather than a truncated one. */
          s = charset;
          break;
        }
        *s++ = *p;
      }
      *s = '\0';
    } else {
      if (next - p > CHARSET_MAX - 1)
        break;
      while (p != next)
        *s++ = *p++;
      *s = '\0';
    }
    /* The first charset parameter wins. */
    break;
  }
}

/* Define XMLMIME_NO_MAIN to leave out the command-line driver, e.g. when
   this parser is linked into another program. */
#ifndef XMLMIME_NO_MAIN

/*
 * Command-line driver: prints the charset extracted from the header value
 * given as the first argument.  Returns 1 when no argument is supplied.
 */
int
main(int argc, char **argv) {
  char buf[CHARSET_MAX];

  if (argc < 2) {
    fprintf(stderr, "usage: %s content-type\n",
            argc > 0 && argv[0] ? argv[0] : "xmlmime");
    return 1;
  }
  getXMLCharset(argv[1], buf);
  printf("charset = \"%s\"\n", buf);
  return 0;
}

#endif /* XMLMIME_NO_MAIN */