1355604Sdelphij/* 2355604Sdelphij __ __ _ 3355604Sdelphij ___\ \/ /_ __ __ _| |_ 4355604Sdelphij / _ \\ /| '_ \ / _` | __| 5355604Sdelphij | __// \| |_) | (_| | |_ 6355604Sdelphij \___/_/\_\ .__/ \__,_|\__| 7355604Sdelphij |_| XML parser 8355604Sdelphij 9355604Sdelphij Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 10355604Sdelphij Copyright (c) 2000-2017 Expat development team 11355604Sdelphij Licensed under the MIT license: 12355604Sdelphij 13355604Sdelphij Permission is hereby granted, free of charge, to any person obtaining 14355604Sdelphij a copy of this software and associated documentation files (the 15355604Sdelphij "Software"), to deal in the Software without restriction, including 16355604Sdelphij without limitation the rights to use, copy, modify, merge, publish, 17355604Sdelphij distribute, sublicense, and/or sell copies of the Software, and to permit 18355604Sdelphij persons to whom the Software is furnished to do so, subject to the 19355604Sdelphij following conditions: 20355604Sdelphij 21355604Sdelphij The above copyright notice and this permission notice shall be included 22355604Sdelphij in all copies or substantial portions of the Software. 23355604Sdelphij 24355604Sdelphij THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25355604Sdelphij EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26355604Sdelphij MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 27355604Sdelphij NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 28355604Sdelphij DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 29355604Sdelphij OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 30355604Sdelphij USE OR OTHER DEALINGS IN THE SOFTWARE. 31355604Sdelphij*/ 32355604Sdelphij 33104349Sphk#include <string.h> 34104349Sphk#include "xmlmime.h" 35104349Sphk 36104349Sphkstatic const char * 37355604SdelphijgetTok(const char **pp) { 38104349Sphk /* inComment means one level of nesting; inComment+1 means two levels etc */ 39104349Sphk enum { inAtom, inString, init, inComment }; 40104349Sphk int state = init; 41104349Sphk const char *tokStart = 0; 42104349Sphk for (;;) { 43104349Sphk switch (**pp) { 44104349Sphk case '\0': 45104349Sphk if (state == inAtom) 46104349Sphk return tokStart; 47104349Sphk return 0; 48104349Sphk case ' ': 49104349Sphk case '\r': 50104349Sphk case '\t': 51104349Sphk case '\n': 52104349Sphk if (state == inAtom) 53104349Sphk return tokStart; 54104349Sphk break; 55104349Sphk case '(': 56104349Sphk if (state == inAtom) 57104349Sphk return tokStart; 58104349Sphk if (state != inString) 59104349Sphk state++; 60104349Sphk break; 61104349Sphk case ')': 62104349Sphk if (state > init) 63104349Sphk --state; 64104349Sphk else if (state != inString) 65104349Sphk return 0; 66104349Sphk break; 67104349Sphk case ';': 68104349Sphk case '/': 69104349Sphk case '=': 70104349Sphk if (state == inAtom) 71104349Sphk return tokStart; 72104349Sphk if (state == init) 73104349Sphk return (*pp)++; 74104349Sphk break; 75104349Sphk case '\\': 76104349Sphk ++*pp; 77104349Sphk if (**pp == '\0') 78104349Sphk return 0; 79104349Sphk break; 80104349Sphk case '"': 81104349Sphk switch (state) { 82104349Sphk case inString: 83104349Sphk ++*pp; 84104349Sphk return tokStart; 85104349Sphk case inAtom: 86104349Sphk return tokStart; 87104349Sphk case init: 88104349Sphk tokStart = *pp; 89104349Sphk state = inString; 90104349Sphk break; 91104349Sphk } 92104349Sphk break; 93104349Sphk default: 94104349Sphk if (state == init) { 95104349Sphk tokStart = *pp; 96104349Sphk state = inAtom; 97104349Sphk } 98104349Sphk break; 99104349Sphk } 100104349Sphk ++*pp; 101104349Sphk } 102104349Sphk /* not reached */ 103104349Sphk} 104104349Sphk 105104349Sphk/* key must be lowercase ASCII */ 106104349Sphk 107104349Sphkstatic int 108355604Sdelphijmatchkey(const char *start, const char *end, const char *key) { 109355604Sdelphij if (! start) 110104349Sphk return 0; 111104349Sphk for (; start != end; start++, key++) 112104349Sphk if (*start != *key && *start != 'A' + (*key - 'a')) 113104349Sphk return 0; 114104349Sphk return *key == '\0'; 115104349Sphk} 116104349Sphk 117104349Sphkvoid 118355604SdelphijgetXMLCharset(const char *buf, char *charset) { 119104349Sphk const char *next, *p; 120104349Sphk 121104349Sphk charset[0] = '\0'; 122104349Sphk next = buf; 123104349Sphk p = getTok(&next); 124104349Sphk if (matchkey(p, next, "text")) 125104349Sphk strcpy(charset, "us-ascii"); 126355604Sdelphij else if (! matchkey(p, next, "application")) 127104349Sphk return; 128104349Sphk p = getTok(&next); 129355604Sdelphij if (! p || *p != '/') 130104349Sphk return; 131104349Sphk p = getTok(&next); 132355604Sdelphij /* BEGIN disabled code */ 133355604Sdelphij if (0) { 134355604Sdelphij if (! matchkey(p, next, "xml") && charset[0] == '\0') 135355604Sdelphij return; 136355604Sdelphij } 137355604Sdelphij /* END disabled code */ 138104349Sphk p = getTok(&next); 139104349Sphk while (p) { 140104349Sphk if (*p == ';') { 141104349Sphk p = getTok(&next); 142104349Sphk if (matchkey(p, next, "charset")) { 143104349Sphk p = getTok(&next); 144104349Sphk if (p && *p == '=') { 145104349Sphk p = getTok(&next); 146104349Sphk if (p) { 147104349Sphk char *s = charset; 148104349Sphk if (*p == '"') { 149104349Sphk while (++p != next - 1) { 150104349Sphk if (*p == '\\') 151104349Sphk ++p; 152104349Sphk if (s == charset + CHARSET_MAX - 1) { 153104349Sphk charset[0] = '\0'; 154104349Sphk break; 155104349Sphk } 156104349Sphk *s++ = *p; 157104349Sphk } 158104349Sphk *s++ = '\0'; 159355604Sdelphij } else { 160104349Sphk if (next - p > CHARSET_MAX - 1) 161104349Sphk break; 162104349Sphk while (p != next) 163104349Sphk *s++ = *p++; 164104349Sphk *s = 0; 165104349Sphk break; 166104349Sphk } 167104349Sphk } 168104349Sphk } 169104349Sphk break; 170104349Sphk } 171355604Sdelphij } else 172355604Sdelphij p = getTok(&next); 173104349Sphk } 174104349Sphk} 175104349Sphk 176104349Sphk#ifdef TEST 177104349Sphk 178355604Sdelphij# include <stdio.h> 179104349Sphk 180104349Sphkint 181355604Sdelphijmain(int argc, char *argv[]) { 182104349Sphk char buf[CHARSET_MAX]; 183104349Sphk if (argc <= 1) 184104349Sphk return 1; 185104349Sphk printf("%s\n", argv[1]); 186104349Sphk getXMLCharset(argv[1], buf); 187104349Sphk printf("charset=\"%s\"\n", buf); 188104349Sphk return 0; 189104349Sphk} 190104349Sphk 191104349Sphk#endif /* TEST */ 192