1214501Srpaulo/* 2214501Srpaulo * UPnP XML helper routines 3214501Srpaulo * Copyright (c) 2000-2003 Intel Corporation 4214501Srpaulo * Copyright (c) 2006-2007 Sony Corporation 5214501Srpaulo * Copyright (c) 2008-2009 Atheros Communications 6214501Srpaulo * Copyright (c) 2009, Jouni Malinen <j@w1.fi> 7214501Srpaulo * 8214501Srpaulo * See wps_upnp.c for more details on licensing and code history. 9214501Srpaulo */ 10214501Srpaulo 11214501Srpaulo#include "includes.h" 12214501Srpaulo 13214501Srpaulo#include "common.h" 14214501Srpaulo#include "base64.h" 15214501Srpaulo#include "http.h" 16214501Srpaulo#include "upnp_xml.h" 17214501Srpaulo 18214501Srpaulo 19214501Srpaulo/* 20214501Srpaulo * XML parsing and formatting 21214501Srpaulo * 22214501Srpaulo * XML is a markup language based on unicode; usually (and in our case, 23214501Srpaulo * always!) based on utf-8. utf-8 uses a variable number of bytes per 24214501Srpaulo * character. utf-8 has the advantage that all non-ASCII unicode characters are 25214501Srpaulo * represented by sequences of non-ascii (high bit set) bytes, whereas ASCII 26214501Srpaulo * characters are single ascii bytes, thus we can use typical text processing. 27214501Srpaulo * 28214501Srpaulo * (One other interesting thing about utf-8 is that it is possible to look at 29214501Srpaulo * any random byte and determine if it is the first byte of a character as 30214501Srpaulo * versus a continuation byte). 31214501Srpaulo * 32214501Srpaulo * The base syntax of XML uses a few ASCII punctionation characters; any 33214501Srpaulo * characters that would appear in the payload data are rewritten using 34214501Srpaulo * sequences, e.g., & for ampersand(&) and < for left angle bracket (<). 35214501Srpaulo * Five such escapes total (more can be defined but that does not apply to our 36214501Srpaulo * case). Thus we can safely parse for angle brackets etc. 37214501Srpaulo * 38214501Srpaulo * XML describes tree structures of tagged data, with each element beginning 39214501Srpaulo * with an opening tag <label> and ending with a closing tag </label> with 40214501Srpaulo * matching label. (There is also a self-closing tag <label/> which is supposed 41214501Srpaulo * to be equivalent to <label></label>, i.e., no payload, but we are unlikely 42214501Srpaulo * to see it for our purpose). 43214501Srpaulo * 44214501Srpaulo * Actually the opening tags are a little more complicated because they can 45214501Srpaulo * contain "attributes" after the label (delimited by ascii space or tab chars) 46214501Srpaulo * of the form attribute_label="value" or attribute_label='value'; as it turns 47214501Srpaulo * out we do not have to read any of these attributes, just ignore them. 48214501Srpaulo * 49214501Srpaulo * Labels are any sequence of chars other than space, tab, right angle bracket 50214501Srpaulo * (and ?), but may have an inner structure of <namespace><colon><plain_label>. 51214501Srpaulo * As it turns out, we can ignore the namespaces, in fact we can ignore the 52214501Srpaulo * entire tree hierarchy, because the plain labels we are looking for will be 53214501Srpaulo * unique (not in general, but for this application). We do however have to be 54214501Srpaulo * careful to skip over the namespaces. 55214501Srpaulo * 56214501Srpaulo * In generating XML we have to be more careful, but that is easy because 57214501Srpaulo * everything we do is pretty canned. The only real care to take is to escape 58214501Srpaulo * any special chars in our payload. 59214501Srpaulo */ 60214501Srpaulo 61214501Srpaulo/** 62214501Srpaulo * xml_next_tag - Advance to next tag 63214501Srpaulo * @in: Input 64214501Srpaulo * @out: OUT: start of tag just after '<' 65214501Srpaulo * @out_tagname: OUT: start of name of tag, skipping namespace 66214501Srpaulo * @end: OUT: one after tag 67214501Srpaulo * Returns: 0 on success, 1 on failure 68214501Srpaulo * 69214501Srpaulo * A tag has form: 70214501Srpaulo * <left angle bracket><...><right angle bracket> 71214501Srpaulo * Within the angle brackets, there is an optional leading forward slash (which 72214501Srpaulo * makes the tag an ending tag), then an optional leading label (followed by 73214501Srpaulo * colon) and then the tag name itself. 74214501Srpaulo * 75214501Srpaulo * Note that angle brackets present in the original data must have been encoded 76214501Srpaulo * as < and > so they will not trouble us. 77214501Srpaulo */ 78252726Srpauloint xml_next_tag(const char *in, const char **out, 79252726Srpaulo const char **out_tagname, const char **end) 80214501Srpaulo{ 81214501Srpaulo while (*in && *in != '<') 82214501Srpaulo in++; 83214501Srpaulo if (*in != '<') 84214501Srpaulo return 1; 85214501Srpaulo *out = ++in; 86214501Srpaulo if (*in == '/') 87214501Srpaulo in++; 88214501Srpaulo *out_tagname = in; /* maybe */ 89214501Srpaulo while (isalnum(*in) || *in == '-') 90214501Srpaulo in++; 91214501Srpaulo if (*in == ':') 92214501Srpaulo *out_tagname = ++in; 93214501Srpaulo while (*in && *in != '>') 94214501Srpaulo in++; 95214501Srpaulo if (*in != '>') 96214501Srpaulo return 1; 97214501Srpaulo *end = ++in; 98214501Srpaulo return 0; 99214501Srpaulo} 100214501Srpaulo 101214501Srpaulo 102214501Srpaulo/* xml_data_encode -- format data for xml file, escaping special characters. 103214501Srpaulo * 104214501Srpaulo * Note that we assume we are using utf8 both as input and as output! 105214501Srpaulo * In utf8, characters may be classed as follows: 106214501Srpaulo * 0xxxxxxx(2) -- 1 byte ascii char 107214501Srpaulo * 11xxxxxx(2) -- 1st byte of multi-byte char w/ unicode value >= 0x80 108214501Srpaulo * 110xxxxx(2) -- 1st byte of 2 byte sequence (5 payload bits here) 109214501Srpaulo * 1110xxxx(2) -- 1st byte of 3 byte sequence (4 payload bits here) 110214501Srpaulo * 11110xxx(2) -- 1st byte of 4 byte sequence (3 payload bits here) 111214501Srpaulo * 10xxxxxx(2) -- extension byte (6 payload bits per byte) 112214501Srpaulo * Some values implied by the above are however illegal because they 113214501Srpaulo * do not represent unicode chars or are not the shortest encoding. 114214501Srpaulo * Actually, we can almost entirely ignore the above and just do 115214501Srpaulo * text processing same as for ascii text. 116214501Srpaulo * 117214501Srpaulo * XML is written with arbitrary unicode characters, except that five 118214501Srpaulo * characters have special meaning and so must be escaped where they 119214501Srpaulo * appear in payload data... which we do here. 120214501Srpaulo */ 121214501Srpaulovoid xml_data_encode(struct wpabuf *buf, const char *data, int len) 122214501Srpaulo{ 123214501Srpaulo int i; 124214501Srpaulo for (i = 0; i < len; i++) { 125214501Srpaulo u8 c = ((u8 *) data)[i]; 126214501Srpaulo if (c == '<') { 127214501Srpaulo wpabuf_put_str(buf, "<"); 128214501Srpaulo continue; 129214501Srpaulo } 130214501Srpaulo if (c == '>') { 131214501Srpaulo wpabuf_put_str(buf, ">"); 132214501Srpaulo continue; 133214501Srpaulo } 134214501Srpaulo if (c == '&') { 135214501Srpaulo wpabuf_put_str(buf, "&"); 136214501Srpaulo continue; 137214501Srpaulo } 138214501Srpaulo if (c == '\'') { 139214501Srpaulo wpabuf_put_str(buf, "'"); 140214501Srpaulo continue; 141214501Srpaulo } 142214501Srpaulo if (c == '"') { 143214501Srpaulo wpabuf_put_str(buf, """); 144214501Srpaulo continue; 145214501Srpaulo } 146214501Srpaulo /* 147214501Srpaulo * We could try to represent control characters using the 148214501Srpaulo * sequence: &#x; where x is replaced by a hex numeral, but not 149214501Srpaulo * clear why we would do this. 150214501Srpaulo */ 151214501Srpaulo wpabuf_put_u8(buf, c); 152214501Srpaulo } 153214501Srpaulo} 154214501Srpaulo 155214501Srpaulo 156214501Srpaulo/* xml_add_tagged_data -- format tagged data as a new xml line. 157214501Srpaulo * 158214501Srpaulo * tag must not have any special chars. 159214501Srpaulo * data may have special chars, which are escaped. 160214501Srpaulo */ 161214501Srpaulovoid xml_add_tagged_data(struct wpabuf *buf, const char *tag, const char *data) 162214501Srpaulo{ 163214501Srpaulo wpabuf_printf(buf, "<%s>", tag); 164214501Srpaulo xml_data_encode(buf, data, os_strlen(data)); 165214501Srpaulo wpabuf_printf(buf, "</%s>\n", tag); 166214501Srpaulo} 167214501Srpaulo 168214501Srpaulo 169214501Srpaulo/* A POST body looks something like (per upnp spec): 170214501Srpaulo * <?xml version="1.0"?> 171214501Srpaulo * <s:Envelope 172214501Srpaulo * xmlns:s="http://schemas.xmlsoap.org/soap/envelope/" 173214501Srpaulo * s:encodingStyle="http://schemas.xmlsoap.org/soap/encoding/"> 174214501Srpaulo * <s:Body> 175214501Srpaulo * <u:actionName xmlns:u="urn:schemas-upnp-org:service:serviceType:v"> 176214501Srpaulo * <argumentName>in arg value</argumentName> 177214501Srpaulo * other in args and their values go here, if any 178214501Srpaulo * </u:actionName> 179214501Srpaulo * </s:Body> 180214501Srpaulo * </s:Envelope> 181214501Srpaulo * 182214501Srpaulo * where : 183214501Srpaulo * s: might be some other namespace name followed by colon 184214501Srpaulo * u: might be some other namespace name followed by colon 185214501Srpaulo * actionName will be replaced according to action requested 186214501Srpaulo * schema following actionName will be WFA scheme instead 187214501Srpaulo * argumentName will be actual argument name 188214501Srpaulo * (in arg value) will be actual argument value 189214501Srpaulo */ 190214501Srpaulochar * xml_get_first_item(const char *doc, const char *item) 191214501Srpaulo{ 192214501Srpaulo const char *match = item; 193214501Srpaulo int match_len = os_strlen(item); 194214501Srpaulo const char *tag, *tagname, *end; 195214501Srpaulo char *value; 196214501Srpaulo 197214501Srpaulo /* 198214501Srpaulo * This is crude: ignore any possible tag name conflicts and go right 199214501Srpaulo * to the first tag of this name. This should be ok for the limited 200214501Srpaulo * domain of UPnP messages. 201214501Srpaulo */ 202214501Srpaulo for (;;) { 203214501Srpaulo if (xml_next_tag(doc, &tag, &tagname, &end)) 204214501Srpaulo return NULL; 205214501Srpaulo doc = end; 206214501Srpaulo if (!os_strncasecmp(tagname, match, match_len) && 207214501Srpaulo *tag != '/' && 208214501Srpaulo (tagname[match_len] == '>' || 209214501Srpaulo !isgraph(tagname[match_len]))) { 210214501Srpaulo break; 211214501Srpaulo } 212214501Srpaulo } 213214501Srpaulo end = doc; 214214501Srpaulo while (*end && *end != '<') 215214501Srpaulo end++; 216214501Srpaulo value = os_zalloc(1 + (end - doc)); 217214501Srpaulo if (value == NULL) 218214501Srpaulo return NULL; 219214501Srpaulo os_memcpy(value, doc, end - doc); 220214501Srpaulo return value; 221214501Srpaulo} 222214501Srpaulo 223214501Srpaulo 224214501Srpaulostruct wpabuf * xml_get_base64_item(const char *data, const char *name, 225214501Srpaulo enum http_reply_code *ret) 226214501Srpaulo{ 227214501Srpaulo char *msg; 228214501Srpaulo struct wpabuf *buf; 229214501Srpaulo unsigned char *decoded; 230214501Srpaulo size_t len; 231214501Srpaulo 232214501Srpaulo msg = xml_get_first_item(data, name); 233214501Srpaulo if (msg == NULL) { 234214501Srpaulo *ret = UPNP_ARG_VALUE_INVALID; 235214501Srpaulo return NULL; 236214501Srpaulo } 237214501Srpaulo 238214501Srpaulo decoded = base64_decode((unsigned char *) msg, os_strlen(msg), &len); 239214501Srpaulo os_free(msg); 240214501Srpaulo if (decoded == NULL) { 241214501Srpaulo *ret = UPNP_OUT_OF_MEMORY; 242214501Srpaulo return NULL; 243214501Srpaulo } 244214501Srpaulo 245214501Srpaulo buf = wpabuf_alloc_ext_data(decoded, len); 246214501Srpaulo if (buf == NULL) { 247214501Srpaulo os_free(decoded); 248214501Srpaulo *ret = UPNP_OUT_OF_MEMORY; 249214501Srpaulo return NULL; 250214501Srpaulo } 251214501Srpaulo return buf; 252214501Srpaulo} 253