1/* Licensed to the Apache Software Foundation (ASF) under one or more 2 * contributor license agreements. See the NOTICE file distributed with 3 * this work for additional information regarding copyright ownership. 4 * The ASF licenses this file to You under the Apache License, Version 2.0 5 * (the "License"); you may not use this file except in compliance with 6 * the License. You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16/** 17 * @file apr_xml.h 18 * @brief APR-UTIL XML Library 19 */ 20#ifndef APR_XML_H 21#define APR_XML_H 22 23/** 24 * @defgroup APR_Util_XML XML 25 * @ingroup APR_Util 26 * @{ 27 */ 28#include "apr_pools.h" 29#include "apr_tables.h" 30#include "apr_file_io.h" 31 32#include "apu.h" 33#if APR_CHARSET_EBCDIC 34#include "apr_xlate.h" 35#endif 36 37#ifdef __cplusplus 38extern "C" { 39#endif 40 41/** 42 * @package Apache XML library 43 */ 44 45/* -------------------------------------------------------------------- */ 46 47/* ### these will need to move at some point to a more logical spot */ 48 49/** @see apr_text */ 50typedef struct apr_text apr_text; 51 52/** Structure to keep a linked list of pieces of text */ 53struct apr_text { 54 /** The current piece of text */ 55 const char *text; 56 /** a pointer to the next piece of text */ 57 struct apr_text *next; 58}; 59 60/** @see apr_text_header */ 61typedef struct apr_text_header apr_text_header; 62 63/** A list of pieces of text */ 64struct apr_text_header { 65 /** The first piece of text in the list */ 66 apr_text *first; 67 /** The last piece of text in the list */ 68 apr_text *last; 69}; 70 71/** 72 * Append a piece of text to the end of a list 73 * @param p The pool to allocate out of 74 * @param hdr The text header to append to 75 * @param text The new text to append 76 */ 77APU_DECLARE(void) apr_text_append(apr_pool_t *p, apr_text_header *hdr, 78 const char *text); 79 80 81/* -------------------------------------------------------------------- 82** 83** XML PARSING 84*/ 85 86/* 87** Qualified namespace values 88** 89** APR_XML_NS_DAV_ID 90** We always insert the "DAV:" namespace URI at the head of the 91** namespace array. This means that it will always be at ID==0, 92** making it much easier to test for. 93** 94** APR_XML_NS_NONE 95** This special ID is used for two situations: 96** 97** 1) The namespace prefix begins with "xml" (and we do not know 98** what it means). Namespace prefixes with "xml" (any case) as 99** their first three characters are reserved by the XML Namespaces 100** specification for future use. mod_dav will pass these through 101** unchanged. When this identifier is used, the prefix is LEFT in 102** the element/attribute name. Downstream processing should not 103** prepend another prefix. 104** 105** 2) The element/attribute does not have a namespace. 106** 107** a) No prefix was used, and a default namespace has not been 108** defined. 109** b) No prefix was used, and the default namespace was specified 110** to mean "no namespace". This is done with a namespace 111** declaration of: xmlns="" 112** (this declaration is typically used to override a previous 113** specification for the default namespace) 114** 115** In these cases, we need to record that the elem/attr has no 116** namespace so that we will not attempt to prepend a prefix. 117** All namespaces that are used will have a prefix assigned to 118** them -- mod_dav will never set or use the default namespace 119** when generating XML. This means that "no prefix" will always 120** mean "no namespace". 121** 122** In both cases, the XML generation will avoid prepending a prefix. 123** For the first case, this means the original prefix/name will be 124** inserted into the output stream. For the latter case, it means 125** the name will have no prefix, and since we never define a default 126** namespace, this means it will have no namespace. 127** 128** Note: currently, mod_dav understands the "xmlns" prefix and the 129** "xml:lang" attribute. These are handled specially (they aren't 130** left within the XML tree), so the APR_XML_NS_NONE value won't ever 131** really apply to these values. 132*/ 133#define APR_XML_NS_DAV_ID 0 /**< namespace ID for "DAV:" */ 134#define APR_XML_NS_NONE -10 /**< no namespace for this elem/attr */ 135 136#define APR_XML_NS_ERROR_BASE -100 /**< used only during processing */ 137/** Is this namespace an error? */ 138#define APR_XML_NS_IS_ERROR(e) ((e) <= APR_XML_NS_ERROR_BASE) 139 140/** @see apr_xml_attr */ 141typedef struct apr_xml_attr apr_xml_attr; 142/** @see apr_xml_elem */ 143typedef struct apr_xml_elem apr_xml_elem; 144/** @see apr_xml_doc */ 145typedef struct apr_xml_doc apr_xml_doc; 146 147/** apr_xml_attr: holds a parsed XML attribute */ 148struct apr_xml_attr { 149 /** attribute name */ 150 const char *name; 151 /** index into namespace array */ 152 int ns; 153 154 /** attribute value */ 155 const char *value; 156 157 /** next attribute */ 158 struct apr_xml_attr *next; 159}; 160 161/** apr_xml_elem: holds a parsed XML element */ 162struct apr_xml_elem { 163 /** element name */ 164 const char *name; 165 /** index into namespace array */ 166 int ns; 167 /** xml:lang for attrs/contents */ 168 const char *lang; 169 170 /** cdata right after start tag */ 171 apr_text_header first_cdata; 172 /** cdata after MY end tag */ 173 apr_text_header following_cdata; 174 175 /** parent element */ 176 struct apr_xml_elem *parent; 177 /** next (sibling) element */ 178 struct apr_xml_elem *next; 179 /** first child element */ 180 struct apr_xml_elem *first_child; 181 /** first attribute */ 182 struct apr_xml_attr *attr; 183 184 /* used only during parsing */ 185 /** last child element */ 186 struct apr_xml_elem *last_child; 187 /** namespaces scoped by this elem */ 188 struct apr_xml_ns_scope *ns_scope; 189 190 /* used by modules during request processing */ 191 /** Place for modules to store private data */ 192 void *priv; 193}; 194 195/** Is this XML element empty? */ 196#define APR_XML_ELEM_IS_EMPTY(e) ((e)->first_child == NULL && \ 197 (e)->first_cdata.first == NULL) 198 199/** apr_xml_doc: holds a parsed XML document */ 200struct apr_xml_doc { 201 /** root element */ 202 apr_xml_elem *root; 203 /** array of namespaces used */ 204 apr_array_header_t *namespaces; 205}; 206 207/** Opaque XML parser structure */ 208typedef struct apr_xml_parser apr_xml_parser; 209 210/** 211 * Create an XML parser 212 * @param pool The pool for allocating the parser and the parse results. 213 * @return The new parser. 214 */ 215APU_DECLARE(apr_xml_parser *) apr_xml_parser_create(apr_pool_t *pool); 216 217/** 218 * Parse a File, producing a xml_doc 219 * @param p The pool for allocating the parse results. 220 * @param parser A pointer to *parser (needed so calling function can get 221 * errors), will be set to NULL on successful completion. 222 * @param ppdoc A pointer to *apr_xml_doc (which has the parsed results in it) 223 * @param xmlfd A file to read from. 224 * @param buffer_length Buffer length which would be suitable 225 * @return Any errors found during parsing. 226 */ 227APU_DECLARE(apr_status_t) apr_xml_parse_file(apr_pool_t *p, 228 apr_xml_parser **parser, 229 apr_xml_doc **ppdoc, 230 apr_file_t *xmlfd, 231 apr_size_t buffer_length); 232 233 234/** 235 * Feed input into the parser 236 * @param parser The XML parser for parsing this data. 237 * @param data The data to parse. 238 * @param len The length of the data. 239 * @return Any errors found during parsing. 240 * @remark Use apr_xml_parser_geterror() to get more error information. 241 */ 242APU_DECLARE(apr_status_t) apr_xml_parser_feed(apr_xml_parser *parser, 243 const char *data, 244 apr_size_t len); 245 246/** 247 * Terminate the parsing and return the result 248 * @param parser The XML parser for parsing this data. 249 * @param pdoc The resulting parse information. May be NULL to simply 250 * terminate the parsing without fetching the info. 251 * @return Any errors found during the final stage of parsing. 252 * @remark Use apr_xml_parser_geterror() to get more error information. 253 */ 254APU_DECLARE(apr_status_t) apr_xml_parser_done(apr_xml_parser *parser, 255 apr_xml_doc **pdoc); 256 257/** 258 * Fetch additional error information from the parser. 259 * @param parser The XML parser to query for errors. 260 * @param errbuf A buffer for storing error text. 261 * @param errbufsize The length of the error text buffer. 262 * @return The error buffer 263 */ 264APU_DECLARE(char *) apr_xml_parser_geterror(apr_xml_parser *parser, 265 char *errbuf, 266 apr_size_t errbufsize); 267 268 269/** 270 * Converts an XML element tree to flat text 271 * @param p The pool to allocate out of 272 * @param elem The XML element to convert 273 * @param style How to covert the XML. One of: 274 * <PRE> 275 * APR_XML_X2T_FULL start tag, contents, end tag 276 * APR_XML_X2T_INNER contents only 277 * APR_XML_X2T_LANG_INNER xml:lang + inner contents 278 * APR_XML_X2T_FULL_NS_LANG FULL + ns defns + xml:lang 279 * </PRE> 280 * @param namespaces The namespace of the current XML element 281 * @param ns_map Namespace mapping 282 * @param pbuf Buffer to put the converted text into 283 * @param psize Size of the converted text 284 */ 285APU_DECLARE(void) apr_xml_to_text(apr_pool_t *p, const apr_xml_elem *elem, 286 int style, apr_array_header_t *namespaces, 287 int *ns_map, const char **pbuf, 288 apr_size_t *psize); 289 290/* style argument values: */ 291#define APR_XML_X2T_FULL 0 /**< start tag, contents, end tag */ 292#define APR_XML_X2T_INNER 1 /**< contents only */ 293#define APR_XML_X2T_LANG_INNER 2 /**< xml:lang + inner contents */ 294#define APR_XML_X2T_FULL_NS_LANG 3 /**< FULL + ns defns + xml:lang */ 295 296/** 297 * empty XML element 298 * @param p The pool to allocate out of 299 * @param elem The XML element to empty 300 * @return the string that was stored in the XML element 301 */ 302APU_DECLARE(const char *) apr_xml_empty_elem(apr_pool_t *p, 303 const apr_xml_elem *elem); 304 305/** 306 * quote an XML string 307 * Replace '\<', '\>', and '\&' with '\<', '\>', and '\&'. 308 * @param p The pool to allocate out of 309 * @param s The string to quote 310 * @param quotes If quotes is true, then replace '"' with '\"'. 311 * @return The quoted string 312 * @note If the string does not contain special characters, it is not 313 * duplicated into the pool and the original string is returned. 314 */ 315APU_DECLARE(const char *) apr_xml_quote_string(apr_pool_t *p, const char *s, 316 int quotes); 317 318/** 319 * Quote an XML element 320 * @param p The pool to allocate out of 321 * @param elem The element to quote 322 */ 323APU_DECLARE(void) apr_xml_quote_elem(apr_pool_t *p, apr_xml_elem *elem); 324 325/* manage an array of unique URIs: apr_xml_insert_uri() and APR_XML_URI_ITEM() */ 326 327/** 328 * return the URI's (existing) index, or insert it and return a new index 329 * @param uri_array array to insert into 330 * @param uri The uri to insert 331 * @return int The uri's index 332 */ 333APU_DECLARE(int) apr_xml_insert_uri(apr_array_header_t *uri_array, 334 const char *uri); 335 336/** Get the URI item for this XML element */ 337#define APR_XML_GET_URI_ITEM(ary, i) (((const char * const *)(ary)->elts)[i]) 338 339#if APR_CHARSET_EBCDIC 340/** 341 * Convert parsed tree in EBCDIC 342 * @param p The pool to allocate out of 343 * @param pdoc The apr_xml_doc to convert. 344 * @param xlate The translation handle to use. 345 * @return Any errors found during conversion. 346 */ 347APU_DECLARE(apr_status_t) apr_xml_parser_convert_doc(apr_pool_t *p, 348 apr_xml_doc *pdoc, 349 apr_xlate_t *convset); 350#endif 351 352#ifdef __cplusplus 353} 354#endif 355/** @} */ 356#endif /* APR_XML_H */ 357