/* svn_xml.h revision 362181 */
1/** 2 * @copyright 3 * ==================================================================== 4 * Licensed to the Apache Software Foundation (ASF) under one 5 * or more contributor license agreements. See the NOTICE file 6 * distributed with this work for additional information 7 * regarding copyright ownership. The ASF licenses this file 8 * to you under the Apache License, Version 2.0 (the 9 * "License"); you may not use this file except in compliance 10 * with the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, 15 * software distributed under the License is distributed on an 16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 * KIND, either express or implied. See the License for the 18 * specific language governing permissions and limitations 19 * under the License. 20 * ==================================================================== 21 * @endcopyright 22 * 23 * @file svn_xml.h 24 * @brief XML code shared by various Subversion libraries. 25 */ 26 27#ifndef SVN_XML_H 28#define SVN_XML_H 29 30#include <apr.h> 31#include <apr_pools.h> 32#include <apr_hash.h> 33 34#include "svn_types.h" 35#include "svn_string.h" 36 37#ifdef __cplusplus 38extern "C" { 39#endif /* __cplusplus */ 40 41/** The namespace all Subversion XML uses. */ 42#define SVN_XML_NAMESPACE "svn:" 43 44/** Used as style argument to svn_xml_make_open_tag() and friends. */ 45enum svn_xml_open_tag_style { 46 /** <tag ...> */ 47 svn_xml_normal = 1, 48 49 /** <tag ...>, no cosmetic newline */ 50 svn_xml_protect_pcdata, 51 52 /** <tag .../> */ 53 svn_xml_self_closing 54}; 55 56 57 58/** Determine if a string of character @a data of length @a len is a 59 * safe bet for use with the svn_xml_escape_* functions found in this 60 * header. 61 * 62 * Return @c TRUE if it is, @c FALSE otherwise. 
63 * 64 * Essentially, this function exists to determine whether or not 65 * simply running a string of bytes through the Subversion XML escape 66 * routines will produce legitimate XML. It should only be necessary 67 * for data which might contain bytes that cannot be safely encoded 68 * into XML (certain control characters, for example). 69 */ 70svn_boolean_t 71svn_xml_is_xml_safe(const char *data, 72 apr_size_t len); 73 74/** Create or append in @a *outstr an xml-escaped version of @a string, 75 * suitable for output as character data. 76 * 77 * If @a *outstr is @c NULL, set @a *outstr to a new stringbuf allocated 78 * in @a pool, else append to the existing stringbuf there. 79 */ 80void 81svn_xml_escape_cdata_stringbuf(svn_stringbuf_t **outstr, 82 const svn_stringbuf_t *string, 83 apr_pool_t *pool); 84 85/** Same as svn_xml_escape_cdata_stringbuf(), but @a string is an 86 * @c svn_string_t. 87 */ 88void 89svn_xml_escape_cdata_string(svn_stringbuf_t **outstr, 90 const svn_string_t *string, 91 apr_pool_t *pool); 92 93/** Same as svn_xml_escape_cdata_stringbuf(), but @a string is a 94 * NULL-terminated C string. 95 */ 96void 97svn_xml_escape_cdata_cstring(svn_stringbuf_t **outstr, 98 const char *string, 99 apr_pool_t *pool); 100 101 102/** Create or append in @a *outstr an xml-escaped version of @a string, 103 * suitable for output as an attribute value. 104 * 105 * If @a *outstr is @c NULL, set @a *outstr to a new stringbuf allocated 106 * in @a pool, else append to the existing stringbuf there. 107 */ 108void 109svn_xml_escape_attr_stringbuf(svn_stringbuf_t **outstr, 110 const svn_stringbuf_t *string, 111 apr_pool_t *pool); 112 113/** Same as svn_xml_escape_attr_stringbuf(), but @a string is an 114 * @c svn_string_t. 115 */ 116void 117svn_xml_escape_attr_string(svn_stringbuf_t **outstr, 118 const svn_string_t *string, 119 apr_pool_t *pool); 120 121/** Same as svn_xml_escape_attr_stringbuf(), but @a string is a 122 * NULL-terminated C string. 
123 */ 124void 125svn_xml_escape_attr_cstring(svn_stringbuf_t **outstr, 126 const char *string, 127 apr_pool_t *pool); 128 129/** 130 * Return UTF-8 string @a string if it contains no characters that are 131 * unrepresentable in XML. Else, return a copy of @a string, 132 * allocated in @a pool, with each unrepresentable character replaced 133 * by "?\uuu", where "uuu" is the three-digit unsigned decimal value 134 * of that character. 135 * 136 * Neither the input nor the output need be valid XML; however, the 137 * output can always be safely XML-escaped. 138 * 139 * @note The current implementation treats all Unicode characters as 140 * representable, except for most ASCII control characters (the 141 * exceptions being CR, LF, and TAB, which are valid in XML). There 142 * may be other UTF-8 characters that are invalid in XML; see 143 * http://subversion.tigris.org/servlets/ReadMsg?list=dev&msgNo=90591 144 * and its thread for details. 145 * 146 * @since New in 1.2. 147 */ 148const char * 149svn_xml_fuzzy_escape(const char *string, 150 apr_pool_t *pool); 151 152 153/*---------------------------------------------------------------*/ 154 155/* Generalized Subversion XML Parsing */ 156 157/** A generalized Subversion XML parser object */ 158typedef struct svn_xml_parser_t svn_xml_parser_t; 159 160typedef void (*svn_xml_start_elem)(void *baton, 161 const char *name, 162 const char **atts); 163 164typedef void (*svn_xml_end_elem)(void *baton, const char *name); 165 166/* data is not NULL-terminated. */ 167typedef void (*svn_xml_char_data)(void *baton, 168 const char *data, 169 apr_size_t len); 170 171 172/** Create a general Subversion XML parser. 173 * 174 * The @c svn_xml_parser_t object itself will be allocated from @a pool, 175 * but some internal structures may be allocated out of pool. Use 176 * svn_xml_free_parser() to free all memory used by the parser. 
177 * 178 * @since Since Subversion 1.10 parser will be freed automatically on pool 179 * cleanup or by svn_xml_free_parser() call. 180 */ 181svn_xml_parser_t * 182svn_xml_make_parser(void *baton, 183 svn_xml_start_elem start_handler, 184 svn_xml_end_elem end_handler, 185 svn_xml_char_data data_handler, 186 apr_pool_t *pool); 187 188 189/** Free a general Subversion XML parser */ 190void 191svn_xml_free_parser(svn_xml_parser_t *svn_parser); 192 193 194/** Push @a len bytes of xml data in @a buf at @a svn_parser. 195 * 196 * If this is the final push, @a is_final must be set. 197 * 198 * An error will be returned if there was a syntax problem in the XML, 199 * or if any of the callbacks set an error using 200 * svn_xml_signal_bailout(). 201 * 202 * If an error is returned, the @c svn_xml_parser_t will have been freed 203 * automatically, so the caller should not call svn_xml_free_parser(). 204 */ 205svn_error_t * 206svn_xml_parse(svn_xml_parser_t *svn_parser, 207 const char *buf, 208 apr_size_t len, 209 svn_boolean_t is_final); 210 211 212 213/** The way to officially bail out of xml parsing. 214 * 215 * Store @a error in @a svn_parser and set all expat callbacks to @c NULL. 216 */ 217void 218svn_xml_signal_bailout(svn_error_t *error, 219 svn_xml_parser_t *svn_parser); 220 221 222 223 224 225/*** Helpers for dealing with the data Expat gives us. ***/ 226 227/** Return the value associated with @a name in expat attribute array @a atts, 228 * else return @c NULL. 229 * 230 * (There could never be a @c NULL attribute value in the XML, 231 * although the empty string is possible.) 232 * 233 * @a atts is an array of c-strings: even-numbered indexes are names, 234 * odd-numbers hold values. If all is right, it should end on an 235 * even-numbered index pointing to @c NULL. 236 */ 237const char * 238svn_xml_get_attr_value(const char *name, 239 const char *const *atts); 240 241 242 243/* Converting between Expat attribute lists and APR hash tables. 
*/ 244 245 246/** Create an attribute hash from @c va_list @a ap. 247 * 248 * The contents of @a ap are alternating <tt>char *</tt> keys and 249 * <tt>char *</tt> vals, terminated by a final @c NULL falling on an 250 * even index (zero-based). 251 */ 252apr_hash_t * 253svn_xml_ap_to_hash(va_list ap, 254 apr_pool_t *pool); 255 256/** Create a hash that corresponds to Expat xml attribute list @a atts. 257 * 258 * The hash's keys and values are <tt>char *</tt>'s. 259 * 260 * @a atts may be NULL, in which case you just get an empty hash back 261 * (this makes life more convenient for some callers). 262 */ 263apr_hash_t * 264svn_xml_make_att_hash(const char **atts, 265 apr_pool_t *pool); 266 267 268/** Like svn_xml_make_att_hash(), but takes a hash and preserves any 269 * key/value pairs already in it. 270 */ 271void 272svn_xml_hash_atts_preserving(const char **atts, 273 apr_hash_t *ht, 274 apr_pool_t *pool); 275 276/** Like svn_xml_make_att_hash(), but takes a hash and overwrites 277 * key/value pairs already in it that also appear in @a atts. 278 */ 279void 280svn_xml_hash_atts_overlaying(const char **atts, 281 apr_hash_t *ht, 282 apr_pool_t *pool); 283 284 285 286/* Printing XML */ 287 288/** Create an XML header and return it in @a *str. 289 * 290 * Fully-formed XML documents should start out with a header, 291 * something like <pre> 292 * \<?xml version="1.0" encoding="UTF-8"?\> 293 * </pre> 294 * 295 * This function returns such a header. @a *str must either be @c NULL, in 296 * which case a new string is created, or it must point to an existing 297 * string to be appended to. @a encoding must either be NULL, in which case 298 * encoding information is omitted from the header, or must be the name of 299 * the encoding of the XML document, such as "UTF-8". 300 * 301 * @since New in 1.7. 
302 */ 303void 304svn_xml_make_header2(svn_stringbuf_t **str, 305 const char *encoding, 306 apr_pool_t *pool); 307 308/** Like svn_xml_make_header2(), but does not emit encoding information. 309 * 310 * @deprecated Provided for backward compatibility with the 1.6 API. 311 */ 312SVN_DEPRECATED 313void 314svn_xml_make_header(svn_stringbuf_t **str, 315 apr_pool_t *pool); 316 317 318/** Store a new xml tag @a tagname in @a *str. 319 * 320 * If @a *str is @c NULL, set @a *str to a new stringbuf allocated 321 * in @a pool, else append to the existing stringbuf there. 322 * 323 * Take the tag's attributes from varargs, a SVN_VA_NULL-terminated list of 324 * alternating <tt>char *</tt> key and <tt>char *</tt> val. Do xml-escaping 325 * on each val. 326 * 327 * @a style is one of the enumerated styles in @c svn_xml_open_tag_style. 328 */ 329void 330svn_xml_make_open_tag(svn_stringbuf_t **str, 331 apr_pool_t *pool, 332 enum svn_xml_open_tag_style style, 333 const char *tagname, 334 ...) SVN_NEEDS_SENTINEL_NULL; 335 336 337/** Like svn_xml_make_open_tag(), but takes a @c va_list instead of being 338 * variadic. 339 */ 340void 341svn_xml_make_open_tag_v(svn_stringbuf_t **str, 342 apr_pool_t *pool, 343 enum svn_xml_open_tag_style style, 344 const char *tagname, 345 va_list ap); 346 347 348/** Like svn_xml_make_open_tag(), but takes a hash table of attributes 349 * (<tt>char *</tt> keys mapping to <tt>char *</tt> values). 350 * 351 * You might ask, why not just provide svn_xml_make_tag_atts()? 352 * 353 * The reason is that a hash table is the most natural interface to an 354 * attribute list; the fact that Expat uses <tt>char **</tt> atts instead is 355 * certainly a defensible implementation decision, but since we'd have 356 * to have special code to support such lists throughout Subversion 357 * anyway, we might as well write that code for the natural interface 358 * (hashes) and then convert in the few cases where conversion is 359 * needed. 
Someday it might even be nice to change expat-lite to work 360 * with apr hashes. 361 * 362 * See conversion functions svn_xml_make_att_hash() and 363 * svn_xml_make_att_hash_overlaying(). Callers should use those to 364 * convert Expat attr lists into hashes when necessary. 365 */ 366void 367svn_xml_make_open_tag_hash(svn_stringbuf_t **str, 368 apr_pool_t *pool, 369 enum svn_xml_open_tag_style style, 370 const char *tagname, 371 apr_hash_t *attributes); 372 373 374/** Store an xml close tag @a tagname in @a str. 375 * 376 * If @a *str is @c NULL, set @a *str to a new stringbuf allocated 377 * in @a pool, else append to the existing stringbuf there. 378 */ 379void 380svn_xml_make_close_tag(svn_stringbuf_t **str, 381 apr_pool_t *pool, 382 const char *tagname); 383 384 385#ifdef __cplusplus 386} 387#endif /* __cplusplus */ 388 389#endif /* SVN_XML_H */ 390