svn_xml.h revision 362181
1/**
2 * @copyright
3 * ====================================================================
4 *    Licensed to the Apache Software Foundation (ASF) under one
5 *    or more contributor license agreements.  See the NOTICE file
6 *    distributed with this work for additional information
7 *    regarding copyright ownership.  The ASF licenses this file
8 *    to you under the Apache License, Version 2.0 (the
9 *    "License"); you may not use this file except in compliance
10 *    with the License.  You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 *    Unless required by applicable law or agreed to in writing,
15 *    software distributed under the License is distributed on an
16 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 *    KIND, either express or implied.  See the License for the
18 *    specific language governing permissions and limitations
19 *    under the License.
20 * ====================================================================
21 * @endcopyright
22 *
23 * @file svn_xml.h
24 * @brief XML code shared by various Subversion libraries.
25 */
26
27#ifndef SVN_XML_H
28#define SVN_XML_H
29
30#include <apr.h>
31#include <apr_pools.h>
32#include <apr_hash.h>
33
34#include "svn_types.h"
35#include "svn_string.h"
36
37#ifdef __cplusplus
38extern "C" {
39#endif /* __cplusplus */
40
41/** The namespace string that all Subversion XML uses. */
42#define SVN_XML_NAMESPACE "svn:"
43
44/** Used as the @a style argument to svn_xml_make_open_tag(),
 * svn_xml_make_open_tag_v() and svn_xml_make_open_tag_hash(). */
45enum svn_xml_open_tag_style {
46  /** \<tag ...\> */
47  svn_xml_normal = 1,
48
49  /** \<tag ...\>, no cosmetic newline */
50  svn_xml_protect_pcdata,
51
52  /** \<tag .../\> */
53  svn_xml_self_closing
54};
55
56
57
58/** Determine if a string of characters @a data of length @a len is a
59 * safe bet for use with the svn_xml_escape_* functions found in this
60 * header.
61 *
62 * Return @c TRUE if it is, @c FALSE otherwise.
63 *
64 * Essentially, this function exists to determine whether or not
65 * simply running a string of bytes through the Subversion XML escape
66 * routines will produce legitimate XML.  It should only be necessary
67 * for data which might contain bytes that cannot be safely encoded
68 * into XML (certain control characters, for example).
69 */
70svn_boolean_t
71svn_xml_is_xml_safe(const char *data,
72                    apr_size_t len);
73
74/** Create or append in @a *outstr an XML-escaped version of @a string,
75 * suitable for output as character data.
76 *
77 * If @a *outstr is @c NULL, set @a *outstr to a new stringbuf allocated
78 * in @a pool; otherwise append to the stringbuf already there.
79 */
80void
81svn_xml_escape_cdata_stringbuf(svn_stringbuf_t **outstr,
82                               const svn_stringbuf_t *string,
83                               apr_pool_t *pool);
84
85/** Same as svn_xml_escape_cdata_stringbuf(), except that the input
86 * @a string is an @c svn_string_t rather than an @c svn_stringbuf_t.
87 */
88void
89svn_xml_escape_cdata_string(svn_stringbuf_t **outstr,
90                            const svn_string_t *string,
91                            apr_pool_t *pool);
92
93/** Same as svn_xml_escape_cdata_stringbuf(), except that the input
94 * @a string is a NUL-terminated C string.
95 */
96void
97svn_xml_escape_cdata_cstring(svn_stringbuf_t **outstr,
98                             const char *string,
99                             apr_pool_t *pool);
100
101
102/** Create or append in @a *outstr an XML-escaped version of @a string,
103 * suitable for output as an attribute value.
104 *
105 * If @a *outstr is @c NULL, set @a *outstr to a new stringbuf allocated
106 * in @a pool; otherwise append to the stringbuf already there.
107 */
108void
109svn_xml_escape_attr_stringbuf(svn_stringbuf_t **outstr,
110                              const svn_stringbuf_t *string,
111                              apr_pool_t *pool);
112
113/** Same as svn_xml_escape_attr_stringbuf(), except that the input
114 * @a string is an @c svn_string_t rather than an @c svn_stringbuf_t.
115 */
116void
117svn_xml_escape_attr_string(svn_stringbuf_t **outstr,
118                           const svn_string_t *string,
119                           apr_pool_t *pool);
120
121/** Same as svn_xml_escape_attr_stringbuf(), except that the input
122 * @a string is a NUL-terminated C string.
123 */
124void
125svn_xml_escape_attr_cstring(svn_stringbuf_t **outstr,
126                            const char *string,
127                            apr_pool_t *pool);
128
129/**
130 * Return the UTF-8 string @a string itself if it contains no characters
131 * that are unrepresentable in XML.  Otherwise, return a copy of @a string,
132 * allocated in @a pool, with each unrepresentable character replaced
133 * by "?\uuu", where "uuu" is the three-digit unsigned decimal value
134 * of that character.
135 *
136 * Neither the input nor the output need be valid XML; however, the
137 * output can always be safely XML-escaped.
138 *
139 * @note The current implementation treats all Unicode characters as
140 * representable, except for most ASCII control characters (the
141 * exceptions being CR, LF, and TAB, which are valid in XML).  There
142 * may be other UTF-8 characters that are invalid in XML; see
143 * http://subversion.tigris.org/servlets/ReadMsg?list=dev&msgNo=90591
144 * and its thread for details.
145 *
146 * @since New in 1.2.
147 */
148const char *
149svn_xml_fuzzy_escape(const char *string,
150                     apr_pool_t *pool);
151
152
153/*---------------------------------------------------------------*/
154
155/* Generalized Subversion XML Parsing */
156
157/** A generalized Subversion XML parser object.
 *
 * Only forward-declared in this header: create one with
 * svn_xml_make_parser() and release it with svn_xml_free_parser().
 */
158typedef struct svn_xml_parser_t svn_xml_parser_t;
159
/** Callback type for the start of an element; this is the type of the
 * @a start_handler argument of svn_xml_make_parser().
 *
 * NOTE(review): presumably @a baton is the baton given to
 * svn_xml_make_parser(), @a name is the element name, and @a atts is an
 * Expat-style attribute array as described for svn_xml_get_attr_value()
 * -- confirm against the implementation.
 */
160typedef void (*svn_xml_start_elem)(void *baton,
161                                   const char *name,
162                                   const char **atts);
163
/** Callback type for the end of an element; this is the type of the
 * @a end_handler argument of svn_xml_make_parser().
 *
 * NOTE(review): presumably @a baton is the baton given to
 * svn_xml_make_parser() and @a name is the element name -- confirm
 * against the implementation.
 */
164typedef void (*svn_xml_end_elem)(void *baton, const char *name);
165
166/** Callback type for character data; this is the type of the
 * @a data_handler argument of svn_xml_make_parser().
 *
 * @a data is not NUL-terminated; @a len presumably gives its length in
 * bytes (confirm against the implementation).
 */
167typedef void (*svn_xml_char_data)(void *baton,
168                                  const char *data,
169                                  apr_size_t len);
170
171
172/** Create a general Subversion XML parser.
173 *
174 * The @c svn_xml_parser_t object itself will be allocated from @a pool,
175 * but some internal structures may be allocated outside of @a pool.  Use
176 * svn_xml_free_parser() to free all memory used by the parser.
177 *
178 * @since Since Subversion 1.10, the parser is freed automatically on pool
179 * cleanup or by a call to svn_xml_free_parser().
180 */
181svn_xml_parser_t *
182svn_xml_make_parser(void *baton,
183                    svn_xml_start_elem start_handler,
184                    svn_xml_end_elem end_handler,
185                    svn_xml_char_data data_handler,
186                    apr_pool_t *pool);
187
188
189/** Free the general Subversion XML parser @a svn_parser.
 *
 * Per the svn_xml_parse() documentation, do not call this after
 * svn_xml_parse() has returned an error for @a svn_parser.
 */
190void
191svn_xml_free_parser(svn_xml_parser_t *svn_parser);
192
193
194/** Push @a len bytes of XML data from @a buf into @a svn_parser.
195 *
196 * If this is the final push, @a is_final must be @c TRUE.
197 *
198 * An error will be returned if there was a syntax problem in the XML,
199 * or if any of the callbacks set an error using
200 * svn_xml_signal_bailout().
201 *
202 * If an error is returned, the @c svn_xml_parser_t will have been freed
203 * automatically, so the caller should not call svn_xml_free_parser().
204 */
205svn_error_t *
206svn_xml_parse(svn_xml_parser_t *svn_parser,
207              const char *buf,
208              apr_size_t len,
209              svn_boolean_t is_final);
210
211
212
213/** The way to officially bail out of XML parsing, e.g. from a callback.
214 *
215 * Store @a error in @a svn_parser and set all Expat callbacks to @c NULL.
216 */
217void
218svn_xml_signal_bailout(svn_error_t *error,
219                       svn_xml_parser_t *svn_parser);
220
221
222
223
224
225/*** Helpers for dealing with the data Expat gives us. ***/
226
227/** Return the value associated with @a name in Expat attribute array @a atts,
228 * else return @c NULL.
229 *
230 * (There could never be a @c NULL attribute value in the XML,
231 * although the empty string is possible.)
232 *
233 * @a atts is an array of C strings: even-numbered indexes are names,
234 * odd-numbered indexes hold values.  If all is right, it should end on
235 * an even-numbered index pointing to @c NULL.
236 */
237const char *
238svn_xml_get_attr_value(const char *name,
239                       const char *const *atts);
240
241
242
243/* Converting between Expat attribute lists and APR hash tables. */
244
245
246/** Create an attribute hash from @c va_list @a ap.
247 *
248 * The contents of @a ap are alternating <tt>char *</tt> keys and
249 * <tt>char *</tt> vals, terminated by a final @c NULL falling on an
250 * even index (zero-based), i.e. in a key position.
251 */
252apr_hash_t *
253svn_xml_ap_to_hash(va_list ap,
254                   apr_pool_t *pool);
255
256/** Create a hash that corresponds to Expat XML attribute list @a atts.
257 *
258 * The hash's keys and values are <tt>char *</tt>'s.
259 *
260 * @a atts may be @c NULL, in which case you just get an empty hash back
261 * (this makes life more convenient for some callers).
262 */
263apr_hash_t *
264svn_xml_make_att_hash(const char **atts,
265                      apr_pool_t *pool);
266
267
268/** Like svn_xml_make_att_hash(), but stores into the existing hash @a ht
269 * and preserves any key/value pairs already in it.
270 */
271void
272svn_xml_hash_atts_preserving(const char **atts,
273                             apr_hash_t *ht,
274                             apr_pool_t *pool);
275
276/** Like svn_xml_make_att_hash(), but stores into the existing hash @a ht
277 * and overwrites key/value pairs already in it that also appear in @a atts.
278 */
279void
280svn_xml_hash_atts_overlaying(const char **atts,
281                             apr_hash_t *ht,
282                             apr_pool_t *pool);
283
284
285
286/* Printing XML */
287
288/** Create an XML header and return it in @a *str.
289 *
290 * Fully-formed XML documents should start out with a header,
291 * something like <pre>
292 *         \<?xml version="1.0" encoding="UTF-8"?\>
293 * </pre>
294 *
295 * This function produces such a header.  @a *str must either be @c NULL, in
296 * which case a new string is created, or it must point to an existing
297 * string to be appended to.  @a encoding must either be @c NULL, in which
298 * case encoding information is omitted from the header, or must be the name
299 * of the encoding of the XML document, such as "UTF-8".
300 *
301 * @since New in 1.7.
302 */
303void
304svn_xml_make_header2(svn_stringbuf_t **str,
305                     const char *encoding,
306                     apr_pool_t *pool);
307
308/** Like svn_xml_make_header2(), but does not emit encoding information
 * (compare svn_xml_make_header2() given a @c NULL encoding argument).
309 *
310 * @deprecated Provided for backward compatibility with the 1.6 API.
311 */
312SVN_DEPRECATED
313void
314svn_xml_make_header(svn_stringbuf_t **str,
315                    apr_pool_t *pool);
316
317
318/** Store a new XML open tag @a tagname in @a *str.
319 *
320 * If @a *str is @c NULL, set @a *str to a new stringbuf allocated
321 * in @a pool, else append to the existing stringbuf there.
322 *
323 * Take the tag's attributes from varargs, a SVN_VA_NULL-terminated list of
324 * alternating <tt>char *</tt> key and <tt>char *</tt> val.  Do XML-escaping
325 * on each val.
326 *
327 * @a style is one of the enumerated styles in @c svn_xml_open_tag_style.
328 */
329void
330svn_xml_make_open_tag(svn_stringbuf_t **str,
331                      apr_pool_t *pool,
332                      enum svn_xml_open_tag_style style,
333                      const char *tagname,
334                      ...) SVN_NEEDS_SENTINEL_NULL;
335
336
337/** Like svn_xml_make_open_tag(), but takes the attribute list as
338 * @c va_list @a ap instead of being variadic.
339 */
340void
341svn_xml_make_open_tag_v(svn_stringbuf_t **str,
342                        apr_pool_t *pool,
343                        enum svn_xml_open_tag_style style,
344                        const char *tagname,
345                        va_list ap);
346
347
348/** Like svn_xml_make_open_tag(), but takes a hash table of attributes
349 * (<tt>char *</tt> keys mapping to <tt>char *</tt> values).
350 *
351 * You might ask, why not just provide svn_xml_make_tag_atts()?
352 *
353 * The reason is that a hash table is the most natural interface to an
354 * attribute list; the fact that Expat uses <tt>char **</tt> atts instead is
355 * certainly a defensible implementation decision, but since we'd have
356 * to have special code to support such lists throughout Subversion
357 * anyway, we might as well write that code for the natural interface
358 * (hashes) and then convert in the few cases where conversion is
359 * needed.  Someday it might even be nice to change expat-lite to work
360 * with apr hashes.
361 *
362 * See conversion functions svn_xml_make_att_hash() and
363 * svn_xml_hash_atts_overlaying().  Callers should use those to
364 * convert Expat attr lists into hashes when necessary.
365 */
366void
367svn_xml_make_open_tag_hash(svn_stringbuf_t **str,
368                           apr_pool_t *pool,
369                           enum svn_xml_open_tag_style style,
370                           const char *tagname,
371                           apr_hash_t *attributes);
372
373
374/** Store an XML close tag @a tagname in @a *str.
375 *
376 * If @a *str is @c NULL, set @a *str to a new stringbuf allocated
377 * in @a pool, else append to the existing stringbuf there.
378 */
379void
380svn_xml_make_close_tag(svn_stringbuf_t **str,
381                       apr_pool_t *pool,
382                       const char *tagname);
383
384
385#ifdef __cplusplus
386}
387#endif /* __cplusplus */
388
389#endif /* SVN_XML_H */
390