svn_string.h revision 299742
1/**
2 * @copyright
3 * ====================================================================
4 *    Licensed to the Apache Software Foundation (ASF) under one
5 *    or more contributor license agreements.  See the NOTICE file
6 *    distributed with this work for additional information
7 *    regarding copyright ownership.  The ASF licenses this file
8 *    to you under the Apache License, Version 2.0 (the
9 *    "License"); you may not use this file except in compliance
10 *    with the License.  You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 *    Unless required by applicable law or agreed to in writing,
15 *    software distributed under the License is distributed on an
16 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 *    KIND, either express or implied.  See the License for the
18 *    specific language governing permissions and limitations
19 *    under the License.
20 * ====================================================================
21 * @endcopyright
22 *
23 * @file svn_string.h
24 * @brief Counted-length strings for Subversion, plus some C string goodies.
25 *
26 * There are two string datatypes: @c svn_string_t and @c svn_stringbuf_t.
27 * The former is a simple pointer/length pair useful for passing around
28 * strings (or arbitrary bytes) with a counted length. @c svn_stringbuf_t is
29 * buffered to enable efficient appending of strings without an allocation
30 * and copy for each append operation.
31 *
32 * @c svn_string_t contains a <tt>const char *</tt> for its data, so it is
33 * most appropriate for constant data and for functions which expect constant,
34 * counted data. Functions should generally use <tt>const @c svn_string_t
35 * *</tt> as their parameter to indicate they are expecting a constant,
36 * counted string.
37 *
38 * @c svn_stringbuf_t uses a plain <tt>char *</tt> for its data, so it is
39 * most appropriate for modifiable data.
40 *
41 * <h3>Invariants</h3>
42 *
43 *   1. Null termination:
44 *
45 *      Both structures maintain a significant invariant:
46 *
47 *         <tt>s->data[s->len] == '\\0'</tt>
48 *
49 *      The functions defined within this header file will maintain
50 *      the invariant (which does imply that memory is
51 *      allocated/defined as @c len+1 bytes).  If code outside of the
52 *      @c svn_string.h functions manually builds these structures,
53 *      then they must enforce this invariant.
54 *
55 *      Note that an @c svn_string(buf)_t may contain binary data,
56 *      which means that strlen(s->data) does not have to equal @c
57 *      s->len. The null terminator is provided to make it easier to
58 *      pass @c s->data to C string interfaces.
59 *
60 *
61 *   2. Non-NULL input:
62 *
63 *      All the functions assume their input data pointer is non-NULL,
64 *      unless otherwise documented, and may seg fault if passed
65 *      NULL.  The input data may *contain* null bytes, of course, just
66 *      the data pointer itself must not be NULL.
67 *
68 * <h3>Memory allocation</h3>
69 *
70 *   Unless documented otherwise (see svn_stringbuf_create_wrap()), the
71 *   functions deep-copy all input data and never store a pointer to it.
72 */
73
74
75#ifndef SVN_STRING_H
76#define SVN_STRING_H
77
78#include <apr.h>          /* for apr_size_t */
79#include <apr_pools.h>    /* for apr_pool_t */
80#include <apr_tables.h>   /* for apr_array_header_t */
81
82#include "svn_types.h"    /* for svn_boolean_t, svn_error_t */
83
84#ifdef __cplusplus
85extern "C" {
86#endif /* __cplusplus */
87
88/**
89 * @defgroup svn_string String handling
90 * @{
91 */
92
93
94
95/** A simple counted string: a pointer/length pair over constant data. */
96typedef struct svn_string_t
97{
98  const char *data; /**< pointer to the bytestring; @c data[len] is '\\0' */
99  apr_size_t len;   /**< length of bytestring, excluding that terminator */
100} svn_string_t;
101
102/** A buffered string, capable of appending without an allocation and copy
103 * for each append. */
104typedef struct svn_stringbuf_t
105{
106  /** The pool from which this string was originally allocated; it is not
107   * necessarily specific to this string.  It is used only for allocating
108   * more memory when the string needs to grow.
109   */
110  apr_pool_t *pool;
111
112  /** pointer to the bytestring */
113  char *data;
114
115  /** length of bytestring */
116  apr_size_t len;
117
118  /** total size of buffer allocated */
119  apr_size_t blocksize;
120} svn_stringbuf_t;
121
122
123/**
124 * @defgroup svn_string_svn_string_t svn_string_t functions
125 * @{
126 */
127
128/** Create a new string copied from the null-terminated C string @a cstring.
129 */
130svn_string_t *
131svn_string_create(const char *cstring, apr_pool_t *pool);
132
133/** Create a new, empty string.
134 *
135 * @since New in 1.8.
136 */
137svn_string_t *
138svn_string_create_empty(apr_pool_t *pool);
139
140/** Create a new string copied from a generic string of bytes, @a bytes, of
141 * length @a size bytes.  @a bytes is NOT assumed to be null-terminated, but
142 * the new string will be.
143 *
144 * @since Since 1.9, @a bytes can be NULL if @a size is zero.
145 */
146svn_string_t *
147svn_string_ncreate(const char *bytes, apr_size_t size, apr_pool_t *pool);
148
149/** Create a new string copied from the stringbuf @a strbuf.
150 */
151svn_string_t *
152svn_string_create_from_buf(const svn_stringbuf_t *strbuf, apr_pool_t *pool);
153
154/** Create a new string by printf-style formatting using @a fmt and the
155 * variable arguments, which are as appropriate for apr_psprintf().
156 */
157svn_string_t *
158svn_string_createf(apr_pool_t *pool, const char *fmt, ...)
159  __attribute__((format(printf, 2, 3)));
160
161/** Create a new string by printf-style formatting using @a fmt and @a ap.
162 * This is the same as svn_string_createf() except for the different
163 * way of passing the variable arguments.
164 */
165svn_string_t *
166svn_string_createv(apr_pool_t *pool, const char *fmt, va_list ap)
167  __attribute__((format(printf, 2, 0)));
168
169/** Return @c TRUE if @a str is empty (has length zero). */
170svn_boolean_t
171svn_string_isempty(const svn_string_t *str);
172
173/** Return a duplicate of @a original_string.
174 *
175 * @since Since 1.9, @a original_string can be NULL in which case NULL will
176 * be returned.
177 */
178svn_string_t *
179svn_string_dup(const svn_string_t *original_string, apr_pool_t *pool);
180
181/** Return @c TRUE iff @a str1 and @a str2 have identical length and data. */
182svn_boolean_t
183svn_string_compare(const svn_string_t *str1, const svn_string_t *str2);
184
185/** Return the offset of the first non-whitespace character in @a str, or
186 * @a str->len if there is none.
187 */
188apr_size_t
189svn_string_first_non_whitespace(const svn_string_t *str);
190
191/** Return the position of the last occurrence of @a ch in @a str, or
192 * @a str->len if @a ch does not occur.
193 */
194apr_size_t
195svn_string_find_char_backward(const svn_string_t *str, char ch);
196
197/** @} */
198
199
200/**
201 * @defgroup svn_string_svn_stringbuf_t svn_stringbuf_t functions
202 * @{
203 */
204
205/** Create a new stringbuf copied from the null-terminated C string
206 * @a cstring.
207 */
208svn_stringbuf_t *
209svn_stringbuf_create(const char *cstring, apr_pool_t *pool);
210
211/** Create a new stringbuf copied from the generic string of bytes, @a bytes,
212 * of length @a size bytes.  @a bytes is NOT assumed to be null-terminated,
213 * but the new stringbuf will be.
214 *
215 * @since Since 1.9, @a bytes can be NULL if @a size is zero.
216 */
217svn_stringbuf_t *
218svn_stringbuf_ncreate(const char *bytes, apr_size_t size, apr_pool_t *pool);
219
220/** Create a new, empty stringbuf.
221 *
222 * @since New in 1.8.
223 */
224svn_stringbuf_t *
225svn_stringbuf_create_empty(apr_pool_t *pool);
226
227/** Create a new, empty stringbuf with at least @a minimum_size bytes of
228 * space available in the memory block.
229 *
230 * The allocated string buffer will be at least one byte larger than
231 * @a minimum_size to account for a final '\\0'.
232 *
233 * @since New in 1.6.
234 */
235svn_stringbuf_t *
236svn_stringbuf_create_ensure(apr_size_t minimum_size, apr_pool_t *pool);
237
238/** Create a new stringbuf copied from the string @a str.
239 */
240svn_stringbuf_t *
241svn_stringbuf_create_from_string(const svn_string_t *str, apr_pool_t *pool);
242
243/** Create a new stringbuf using the given @a str as its initial buffer.
244 * Allocate the result in @a pool.  In contrast to #svn_stringbuf_create,
245 * the contents of @a str may change when the stringbuf gets modified.
246 *
247 * @since New in 1.9.
248 */
249svn_stringbuf_t *
250svn_stringbuf_create_wrap(char *str, apr_pool_t *pool);
251
252/** Create a new stringbuf by printf-style formatting using @a fmt and the
253 * variable arguments, which are as appropriate for apr_psprintf().
254 */
255svn_stringbuf_t *
256svn_stringbuf_createf(apr_pool_t *pool, const char *fmt, ...)
257  __attribute__((format(printf, 2, 3)));
258
259/** Create a new stringbuf by printf-style formatting using @a fmt and @a ap.
260 * This is the same as svn_stringbuf_createf() except for the different
261 * way of passing the variable arguments.
262 */
263svn_stringbuf_t *
264svn_stringbuf_createv(apr_pool_t *pool, const char *fmt, va_list ap)
265  __attribute__((format(printf, 2, 0)));
266
267/** Make sure that @a str has at least @a minimum_size
268 * bytes of space available in the memory block.
269 *
270 * The allocated string buffer will be at least one byte larger than
271 * @a minimum_size to account for a final '\\0'.
272 *
273 * @note Before Subversion 1.8 this function did not ensure space for
274 * one byte more than @a minimum_size.  If compatibility with pre-1.8
275 * behaviour is required, callers must assume space for only
276 * @a minimum_size-1 data bytes plus a final '\\0'.
277 */
278void
279svn_stringbuf_ensure(svn_stringbuf_t *str, apr_size_t minimum_size);
280
281/** Set @a str to a copy of the null-terminated C string @a value. */
282void
283svn_stringbuf_set(svn_stringbuf_t *str, const char *value);
284
285/** Set @a str to empty (zero length). */
286void
287svn_stringbuf_setempty(svn_stringbuf_t *str);
288
289/** Return @c TRUE if @a str is empty (has length zero). */
290svn_boolean_t
291svn_stringbuf_isempty(const svn_stringbuf_t *str);
292
293/** Chop @a nbytes bytes off the end of @a str, but not more than @a str->len. */
294void
295svn_stringbuf_chop(svn_stringbuf_t *str, apr_size_t nbytes);
296
297/** Fill @a str with character @a c. */
298void
299svn_stringbuf_fillchar(svn_stringbuf_t *str, unsigned char c);
300
301/** Append the single character @a byte onto @a targetstr.
302 *
303 * This is an optimized version of svn_stringbuf_appendbytes()
304 * that is much faster to call and execute. Gains vary with the ABI.
305 * The advantages extend beyond the actual call because the reduced
306 * register pressure allows for more optimization within the caller.
307 *
308 * Reallocs if necessary. @a targetstr is affected, nothing else is.
309 * @since New in 1.7.
310 */
311void
312svn_stringbuf_appendbyte(svn_stringbuf_t *targetstr,
313                         char byte);
314
315/** Append the array of bytes @a bytes of length @a count onto @a targetstr.
316 *
317 * Reallocs if necessary. @a targetstr is affected, nothing else is.
318 *
319 * @since Since 1.9, @a bytes can be NULL if @a count is zero.
320 */
321void
322svn_stringbuf_appendbytes(svn_stringbuf_t *targetstr,
323                          const char *bytes,
324                          apr_size_t count);
325
326/** Append @a byte @a count times onto @a targetstr.
327 *
328 * Reallocs if necessary. @a targetstr is affected, nothing else is.
329 * @since New in 1.9.
330 */
331void
332svn_stringbuf_appendfill(svn_stringbuf_t *targetstr,
333                         char byte,
334                         apr_size_t count);
335
336/** Append the stringbuf @a appendstr onto @a targetstr.
337 *
338 * Reallocs if necessary. @a targetstr is affected, nothing else is.
339 */
340void
341svn_stringbuf_appendstr(svn_stringbuf_t *targetstr,
342                        const svn_stringbuf_t *appendstr);
343
344/** Append the C string @a cstr onto @a targetstr.
345 *
346 * Reallocs if necessary. @a targetstr is affected, nothing else is.
347 */
348void
349svn_stringbuf_appendcstr(svn_stringbuf_t *targetstr,
350                         const char *cstr);
351
352/** Insert into @a str at position @a pos an array of bytes @a bytes
353 * which is @a count bytes long.
354 *
355 * The resulting string will be @c count+str->len bytes long.  If
356 * @a pos is larger than or equal to @c str->len, simply append @a bytes.
357 *
358 * Reallocs if necessary. @a str is affected, nothing else is.
359 *
360 * @note The inserted string may be a sub-range of @a str.
361 *
362 * @since New in 1.8.
363 *
364 * @since Since 1.9, @a bytes can be NULL if @a count is zero.
365 */
366void
367svn_stringbuf_insert(svn_stringbuf_t *str,
368                     apr_size_t pos,
369                     const char *bytes,
370                     apr_size_t count);
371
372/** Remove @a count bytes from @a str, starting at position @a pos.
373 *
374 * If that range exceeds the current string data, truncate @a str at
375 * @a pos.  If @a pos is larger than or equal to @c str->len, this will
376 * be a no-op.  Otherwise, the resulting string will be @c str->len-count
377 * bytes long.
378 *
379 * @since New in 1.8.
380 */
381void
382svn_stringbuf_remove(svn_stringbuf_t *str,
383                     apr_size_t pos,
384                     apr_size_t count);
385
386/** Replace in @a str the substring which starts at @a pos and is @a
387 * old_count bytes long with a new substring @a bytes which is @a
388 * new_count bytes long.
389 *
390 * This is faster but functionally equivalent to the following sequence:
391 * @code
392     svn_stringbuf_remove(str, pos, old_count);
393     svn_stringbuf_insert(str, pos, bytes, new_count);
394 * @endcode
395 *
396 * @since New in 1.8.
397 *
398 * @since Since 1.9, @a bytes can be NULL if @a new_count is zero.
399 */
400void
401svn_stringbuf_replace(svn_stringbuf_t *str,
402                      apr_size_t pos,
403                      apr_size_t old_count,
404                      const char *bytes,
405                      apr_size_t new_count);
406
407/** Return a duplicate of @a original_string. */
408svn_stringbuf_t *
409svn_stringbuf_dup(const svn_stringbuf_t *original_string, apr_pool_t *pool);
410
411/** Return @c TRUE iff @a str1 and @a str2 have identical length and data. */
412svn_boolean_t
413svn_stringbuf_compare(const svn_stringbuf_t *str1,
414                      const svn_stringbuf_t *str2);
415
416/** Return the offset of the first non-whitespace character in @a str, or
417 * @a str->len if there is none.
418 */
419apr_size_t
420svn_stringbuf_first_non_whitespace(const svn_stringbuf_t *str);
421
422/** Strip whitespace from both sides of @a str (modified in place). */
423void
424svn_stringbuf_strip_whitespace(svn_stringbuf_t *str);
425
426/** Return the position of the last occurrence of @a ch in @a str, or
427 * @a str->len if @a ch does not occur.
428 */
429apr_size_t
430svn_stringbuf_find_char_backward(const svn_stringbuf_t *str, char ch);
431
432/** Return @c TRUE iff @a str1 and @a str2 have identical length and data. */
433svn_boolean_t
434svn_string_compare_stringbuf(const svn_string_t *str1,
435                             const svn_stringbuf_t *str2);
436
437/** @} */
438
439
440/**
441 * @defgroup svn_string_cstrings C string functions
442 * @{
443 */
444
445/** Divide @a input into substrings, interpreting any char from
446 * @a sep_chars as a token separator.
447 *
448 * Return an array of copies of those substrings (plain const char*),
449 * allocating both the array and the copies in @a pool.
450 *
451 * None of the elements added to the array contain any of the
452 * characters in @a sep_chars, and none of the new elements are empty
453 * (thus, it is possible that the returned array will have length
454 * zero).
455 *
456 * If @a chop_whitespace is TRUE, then remove leading and trailing
457 * whitespace from the returned strings.
458 */
459apr_array_header_t *
460svn_cstring_split(const char *input,
461                  const char *sep_chars,
462                  svn_boolean_t chop_whitespace,
463                  apr_pool_t *pool);
464
465/** Like svn_cstring_split(), but append to existing @a array instead of
466 * creating a new one.  Allocate the copied substrings in @a pool
467 * (i.e., caller decides whether or not to pass @a array->pool as @a pool).
468 */
469void
470svn_cstring_split_append(apr_array_header_t *array,
471                         const char *input,
472                         const char *sep_chars,
473                         svn_boolean_t chop_whitespace,
474                         apr_pool_t *pool);
475
476
477/** Return @c TRUE iff @a str matches any of the elements of @a list, a list
478 * of zero or more glob patterns.
479 */
480svn_boolean_t
481svn_cstring_match_glob_list(const char *str, const apr_array_header_t *list);
482
483/** Return @c TRUE iff @a str exactly matches any of the elements of @a list.
484 *
485 * @since New in 1.7.
486 */
487svn_boolean_t
488svn_cstring_match_list(const char *str, const apr_array_header_t *list);
489
490/**
491 * Get the next token from @a *str interpreting any char from @a sep as a
492 * token separator.  Separators at the beginning of @a *str will be skipped.
493 * Returns a pointer to the beginning of the first token in @a *str or NULL
494 * if no token is left.  Updates @a *str such that the next call will return
495 * the next token.
496 *
497 * @note The content of @a *str may be modified by this function.
498 *
499 * @since New in 1.8.
500 */
501char *
502svn_cstring_tokenize(const char *sep, char **str);
503
504/**
505 * Return the number of line breaks in @a msg, allowing any kind of newline
506 * termination (CR, LF, CRLF, or LFCR), even inconsistent.
507 *
508 * @since New in 1.2.
509 */
510int
511svn_cstring_count_newlines(const char *msg);
512
513/**
514 * Return a cstring which is the concatenation of @a strings (an array
515 * of char *) each followed by @a separator (that is, @a separator
516 * will also end the resulting string).  Allocate the result in @a pool.
517 * If @a strings is empty, then return the empty string.
518 *
519 * @since New in 1.2.
520 */
521char *
522svn_cstring_join(const apr_array_header_t *strings,
523                 const char *separator,
524                 apr_pool_t *pool);
525
526/**
527 * Compare two strings @a str1 and @a str2, treating case-equivalent
528 * unaccented Latin (ASCII subset) letters as equal.
529 *
530 * Returns an integer greater than, equal to, or less than 0,
531 * according to whether @a str1 is considered greater than, equal to,
532 * or less than @a str2.
533 *
534 * @since New in 1.5.
535 */
536int
537svn_cstring_casecmp(const char *str1, const char *str2);
538
539/**
540 * Parse the C string @a str into a 64 bit number, and return it in @a *n.
541 * Assume that the number is represented in base @a base.
542 * Raise an error if conversion fails (e.g. due to overflow), or if the
543 * converted number is smaller than @a minval or larger than @a maxval.
544 * Leading whitespace in @a str is skipped in a locale-dependent way.
545 *
546 * @since New in 1.7.
547 */
548svn_error_t *
549svn_cstring_strtoi64(apr_int64_t *n, const char *str,
550                     apr_int64_t minval, apr_int64_t maxval,
551                     int base);
552
553/**
554 * Parse the C string @a str into a 64 bit number, and return it in @a *n.
555 * Assume that the number is represented in base 10.
556 * Raise an error if conversion fails (e.g. due to overflow).
557 * Leading whitespace in @a str is skipped in a locale-dependent way.
558 *
559 * @since New in 1.7.
560 */
561svn_error_t *
562svn_cstring_atoi64(apr_int64_t *n, const char *str);
563
564/**
565 * Parse the C string @a str into a 32 bit number, and return it in @a *n.
566 * Assume that the number is represented in base 10.
567 * Raise an error if conversion fails (e.g. due to overflow).
568 * Leading whitespace in @a str is skipped in a locale-dependent way.
569 *
570 * @since New in 1.7.
571 */
572svn_error_t *
573svn_cstring_atoi(int *n, const char *str);
574
575/**
576 * Parse the C string @a str into an unsigned 64 bit number, and return
577 * it in @a *n. Assume that the number is represented in base @a base.
578 * Raise an error if conversion fails (e.g. due to overflow), or if the
579 * converted number is smaller than @a minval or larger than @a maxval.
580 * Leading whitespace in @a str is skipped in a locale-dependent way.
581 *
582 * @since New in 1.7.
583 */
584svn_error_t *
585svn_cstring_strtoui64(apr_uint64_t *n, const char *str,
586                      apr_uint64_t minval, apr_uint64_t maxval,
587                      int base);
588
589/**
590 * Parse the C string @a str into an unsigned 64 bit number, and return
591 * it in @a *n. Assume that the number is represented in base 10.
592 * Raise an error if conversion fails (e.g. due to overflow).
593 * Leading whitespace in @a str is skipped in a locale-dependent way.
594 *
595 * @since New in 1.7.
596 */
597svn_error_t *
598svn_cstring_atoui64(apr_uint64_t *n, const char *str);
599
600/**
601 * Parse the C string @a str into an unsigned 32 bit number, and return
602 * it in @a *n. Assume that the number is represented in base 10.
603 * Raise an error if conversion fails (e.g. due to overflow).
604 * Leading whitespace in @a str is skipped in a locale-dependent way.
605 *
606 * @since New in 1.7.
607 */
608svn_error_t *
609svn_cstring_atoui(unsigned int *n, const char *str);
610
611/**
612 * Skip the common prefix @a prefix from the C string @a str, and return
613 * a pointer to the next character after the prefix.
614 * Return @c NULL if @a str does not start with @a prefix.
615 *
616 * @since New in 1.9.
617 */
618const char *
619svn_cstring_skip_prefix(const char *str, const char *prefix);
620
621/** @} */
622
623/** @} */
624
625
626#ifdef __cplusplus
627}
628#endif /* __cplusplus */
629
630#endif  /* SVN_STRING_H */
631