1/* ====================================================================
2 *    Licensed to the Apache Software Foundation (ASF) under one
3 *    or more contributor license agreements.  See the NOTICE file
4 *    distributed with this work for additional information
5 *    regarding copyright ownership.  The ASF licenses this file
6 *    to you under the Apache License, Version 2.0 (the
7 *    "License"); you may not use this file except in compliance
8 *    with the License.  You may obtain a copy of the License at
9 *
10 *      http://www.apache.org/licenses/LICENSE-2.0
11 *
12 *    Unless required by applicable law or agreed to in writing,
13 *    software distributed under the License is distributed on an
14 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 *    KIND, either express or implied.  See the License for the
16 *    specific language governing permissions and limitations
17 *    under the License.
18 * ====================================================================
19 */
20
21/**
22 * @file apr_cstr.h
23 * @brief C string goodies.
24 */
25
26#ifndef APR_CSTR_H
27#define APR_CSTR_H
28
29#include <apr.h>          /* for apr_size_t */
30#include <apr_pools.h>    /* for apr_pool_t */
31#include <apr_tables.h>   /* for apr_array_header_t */
32
33#ifdef __cplusplus
34extern "C" {
35#endif /* __cplusplus */
36
37/**
38 * @defgroup apr_cstr C (POSIX) locale string functions
39 * @ingroup apr_strings
40 *
 * The apr_cstr_* functions provide traditional C char * string text handling,
 * and notably they treat all text in the C (a.k.a. POSIX) locale using the
 * minimal POSIX character set, represented in either ASCII or a corresponding
 * EBCDIC subset.
45 *
46 * Character values outside of that set are treated as opaque bytes, and all
47 * multi-byte character sequences are handled as individual distinct octets.
48 *
 * Multi-byte character sequences whose octets fall in the ASCII range cause
 * unexpected results, such as in the ISO-2022-JP code page where ASCII octets
 * occur within both shift-state and multibyte sequences.
52 *
 * In the case of the UTF-8 encoding, all multibyte characters fall outside
 * of the C/POSIX range of characters, so these functions are generally safe
 * to use on UTF-8 strings. The programmer must be aware that each octet may
 * not represent a distinct printable character in such encodings.
57 *
58 * The standard C99/POSIX string functions, rather than apr_cstr, should be
59 * used in all cases where the current locale and encoding of the text is
60 * significant.
61 * @{
62 */
63
64
65/** Divide @a input into substrings, interpreting any char from @a sep
66 * as a token separator.
67 *
68 * Return an array of copies of those substrings (plain const char*),
69 * allocating both the array and the copies in @a pool.
70 *
71 * None of the elements added to the array contain any of the
72 * characters in @a sep_chars, and none of the new elements are empty
73 * (thus, it is possible that the returned array will have length
74 * zero).
75 *
76 * If @a chop_whitespace is TRUE, then remove leading and trailing
77 * whitespace from the returned strings.
78 *
79 * @since New in 1.6
80 */
81APR_DECLARE(apr_array_header_t *) apr_cstr_split(const char *input,
82                                                 const char *sep_chars,
83                                                 int chop_whitespace,
84                                                 apr_pool_t *pool);
85
86/** Like apr_cstr_split(), but append to existing @a array instead of
87 * creating a new one.  Allocate the copied substrings in @a pool
88 * (i.e., caller decides whether or not to pass @a array->pool as @a pool).
89 *
90 * @since New in 1.6
91 */
92APR_DECLARE(void) apr_cstr_split_append(apr_array_header_t *array,
93                                        const char *input,
94                                        const char *sep_chars,
95                                        int chop_whitespace,
96                                        apr_pool_t *pool);
97
98
99/** Return @c TRUE iff @a str matches any of the elements of @a list, a list
100 * of zero or more glob patterns.
101 *
102 * @since New in 1.6
103 */
104APR_DECLARE(int) apr_cstr_match_glob_list(const char *str,
105                                          const apr_array_header_t *list);
106
107/** Return @c TRUE iff @a str exactly matches any of the elements of @a list.
108 *
109 * @since New in 1.6
110 */
111APR_DECLARE(int) apr_cstr_match_list(const char *str,
112                                     const apr_array_header_t *list);
113
114/**
115 * Get the next token from @a *str interpreting any char from @a sep as a
116 * token separator.  Separators at the beginning of @a str will be skipped.
117 * Returns a pointer to the beginning of the first token in @a *str or NULL
118 * if no token is left.  Modifies @a str such that the next call will return
119 * the next token.
120 *
121 * @note The content of @a *str may be modified by this function.
122 *
123 * @since New in 1.6.
124 */
125APR_DECLARE(char *) apr_cstr_tokenize(const char *sep, char **str);
126
127/**
128 * Return the number of line breaks in @a msg, allowing any kind of newline
129 * termination (CR, LF, CRLF, or LFCR), even inconsistent.
130 *
131 * @since New in 1.6.
132 */
133APR_DECLARE(int) apr_cstr_count_newlines(const char *msg);
134
#if 0 /* XXX: stringbuf logic is not present in APR */
/**
 * Return a cstring which is the concatenation of @a strings (an array
 * of char *) each followed by @a separator (that is, @a separator
 * will also end the resulting string).  Allocate the result in @a pool.
 * If @a strings is empty, then return the empty string.
 *
 * @param strings   an array of char * elements to concatenate
 * @param separator the string appended after every element
 * @param pool      the pool in which the result is allocated
 * @return the concatenated string
 *
 * @note This declaration is compiled out by the surrounding @c #if 0.
 *
 * @since New in 1.6.
 */
APR_DECLARE(char *) apr_cstr_join(const apr_array_header_t *strings,
                                  const char *separator,
                                  apr_pool_t *pool);
#endif
148
149/**
150 * Perform a case-insensitive comparison of two strings @a atr1 and @a atr2,
151 * treating upper and lower case values of the 26 standard C/POSIX alphabetic
152 * characters as equivalent. Extended latin characters outside of this set
153 * are treated as unique octets, irrespective of the current locale.
154 *
155 * Returns in integer greater than, equal to, or less than 0,
156 * according to whether @a str1 is considered greater than, equal to,
157 * or less than @a str2.
158 *
159 * @since New in 1.6.
160 */
161APR_DECLARE(int) apr_cstr_casecmp(const char *str1, const char *str2);
162
163/**
164 * Perform a case-insensitive comparison of two strings @a atr1 and @a atr2,
165 * treating upper and lower case values of the 26 standard C/POSIX alphabetic
166 * characters as equivalent. Extended latin characters outside of this set
167 * are treated as unique octets, irrespective of the current locale.
168 *
169 * Returns in integer greater than, equal to, or less than 0,
170 * according to whether @a str1 is considered greater than, equal to,
171 * or less than @a str2.
172 *
173 * @since New in 1.6.
174 */
175APR_DECLARE(int) apr_cstr_casecmpn(const char *str1,
176                                   const char *str2,
177                                   apr_size_t n);
178
179/**
180 * Parse the C string @a str into a 64 bit number, and return it in @a *n.
181 * Assume that the number is represented in base @a base.
182 * Raise an error if conversion fails (e.g. due to overflow), or if the
183 * converted number is smaller than @a minval or larger than @a maxval.
184 *
185 * Leading whitespace in @a str is skipped in a locale-dependent way.
186 * After that, the string may contain an optional '+' (positive, default)
187 * or '-' (negative) character, followed by an optional '0x' prefix if
188 * @a base is 0 or 16, followed by numeric digits appropriate for the base.
189 * If there are any more characters after the numeric digits, an error is
190 * returned.
191 *
192 * If @a base is zero, then a leading '0x' or '0X' prefix means hexadecimal,
193 * else a leading '0' means octal (implemented, though not documented, in
194 * apr_strtoi64() in APR 0.9.0 through 1.5.0), else use base ten.
195 *
196 * @since New in 1.6.
197 */
198APR_DECLARE(apr_status_t) apr_cstr_strtoi64(apr_int64_t *n, const char *str,
199                                            apr_int64_t minval,
200                                            apr_int64_t maxval,
201                                            int base);
202
203/**
204 * Parse the C string @a str into a 64 bit number, and return it in @a *n.
205 * Assume that the number is represented in base 10.
206 * Raise an error if conversion fails (e.g. due to overflow).
207 *
208 * The behaviour otherwise is as described for apr_cstr_strtoi64().
209 *
210 * @since New in 1.6.
211 */
212APR_DECLARE(apr_status_t) apr_cstr_atoi64(apr_int64_t *n, const char *str);
213
214/**
215 * Parse the C string @a str into a 32 bit number, and return it in @a *n.
216 * Assume that the number is represented in base 10.
217 * Raise an error if conversion fails (e.g. due to overflow).
218 *
219 * The behaviour otherwise is as described for apr_cstr_strtoi64().
220 *
221 * @since New in 1.6.
222 */
223APR_DECLARE(apr_status_t) apr_cstr_atoi(int *n, const char *str);
224
225/**
226 * Parse the C string @a str into an unsigned 64 bit number, and return
227 * it in @a *n. Assume that the number is represented in base @a base.
228 * Raise an error if conversion fails (e.g. due to overflow), or if the
229 * converted number is smaller than @a minval or larger than @a maxval.
230 *
231 * Leading whitespace in @a str is skipped in a locale-dependent way.
232 * After that, the string may contain an optional '+' (positive, default)
233 * or '-' (negative) character, followed by an optional '0x' prefix if
234 * @a base is 0 or 16, followed by numeric digits appropriate for the base.
235 * If there are any more characters after the numeric digits, an error is
236 * returned.
237 *
238 * If @a base is zero, then a leading '0x' or '0X' prefix means hexadecimal,
239 * else a leading '0' means octal (as implemented, though not documented, in
240 * apr_strtoi64(), else use base ten.
241 *
242 * @warning The implementation returns APR_ERANGE if the parsed number
243 * is greater than APR_INT64_MAX, even if it is not greater than @a maxval.
244 *
245 * @since New in 1.6.
246 */
247APR_DECLARE(apr_status_t) apr_cstr_strtoui64(apr_uint64_t *n, const char *str,
248                                             apr_uint64_t minval,
249                                             apr_uint64_t maxval,
250                                             int base);
251
252/**
253 * Parse the C string @a str into an unsigned 64 bit number, and return
254 * it in @a *n. Assume that the number is represented in base 10.
255 * Raise an error if conversion fails (e.g. due to overflow).
256 *
257 * The behaviour otherwise is as described for apr_cstr_strtoui64(),
258 * including the upper limit of APR_INT64_MAX.
259 *
260 * @since New in 1.6.
261 */
262APR_DECLARE(apr_status_t) apr_cstr_atoui64(apr_uint64_t *n, const char *str);
263
264/**
265 * Parse the C string @a str into an unsigned 32 bit number, and return
266 * it in @a *n. Assume that the number is represented in base 10.
267 * Raise an error if conversion fails (e.g. due to overflow).
268 *
269 * The behaviour otherwise is as described for apr_cstr_strtoui64(),
270 * including the upper limit of APR_INT64_MAX.
271 *
272 * @since New in 1.6.
273 */
274APR_DECLARE(apr_status_t) apr_cstr_atoui(unsigned int *n, const char *str);
275
276/**
277 * Skip the common prefix @a prefix from the C string @a str, and return
278 * a pointer to the next character after the prefix.
279 * Return @c NULL if @a str does not start with @a prefix.
280 *
281 * @since New in 1.6.
282 */
283APR_DECLARE(const char *) apr_cstr_skip_prefix(const char *str,
284                                               const char *prefix);
285
286/** @} */
287
288#ifdef __cplusplus
289}
290#endif /* __cplusplus */
291
#endif  /* APR_CSTR_H */
293