1/* ==================================================================== 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, 13 * software distributed under the License is distributed on an 14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 * KIND, either express or implied. See the License for the 16 * specific language governing permissions and limitations 17 * under the License. 18 * ==================================================================== 19 */ 20 21/** 22 * @file apr_cstr.h 23 * @brief C string goodies. 24 */ 25 26#ifndef APR_CSTR_H 27#define APR_CSTR_H 28 29#include <apr.h> /* for apr_size_t */ 30#include <apr_pools.h> /* for apr_pool_t */ 31#include <apr_tables.h> /* for apr_array_header_t */ 32 33#ifdef __cplusplus 34extern "C" { 35#endif /* __cplusplus */ 36 37/** 38 * @defgroup apr_cstr C (POSIX) locale string functions 39 * @ingroup apr_strings 40 * 41 * The apr_cstr_* functions provide traditional C char * string text handling, 42 * and notabilty they treat all text in the C (a.k.a. POSIX) locale using the 43 * minimal POSIX character set, represented in either ASCII or a corresponding 44 * EBCDIC subset. 45 * 46 * Character values outside of that set are treated as opaque bytes, and all 47 * multi-byte character sequences are handled as individual distinct octets. 48 * 49 * Multi-byte characters sequences whose octets fall in the ASCII range cause 50 * unexpected results, such as in the ISO-2022-JP code page where ASCII octets 51 * occur within both shift-state and multibyte sequences. 52 * 53 * In the case of the UTF-8 encoding, all multibyte characters all fall outside 54 * of the C/POSIX range of characters, so these functions are generally safe 55 * to use on UTF-8 strings. The programmer must be aware that each octet may 56 * not represent a distinct printable character in such encodings. 57 * 58 * The standard C99/POSIX string functions, rather than apr_cstr, should be 59 * used in all cases where the current locale and encoding of the text is 60 * significant. 61 * @{ 62 */ 63 64 65/** Divide @a input into substrings, interpreting any char from @a sep 66 * as a token separator. 67 * 68 * Return an array of copies of those substrings (plain const char*), 69 * allocating both the array and the copies in @a pool. 70 * 71 * None of the elements added to the array contain any of the 72 * characters in @a sep_chars, and none of the new elements are empty 73 * (thus, it is possible that the returned array will have length 74 * zero). 75 * 76 * If @a chop_whitespace is TRUE, then remove leading and trailing 77 * whitespace from the returned strings. 78 * 79 * @since New in 1.6 80 */ 81APR_DECLARE(apr_array_header_t *) apr_cstr_split(const char *input, 82 const char *sep_chars, 83 int chop_whitespace, 84 apr_pool_t *pool); 85 86/** Like apr_cstr_split(), but append to existing @a array instead of 87 * creating a new one. Allocate the copied substrings in @a pool 88 * (i.e., caller decides whether or not to pass @a array->pool as @a pool). 89 * 90 * @since New in 1.6 91 */ 92APR_DECLARE(void) apr_cstr_split_append(apr_array_header_t *array, 93 const char *input, 94 const char *sep_chars, 95 int chop_whitespace, 96 apr_pool_t *pool); 97 98 99/** Return @c TRUE iff @a str matches any of the elements of @a list, a list 100 * of zero or more glob patterns. 101 * 102 * @since New in 1.6 103 */ 104APR_DECLARE(int) apr_cstr_match_glob_list(const char *str, 105 const apr_array_header_t *list); 106 107/** Return @c TRUE iff @a str exactly matches any of the elements of @a list. 108 * 109 * @since New in 1.6 110 */ 111APR_DECLARE(int) apr_cstr_match_list(const char *str, 112 const apr_array_header_t *list); 113 114/** 115 * Get the next token from @a *str interpreting any char from @a sep as a 116 * token separator. Separators at the beginning of @a str will be skipped. 117 * Returns a pointer to the beginning of the first token in @a *str or NULL 118 * if no token is left. Modifies @a str such that the next call will return 119 * the next token. 120 * 121 * @note The content of @a *str may be modified by this function. 122 * 123 * @since New in 1.6. 124 */ 125APR_DECLARE(char *) apr_cstr_tokenize(const char *sep, char **str); 126 127/** 128 * Return the number of line breaks in @a msg, allowing any kind of newline 129 * termination (CR, LF, CRLF, or LFCR), even inconsistent. 130 * 131 * @since New in 1.6. 132 */ 133APR_DECLARE(int) apr_cstr_count_newlines(const char *msg); 134 135#if 0 /* XXX: stringbuf logic is not present in APR */ 136/** 137 * Return a cstring which is the concatenation of @a strings (an array 138 * of char *) each followed by @a separator (that is, @a separator 139 * will also end the resulting string). Allocate the result in @a pool. 140 * If @a strings is empty, then return the empty string. 141 * 142 * @since New in 1.6. 143 */ 144APR_DECLARE(char *) apr_cstr_join(const apr_array_header_t *strings, 145 const char *separator, 146 apr_pool_t *pool); 147#endif 148 149/** 150 * Perform a case-insensitive comparison of two strings @a atr1 and @a atr2, 151 * treating upper and lower case values of the 26 standard C/POSIX alphabetic 152 * characters as equivalent. Extended latin characters outside of this set 153 * are treated as unique octets, irrespective of the current locale. 154 * 155 * Returns in integer greater than, equal to, or less than 0, 156 * according to whether @a str1 is considered greater than, equal to, 157 * or less than @a str2. 158 * 159 * @since New in 1.6. 160 */ 161APR_DECLARE(int) apr_cstr_casecmp(const char *str1, const char *str2); 162 163/** 164 * Perform a case-insensitive comparison of two strings @a atr1 and @a atr2, 165 * treating upper and lower case values of the 26 standard C/POSIX alphabetic 166 * characters as equivalent. Extended latin characters outside of this set 167 * are treated as unique octets, irrespective of the current locale. 168 * 169 * Returns in integer greater than, equal to, or less than 0, 170 * according to whether @a str1 is considered greater than, equal to, 171 * or less than @a str2. 172 * 173 * @since New in 1.6. 174 */ 175APR_DECLARE(int) apr_cstr_casecmpn(const char *str1, 176 const char *str2, 177 apr_size_t n); 178 179/** 180 * Parse the C string @a str into a 64 bit number, and return it in @a *n. 181 * Assume that the number is represented in base @a base. 182 * Raise an error if conversion fails (e.g. due to overflow), or if the 183 * converted number is smaller than @a minval or larger than @a maxval. 184 * 185 * Leading whitespace in @a str is skipped in a locale-dependent way. 186 * After that, the string may contain an optional '+' (positive, default) 187 * or '-' (negative) character, followed by an optional '0x' prefix if 188 * @a base is 0 or 16, followed by numeric digits appropriate for the base. 189 * If there are any more characters after the numeric digits, an error is 190 * returned. 191 * 192 * If @a base is zero, then a leading '0x' or '0X' prefix means hexadecimal, 193 * else a leading '0' means octal (implemented, though not documented, in 194 * apr_strtoi64() in APR 0.9.0 through 1.5.0), else use base ten. 195 * 196 * @since New in 1.6. 197 */ 198APR_DECLARE(apr_status_t) apr_cstr_strtoi64(apr_int64_t *n, const char *str, 199 apr_int64_t minval, 200 apr_int64_t maxval, 201 int base); 202 203/** 204 * Parse the C string @a str into a 64 bit number, and return it in @a *n. 205 * Assume that the number is represented in base 10. 206 * Raise an error if conversion fails (e.g. due to overflow). 207 * 208 * The behaviour otherwise is as described for apr_cstr_strtoi64(). 209 * 210 * @since New in 1.6. 211 */ 212APR_DECLARE(apr_status_t) apr_cstr_atoi64(apr_int64_t *n, const char *str); 213 214/** 215 * Parse the C string @a str into a 32 bit number, and return it in @a *n. 216 * Assume that the number is represented in base 10. 217 * Raise an error if conversion fails (e.g. due to overflow). 218 * 219 * The behaviour otherwise is as described for apr_cstr_strtoi64(). 220 * 221 * @since New in 1.6. 222 */ 223APR_DECLARE(apr_status_t) apr_cstr_atoi(int *n, const char *str); 224 225/** 226 * Parse the C string @a str into an unsigned 64 bit number, and return 227 * it in @a *n. Assume that the number is represented in base @a base. 228 * Raise an error if conversion fails (e.g. due to overflow), or if the 229 * converted number is smaller than @a minval or larger than @a maxval. 230 * 231 * Leading whitespace in @a str is skipped in a locale-dependent way. 232 * After that, the string may contain an optional '+' (positive, default) 233 * or '-' (negative) character, followed by an optional '0x' prefix if 234 * @a base is 0 or 16, followed by numeric digits appropriate for the base. 235 * If there are any more characters after the numeric digits, an error is 236 * returned. 237 * 238 * If @a base is zero, then a leading '0x' or '0X' prefix means hexadecimal, 239 * else a leading '0' means octal (as implemented, though not documented, in 240 * apr_strtoi64(), else use base ten. 241 * 242 * @warning The implementation returns APR_ERANGE if the parsed number 243 * is greater than APR_INT64_MAX, even if it is not greater than @a maxval. 244 * 245 * @since New in 1.6. 246 */ 247APR_DECLARE(apr_status_t) apr_cstr_strtoui64(apr_uint64_t *n, const char *str, 248 apr_uint64_t minval, 249 apr_uint64_t maxval, 250 int base); 251 252/** 253 * Parse the C string @a str into an unsigned 64 bit number, and return 254 * it in @a *n. Assume that the number is represented in base 10. 255 * Raise an error if conversion fails (e.g. due to overflow). 256 * 257 * The behaviour otherwise is as described for apr_cstr_strtoui64(), 258 * including the upper limit of APR_INT64_MAX. 259 * 260 * @since New in 1.6. 261 */ 262APR_DECLARE(apr_status_t) apr_cstr_atoui64(apr_uint64_t *n, const char *str); 263 264/** 265 * Parse the C string @a str into an unsigned 32 bit number, and return 266 * it in @a *n. Assume that the number is represented in base 10. 267 * Raise an error if conversion fails (e.g. due to overflow). 268 * 269 * The behaviour otherwise is as described for apr_cstr_strtoui64(), 270 * including the upper limit of APR_INT64_MAX. 271 * 272 * @since New in 1.6. 273 */ 274APR_DECLARE(apr_status_t) apr_cstr_atoui(unsigned int *n, const char *str); 275 276/** 277 * Skip the common prefix @a prefix from the C string @a str, and return 278 * a pointer to the next character after the prefix. 279 * Return @c NULL if @a str does not start with @a prefix. 280 * 281 * @since New in 1.6. 282 */ 283APR_DECLARE(const char *) apr_cstr_skip_prefix(const char *str, 284 const char *prefix); 285 286/** @} */ 287 288#ifdef __cplusplus 289} 290#endif /* __cplusplus */ 291 292#endif /* SVN_STRING_H */ 293