apr_cstr.h revision 339228
157429Smarkm/* ==================================================================== 257429Smarkm * Licensed to the Apache Software Foundation (ASF) under one 357429Smarkm * or more contributor license agreements. See the NOTICE file 457429Smarkm * distributed with this work for additional information 557429Smarkm * regarding copyright ownership. The ASF licenses this file 657429Smarkm * to you under the Apache License, Version 2.0 (the 757429Smarkm * "License"); you may not use this file except in compliance 860573Skris * with the License. You may obtain a copy of the License at 965668Skris * 1065668Skris * http://www.apache.org/licenses/LICENSE-2.0 1165668Skris * 1265668Skris * Unless required by applicable law or agreed to in writing, 1365668Skris * software distributed under the License is distributed on an 1465668Skris * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 1560573Skris * KIND, either express or implied. See the License for the 1692559Sdes * specific language governing permissions and limitations 1765668Skris * under the License. 1865668Skris * ==================================================================== 1965668Skris */ 2065668Skris 2165668Skris/** 2265668Skris * @file apr_cstr.h 2365668Skris * @brief C string goodies. 2465668Skris */ 2565668Skris 2665668Skris#ifndef APR_CSTR_H 2765668Skris#define APR_CSTR_H 2865668Skris 2965668Skris#include <apr.h> /* for apr_size_t */ 3065668Skris#include <apr_pools.h> /* for apr_pool_t */ 3165668Skris#include <apr_tables.h> /* for apr_array_header_t */ 3265668Skris 3365668Skris#ifdef __cplusplus 3465668Skrisextern "C" { 3565668Skris#endif /* __cplusplus */ 3665668Skris 3765668Skris/** 3865668Skris * @defgroup apr_cstr C (POSIX) locale string functions 3957429Smarkm * @ingroup apr_strings 4057429Smarkm * 4157429Smarkm * The apr_cstr_* functions provide traditional C char * string text handling, 42157019Sdes * and notabilty they treat all text in the C (a.k.a. POSIX) locale using the 4357429Smarkm * minimal POSIX character set, represented in either ASCII or a corresponding 4457429Smarkm * EBCDIC subset. 4576262Sgreen * 4676262Sgreen * Character values outside of that set are treated as opaque bytes, and all 4757429Smarkm * multi-byte character sequences are handled as individual distinct octets. 4857429Smarkm * 4976262Sgreen * Multi-byte characters sequences whose octets fall in the ASCII range cause 5076262Sgreen * unexpected results, such as in the ISO-2022-JP code page where ASCII octets 5157429Smarkm * occur within both shift-state and multibyte sequences. 5257429Smarkm * 5376262Sgreen * In the case of the UTF-8 encoding, all multibyte characters all fall outside 5465668Skris * of the C/POSIX range of characters, so these functions are generally safe 5565668Skris * to use on UTF-8 strings. The programmer must be aware that each octet may 5692559Sdes * not represent a distinct printable character in such encodings. 57124207Sdes * 5865668Skris * The standard C99/POSIX string functions, rather than apr_cstr, should be 5992559Sdes * used in all cases where the current locale and encoding of the text is 6057429Smarkm * significant. 6157429Smarkm * @{ 6257429Smarkm */ 6357429Smarkm 6457429Smarkm 6592559Sdes/** Divide @a input into substrings, interpreting any char from @a sep 6657429Smarkm * as a token separator. 6757429Smarkm * 6857429Smarkm * Return an array of copies of those substrings (plain const char*), 6992559Sdes * allocating both the array and the copies in @a pool. 7057429Smarkm * 71137019Sdes * None of the elements added to the array contain any of the 7257429Smarkm * characters in @a sep_chars, and none of the new elements are empty 7357429Smarkm * (thus, it is possible that the returned array will have length 7457429Smarkm * zero). 7592559Sdes * 7657429Smarkm * If @a chop_whitespace is TRUE, then remove leading and trailing 7776262Sgreen * whitespace from the returned strings. 7857429Smarkm * 7957429Smarkm * @since New in 1.6 8092559Sdes */ 8157429SmarkmAPR_DECLARE(apr_array_header_t *) apr_cstr_split(const char *input, 8257429Smarkm const char *sep_chars, 8357429Smarkm int chop_whitespace, 8457429Smarkm apr_pool_t *pool); 8557429Smarkm 8657429Smarkm/** Like apr_cstr_split(), but append to existing @a array instead of 8757429Smarkm * creating a new one. Allocate the copied substrings in @a pool 8857429Smarkm * (i.e., caller decides whether or not to pass @a array->pool as @a pool). 8960573Skris * 9060573Skris * @since New in 1.6 9160573Skris */ 9257429SmarkmAPR_DECLARE(void) apr_cstr_split_append(apr_array_header_t *array, 9357429Smarkm const char *input, 9457429Smarkm const char *sep_chars, 9557429Smarkm int chop_whitespace, 9692559Sdes apr_pool_t *pool); 9757429Smarkm 9857429Smarkm 9957429Smarkm/** Return @c TRUE iff @a str matches any of the elements of @a list, a list 10057429Smarkm * of zero or more glob patterns. 10157429Smarkm * 10257429Smarkm * @since New in 1.6 10357429Smarkm */ 10457429SmarkmAPR_DECLARE(int) apr_cstr_match_glob_list(const char *str, 10557429Smarkm const apr_array_header_t *list); 10657429Smarkm 10792559Sdes/** Return @c TRUE iff @a str exactly matches any of the elements of @a list. 10892559Sdes * 10992559Sdes * @since New in 1.6 11092559Sdes */ 11192559SdesAPR_DECLARE(int) apr_cstr_match_list(const char *str, 112149753Sdes const apr_array_header_t *list); 113149753Sdes 114149753Sdes/** 11592559Sdes * Get the next token from @a *str interpreting any char from @a sep as a 11692559Sdes * token separator. Separators at the beginning of @a str will be skipped. 11792559Sdes * Returns a pointer to the beginning of the first token in @a *str or NULL 11892559Sdes * if no token is left. Modifies @a str such that the next call will return 11992559Sdes * the next token. 12092559Sdes * 12192559Sdes * @note The content of @a *str may be modified by this function. 12292559Sdes * 12392559Sdes * @since New in 1.6. 12492559Sdes */ 12592559SdesAPR_DECLARE(char *) apr_cstr_tokenize(const char *sep, char **str); 12692559Sdes 12792559Sdes/** 12892559Sdes * Return the number of line breaks in @a msg, allowing any kind of newline 12992559Sdes * termination (CR, LF, CRLF, or LFCR), even inconsistent. 13092559Sdes * 13192559Sdes * @since New in 1.6. 13292559Sdes */ 13392559SdesAPR_DECLARE(int) apr_cstr_count_newlines(const char *msg); 13476262Sgreen 13598941Sdes#if 0 /* XXX: stringbuf logic is not present in APR */ 13676262Sgreen/** 13792559Sdes * Return a cstring which is the concatenation of @a strings (an array 13892559Sdes * of char *) each followed by @a separator (that is, @a separator 13976262Sgreen * will also end the resulting string). Allocate the result in @a pool. 14092559Sdes * If @a strings is empty, then return the empty string. 14157429Smarkm * 14260573Skris * @since New in 1.6. 143157019Sdes */ 14460573SkrisAPR_DECLARE(char *) apr_cstr_join(const apr_array_header_t *strings, 14560573Skris const char *separator, 14692559Sdes apr_pool_t *pool); 147137019Sdes#endif 148157019Sdes 14960573Skris/** 15060573Skris * Perform a case-insensitive comparison of two strings @a atr1 and @a atr2, 15192559Sdes * treating upper and lower case values of the 26 standard C/POSIX alphabetic 15292559Sdes * characters as equivalent. Extended latin characters outside of this set 153157019Sdes * are treated as unique octets, irrespective of the current locale. 15460573Skris * 15560573Skris * Returns in integer greater than, equal to, or less than 0, 15660573Skris * according to whether @a str1 is considered greater than, equal to, 15760573Skris * or less than @a str2. 15860573Skris * 15957429Smarkm * @since New in 1.6. 160157019Sdes */ 161157019SdesAPR_DECLARE(int) apr_cstr_casecmp(const char *str1, const char *str2); 162157019Sdes 163157019Sdes/** 164157019Sdes * Perform a case-insensitive comparison of two strings @a atr1 and @a atr2, 165157019Sdes * treating upper and lower case values of the 26 standard C/POSIX alphabetic 166157019Sdes * characters as equivalent. Extended latin characters outside of this set 167157019Sdes * are treated as unique octets, irrespective of the current locale. 168157019Sdes * 169157019Sdes * Returns in integer greater than, equal to, or less than 0, 170157019Sdes * according to whether @a str1 is considered greater than, equal to, 171157019Sdes * or less than @a str2. 172157019Sdes * 173157019Sdes * @since New in 1.6. 174157019Sdes */ 175157019SdesAPR_DECLARE(int) apr_cstr_casecmpn(const char *str1, 176157019Sdes const char *str2, 177157019Sdes apr_size_t n); 178157019Sdes 179157019Sdes/** 180157019Sdes * Parse the C string @a str into a 64 bit number, and return it in @a *n. 181157019Sdes * Assume that the number is represented in base @a base. 182157019Sdes * Raise an error if conversion fails (e.g. due to overflow), or if the 183157019Sdes * converted number is smaller than @a minval or larger than @a maxval. 184157019Sdes * 185157019Sdes * Leading whitespace in @a str is skipped in a locale-dependent way. 186157019Sdes * After that, the string may contain an optional '+' (positive, default) 187157019Sdes * or '-' (negative) character, followed by an optional '0x' prefix if 18860573Skris * @a base is 0 or 16, followed by numeric digits appropriate for the base. 18960573Skris * If there are any more characters after the numeric digits, an error is 19060573Skris * returned. 19160573Skris * 19292559Sdes * If @a base is zero, then a leading '0x' or '0X' prefix means hexadecimal, 19369587Sgreen * else a leading '0' means octal (implemented, though not documented, in 19469587Sgreen * apr_strtoi64() in APR 0.9.0 through 1.5.0), else use base ten. 19560573Skris * 19660573Skris * @since New in 1.6. 19776262Sgreen */ 19876262SgreenAPR_DECLARE(apr_status_t) apr_cstr_strtoi64(apr_int64_t *n, const char *str, 19976262Sgreen apr_int64_t minval, 20076262Sgreen apr_int64_t maxval, 20160573Skris int base); 20260573Skris 20360573Skris/** 20460573Skris * Parse the C string @a str into a 64 bit number, and return it in @a *n. 20560573Skris * Assume that the number is represented in base 10. 206137019Sdes * Raise an error if conversion fails (e.g. due to overflow). 20760573Skris * 20860573Skris * The behaviour otherwise is as described for apr_cstr_strtoi64(). 20969587Sgreen * 21074500Sgreen * @since New in 1.6. 21174500Sgreen */ 212124207SdesAPR_DECLARE(apr_status_t) apr_cstr_atoi64(apr_int64_t *n, const char *str); 21374500Sgreen 21474500Sgreen/** 21576262Sgreen * Parse the C string @a str into a 32 bit number, and return it in @a *n. 21674500Sgreen * Assume that the number is represented in base 10. 21774500Sgreen * Raise an error if conversion fails (e.g. due to overflow). 21874500Sgreen * 21974500Sgreen * The behaviour otherwise is as described for apr_cstr_strtoi64(). 22074500Sgreen * 221106130Sdes * @since New in 1.6. 22274500Sgreen */ 22369587SgreenAPR_DECLARE(apr_status_t) apr_cstr_atoi(int *n, const char *str); 22469587Sgreen 22569587Sgreen/** 22669587Sgreen * Parse the C string @a str into an unsigned 64 bit number, and return 22769587Sgreen * it in @a *n. Assume that the number is represented in base @a base. 22869587Sgreen * Raise an error if conversion fails (e.g. due to overflow), or if the 22969587Sgreen * converted number is smaller than @a minval or larger than @a maxval. 23069587Sgreen * 23169587Sgreen * Leading whitespace in @a str is skipped in a locale-dependent way. 23260573Skris * After that, the string may contain an optional '+' (positive, default) 23360573Skris * or '-' (negative) character, followed by an optional '0x' prefix if 23460573Skris * @a base is 0 or 16, followed by numeric digits appropriate for the base. 23557429Smarkm * If there are any more characters after the numeric digits, an error is 23657429Smarkm * returned. 23757429Smarkm * 23857429Smarkm * If @a base is zero, then a leading '0x' or '0X' prefix means hexadecimal, 23992559Sdes * else a leading '0' means octal (as implemented, though not documented, in 24060573Skris * apr_strtoi64(), else use base ten. 24199063Sdes * 24257429Smarkm * @warning The implementation returns APR_ERANGE if the parsed number 243137019Sdes * is greater than APR_INT64_MAX, even if it is not greater than @a maxval. 244137019Sdes * 24557429Smarkm * @since New in 1.6. 24657429Smarkm */ 24757429SmarkmAPR_DECLARE(apr_status_t) apr_cstr_strtoui64(apr_uint64_t *n, const char *str, 24857429Smarkm apr_uint64_t minval, 24957429Smarkm apr_uint64_t maxval, 25092559Sdes int base); 25157429Smarkm 25292559Sdes/** 25357429Smarkm * Parse the C string @a str into an unsigned 64 bit number, and return 25457429Smarkm * it in @a *n. Assume that the number is represented in base 10. 25557429Smarkm * Raise an error if conversion fails (e.g. due to overflow). 25692559Sdes * 25757429Smarkm * The behaviour otherwise is as described for apr_cstr_strtoui64(), 258137019Sdes * including the upper limit of APR_INT64_MAX. 25957429Smarkm * 26057429Smarkm * @since New in 1.6. 261137019Sdes */ 26257429SmarkmAPR_DECLARE(apr_status_t) apr_cstr_atoui64(apr_uint64_t *n, const char *str); 26357429Smarkm 26499063Sdes/** 26599063Sdes * Parse the C string @a str into an unsigned 32 bit number, and return 26699063Sdes * it in @a *n. Assume that the number is represented in base 10. 267120489Sjoe * Raise an error if conversion fails (e.g. due to overflow). 268120489Sjoe * 269120489Sjoe * The behaviour otherwise is as described for apr_cstr_strtoui64(), 27069587Sgreen * including the upper limit of APR_INT64_MAX. 27157429Smarkm * 27292559Sdes * @since New in 1.6. 27357429Smarkm */ 27492559SdesAPR_DECLARE(apr_status_t) apr_cstr_atoui(unsigned int *n, const char *str); 27592559Sdes 27692559Sdes/** 27757429Smarkm * Skip the common prefix @a prefix from the C string @a str, and return 27857429Smarkm * a pointer to the next character after the prefix. 27960573Skris * Return @c NULL if @a str does not start with @a prefix. 28092559Sdes * 28192559Sdes * @since New in 1.6. 28292559Sdes */ 28369587SgreenAPR_DECLARE(const char *) apr_cstr_skip_prefix(const char *str, 28457429Smarkm const char *prefix); 28557429Smarkm 28660573Skris/** @} */ 28760573Skris 28860573Skris#ifdef __cplusplus 28960573Skris} 29060573Skris#endif /* __cplusplus */ 29157429Smarkm 292124207Sdes#endif /* SVN_STRING_H */ 29360573Skris