apr_cstr.h revision 339228
157429Smarkm/* ====================================================================
257429Smarkm *    Licensed to the Apache Software Foundation (ASF) under one
357429Smarkm *    or more contributor license agreements.  See the NOTICE file
457429Smarkm *    distributed with this work for additional information
557429Smarkm *    regarding copyright ownership.  The ASF licenses this file
657429Smarkm *    to you under the Apache License, Version 2.0 (the
757429Smarkm *    "License"); you may not use this file except in compliance
860573Skris *    with the License.  You may obtain a copy of the License at
965668Skris *
1065668Skris *      http://www.apache.org/licenses/LICENSE-2.0
1165668Skris *
1265668Skris *    Unless required by applicable law or agreed to in writing,
1365668Skris *    software distributed under the License is distributed on an
1465668Skris *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
1560573Skris *    KIND, either express or implied.  See the License for the
1692559Sdes *    specific language governing permissions and limitations
1765668Skris *    under the License.
1865668Skris * ====================================================================
1965668Skris */
2065668Skris
2165668Skris/**
2265668Skris * @file apr_cstr.h
2365668Skris * @brief C string goodies.
2465668Skris */
2565668Skris
2665668Skris#ifndef APR_CSTR_H
2765668Skris#define APR_CSTR_H
2865668Skris
2965668Skris#include <apr.h>          /* for apr_size_t */
3065668Skris#include <apr_pools.h>    /* for apr_pool_t */
3165668Skris#include <apr_tables.h>   /* for apr_array_header_t */
3265668Skris
3365668Skris#ifdef __cplusplus
3465668Skrisextern "C" {
3565668Skris#endif /* __cplusplus */
3665668Skris
3765668Skris/**
3865668Skris * @defgroup apr_cstr C (POSIX) locale string functions
3957429Smarkm * @ingroup apr_strings
4057429Smarkm *
4157429Smarkm * The apr_cstr_* functions provide traditional C char * string text handling,
42157019Sdes * and notably they treat all text in the C (a.k.a. POSIX) locale using the
4357429Smarkm * minimal POSIX character set, represented in either ASCII or a corresponding
4457429Smarkm * EBCDIC subset.
4576262Sgreen *
4676262Sgreen * Character values outside of that set are treated as opaque bytes, and all
4757429Smarkm * multi-byte character sequences are handled as individual distinct octets.
4857429Smarkm *
4976262Sgreen * Multi-byte character sequences whose octets fall in the ASCII range cause
5076262Sgreen * unexpected results, such as in the ISO-2022-JP code page where ASCII octets
5157429Smarkm * occur within both shift-state and multibyte sequences.
5257429Smarkm *
5376262Sgreen * In the case of the UTF-8 encoding, all multibyte characters fall outside
5465668Skris * of the C/POSIX range of characters, so these functions are generally safe
5565668Skris * to use on UTF-8 strings. The programmer must be aware that each octet may
5692559Sdes * not represent a distinct printable character in such encodings.
57124207Sdes *
5865668Skris * The standard C99/POSIX string functions, rather than apr_cstr, should be
5992559Sdes * used in all cases where the current locale and encoding of the text is
6057429Smarkm * significant.
6157429Smarkm * @{
6257429Smarkm */
6357429Smarkm
6457429Smarkm
6592559Sdes/** Divide @a input into substrings, interpreting any char from
6657429Smarkm * @a sep_chars as a token separator.
6757429Smarkm *
6857429Smarkm * Return an array of copies of those substrings (plain const char*),
6992559Sdes * allocating both the array and the copies in @a pool.
7057429Smarkm *
71137019Sdes * None of the elements added to the array contain any of the
7257429Smarkm * characters in @a sep_chars, and none of the new elements are empty
7357429Smarkm * (thus, it is possible that the returned array will have length
7457429Smarkm * zero).
7592559Sdes *
7657429Smarkm * If @a chop_whitespace is TRUE, then remove leading and trailing
7776262Sgreen * whitespace from the returned strings.
7857429Smarkm *
7957429Smarkm * @since New in 1.6
8092559Sdes */
8157429SmarkmAPR_DECLARE(apr_array_header_t *) apr_cstr_split(const char *input,
8257429Smarkm                                                 const char *sep_chars,
8357429Smarkm                                                 int chop_whitespace,
8457429Smarkm                                                 apr_pool_t *pool);
8557429Smarkm
8657429Smarkm/** Like apr_cstr_split(), but append to existing @a array instead of
8757429Smarkm * creating a new one.  Allocate the copied substrings in @a pool
8857429Smarkm * (i.e., caller decides whether or not to pass @a array->pool as @a pool).
 *
 * @a input, @a sep_chars and @a chop_whitespace are interpreted as
 * described for apr_cstr_split().
8960573Skris *
9060573Skris * @since New in 1.6
9160573Skris */
9257429SmarkmAPR_DECLARE(void) apr_cstr_split_append(apr_array_header_t *array,
9357429Smarkm                                        const char *input,
9457429Smarkm                                        const char *sep_chars,
9557429Smarkm                                        int chop_whitespace,
9692559Sdes                                        apr_pool_t *pool);
9757429Smarkm
9857429Smarkm
9957429Smarkm/** Return @c TRUE iff @a str matches any of the elements of @a list, a list
10057429Smarkm * of zero or more glob patterns.  An empty @a list therefore never matches.
10157429Smarkm *
10257429Smarkm * @since New in 1.6
10357429Smarkm */
10457429SmarkmAPR_DECLARE(int) apr_cstr_match_glob_list(const char *str,
10557429Smarkm                                          const apr_array_header_t *list);
10657429Smarkm
10792559Sdes/** Return @c TRUE iff @a str exactly matches any of the elements of @a list.
 * An empty @a list therefore never matches.
10892559Sdes *
10992559Sdes * @since New in 1.6
11092559Sdes */
11192559SdesAPR_DECLARE(int) apr_cstr_match_list(const char *str,
112149753Sdes                                     const apr_array_header_t *list);
113149753Sdes
114149753Sdes/**
11592559Sdes * Get the next token from @a *str interpreting any char from @a sep as a
11692559Sdes * token separator.  Separators at the beginning of @a *str will be skipped.
11792559Sdes * Returns a pointer to the beginning of the first token in @a *str or NULL
11892559Sdes * if no token is left.  Updates @a *str such that the next call will return
11992559Sdes * the next token.
12092559Sdes *
12192559Sdes * @note The characters of the string that @a *str points to may be
 * modified by this function.
12292559Sdes *
12392559Sdes * @since New in 1.6.
12492559Sdes */
12592559SdesAPR_DECLARE(char *) apr_cstr_tokenize(const char *sep, char **str);
12692559Sdes
12792559Sdes/**
12892559Sdes * Return the number of line breaks in @a msg, allowing any kind of newline
12992559Sdes * termination (CR, LF, CRLF, or LFCR), even if mixed within @a msg.
13092559Sdes *
 * NOTE(review): a two-character CRLF or LFCR sequence is presumably counted
 * as a single line break -- confirm against the implementation.
 *
13192559Sdes * @since New in 1.6.
13292559Sdes */
13392559SdesAPR_DECLARE(int) apr_cstr_count_newlines(const char *msg);
13476262Sgreen
13598941Sdes#if 0 /* XXX: stringbuf logic is not present in APR */
13676262Sgreen/**
13792559Sdes * Return a cstring which is the concatenation of @a strings (an array
13892559Sdes * of char *) each followed by @a separator (that is, @a separator
13976262Sgreen * will also end the resulting string).  Allocate the result in @a pool.
14092559Sdes * If @a strings is empty, then return the empty string.
14157429Smarkm *
 * @note This declaration sits inside an <tt>\#if 0</tt> section, so it is
 * compiled out and this header does not currently declare the function.
 *
14260573Skris * @since New in 1.6.
143157019Sdes */
14460573SkrisAPR_DECLARE(char *) apr_cstr_join(const apr_array_header_t *strings,
14560573Skris                                  const char *separator,
14692559Sdes                                  apr_pool_t *pool);
147137019Sdes#endif
148157019Sdes
14960573Skris/**
15060573Skris * Perform a case-insensitive comparison of two strings @a str1 and @a str2,
15192559Sdes * treating upper and lower case values of the 26 standard C/POSIX alphabetic
15292559Sdes * characters as equivalent. Extended latin characters outside of this set
153157019Sdes * are treated as unique octets, irrespective of the current locale.
15460573Skris *
15560573Skris * Returns an integer greater than, equal to, or less than 0,
15660573Skris * according to whether @a str1 is considered greater than, equal to,
15760573Skris * or less than @a str2.
15860573Skris *
15957429Smarkm * @since New in 1.6.
160157019Sdes */
161157019SdesAPR_DECLARE(int) apr_cstr_casecmp(const char *str1, const char *str2);
162157019Sdes
163157019Sdes/**
164157019Sdes * Perform a case-insensitive comparison of two strings @a str1 and @a str2,
165157019Sdes * treating upper and lower case values of the 26 standard C/POSIX alphabetic
166157019Sdes * characters as equivalent. Extended latin characters outside of this set
167157019Sdes * are treated as unique octets, irrespective of the current locale.
168157019Sdes *
 * NOTE(review): @a n is presumably the maximum number of characters that
 * are compared (as with strncasecmp()) -- confirm against the implementation.
 *
169157019Sdes * Returns an integer greater than, equal to, or less than 0,
170157019Sdes * according to whether @a str1 is considered greater than, equal to,
171157019Sdes * or less than @a str2.
172157019Sdes *
173157019Sdes * @since New in 1.6.
174157019Sdes */
175157019SdesAPR_DECLARE(int) apr_cstr_casecmpn(const char *str1,
176157019Sdes                                   const char *str2,
177157019Sdes                                   apr_size_t n);
178157019Sdes
179157019Sdes/**
180157019Sdes * Parse the C string @a str into a signed 64 bit number, and return it in
181157019Sdes * @a *n.  Assume that the number is represented in base @a base.
182157019Sdes * Return an error status if conversion fails (e.g. due to overflow), or if
183157019Sdes * the converted number is smaller than @a minval or larger than @a maxval.
184157019Sdes *
185157019Sdes * Leading whitespace in @a str is skipped in a locale-dependent way.
186157019Sdes * After that, the string may contain an optional '+' (positive, default)
187157019Sdes * or '-' (negative) character, followed by an optional '0x' prefix if
18860573Skris * @a base is 0 or 16, followed by numeric digits appropriate for the base.
18960573Skris * If there are any more characters after the numeric digits, an error is
19060573Skris * returned.
19160573Skris *
19292559Sdes * If @a base is zero, then a leading '0x' or '0X' prefix means hexadecimal,
19369587Sgreen * else a leading '0' means octal (implemented, though not documented, in
19469587Sgreen * apr_strtoi64() in APR 0.9.0 through 1.5.0), else use base ten.
19560573Skris *
19660573Skris * @since New in 1.6.
19776262Sgreen */
19876262SgreenAPR_DECLARE(apr_status_t) apr_cstr_strtoi64(apr_int64_t *n, const char *str,
19976262Sgreen                                            apr_int64_t minval,
20076262Sgreen                                            apr_int64_t maxval,
20160573Skris                                            int base);
20260573Skris
20360573Skris/**
20460573Skris * Parse the C string @a str into a signed 64 bit number, and return it in
20560573Skris * @a *n.  Assume that the number is represented in base 10.
206137019Sdes * Return an error status if conversion fails (e.g. due to overflow).
20760573Skris *
20860573Skris * The behaviour otherwise is as described for apr_cstr_strtoi64().
20969587Sgreen *
21074500Sgreen * @since New in 1.6.
21174500Sgreen */
212124207SdesAPR_DECLARE(apr_status_t) apr_cstr_atoi64(apr_int64_t *n, const char *str);
21374500Sgreen
21474500Sgreen/**
21576262Sgreen * Parse the C string @a str into a 32 bit number, and return it in @a *n.
21674500Sgreen * Assume that the number is represented in base 10.
21774500Sgreen * Return an error status if conversion fails (e.g. due to overflow).
21874500Sgreen *
 * NOTE(review): the result is stored in an @c int, so "32 bit" assumes a
 * platform where @c int is 32 bits wide -- confirm the range actually
 * enforced by the implementation.
 *
21974500Sgreen * The behaviour otherwise is as described for apr_cstr_strtoi64().
22074500Sgreen *
221106130Sdes * @since New in 1.6.
22274500Sgreen */
22369587SgreenAPR_DECLARE(apr_status_t) apr_cstr_atoi(int *n, const char *str);
22469587Sgreen
22569587Sgreen/**
22669587Sgreen * Parse the C string @a str into an unsigned 64 bit number, and return
22769587Sgreen * it in @a *n. Assume that the number is represented in base @a base.
22869587Sgreen * Return an error status if conversion fails (e.g. due to overflow), or if
22969587Sgreen * the converted number is smaller than @a minval or larger than @a maxval.
23069587Sgreen *
23169587Sgreen * Leading whitespace in @a str is skipped in a locale-dependent way.
23260573Skris * After that, the string may contain an optional '+' (positive, default)
23360573Skris * or '-' (negative) character, followed by an optional '0x' prefix if
23460573Skris * @a base is 0 or 16, followed by numeric digits appropriate for the base.
23557429Smarkm * If there are any more characters after the numeric digits, an error is
23657429Smarkm * returned.
23757429Smarkm *
23857429Smarkm * If @a base is zero, then a leading '0x' or '0X' prefix means hexadecimal,
23992559Sdes * else a leading '0' means octal (as implemented, though not documented, in
24060573Skris * apr_strtoi64()), else use base ten.
24199063Sdes *
24257429Smarkm * @warning The implementation returns APR_ERANGE if the parsed number
243137019Sdes * is greater than APR_INT64_MAX, even if it is not greater than @a maxval.
244137019Sdes *
24557429Smarkm * @since New in 1.6.
24657429Smarkm */
24757429SmarkmAPR_DECLARE(apr_status_t) apr_cstr_strtoui64(apr_uint64_t *n, const char *str,
24857429Smarkm                                             apr_uint64_t minval,
24957429Smarkm                                             apr_uint64_t maxval,
25092559Sdes                                             int base);
25157429Smarkm
25292559Sdes/**
25357429Smarkm * Parse the C string @a str into an unsigned 64 bit number, and return
25457429Smarkm * it in @a *n. Assume that the number is represented in base 10.
25557429Smarkm * Return an error status if conversion fails (e.g. due to overflow).
25692559Sdes *
25757429Smarkm * The behaviour otherwise is as described for apr_cstr_strtoui64(),
258137019Sdes * including the upper limit of APR_INT64_MAX.
25957429Smarkm *
26057429Smarkm * @since New in 1.6.
261137019Sdes */
26257429SmarkmAPR_DECLARE(apr_status_t) apr_cstr_atoui64(apr_uint64_t *n, const char *str);
26357429Smarkm
26499063Sdes/**
26599063Sdes * Parse the C string @a str into an unsigned 32 bit number, and return
26699063Sdes * it in @a *n. Assume that the number is represented in base 10.
267120489Sjoe * Return an error status if conversion fails (e.g. due to overflow).
268120489Sjoe *
 * NOTE(review): the result is stored in an <tt>unsigned int</tt>, so
 * "32 bit" assumes a platform where that type is 32 bits wide -- confirm
 * the range actually enforced by the implementation.
 *
269120489Sjoe * The behaviour otherwise is as described for apr_cstr_strtoui64(),
27069587Sgreen * including the upper limit of APR_INT64_MAX.
27157429Smarkm *
27292559Sdes * @since New in 1.6.
27357429Smarkm */
27492559SdesAPR_DECLARE(apr_status_t) apr_cstr_atoui(unsigned int *n, const char *str);
27592559Sdes
27692559Sdes/**
27757429Smarkm * Skip the common prefix @a prefix from the C string @a str, and return
27857429Smarkm * a pointer to the next character after the prefix (i.e. a pointer into
 * @a str; no copy is described).
27960573Skris * Return @c NULL if @a str does not start with @a prefix.
28092559Sdes *
28192559Sdes * @since New in 1.6.
28292559Sdes */
28369587SgreenAPR_DECLARE(const char *) apr_cstr_skip_prefix(const char *str,
28457429Smarkm                                               const char *prefix);
28557429Smarkm
28660573Skris/** @} */
28760573Skris
28860573Skris#ifdef __cplusplus
28960573Skris}
29060573Skris#endif /* __cplusplus */
29157429Smarkm
292124207Sdes#endif  /* APR_CSTR_H */
29360573Skris