1/*    Licensed to the Apache Software Foundation (ASF) under one
2 *    or more contributor license agreements.  See the NOTICE file
3 *    distributed with this work for additional information
4 *    regarding copyright ownership.  The ASF licenses this file
5 *    to you under the Apache License, Version 2.0 (the
6 *    "License"); you may not use this file except in compliance
7 *    with the License.  You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 *    Unless required by applicable law or agreed to in writing,
12 *    software distributed under the License is distributed on an
13 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 *    KIND, either express or implied.  See the License for the
15 *    specific language governing permissions and limitations
16 *    under the License.
17 */
18
19#include "apr.h"
20#include "apr_lib.h"
21#include "apr_strings.h"
22#include "apr_fnmatch.h"
23#if 0
24#define APR_WANT_STDIO
25#define APR_WANT_STRFUNC
26#endif
27#include "apr_want.h"
28#include "apr_cstr.h"
29
30APR_DECLARE(void) apr_cstr_split_append(apr_array_header_t *array,
31                                        const char *input,
32                                        const char *sep_chars,
33                                        int chop_whitespace,
34                                        apr_pool_t *pool)
35{
36  char *pats;
37  char *p;
38
39  pats = apr_pstrdup(pool, input);  /* strtok wants non-const data */
40  p = apr_cstr_tokenize(sep_chars, &pats);
41
42  while (p)
43    {
44      if (chop_whitespace)
45        {
46          while (apr_isspace(*p))
47            p++;
48
49          {
50            char *e = p + (strlen(p) - 1);
51            while ((e >= p) && (apr_isspace(*e)))
52              e--;
53            *(++e) = '\0';
54          }
55        }
56
57      if (p[0] != '\0')
58        APR_ARRAY_PUSH(array, const char *) = p;
59
60      p = apr_cstr_tokenize(sep_chars, &pats);
61    }
62
63  return;
64}
65
66
67APR_DECLARE(apr_array_header_t *) apr_cstr_split(const char *input,
68                                                 const char *sep_chars,
69                                                 int chop_whitespace,
70                                                 apr_pool_t *pool)
71{
72  apr_array_header_t *a = apr_array_make(pool, 5, sizeof(input));
73  apr_cstr_split_append(a, input, sep_chars, chop_whitespace, pool);
74  return a;
75}
76
77
78APR_DECLARE(int) apr_cstr_match_glob_list(const char *str,
79                                          const apr_array_header_t *list)
80{
81  int i;
82
83  for (i = 0; i < list->nelts; i++)
84    {
85      const char *this_pattern = APR_ARRAY_IDX(list, i, char *);
86
87      if (apr_fnmatch(this_pattern, str, 0) == APR_SUCCESS)
88        return TRUE;
89    }
90
91  return FALSE;
92}
93
94APR_DECLARE(int) apr_cstr_match_list(const char *str,
95                                     const apr_array_header_t *list)
96{
97  int i;
98
99  for (i = 0; i < list->nelts; i++)
100    {
101      const char *this_str = APR_ARRAY_IDX(list, i, char *);
102
103      if (strcmp(this_str, str) == 0)
104        return TRUE;
105    }
106
107  return FALSE;
108}
109
110APR_DECLARE(char *) apr_cstr_tokenize(const char *sep, char **str)
111{
112    char *token;
113    char *next;
114    char csep;
115
116    /* check parameters */
117    if ((sep == NULL) || (str == NULL) || (*str == NULL))
118        return NULL;
119
120    /* let APR handle edge cases and multiple separators */
121    csep = *sep;
122    if (csep == '\0' || sep[1] != '\0')
123      return apr_strtok(NULL, sep, str);
124
125    /* skip characters in sep (will terminate at '\0') */
126    token = *str;
127    while (*token == csep)
128        ++token;
129
130    if (!*token)          /* no more tokens */
131        return NULL;
132
133    /* skip valid token characters to terminate token and
134     * prepare for the next call (will terminate at '\0)
135     */
136    next = strchr(token, csep);
137    if (next == NULL)
138      {
139        *str = token + strlen(token);
140      }
141    else
142      {
143        *next = '\0';
144        *str = next + 1;
145      }
146
147    return token;
148}
149
/*
 * Count the line terminators in the NUL-terminated string MSG.
 *
 * A lone '\n' or '\r' counts as one terminator, and so does either
 * two-character pair "\r\n" or "\n\r".
 */
APR_DECLARE(int) apr_cstr_count_newlines(const char *msg)
{
  int newlines = 0;
  const char *cur = msg;

  while (*cur != '\0')
    {
      const char here = cur[0];

      if (here == '\n' || here == '\r')
        {
          /* The opposite terminator character, directly following,
           * belongs to the same line ending. */
          const char partner = (here == '\n') ? '\r' : '\n';

          newlines++;
          if (cur[1] == partner)
            cur++;
        }

      cur++;
    }

  return newlines;
}
173
#if 0 /* XXX: stringbuf logic is not present in APR */
/*
 * Concatenate the C strings in STRINGS into one string built in a
 * stringbuf created from POOL.  SEPARATOR is appended after every element,
 * including the last one.  Returns the stringbuf's data pointer.
 *
 * Disabled: relies on svn_stringbuf_* helpers that APR does not provide.
 */
APR_DECLARE(char *) apr_cstr_join(const apr_array_header_t *strings,
                                  const char *separator,
                                  apr_pool_t *pool)
{
  svn_stringbuf_t *joined = svn_stringbuf_create_empty(pool);
  const size_t separator_len = strlen(separator);
  int idx = 0;

  while (idx < strings->nelts)
    {
      const char *piece = APR_ARRAY_IDX(strings, idx, const char *);

      svn_stringbuf_appendbytes(joined, piece, strlen(piece));
      svn_stringbuf_appendbytes(joined, separator, separator_len);
      idx++;
    }

  return joined->data;
}
#endif
192
#if !APR_CHARSET_EBCDIC
/*
 * Case-folding lookup table used by the casecmp functions, indexed by
 * octet value.  Only the ASCII letters 0x41-0x5A ('A'-'Z') are folded to
 * 0x61-0x7A ('a'-'z'); every other octet, including extended latin
 * alphabetics, maps to itself.
 * NOTE: Other than Alpha A-Z/a-z, each code point is unique!
 *
 * The comment at the start of each row is the index of its first entry.
 */
static const short ucharmap[] = {
    /* 0x00 */ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    /* 0x08 */ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    /* 0x10 */ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    /* 0x18 */ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
    /* 0x20 */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
    /* 0x28 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
    /* 0x30 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0x38 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
    /* 0x40 */ 0x40,  'a',  'b',  'c',  'd',  'e',  'f',  'g',
    /* 0x48 */  'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
    /* 0x50 */  'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
    /* 0x58 */  'x',  'y',  'z', 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
    /* 0x60 */ 0x60,  'a',  'b',  'c',  'd',  'e',  'f',  'g',
    /* 0x68 */  'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
    /* 0x70 */  'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
    /* 0x78 */  'x',  'y',  'z', 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
    /* 0x80 */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    /* 0x88 */ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
    /* 0x90 */ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
    /* 0x98 */ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
    /* 0xa0 */ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
    /* 0xa8 */ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
    /* 0xb0 */ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
    /* 0xb8 */ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
    /* 0xc0 */ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
    /* 0xc8 */ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
    /* 0xd0 */ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
    /* 0xd8 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
    /* 0xe0 */ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
    /* 0xe8 */ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
    /* 0xf0 */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
    /* 0xf8 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
};
#else /* APR_CHARSET_EBCDIC */
/*
 * Case-folding lookup table for EBCDIC builds, indexed by octet value.
 *
 * Derived from apr-iconv/ccs/cp037.c for EBCDIC case comparison,
 * provides unique identity of every char value (strict ISO-646
 * conformance, arbitrary election of an ISO-8859-1 ordering, and
 * very arbitrary control code assignments into C1 to achieve
 * identity and a reversible mapping of code points),
 * then folding the equivalences of ASCII 41-5A into 61-7A,
 * presenting comparison results in a somewhat ISO/IEC 10646
 * (ASCII-like) order, depending on the EBCDIC code page in use.
 *
 * NOTE: Other than Alpha A-Z/a-z, each code point is unique!
 *
 * The comment at the start of each row is the index of its first entry.
 */
static const short ucharmap[] = {
    /* 0x00 */ 0x00, 0x01, 0x02, 0x03, 0x9C, 0x09, 0x86, 0x7F,
    /* 0x08 */ 0x97, 0x8D, 0x8E, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
    /* 0x10 */ 0x10, 0x11, 0x12, 0x13, 0x9D, 0x85, 0x08, 0x87,
    /* 0x18 */ 0x18, 0x19, 0x92, 0x8F, 0x1C, 0x1D, 0x1E, 0x1F,
    /* 0x20 */ 0x80, 0x81, 0x82, 0x83, 0x84, 0x0A, 0x17, 0x1B,
    /* 0x28 */ 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x05, 0x06, 0x07,
    /* 0x30 */ 0x90, 0x91, 0x16, 0x93, 0x94, 0x95, 0x96, 0x04,
    /* 0x38 */ 0x98, 0x99, 0x9A, 0x9B, 0x14, 0x15, 0x9E, 0x1A,
    /* 0x40 */ 0x20, 0xA0, 0xE2, 0xE4, 0xE0, 0xE1, 0xE3, 0xE5,
    /* 0x48 */ 0xE7, 0xF1, 0xA2, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
    /* 0x50 */ 0x26, 0xE9, 0xEA, 0xEB, 0xE8, 0xED, 0xEE, 0xEF,
    /* 0x58 */ 0xEC, 0xDF, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0xAC,
    /* 0x60 */ 0x2D, 0x2F, 0xC2, 0xC4, 0xC0, 0xC1, 0xC3, 0xC5,
    /* 0x68 */ 0xC7, 0xD1, 0xA6, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
    /* 0x70 */ 0xF8, 0xC9, 0xCA, 0xCB, 0xC8, 0xCD, 0xCE, 0xCF,
    /* 0x78 */ 0xCC, 0x60, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
    /* 0x80 */ 0xD8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0x88 */ 0x68, 0x69, 0xAB, 0xBB, 0xF0, 0xFD, 0xFE, 0xB1,
    /* 0x90 */ 0xB0, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
    /* 0x98 */ 0x71, 0x72, 0xAA, 0xBA, 0xE6, 0xB8, 0xC6, 0xA4,
    /* 0xA0 */ 0xB5, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
    /* 0xA8 */ 0x79, 0x7A, 0xA1, 0xBF, 0xD0, 0xDD, 0xDE, 0xAE,
    /* 0xB0 */ 0x5E, 0xA3, 0xA5, 0xB7, 0xA9, 0xA7, 0xB6, 0xBC,
    /* 0xB8 */ 0xBD, 0xBE, 0x5B, 0x5D, 0xAF, 0xA8, 0xB4, 0xD7,
    /* 0xC0 */ 0x7B, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    /* 0xC8 */ 0x68, 0x69, 0xAD, 0xF4, 0xF6, 0xF2, 0xF3, 0xF5,
    /* 0xD0 */ 0x7D, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70,
    /* 0xD8 */ 0x71, 0x72, 0xB9, 0xFB, 0xFC, 0xF9, 0xFA, 0xFF,
    /* 0xE0 */ 0x5C, 0xF7, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
    /* 0xE8 */ 0x79, 0x7A, 0xB2, 0xD4, 0xD6, 0xD2, 0xD3, 0xD5,
    /* 0xF0 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    /* 0xF8 */ 0x38, 0x39, 0xB3, 0xDB, 0xDC, 0xD9, 0xDA, 0x9F
};
#endif
282
283APR_DECLARE(int) apr_cstr_casecmp(const char *s1, const char *s2)
284{
285    const unsigned char *str1 = (const unsigned char *)s1;
286    const unsigned char *str2 = (const unsigned char *)s2;
287    for (;;)
288    {
289        const int c1 = (int)(*str1);
290        const int c2 = (int)(*str2);
291        const int cmp = ucharmap[c1] - ucharmap[c2];
292        /* Not necessary to test for !c2, this is caught by cmp */
293        if (cmp || !c1)
294            return cmp;
295        str1++;
296        str2++;
297    }
298}
299
300APR_DECLARE(int) apr_cstr_casecmpn(const char *s1, const char *s2, apr_size_t n)
301{
302    const unsigned char *str1 = (const unsigned char *)s1;
303    const unsigned char *str2 = (const unsigned char *)s2;
304    while (n--)
305    {
306        const int c1 = (int)(*str1);
307        const int c2 = (int)(*str2);
308        const int cmp = ucharmap[c1] - ucharmap[c2];
309        /* Not necessary to test for !c2, this is caught by cmp */
310        if (cmp || !c1)
311            return cmp;
312        str1++;
313        str2++;
314    }
315    return 0;
316}
317
318APR_DECLARE(apr_status_t) apr_cstr_strtoui64(apr_uint64_t *n, const char *str,
319                                apr_uint64_t minval, apr_uint64_t maxval,
320                                int base)
321{
322  apr_int64_t val;
323  char *endptr;
324
325  /* We assume errno is thread-safe. */
326  errno = 0; /* APR-0.9 doesn't always set errno */
327
328  /* ### We're throwing away half the number range here.
329   * ### APR needs a apr_strtoui64() function. */
330  val = apr_strtoi64(str, &endptr, base);
331  if (errno == EINVAL || endptr == str || str[0] == '\0' || *endptr != '\0')
332    return APR_EINVAL;
333  if ((errno == ERANGE && (val == APR_INT64_MIN || val == APR_INT64_MAX)) ||
334      val < 0 || (apr_uint64_t)val < minval || (apr_uint64_t)val > maxval)
335    return APR_ERANGE;
336  *n = val;
337  return APR_SUCCESS;
338}
339
340APR_DECLARE(apr_status_t) apr_cstr_atoui64(apr_uint64_t *n, const char *str)
341{
342  return apr_cstr_strtoui64(n, str, 0, APR_UINT64_MAX, 10);
343}
344
345APR_DECLARE(apr_status_t) apr_cstr_atoui(unsigned int *n, const char *str)
346{
347  apr_uint64_t val;
348  apr_status_t rv = apr_cstr_strtoui64(&val, str, 0, APR_UINT32_MAX, 10);
349  if (rv == APR_SUCCESS)
350    *n = (unsigned int)val;
351  return rv;
352}
353
354APR_DECLARE(apr_status_t) apr_cstr_strtoi64(apr_int64_t *n, const char *str,
355                               apr_int64_t minval, apr_int64_t maxval,
356                               int base)
357{
358  apr_int64_t val;
359  char *endptr;
360
361  /* We assume errno is thread-safe. */
362  errno = 0; /* APR-0.9 doesn't always set errno */
363
364  val = apr_strtoi64(str, &endptr, base);
365  if (errno == EINVAL || endptr == str || str[0] == '\0' || *endptr != '\0')
366    return APR_EINVAL;
367  if ((errno == ERANGE && (val == APR_INT64_MIN || val == APR_INT64_MAX)) ||
368      val < minval || val > maxval)
369    return APR_ERANGE;
370  *n = val;
371  return APR_SUCCESS;
372}
373
374APR_DECLARE(apr_status_t) apr_cstr_atoi64(apr_int64_t *n, const char *str)
375{
376  return apr_cstr_strtoi64(n, str, APR_INT64_MIN, APR_INT64_MAX, 10);
377}
378
379APR_DECLARE(apr_status_t) apr_cstr_atoi(int *n, const char *str)
380{
381  apr_int64_t val;
382  apr_status_t rv;
383
384  rv = apr_cstr_strtoi64(&val, str, APR_INT32_MIN, APR_INT32_MAX, 10);
385  if (rv == APR_SUCCESS)
386    *n = (int)val;
387  return rv;
388}
389
/*
 * If STR begins with PREFIX, return a pointer to the first character of
 * STR after that prefix; otherwise return NULL.  An empty PREFIX always
 * matches, giving STR itself.
 */
APR_DECLARE(const char *) apr_cstr_skip_prefix(const char *str,
                                               const char *prefix)
{
  const apr_size_t prefix_len = strlen(prefix);

  return (strncmp(str, prefix, prefix_len) == 0) ? str + prefix_len : NULL;
}
404