string.c revision 362181
1/*
2 * string.c:  routines to manipulate counted-length strings
3 *            (svn_stringbuf_t and svn_string_t) and C strings.
4 *
5 *
6 * ====================================================================
7 *    Licensed to the Apache Software Foundation (ASF) under one
8 *    or more contributor license agreements.  See the NOTICE file
9 *    distributed with this work for additional information
10 *    regarding copyright ownership.  The ASF licenses this file
11 *    to you under the Apache License, Version 2.0 (the
12 *    "License"); you may not use this file except in compliance
13 *    with the License.  You may obtain a copy of the License at
14 *
15 *      http://www.apache.org/licenses/LICENSE-2.0
16 *
17 *    Unless required by applicable law or agreed to in writing,
18 *    software distributed under the License is distributed on an
19 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
20 *    KIND, either express or implied.  See the License for the
21 *    specific language governing permissions and limitations
22 *    under the License.
23 * ====================================================================
24 */
25
26
27
28#include <apr.h>
29#include <assert.h>
30
31#include <string.h>      /* for memcpy(), memcmp(), strlen() */
32#include <apr_fnmatch.h>
33#include "svn_string.h"  /* loads "svn_types.h" and <apr_pools.h> */
34#include "svn_ctype.h"
35#include "private/svn_dep_compat.h"
36#include "private/svn_string_private.h"
37
38#include "svn_private_config.h"
39
40
41
42/* Allocate the space for a memory buffer from POOL.
43 * Return a pointer to the new buffer in *DATA and its size in *SIZE.
44 * The buffer size will be at least MINIMUM_SIZE.
45 *
46 * N.B.: The stringbuf creation functions use this, but since stringbufs
47 *       always consume at least 1 byte for the NUL terminator, the
48 *       resulting data pointers will never be NULL.
49 */
50static APR_INLINE void
51membuf_create(void **data, apr_size_t *size,
52              apr_size_t minimum_size, apr_pool_t *pool)
53{
54  /* apr_palloc will allocate multiples of 8.
55   * Thus, we would waste some of that memory if we stuck to the
56   * smaller size. Note that this is safe even if apr_palloc would
57   * use some other alignment or none at all. */
58  minimum_size = APR_ALIGN_DEFAULT(minimum_size);
59  *data = apr_palloc(pool, minimum_size);
60  *size = minimum_size;
61}
62
63/* Ensure that the size of a given memory buffer is at least MINIMUM_SIZE
64 * bytes. If *SIZE is already greater than or equal to MINIMUM_SIZE,
65 * this function does nothing.
66 *
67 * If *SIZE is 0, the allocated buffer size will be MINIMUM_SIZE
68 * rounded up to the nearest APR alignment boundary. Otherwse, *SIZE
69 * will be multiplied by a power of two such that the result is
70 * greater or equal to MINIMUM_SIZE. The pointer to the new buffer
71 * will be returned in *DATA, and its size in *SIZE.
72 */
73static APR_INLINE void
74membuf_ensure(void **data, apr_size_t *size,
75              apr_size_t minimum_size, apr_pool_t *pool)
76{
77  if (minimum_size > *size)
78    {
79      apr_size_t new_size = *size;
80
81      if (new_size == 0)
82        new_size = minimum_size;
83      else
84        while (new_size < minimum_size)
85          {
86            const apr_size_t prev_size = new_size;
87            new_size *= 2;
88
89            /* check for apr_size_t overflow */
90            if (prev_size > new_size)
91              {
92                new_size = minimum_size;
93                break;
94              }
95          }
96
97      membuf_create(data, size, new_size, pool);
98    }
99}
100
101void
102svn_membuf__create(svn_membuf_t *membuf, apr_size_t size, apr_pool_t *pool)
103{
104  membuf_create(&membuf->data, &membuf->size, size, pool);
105  membuf->pool = pool;
106}
107
108void
109svn_membuf__ensure(svn_membuf_t *membuf, apr_size_t size)
110{
111  membuf_ensure(&membuf->data, &membuf->size, size, membuf->pool);
112}
113
114void
115svn_membuf__resize(svn_membuf_t *membuf, apr_size_t size)
116{
117  const void *const old_data = membuf->data;
118  const apr_size_t old_size = membuf->size;
119
120  membuf_ensure(&membuf->data, &membuf->size, size, membuf->pool);
121
122  /* If we re-allocated MEMBUF->DATA, it cannot be NULL.
123   * Statically initialized membuffers (OLD_DATA) may be NULL, though. */
124  if (old_data && old_data != membuf->data)
125    memcpy(membuf->data, old_data, old_size);
126}
127
128/* Always provide an out-of-line implementation of svn_membuf__zero */
129#undef svn_membuf__zero
130void
131svn_membuf__zero(svn_membuf_t *membuf)
132{
133  SVN_MEMBUF__ZERO(membuf);
134}
135
136/* Always provide an out-of-line implementation of svn_membuf__nzero */
137#undef svn_membuf__nzero
138void
139svn_membuf__nzero(svn_membuf_t *membuf, apr_size_t size)
140{
141  SVN_MEMBUF__NZERO(membuf, size);
142}
143
144static APR_INLINE svn_boolean_t
145string_compare(const char *str1,
146               const char *str2,
147               apr_size_t len1,
148               apr_size_t len2)
149{
150  /* easy way out :)  */
151  if (len1 != len2)
152    return FALSE;
153
154  /* now the strings must have identical lengths */
155
156  if ((memcmp(str1, str2, len1)) == 0)
157    return TRUE;
158  else
159    return FALSE;
160}
161
162static APR_INLINE apr_size_t
163string_first_non_whitespace(const char *str, apr_size_t len)
164{
165  apr_size_t i;
166
167  for (i = 0; i < len; i++)
168    {
169      if (! svn_ctype_isspace(str[i]))
170        return i;
171    }
172
173  /* if we get here, then the string must be entirely whitespace */
174  return len;
175}
176
177static APR_INLINE apr_size_t
178find_char_backward(const char *str, apr_size_t len, char ch)
179{
180  apr_size_t i = len;
181
182  while (i != 0)
183    {
184      if (str[--i] == ch)
185        return i;
186    }
187
188  /* char was not found, return len */
189  return len;
190}
191
192
193/* svn_string functions */
194
195/* Return a new svn_string_t object, allocated in POOL, initialized with
196 * DATA and SIZE.  Do not copy the contents of DATA, just store the pointer.
197 * SIZE is the length in bytes of DATA, excluding the required NUL
198 * terminator. */
199static svn_string_t *
200create_string(const char *data, apr_size_t size,
201              apr_pool_t *pool)
202{
203  svn_string_t *new_string;
204
205  new_string = apr_palloc(pool, sizeof(*new_string));
206
207  new_string->data = data;
208  new_string->len = size;
209
210  return new_string;
211}
212
213/* A data buffer for a zero-length string (just a null terminator).  Many
214 * svn_string_t instances may share this same buffer. */
215static const char empty_buffer[1] = {0};
216
217svn_string_t *
218svn_string_create_empty(apr_pool_t *pool)
219{
220  svn_string_t *new_string = apr_palloc(pool, sizeof(*new_string));
221  new_string->data = empty_buffer;
222  new_string->len = 0;
223
224  return new_string;
225}
226
227
228svn_string_t *
229svn_string_ncreate(const char *bytes, apr_size_t size, apr_pool_t *pool)
230{
231  void *mem;
232  char *data;
233  svn_string_t *new_string;
234
235  /* Allocate memory for svn_string_t and data in one chunk. */
236  mem = apr_palloc(pool, sizeof(*new_string) + size + 1);
237  data = (char*)mem + sizeof(*new_string);
238
239  new_string = mem;
240  new_string->data = data;
241  new_string->len = size;
242
243  /* If SIZE is 0, NULL is valid for BYTES. */
244  if (size)
245    memcpy(data, bytes, size);
246
247  /* Null termination is the convention -- even if we suspect the data
248     to be binary, it's not up to us to decide, it's the caller's
249     call.  Heck, that's why they call it the caller! */
250  data[size] = '\0';
251
252  return new_string;
253}
254
255
256svn_string_t *
257svn_string_create(const char *cstring, apr_pool_t *pool)
258{
259  return svn_string_ncreate(cstring, strlen(cstring), pool);
260}
261
262
263svn_string_t *
264svn_string_create_from_buf(const svn_stringbuf_t *strbuf, apr_pool_t *pool)
265{
266  return svn_string_ncreate(strbuf->data, strbuf->len, pool);
267}
268
269
270svn_string_t *
271svn_string_createv(apr_pool_t *pool, const char *fmt, va_list ap)
272{
273  char *data = apr_pvsprintf(pool, fmt, ap);
274
275  /* wrap an svn_string_t around the new data */
276  return create_string(data, strlen(data), pool);
277}
278
279
280svn_string_t *
281svn_string_createf(apr_pool_t *pool, const char *fmt, ...)
282{
283  svn_string_t *str;
284
285  va_list ap;
286  va_start(ap, fmt);
287  str = svn_string_createv(pool, fmt, ap);
288  va_end(ap);
289
290  return str;
291}
292
293
294svn_boolean_t
295svn_string_isempty(const svn_string_t *str)
296{
297  return (str->len == 0);
298}
299
300
301svn_string_t *
302svn_string_dup(const svn_string_t *original_string, apr_pool_t *pool)
303{
304  return (original_string ? svn_string_ncreate(original_string->data,
305                                               original_string->len, pool)
306                          : NULL);
307}
308
309
310
311svn_boolean_t
312svn_string_compare(const svn_string_t *str1, const svn_string_t *str2)
313{
314  return
315    string_compare(str1->data, str2->data, str1->len, str2->len);
316}
317
318
319
320apr_size_t
321svn_string_first_non_whitespace(const svn_string_t *str)
322{
323  return
324    string_first_non_whitespace(str->data, str->len);
325}
326
327
328apr_size_t
329svn_string_find_char_backward(const svn_string_t *str, char ch)
330{
331  return find_char_backward(str->data, str->len, ch);
332}
333
334svn_string_t *
335svn_stringbuf__morph_into_string(svn_stringbuf_t *strbuf)
336{
337  /* In debug mode, detect attempts to modify the original STRBUF object.
338   */
339#ifdef SVN_DEBUG
340  strbuf->pool = NULL;
341  strbuf->blocksize = strbuf->len + 1;
342#endif
343
344  /* Both, svn_string_t and svn_stringbuf_t are public API structures
345   * since the svn epoch. Thus, we can rely on their precise layout not
346   * to change.
347   *
348   * It just so happens that svn_string_t is structurally equivalent
349   * to the (data, len) sub-set of svn_stringbuf_t. There is also no
350   * difference in alignment and padding. So, we can just re-interpret
351   * that part of STRBUF as a svn_string_t.
352   *
353   * However, since svn_string_t does not know about the blocksize
354   * member in svn_stringbuf_t, any attempt to re-size the returned
355   * svn_string_t might invalidate the STRBUF struct. Hence, we consider
356   * the source STRBUF "consumed".
357   *
358   * Modifying the string character content is fine, though.
359   */
360  return (svn_string_t *)&strbuf->data;
361}
362
363
364
365/* svn_stringbuf functions */
366
367svn_stringbuf_t *
368svn_stringbuf_create_empty(apr_pool_t *pool)
369{
370  return svn_stringbuf_create_ensure(0, pool);
371}
372
373svn_stringbuf_t *
374svn_stringbuf_create_ensure(apr_size_t blocksize, apr_pool_t *pool)
375{
376  void *mem;
377  svn_stringbuf_t *new_string;
378
379  ++blocksize; /* + space for '\0' */
380
381  /* Allocate memory for svn_string_t and data in one chunk. */
382  membuf_create(&mem, &blocksize, blocksize + sizeof(*new_string), pool);
383
384  /* Initialize header and string */
385  new_string = mem;
386  new_string->data = (char*)mem + sizeof(*new_string);
387  new_string->data[0] = '\0';
388  new_string->len = 0;
389  new_string->blocksize = blocksize - sizeof(*new_string);
390  new_string->pool = pool;
391
392  return new_string;
393}
394
395svn_stringbuf_t *
396svn_stringbuf_ncreate(const char *bytes, apr_size_t size, apr_pool_t *pool)
397{
398  svn_stringbuf_t *strbuf = svn_stringbuf_create_ensure(size, pool);
399
400  /* If SIZE is 0, NULL is valid for BYTES. */
401  if (size)
402    memcpy(strbuf->data, bytes, size);
403
404  /* Null termination is the convention -- even if we suspect the data
405     to be binary, it's not up to us to decide, it's the caller's
406     call.  Heck, that's why they call it the caller! */
407  strbuf->data[size] = '\0';
408  strbuf->len = size;
409
410  return strbuf;
411}
412
413
414svn_stringbuf_t *
415svn_stringbuf_create(const char *cstring, apr_pool_t *pool)
416{
417  return svn_stringbuf_ncreate(cstring, strlen(cstring), pool);
418}
419
420
421svn_stringbuf_t *
422svn_stringbuf_create_from_string(const svn_string_t *str, apr_pool_t *pool)
423{
424  return svn_stringbuf_ncreate(str->data, str->len, pool);
425}
426
427svn_stringbuf_t *
428svn_stringbuf_create_wrap(char *str, apr_pool_t *pool)
429{
430  svn_stringbuf_t *result = apr_palloc(pool, sizeof(*result));
431  result->pool = pool;
432  result->data = str;
433  result->len = strlen(str);
434  result->blocksize = result->len + 1;
435
436  return result;
437}
438
439svn_stringbuf_t *
440svn_stringbuf_createv(apr_pool_t *pool, const char *fmt, va_list ap)
441{
442  char *data = apr_pvsprintf(pool, fmt, ap);
443  apr_size_t size = strlen(data);
444  svn_stringbuf_t *new_string;
445
446  new_string = apr_palloc(pool, sizeof(*new_string));
447  new_string->data = data;
448  new_string->len = size;
449  new_string->blocksize = size + 1;
450  new_string->pool = pool;
451
452  return new_string;
453}
454
455
456svn_stringbuf_t *
457svn_stringbuf_createf(apr_pool_t *pool, const char *fmt, ...)
458{
459  svn_stringbuf_t *str;
460
461  va_list ap;
462  va_start(ap, fmt);
463  str = svn_stringbuf_createv(pool, fmt, ap);
464  va_end(ap);
465
466  return str;
467}
468
469
470void
471svn_stringbuf_fillchar(svn_stringbuf_t *str, unsigned char c)
472{
473  memset(str->data, c, str->len);
474}
475
476
477void
478svn_stringbuf_set(svn_stringbuf_t *str, const char *value)
479{
480  apr_size_t amt = strlen(value);
481
482  membuf_ensure((void**) &str->data, &str->blocksize, amt + 1, str->pool);
483  memcpy(str->data, value, amt + 1);
484  str->len = amt;
485}
486
487void
488svn_stringbuf_setempty(svn_stringbuf_t *str)
489{
490  str->data[0] = '\0';
491  str->len = 0;
492}
493
494
495void
496svn_stringbuf_chop(svn_stringbuf_t *str, apr_size_t nbytes)
497{
498  if (nbytes > str->len)
499    str->len = 0;
500  else
501    str->len -= nbytes;
502
503  str->data[str->len] = '\0';
504}
505
506void
507svn_stringbuf_leftchop(svn_stringbuf_t *str, apr_size_t nbytes)
508{
509  if (str->len == 0)
510    return;
511
512  if (nbytes >= str->len)
513    {
514      str->len = 0;
515      *str->data = '\0';
516    }
517  else
518    {
519      /* Note: This will irretrievably waste nbytes of space in the
520         stringbuf's pool, but unlike the alternative of memmoving the
521         data, it's a constant-time operation. */
522      str->data += nbytes;
523      str->len -= nbytes;
524      str->blocksize -= nbytes;
525    }
526}
527
528svn_boolean_t
529svn_stringbuf_isempty(const svn_stringbuf_t *str)
530{
531  return (str->len == 0);
532}
533
534
535void
536svn_stringbuf_ensure(svn_stringbuf_t *str, apr_size_t minimum_size)
537{
538  void *mem = NULL;
539  ++minimum_size;  /* + space for '\0' */
540
541  membuf_ensure(&mem, &str->blocksize, minimum_size, str->pool);
542  if (mem && mem != str->data)
543    {
544      if (str->data)
545        memcpy(mem, str->data, str->len + 1);
546      str->data = mem;
547    }
548}
549
550
551/* WARNING - Optimized code ahead!
552 * This function has been hand-tuned for performance. Please read
553 * the comments below before modifying the code.
554 */
555void
556svn_stringbuf_appendbyte(svn_stringbuf_t *str, char byte)
557{
558  char *dest;
559  apr_size_t old_len = str->len;
560
561  /* In most cases, there will be pre-allocated memory left
562   * to just write the new byte at the end of the used section
563   * and terminate the string properly.
564   */
565  if (str->blocksize > old_len + 1)
566    {
567      /* The following read does not depend this write, so we
568       * can issue the write first to minimize register pressure:
569       * The value of old_len+1 is no longer needed; on most processors,
570       * dest[old_len+1] will be calculated implicitly as part of
571       * the addressing scheme.
572       */
573      str->len = old_len+1;
574
575      /* Since the compiler cannot be sure that *src->data and *src
576       * don't overlap, we read src->data *once* before writing
577       * to *src->data. Replacing dest with str->data would force
578       * the compiler to read it again after the first byte.
579       */
580      dest = str->data;
581
582      /* If not already available in a register as per ABI, load
583       * "byte" into the register (e.g. the one freed from old_len+1),
584       * then write it to the string buffer and terminate it properly.
585       *
586       * Including the "byte" fetch, all operations so far could be
587       * issued at once and be scheduled at the CPU's descression.
588       * Most likely, no-one will soon depend on the data that will be
589       * written in this function. So, no stalls there, either.
590       */
591      dest[old_len] = byte;
592      dest[old_len+1] = '\0';
593    }
594  else
595    {
596      /* we need to re-allocate the string buffer
597       * -> let the more generic implementation take care of that part
598       */
599
600      /* Depending on the ABI, "byte" is a register value. If we were
601       * to take its address directly, the compiler might decide to
602       * put in on the stack *unconditionally*, even if that would
603       * only be necessary for this block.
604       */
605      char b = byte;
606      svn_stringbuf_appendbytes(str, &b, 1);
607    }
608}
609
610
611void
612svn_stringbuf_appendbytes(svn_stringbuf_t *str, const char *bytes,
613                          apr_size_t count)
614{
615  apr_size_t total_len;
616  void *start_address;
617
618  if (!count)
619    /* Allow BYTES to be NULL by avoiding passing it to memcpy. */
620    return;
621
622  total_len = str->len + count;  /* total size needed */
623
624  /* svn_stringbuf_ensure adds 1 for null terminator. */
625  svn_stringbuf_ensure(str, total_len);
626
627  /* get address 1 byte beyond end of original bytestring */
628  start_address = (str->data + str->len);
629
630  memcpy(start_address, bytes, count);
631  str->len = total_len;
632
633  str->data[str->len] = '\0';  /* We don't know if this is binary
634                                  data or not, but convention is
635                                  to null-terminate. */
636}
637
638void
639svn_stringbuf_appendfill(svn_stringbuf_t *str,
640                         char byte,
641                         apr_size_t count)
642{
643  apr_size_t new_len = str->len + count;
644  svn_stringbuf_ensure(str, new_len);
645
646  memset(str->data + str->len, byte, count);
647
648  /* update buffer length and always NUL-terminate it */
649  str->len = new_len;
650  str->data[new_len] = '\0';
651}
652
653
654void
655svn_stringbuf_appendstr(svn_stringbuf_t *targetstr,
656                        const svn_stringbuf_t *appendstr)
657{
658  svn_stringbuf_appendbytes(targetstr, appendstr->data, appendstr->len);
659}
660
661
662void
663svn_stringbuf_appendcstr(svn_stringbuf_t *targetstr, const char *cstr)
664{
665  svn_stringbuf_appendbytes(targetstr, cstr, strlen(cstr));
666}
667
668void
669svn_stringbuf_insert(svn_stringbuf_t *str,
670                     apr_size_t pos,
671                     const char *bytes,
672                     apr_size_t count)
673{
674  /* For COUNT==0, we allow BYTES to be NULL. It's a no-op in that case. */
675  if (count == 0)
676    return;
677
678  /* special case: BYTES overlaps with this string -> copy the source */
679  if (bytes + count > str->data && bytes < str->data + str->blocksize)
680    bytes = apr_pmemdup(str->pool, bytes, count);
681
682  if (pos > str->len)
683    pos = str->len;
684
685  svn_stringbuf_ensure(str, str->len + count);
686  memmove(str->data + pos + count, str->data + pos, str->len - pos + 1);
687  memcpy(str->data + pos, bytes, count);
688
689  str->len += count;
690}
691
692void
693svn_stringbuf_remove(svn_stringbuf_t *str,
694                     apr_size_t pos,
695                     apr_size_t count)
696{
697  if (pos > str->len)
698    pos = str->len;
699  if (count > str->len - pos)
700    count = str->len - pos;
701
702  memmove(str->data + pos, str->data + pos + count, str->len - pos - count + 1);
703  str->len -= count;
704}
705
706void
707svn_stringbuf_replace(svn_stringbuf_t *str,
708                      apr_size_t pos,
709                      apr_size_t old_count,
710                      const char *bytes,
711                      apr_size_t new_count)
712{
713  /* For COUNT==0, we allow BYTES to be NULL.
714   * In that case, this is just a substring removal. */
715  if (new_count == 0)
716    {
717      svn_stringbuf_remove(str, pos, old_count);
718      return;
719    }
720
721  /* special case: BYTES overlaps with this string -> copy the source */
722  if (bytes + new_count > str->data && bytes < str->data + str->blocksize)
723    bytes = apr_pmemdup(str->pool, bytes, new_count);
724
725  if (pos > str->len)
726    pos = str->len;
727  if (old_count > str->len - pos)
728    old_count = str->len - pos;
729
730  if (old_count < new_count)
731    {
732      apr_size_t delta = new_count - old_count;
733      svn_stringbuf_ensure(str, str->len + delta);
734    }
735
736  if (old_count != new_count)
737    memmove(str->data + pos + new_count, str->data + pos + old_count,
738            str->len - pos - old_count + 1);
739
740  memcpy(str->data + pos, bytes, new_count);
741  str->len += new_count - old_count;
742}
743
744
745apr_size_t
746svn_stringbuf_replace_all(svn_stringbuf_t *str,
747                          const char *to_find,
748                          const char *replacement)
749{
750  apr_size_t replacements = 0;
751
752  apr_size_t current = 0;
753  apr_size_t original_length = str->len;
754
755  apr_size_t to_copy;
756  apr_size_t to_find_len;
757  apr_size_t replacement_len;
758  apr_size_t new_length;
759
760  /* Early exit. */
761  const char *pos = strstr(str->data, to_find);
762  if (pos == NULL)
763    return 0;
764
765  to_find_len = strlen(to_find);
766  replacement_len = strlen(replacement);
767
768  /* We will store the new contents behind the NUL terminator of the current
769   * data and track the total length in STR->LEN to make the reallocation
770   * code preserve both bits.  However, we need to keep the NUL between them
771   * to make strstr stop at that boundary. */
772  ++str->len;
773
774  /* Find all occurrences of TO_FIND, copy the bits in between to the target,
775   * separated by REPLACEMENT. */
776  for ( ; pos; pos = strstr(str->data + current, to_find), ++replacements)
777    {
778      to_copy = pos - str->data - current;
779      svn_stringbuf_ensure(str, str->len + to_copy + replacement_len);
780
781      if (to_copy)
782        memcpy(str->data + str->len, str->data + current, to_copy);
783      current += to_copy + to_find_len;
784
785      str->len += to_copy;
786      memcpy(str->data + str->len, replacement, replacement_len);
787      str->len += replacement_len;
788    }
789
790  /* Copy remainder. */
791  to_copy = original_length - current;
792  if (to_copy)
793    {
794      svn_stringbuf_ensure(str, str->len + to_copy);
795      memcpy(str->data + str->len, str->data + current, to_copy);
796      str->len += to_copy;
797    }
798
799  /* Move new contents to the start of the buffer and terminate it. */
800  new_length = str->len - original_length - 1;
801  memmove(str->data, str->data + original_length + 1, new_length);
802  str->len = new_length;
803  str->data[new_length] = 0;
804
805  /* Done. */
806  return replacements;
807}
808
809
810svn_stringbuf_t *
811svn_stringbuf_dup(const svn_stringbuf_t *original_string, apr_pool_t *pool)
812{
813  return (svn_stringbuf_ncreate(original_string->data,
814                                original_string->len, pool));
815}
816
817
818
819svn_boolean_t
820svn_stringbuf_compare(const svn_stringbuf_t *str1,
821                      const svn_stringbuf_t *str2)
822{
823  return string_compare(str1->data, str2->data, str1->len, str2->len);
824}
825
826
827
828apr_size_t
829svn_stringbuf_first_non_whitespace(const svn_stringbuf_t *str)
830{
831  return string_first_non_whitespace(str->data, str->len);
832}
833
834
835void
836svn_stringbuf_strip_whitespace(svn_stringbuf_t *str)
837{
838  /* Skip (hide) whitespace at the beginning of the string. */
839  if (svn_ctype_isspace(str->data[0]))
840    {
841      /* Find first non-whitespace character */
842      apr_size_t offset = string_first_non_whitespace(str->data + 1,
843                                                      str->len - 1) + 1;
844
845      /* Go ahead!  Waste some RAM, we've got pools! :)  */
846      str->data += offset;
847      str->len -= offset;
848      str->blocksize -= offset;
849    }
850
851  /* Now that we've trimmed the front, trim the end, wasting more RAM. */
852  while ((str->len > 0) && svn_ctype_isspace(str->data[str->len - 1]))
853    str->len--;
854  str->data[str->len] = '\0';
855}
856
857
858apr_size_t
859svn_stringbuf_find_char_backward(const svn_stringbuf_t *str, char ch)
860{
861  return find_char_backward(str->data, str->len, ch);
862}
863
864
865svn_boolean_t
866svn_string_compare_stringbuf(const svn_string_t *str1,
867                             const svn_stringbuf_t *str2)
868{
869  return string_compare(str1->data, str2->data, str1->len, str2->len);
870}
871
872
873
874/*** C string stuff. ***/
875
876void
877svn_cstring_split_append(apr_array_header_t *array,
878                         const char *input,
879                         const char *sep_chars,
880                         svn_boolean_t chop_whitespace,
881                         apr_pool_t *pool)
882{
883  char *pats;
884  char *p;
885
886  pats = apr_pstrdup(pool, input);  /* strtok wants non-const data */
887  p = svn_cstring_tokenize(sep_chars, &pats);
888
889  while (p)
890    {
891      if (chop_whitespace)
892        {
893          while (svn_ctype_isspace(*p))
894            p++;
895
896          {
897            char *e = p + (strlen(p) - 1);
898            while ((e >= p) && (svn_ctype_isspace(*e)))
899              e--;
900            *(++e) = '\0';
901          }
902        }
903
904      if (p[0] != '\0')
905        APR_ARRAY_PUSH(array, const char *) = p;
906
907      p = svn_cstring_tokenize(sep_chars, &pats);
908    }
909
910  return;
911}
912
913
914apr_array_header_t *
915svn_cstring_split(const char *input,
916                  const char *sep_chars,
917                  svn_boolean_t chop_whitespace,
918                  apr_pool_t *pool)
919{
920  apr_array_header_t *a = apr_array_make(pool, 5, sizeof(input));
921  svn_cstring_split_append(a, input, sep_chars, chop_whitespace, pool);
922  return a;
923}
924
925
926svn_boolean_t svn_cstring_match_glob_list(const char *str,
927                                          const apr_array_header_t *list)
928{
929  int i;
930
931  for (i = 0; i < list->nelts; i++)
932    {
933      const char *this_pattern = APR_ARRAY_IDX(list, i, char *);
934
935      if (apr_fnmatch(this_pattern, str, 0) == APR_SUCCESS)
936        return TRUE;
937    }
938
939  return FALSE;
940}
941
942svn_boolean_t
943svn_cstring_match_list(const char *str, const apr_array_header_t *list)
944{
945  int i;
946
947  for (i = 0; i < list->nelts; i++)
948    {
949      const char *this_str = APR_ARRAY_IDX(list, i, char *);
950
951      if (strcmp(this_str, str) == 0)
952        return TRUE;
953    }
954
955  return FALSE;
956}
957
958char *
959svn_cstring_tokenize(const char *sep, char **str)
960{
961    char *token;
962    char *next;
963    char csep;
964
965    /* check parameters */
966    if ((sep == NULL) || (str == NULL) || (*str == NULL))
967        return NULL;
968
969    /* let APR handle edge cases and multiple separators */
970    csep = *sep;
971    if (csep == '\0' || sep[1] != '\0')
972      return apr_strtok(NULL, sep, str);
973
974    /* skip characters in sep (will terminate at '\0') */
975    token = *str;
976    while (*token == csep)
977        ++token;
978
979    if (!*token)          /* no more tokens */
980        return NULL;
981
982    /* skip valid token characters to terminate token and
983     * prepare for the next call (will terminate at '\0)
984     */
985    next = strchr(token, csep);
986    if (next == NULL)
987      {
988        *str = token + strlen(token);
989      }
990    else
991      {
992        *next = '\0';
993        *str = next + 1;
994      }
995
996    return token;
997}
998
999int svn_cstring_count_newlines(const char *msg)
1000{
1001  int count = 0;
1002  const char *p;
1003
1004  for (p = msg; *p; p++)
1005    {
1006      if (*p == '\n')
1007        {
1008          count++;
1009          if (*(p + 1) == '\r')
1010            p++;
1011        }
1012      else if (*p == '\r')
1013        {
1014          count++;
1015          if (*(p + 1) == '\n')
1016            p++;
1017        }
1018    }
1019
1020  return count;
1021}
1022
1023char *
1024svn_cstring_join2(const apr_array_header_t *strings,
1025                  const char *separator,
1026                  svn_boolean_t trailing_separator,
1027                  apr_pool_t *pool)
1028{
1029  svn_stringbuf_t *new_str = svn_stringbuf_create_empty(pool);
1030  size_t sep_len = strlen(separator);
1031  int i;
1032
1033  for (i = 0; i < strings->nelts; i++)
1034    {
1035      const char *string = APR_ARRAY_IDX(strings, i, const char *);
1036      if (i > 0)
1037        svn_stringbuf_appendbytes(new_str, separator, sep_len);
1038      svn_stringbuf_appendbytes(new_str, string, strlen(string));
1039    }
1040
1041  if (strings->nelts > 0 && trailing_separator)
1042    svn_stringbuf_appendbytes(new_str, separator, sep_len);
1043
1044  return new_str->data;
1045}
1046
1047int
1048svn_cstring_casecmp(const char *str1, const char *str2)
1049{
1050  for (;;)
1051    {
1052      const int a = *str1++;
1053      const int b = *str2++;
1054      const int cmp = svn_ctype_casecmp(a, b);
1055      if (cmp || !a || !b)
1056        return cmp;
1057    }
1058}
1059
1060svn_error_t *
1061svn_cstring_strtoui64(apr_uint64_t *n, const char *str,
1062                      apr_uint64_t minval, apr_uint64_t maxval,
1063                      int base)
1064{
1065  apr_int64_t val;
1066  char *endptr;
1067
1068  /* We assume errno is thread-safe. */
1069  errno = 0; /* APR-0.9 doesn't always set errno */
1070
1071  /* ### We're throwing away half the number range here.
1072   * ### APR needs a apr_strtoui64() function. */
1073  val = apr_strtoi64(str, &endptr, base);
1074  if (errno == EINVAL || endptr == str || str[0] == '\0' || *endptr != '\0')
1075    return svn_error_createf(SVN_ERR_INCORRECT_PARAMS, NULL,
1076                             _("Could not convert '%s' into a number"),
1077                             str);
1078  if ((errno == ERANGE && (val == APR_INT64_MIN || val == APR_INT64_MAX)) ||
1079      val < 0 || (apr_uint64_t)val < minval || (apr_uint64_t)val > maxval)
1080    /* ### Mark this for translation when gettext doesn't choke on macros. */
1081    return svn_error_createf(SVN_ERR_INCORRECT_PARAMS, NULL,
1082                             "Number '%s' is out of range "
1083                             "'[%" APR_UINT64_T_FMT ", %" APR_UINT64_T_FMT "]'",
1084                             str, minval, maxval);
1085  *n = val;
1086  return SVN_NO_ERROR;
1087}
1088
1089svn_error_t *
1090svn_cstring_atoui64(apr_uint64_t *n, const char *str)
1091{
1092  return svn_error_trace(svn_cstring_strtoui64(n, str, 0,
1093                                               APR_UINT64_MAX, 10));
1094}
1095
1096svn_error_t *
1097svn_cstring_atoui(unsigned int *n, const char *str)
1098{
1099  apr_uint64_t val;
1100
1101  SVN_ERR(svn_cstring_strtoui64(&val, str, 0, APR_UINT32_MAX, 10));
1102  *n = (unsigned int)val;
1103  return SVN_NO_ERROR;
1104}
1105
1106svn_error_t *
1107svn_cstring_strtoi64(apr_int64_t *n, const char *str,
1108                     apr_int64_t minval, apr_int64_t maxval,
1109                     int base)
1110{
1111  apr_int64_t val;
1112  char *endptr;
1113
1114  /* We assume errno is thread-safe. */
1115  errno = 0; /* APR-0.9 doesn't always set errno */
1116
1117  val = apr_strtoi64(str, &endptr, base);
1118  if (errno == EINVAL || endptr == str || str[0] == '\0' || *endptr != '\0')
1119    return svn_error_createf(SVN_ERR_INCORRECT_PARAMS, NULL,
1120                             _("Could not convert '%s' into a number"),
1121                             str);
1122  if ((errno == ERANGE && (val == APR_INT64_MIN || val == APR_INT64_MAX)) ||
1123      val < minval || val > maxval)
1124    /* ### Mark this for translation when gettext doesn't choke on macros. */
1125    return svn_error_createf(SVN_ERR_INCORRECT_PARAMS, NULL,
1126                             "Number '%s' is out of range "
1127                             "'[%" APR_INT64_T_FMT ", %" APR_INT64_T_FMT "]'",
1128                             str, minval, maxval);
1129  *n = val;
1130  return SVN_NO_ERROR;
1131}
1132
1133svn_error_t *
1134svn_cstring_atoi64(apr_int64_t *n, const char *str)
1135{
1136  return svn_error_trace(svn_cstring_strtoi64(n, str, APR_INT64_MIN,
1137                                              APR_INT64_MAX, 10));
1138}
1139
1140svn_error_t *
1141svn_cstring_atoi(int *n, const char *str)
1142{
1143  apr_int64_t val;
1144
1145  SVN_ERR(svn_cstring_strtoi64(&val, str, APR_INT32_MIN, APR_INT32_MAX, 10));
1146  *n = (int)val;
1147  return SVN_NO_ERROR;
1148}
1149
1150unsigned long
1151svn__strtoul(const char* buffer, const char** end)
1152{
1153  unsigned long result = 0;
1154
1155  /* this loop will execute in just 2 CPU cycles, confirmed by measurement:
1156     7 macro-ops (max 4 / cycle => 2 cycles)
1157       1 load (max 1 / cycle)
1158       1 jumps (compare + conditional jump == 1 macro op; max 1 / cycle)
1159       2 arithmetic ops (subtract, increment; max 3 / cycle)
1160       2 scale-and-add AGU ops (max 3 / cycle)
1161       1 compiler-generated move operation
1162     dependency chain: temp = result * 4 + result; result = temp * 2 + c
1163                       (2 ops with latency 1 => 2 cycles)
1164   */
1165  while (1)
1166    {
1167      unsigned long c = (unsigned char)*buffer - (unsigned char)'0';
1168      if (c > 9)
1169        break;
1170
1171      result = result * 10 + c;
1172      ++buffer;
1173    }
1174
1175  *end = buffer;
1176  return result;
1177}
1178
1179/* "Precalculated" itoa values for 2 places (including leading zeros).
1180 * For maximum performance, make sure all table entries are word-aligned.
1181 */
1182static const char decimal_table[100][4]
1183    = { "00", "01", "02", "03", "04", "05", "06", "07", "08", "09"
1184      , "10", "11", "12", "13", "14", "15", "16", "17", "18", "19"
1185      , "20", "21", "22", "23", "24", "25", "26", "27", "28", "29"
1186      , "30", "31", "32", "33", "34", "35", "36", "37", "38", "39"
1187      , "40", "41", "42", "43", "44", "45", "46", "47", "48", "49"
1188      , "50", "51", "52", "53", "54", "55", "56", "57", "58", "59"
1189      , "60", "61", "62", "63", "64", "65", "66", "67", "68", "69"
1190      , "70", "71", "72", "73", "74", "75", "76", "77", "78", "79"
1191      , "80", "81", "82", "83", "84", "85", "86", "87", "88", "89"
1192      , "90", "91", "92", "93", "94", "95", "96", "97", "98", "99"};
1193
1194/* Copy the two bytes at SOURCE[0] and SOURCE[1] to DEST[0] and DEST[1] */
1195#define COPY_TWO_BYTES(dest,source)\
1196  memcpy((dest), (source), 2)
1197
1198apr_size_t
1199svn__ui64toa(char * dest, apr_uint64_t number)
1200{
1201  char buffer[SVN_INT64_BUFFER_SIZE];
1202  apr_uint32_t reduced;   /* used for 32 bit DIV */
1203  char* target;
1204
1205  /* Small numbers are by far the most common case.
1206   * Therefore, we use special code.
1207   */
1208  if (number < 100)
1209    {
1210      if (number < 10)
1211        {
1212          dest[0] = (char)('0' + number);
1213          dest[1] = 0;
1214          return 1;
1215        }
1216      else
1217        {
1218          COPY_TWO_BYTES(dest, decimal_table[(apr_size_t)number]);
1219          dest[2] = 0;
1220          return 2;
1221        }
1222    }
1223
1224  /* Standard code. Write string in pairs of chars back-to-front */
1225  buffer[SVN_INT64_BUFFER_SIZE - 1] = 0;
1226  target = &buffer[SVN_INT64_BUFFER_SIZE - 3];
1227
1228  /* Loop may be executed 0 .. 2 times. */
1229  while (number >= 100000000)
1230    {
1231      /* Number is larger than 100^4, i.e. we can write 4x2 chars.
1232       * Also, use 32 bit DIVs as these are about twice as fast.
1233       */
1234      reduced = (apr_uint32_t)(number % 100000000);
1235      number /= 100000000;
1236
1237      COPY_TWO_BYTES(target - 0, decimal_table[reduced % 100]);
1238      reduced /= 100;
1239      COPY_TWO_BYTES(target - 2, decimal_table[reduced % 100]);
1240      reduced /= 100;
1241      COPY_TWO_BYTES(target - 4, decimal_table[reduced % 100]);
1242      reduced /= 100;
1243      COPY_TWO_BYTES(target - 6, decimal_table[reduced % 100]);
1244      target -= 8;
1245    }
1246
1247  /* Now, the number fits into 32 bits, but may still be larger than 99 */
1248  reduced = (apr_uint32_t)(number);
1249  while (reduced >= 100)
1250    {
1251      COPY_TWO_BYTES(target, decimal_table[reduced % 100]);
1252      reduced /= 100;
1253      target -= 2;
1254    }
1255
1256  /* The number is now smaller than 100 but larger than 1 */
1257  COPY_TWO_BYTES(target, decimal_table[reduced]);
1258
1259  /* Correction for uneven count of places. */
1260  if (reduced < 10)
1261    ++target;
1262
1263  /* Copy to target */
1264  memcpy(dest, target, &buffer[SVN_INT64_BUFFER_SIZE] - target);
1265  return &buffer[SVN_INT64_BUFFER_SIZE] - target - 1;
1266}
1267
1268apr_size_t
1269svn__i64toa(char * dest, apr_int64_t number)
1270{
1271  if (number >= 0)
1272    return svn__ui64toa(dest, (apr_uint64_t)number);
1273
1274  *dest = '-';
1275  return svn__ui64toa(dest + 1, 0 - (apr_uint64_t)number) + 1;
1276}
1277
1278static void
1279ui64toa_sep(apr_uint64_t number, char separator, char *buffer)
1280{
1281  apr_size_t length = svn__ui64toa(buffer, number);
1282  apr_size_t i;
1283
1284  for (i = length; i > 3; i -= 3)
1285    {
1286      memmove(&buffer[i - 2], &buffer[i - 3], length - i + 3);
1287      buffer[i-3] = separator;
1288      length++;
1289    }
1290
1291  buffer[length] = 0;
1292}
1293
1294char *
1295svn__ui64toa_sep(apr_uint64_t number, char separator, apr_pool_t *pool)
1296{
1297  char buffer[2 * SVN_INT64_BUFFER_SIZE];
1298  ui64toa_sep(number, separator, buffer);
1299
1300  return apr_pstrdup(pool, buffer);
1301}
1302
1303char *
1304svn__i64toa_sep(apr_int64_t number, char separator, apr_pool_t *pool)
1305{
1306  char buffer[2 * SVN_INT64_BUFFER_SIZE];
1307  if (number < 0)
1308    {
1309      buffer[0] = '-';
1310      ui64toa_sep((apr_uint64_t)(-number), separator, &buffer[1]);
1311    }
1312  else
1313    ui64toa_sep((apr_uint64_t)(number), separator, buffer);
1314
1315  return apr_pstrdup(pool, buffer);
1316}
1317
1318apr_size_t
1319svn__ui64tobase36(char *dest, apr_uint64_t value)
1320{
1321  char *dest_start = dest;
1322  if (value < 10)
1323    {
1324      /* pretty frequent and trivial case. Make it fast. */
1325      *(dest++) = (char)(value) + '0';
1326    }
1327  else
1328    {
1329      char buffer[SVN_INT64_BUFFER_SIZE];
1330      char *p = buffer;
1331
1332      /* write result as little-endian to buffer */
1333      while (value > 0)
1334        {
1335          char c = (char)(value % 36);
1336          value /= 36;
1337
1338          *p = (c <= 9) ? (c + '0') : (c - 10 + 'a');
1339          ++p;
1340        }
1341
1342      /* copy as big-endian to DEST */
1343      while (p > buffer)
1344        *(dest++) = *(--p);
1345    }
1346
1347  *dest = '\0';
1348  return dest - dest_start;
1349}
1350
1351apr_uint64_t
1352svn__base36toui64(const char **next, const char *source)
1353{
1354  apr_uint64_t result = 0;
1355  apr_uint64_t factor = 1;
1356  int i  = 0;
1357  char digits[SVN_INT64_BUFFER_SIZE];
1358
1359  /* convert digits to numerical values and count the number of places.
1360   * Also, prevent buffer overflow. */
1361  while (i < sizeof(digits))
1362    {
1363      char c = *source;
1364      if (c < 'a')
1365        {
1366          /* includes detection of NUL terminator */
1367          if (c < '0' || c > '9')
1368            break;
1369
1370          c -= '0';
1371        }
1372      else
1373        {
1374          if (c < 'a' || c > 'z')
1375            break;
1376
1377          c -= 'a' - 10;
1378        }
1379
1380      digits[i++] = c;
1381      source++;
1382    }
1383
1384  /* fold digits into the result */
1385  while (i > 0)
1386    {
1387      result += factor * (apr_uint64_t)digits[--i];
1388      factor *= 36;
1389    }
1390
1391  if (next)
1392    *next = source;
1393
1394  return result;
1395}
1396
1397
1398apr_size_t
1399svn_cstring__similarity(const char *stra, const char *strb,
1400                        svn_membuf_t *buffer, apr_size_t *rlcs)
1401{
1402  svn_string_t stringa, stringb;
1403  stringa.data = stra;
1404  stringa.len = strlen(stra);
1405  stringb.data = strb;
1406  stringb.len = strlen(strb);
1407  return svn_string__similarity(&stringa, &stringb, buffer, rlcs);
1408}
1409
1410apr_size_t
1411svn_string__similarity(const svn_string_t *stringa,
1412                       const svn_string_t *stringb,
1413                       svn_membuf_t *buffer, apr_size_t *rlcs)
1414{
1415  const char *stra = stringa->data;
1416  const char *strb = stringb->data;
1417  const apr_size_t lena = stringa->len;
1418  const apr_size_t lenb = stringb->len;
1419  const apr_size_t total = lena + lenb;
1420  const char *enda = stra + lena;
1421  const char *endb = strb + lenb;
1422  apr_size_t lcs = 0;
1423
1424  /* Skip the common prefix ... */
1425  while (stra < enda && strb < endb && *stra == *strb)
1426    {
1427      ++stra; ++strb;
1428      ++lcs;
1429    }
1430
1431  /* ... and the common suffix */
1432  while (stra < enda && strb < endb)
1433    {
1434      --enda; --endb;
1435      if (*enda != *endb)
1436        {
1437          ++enda; ++endb;
1438          break;
1439        }
1440
1441      ++lcs;
1442    }
1443
1444  if (stra < enda && strb < endb)
1445    {
1446      const apr_size_t resta = enda - stra;
1447      const apr_size_t restb = endb - strb;
1448      const apr_size_t slots = (resta > restb ? restb : resta);
1449      apr_size_t *curr, *prev;
1450      const char *pstr;
1451
1452      /* The outer loop must iterate on the longer string. */
1453      if (resta < restb)
1454        {
1455          pstr = stra;
1456          stra = strb;
1457          strb = pstr;
1458
1459          pstr = enda;
1460          enda = endb;
1461          endb = pstr;
1462        }
1463
1464      /* Allocate two columns in the LCS matrix
1465         ### Optimize this to (slots + 2) instesd of 2 * (slots + 1) */
1466      svn_membuf__ensure(buffer, 2 * (slots + 1) * sizeof(apr_size_t));
1467      svn_membuf__nzero(buffer, (slots + 2) * sizeof(apr_size_t));
1468      prev = buffer->data;
1469      curr = prev + slots + 1;
1470
1471      /* Calculate LCS length of the remainder */
1472      for (pstr = stra; pstr < enda; ++pstr)
1473        {
1474          apr_size_t i;
1475          for (i = 1; i <= slots; ++i)
1476            {
1477              if (*pstr == strb[i-1])
1478                curr[i] = prev[i-1] + 1;
1479              else
1480                curr[i] = (curr[i-1] > prev[i] ? curr[i-1] : prev[i]);
1481            }
1482
1483          /* Swap the buffers, making the previous one current */
1484          {
1485            apr_size_t *const temp = prev;
1486            prev = curr;
1487            curr = temp;
1488          }
1489        }
1490
1491      lcs += prev[slots];
1492    }
1493
1494  if (rlcs)
1495    *rlcs = lcs;
1496
1497  /* Return similarity ratio rounded to 4 significant digits */
1498  if (total)
1499    return ((2 * SVN_STRING__SIM_RANGE_MAX * lcs + total/2) / total);
1500  else
1501    return SVN_STRING__SIM_RANGE_MAX;
1502}
1503
1504apr_size_t
1505svn_cstring__match_length(const char *a,
1506                          const char *b,
1507                          apr_size_t max_len)
1508{
1509  apr_size_t pos = 0;
1510
1511#if SVN_UNALIGNED_ACCESS_IS_OK
1512
1513  /* Chunky processing is so much faster ...
1514   *
1515   * We can't make this work on architectures that require aligned access
1516   * because A and B will probably have different alignment. So, skipping
1517   * the first few chars until alignment is reached is not an option.
1518   */
1519  for (; max_len - pos >= sizeof(apr_size_t); pos += sizeof(apr_size_t))
1520    if (*(const apr_size_t*)(a + pos) != *(const apr_size_t*)(b + pos))
1521      break;
1522
1523#endif
1524
1525  for (; pos < max_len; ++pos)
1526    if (a[pos] != b[pos])
1527      break;
1528
1529  return pos;
1530}
1531
1532apr_size_t
1533svn_cstring__reverse_match_length(const char *a,
1534                                  const char *b,
1535                                  apr_size_t max_len)
1536{
1537  apr_size_t pos = 0;
1538
1539#if SVN_UNALIGNED_ACCESS_IS_OK
1540
1541  /* Chunky processing is so much faster ...
1542   *
1543   * We can't make this work on architectures that require aligned access
1544   * because A and B will probably have different alignment. So, skipping
1545   * the first few chars until alignment is reached is not an option.
1546   */
1547  for (pos = sizeof(apr_size_t); pos <= max_len; pos += sizeof(apr_size_t))
1548    if (*(const apr_size_t*)(a - pos) != *(const apr_size_t*)(b - pos))
1549      break;
1550
1551  pos -= sizeof(apr_size_t);
1552
1553#endif
1554
1555  /* If we find a mismatch at -pos, pos-1 characters matched.
1556   */
1557  while (++pos <= max_len)
1558    if (a[0-pos] != b[0-pos])
1559      return pos - 1;
1560
1561  /* No mismatch found -> at least MAX_LEN matching chars.
1562   */
1563  return max_len;
1564}
1565
1566const char *
1567svn_cstring_skip_prefix(const char *str, const char *prefix)
1568{
1569  apr_size_t len = strlen(prefix);
1570
1571  if (strncmp(str, prefix, len) == 0)
1572    {
1573      return str + len;
1574    }
1575  else
1576    {
1577      return NULL;
1578    }
1579}
1580