1/* 2 * string.c: routines to manipulate counted-length strings 3 * (svn_stringbuf_t and svn_string_t) and C strings. 4 * 5 * 6 * ==================================================================== 7 * Licensed to the Apache Software Foundation (ASF) under one 8 * or more contributor license agreements. See the NOTICE file 9 * distributed with this work for additional information 10 * regarding copyright ownership. The ASF licenses this file 11 * to you under the Apache License, Version 2.0 (the 12 * "License"); you may not use this file except in compliance 13 * with the License. You may obtain a copy of the License at 14 * 15 * http://www.apache.org/licenses/LICENSE-2.0 16 * 17 * Unless required by applicable law or agreed to in writing, 18 * software distributed under the License is distributed on an 19 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 20 * KIND, either express or implied. See the License for the 21 * specific language governing permissions and limitations 22 * under the License. 23 * ==================================================================== 24 */ 25 26 27 28#include <apr.h> 29 30#include <string.h> /* for memcpy(), memcmp(), strlen() */ 31#include <apr_fnmatch.h> 32#include "svn_string.h" /* loads "svn_types.h" and <apr_pools.h> */ 33#include "svn_ctype.h" 34#include "private/svn_dep_compat.h" 35#include "private/svn_string_private.h" 36 37#include "svn_private_config.h" 38 39 40 41/* Allocate the space for a memory buffer from POOL. 42 * Return a pointer to the new buffer in *DATA and its size in *SIZE. 43 * The buffer size will be at least MINIMUM_SIZE. 44 * 45 * N.B.: The stringbuf creation functions use this, but since stringbufs 46 * always consume at least 1 byte for the NUL terminator, the 47 * resulting data pointers will never be NULL. 48 */ 49static APR_INLINE void 50membuf_create(void **data, apr_size_t *size, 51 apr_size_t minimum_size, apr_pool_t *pool) 52{ 53 /* apr_palloc will allocate multiples of 8. 54 * Thus, we would waste some of that memory if we stuck to the 55 * smaller size. Note that this is safe even if apr_palloc would 56 * use some other aligment or none at all. */ 57 minimum_size = APR_ALIGN_DEFAULT(minimum_size); 58 *data = (!minimum_size ? NULL : apr_palloc(pool, minimum_size)); 59 *size = minimum_size; 60} 61 62/* Ensure that the size of a given memory buffer is at least MINIMUM_SIZE 63 * bytes. If *SIZE is already greater than or equal to MINIMUM_SIZE, 64 * this function does nothing. 65 * 66 * If *SIZE is 0, the allocated buffer size will be MINIMUM_SIZE 67 * rounded up to the nearest APR alignment boundary. Otherwse, *SIZE 68 * will be multiplied by a power of two such that the result is 69 * greater or equal to MINIMUM_SIZE. The pointer to the new buffer 70 * will be returned in *DATA, and its size in *SIZE. 71 */ 72static APR_INLINE void 73membuf_ensure(void **data, apr_size_t *size, 74 apr_size_t minimum_size, apr_pool_t *pool) 75{ 76 if (minimum_size > *size) 77 { 78 apr_size_t new_size = *size; 79 80 if (new_size == 0) 81 /* APR will increase odd allocation sizes to the next 82 * multiple for 8, for instance. Take advantage of that 83 * knowledge and allow for the extra size to be used. */ 84 new_size = minimum_size; 85 else 86 while (new_size < minimum_size) 87 { 88 /* new_size is aligned; doubling it should keep it aligned */ 89 const apr_size_t prev_size = new_size; 90 new_size *= 2; 91 92 /* check for apr_size_t overflow */ 93 if (prev_size > new_size) 94 { 95 new_size = minimum_size; 96 break; 97 } 98 } 99 100 membuf_create(data, size, new_size, pool); 101 } 102} 103 104void 105svn_membuf__create(svn_membuf_t *membuf, apr_size_t size, apr_pool_t *pool) 106{ 107 membuf_create(&membuf->data, &membuf->size, size, pool); 108 membuf->pool = pool; 109} 110 111void 112svn_membuf__ensure(svn_membuf_t *membuf, apr_size_t size) 113{ 114 membuf_ensure(&membuf->data, &membuf->size, size, membuf->pool); 115} 116 117void 118svn_membuf__resize(svn_membuf_t *membuf, apr_size_t size) 119{ 120 const void *const old_data = membuf->data; 121 const apr_size_t old_size = membuf->size; 122 123 membuf_ensure(&membuf->data, &membuf->size, size, membuf->pool); 124 if (membuf->data && old_data && old_data != membuf->data) 125 memcpy(membuf->data, old_data, old_size); 126} 127 128/* Always provide an out-of-line implementation of svn_membuf__zero */ 129#undef svn_membuf__zero 130void 131svn_membuf__zero(svn_membuf_t *membuf) 132{ 133 SVN_MEMBUF__ZERO(membuf); 134} 135 136/* Always provide an out-of-line implementation of svn_membuf__nzero */ 137#undef svn_membuf__nzero 138void 139svn_membuf__nzero(svn_membuf_t *membuf, apr_size_t size) 140{ 141 SVN_MEMBUF__NZERO(membuf, size); 142} 143 144static APR_INLINE svn_boolean_t 145string_compare(const char *str1, 146 const char *str2, 147 apr_size_t len1, 148 apr_size_t len2) 149{ 150 /* easy way out :) */ 151 if (len1 != len2) 152 return FALSE; 153 154 /* now the strings must have identical lenghths */ 155 156 if ((memcmp(str1, str2, len1)) == 0) 157 return TRUE; 158 else 159 return FALSE; 160} 161 162static APR_INLINE apr_size_t 163string_first_non_whitespace(const char *str, apr_size_t len) 164{ 165 apr_size_t i; 166 167 for (i = 0; i < len; i++) 168 { 169 if (! svn_ctype_isspace(str[i])) 170 return i; 171 } 172 173 /* if we get here, then the string must be entirely whitespace */ 174 return len; 175} 176 177static APR_INLINE apr_size_t 178find_char_backward(const char *str, apr_size_t len, char ch) 179{ 180 apr_size_t i = len; 181 182 while (i != 0) 183 { 184 if (str[--i] == ch) 185 return i; 186 } 187 188 /* char was not found, return len */ 189 return len; 190} 191 192 193/* svn_string functions */ 194 195/* Return a new svn_string_t object, allocated in POOL, initialized with 196 * DATA and SIZE. Do not copy the contents of DATA, just store the pointer. 197 * SIZE is the length in bytes of DATA, excluding the required NUL 198 * terminator. */ 199static svn_string_t * 200create_string(const char *data, apr_size_t size, 201 apr_pool_t *pool) 202{ 203 svn_string_t *new_string; 204 205 new_string = apr_palloc(pool, sizeof(*new_string)); 206 207 new_string->data = data; 208 new_string->len = size; 209 210 return new_string; 211} 212 213/* A data buffer for a zero-length string (just a null terminator). Many 214 * svn_string_t instances may share this same buffer. */ 215static const char empty_buffer[1] = {0}; 216 217svn_string_t * 218svn_string_create_empty(apr_pool_t *pool) 219{ 220 svn_string_t *new_string = apr_palloc(pool, sizeof(*new_string)); 221 new_string->data = empty_buffer; 222 new_string->len = 0; 223 224 return new_string; 225} 226 227 228svn_string_t * 229svn_string_ncreate(const char *bytes, apr_size_t size, apr_pool_t *pool) 230{ 231 void *mem; 232 char *data; 233 svn_string_t *new_string; 234 235 /* Allocate memory for svn_string_t and data in one chunk. */ 236 mem = apr_palloc(pool, sizeof(*new_string) + size + 1); 237 data = (char*)mem + sizeof(*new_string); 238 239 new_string = mem; 240 new_string->data = data; 241 new_string->len = size; 242 243 memcpy(data, bytes, size); 244 245 /* Null termination is the convention -- even if we suspect the data 246 to be binary, it's not up to us to decide, it's the caller's 247 call. Heck, that's why they call it the caller! */ 248 data[size] = '\0'; 249 250 return new_string; 251} 252 253 254svn_string_t * 255svn_string_create(const char *cstring, apr_pool_t *pool) 256{ 257 return svn_string_ncreate(cstring, strlen(cstring), pool); 258} 259 260 261svn_string_t * 262svn_string_create_from_buf(const svn_stringbuf_t *strbuf, apr_pool_t *pool) 263{ 264 return svn_string_ncreate(strbuf->data, strbuf->len, pool); 265} 266 267 268svn_string_t * 269svn_string_createv(apr_pool_t *pool, const char *fmt, va_list ap) 270{ 271 char *data = apr_pvsprintf(pool, fmt, ap); 272 273 /* wrap an svn_string_t around the new data */ 274 return create_string(data, strlen(data), pool); 275} 276 277 278svn_string_t * 279svn_string_createf(apr_pool_t *pool, const char *fmt, ...) 280{ 281 svn_string_t *str; 282 283 va_list ap; 284 va_start(ap, fmt); 285 str = svn_string_createv(pool, fmt, ap); 286 va_end(ap); 287 288 return str; 289} 290 291 292svn_boolean_t 293svn_string_isempty(const svn_string_t *str) 294{ 295 return (str->len == 0); 296} 297 298 299svn_string_t * 300svn_string_dup(const svn_string_t *original_string, apr_pool_t *pool) 301{ 302 return (svn_string_ncreate(original_string->data, 303 original_string->len, pool)); 304} 305 306 307 308svn_boolean_t 309svn_string_compare(const svn_string_t *str1, const svn_string_t *str2) 310{ 311 return 312 string_compare(str1->data, str2->data, str1->len, str2->len); 313} 314 315 316 317apr_size_t 318svn_string_first_non_whitespace(const svn_string_t *str) 319{ 320 return 321 string_first_non_whitespace(str->data, str->len); 322} 323 324 325apr_size_t 326svn_string_find_char_backward(const svn_string_t *str, char ch) 327{ 328 return find_char_backward(str->data, str->len, ch); 329} 330 331svn_string_t * 332svn_stringbuf__morph_into_string(svn_stringbuf_t *strbuf) 333{ 334 /* In debug mode, detect attempts to modify the original STRBUF object. 335 */ 336#ifdef SVN_DEBUG 337 strbuf->pool = NULL; 338 strbuf->blocksize = strbuf->len + 1; 339#endif 340 341 /* Both, svn_string_t and svn_stringbuf_t are public API structures 342 * since the svn epoch. Thus, we can rely on their precise layout not 343 * to change. 344 * 345 * It just so happens that svn_string_t is structurally equivalent 346 * to the (data, len) sub-set of svn_stringbuf_t. There is also no 347 * difference in alignment and padding. So, we can just re-interpret 348 * that part of STRBUF as a svn_string_t. 349 * 350 * However, since svn_string_t does not know about the blocksize 351 * member in svn_stringbuf_t, any attempt to re-size the returned 352 * svn_string_t might invalidate the STRBUF struct. Hence, we consider 353 * the source STRBUF "consumed". 354 * 355 * Modifying the string character content is fine, though. 356 */ 357 return (svn_string_t *)&strbuf->data; 358} 359 360 361 362/* svn_stringbuf functions */ 363 364svn_stringbuf_t * 365svn_stringbuf_create_empty(apr_pool_t *pool) 366{ 367 return svn_stringbuf_create_ensure(0, pool); 368} 369 370svn_stringbuf_t * 371svn_stringbuf_create_ensure(apr_size_t blocksize, apr_pool_t *pool) 372{ 373 void *mem; 374 svn_stringbuf_t *new_string; 375 376 ++blocksize; /* + space for '\0' */ 377 378 /* Allocate memory for svn_string_t and data in one chunk. */ 379 membuf_create(&mem, &blocksize, blocksize + sizeof(*new_string), pool); 380 381 /* Initialize header and string */ 382 new_string = mem; 383 new_string->data = (char*)mem + sizeof(*new_string); 384 new_string->data[0] = '\0'; 385 new_string->len = 0; 386 new_string->blocksize = blocksize - sizeof(*new_string); 387 new_string->pool = pool; 388 389 return new_string; 390} 391 392svn_stringbuf_t * 393svn_stringbuf_ncreate(const char *bytes, apr_size_t size, apr_pool_t *pool) 394{ 395 svn_stringbuf_t *strbuf = svn_stringbuf_create_ensure(size, pool); 396 memcpy(strbuf->data, bytes, size); 397 398 /* Null termination is the convention -- even if we suspect the data 399 to be binary, it's not up to us to decide, it's the caller's 400 call. Heck, that's why they call it the caller! */ 401 strbuf->data[size] = '\0'; 402 strbuf->len = size; 403 404 return strbuf; 405} 406 407 408svn_stringbuf_t * 409svn_stringbuf_create(const char *cstring, apr_pool_t *pool) 410{ 411 return svn_stringbuf_ncreate(cstring, strlen(cstring), pool); 412} 413 414 415svn_stringbuf_t * 416svn_stringbuf_create_from_string(const svn_string_t *str, apr_pool_t *pool) 417{ 418 return svn_stringbuf_ncreate(str->data, str->len, pool); 419} 420 421 422svn_stringbuf_t * 423svn_stringbuf_createv(apr_pool_t *pool, const char *fmt, va_list ap) 424{ 425 char *data = apr_pvsprintf(pool, fmt, ap); 426 apr_size_t size = strlen(data); 427 svn_stringbuf_t *new_string; 428 429 new_string = apr_palloc(pool, sizeof(*new_string)); 430 new_string->data = data; 431 new_string->len = size; 432 new_string->blocksize = size + 1; 433 new_string->pool = pool; 434 435 return new_string; 436} 437 438 439svn_stringbuf_t * 440svn_stringbuf_createf(apr_pool_t *pool, const char *fmt, ...) 441{ 442 svn_stringbuf_t *str; 443 444 va_list ap; 445 va_start(ap, fmt); 446 str = svn_stringbuf_createv(pool, fmt, ap); 447 va_end(ap); 448 449 return str; 450} 451 452 453void 454svn_stringbuf_fillchar(svn_stringbuf_t *str, unsigned char c) 455{ 456 memset(str->data, c, str->len); 457} 458 459 460void 461svn_stringbuf_set(svn_stringbuf_t *str, const char *value) 462{ 463 apr_size_t amt = strlen(value); 464 465 svn_stringbuf_ensure(str, amt); 466 memcpy(str->data, value, amt + 1); 467 str->len = amt; 468} 469 470void 471svn_stringbuf_setempty(svn_stringbuf_t *str) 472{ 473 if (str->len > 0) 474 str->data[0] = '\0'; 475 476 str->len = 0; 477} 478 479 480void 481svn_stringbuf_chop(svn_stringbuf_t *str, apr_size_t nbytes) 482{ 483 if (nbytes > str->len) 484 str->len = 0; 485 else 486 str->len -= nbytes; 487 488 str->data[str->len] = '\0'; 489} 490 491 492svn_boolean_t 493svn_stringbuf_isempty(const svn_stringbuf_t *str) 494{ 495 return (str->len == 0); 496} 497 498 499void 500svn_stringbuf_ensure(svn_stringbuf_t *str, apr_size_t minimum_size) 501{ 502 void *mem = NULL; 503 ++minimum_size; /* + space for '\0' */ 504 505 membuf_ensure(&mem, &str->blocksize, minimum_size, str->pool); 506 if (mem && mem != str->data) 507 { 508 if (str->data) 509 memcpy(mem, str->data, str->len + 1); 510 str->data = mem; 511 } 512} 513 514 515/* WARNING - Optimized code ahead! 516 * This function has been hand-tuned for performance. Please read 517 * the comments below before modifying the code. 518 */ 519void 520svn_stringbuf_appendbyte(svn_stringbuf_t *str, char byte) 521{ 522 char *dest; 523 apr_size_t old_len = str->len; 524 525 /* In most cases, there will be pre-allocated memory left 526 * to just write the new byte at the end of the used section 527 * and terminate the string properly. 528 */ 529 if (str->blocksize > old_len + 1) 530 { 531 /* The following read does not depend this write, so we 532 * can issue the write first to minimize register pressure: 533 * The value of old_len+1 is no longer needed; on most processors, 534 * dest[old_len+1] will be calculated implicitly as part of 535 * the addressing scheme. 536 */ 537 str->len = old_len+1; 538 539 /* Since the compiler cannot be sure that *src->data and *src 540 * don't overlap, we read src->data *once* before writing 541 * to *src->data. Replacing dest with str->data would force 542 * the compiler to read it again after the first byte. 543 */ 544 dest = str->data; 545 546 /* If not already available in a register as per ABI, load 547 * "byte" into the register (e.g. the one freed from old_len+1), 548 * then write it to the string buffer and terminate it properly. 549 * 550 * Including the "byte" fetch, all operations so far could be 551 * issued at once and be scheduled at the CPU's descression. 552 * Most likely, no-one will soon depend on the data that will be 553 * written in this function. So, no stalls there, either. 554 */ 555 dest[old_len] = byte; 556 dest[old_len+1] = '\0'; 557 } 558 else 559 { 560 /* we need to re-allocate the string buffer 561 * -> let the more generic implementation take care of that part 562 */ 563 564 /* Depending on the ABI, "byte" is a register value. If we were 565 * to take its address directly, the compiler might decide to 566 * put in on the stack *unconditionally*, even if that would 567 * only be necessary for this block. 568 */ 569 char b = byte; 570 svn_stringbuf_appendbytes(str, &b, 1); 571 } 572} 573 574 575void 576svn_stringbuf_appendbytes(svn_stringbuf_t *str, const char *bytes, 577 apr_size_t count) 578{ 579 apr_size_t total_len; 580 void *start_address; 581 582 total_len = str->len + count; /* total size needed */ 583 584 /* svn_stringbuf_ensure adds 1 for null terminator. */ 585 svn_stringbuf_ensure(str, total_len); 586 587 /* get address 1 byte beyond end of original bytestring */ 588 start_address = (str->data + str->len); 589 590 memcpy(start_address, bytes, count); 591 str->len = total_len; 592 593 str->data[str->len] = '\0'; /* We don't know if this is binary 594 data or not, but convention is 595 to null-terminate. */ 596} 597 598 599void 600svn_stringbuf_appendstr(svn_stringbuf_t *targetstr, 601 const svn_stringbuf_t *appendstr) 602{ 603 svn_stringbuf_appendbytes(targetstr, appendstr->data, appendstr->len); 604} 605 606 607void 608svn_stringbuf_appendcstr(svn_stringbuf_t *targetstr, const char *cstr) 609{ 610 svn_stringbuf_appendbytes(targetstr, cstr, strlen(cstr)); 611} 612 613void 614svn_stringbuf_insert(svn_stringbuf_t *str, 615 apr_size_t pos, 616 const char *bytes, 617 apr_size_t count) 618{ 619 if (bytes + count > str->data && bytes < str->data + str->blocksize) 620 { 621 /* special case: BYTES overlaps with this string -> copy the source */ 622 const char *temp = apr_pstrndup(str->pool, bytes, count); 623 svn_stringbuf_insert(str, pos, temp, count); 624 } 625 else 626 { 627 if (pos > str->len) 628 pos = str->len; 629 630 svn_stringbuf_ensure(str, str->len + count); 631 memmove(str->data + pos + count, str->data + pos, str->len - pos + 1); 632 memcpy(str->data + pos, bytes, count); 633 634 str->len += count; 635 } 636} 637 638void 639svn_stringbuf_remove(svn_stringbuf_t *str, 640 apr_size_t pos, 641 apr_size_t count) 642{ 643 if (pos > str->len) 644 pos = str->len; 645 if (pos + count > str->len) 646 count = str->len - pos; 647 648 memmove(str->data + pos, str->data + pos + count, str->len - pos - count + 1); 649 str->len -= count; 650} 651 652void 653svn_stringbuf_replace(svn_stringbuf_t *str, 654 apr_size_t pos, 655 apr_size_t old_count, 656 const char *bytes, 657 apr_size_t new_count) 658{ 659 if (bytes + new_count > str->data && bytes < str->data + str->blocksize) 660 { 661 /* special case: BYTES overlaps with this string -> copy the source */ 662 const char *temp = apr_pstrndup(str->pool, bytes, new_count); 663 svn_stringbuf_replace(str, pos, old_count, temp, new_count); 664 } 665 else 666 { 667 if (pos > str->len) 668 pos = str->len; 669 if (pos + old_count > str->len) 670 old_count = str->len - pos; 671 672 if (old_count < new_count) 673 { 674 apr_size_t delta = new_count - old_count; 675 svn_stringbuf_ensure(str, str->len + delta); 676 } 677 678 if (old_count != new_count) 679 memmove(str->data + pos + new_count, str->data + pos + old_count, 680 str->len - pos - old_count + 1); 681 682 memcpy(str->data + pos, bytes, new_count); 683 str->len += new_count - old_count; 684 } 685} 686 687 688svn_stringbuf_t * 689svn_stringbuf_dup(const svn_stringbuf_t *original_string, apr_pool_t *pool) 690{ 691 return (svn_stringbuf_ncreate(original_string->data, 692 original_string->len, pool)); 693} 694 695 696 697svn_boolean_t 698svn_stringbuf_compare(const svn_stringbuf_t *str1, 699 const svn_stringbuf_t *str2) 700{ 701 return string_compare(str1->data, str2->data, str1->len, str2->len); 702} 703 704 705 706apr_size_t 707svn_stringbuf_first_non_whitespace(const svn_stringbuf_t *str) 708{ 709 return string_first_non_whitespace(str->data, str->len); 710} 711 712 713void 714svn_stringbuf_strip_whitespace(svn_stringbuf_t *str) 715{ 716 /* Find first non-whitespace character */ 717 apr_size_t offset = svn_stringbuf_first_non_whitespace(str); 718 719 /* Go ahead! Waste some RAM, we've got pools! :) */ 720 str->data += offset; 721 str->len -= offset; 722 str->blocksize -= offset; 723 724 /* Now that we've trimmed the front, trim the end, wasting more RAM. */ 725 while ((str->len > 0) && svn_ctype_isspace(str->data[str->len - 1])) 726 str->len--; 727 str->data[str->len] = '\0'; 728} 729 730 731apr_size_t 732svn_stringbuf_find_char_backward(const svn_stringbuf_t *str, char ch) 733{ 734 return find_char_backward(str->data, str->len, ch); 735} 736 737 738svn_boolean_t 739svn_string_compare_stringbuf(const svn_string_t *str1, 740 const svn_stringbuf_t *str2) 741{ 742 return string_compare(str1->data, str2->data, str1->len, str2->len); 743} 744 745 746 747/*** C string stuff. ***/ 748 749void 750svn_cstring_split_append(apr_array_header_t *array, 751 const char *input, 752 const char *sep_chars, 753 svn_boolean_t chop_whitespace, 754 apr_pool_t *pool) 755{ 756 char *pats; 757 char *p; 758 759 pats = apr_pstrdup(pool, input); /* strtok wants non-const data */ 760 p = svn_cstring_tokenize(sep_chars, &pats); 761 762 while (p) 763 { 764 if (chop_whitespace) 765 { 766 while (svn_ctype_isspace(*p)) 767 p++; 768 769 { 770 char *e = p + (strlen(p) - 1); 771 while ((e >= p) && (svn_ctype_isspace(*e))) 772 e--; 773 *(++e) = '\0'; 774 } 775 } 776 777 if (p[0] != '\0') 778 APR_ARRAY_PUSH(array, const char *) = p; 779 780 p = svn_cstring_tokenize(sep_chars, &pats); 781 } 782 783 return; 784} 785 786 787apr_array_header_t * 788svn_cstring_split(const char *input, 789 const char *sep_chars, 790 svn_boolean_t chop_whitespace, 791 apr_pool_t *pool) 792{ 793 apr_array_header_t *a = apr_array_make(pool, 5, sizeof(input)); 794 svn_cstring_split_append(a, input, sep_chars, chop_whitespace, pool); 795 return a; 796} 797 798 799svn_boolean_t svn_cstring_match_glob_list(const char *str, 800 const apr_array_header_t *list) 801{ 802 int i; 803 804 for (i = 0; i < list->nelts; i++) 805 { 806 const char *this_pattern = APR_ARRAY_IDX(list, i, char *); 807 808 if (apr_fnmatch(this_pattern, str, 0) == APR_SUCCESS) 809 return TRUE; 810 } 811 812 return FALSE; 813} 814 815svn_boolean_t 816svn_cstring_match_list(const char *str, const apr_array_header_t *list) 817{ 818 int i; 819 820 for (i = 0; i < list->nelts; i++) 821 { 822 const char *this_str = APR_ARRAY_IDX(list, i, char *); 823 824 if (strcmp(this_str, str) == 0) 825 return TRUE; 826 } 827 828 return FALSE; 829} 830 831char * 832svn_cstring_tokenize(const char *sep, char **str) 833{ 834 char *token; 835 const char * next; 836 char csep; 837 838 /* check parameters */ 839 if ((sep == NULL) || (str == NULL) || (*str == NULL)) 840 return NULL; 841 842 /* let APR handle edge cases and multiple separators */ 843 csep = *sep; 844 if (csep == '\0' || sep[1] != '\0') 845 return apr_strtok(NULL, sep, str); 846 847 /* skip characters in sep (will terminate at '\0') */ 848 token = *str; 849 while (*token == csep) 850 ++token; 851 852 if (!*token) /* no more tokens */ 853 return NULL; 854 855 /* skip valid token characters to terminate token and 856 * prepare for the next call (will terminate at '\0) 857 */ 858 next = strchr(token, csep); 859 if (next == NULL) 860 { 861 *str = token + strlen(token); 862 } 863 else 864 { 865 *(char *)next = '\0'; 866 *str = (char *)next + 1; 867 } 868 869 return token; 870} 871 872int svn_cstring_count_newlines(const char *msg) 873{ 874 int count = 0; 875 const char *p; 876 877 for (p = msg; *p; p++) 878 { 879 if (*p == '\n') 880 { 881 count++; 882 if (*(p + 1) == '\r') 883 p++; 884 } 885 else if (*p == '\r') 886 { 887 count++; 888 if (*(p + 1) == '\n') 889 p++; 890 } 891 } 892 893 return count; 894} 895 896char * 897svn_cstring_join(const apr_array_header_t *strings, 898 const char *separator, 899 apr_pool_t *pool) 900{ 901 svn_stringbuf_t *new_str = svn_stringbuf_create_empty(pool); 902 size_t sep_len = strlen(separator); 903 int i; 904 905 for (i = 0; i < strings->nelts; i++) 906 { 907 const char *string = APR_ARRAY_IDX(strings, i, const char *); 908 svn_stringbuf_appendbytes(new_str, string, strlen(string)); 909 svn_stringbuf_appendbytes(new_str, separator, sep_len); 910 } 911 return new_str->data; 912} 913 914int 915svn_cstring_casecmp(const char *str1, const char *str2) 916{ 917 for (;;) 918 { 919 const int a = *str1++; 920 const int b = *str2++; 921 const int cmp = svn_ctype_casecmp(a, b); 922 if (cmp || !a || !b) 923 return cmp; 924 } 925} 926 927svn_error_t * 928svn_cstring_strtoui64(apr_uint64_t *n, const char *str, 929 apr_uint64_t minval, apr_uint64_t maxval, 930 int base) 931{ 932 apr_int64_t val; 933 char *endptr; 934 935 /* We assume errno is thread-safe. */ 936 errno = 0; /* APR-0.9 doesn't always set errno */ 937 938 /* ### We're throwing away half the number range here. 939 * ### APR needs a apr_strtoui64() function. */ 940 val = apr_strtoi64(str, &endptr, base); 941 if (errno == EINVAL || endptr == str || str[0] == '\0' || *endptr != '\0') 942 return svn_error_createf(SVN_ERR_INCORRECT_PARAMS, NULL, 943 _("Could not convert '%s' into a number"), 944 str); 945 if ((errno == ERANGE && (val == APR_INT64_MIN || val == APR_INT64_MAX)) || 946 val < 0 || (apr_uint64_t)val < minval || (apr_uint64_t)val > maxval) 947 /* ### Mark this for translation when gettext doesn't choke on macros. */ 948 return svn_error_createf(SVN_ERR_INCORRECT_PARAMS, NULL, 949 "Number '%s' is out of range " 950 "'[%" APR_UINT64_T_FMT ", %" APR_UINT64_T_FMT "]'", 951 str, minval, maxval); 952 *n = val; 953 return SVN_NO_ERROR; 954} 955 956svn_error_t * 957svn_cstring_atoui64(apr_uint64_t *n, const char *str) 958{ 959 return svn_error_trace(svn_cstring_strtoui64(n, str, 0, 960 APR_UINT64_MAX, 10)); 961} 962 963svn_error_t * 964svn_cstring_atoui(unsigned int *n, const char *str) 965{ 966 apr_uint64_t val; 967 968 SVN_ERR(svn_cstring_strtoui64(&val, str, 0, APR_UINT32_MAX, 10)); 969 *n = (unsigned int)val; 970 return SVN_NO_ERROR; 971} 972 973svn_error_t * 974svn_cstring_strtoi64(apr_int64_t *n, const char *str, 975 apr_int64_t minval, apr_int64_t maxval, 976 int base) 977{ 978 apr_int64_t val; 979 char *endptr; 980 981 /* We assume errno is thread-safe. */ 982 errno = 0; /* APR-0.9 doesn't always set errno */ 983 984 val = apr_strtoi64(str, &endptr, base); 985 if (errno == EINVAL || endptr == str || str[0] == '\0' || *endptr != '\0') 986 return svn_error_createf(SVN_ERR_INCORRECT_PARAMS, NULL, 987 _("Could not convert '%s' into a number"), 988 str); 989 if ((errno == ERANGE && (val == APR_INT64_MIN || val == APR_INT64_MAX)) || 990 val < minval || val > maxval) 991 /* ### Mark this for translation when gettext doesn't choke on macros. */ 992 return svn_error_createf(SVN_ERR_INCORRECT_PARAMS, NULL, 993 "Number '%s' is out of range " 994 "'[%" APR_INT64_T_FMT ", %" APR_INT64_T_FMT "]'", 995 str, minval, maxval); 996 *n = val; 997 return SVN_NO_ERROR; 998} 999 1000svn_error_t * 1001svn_cstring_atoi64(apr_int64_t *n, const char *str) 1002{ 1003 return svn_error_trace(svn_cstring_strtoi64(n, str, APR_INT64_MIN, 1004 APR_INT64_MAX, 10)); 1005} 1006 1007svn_error_t * 1008svn_cstring_atoi(int *n, const char *str) 1009{ 1010 apr_int64_t val; 1011 1012 SVN_ERR(svn_cstring_strtoi64(&val, str, APR_INT32_MIN, APR_INT32_MAX, 10)); 1013 *n = (int)val; 1014 return SVN_NO_ERROR; 1015} 1016 1017 1018apr_status_t 1019svn__strtoff(apr_off_t *offset, const char *buf, char **end, int base) 1020{ 1021#if !APR_VERSION_AT_LEAST(1,0,0) 1022 errno = 0; 1023 *offset = strtol(buf, end, base); 1024 return APR_FROM_OS_ERROR(errno); 1025#else 1026 return apr_strtoff(offset, buf, end, base); 1027#endif 1028} 1029 1030/* "Precalculated" itoa values for 2 places (including leading zeros). 1031 * For maximum performance, make sure all table entries are word-aligned. 1032 */ 1033static const char decimal_table[100][4] 1034 = { "00", "01", "02", "03", "04", "05", "06", "07", "08", "09" 1035 , "10", "11", "12", "13", "14", "15", "16", "17", "18", "19" 1036 , "20", "21", "22", "23", "24", "25", "26", "27", "28", "29" 1037 , "30", "31", "32", "33", "34", "35", "36", "37", "38", "39" 1038 , "40", "41", "42", "43", "44", "45", "46", "47", "48", "49" 1039 , "50", "51", "52", "53", "54", "55", "56", "57", "58", "59" 1040 , "60", "61", "62", "63", "64", "65", "66", "67", "68", "69" 1041 , "70", "71", "72", "73", "74", "75", "76", "77", "78", "79" 1042 , "80", "81", "82", "83", "84", "85", "86", "87", "88", "89" 1043 , "90", "91", "92", "93", "94", "95", "96", "97", "98", "99"}; 1044 1045/* Copy the two bytes at SOURCE[0] and SOURCE[1] to DEST[0] and DEST[1] */ 1046#define COPY_TWO_BYTES(dest,source)\ 1047 memcpy((dest), (source), 2) 1048 1049apr_size_t 1050svn__ui64toa(char * dest, apr_uint64_t number) 1051{ 1052 char buffer[SVN_INT64_BUFFER_SIZE]; 1053 apr_uint32_t reduced; /* used for 32 bit DIV */ 1054 char* target; 1055 1056 /* Small numbers are by far the most common case. 1057 * Therefore, we use special code. 1058 */ 1059 if (number < 100) 1060 { 1061 if (number < 10) 1062 { 1063 dest[0] = (char)('0' + number); 1064 dest[1] = 0; 1065 return 1; 1066 } 1067 else 1068 { 1069 COPY_TWO_BYTES(dest, decimal_table[(apr_size_t)number]); 1070 dest[2] = 0; 1071 return 2; 1072 } 1073 } 1074 1075 /* Standard code. Write string in pairs of chars back-to-front */ 1076 buffer[SVN_INT64_BUFFER_SIZE - 1] = 0; 1077 target = &buffer[SVN_INT64_BUFFER_SIZE - 3]; 1078 1079 /* Loop may be executed 0 .. 2 times. */ 1080 while (number >= 100000000) 1081 { 1082 /* Number is larger than 100^4, i.e. we can write 4x2 chars. 1083 * Also, use 32 bit DIVs as these are about twice as fast. 1084 */ 1085 reduced = (apr_uint32_t)(number % 100000000); 1086 number /= 100000000; 1087 1088 COPY_TWO_BYTES(target - 0, decimal_table[reduced % 100]); 1089 reduced /= 100; 1090 COPY_TWO_BYTES(target - 2, decimal_table[reduced % 100]); 1091 reduced /= 100; 1092 COPY_TWO_BYTES(target - 4, decimal_table[reduced % 100]); 1093 reduced /= 100; 1094 COPY_TWO_BYTES(target - 6, decimal_table[reduced % 100]); 1095 target -= 8; 1096 } 1097 1098 /* Now, the number fits into 32 bits, but may still be larger than 99 */ 1099 reduced = (apr_uint32_t)(number); 1100 while (reduced >= 100) 1101 { 1102 COPY_TWO_BYTES(target, decimal_table[reduced % 100]); 1103 reduced /= 100; 1104 target -= 2; 1105 } 1106 1107 /* The number is now smaller than 100 but larger than 1 */ 1108 COPY_TWO_BYTES(target, decimal_table[reduced]); 1109 1110 /* Correction for uneven count of places. */ 1111 if (reduced < 10) 1112 ++target; 1113 1114 /* Copy to target */ 1115 memcpy(dest, target, &buffer[SVN_INT64_BUFFER_SIZE] - target); 1116 return &buffer[SVN_INT64_BUFFER_SIZE] - target - 1; 1117} 1118 1119apr_size_t 1120svn__i64toa(char * dest, apr_int64_t number) 1121{ 1122 if (number >= 0) 1123 return svn__ui64toa(dest, (apr_uint64_t)number); 1124 1125 *dest = '-'; 1126 return svn__ui64toa(dest + 1, (apr_uint64_t)(0-number)) + 1; 1127} 1128 1129static void 1130ui64toa_sep(apr_uint64_t number, char seperator, char *buffer) 1131{ 1132 apr_size_t length = svn__ui64toa(buffer, number); 1133 apr_size_t i; 1134 1135 for (i = length; i > 3; i -= 3) 1136 { 1137 memmove(&buffer[i - 2], &buffer[i - 3], length - i + 3); 1138 buffer[i-3] = seperator; 1139 length++; 1140 } 1141 1142 buffer[length] = 0; 1143} 1144 1145char * 1146svn__ui64toa_sep(apr_uint64_t number, char seperator, apr_pool_t *pool) 1147{ 1148 char buffer[2 * SVN_INT64_BUFFER_SIZE]; 1149 ui64toa_sep(number, seperator, buffer); 1150 1151 return apr_pstrdup(pool, buffer); 1152} 1153 1154char * 1155svn__i64toa_sep(apr_int64_t number, char seperator, apr_pool_t *pool) 1156{ 1157 char buffer[2 * SVN_INT64_BUFFER_SIZE]; 1158 if (number < 0) 1159 { 1160 buffer[0] = '-'; 1161 ui64toa_sep((apr_uint64_t)(-number), seperator, &buffer[1]); 1162 } 1163 else 1164 ui64toa_sep((apr_uint64_t)(number), seperator, buffer); 1165 1166 return apr_pstrdup(pool, buffer); 1167} 1168 1169unsigned int 1170svn_cstring__similarity(const char *stra, const char *strb, 1171 svn_membuf_t *buffer, apr_size_t *rlcs) 1172{ 1173 svn_string_t stringa, stringb; 1174 stringa.data = stra; 1175 stringa.len = strlen(stra); 1176 stringb.data = strb; 1177 stringb.len = strlen(strb); 1178 return svn_string__similarity(&stringa, &stringb, buffer, rlcs); 1179} 1180 1181unsigned int 1182svn_string__similarity(const svn_string_t *stringa, 1183 const svn_string_t *stringb, 1184 svn_membuf_t *buffer, apr_size_t *rlcs) 1185{ 1186 const char *stra = stringa->data; 1187 const char *strb = stringb->data; 1188 const apr_size_t lena = stringa->len; 1189 const apr_size_t lenb = stringb->len; 1190 const apr_size_t total = lena + lenb; 1191 const char *enda = stra + lena; 1192 const char *endb = strb + lenb; 1193 apr_size_t lcs = 0; 1194 1195 /* Skip the common prefix ... */ 1196 while (stra < enda && strb < endb && *stra == *strb) 1197 { 1198 ++stra; ++strb; 1199 ++lcs; 1200 } 1201 1202 /* ... and the common suffix */ 1203 while (stra < enda && strb < endb) 1204 { 1205 --enda; --endb; 1206 if (*enda != *endb) 1207 { 1208 ++enda; ++endb; 1209 break; 1210 } 1211 1212 ++lcs; 1213 } 1214 1215 if (stra < enda && strb < endb) 1216 { 1217 const apr_size_t resta = enda - stra; 1218 const apr_size_t restb = endb - strb; 1219 const apr_size_t slots = (resta > restb ? restb : resta); 1220 apr_size_t *curr, *prev; 1221 const char *pstr; 1222 1223 /* The outer loop must iterate on the longer string. */ 1224 if (resta < restb) 1225 { 1226 pstr = stra; 1227 stra = strb; 1228 strb = pstr; 1229 1230 pstr = enda; 1231 enda = endb; 1232 endb = pstr; 1233 } 1234 1235 /* Allocate two columns in the LCS matrix 1236 ### Optimize this to (slots + 2) instesd of 2 * (slots + 1) */ 1237 svn_membuf__ensure(buffer, 2 * (slots + 1) * sizeof(apr_size_t)); 1238 svn_membuf__nzero(buffer, (slots + 2) * sizeof(apr_size_t)); 1239 prev = buffer->data; 1240 curr = prev + slots + 1; 1241 1242 /* Calculate LCS length of the remainder */ 1243 for (pstr = stra; pstr < enda; ++pstr) 1244 { 1245 int i; 1246 for (i = 1; i <= slots; ++i) 1247 { 1248 if (*pstr == strb[i-1]) 1249 curr[i] = prev[i-1] + 1; 1250 else 1251 curr[i] = (curr[i-1] > prev[i] ? curr[i-1] : prev[i]); 1252 } 1253 1254 /* Swap the buffers, making the previous one current */ 1255 { 1256 apr_size_t *const temp = prev; 1257 prev = curr; 1258 curr = temp; 1259 } 1260 } 1261 1262 lcs += prev[slots]; 1263 } 1264 1265 if (rlcs) 1266 *rlcs = lcs; 1267 1268 /* Return similarity ratio rounded to 4 significant digits */ 1269 if (total) 1270 return(unsigned int)((2000 * lcs + total/2) / total); 1271 else 1272 return 1000; 1273} 1274