/* svn_string_private.h revision 362181 */
1/** 2 * @copyright 3 * ==================================================================== 4 * Licensed to the Apache Software Foundation (ASF) under one 5 * or more contributor license agreements. See the NOTICE file 6 * distributed with this work for additional information 7 * regarding copyright ownership. The ASF licenses this file 8 * to you under the Apache License, Version 2.0 (the 9 * "License"); you may not use this file except in compliance 10 * with the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, 15 * software distributed under the License is distributed on an 16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17 * KIND, either express or implied. See the License for the 18 * specific language governing permissions and limitations 19 * under the License. 20 * ==================================================================== 21 * @endcopyright 22 * 23 * @file svn_string_private.h 24 * @brief Non-public string utility functions. 25 */ 26 27 28#ifndef SVN_STRING_PRIVATE_H 29#define SVN_STRING_PRIVATE_H 30 31#include "svn_string.h" /* for svn_boolean_t, svn_error_t */ 32 33#ifdef __cplusplus 34extern "C" { 35#endif /* __cplusplus */ 36 37/** 38 * @defgroup svn_string String handling 39 * @{ 40 */ 41 42 43/** Private functions. 44 * 45 * @defgroup svn_string_private Private functions 46 * @{ 47 */ 48 49 50/** A self-contained memory buffer of known size. 51 * 52 * Intended to be used where a single variable-sized buffer is needed 53 * within an iteration, a scratch pool is available and we want to 54 * avoid the cost of creating another pool just for the iteration. 55 */ 56typedef struct svn_membuf_t 57{ 58 /** The a pool from which this buffer was originally allocated, and is not 59 * necessarily specific to this buffer. This is used only for allocating 60 * more memory from when the buffer needs to grow. 
61 */ 62 apr_pool_t *pool; 63 64 /** pointer to the memory */ 65 void *data; 66 67 /** total size of buffer allocated */ 68 apr_size_t size; 69} svn_membuf_t; 70 71 72/* Initialize a memory buffer of the given size */ 73void 74svn_membuf__create(svn_membuf_t *membuf, apr_size_t size, apr_pool_t *pool); 75 76/* Ensure that the given memory buffer has at least the given size */ 77void 78svn_membuf__ensure(svn_membuf_t *membuf, apr_size_t size); 79 80/* Resize the given memory buffer, preserving its contents. */ 81void 82svn_membuf__resize(svn_membuf_t *membuf, apr_size_t size); 83 84/* Zero-fill the given memory */ 85void 86svn_membuf__zero(svn_membuf_t *membuf); 87 88/* Zero-fill the given memory buffer up to the smaller of SIZE and the 89 current buffer size. */ 90void 91svn_membuf__nzero(svn_membuf_t *membuf, apr_size_t size); 92 93/* Inline implementation of svn_membuf__zero. 94 * Note that PMEMBUF is evaluated only once. 95 */ 96#define SVN_MEMBUF__ZERO(pmembuf) \ 97 do \ 98 { \ 99 svn_membuf_t *const _m_b_f_ = (pmembuf); \ 100 memset(_m_b_f_->data, 0, _m_b_f_->size); \ 101 } \ 102 while(0) 103 104/* Inline implementation of svn_membuf__nzero 105 * Note that PMEMBUF and PSIZE are evaluated only once. 106 */ 107#define SVN_MEMBUF__NZERO(pmembuf, psize) \ 108 do \ 109 { \ 110 svn_membuf_t *const _m_b_f_ = (pmembuf); \ 111 const apr_size_t _s_z_ = (psize); \ 112 if (_s_z_ > _m_b_f_->size) \ 113 memset(_m_b_f_->data, 0, _m_b_f_->size); \ 114 else \ 115 memset(_m_b_f_->data, 0, _s_z_); \ 116 } \ 117 while(0) 118 119#ifndef SVN_DEBUG 120/* In non-debug mode, just use these inlie replacements */ 121#define svn_membuf__zero(B) SVN_MEMBUF__ZERO((B)) 122#define svn_membuf__nzero(B, S) SVN_MEMBUF__NZERO((B), (S)) 123#endif 124 125 126/** Returns the #svn_string_t information contained in the data and 127 * len members of @a strbuf. This is effectively a typecast, converting 128 * @a strbuf into an #svn_string_t. 
This first will become invalid and must 129 * not be accessed after this function returned. 130 */ 131svn_string_t * 132svn_stringbuf__morph_into_string(svn_stringbuf_t *strbuf); 133 134/** Utility macro to define static svn_string_t objects. @a value must 135 * be a static string; the "" in the macro declaration tries to ensure this. 136 * 137 * Usage: 138 * static const svn_string_t my_string = SVN__STATIC_STRING("my text"); 139 */ 140#define SVN__STATIC_STRING(value) { value "", sizeof(value "") - 1 } 141 142/** Like strtoul but with a fixed base of 10 and without overflow checks. 143 * This allows the compiler to generate massively faster (4x on 64bit LINUX) 144 * code. Overflow checks may be added on the caller side where you might 145 * want to test for a more specific value range anyway. 146 */ 147unsigned long 148svn__strtoul(const char *buffer, const char **end); 149 150/** Number of chars needed to represent signed (19 places + sign + NUL) or 151 * unsigned (20 places + NUL) integers as strings. 152 */ 153#define SVN_INT64_BUFFER_SIZE 21 154 155/** Writes the @a number as string into @a dest. The latter must provide 156 * space for at least #SVN_INT64_BUFFER_SIZE characters. Returns the number 157 * chars written excluding the terminating NUL. 158 */ 159apr_size_t 160svn__ui64toa(char * dest, apr_uint64_t number); 161 162/** Writes the @a number as string into @a dest. The latter must provide 163 * space for at least #SVN_INT64_BUFFER_SIZE characters. Returns the number 164 * chars written excluding the terminating NUL. 165 */ 166apr_size_t 167svn__i64toa(char * dest, apr_int64_t number); 168 169/** Returns a decimal string for @a number allocated in @a pool. Put in 170 * the @a separator at each third place. 171 */ 172char * 173svn__ui64toa_sep(apr_uint64_t number, char separator, apr_pool_t *pool); 174 175/** Returns a decimal string for @a number allocated in @a pool. Put in 176 * the @a separator at each third place. 
177 */ 178char * 179svn__i64toa_sep(apr_int64_t number, char separator, apr_pool_t *pool); 180 181 182/** Writes the @a number as base36-encoded string into @a dest. The latter 183 * must provide space for at least #SVN_INT64_BUFFER_SIZE characters. 184 * Returns the number chars written excluding the terminating NUL. 185 * 186 * @note The actual maximum buffer requirement is much shorter than 187 * #SVN_INT64_BUFFER_SIZE but introducing yet another constant is only 188 * marginally useful and may open the door to security issues when e.g. 189 * switching between base10 and base36 encoding. 190 */ 191apr_size_t 192svn__ui64tobase36(char *dest, apr_uint64_t number); 193 194/** Returns the value of the base36 encoded unsigned integer starting at 195 * @a source. If @a next is not NULL, @a *next will be set to the first 196 * position after the integer. 197 * 198 * The data in @a source will be considered part of the number to parse 199 * as long as the characters are within the base36 range. If there are 200 * no such characters to begin with, 0 is returned. Inputs with more than 201 * #SVN_INT64_BUFFER_SIZE digits will not be fully parsed, i.e. the value 202 * of @a *next as well as the return value are undefined. 203 */ 204apr_uint64_t 205svn__base36toui64(const char **next, const char *source); 206 207/** 208 * The upper limit of the similarity range returned by 209 * svn_cstring__similarity() and svn_string__similarity(). 210 */ 211#define SVN_STRING__SIM_RANGE_MAX 1000000 212 213/** 214 * Computes the similarity score of STRA and STRB. Returns the ratio 215 * of the length of their longest common subsequence and the average 216 * length of the strings, normalized to the range 217 * [0..SVN_STRING__SIM_RANGE_MAX]. The result is equivalent to 218 * Python's 219 * 220 * difflib.SequenceMatcher.ratio 221 * 222 * Optionally sets *RLCS to the length of the longest common 223 * subsequence of STRA and STRB. 
Using BUFFER for temporary storage, 224 * requires memory proportional to the length of the shorter string. 225 * 226 * The LCS algorithm used is described in, e.g., 227 * 228 * http://en.wikipedia.org/wiki/Longest_common_subsequence_problem 229 * 230 * Q: Why another LCS when we already have one in libsvn_diff? 231 * A: svn_diff__lcs is too heavyweight and too generic for the 232 * purposes of similarity testing. Whilst it would be possible 233 * to use a character-based tokenizer with it, we really only need 234 * the *length* of the LCS for the similarity score, not all the 235 * other information that svn_diff__lcs produces in order to 236 * make printing diffs possible. 237 * 238 * Q: Is there a limit on the length of the string parameters? 239 * A: Only available memory. But note that the LCS algorithm used 240 * has O(strlen(STRA) * strlen(STRB)) worst-case performance, 241 * so do keep a rein on your enthusiasm. 242 */ 243apr_size_t 244svn_cstring__similarity(const char *stra, const char *strb, 245 svn_membuf_t *buffer, apr_size_t *rlcs); 246 247/** 248 * Like svn_cstring__similarity, but accepts svn_string_t's instead 249 * of NUL-terminated character strings. 250 */ 251apr_size_t 252svn_string__similarity(const svn_string_t *stringa, 253 const svn_string_t *stringb, 254 svn_membuf_t *buffer, apr_size_t *rlcs); 255 256 257/* Return the lowest position at which A and B differ. If no difference 258 * can be found in the first MAX_LEN characters, MAX_LEN will be returned. 259 */ 260apr_size_t 261svn_cstring__match_length(const char *a, 262 const char *b, 263 apr_size_t max_len); 264 265/* Return the number of bytes before A and B that don't differ. If no 266 * difference can be found in the first MAX_LEN characters, MAX_LEN will 267 * be returned. Please note that A-MAX_LEN and B-MAX_LEN must both be 268 * valid addresses. 
269 */ 270apr_size_t 271svn_cstring__reverse_match_length(const char *a, 272 const char *b, 273 apr_size_t max_len); 274 275/** @} */ 276 277/** Prefix trees. 278 * 279 * Prefix trees allow for a space-efficient representation of a set of path- 280 * like strings, i.e. those that share common prefixes. Any given string 281 * value will be stored only once, i.e. two strings stored in the same tree 282 * are equal if and only if the point to the same #svn_prefix_string__t. 283 * 284 * @defgroup svn_prefix_string Strings in prefix trees. 285* @{ 286 */ 287 288/** 289 * Opaque data type for prefix-tree-based strings. 290 */ 291typedef struct svn_prefix_string__t svn_prefix_string__t; 292 293/** 294 * Opaque data type representing a prefix tree 295 */ 296typedef struct svn_prefix_tree__t svn_prefix_tree__t; 297 298/** 299 * Return a new prefix tree allocated in @a pool. 300 */ 301svn_prefix_tree__t * 302svn_prefix_tree__create(apr_pool_t *pool); 303 304/** 305 * Return a string with the value @a s stored in @a tree. If no such string 306 * exists yet, add it automatically. 307 */ 308svn_prefix_string__t * 309svn_prefix_string__create(svn_prefix_tree__t *tree, 310 const char *s); 311 312/** 313 * Return the contents of @a s as a new string object allocated in @a pool. 314 */ 315svn_string_t * 316svn_prefix_string__expand(const svn_prefix_string__t *s, 317 apr_pool_t *pool); 318 319/** 320 * Compare the two strings @a lhs and @a rhs that must be part of the same 321 * tree. 322 */ 323int 324svn_prefix_string__compare(const svn_prefix_string__t *lhs, 325 const svn_prefix_string__t *rhs); 326 327/** @} */ 328 329/** @} */ 330 331 332#ifdef __cplusplus 333} 334#endif /* __cplusplus */ 335 336#endif /* SVN_STRING_PRIVATE_H */ 337