svn_string_private.h revision 362181
1/**
2 * @copyright
3 * ====================================================================
4 *    Licensed to the Apache Software Foundation (ASF) under one
5 *    or more contributor license agreements.  See the NOTICE file
6 *    distributed with this work for additional information
7 *    regarding copyright ownership.  The ASF licenses this file
8 *    to you under the Apache License, Version 2.0 (the
9 *    "License"); you may not use this file except in compliance
10 *    with the License.  You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 *    Unless required by applicable law or agreed to in writing,
15 *    software distributed under the License is distributed on an
16 *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 *    KIND, either express or implied.  See the License for the
18 *    specific language governing permissions and limitations
19 *    under the License.
20 * ====================================================================
21 * @endcopyright
22 *
23 * @file svn_string_private.h
24 * @brief Non-public string utility functions.
25 */
26
27
28#ifndef SVN_STRING_PRIVATE_H
29#define SVN_STRING_PRIVATE_H
30
31#include "svn_string.h"    /* for svn_boolean_t, svn_error_t */
32
33#ifdef __cplusplus
34extern "C" {
35#endif /* __cplusplus */
36
37/**
38 * @defgroup svn_string String handling
39 * @{
40 */
41
42
43/** Private functions.
44 *
45 * @defgroup svn_string_private Private functions
46 * @{
47 */
48
49
50/** A self-contained memory buffer of known size.
51 *
52 * Intended to be used where a single variable-sized buffer is needed
53 * within an iteration, a scratch pool is available and we want to
54 * avoid the cost of creating another pool just for the iteration.
55 */
56typedef struct svn_membuf_t
57{
58  /** The a pool from which this buffer was originally allocated, and is not
59   * necessarily specific to this buffer.  This is used only for allocating
60   * more memory from when the buffer needs to grow.
61   */
62  apr_pool_t *pool;
63
64  /** pointer to the memory */
65  void *data;
66
67  /** total size of buffer allocated */
68  apr_size_t size;
69} svn_membuf_t;
70
71
72/* Initialize a memory buffer of the given size */
73void
74svn_membuf__create(svn_membuf_t *membuf, apr_size_t size, apr_pool_t *pool);
75
76/* Ensure that the given memory buffer has at least the given size */
77void
78svn_membuf__ensure(svn_membuf_t *membuf, apr_size_t size);
79
80/* Resize the given memory buffer, preserving its contents. */
81void
82svn_membuf__resize(svn_membuf_t *membuf, apr_size_t size);
83
84/* Zero-fill the given memory */
85void
86svn_membuf__zero(svn_membuf_t *membuf);
87
88/* Zero-fill the given memory buffer up to the smaller of SIZE and the
89   current buffer size. */
90void
91svn_membuf__nzero(svn_membuf_t *membuf, apr_size_t size);
92
/* Inline implementation of svn_membuf__zero.
 * Note that PMEMBUF is evaluated only once.
 *
 * Nothing is written when the buffer size is zero.  Such a buffer may
 * have no storage behind its data pointer (presumably NULL -- confirm
 * against svn_membuf__create), and handing an invalid pointer to
 * memset() is undefined even for a length of zero.
 */
#define SVN_MEMBUF__ZERO(pmembuf)                  \
  do                                               \
    {                                              \
      svn_membuf_t *const _m_b_f_ = (pmembuf);     \
      if (_m_b_f_->size != 0)                      \
        memset(_m_b_f_->data, 0, _m_b_f_->size);   \
    }                                              \
  while(0)
103
/* Inline implementation of svn_membuf__nzero.
 * Note that PMEMBUF and PSIZE are evaluated only once, in that order.
 *
 * The number of bytes cleared is the smaller of PSIZE and the buffer's
 * current size.  Nothing is written when that count is zero, so an empty
 * buffer's data pointer (which may not reference any storage) is never
 * handed to memset().
 */
#define SVN_MEMBUF__NZERO(pmembuf, psize)                  \
  do                                                       \
    {                                                      \
      svn_membuf_t *const _m_b_f_ = (pmembuf);             \
      const apr_size_t _s_z_ = (psize);                    \
      const apr_size_t _n_z_ = (_s_z_ > _m_b_f_->size)     \
                               ? _m_b_f_->size : _s_z_;    \
      if (_n_z_ != 0)                                      \
        memset(_m_b_f_->data, 0, _n_z_);                   \
    }                                                      \
  while(0)
118
#ifndef SVN_DEBUG
/* In non-debug mode, calls to the zeroing functions are routed through
 * these inline macro replacements instead. */
#define svn_membuf__zero(B) SVN_MEMBUF__ZERO((B))
#define svn_membuf__nzero(B, S) SVN_MEMBUF__NZERO((B), (S))
#endif
124
125
126/** Returns the #svn_string_t information contained in the data and
127 * len members of @a strbuf. This is effectively a typecast, converting
128 * @a strbuf into an #svn_string_t. This first will become invalid and must
129 * not be accessed after this function returned.
130 */
131svn_string_t *
132svn_stringbuf__morph_into_string(svn_stringbuf_t *strbuf);
133
/** Utility macro to define static svn_string_t objects.  @a value must
 * be a string literal; pasting "" next to it in the expansion tries to
 * enforce this, because adjacent-literal concatenation only compiles
 * for literals.  The length is the literal's size minus the trailing NUL.
 *
 * Usage:
 * static const svn_string_t my_string = SVN__STATIC_STRING("my text");
 */
#define SVN__STATIC_STRING(value) { value "", sizeof(value "") - 1 }
141
/** Like strtoul but with a fixed base of 10 and without overflow checks.
 * This allows the compiler to generate massively faster (4x on 64bit LINUX)
 * code.  Overflow checks may be added on the caller side, where you might
 * want to test for a more specific value range anyway.
 *
 * NOTE(review): @a end presumably receives the position of the first
 * unparsed character of @a buffer, as with strtoul() -- confirm against
 * the implementation.
 */
unsigned long
svn__strtoul(const char *buffer, const char **end);
149
/** Number of chars needed to represent 64-bit integers as decimal strings:
 * signed (19 places + sign + NUL) or unsigned (20 places + NUL).
 */
#define SVN_INT64_BUFFER_SIZE 21
154
155/** Writes the @a number as string into @a dest. The latter must provide
156 * space for at least #SVN_INT64_BUFFER_SIZE characters. Returns the number
157 * chars written excluding the terminating NUL.
158 */
159apr_size_t
160svn__ui64toa(char * dest, apr_uint64_t number);
161
162/** Writes the @a number as string into @a dest. The latter must provide
163 * space for at least #SVN_INT64_BUFFER_SIZE characters. Returns the number
164 * chars written excluding the terminating NUL.
165 */
166apr_size_t
167svn__i64toa(char * dest, apr_int64_t number);
168
169/** Returns a decimal string for @a number allocated in @a pool.  Put in
170 * the @a separator at each third place.
171 */
172char *
173svn__ui64toa_sep(apr_uint64_t number, char separator, apr_pool_t *pool);
174
175/** Returns a decimal string for @a number allocated in @a pool.  Put in
176 * the @a separator at each third place.
177 */
178char *
179svn__i64toa_sep(apr_int64_t number, char separator, apr_pool_t *pool);
180
181
182/** Writes the @a number as base36-encoded string into @a dest. The latter
183 * must provide space for at least #SVN_INT64_BUFFER_SIZE characters.
184 * Returns the number chars written excluding the terminating NUL.
185 *
186 * @note The actual maximum buffer requirement is much shorter than
187 * #SVN_INT64_BUFFER_SIZE but introducing yet another constant is only
188 * marginally useful and may open the door to security issues when e.g.
189 * switching between base10 and base36 encoding.
190 */
191apr_size_t
192svn__ui64tobase36(char *dest, apr_uint64_t number);
193
194/** Returns the value of the base36 encoded unsigned integer starting at
195 * @a source.  If @a next is not NULL, @a *next will be set to the first
196 * position after the integer.
197 *
198 * The data in @a source will be considered part of the number to parse
199 * as long as the characters are within the base36 range.  If there are
200 * no such characters to begin with, 0 is returned.  Inputs with more than
201 * #SVN_INT64_BUFFER_SIZE digits will not be fully parsed, i.e. the value
202 * of @a *next as well as the return value are undefined.
203 */
204apr_uint64_t
205svn__base36toui64(const char **next, const char *source);
206
/**
 * The upper limit of the similarity range returned by
 * svn_cstring__similarity() and svn_string__similarity().
 */
#define SVN_STRING__SIM_RANGE_MAX 1000000
212
213/**
214 * Computes the similarity score of STRA and STRB. Returns the ratio
215 * of the length of their longest common subsequence and the average
216 * length of the strings, normalized to the range
217 * [0..SVN_STRING__SIM_RANGE_MAX]. The result is equivalent to
218 * Python's
219 *
220 *   difflib.SequenceMatcher.ratio
221 *
222 * Optionally sets *RLCS to the length of the longest common
223 * subsequence of STRA and STRB. Using BUFFER for temporary storage,
224 * requires memory proportional to the length of the shorter string.
225 *
226 * The LCS algorithm used is described in, e.g.,
227 *
228 *   http://en.wikipedia.org/wiki/Longest_common_subsequence_problem
229 *
230 * Q: Why another LCS when we already have one in libsvn_diff?
231 * A: svn_diff__lcs is too heavyweight and too generic for the
232 *    purposes of similarity testing. Whilst it would be possible
233 *    to use a character-based tokenizer with it, we really only need
234 *    the *length* of the LCS for the similarity score, not all the
235 *    other information that svn_diff__lcs produces in order to
236 *    make printing diffs possible.
237 *
238 * Q: Is there a limit on the length of the string parameters?
239 * A: Only available memory. But note that the LCS algorithm used
240 *    has O(strlen(STRA) * strlen(STRB)) worst-case performance,
241 *    so do keep a rein on your enthusiasm.
242 */
243apr_size_t
244svn_cstring__similarity(const char *stra, const char *strb,
245                        svn_membuf_t *buffer, apr_size_t *rlcs);
246
247/**
248 * Like svn_cstring__similarity, but accepts svn_string_t's instead
249 * of NUL-terminated character strings.
250 */
251apr_size_t
252svn_string__similarity(const svn_string_t *stringa,
253                       const svn_string_t *stringb,
254                       svn_membuf_t *buffer, apr_size_t *rlcs);
255
256
257/* Return the lowest position at which A and B differ. If no difference
258 * can be found in the first MAX_LEN characters, MAX_LEN will be returned.
259 */
260apr_size_t
261svn_cstring__match_length(const char *a,
262                          const char *b,
263                          apr_size_t max_len);
264
265/* Return the number of bytes before A and B that don't differ.  If no
266 * difference can be found in the first MAX_LEN characters,  MAX_LEN will
267 * be returned.  Please note that A-MAX_LEN and B-MAX_LEN must both be
268 * valid addresses.
269 */
270apr_size_t
271svn_cstring__reverse_match_length(const char *a,
272                                  const char *b,
273                                  apr_size_t max_len);
274
275/** @} */
276
/** Prefix trees.
 *
 * Prefix trees allow for a space-efficient representation of a set of path-
 * like strings, i.e. those that share common prefixes.  Any given string
 * value will be stored only once, i.e. two strings stored in the same tree
 * are equal if and only if they point to the same #svn_prefix_string__t.
 *
 * @defgroup svn_prefix_string Strings in prefix trees.
 * @{
 */
287
/**
 * Opaque data type for prefix-tree-based strings.
 */
typedef struct svn_prefix_string__t svn_prefix_string__t;
292
/**
 * Opaque data type representing a prefix tree.
 */
typedef struct svn_prefix_tree__t svn_prefix_tree__t;
297
298/**
299 * Return a new prefix tree allocated in @a pool.
300 */
301svn_prefix_tree__t *
302svn_prefix_tree__create(apr_pool_t *pool);
303
304/**
305 * Return a string with the value @a s stored in @a tree.  If no such string
306 * exists yet, add it automatically.
307 */
308svn_prefix_string__t *
309svn_prefix_string__create(svn_prefix_tree__t *tree,
310                          const char *s);
311
312/**
313 * Return the contents of @a s as a new string object allocated in @a pool.
314 */
315svn_string_t *
316svn_prefix_string__expand(const svn_prefix_string__t *s,
317                          apr_pool_t *pool);
318
319/**
320 * Compare the two strings @a lhs and @a rhs that must be part of the same
321 * tree.
322 */
323int
324svn_prefix_string__compare(const svn_prefix_string__t *lhs,
325                           const svn_prefix_string__t *rhs);
326
327/** @} */
328
329/** @} */
330
331
332#ifdef __cplusplus
333}
334#endif /* __cplusplus */
335
336#endif  /* SVN_STRING_PRIVATE_H */
337