1235633Sdim/* prefix_string.c --- implement strings based on a prefix tree 2193326Sed * 3193326Sed * ==================================================================== 4193326Sed * Licensed to the Apache Software Foundation (ASF) under one 5193326Sed * or more contributor license agreements. See the NOTICE file 6193326Sed * distributed with this work for additional information 7193326Sed * regarding copyright ownership. The ASF licenses this file 8193326Sed * to you under the Apache License, Version 2.0 (the 9193326Sed * "License"); you may not use this file except in compliance 10221345Sdim * with the License. You may obtain a copy of the License at 11221345Sdim * 12193326Sed * http://www.apache.org/licenses/LICENSE-2.0 13193326Sed * 14193326Sed * Unless required by applicable law or agreed to in writing, 15221345Sdim * software distributed under the License is distributed on an 16221345Sdim * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 17193326Sed * KIND, either express or implied. See the License for the 18252723Sdim * specific language governing permissions and limitations 19245431Sdim * under the License. 20245431Sdim * ==================================================================== 21193326Sed */ 22193326Sed 23235633Sdim#include <assert.h> 24235633Sdim#include "private/svn_string_private.h" 25221345Sdim 26221345Sdim/* A node in the tree represents a common prefix. The root node is the 27221345Sdim * empty prefix. Nodes may have up to 256 sub-nodes, each starting with 28235633Sdim * a different character (possibly '\0'). 29245431Sdim * 30245431Sdim * The nodes in the tree store only up to 8 chars of the respective common 31245431Sdim * prefix, i.e. longer common prefixes must be drawn out over multiple 32245431Sdim * hierarchy levels. This is a space <-> efficiency trade-off. 33245431Sdim * 34245431Sdim * Strings are the leaf nodes in the tree and use a specialized, smaller 35245431Sdim * data structure. They may add 0 to 7 extra chars to the prefix. Both 36245431Sdim * data types can be discerned by the last char in the data buffer. This 37245431Sdim * must be 0 for strings (leaves) and non-0 otherwise. Please note that 38245431Sdim * ordinary nodes have a length information so that no terminating 0 is 39245431Sdim * required for them. 40245431Sdim */ 41263509Sdim 42263509Sdim/* forward declaration */ 43263509Sdimtypedef struct node_t node_t; 44263509Sdim 45263509Sdim/* String type and tree leaf. 46263509Sdim */ 47245431Sdimstruct svn_prefix_string__t 48245431Sdim{ 49245431Sdim /* mandatory prefix */ 50245431Sdim node_t *prefix; 51245431Sdim 52252723Sdim /* 0 ..7 chars to add the the prefix. NUL-terminated. */ 53245431Sdim char data[8]; 54245431Sdim}; 55263509Sdim 56263509Sdim/* A node inside the tree, i.e. not a string and not a leaf (unless this is 57263509Sdim * the root node). 58245431Sdim * 59245431Sdim * Note: keep the ordering to minimize size / alignment overhead on 64 bit 60245431Sdim * machines. 61245431Sdim */ 62245431Sdimstruct node_t 63263509Sdim{ 64263509Sdim /* pointer to the parent prefix plus the 1 .. 8 extra chars. 65263509Sdim * Only the root will provide 0 extra chars. */ 66245431Sdim svn_prefix_string__t key; 67245431Sdim 68245431Sdim /* Length of the prefix from the root down to and including this one. 69245431Sdim * 0 for the root node. Only then will key.prefix be NULL. */ 70245431Sdim apr_uint32_t length; 71245431Sdim 72245431Sdim /* Number of entries used in SUB_NODES. */ 73245431Sdim apr_uint32_t sub_node_count; 74245431Sdim 75263509Sdim /* The sub-nodes, ordered by first char. node_t and svn_prefix_string__t 76263509Sdim * may be mixed here. May be NULL. 77263509Sdim * The number of allocated entries is always a power-of-two and only 78263509Sdim * given implicitly by SUB_NODE_COUNT. */ 79263509Sdim struct node_t **sub_nodes; 80263509Sdim}; 81245431Sdim 82245431Sdim/* The actual tree structure. */ 83245431Sdimstruct svn_prefix_tree__t 84245431Sdim{ 85245431Sdim /* the common tree root (represents the empty prefix). */ 86245431Sdim node_t *root; 87245431Sdim 88263509Sdim /* all sub-nodes & strings will be allocated from this pool */ 89263509Sdim apr_pool_t *pool; 90245431Sdim}; 91245431Sdim 92245431Sdim/* Return TRUE, iff NODE is a leaf node. 93252723Sdim */ 94245431Sdimstatic svn_boolean_t 95245431Sdimis_leaf(node_t *node) 96245431Sdim{ 97245431Sdim return node->key.data[7] == 0; 98245431Sdim} 99245431Sdim 100221345Sdim/* Ensure that the sub-nodes array of NODE within TREE has at least one 101193326Sed * unused entry. Re-allocate as necessary. 102221345Sdim */ 103221345Sdimstatic void 104226890Sdimauto_realloc_sub_nodes(svn_prefix_tree__t *tree, 105226890Sdim node_t *node) 106245431Sdim{ 107245431Sdim if (node->sub_node_count & (node->sub_node_count - 1)) 108235633Sdim return; 109226890Sdim 110226890Sdim if (node->sub_node_count == 0) 111226890Sdim { 112226890Sdim node->sub_nodes = apr_pcalloc(tree->pool, sizeof(*node->sub_nodes)); 113193326Sed } 114224145Sdim else 115224145Sdim { 116224145Sdim node_t **sub_nodes 117224145Sdim = apr_pcalloc(tree->pool, 118224145Sdim 2 * node->sub_node_count * sizeof(*sub_nodes)); 119224145Sdim memcpy(sub_nodes, node->sub_nodes, 120221345Sdim node->sub_node_count * sizeof(*sub_nodes)); 121235633Sdim node->sub_nodes = sub_nodes; 122224145Sdim } 123224145Sdim} 124193326Sed 125221345Sdim/* Given the COUNT pointers in the SUB_NODES array, return the location at 126193326Sed * which KEY is either located or would be inserted. 127 */ 128static int 129search_lower_bound(node_t **sub_nodes, 130 unsigned char key, 131 int count) 132{ 133 int lower = 0; 134 int upper = count - 1; 135 136 /* Binary search for the lowest position at which to insert KEY. */ 137 while (lower <= upper) 138 { 139 int current = lower + (upper - lower) / 2; 140 141 if ((unsigned char)sub_nodes[current]->key.data[0] < key) 142 lower = current + 1; 143 else 144 upper = current - 1; 145 } 146 147 return lower; 148} 149 150svn_prefix_tree__t * 151svn_prefix_tree__create(apr_pool_t *pool) 152{ 153 svn_prefix_tree__t *tree = apr_pcalloc(pool, sizeof(*tree)); 154 tree->pool = pool; 155 156 tree->root = apr_pcalloc(pool, sizeof(*tree->root)); 157 tree->root->key.data[7] = '\xff'; 158 159 return tree; 160} 161 162svn_prefix_string__t * 163svn_prefix_string__create(svn_prefix_tree__t *tree, 164 const char *s) 165{ 166 svn_prefix_string__t *new_string; 167 apr_size_t len = strlen(s); 168 node_t *node = tree->root; 169 node_t *new_node; 170 int idx; 171 172 /* walk the existing tree until we either find S or the node at which S 173 * has to be inserted */ 174 while (TRUE) 175 { 176 node_t *sub_node; 177 int match = 1; 178 179 /* index of the matching sub-node */ 180 idx = node->sub_node_count 181 ? search_lower_bound(node->sub_nodes, 182 (unsigned char)s[node->length], 183 node->sub_node_count) 184 : 0; 185 186 /* any (partially) matching sub-nodes? */ 187 if (idx == (int)node->sub_node_count 188 || node->sub_nodes[idx]->key.data[0] != s[node->length]) 189 break; 190 191 sub_node = node->sub_nodes[idx]; 192 193 /* fully matching sub-node? */ 194 if (is_leaf(sub_node)) 195 { 196 if (strcmp(sub_node->key.data, s + node->length) == 0) 197 return &sub_node->key; 198 } 199 else 200 { 201 apr_size_t sub_node_len = sub_node->length - node->length; 202 if (strncmp(sub_node->key.data, s + node->length, 203 sub_node_len) == 0) 204 { 205 node = sub_node; 206 continue; 207 } 208 } 209 210 /* partial match -> split */ 211 while (sub_node->key.data[match] == s[node->length + match]) 212 ++match; 213 214 new_node = apr_pcalloc(tree->pool, sizeof(*new_node)); 215 new_node->key = sub_node->key; 216 new_node->length = node->length + match; 217 new_node->key.data[7] = '\xff'; 218 new_node->sub_node_count = 1; 219 new_node->sub_nodes = apr_palloc(tree->pool, sizeof(node_t *)); 220 new_node->sub_nodes[0] = sub_node; 221 222 memmove(sub_node->key.data, sub_node->key.data + match, 8 - match); 223 224 /* replace old sub-node with new one and continue lookup */ 225 sub_node->key.prefix = new_node; 226 node->sub_nodes[idx] = new_node; 227 node = new_node; 228 } 229 230 /* add sub-node(s) and final string */ 231 while (node->length + 7 < len) 232 { 233 new_node = apr_pcalloc(tree->pool, sizeof(*new_node)); 234 new_node->key.prefix = node; 235 new_node->length = node->length + 8; 236 memcpy(new_node->key.data, s + node->length, 8); 237 238 auto_realloc_sub_nodes(tree, node); 239 memmove(node->sub_nodes + idx + 1, node->sub_nodes + idx, 240 (node->sub_node_count - idx) * sizeof(node_t *)); 241 242 /* replace old sub-node with new one and continue lookup */ 243 node->sub_nodes[idx] = new_node; 244 node->sub_node_count++; 245 node = new_node; 246 idx = 0; 247 } 248 249 new_string = apr_pcalloc(tree->pool, sizeof(*new_string)); 250 new_string->prefix = node; 251 memcpy(new_string->data, s + node->length, len - node->length); 252 253 auto_realloc_sub_nodes(tree, node); 254 memmove(node->sub_nodes + idx + 1, node->sub_nodes + idx, 255 (node->sub_node_count - idx) * sizeof(node_t *)); 256 257 node->sub_nodes[idx] = (node_t *)new_string; 258 node->sub_node_count++; 259 return new_string; 260} 261 262svn_string_t * 263svn_prefix_string__expand(const svn_prefix_string__t *s, 264 apr_pool_t *pool) 265{ 266 apr_size_t s_len = strlen(s->data); 267 apr_size_t len = s->prefix->length + s_len; 268 char *buffer = apr_palloc(pool, len + 1); 269 270 svn_string_t *result = apr_pcalloc(pool, sizeof(*result)); 271 result->data = buffer; 272 result->len = len; 273 buffer[len] = '\0'; 274 275 while (s->prefix) 276 { 277 memcpy(buffer + s->prefix->length, s->data, len - s->prefix->length); 278 len = s->prefix->length; 279 s = &s->prefix->key; 280 } 281 282 return result; 283} 284 285int 286svn_prefix_string__compare(const svn_prefix_string__t *lhs, 287 const svn_prefix_string__t *rhs) 288{ 289 const node_t *lhs_parent = lhs->prefix; 290 const node_t *rhs_parent = rhs->prefix; 291 292 if (lhs == rhs) 293 return 0; 294 295 /* find the common root */ 296 while (lhs_parent != rhs_parent) 297 { 298 if (lhs_parent->length <= rhs_parent->length) 299 { 300 rhs = &rhs_parent->key; 301 rhs_parent = rhs_parent->key.prefix; 302 } 303 else if (rhs_parent->length <= lhs_parent->length) 304 { 305 lhs = &lhs_parent->key; 306 lhs_parent = lhs_parent->key.prefix; 307 } 308 309 /* same tree? */ 310 assert(lhs_parent && rhs_parent); 311 } 312 313 /* at the common root, strings will differ in the first follow-up char */ 314 return (int)(unsigned char)lhs->data[0] - (int)(unsigned char)rhs->data[0]; 315} 316