1/* 2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * Copyright (c) 1988, 1989, 1993 30 * The Regents of the University of California. All rights reserved. 31 * 32 * Redistribution and use in source and binary forms, with or without 33 * modification, are permitted provided that the following conditions 34 * are met: 35 * 1. Redistributions of source code must retain the above copyright 36 * notice, this list of conditions and the following disclaimer. 37 * 2. Redistributions in binary form must reproduce the above copyright 38 * notice, this list of conditions and the following disclaimer in the 39 * documentation and/or other materials provided with the distribution. 40 * 3. All advertising materials mentioning features or use of this software 41 * must display the following acknowledgement: 42 * This product includes software developed by the University of 43 * California, Berkeley and its contributors. 44 * 4. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 * 60 * @(#)radix.c 8.4 (Berkeley) 11/2/94 61 * $FreeBSD: src/sys/net/radix.c,v 1.20.2.2 2001/03/06 00:56:50 obrien Exp $ 62 */ 63 64/* 65 * Routines to build and maintain radix trees for routing lookups. 66 */ 67#ifndef _RADIX_H_ 68#include <sys/param.h> 69#include <sys/systm.h> 70#include <sys/malloc.h> 71#define M_DONTWAIT M_NOWAIT 72#include <sys/domain.h> 73#include <sys/syslog.h> 74#include <net/radix.h> 75#include <sys/socket.h> 76#include <sys/socketvar.h> 77#include <kern/locks.h> 78#endif 79 80static int rn_walktree_from(struct radix_node_head *h, void *a, 81 void *m, walktree_f_t *f, void *w); 82static int rn_walktree(struct radix_node_head *, walktree_f_t *, void *); 83static struct radix_node 84 *rn_insert(void *, struct radix_node_head *, int *, 85 struct radix_node [2]), 86 *rn_newpair(void *, int, struct radix_node[2]), 87 *rn_search(void *, struct radix_node *), 88 *rn_search_m(void *, struct radix_node *, void *); 89 90static int max_keylen; 91static struct radix_mask *rn_mkfreelist; 92static struct radix_node_head *mask_rnhead; 93static char *addmask_key; 94static char normal_chars[] = {0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, -1}; 95static char *rn_zeros, *rn_ones; 96 97 98extern lck_grp_t *domain_proto_mtx_grp; 99extern lck_attr_t *domain_proto_mtx_attr; 100 101#define rn_masktop (mask_rnhead->rnh_treetop) 102#undef Bcmp 103#define Bcmp(a, b, l) \ 104 (l == 0 ? 0 : bcmp((caddr_t)(a), (caddr_t)(b), (uint32_t)l)) 105 106static int rn_lexobetter(void *m_arg, void *n_arg); 107static struct radix_mask * 108 rn_new_radix_mask(struct radix_node *tt, 109 struct radix_mask *next); 110static int rn_satisfies_leaf(char *trial, struct radix_node *leaf, int skip, 111 rn_matchf_t *f, void *w); 112 113#define RN_MATCHF(rn, f, arg) (f == NULL || (*f)((rn), arg)) 114 115/* 116 * The data structure for the keys is a radix tree with one way 117 * branching removed. The index rn_bit at an internal node n represents a bit 118 * position to be tested. The tree is arranged so that all descendants 119 * of a node n have keys whose bits all agree up to position rn_bit - 1. 120 * (We say the index of n is rn_bit.) 121 * 122 * There is at least one descendant which has a one bit at position rn_bit, 123 * and at least one with a zero there. 124 * 125 * A route is determined by a pair of key and mask. We require that the 126 * bit-wise logical and of the key and mask to be the key. 127 * We define the index of a route to associated with the mask to be 128 * the first bit number in the mask where 0 occurs (with bit number 0 129 * representing the highest order bit). 130 * 131 * We say a mask is normal if every bit is 0, past the index of the mask. 132 * If a node n has a descendant (k, m) with index(m) == index(n) == rn_bit, 133 * and m is a normal mask, then the route applies to every descendant of n. 134 * If the index(m) < rn_bit, this implies the trailing last few bits of k 135 * before bit b are all 0, (and hence consequently true of every descendant 136 * of n), so the route applies to all descendants of the node as well. 137 * 138 * Similar logic shows that a non-normal mask m such that 139 * index(m) <= index(n) could potentially apply to many children of n. 140 * Thus, for each non-host route, we attach its mask to a list at an internal 141 * node as high in the tree as we can go. 142 * 143 * The present version of the code makes use of normal routes in short- 144 * circuiting an explict mask and compare operation when testing whether 145 * a key satisfies a normal route, and also in remembering the unique leaf 146 * that governs a subtree. 147 */ 148 149static struct radix_node * 150rn_search(void *v_arg, struct radix_node *head) 151{ 152 struct radix_node *x; 153 caddr_t v; 154 155 for (x = head, v = v_arg; x->rn_bit >= 0;) { 156 if (x->rn_bmask & v[x->rn_offset]) 157 x = x->rn_right; 158 else 159 x = x->rn_left; 160 } 161 return (x); 162} 163 164static struct radix_node * 165rn_search_m(void *v_arg, struct radix_node *head, void *m_arg) 166{ 167 struct radix_node *x; 168 caddr_t v = v_arg, m = m_arg; 169 170 for (x = head; x->rn_bit >= 0;) { 171 if ((x->rn_bmask & m[x->rn_offset]) && 172 (x->rn_bmask & v[x->rn_offset])) 173 x = x->rn_right; 174 else 175 x = x->rn_left; 176 } 177 return x; 178} 179 180int 181rn_refines(void *m_arg, void *n_arg) 182{ 183 caddr_t m = m_arg, n = n_arg; 184 caddr_t lim, lim2 = lim = n + *(u_char *)n; 185 int longer = (*(u_char *)n++) - (int)(*(u_char *)m++); 186 int masks_are_equal = 1; 187 188 if (longer > 0) 189 lim -= longer; 190 while (n < lim) { 191 if (*n & ~(*m)) 192 return 0; 193 if (*n++ != *m++) 194 masks_are_equal = 0; 195 } 196 while (n < lim2) 197 if (*n++) 198 return 0; 199 if (masks_are_equal && (longer < 0)) 200 for (lim2 = m - longer; m < lim2; ) 201 if (*m++) 202 return 1; 203 return (!masks_are_equal); 204} 205 206struct radix_node * 207rn_lookup(void *v_arg, void *m_arg, struct radix_node_head *head) 208{ 209 return (rn_lookup_args(v_arg, m_arg, head, NULL, NULL)); 210} 211 212struct radix_node * 213rn_lookup_args(void *v_arg, void *m_arg, struct radix_node_head *head, 214 rn_matchf_t *f, void *w) 215{ 216 struct radix_node *x; 217 caddr_t netmask = NULL; 218 219 if (m_arg) { 220 x = rn_addmask(m_arg, 1, head->rnh_treetop->rn_offset); 221 if (x == 0) 222 return (NULL); 223 netmask = x->rn_key; 224 } 225 x = rn_match_args(v_arg, head, f, w); 226 if (x && netmask) { 227 while (x && x->rn_mask != netmask) 228 x = x->rn_dupedkey; 229 } 230 return x; 231} 232 233/* 234 * Returns true if address 'trial' has no bits differing from the 235 * leaf's key when compared under the leaf's mask. In other words, 236 * returns true when 'trial' matches leaf. If a leaf-matching 237 * routine is passed in, it is also used to find a match on the 238 * conditions defined by the caller of rn_match. 239 */ 240static int 241rn_satisfies_leaf(char *trial, struct radix_node *leaf, int skip, 242 rn_matchf_t *f, void *w) 243{ 244 char *cp = trial, *cp2 = leaf->rn_key, *cp3 = leaf->rn_mask; 245 char *cplim; 246 int length = min(*(u_char *)cp, *(u_char *)cp2); 247 248 if (cp3 == 0) 249 cp3 = rn_ones; 250 else 251 length = min(length, *(u_char *)cp3); 252 cplim = cp + length; cp3 += skip; cp2 += skip; 253 for (cp += skip; cp < cplim; cp++, cp2++, cp3++) 254 if ((*cp ^ *cp2) & *cp3) 255 return 0; 256 257 return (RN_MATCHF(leaf, f, w)); 258} 259 260struct radix_node * 261rn_match(void *v_arg, struct radix_node_head *head) 262{ 263 return (rn_match_args(v_arg, head, NULL, NULL)); 264} 265 266struct radix_node * 267rn_match_args(void *v_arg, struct radix_node_head *head, 268 rn_matchf_t *f, void *w) 269{ 270 caddr_t v = v_arg; 271 struct radix_node *t = head->rnh_treetop, *x; 272 caddr_t cp = v, cp2; 273 caddr_t cplim; 274 struct radix_node *saved_t, *top = t; 275 int off = t->rn_offset, vlen = *(u_char *)cp, matched_off; 276 int test, b, rn_bit; 277 278 /* 279 * Open code rn_search(v, top) to avoid overhead of extra 280 * subroutine call. 281 */ 282 for (; t->rn_bit >= 0; ) { 283 if (t->rn_bmask & cp[t->rn_offset]) 284 t = t->rn_right; 285 else 286 t = t->rn_left; 287 } 288 /* 289 * See if we match exactly as a host destination 290 * or at least learn how many bits match, for normal mask finesse. 291 * 292 * It doesn't hurt us to limit how many bytes to check 293 * to the length of the mask, since if it matches we had a genuine 294 * match and the leaf we have is the most specific one anyway; 295 * if it didn't match with a shorter length it would fail 296 * with a long one. This wins big for class B&C netmasks which 297 * are probably the most common case... 298 */ 299 if (t->rn_mask) 300 vlen = *(u_char *)t->rn_mask; 301 cp += off; cp2 = t->rn_key + off; cplim = v + vlen; 302 for (; cp < cplim; cp++, cp2++) 303 if (*cp != *cp2) 304 goto on1; 305 /* 306 * This extra grot is in case we are explicitly asked 307 * to look up the default. Ugh! 308 * 309 * Never return the root node itself, it seems to cause a 310 * lot of confusion. 311 */ 312 if (t->rn_flags & RNF_ROOT) 313 t = t->rn_dupedkey; 314 if (t == NULL || RN_MATCHF(t, f, w)) { 315 return (t); 316 } else { 317 /* 318 * Although we found an exact match on the key, 319 * f() is looking for some other criteria as well. 320 * Continue looking as if the exact match failed. 321 */ 322 if (t->rn_parent->rn_flags & RNF_ROOT) { 323 /* Hit the top; have to give up */ 324 return (NULL); 325 } 326 b = 0; 327 goto keeplooking; 328 } 329on1: 330 test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */ 331 for (b = 7; (test >>= 1) > 0;) 332 b--; 333keeplooking: 334 matched_off = cp - v; 335 b += matched_off << 3; 336 rn_bit = -1 - b; 337 /* 338 * If there is a host route in a duped-key chain, it will be first. 339 */ 340 if ((saved_t = t)->rn_mask == 0) 341 t = t->rn_dupedkey; 342 for (; t; t = t->rn_dupedkey) { 343 /* 344 * Even if we don't match exactly as a host, 345 * we may match if the leaf we wound up at is 346 * a route to a net. 347 */ 348 if (t->rn_flags & RNF_NORMAL) { 349 if ((rn_bit <= t->rn_bit) && RN_MATCHF(t, f, w)) 350 return (t); 351 } else if (rn_satisfies_leaf(v, t, matched_off, f, w)) { 352 return (t); 353 } 354 } 355 t = saved_t; 356 /* start searching up the tree */ 357 do { 358 struct radix_mask *m; 359 t = t->rn_parent; 360 m = t->rn_mklist; 361 /* 362 * If non-contiguous masks ever become important 363 * we can restore the masking and open coding of 364 * the search and satisfaction test and put the 365 * calculation of "off" back before the "do". 366 */ 367 while (m) { 368 if (m->rm_flags & RNF_NORMAL) { 369 if ((rn_bit <= m->rm_bit) && 370 RN_MATCHF(m->rm_leaf, f, w)) 371 return (m->rm_leaf); 372 } else { 373 off = min(t->rn_offset, matched_off); 374 x = rn_search_m(v, t, m->rm_mask); 375 while (x && x->rn_mask != m->rm_mask) 376 x = x->rn_dupedkey; 377 if (x && rn_satisfies_leaf(v, x, off, f, w)) 378 return (x); 379 } 380 m = m->rm_mklist; 381 } 382 } while (t != top); 383 return (NULL); 384} 385 386#ifdef RN_DEBUG 387int rn_nodenum; 388struct radix_node *rn_clist; 389int rn_saveinfo; 390int rn_debug = 1; 391#endif 392 393static struct radix_node * 394rn_newpair(void *v, int b, struct radix_node nodes[2]) 395{ 396 struct radix_node *tt = nodes, *t = tt + 1; 397 t->rn_bit = b; 398 t->rn_bmask = 0x80 >> (b & 7); 399 t->rn_left = tt; 400 t->rn_offset = b >> 3; 401 tt->rn_bit = -1; 402 tt->rn_key = (caddr_t)v; 403 tt->rn_parent = t; 404 tt->rn_flags = t->rn_flags = RNF_ACTIVE; 405 tt->rn_mklist = t->rn_mklist = NULL; 406#ifdef RN_DEBUG 407 tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++; 408 tt->rn_twin = t; 409 tt->rn_ybro = rn_clist; 410 rn_clist = tt; 411#endif 412 return t; 413} 414 415static struct radix_node * 416rn_insert(void *v_arg, struct radix_node_head *head, int *dupentry, 417 struct radix_node nodes[2]) 418{ 419 caddr_t v = v_arg; 420 struct radix_node *top = head->rnh_treetop; 421 int head_off = top->rn_offset, vlen = (int)*((u_char *)v); 422 struct radix_node *t = rn_search(v_arg, top); 423 caddr_t cp = v + head_off; 424 int b; 425 struct radix_node *tt; 426 /* 427 * Find first bit at which v and t->rn_key differ 428 */ 429 { 430 caddr_t cp2 = t->rn_key + head_off; 431 int cmp_res; 432 caddr_t cplim = v + vlen; 433 434 while (cp < cplim) 435 if (*cp2++ != *cp++) 436 goto on1; 437 *dupentry = 1; 438 return t; 439on1: 440 *dupentry = 0; 441 cmp_res = (cp[-1] ^ cp2[-1]) & 0xff; 442 for (b = (cp - v) << 3; cmp_res; b--) 443 cmp_res >>= 1; 444 } 445 { 446 struct radix_node *p, *x = top; 447 cp = v; 448 do { 449 p = x; 450 if (cp[x->rn_offset] & x->rn_bmask) 451 x = x->rn_right; 452 else 453 x = x->rn_left; 454 } while (b > (unsigned) x->rn_bit); 455 /* x->rn_bit < b && x->rn_bit >= 0 */ 456#ifdef RN_DEBUG 457 if (rn_debug) 458 log(LOG_DEBUG, "rn_insert: Going In:\n"), traverse(p); 459#endif 460 t = rn_newpair(v_arg, b, nodes); 461 tt = t->rn_left; 462 if ((cp[p->rn_offset] & p->rn_bmask) == 0) 463 p->rn_left = t; 464 else 465 p->rn_right = t; 466 x->rn_parent = t; 467 t->rn_parent = p; /* frees x, p as temp vars below */ 468 if ((cp[t->rn_offset] & t->rn_bmask) == 0) { 469 t->rn_right = x; 470 } else { 471 t->rn_right = tt; 472 t->rn_left = x; 473 } 474#ifdef RN_DEBUG 475 if (rn_debug) 476 log(LOG_DEBUG, "rn_insert: Coming Out:\n"), traverse(p); 477#endif 478 } 479 return (tt); 480} 481 482struct radix_node * 483rn_addmask(void *n_arg, int search, int skip) 484{ 485 caddr_t netmask = (caddr_t)n_arg; 486 struct radix_node *x; 487 caddr_t cp, cplim; 488 int b = 0, mlen, j; 489 int maskduplicated, m0, isnormal; 490 struct radix_node *saved_x; 491 static int last_zeroed = 0; 492 493 if ((mlen = *(u_char *)netmask) > max_keylen) 494 mlen = max_keylen; 495 if (skip == 0) 496 skip = 1; 497 if (mlen <= skip) 498 return (mask_rnhead->rnh_nodes); 499 if (skip > 1) 500 Bcopy(rn_ones + 1, addmask_key + 1, skip - 1); 501 if ((m0 = mlen) > skip) 502 Bcopy(netmask + skip, addmask_key + skip, mlen - skip); 503 /* 504 * Trim trailing zeroes. 505 */ 506 for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;) 507 cp--; 508 mlen = cp - addmask_key; 509 if (mlen <= skip) { 510 if (m0 >= last_zeroed) 511 last_zeroed = mlen; 512 return (mask_rnhead->rnh_nodes); 513 } 514 if (m0 < last_zeroed) 515 Bzero(addmask_key + m0, last_zeroed - m0); 516 *addmask_key = last_zeroed = mlen; 517 x = rn_search(addmask_key, rn_masktop); 518 if (Bcmp(addmask_key, x->rn_key, mlen) != 0) 519 x = NULL; 520 if (x || search) 521 return (x); 522 R_Malloc(x, struct radix_node *, max_keylen + 2 * sizeof (*x)); 523 if ((saved_x = x) == 0) 524 return (NULL); 525 Bzero(x, max_keylen + 2 * sizeof (*x)); 526 netmask = cp = (caddr_t)(x + 2); 527 Bcopy(addmask_key, cp, mlen); 528 x = rn_insert(cp, mask_rnhead, &maskduplicated, x); 529 if (maskduplicated) { 530 log(LOG_ERR, "rn_addmask: mask impossibly already in tree"); 531 R_Free(saved_x); 532 return (x); 533 } 534 mask_rnhead->rnh_cnt++; 535 /* 536 * Calculate index of mask, and check for normalcy. 537 */ 538 cplim = netmask + mlen; isnormal = 1; 539 for (cp = netmask + skip; (cp < cplim) && *(u_char *)cp == 0xff;) 540 cp++; 541 if (cp != cplim) { 542 for (j = 0x80; (j & *cp) != 0; j >>= 1) 543 b++; 544 if (*cp != normal_chars[b] || cp != (cplim - 1)) 545 isnormal = 0; 546 } 547 b += (cp - netmask) << 3; 548 x->rn_bit = -1 - b; 549 if (isnormal) 550 x->rn_flags |= RNF_NORMAL; 551 return (x); 552} 553 554static int /* XXX: arbitrary ordering for non-contiguous masks */ 555rn_lexobetter(void *m_arg, void *n_arg) 556{ 557 u_char *mp = m_arg, *np = n_arg, *lim; 558 559 if (*mp > *np) 560 return 1; /* not really, but need to check longer one first */ 561 if (*mp == *np) 562 for (lim = mp + *mp; mp < lim;) 563 if (*mp++ > *np++) 564 return 1; 565 return 0; 566} 567 568static struct radix_mask * 569rn_new_radix_mask(struct radix_node *tt, struct radix_mask *next) 570{ 571 struct radix_mask *m; 572 573 MKGet(m); 574 if (m == 0) { 575 log(LOG_ERR, "Mask for route not entered\n"); 576 return (NULL); 577 } 578 Bzero(m, sizeof *m); 579 m->rm_bit = tt->rn_bit; 580 m->rm_flags = tt->rn_flags; 581 if (tt->rn_flags & RNF_NORMAL) 582 m->rm_leaf = tt; 583 else 584 m->rm_mask = tt->rn_mask; 585 m->rm_mklist = next; 586 tt->rn_mklist = m; 587 return m; 588} 589 590struct radix_node * 591rn_addroute(void *v_arg, void *n_arg, struct radix_node_head *head, 592 struct radix_node treenodes[2]) 593{ 594 caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg; 595 struct radix_node *t, *x = NULL, *tt; 596 struct radix_node *saved_tt, *top = head->rnh_treetop; 597 short b = 0, b_leaf = 0; 598 int keyduplicated; 599 caddr_t mmask; 600 struct radix_mask *m, **mp; 601 602 /* 603 * In dealing with non-contiguous masks, there may be 604 * many different routes which have the same mask. 605 * We will find it useful to have a unique pointer to 606 * the mask to speed avoiding duplicate references at 607 * nodes and possibly save time in calculating indices. 608 */ 609 if (netmask) { 610 if ((x = rn_addmask(netmask, 0, top->rn_offset)) == 0) 611 return (NULL); 612 b_leaf = x->rn_bit; 613 b = -1 - x->rn_bit; 614 netmask = x->rn_key; 615 } 616 /* 617 * Deal with duplicated keys: attach node to previous instance 618 */ 619 saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes); 620 if (keyduplicated) { 621 for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) { 622 if (tt->rn_mask == netmask) 623 return (NULL); 624 if (netmask == 0 || 625 (tt->rn_mask && 626 ((b_leaf < tt->rn_bit) /* index(netmask) > node */ 627 || rn_refines(netmask, tt->rn_mask) 628 || rn_lexobetter(netmask, tt->rn_mask)))) 629 break; 630 } 631 /* 632 * If the mask is not duplicated, we wouldn't 633 * find it among possible duplicate key entries 634 * anyway, so the above test doesn't hurt. 635 * 636 * We sort the masks for a duplicated key the same way as 637 * in a masklist -- most specific to least specific. 638 * This may require the unfortunate nuisance of relocating 639 * the head of the list. 640 */ 641 if (tt == saved_tt) { 642 struct radix_node *xx = x; 643 /* link in at head of list */ 644 (tt = treenodes)->rn_dupedkey = t; 645 tt->rn_flags = t->rn_flags; 646 tt->rn_parent = x = t->rn_parent; 647 t->rn_parent = tt; /* parent */ 648 if (x->rn_left == t) 649 x->rn_left = tt; 650 else 651 x->rn_right = tt; 652 saved_tt = tt; x = xx; 653 } else { 654 (tt = treenodes)->rn_dupedkey = t->rn_dupedkey; 655 t->rn_dupedkey = tt; 656 tt->rn_parent = t; /* parent */ 657 if (tt->rn_dupedkey) /* parent */ 658 tt->rn_dupedkey->rn_parent = tt; /* parent */ 659 } 660#ifdef RN_DEBUG 661 t=tt+1; tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++; 662 tt->rn_twin = t; tt->rn_ybro = rn_clist; rn_clist = tt; 663#endif 664 tt->rn_key = (caddr_t) v; 665 tt->rn_bit = -1; 666 tt->rn_flags = RNF_ACTIVE; 667 } 668 head->rnh_cnt++; 669 /* 670 * Put mask in tree. 671 */ 672 if (netmask) { 673 tt->rn_mask = netmask; 674 tt->rn_bit = x->rn_bit; 675 tt->rn_flags |= x->rn_flags & RNF_NORMAL; 676 } 677 t = saved_tt->rn_parent; 678 if (keyduplicated) 679 goto on2; 680 b_leaf = -1 - t->rn_bit; 681 if (t->rn_right == saved_tt) 682 x = t->rn_left; 683 else 684 x = t->rn_right; 685 /* Promote general routes from below */ 686 if (x->rn_bit < 0) { 687 for (mp = &t->rn_mklist; x; x = x->rn_dupedkey) 688 if (x->rn_mask && (x->rn_bit >= b_leaf) && x->rn_mklist == 0) { 689 *mp = m = rn_new_radix_mask(x, NULL); 690 if (m) 691 mp = &m->rm_mklist; 692 } 693 } else if (x->rn_mklist) { 694 /* 695 * Skip over masks whose index is > that of new node 696 */ 697 for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) 698 if (m->rm_bit >= b_leaf) 699 break; 700 t->rn_mklist = m; *mp = NULL; 701 } 702on2: 703 /* Add new route to highest possible ancestor's list */ 704 if ((netmask == 0) || (b > t->rn_bit )) 705 return tt; /* can't lift at all */ 706 b_leaf = tt->rn_bit; 707 do { 708 x = t; 709 t = t->rn_parent; 710 } while (b <= t->rn_bit && x != top); 711 /* 712 * Search through routes associated with node to 713 * insert new route according to index. 714 * Need same criteria as when sorting dupedkeys to avoid 715 * double loop on deletion. 716 */ 717 for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) { 718 if (m->rm_bit < b_leaf) 719 continue; 720 if (m->rm_bit > b_leaf) 721 break; 722 if (m->rm_flags & RNF_NORMAL) { 723 mmask = m->rm_leaf->rn_mask; 724 if (tt->rn_flags & RNF_NORMAL) { 725 log(LOG_ERR, 726 "Non-unique normal route, mask not entered"); 727 return tt; 728 } 729 } else 730 mmask = m->rm_mask; 731 if (mmask == netmask) { 732 m->rm_refs++; 733 tt->rn_mklist = m; 734 return tt; 735 } 736 if (rn_refines(netmask, mmask) 737 || rn_lexobetter(netmask, mmask)) 738 break; 739 } 740 *mp = rn_new_radix_mask(tt, *mp); 741 return tt; 742} 743 744struct radix_node * 745rn_delete(void *v_arg, void *netmask_arg, struct radix_node_head *head) 746{ 747 struct radix_node *t, *p, *x, *tt; 748 struct radix_mask *m, *saved_m, **mp; 749 struct radix_node *dupedkey, *saved_tt, *top; 750 caddr_t v, netmask; 751 int b, head_off, vlen; 752 753 v = v_arg; 754 netmask = netmask_arg; 755 x = head->rnh_treetop; 756 tt = rn_search(v, x); 757 head_off = x->rn_offset; 758 vlen = *(u_char *)v; 759 saved_tt = tt; 760 top = x; 761 if (tt == 0 || 762 Bcmp(v + head_off, tt->rn_key + head_off, vlen - head_off)) 763 return (NULL); 764 /* 765 * Delete our route from mask lists. 766 */ 767 if (netmask) { 768 if ((x = rn_addmask(netmask, 1, head_off)) == 0) 769 return (NULL); 770 netmask = x->rn_key; 771 while (tt->rn_mask != netmask) 772 if ((tt = tt->rn_dupedkey) == 0) 773 return (NULL); 774 } 775 if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == 0) 776 goto on1; 777 if (tt->rn_flags & RNF_NORMAL) { 778 if (m->rm_leaf != tt || m->rm_refs > 0) { 779 log(LOG_ERR, "rn_delete: inconsistent annotation\n"); 780 return NULL; /* dangling ref could cause disaster */ 781 } 782 } else { 783 if (m->rm_mask != tt->rn_mask) { 784 log(LOG_ERR, "rn_delete: inconsistent annotation\n"); 785 goto on1; 786 } 787 if (--m->rm_refs >= 0) 788 goto on1; 789 } 790 b = -1 - tt->rn_bit; 791 t = saved_tt->rn_parent; 792 if (b > t->rn_bit) 793 goto on1; /* Wasn't lifted at all */ 794 do { 795 x = t; 796 t = t->rn_parent; 797 } while (b <= t->rn_bit && x != top); 798 for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) 799 if (m == saved_m) { 800 *mp = m->rm_mklist; 801 MKFree(m); 802 break; 803 } 804 if (m == 0) { 805 log(LOG_ERR, "rn_delete: couldn't find our annotation\n"); 806 if (tt->rn_flags & RNF_NORMAL) 807 return (NULL); /* Dangling ref to us */ 808 } 809on1: 810 /* 811 * Eliminate us from tree 812 */ 813 if (tt->rn_flags & RNF_ROOT) 814 return (NULL); 815 head->rnh_cnt--; 816#ifdef RN_DEBUG 817 /* Get us out of the creation list */ 818 for (t = rn_clist; t && t->rn_ybro != tt; t = t->rn_ybro) {} 819 if (t) t->rn_ybro = tt->rn_ybro; 820#endif 821 t = tt->rn_parent; 822 dupedkey = saved_tt->rn_dupedkey; 823 if (dupedkey) { 824 /* 825 * at this point, tt is the deletion target and saved_tt 826 * is the head of the dupekey chain 827 */ 828 if (tt == saved_tt) { 829 /* remove from head of chain */ 830 x = dupedkey; x->rn_parent = t; 831 if (t->rn_left == tt) 832 t->rn_left = x; 833 else 834 t->rn_right = x; 835 } else { 836 /* find node in front of tt on the chain */ 837 for (x = p = saved_tt; p && p->rn_dupedkey != tt;) 838 p = p->rn_dupedkey; 839 if (p) { 840 p->rn_dupedkey = tt->rn_dupedkey; 841 if (tt->rn_dupedkey) /* parent */ 842 tt->rn_dupedkey->rn_parent = p; 843 /* parent */ 844 } else log(LOG_ERR, "rn_delete: couldn't find us\n"); 845 } 846 t = tt + 1; 847 if (t->rn_flags & RNF_ACTIVE) { 848#ifndef RN_DEBUG 849 *++x = *t; 850 p = t->rn_parent; 851#else 852 b = t->rn_info; 853 *++x = *t; 854 t->rn_info = b; 855 p = t->rn_parent; 856#endif 857 if (p->rn_left == t) 858 p->rn_left = x; 859 else 860 p->rn_right = x; 861 x->rn_left->rn_parent = x; 862 x->rn_right->rn_parent = x; 863 } 864 goto out; 865 } 866 if (t->rn_left == tt) 867 x = t->rn_right; 868 else 869 x = t->rn_left; 870 p = t->rn_parent; 871 if (p->rn_right == t) 872 p->rn_right = x; 873 else 874 p->rn_left = x; 875 x->rn_parent = p; 876 /* 877 * Demote routes attached to us. 878 */ 879 if (t->rn_mklist) { 880 if (x->rn_bit >= 0) { 881 for (mp = &x->rn_mklist; (m = *mp);) 882 mp = &m->rm_mklist; 883 *mp = t->rn_mklist; 884 } else { 885 /* If there are any key,mask pairs in a sibling 886 duped-key chain, some subset will appear sorted 887 in the same order attached to our mklist */ 888 for (m = t->rn_mklist; m && x; x = x->rn_dupedkey) 889 if (m == x->rn_mklist) { 890 struct radix_mask *mm = m->rm_mklist; 891 x->rn_mklist = NULL; 892 if (--(m->rm_refs) < 0) 893 MKFree(m); 894 m = mm; 895 } 896 if (m) 897 log(LOG_ERR, "rn_delete: Orphaned Mask " 898 "0x%llx at 0x%llx\n", 899 (uint64_t)VM_KERNEL_ADDRPERM(m), 900 (uint64_t)VM_KERNEL_ADDRPERM(x)); 901 } 902 } 903 /* 904 * We may be holding an active internal node in the tree. 905 */ 906 x = tt + 1; 907 if (t != x) { 908#ifndef RN_DEBUG 909 *t = *x; 910#else 911 b = t->rn_info; 912 *t = *x; 913 t->rn_info = b; 914#endif 915 t->rn_left->rn_parent = t; 916 t->rn_right->rn_parent = t; 917 p = x->rn_parent; 918 if (p->rn_left == x) 919 p->rn_left = t; 920 else 921 p->rn_right = t; 922 } 923out: 924 tt->rn_flags &= ~RNF_ACTIVE; 925 tt[1].rn_flags &= ~RNF_ACTIVE; 926 return (tt); 927} 928 929/* 930 * This is the same as rn_walktree() except for the parameters and the 931 * exit. 932 */ 933static int 934rn_walktree_from(struct radix_node_head *h, void *a, void *m, walktree_f_t *f, 935 void *w) 936{ 937 int error; 938 struct radix_node *base, *next; 939 u_char *xa = (u_char *)a; 940 u_char *xm = (u_char *)m; 941 struct radix_node *rn, *last; 942 int stopping; 943 int lastb; 944 int rnh_cnt; 945 946 /* 947 * This gets complicated because we may delete the node while 948 * applying the function f to it; we cannot simply use the next 949 * leaf as the successor node in advance, because that leaf may 950 * be removed as well during deletion when it is a clone of the 951 * current node. When that happens, we would end up referring 952 * to an already-freed radix node as the successor node. To get 953 * around this issue, if we detect that the radix tree has changed 954 * in dimension (smaller than before), we simply restart the walk 955 * from the top of tree. 956 */ 957restart: 958 last = NULL; 959 stopping = 0; 960 rnh_cnt = h->rnh_cnt; 961 962 /* 963 * rn_search_m is sort-of-open-coded here. 964 */ 965 for (rn = h->rnh_treetop; rn->rn_bit >= 0; ) { 966 last = rn; 967 if (!(rn->rn_bmask & xm[rn->rn_offset])) 968 break; 969 970 if (rn->rn_bmask & xa[rn->rn_offset]) 971 rn = rn->rn_right; 972 else 973 rn = rn->rn_left; 974 } 975 976 /* 977 * Two cases: either we stepped off the end of our mask, 978 * in which case last == rn, or we reached a leaf, in which 979 * case we want to start from the last node we looked at. 980 * Either way, last is the node we want to start from. 981 */ 982 rn = last; 983 lastb = rn->rn_bit; 984 985 /* First time through node, go left */ 986 while (rn->rn_bit >= 0) 987 rn = rn->rn_left; 988 989 while (!stopping) { 990 base = rn; 991 /* If at right child go back up, otherwise, go right */ 992 while (rn->rn_parent->rn_right == rn 993 && !(rn->rn_flags & RNF_ROOT)) { 994 rn = rn->rn_parent; 995 996 /* if went up beyond last, stop */ 997 if (rn->rn_bit <= lastb) { 998 stopping = 1; 999 /* 1000 * XXX we should jump to the 'Process leaves' 1001 * part, because the values of 'rn' and 'next' 1002 * we compute will not be used. Not a big deal 1003 * because this loop will terminate, but it is 1004 * inefficient and hard to understand! 1005 */ 1006 } 1007 } 1008 1009 /* 1010 * The following code (bug fix) inherited from FreeBSD is 1011 * currently disabled, because our implementation uses the 1012 * RTF_PRCLONING scheme that has been abandoned in current 1013 * FreeBSD release. The scheme involves setting such a flag 1014 * for the default route entry, and therefore all off-link 1015 * destinations would become clones of that entry. Enabling 1016 * the following code would be problematic at this point, 1017 * because the removal of default route would cause only 1018 * the left-half of the tree to be traversed, leaving the 1019 * right-half untouched. If there are clones of the entry 1020 * that reside in that right-half, they would not be deleted 1021 * and would linger around until they expire or explicitly 1022 * deleted, which is a very bad thing. 1023 * 1024 * This code should be uncommented only after we get rid 1025 * of the RTF_PRCLONING scheme. 1026 */ 1027#if 0 1028 /* 1029 * At the top of the tree, no need to traverse the right 1030 * half, prevent the traversal of the entire tree in the 1031 * case of default route. 1032 */ 1033 if (rn->rn_parent->rn_flags & RNF_ROOT) 1034 stopping = 1; 1035#endif 1036 1037 /* Find the next *leaf* to start from */ 1038 for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;) 1039 rn = rn->rn_left; 1040 next = rn; 1041 /* Process leaves */ 1042 while ((rn = base) != 0) { 1043 base = rn->rn_dupedkey; 1044 if (!(rn->rn_flags & RNF_ROOT) 1045 && (error = (*f)(rn, w))) 1046 return (error); 1047 } 1048 /* If one or more nodes got deleted, restart from top */ 1049 if (h->rnh_cnt < rnh_cnt) 1050 goto restart; 1051 rn = next; 1052 if (rn->rn_flags & RNF_ROOT) 1053 stopping = 1; 1054 } 1055 return 0; 1056} 1057 1058static int 1059rn_walktree(struct radix_node_head *h, walktree_f_t *f, void *w) 1060{ 1061 int error; 1062 struct radix_node *base, *next; 1063 struct radix_node *rn; 1064 int rnh_cnt; 1065 1066 /* 1067 * This gets complicated because we may delete the node while 1068 * applying the function f to it; we cannot simply use the next 1069 * leaf as the successor node in advance, because that leaf may 1070 * be removed as well during deletion when it is a clone of the 1071 * current node. When that happens, we would end up referring 1072 * to an already-freed radix node as the successor node. To get 1073 * around this issue, if we detect that the radix tree has changed 1074 * in dimension (smaller than before), we simply restart the walk 1075 * from the top of tree. 1076 */ 1077restart: 1078 rn = h->rnh_treetop; 1079 rnh_cnt = h->rnh_cnt; 1080 1081 /* First time through node, go left */ 1082 while (rn->rn_bit >= 0) 1083 rn = rn->rn_left; 1084 for (;;) { 1085 base = rn; 1086 /* If at right child go back up, otherwise, go right */ 1087 while (rn->rn_parent->rn_right == rn && 1088 (rn->rn_flags & RNF_ROOT) == 0) 1089 rn = rn->rn_parent; 1090 /* Find the next *leaf* to start from */ 1091 for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;) 1092 rn = rn->rn_left; 1093 next = rn; 1094 /* Process leaves */ 1095 while ((rn = base) != NULL) { 1096 base = rn->rn_dupedkey; 1097 if (!(rn->rn_flags & RNF_ROOT) 1098 && (error = (*f)(rn, w))) 1099 return (error); 1100 } 1101 /* If one or more nodes got deleted, restart from top */ 1102 if (h->rnh_cnt < rnh_cnt) 1103 goto restart; 1104 rn = next; 1105 if (rn->rn_flags & RNF_ROOT) 1106 return (0); 1107 } 1108 /* NOTREACHED */ 1109} 1110 1111int 1112rn_inithead(void **head, int off) 1113{ 1114 struct radix_node_head *rnh; 1115 struct radix_node *t, *tt, *ttt; 1116 if (*head) 1117 return (1); 1118 R_Malloc(rnh, struct radix_node_head *, sizeof (*rnh)); 1119 if (rnh == 0) 1120 return (0); 1121 Bzero(rnh, sizeof (*rnh)); 1122 *head = rnh; 1123 t = rn_newpair(rn_zeros, off, rnh->rnh_nodes); 1124 ttt = rnh->rnh_nodes + 2; 1125 t->rn_right = ttt; 1126 t->rn_parent = t; 1127 tt = t->rn_left; 1128 tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE; 1129 tt->rn_bit = -1 - off; 1130 *ttt = *tt; 1131 ttt->rn_key = rn_ones; 1132 rnh->rnh_addaddr = rn_addroute; 1133 rnh->rnh_deladdr = rn_delete; 1134 rnh->rnh_matchaddr = rn_match; 1135 rnh->rnh_matchaddr_args = rn_match_args; 1136 rnh->rnh_lookup = rn_lookup; 1137 rnh->rnh_lookup_args = rn_lookup_args; 1138 rnh->rnh_walktree = rn_walktree; 1139 rnh->rnh_walktree_from = rn_walktree_from; 1140 rnh->rnh_treetop = t; 1141 rnh->rnh_cnt = 3; 1142 return (1); 1143} 1144 1145void 1146rn_init(void) 1147{ 1148 char *cp, *cplim; 1149 struct domain *dom; 1150 1151 /* lock already held when rn_init is called */ 1152 TAILQ_FOREACH(dom, &domains, dom_entry) { 1153 if (dom->dom_maxrtkey > max_keylen) 1154 max_keylen = dom->dom_maxrtkey; 1155 } 1156 if (max_keylen == 0) { 1157 log(LOG_ERR, 1158 "rn_init: radix functions require max_keylen be set\n"); 1159 return; 1160 } 1161 R_Malloc(rn_zeros, char *, 3 * max_keylen); 1162 if (rn_zeros == NULL) 1163 panic("rn_init"); 1164 Bzero(rn_zeros, 3 * max_keylen); 1165 rn_ones = cp = rn_zeros + max_keylen; 1166 addmask_key = cplim = rn_ones + max_keylen; 1167 while (cp < cplim) 1168 *cp++ = -1; 1169 if (rn_inithead((void **)&mask_rnhead, 0) == 0) 1170 panic("rn_init 2"); 1171} 1172