1/* 2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* 29 * Copyright (c) 1988, 1989, 1993 30 * The Regents of the University of California. All rights reserved. 31 * 32 * Redistribution and use in source and binary forms, with or without 33 * modification, are permitted provided that the following conditions 34 * are met: 35 * 1. Redistributions of source code must retain the above copyright 36 * notice, this list of conditions and the following disclaimer. 37 * 2. 
Redistributions in binary form must reproduce the above copyright 38 * notice, this list of conditions and the following disclaimer in the 39 * documentation and/or other materials provided with the distribution. 40 * 3. All advertising materials mentioning features or use of this software 41 * must display the following acknowledgement: 42 * This product includes software developed by the University of 43 * California, Berkeley and its contributors. 44 * 4. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 * 60 * @(#)radix.c 8.4 (Berkeley) 11/2/94 61 * $FreeBSD: src/sys/net/radix.c,v 1.20.2.2 2001/03/06 00:56:50 obrien Exp $ 62 */ 63 64/* 65 * Routines to build and maintain radix trees for routing lookups. 
 */
#ifndef _RADIX_H_
#include <sys/param.h>
#ifdef KERNEL
#include <sys/systm.h>
#include <sys/malloc.h>
#define M_DONTWAIT M_NOWAIT
#include <sys/domain.h>
#else
#include <stdlib.h>
#endif
#include <sys/syslog.h>
#include <net/radix.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <kern/locks.h>
#endif

static int rn_walktree_from(struct radix_node_head *h, void *a,
    void *m, walktree_f_t *f, void *w);
static int rn_walktree(struct radix_node_head *, walktree_f_t *, void *);
static struct radix_node
	*rn_insert(void *, struct radix_node_head *, int *,
	    struct radix_node [2]),
	*rn_newpair(void *, int, struct radix_node[2]),
	*rn_search(void *, struct radix_node *),
	*rn_search_m(void *, struct radix_node *, void *);

/* Longest route key (in bytes) of any protocol family; set by rn_init(). */
static int max_keylen;
/* Free list of radix_mask structures (used via MKGet/MKFree). */
static struct radix_mask *rn_mkfreelist;
/* Shared tree interning one copy of every netmask in use. */
static struct radix_node_head *mask_rnhead;
/* Scratch buffer for canonicalizing masks in rn_addmask(); max_keylen bytes. */
static char *addmask_key;
/* Byte value of a normal (contiguous) mask, indexed by leading one-bit count. */
static char normal_chars[] = {0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, -1};
/* max_keylen bytes of all-zeros / all-ones, carved out by rn_init(). */
static char *rn_zeros, *rn_ones;


extern lck_grp_t *domain_proto_mtx_grp;
extern lck_attr_t *domain_proto_mtx_attr;
lck_mtx_t *rn_mutex;

#define rn_masktop (mask_rnhead->rnh_treetop)
#undef Bcmp
/* bcmp() wrapper: a zero-length comparison always reports "equal" (0). */
#define Bcmp(a, b, l) \
	(l == 0 ? 0 : bcmp((caddr_t)(a), (caddr_t)(b), (u_long)l))

static int rn_lexobetter(void *m_arg, void *n_arg);
static struct radix_mask *
		rn_new_radix_mask(struct radix_node *tt,
		    struct radix_mask *next);
static int rn_satisfies_leaf(char *trial, struct radix_node *leaf, int skip,
    rn_matchf_t *f, void *w);

/* True when the optional match callback 'f' accepts leaf 'rn' (or is NULL). */
#define RN_MATCHF(rn, f, arg)	(f == NULL || (*f)((rn), arg))

/*
 * The data structure for the keys is a radix tree with one way
 * branching removed.  The index rn_bit at an internal node n represents a bit
 * position to be tested.  The tree is arranged so that all descendants
 * of a node n have keys whose bits all agree up to position rn_bit - 1.
 * (We say the index of n is rn_bit.)
 *
 * There is at least one descendant which has a one bit at position rn_bit,
 * and at least one with a zero there.
 *
 * A route is determined by a pair of key and mask.  We require that the
 * bit-wise logical and of the key and mask to be the key.
 * We define the index of a route to associated with the mask to be
 * the first bit number in the mask where 0 occurs (with bit number 0
 * representing the highest order bit).
 *
 * We say a mask is normal if every bit is 0, past the index of the mask.
 * If a node n has a descendant (k, m) with index(m) == index(n) == rn_bit,
 * and m is a normal mask, then the route applies to every descendant of n.
 * If the index(m) < rn_bit, this implies the trailing last few bits of k
 * before bit b are all 0, (and hence consequently true of every descendant
 * of n), so the route applies to all descendants of the node as well.
 *
 * Similar logic shows that a non-normal mask m such that
 * index(m) <= index(n) could potentially apply to many children of n.
 * Thus, for each non-host route, we attach its mask to a list at an internal
 * node as high in the tree as we can go.
 *
 * The present version of the code makes use of normal routes in short-
 * circuiting an explicit mask and compare operation when testing whether
 * a key satisfies a normal route, and also in remembering the unique leaf
 * that governs a subtree.
 */

/*
 * rn_search: descend from 'head' following the bits of key 'v_arg'
 * (no mask applied) and return the leaf arrived at.  The caller must
 * still verify that the leaf's key actually equals the search key.
 */
static struct radix_node *
rn_search(void *v_arg, struct radix_node *head)
{
	struct radix_node *x;
	caddr_t v;

	/* Internal nodes have rn_bit >= 0; leaves have rn_bit < 0. */
	for (x = head, v = v_arg; x->rn_bit >= 0;) {
		if (x->rn_bmask & v[x->rn_offset])
			x = x->rn_right;
		else
			x = x->rn_left;
	}
	return (x);
}

/*
 * rn_search_m: like rn_search(), but take the right (one) branch only
 * when the mask 'm_arg' also has a one bit at the position tested.
 */
static struct radix_node *
rn_search_m(void *v_arg, struct radix_node *head, void *m_arg)
{
	struct radix_node *x;
	caddr_t v = v_arg, m = m_arg;

	for (x = head; x->rn_bit >= 0;) {
		if ((x->rn_bmask & m[x->rn_offset]) &&
		    (x->rn_bmask & v[x->rn_offset]))
			x = x->rn_right;
		else
			x = x->rn_left;
	}
	return x;
}

/*
 * rn_refines: return true iff mask 'm_arg' is strictly more specific
 * than mask 'n_arg', i.e. every one bit in n is also set in m and the
 * masks are not identical.  The first byte of each mask is its length;
 * bytes beyond a mask's length are treated as zeros.
 */
int
rn_refines(void *m_arg, void *n_arg)
{
	caddr_t m = m_arg, n = n_arg;
	caddr_t lim, lim2 = lim = n + *(u_char *)n;
	int longer = (*(u_char *)n++) - (int)(*(u_char *)m++);
	int masks_are_equal = 1;

	/* Compare only the prefix both masks actually store. */
	if (longer > 0)
		lim -= longer;
	while (n < lim) {
		/* A bit set in n but clear in m means m cannot refine n. */
		if (*n & ~(*m))
			return 0;
		if (*n++ != *m++)
			masks_are_equal = 0;
	}
	/* One bits in n past m's length also disqualify m. */
	while (n < lim2)
		if (*n++)
			return 0;
	/* Equal prefixes: the longer m refines n iff it has extra one bits. */
	if (masks_are_equal && (longer < 0))
		for (lim2 = m - longer; m < lim2; )
			if (*m++)
				return 1;
	return (!masks_are_equal);
}

/*
 * rn_lookup: exact (key, netmask) lookup; convenience wrapper around
 * rn_lookup_args() with no acceptance callback.
 */
struct radix_node *
rn_lookup(void *v_arg, void *m_arg, struct radix_node_head *head)
{
	return (rn_lookup_args(v_arg, m_arg, head, NULL, NULL));
}

/*
 * rn_lookup_args: find the leaf whose key matches 'v_arg' and, when
 * 'm_arg' is non-NULL, whose mask is the interned copy of that
 * netmask.  'f'/'w' form an optional caller acceptance test applied
 * during the match.  Returns NULL when no such leaf exists.
 */
struct radix_node *
rn_lookup_args(void *v_arg, void *m_arg, struct radix_node_head *head,
    rn_matchf_t *f, void *w)
{
	struct radix_node *x;
	caddr_t netmask = NULL;

	if (m_arg) {
		/*
		 * Canonicalize the mask through the shared mask tree so
		 * it can be compared by pointer below.
		 */
		x = rn_addmask(m_arg, 1, head->rnh_treetop->rn_offset);
		if (x == 0)
			return (NULL);
		netmask = x->rn_key;
	}
	x = rn_match_args(v_arg, head, f, w);
	if (x && netmask) {
		/* Walk the duped-key chain for the entry with this mask. */
		while (x && x->rn_mask != netmask)
			x = x->rn_dupedkey;
	}
	return x;
}

/*
 * Returns true if address 'trial' has no bits differing from the
 * leaf's key when
compared under the leaf's mask. In other words,
 * returns true when 'trial' matches leaf. If a leaf-matching
 * routine is passed in, it is also used to find a match on the
 * conditions defined by the caller of rn_match.
 */
static int
rn_satisfies_leaf(char *trial, struct radix_node *leaf, int skip,
    rn_matchf_t *f, void *w)
{
	char *cp = trial, *cp2 = leaf->rn_key, *cp3 = leaf->rn_mask;
	char *cplim;
	/* First byte of each key is its length; compare the shorter span. */
	int length = min(*(u_char *)cp, *(u_char *)cp2);

	/* A host route carries no mask; treat it as all-ones. */
	if (cp3 == 0)
		cp3 = rn_ones;
	else
		length = min(length, *(u_char *)cp3);
	cplim = cp + length; cp3 += skip; cp2 += skip;
	for (cp += skip; cp < cplim; cp++, cp2++, cp3++)
		if ((*cp ^ *cp2) & *cp3)
			return 0;

	return (RN_MATCHF(leaf, f, w));
}

/*
 * rn_match: best-match lookup of key 'v_arg'; wrapper around
 * rn_match_args() with no acceptance callback.
 */
struct radix_node *
rn_match(void *v_arg, struct radix_node_head *head)
{
	return (rn_match_args(v_arg, head, NULL, NULL));
}

/*
 * rn_match_args: find the most specific leaf that matches key
 * 'v_arg' under the masks stored in the tree.  The optional callback
 * 'f' (with argument 'w') may reject an otherwise-matching leaf, in
 * which case the search continues as if that leaf had not matched.
 * Returns NULL when nothing matches.
 */
struct radix_node *
rn_match_args(void *v_arg, struct radix_node_head *head,
    rn_matchf_t *f, void *w)
{
	caddr_t v = v_arg;
	struct radix_node *t = head->rnh_treetop, *x;
	caddr_t cp = v, cp2;
	caddr_t cplim;
	struct radix_node *saved_t, *top = t;
	int off = t->rn_offset, vlen = *(u_char *)cp, matched_off;
	int test, b, rn_bit;

	/*
	 * Open code rn_search(v, top) to avoid overhead of extra
	 * subroutine call.
	 */
	for (; t->rn_bit >= 0; ) {
		if (t->rn_bmask & cp[t->rn_offset])
			t = t->rn_right;
		else
			t = t->rn_left;
	}
	/*
	 * See if we match exactly as a host destination
	 * or at least learn how many bits match, for normal mask finesse.
	 *
	 * It doesn't hurt us to limit how many bytes to check
	 * to the length of the mask, since if it matches we had a genuine
	 * match and the leaf we have is the most specific one anyway;
	 * if it didn't match with a shorter length it would fail
	 * with a long one.  This wins big for class B&C netmasks which
	 * are probably the most common case...
	 */
	if (t->rn_mask)
		vlen = *(u_char *)t->rn_mask;
	cp += off; cp2 = t->rn_key + off; cplim = v + vlen;
	for (; cp < cplim; cp++, cp2++)
		if (*cp != *cp2)
			goto on1;
	/*
	 * This extra grot is in case we are explicitly asked
	 * to look up the default.  Ugh!
	 *
	 * Never return the root node itself, it seems to cause a
	 * lot of confusion.
	 */
	if (t->rn_flags & RNF_ROOT)
		t = t->rn_dupedkey;
	if (t == NULL || RN_MATCHF(t, f, w)) {
		return (t);
	} else {
		/*
		 * Although we found an exact match on the key,
		 * f() is looking for some other criteria as well.
		 * Continue looking as if the exact match failed.
		 */
		if (t->rn_parent->rn_flags & RNF_ROOT) {
			/* Hit the top; have to give up */
			return (NULL);
		}
		b = 0;
		goto keeplooking;
	}
on1:
	test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */
	for (b = 7; (test >>= 1) > 0;)
		b--;
keeplooking:
	matched_off = cp - v;
	b += matched_off << 3;
	rn_bit = -1 - b;
	/*
	 * If there is a host route in a duped-key chain, it will be first.
	 */
	if ((saved_t = t)->rn_mask == 0)
		t = t->rn_dupedkey;
	for (; t; t = t->rn_dupedkey) {
		/*
		 * Even if we don't match exactly as a host,
		 * we may match if the leaf we wound up at is
		 * a route to a net.
		 */
		if (t->rn_flags & RNF_NORMAL) {
			/* Normal mask: a bit-count comparison suffices. */
			if ((rn_bit <= t->rn_bit) && RN_MATCHF(t, f, w))
				return (t);
		} else if (rn_satisfies_leaf(v, t, matched_off, f, w)) {
			return (t);
		}
	}
	t = saved_t;
	/* start searching up the tree */
	do {
		struct radix_mask *m;
		t = t->rn_parent;
		m = t->rn_mklist;
		/*
		 * If non-contiguous masks ever become important
		 * we can restore the masking and open coding of
		 * the search and satisfaction test and put the
		 * calculation of "off" back before the "do".
		 */
		while (m) {
			if (m->rm_flags & RNF_NORMAL) {
				if ((rn_bit <= m->rm_bit) &&
				    RN_MATCHF(m->rm_leaf, f, w))
					return (m->rm_leaf);
			} else {
				off = min(t->rn_offset, matched_off);
				x = rn_search_m(v, t, m->rm_mask);
				while (x && x->rn_mask != m->rm_mask)
					x = x->rn_dupedkey;
				if (x && rn_satisfies_leaf(v, x, off, f, w))
					return (x);
			}
			m = m->rm_mklist;
		}
	} while (t != top);
	return (NULL);
}

#ifdef RN_DEBUG
int rn_nodenum;
struct radix_node *rn_clist;
int rn_saveinfo;
int rn_debug = 1;
#endif

/*
 * rn_newpair: initialize the caller-supplied node pair as a leaf
 * (nodes[0], holding key 'v') hung off a new internal node
 * (nodes[1]) testing bit 'b'.  Returns the internal node; its right
 * child is left for the caller to fill in.
 */
static struct radix_node *
rn_newpair(void *v, int b, struct radix_node nodes[2])
{
	struct radix_node *tt = nodes, *t = tt + 1;
	t->rn_bit = b;
	t->rn_bmask = 0x80 >> (b & 7);
	t->rn_left = tt;
	t->rn_offset = b >> 3;
	tt->rn_bit = -1;
	tt->rn_key = (caddr_t)v;
	tt->rn_parent = t;
	tt->rn_flags = t->rn_flags = RNF_ACTIVE;
	tt->rn_mklist = t->rn_mklist = NULL;
#ifdef RN_DEBUG
	tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++;
	tt->rn_twin = t;
	tt->rn_ybro = rn_clist;
	rn_clist = tt;
#endif
	return t;
}

/*
 * rn_insert: insert key 'v_arg' into the tree, using the storage in
 * 'nodes'.  If an identical key already exists, *dupentry is set and
 * the existing leaf is returned; otherwise *dupentry is cleared and
 * the new leaf is returned.
 */
static struct radix_node *
rn_insert(void *v_arg, struct radix_node_head *head, int *dupentry,
    struct radix_node nodes[2])
{
	caddr_t v = v_arg;
	struct radix_node *top = head->rnh_treetop;
	int head_off = top->rn_offset, vlen = (int)*((u_char *)v);
	struct radix_node *t = rn_search(v_arg, top);
	caddr_t cp = v + head_off;
	int b;
	struct radix_node *tt;
	/*
	 * Find first bit at which v and t->rn_key differ
	 */
	{
		caddr_t cp2 = t->rn_key + head_off;
		int cmp_res;
		caddr_t cplim = v + vlen;

		while (cp < cplim)
			if (*cp2++ != *cp++)
				goto on1;
		*dupentry = 1;
		return t;
on1:
		*dupentry = 0;
		cmp_res = (cp[-1] ^ cp2[-1]) & 0xff;
		/* Convert the differing byte into an absolute bit number b. */
		for (b = (cp - v) << 3; cmp_res; b--)
			cmp_res >>= 1;
	}
	{
		struct radix_node *p, *x = top;

		cp = v;
		/*
		 * Re-descend from the top, stopping at the point where
		 * the new bit position 'b' belongs.
		 */
		do {
			p = x;
			if (cp[x->rn_offset] & x->rn_bmask)
				x = x->rn_right;
			else
				x = x->rn_left;
		} while (b > (unsigned) x->rn_bit);
		/* x->rn_bit < b && x->rn_bit >= 0 */
#ifdef RN_DEBUG
		if (rn_debug)
			log(LOG_DEBUG, "rn_insert: Going In:\n"), traverse(p);
#endif
		/* Splice the new internal node 't' in between p and x. */
		t = rn_newpair(v_arg, b, nodes);
		tt = t->rn_left;
		if ((cp[p->rn_offset] & p->rn_bmask) == 0)
			p->rn_left = t;
		else
			p->rn_right = t;
		x->rn_parent = t;
		t->rn_parent = p; /* frees x, p as temp vars below */
		if ((cp[t->rn_offset] & t->rn_bmask) == 0) {
			t->rn_right = x;
		} else {
			t->rn_right = tt;
			t->rn_left = x;
		}
#ifdef RN_DEBUG
		if (rn_debug)
			log(LOG_DEBUG, "rn_insert: Coming Out:\n"), traverse(p);
#endif
	}
	return (tt);
}

/*
 * rn_addmask: intern netmask 'n_arg' in the shared mask tree,
 * ignoring the first 'skip' bytes.  With 'search' non-zero only a
 * lookup is performed; otherwise the mask is inserted when absent.
 * Returns the mask's leaf node, or NULL on failure.
 */
struct radix_node *
rn_addmask(void *n_arg, int search, int skip)
{
	caddr_t netmask = (caddr_t)n_arg;
	struct radix_node *x;
	caddr_t cp, cplim;
	int b = 0, mlen, j;
	int maskduplicated, m0, isnormal;
	struct radix_node *saved_x;
	static int last_zeroed = 0;

	if ((mlen = *(u_char *)netmask) > max_keylen)
		mlen = max_keylen;
	if (skip == 0)
		skip = 1;
	/* An empty mask maps to the all-zeros root leaf. */
	if (mlen <= skip)
		return (mask_rnhead->rnh_nodes);
	/* Build the canonical search key in the addmask_key scratch buffer. */
	if (skip > 1)
		Bcopy(rn_ones + 1, addmask_key + 1, skip - 1);
	if ((m0 = mlen) > skip)
		Bcopy(netmask + skip, addmask_key + skip, mlen - skip);
	/*
	 * Trim trailing zeroes.
	 */
	for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;)
		cp--;
	mlen = cp - addmask_key;
	if (mlen <= skip) {
		if (m0 >= last_zeroed)
			last_zeroed = mlen;
		return (mask_rnhead->rnh_nodes);
	}
	if (m0 < last_zeroed)
		Bzero(addmask_key + m0, last_zeroed - m0);
	*addmask_key = last_zeroed = mlen;
	x = rn_search(addmask_key, rn_masktop);
	if (Bcmp(addmask_key, x->rn_key, mlen) != 0)
		x = NULL;
	if (x || search)
		return (x);
	/* Not found: allocate a fresh node pair plus key storage behind it. */
	R_Malloc(x, struct radix_node *, max_keylen + 2 * sizeof (*x));
	if ((saved_x = x) == 0)
		return (NULL);
	Bzero(x, max_keylen + 2 * sizeof (*x));
	netmask = cp = (caddr_t)(x + 2);
	Bcopy(addmask_key, cp, mlen);
	x = rn_insert(cp, mask_rnhead, &maskduplicated, x);
	if (maskduplicated) {
		log(LOG_ERR, "rn_addmask: mask impossibly already in tree");
		R_Free(saved_x);
		return (x);
	}
	mask_rnhead->rnh_cnt++;
	/*
	 * Calculate index of mask, and check for normalcy.
	 */
	cplim = netmask + mlen; isnormal = 1;
	for (cp = netmask + skip; (cp < cplim) && *(u_char *)cp == 0xff;)
		cp++;
	if (cp != cplim) {
		for (j = 0x80; (j & *cp) != 0; j >>= 1)
			b++;
		if (*cp != normal_chars[b] || cp != (cplim - 1))
			isnormal = 0;
	}
	b += (cp - netmask) << 3;
	x->rn_bit = -1 - b;
	if (isnormal)
		x->rn_flags |= RNF_NORMAL;
	return (x);
}

static int	/* XXX: arbitrary ordering for non-contiguous masks */
rn_lexobetter(void *m_arg, void *n_arg)
{
	u_char *mp = m_arg, *np = n_arg, *lim;

	if (*mp > *np)
		return 1;  /* not really, but need to check longer one first */
	if (*mp == *np)
		for (lim = mp + *mp; mp < lim;)
			if (*mp++ > *np++)
				return 1;
	return 0;
}

/*
 * rn_new_radix_mask: allocate a radix_mask annotation describing leaf
 * 'tt' and prepend it to list 'next'; returns the new annotation, or
 * NULL if the allocation fails.
 */
static struct radix_mask *
rn_new_radix_mask(struct radix_node *tt, struct radix_mask *next)
{
	struct radix_mask *m;

	MKGet(m);
	if (m == 0) {
		log(LOG_ERR, "Mask for route not entered\n");
		return (NULL);
	}
	Bzero(m, sizeof *m);
	m->rm_bit = tt->rn_bit;
	m->rm_flags = tt->rn_flags;
	/* Normal masks remember the governing leaf; others just the mask. */
	if (tt->rn_flags & RNF_NORMAL)
		m->rm_leaf = tt;
	else
		m->rm_mask = tt->rn_mask;
	m->rm_mklist = next;
	tt->rn_mklist = m;
	return m;
}

/*
 * rn_addroute: add a route with key 'v_arg' and optional netmask
 * 'n_arg', using the caller-provided 'treenodes' storage.  Returns
 * the new leaf, or NULL when the (key, mask) pair already exists or
 * the mask cannot be interned.
 */
struct radix_node *
rn_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
    struct radix_node treenodes[2])
{
	caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg;
	struct radix_node *t, *x = NULL, *tt;
	struct radix_node *saved_tt, *top = head->rnh_treetop;
	short b = 0, b_leaf = 0;
	int keyduplicated;
	caddr_t mmask;
	struct radix_mask *m, **mp;

	/*
	 * In dealing with non-contiguous masks, there may be
	 * many different routes which have the same mask.
	 * We will find it useful to have a unique pointer to
	 * the mask to speed avoiding duplicate references at
	 * nodes and possibly save time in calculating indices.
	 */
	if (netmask) {
		if ((x = rn_addmask(netmask, 0, top->rn_offset)) == 0)
			return (NULL);
		b_leaf = x->rn_bit;
		b = -1 - x->rn_bit;
		netmask = x->rn_key;
	}
	/*
	 * Deal with duplicated keys: attach node to previous instance
	 */
	saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes);
	if (keyduplicated) {
		for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) {
			/* Same key and same mask: route already present. */
			if (tt->rn_mask == netmask)
				return (NULL);
			if (netmask == 0 ||
			    (tt->rn_mask &&
			    ((b_leaf < tt->rn_bit) /* index(netmask) > node */
			    || rn_refines(netmask, tt->rn_mask)
			    || rn_lexobetter(netmask, tt->rn_mask))))
				break;
		}
		/*
		 * If the mask is not duplicated, we wouldn't
		 * find it among possible duplicate key entries
		 * anyway, so the above test doesn't hurt.
		 *
		 * We sort the masks for a duplicated key the same way as
		 * in a masklist -- most specific to least specific.
		 * This may require the unfortunate nuisance of relocating
		 * the head of the list.
		 */
		if (tt == saved_tt) {
			struct radix_node *xx = x;
			/* link in at head of list */
			(tt = treenodes)->rn_dupedkey = t;
			tt->rn_flags = t->rn_flags;
			tt->rn_parent = x = t->rn_parent;
			t->rn_parent = tt; /* parent */
			if (x->rn_left == t)
				x->rn_left = tt;
			else
				x->rn_right = tt;
			saved_tt = tt; x = xx;
		} else {
			(tt = treenodes)->rn_dupedkey = t->rn_dupedkey;
			t->rn_dupedkey = tt;
			tt->rn_parent = t; /* parent */
			if (tt->rn_dupedkey) /* parent */
				tt->rn_dupedkey->rn_parent = tt; /* parent */
		}
#ifdef RN_DEBUG
		t=tt+1; tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++;
		tt->rn_twin = t; tt->rn_ybro = rn_clist; rn_clist = tt;
#endif
		tt->rn_key = (caddr_t) v;
		tt->rn_bit = -1;
		tt->rn_flags = RNF_ACTIVE;
	}
	head->rnh_cnt++;
	/*
	 * Put mask in tree.
	 */
	if (netmask) {
		tt->rn_mask = netmask;
		tt->rn_bit = x->rn_bit;
		tt->rn_flags |= x->rn_flags & RNF_NORMAL;
	}
	t = saved_tt->rn_parent;
	if (keyduplicated)
		goto on2;
	b_leaf = -1 - t->rn_bit;
	if (t->rn_right == saved_tt)
		x = t->rn_left;
	else
		x = t->rn_right;
	/* Promote general routes from below */
	if (x->rn_bit < 0) {
		for (mp = &t->rn_mklist; x; x = x->rn_dupedkey)
			if (x->rn_mask && (x->rn_bit >= b_leaf) && x->rn_mklist == 0) {
				*mp = m = rn_new_radix_mask(x, NULL);
				if (m)
					mp = &m->rm_mklist;
			}
	} else if (x->rn_mklist) {
		/*
		 * Skip over masks whose index is > that of new node
		 */
		for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist)
			if (m->rm_bit >= b_leaf)
				break;
		t->rn_mklist = m; *mp = NULL;
	}
on2:
	/* Add new route to highest possible ancestor's list */
	if ((netmask == 0) || (b > t->rn_bit ))
		return tt; /* can't lift at all */
	b_leaf = tt->rn_bit;
	do {
		x = t;
		t = t->rn_parent;
	} while (b <= t->rn_bit && x != top);
	/*
	 * Search through routes associated with node to
	 * insert new route according to index.
	 * Need same criteria as when sorting dupedkeys to avoid
	 * double loop on deletion.
	 */
	for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) {
		if (m->rm_bit < b_leaf)
			continue;
		if (m->rm_bit > b_leaf)
			break;
		if (m->rm_flags & RNF_NORMAL) {
			mmask = m->rm_leaf->rn_mask;
			if (tt->rn_flags & RNF_NORMAL) {
				log(LOG_ERR,
				    "Non-unique normal route, mask not entered");
				return tt;
			}
		} else
			mmask = m->rm_mask;
		if (mmask == netmask) {
			/* Same mask already annotated here; share it. */
			m->rm_refs++;
			tt->rn_mklist = m;
			return tt;
		}
		if (rn_refines(netmask, mmask)
		    || rn_lexobetter(netmask, mmask))
			break;
	}
	*mp = rn_new_radix_mask(tt, *mp);
	return tt;
}

/*
 * rn_delete: remove the route with key 'v_arg' and netmask
 * 'netmask_arg' from the tree.  Returns the detached leaf pair for
 * the caller to free, or NULL if it is not found or is a root node.
 */
struct radix_node *
rn_delete(void *v_arg, void *netmask_arg, struct radix_node_head *head)
{
	struct radix_node *t, *p, *x, *tt;
	struct radix_mask *m, *saved_m, **mp;
	struct radix_node *dupedkey, *saved_tt, *top;
	caddr_t v, netmask;
	int b, head_off, vlen;

	v = v_arg;
	netmask = netmask_arg;
	x = head->rnh_treetop;
	tt = rn_search(v, x);
	head_off = x->rn_offset;
	vlen = *(u_char *)v;
	saved_tt = tt;
	top = x;
	if (tt == 0 ||
	    Bcmp(v + head_off, tt->rn_key + head_off, vlen - head_off))
		return (NULL);
	/*
	 * Delete our route from mask lists.
	 */
	if (netmask) {
		/* Canonicalize the mask, then find the matching dupe entry. */
		if ((x = rn_addmask(netmask, 1, head_off)) == 0)
			return (NULL);
		netmask = x->rn_key;
		while (tt->rn_mask != netmask)
			if ((tt = tt->rn_dupedkey) == 0)
				return (NULL);
	}
	if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == 0)
		goto on1;
	if (tt->rn_flags & RNF_NORMAL) {
		if (m->rm_leaf != tt || m->rm_refs > 0) {
			log(LOG_ERR, "rn_delete: inconsistent annotation\n");
			return NULL; /* dangling ref could cause disaster */
		}
	} else {
		if (m->rm_mask != tt->rn_mask) {
			log(LOG_ERR, "rn_delete: inconsistent annotation\n");
			goto on1;
		}
		if (--m->rm_refs >= 0)
			goto on1;
	}
	b = -1 - tt->rn_bit;
	t = saved_tt->rn_parent;
	if (b > t->rn_bit)
		goto on1; /* Wasn't lifted at all */
	/* Climb to the ancestor carrying our annotation and unlink it. */
	do {
		x = t;
		t = t->rn_parent;
	} while (b <= t->rn_bit && x != top);
	for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist)
		if (m == saved_m) {
			*mp = m->rm_mklist;
			MKFree(m);
			break;
		}
	if (m == 0) {
		log(LOG_ERR, "rn_delete: couldn't find our annotation\n");
		if (tt->rn_flags & RNF_NORMAL)
			return (NULL); /* Dangling ref to us */
	}
on1:
	/*
	 * Eliminate us from tree
	 */
	if (tt->rn_flags & RNF_ROOT)
		return (NULL);
	head->rnh_cnt--;
#ifdef RN_DEBUG
	/* Get us out of the creation list */
	for (t = rn_clist; t && t->rn_ybro != tt; t = t->rn_ybro) {}
	if (t) t->rn_ybro = tt->rn_ybro;
#endif
	t = tt->rn_parent;
	dupedkey = saved_tt->rn_dupedkey;
	if (dupedkey) {
		/*
		 * at this point, tt is the deletion target and saved_tt
		 * is the head of the dupekey chain
		 */
		if (tt == saved_tt) {
			/* remove from head of chain */
			x = dupedkey; x->rn_parent = t;
			if (t->rn_left == tt)
				t->rn_left = x;
			else
				t->rn_right = x;
		} else {
			/* find node in front of tt on the chain */
			for (x = p = saved_tt; p && p->rn_dupedkey != tt;)
				p = p->rn_dupedkey;
			if (p) {
				p->rn_dupedkey = tt->rn_dupedkey;
				if (tt->rn_dupedkey) /* parent */
					tt->rn_dupedkey->rn_parent = p;
					/* parent */
			} else log(LOG_ERR, "rn_delete: couldn't find us\n");
		}
		/*
		 * tt's twin internal node may still be active; migrate it
		 * into the surviving dupe's storage.
		 */
		t = tt + 1;
		if (t->rn_flags & RNF_ACTIVE) {
#ifndef RN_DEBUG
			*++x = *t;
			p = t->rn_parent;
#else
			b = t->rn_info;
			*++x = *t;
			t->rn_info = b;
			p = t->rn_parent;
#endif
			if (p->rn_left == t)
				p->rn_left = x;
			else
				p->rn_right = x;
			x->rn_left->rn_parent = x;
			x->rn_right->rn_parent = x;
		}
		goto out;
	}
	if (t->rn_left == tt)
		x = t->rn_right;
	else
		x = t->rn_left;
	p = t->rn_parent;
	if (p->rn_right == t)
		p->rn_right = x;
	else
		p->rn_left = x;
	x->rn_parent = p;
	/*
	 * Demote routes attached to us.
	 */
	if (t->rn_mklist) {
		if (x->rn_bit >= 0) {
			for (mp = &x->rn_mklist; (m = *mp);)
				mp = &m->rm_mklist;
			*mp = t->rn_mklist;
		} else {
			/* If there are any key,mask pairs in a sibling
			   duped-key chain, some subset will appear sorted
			   in the same order attached to our mklist */
			for (m = t->rn_mklist; m && x; x = x->rn_dupedkey)
				if (m == x->rn_mklist) {
					struct radix_mask *mm = m->rm_mklist;
					x->rn_mklist = NULL;
					if (--(m->rm_refs) < 0)
						MKFree(m);
					m = mm;
				}
			if (m)
				log(LOG_ERR,
				    "rn_delete: Orphaned Mask %p at %p\n",
				    (void *)m, (void *)x);
		}
	}
	/*
	 * We may be holding an active internal node in the tree.
	 */
	x = tt + 1;
	if (t != x) {
#ifndef RN_DEBUG
		*t = *x;
#else
		b = t->rn_info;
		*t = *x;
		t->rn_info = b;
#endif
		t->rn_left->rn_parent = t;
		t->rn_right->rn_parent = t;
		p = x->rn_parent;
		if (p->rn_left == x)
			p->rn_left = t;
		else
			p->rn_right = t;
	}
out:
	tt->rn_flags &= ~RNF_ACTIVE;
	tt[1].rn_flags &= ~RNF_ACTIVE;
	return (tt);
}

/*
 * This is the same as rn_walktree() except for the parameters and the
 * exit.
 */
static int
rn_walktree_from(struct radix_node_head *h, void *a, void *m, walktree_f_t *f,
    void *w)
{
	int error;
	struct radix_node *base, *next;
	u_char *xa = (u_char *)a;
	u_char *xm = (u_char *)m;
	struct radix_node *rn, *last;
	int stopping;
	int lastb;
	int rnh_cnt;

	/*
	 * This gets complicated because we may delete the node while
	 * applying the function f to it; we cannot simply use the next
	 * leaf as the successor node in advance, because that leaf may
	 * be removed as well during deletion when it is a clone of the
	 * current node.  When that happens, we would end up referring
	 * to an already-freed radix node as the successor node.  To get
	 * around this issue, if we detect that the radix tree has changed
	 * in dimension (smaller than before), we simply restart the walk
	 * from the top of tree.
	 */
restart:
	last = NULL;
	stopping = 0;
	rnh_cnt = h->rnh_cnt;

	/*
	 * rn_search_m is sort-of-open-coded here.
	 */
	for (rn = h->rnh_treetop; rn->rn_bit >= 0; ) {
		last = rn;
		if (!(rn->rn_bmask & xm[rn->rn_offset]))
			break;

		if (rn->rn_bmask & xa[rn->rn_offset])
			rn = rn->rn_right;
		else
			rn = rn->rn_left;
	}

	/*
	 * Two cases: either we stepped off the end of our mask,
	 * in which case last == rn, or we reached a leaf, in which
	 * case we want to start from the last node we looked at.
 * Either way, last is the node we want to start from.
	 */
	rn = last;
	lastb = rn->rn_bit;

	/* First time through node, go left */
	while (rn->rn_bit >= 0)
		rn = rn->rn_left;

	while (!stopping) {
		base = rn;
		/* If at right child go back up, otherwise, go right */
		while (rn->rn_parent->rn_right == rn
		    && !(rn->rn_flags & RNF_ROOT)) {
			rn = rn->rn_parent;

			/* if went up beyond last, stop */
			if (rn->rn_bit <= lastb) {
				stopping = 1;
				/*
				 * XXX we should jump to the 'Process leaves'
				 * part, because the values of 'rn' and 'next'
				 * we compute will not be used. Not a big deal
				 * because this loop will terminate, but it is
				 * inefficient and hard to understand!
				 */
			}
		}

		/*
		 * The following code (bug fix) inherited from FreeBSD is
		 * currently disabled, because our implementation uses the
		 * RTF_PRCLONING scheme that has been abandoned in current
		 * FreeBSD release.  The scheme involves setting such a flag
		 * for the default route entry, and therefore all off-link
		 * destinations would become clones of that entry.  Enabling
		 * the following code would be problematic at this point,
		 * because the removal of default route would cause only
		 * the left-half of the tree to be traversed, leaving the
		 * right-half untouched.  If there are clones of the entry
		 * that reside in that right-half, they would not be deleted
		 * and would linger around until they expire or explicitly
		 * deleted, which is a very bad thing.
		 *
		 * This code should be uncommented only after we get rid
		 * of the RTF_PRCLONING scheme.
		 */
#if 0
		/*
		 * At the top of the tree, no need to traverse the right
		 * half, prevent the traversal of the entire tree in the
		 * case of default route.
		 */
		if (rn->rn_parent->rn_flags & RNF_ROOT)
			stopping = 1;
#endif

		/* Find the next *leaf* to start from */
		for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;)
			rn = rn->rn_left;
		next = rn;
		/* Process leaves */
		while ((rn = base) != 0) {
			base = rn->rn_dupedkey;
			if (!(rn->rn_flags & RNF_ROOT)
			    && (error = (*f)(rn, w)))
				return (error);
		}
		/* If one or more nodes got deleted, restart from top */
		if (h->rnh_cnt < rnh_cnt)
			goto restart;
		rn = next;
		if (rn->rn_flags & RNF_ROOT)
			stopping = 1;
	}
	return 0;
}

/*
 * rn_walktree: apply 'f' (with argument 'w') to every non-root leaf
 * in the tree; stop and return f's value at the first non-zero
 * result, otherwise return 0.  Restarts from the top whenever f
 * deleted nodes (detected by rnh_cnt shrinking).
 */
static int
rn_walktree(struct radix_node_head *h, walktree_f_t *f, void *w)
{
	int error;
	struct radix_node *base, *next;
	struct radix_node *rn;
	int rnh_cnt;

	/*
	 * This gets complicated because we may delete the node while
	 * applying the function f to it; we cannot simply use the next
	 * leaf as the successor node in advance, because that leaf may
	 * be removed as well during deletion when it is a clone of the
	 * current node.  When that happens, we would end up referring
	 * to an already-freed radix node as the successor node.  To get
	 * around this issue, if we detect that the radix tree has changed
	 * in dimension (smaller than before), we simply restart the walk
	 * from the top of tree.
	 */
restart:
	rn = h->rnh_treetop;
	rnh_cnt = h->rnh_cnt;

	/* First time through node, go left */
	while (rn->rn_bit >= 0)
		rn = rn->rn_left;
	for (;;) {
		base = rn;
		/* If at right child go back up, otherwise, go right */
		while (rn->rn_parent->rn_right == rn &&
		    (rn->rn_flags & RNF_ROOT) == 0)
			rn = rn->rn_parent;
		/* Find the next *leaf* to start from */
		for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;)
			rn = rn->rn_left;
		next = rn;
		/* Process leaves */
		while ((rn = base) != NULL) {
			base = rn->rn_dupedkey;
			if (!(rn->rn_flags & RNF_ROOT)
			    && (error = (*f)(rn, w)))
				return (error);
		}
		/* If one or more nodes got deleted, restart from top */
		if (h->rnh_cnt < rnh_cnt)
			goto restart;
		rn = next;
		if (rn->rn_flags & RNF_ROOT)
			return (0);
	}
	/* NOTREACHED */
}

/*
 * rn_inithead: allocate and initialize a radix_node_head whose keys
 * begin at byte offset 'off'.  Stores the head through '*head'.
 * Returns 1 on success (or when *head was already set), 0 on
 * allocation failure.
 */
int
rn_inithead(void **head, int off)
{
	struct radix_node_head *rnh;
	struct radix_node *t, *tt, *ttt;
	if (*head)
		return (1);
	R_Malloc(rnh, struct radix_node_head *, sizeof (*rnh));
	if (rnh == 0)
		return (0);
	Bzero(rnh, sizeof (*rnh));
	*head = rnh;
	/*
	 * Build the root: an internal node with an all-zeros left leaf
	 * and an all-ones right leaf, all flagged RNF_ROOT.
	 */
	t = rn_newpair(rn_zeros, off, rnh->rnh_nodes);
	ttt = rnh->rnh_nodes + 2;
	t->rn_right = ttt;
	t->rn_parent = t;
	tt = t->rn_left;
	tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE;
	tt->rn_bit = -1 - off;
	*ttt = *tt;
	ttt->rn_key = rn_ones;
	rnh->rnh_addaddr = rn_addroute;
	rnh->rnh_deladdr = rn_delete;
	rnh->rnh_matchaddr = rn_match;
	rnh->rnh_matchaddr_args = rn_match_args;
	rnh->rnh_lookup = rn_lookup;
	rnh->rnh_lookup_args = rn_lookup_args;
	rnh->rnh_walktree = rn_walktree;
	rnh->rnh_walktree_from = rn_walktree_from;
	rnh->rnh_treetop = t;
	rnh->rnh_cnt = 3;
	return (1);
}

/*
 * rn_init: one-time module initialization.  Sizes the shared scratch
 * buffers from the largest dom_maxrtkey of any registered domain,
 * creates the shared mask tree, and allocates rn_mutex.  Panics on
 * allocation failure; logs and returns if max_keylen is unset.
 */
void
rn_init(void)
{
	char *cp, *cplim;
#ifdef KERNEL
	struct domain *dom;

	/* lock already held when rn_init is called */
	for (dom = domains; dom; dom = dom->dom_next)
		if (dom->dom_maxrtkey > max_keylen)
			max_keylen = dom->dom_maxrtkey;
#endif
	if (max_keylen == 0) {
		log(LOG_ERR,
		    "rn_init: radix functions require max_keylen be set\n");
		return;
	}
	/* One allocation split three ways: zeros, ones, addmask scratch. */
	R_Malloc(rn_zeros, char *, 3 * max_keylen);
	if (rn_zeros == NULL)
		panic("rn_init");
	Bzero(rn_zeros, 3 * max_keylen);
	rn_ones = cp = rn_zeros + max_keylen;
	addmask_key = cplim = rn_ones + max_keylen;
	while (cp < cplim)
		*cp++ = -1;
	if (rn_inithead((void **)&mask_rnhead, 0) == 0)
		panic("rn_init 2");

	rn_mutex = lck_mtx_alloc_init(domain_proto_mtx_grp, domain_proto_mtx_attr);
}