radix_mpath.c revision 178454
150477Speter/* $KAME: radix_mpath.c,v 1.17 2004/11/08 10:29:39 itojun Exp $ */ 21817Sdg 31817Sdg/* 41541Srgrimes * Copyright (C) 2001 WIDE Project. 51541Srgrimes * All rights reserved. 61541Srgrimes * 7160798Sjhb * Redistribution and use in source and binary forms, with or without 81541Srgrimes * modification, are permitted provided that the following conditions 9146806Srwatson * are met: 10146806Srwatson * 1. Redistributions of source code must retain the above copyright 11146806Srwatson * notice, this list of conditions and the following disclaimer. 12146806Srwatson * 2. Redistributions in binary form must reproduce the above copyright 13146806Srwatson * notice, this list of conditions and the following disclaimer in the 14160798Sjhb * documentation and/or other materials provided with the distribution. 15160798Sjhb * 3. Neither the name of the project nor the names of its contributors 1611294Sswallace * may be used to endorse or promote products derived from this software 1710905Sbde * without specific prior written permission. 181541Srgrimes * 1910905Sbde * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 2010905Sbde * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 211541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 221541Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 231541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 241541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 251541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2699855Salfred * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 271541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 281541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 291541Srgrimes * SUCH DAMAGE. 3069449Salfred * THE AUTHORS DO NOT GUARANTEE THAT THIS SOFTWARE DOES NOT INFRINGE 31160797Sjhb * ANY OTHERS' INTELLECTUAL PROPERTIES. IN NO EVENT SHALL THE AUTHORS 32160797Sjhb * BE LIABLE FOR ANY INFRINGEMENT OF ANY OTHERS' INTELLECTUAL 33104747Srwatson * PROPERTIES. 34104747Srwatson */ 35123408Speter 36123408Speter#include <sys/cdefs.h> 371541Srgrimes__FBSDID("$FreeBSD: head/sys/net/radix_mpath.c 178454 2008-04-24 05:04:52Z qingli $"); 381541Srgrimes 3911294Sswallace#include "opt_inet6.h" 4011294Sswallace 4111294Sswallace#include <sys/param.h> 4211294Sswallace#include <sys/systm.h> 431541Srgrimes#include <sys/malloc.h> 441541Srgrimes#include <sys/socket.h> 451541Srgrimes#include <sys/domain.h> 461541Srgrimes#include <sys/syslog.h> 471541Srgrimes#include <net/radix.h> 481541Srgrimes#include <net/radix_mpath.h> 49160798Sjhb#include <net/route.h> 50160798Sjhb#include <net/if.h> 51146806Srwatson#include <net/if_var.h> 52160798Sjhb 53160798Sjhb/* 54146806Srwatson * give some jitter to hash, to avoid synchronization between routers 55160798Sjhb */ 56146806Srwatsonstatic u_int32_t hashjitter; 57160798Sjhb 5812216Sbdeint 5912216Sbdern_mpath_capable(struct radix_node_head *rnh) 6012216Sbde{ 61160798Sjhb 62160798Sjhb return rnh->rnh_multipath; 63146806Srwatson} 64146806Srwatson 65160798Sjhbstruct radix_node * 66160798Sjhbrn_mpath_next(struct radix_node *rn) 67160798Sjhb{ 68146806Srwatson struct radix_node *next; 69160798Sjhb 70160798Sjhb if (!rn->rn_dupedkey) 71160798Sjhb return NULL; 72160798Sjhb next = rn->rn_dupedkey; 73160798Sjhb if (rn->rn_mask == next->rn_mask) 74160798Sjhb return next; 75146806Srwatson else 76160798Sjhb return NULL; 77146806Srwatson} 78160798Sjhb 79146806Srwatsonint 80160798Sjhbrn_mpath_count(struct radix_node *rn) 81160798Sjhb{ 82146806Srwatson int i; 8312216Sbde 84160798Sjhb i = 1; 85160798Sjhb while ((rn = rn_mpath_next(rn)) != NULL) 86160798Sjhb i++; 87160798Sjhb return i; 88160798Sjhb} 89146806Srwatson 90160798Sjhbstruct rtentry * 91146806Srwatsonrt_mpath_matchgate(struct rtentry *rt, struct sockaddr *gate) 92160798Sjhb{ 93146806Srwatson struct radix_node *rn; 94160798Sjhb 95146806Srwatson if (!rn_mpath_next((struct radix_node *)rt)) 96146806Srwatson return rt; 97146806Srwatson 98160798Sjhb if (!gate) 99146806Srwatson return NULL; 100146806Srwatson 101160798Sjhb /* beyond here, we use rn as the master copy */ 102146806Srwatson rn = (struct radix_node *)rt; 103146806Srwatson do { 104160798Sjhb rt = (struct rtentry *)rn; 105146806Srwatson /* 106146806Srwatson * we are removing an address alias that has 107160798Sjhb * the same prefix as another address 108160798Sjhb * we need to compare the interface address because 109160798Sjhb * rt_gateway is a special sockadd_dl structure 110160798Sjhb */ 111160798Sjhb if (rt->rt_gateway->sa_family == AF_LINK) { 112160798Sjhb if (!memcmp(rt->rt_ifa->ifa_addr, gate, gate->sa_len)) 113160798Sjhb break; 114160798Sjhb } else { 115160798Sjhb if (rt->rt_gateway->sa_len == gate->sa_len && 116160798Sjhb !memcmp(rt->rt_gateway, gate, gate->sa_len)) 117160798Sjhb break; 118160798Sjhb } 119146806Srwatson } while ((rn = rn_mpath_next(rn)) != NULL); 120160798Sjhb 121146806Srwatson return (struct rtentry *)rn; 122160798Sjhb} 123146806Srwatson 124146806Srwatson/* 125160798Sjhb * go through the chain and unlink "rt" from the list 126160798Sjhb * the caller will free "rt" 12721776Sbde */ 12821776Sbdeint 12921776Sbdert_mpath_deldup(struct rtentry *headrt, struct rtentry *rt) 130160798Sjhb{ 131146806Srwatson struct radix_node *t, *tt; 132160798Sjhb 133160798Sjhb if (!headrt || !rt) 134160798Sjhb return (0); 135160798Sjhb t = (struct radix_node *)headrt; 136146806Srwatson tt = rn_mpath_next(t); 137160798Sjhb while (tt) { 138146806Srwatson if (tt == (struct radix_node *)rt) { 139160798Sjhb t->rn_dupedkey = tt->rn_dupedkey; 140160798Sjhb tt->rn_dupedkey = NULL; 141160798Sjhb tt->rn_flags &= ~RNF_ACTIVE; 142160798Sjhb tt[1].rn_flags &= ~RNF_ACTIVE; 143146806Srwatson return (1); 144160798Sjhb } 145146806Srwatson t = tt; 146160798Sjhb tt = rn_mpath_next((struct radix_node *)t); 147146806Srwatson } 148160798Sjhb return (0); 149160798Sjhb} 150160798Sjhb 151146806Srwatson/* 152146806Srwatson * check if we have the same key/mask/gateway on the table already. 153160798Sjhb */ 154146806Srwatsonint 155160798Sjhbrt_mpath_conflict(struct radix_node_head *rnh, struct rtentry *rt, 156146806Srwatson struct sockaddr *netmask) 157160798Sjhb{ 158146806Srwatson struct radix_node *rn, *rn1; 159146806Srwatson struct rtentry *rt1; 160160798Sjhb char *p, *q, *eq; 161160798Sjhb int same, l, skip; 162160798Sjhb 163146806Srwatson rn = (struct radix_node *)rt; 164160798Sjhb rn1 = rnh->rnh_lookup(rt_key(rt), netmask, rnh); 165146806Srwatson if (!rn1 || rn1->rn_flags & RNF_ROOT) 166160798Sjhb return 0; 167160798Sjhb 168146806Srwatson /* 169160798Sjhb * unlike other functions we have in this file, we have to check 170146806Srwatson * all key/mask/gateway as rnh_lookup can match less specific entry. 171146806Srwatson */ 172146806Srwatson rt1 = (struct rtentry *)rn1; 173160798Sjhb 174146806Srwatson /* compare key. */ 175160798Sjhb if (rt_key(rt1)->sa_len != rt_key(rt)->sa_len || 176146806Srwatson bcmp(rt_key(rt1), rt_key(rt), rt_key(rt1)->sa_len)) 177160798Sjhb goto different; 178146806Srwatson 179160798Sjhb /* key was the same. compare netmask. hairy... */ 180160798Sjhb if (rt_mask(rt1) && netmask) { 181160798Sjhb skip = rnh->rnh_treetop->rn_offset; 182146806Srwatson if (rt_mask(rt1)->sa_len > netmask->sa_len) { 183160798Sjhb /* 184160798Sjhb * as rt_mask(rt1) is made optimal by radix.c, 185160798Sjhb * there must be some 1-bits on rt_mask(rt1) 186146806Srwatson * after netmask->sa_len. therefore, in 187160798Sjhb * this case, the entries are different. 188146806Srwatson */ 189146806Srwatson if (rt_mask(rt1)->sa_len > skip) 190160798Sjhb goto different; 191146806Srwatson else { 192146806Srwatson /* no bits to compare, i.e. same*/ 193160798Sjhb goto maskmatched; 194160798Sjhb } 195146806Srwatson } 196160798Sjhb 197123750Speter l = rt_mask(rt1)->sa_len; 19812216Sbde if (skip > l) { 199160798Sjhb /* no bits to compare, i.e. same */ 200146806Srwatson goto maskmatched; 201146806Srwatson } 202160798Sjhb p = (char *)rt_mask(rt1); 203160798Sjhb q = (char *)netmask; 204146806Srwatson if (bcmp(p + skip, q + skip, l - skip)) 205160798Sjhb goto different; 206146806Srwatson /* 207160798Sjhb * need to go through all the bit, as netmask is not 208146806Srwatson * optimal and can contain trailing 0s 209160798Sjhb */ 210146806Srwatson eq = (char *)netmask + netmask->sa_len; 211160798Sjhb q += l; 212160798Sjhb same = 1; 213146806Srwatson while (eq > q) 214160798Sjhb if (*q++) { 215146806Srwatson same = 0; 216160798Sjhb break; 217146806Srwatson } 218160798Sjhb if (!same) 219146806Srwatson goto different; 220160798Sjhb } else if (!rt_mask(rt1) && !netmask) 221146806Srwatson ; /* no mask to compare, i.e. same */ 222160798Sjhb else { 223146806Srwatson /* one has mask and the other does not, different */ 224160798Sjhb goto different; 225146806Srwatson } 226160798Sjhb 227160798Sjhbmaskmatched: 228160798Sjhb 22921776Sbde /* key/mask were the same. compare gateway for all multipaths */ 23021776Sbde do { 231160798Sjhb rt1 = (struct rtentry *)rn1; 232146806Srwatson 233160798Sjhb /* sanity: no use in comparing the same thing */ 234146806Srwatson if (rn1 == rn) 235160798Sjhb continue; 236146806Srwatson 237146806Srwatson if (rt1->rt_gateway->sa_family == AF_LINK) { 238160798Sjhb if (rt1->rt_ifa->ifa_addr->sa_len != rt->rt_ifa->ifa_addr->sa_len || 239146806Srwatson bcmp(rt1->rt_ifa->ifa_addr, rt->rt_ifa->ifa_addr, 240160798Sjhb rt1->rt_ifa->ifa_addr->sa_len)) 241146806Srwatson continue; 242160798Sjhb } else { 243146806Srwatson if (rt1->rt_gateway->sa_len != rt->rt_gateway->sa_len || 244146806Srwatson bcmp(rt1->rt_gateway, rt->rt_gateway, 245160798Sjhb rt1->rt_gateway->sa_len)) 246146806Srwatson continue; 247160798Sjhb } 248146806Srwatson 249160798Sjhb /* all key/mask/gateway are the same. conflicting entry. */ 250146806Srwatson return EEXIST; 251160798Sjhb } while ((rn1 = rn_mpath_next(rn1)) != NULL); 252160798Sjhb 253160798Sjhbdifferent: 254146806Srwatson return 0; 255146806Srwatson} 256146806Srwatson 257160798Sjhbvoid 258160798Sjhbrtalloc_mpath(struct route *ro, int hash) 259160798Sjhb{ 260160798Sjhb struct radix_node *rn0, *rn; 261160798Sjhb int n; 262160798Sjhb 263160798Sjhb /* 264160798Sjhb * XXX we don't attempt to lookup cached route again; what should 265146806Srwatson * be done for sendto(3) case? 266160798Sjhb */ 267160798Sjhb if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP)) 268146806Srwatson return; /* XXX */ 269160798Sjhb ro->ro_rt = rtalloc1(&ro->ro_dst, 1, 0UL); 270160798Sjhb 271160798Sjhb /* if the route does not exist or it is not multipath, don't care */ 272146806Srwatson if (ro->ro_rt == NULL) 273146806Srwatson return; 274160798Sjhb if (rn_mpath_next((struct radix_node *)ro->ro_rt) == NULL) { 275146806Srwatson RT_UNLOCK(ro->ro_rt); 276160798Sjhb return; 277146806Srwatson } 278160798Sjhb 279160798Sjhb /* beyond here, we use rn as the master copy */ 280160798Sjhb rn0 = rn = (struct radix_node *)ro->ro_rt; 281146806Srwatson n = rn_mpath_count(rn0); 282160798Sjhb 283146806Srwatson /* gw selection by Modulo-N Hash (RFC2991) XXX need improvement? */ 284160798Sjhb hash += hashjitter; 285160798Sjhb hash %= n; 286160798Sjhb while (hash-- > 0 && rn) { 287146806Srwatson /* stay within the multipath routes */ 288160798Sjhb if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask) 289160798Sjhb break; 290146806Srwatson rn = rn->rn_dupedkey; 291146806Srwatson } 2921541Srgrimes 2931541Srgrimes /* XXX try filling rt_gwroute and avoid unreachable gw */ 2941541Srgrimes 2951541Srgrimes /* if gw selection fails, use the first match (default) */ 2961541Srgrimes if (!rn) { 297146806Srwatson RT_UNLOCK(ro->ro_rt); 298146806Srwatson return; 299146806Srwatson } 300146806Srwatson 30130740Sphk rtfree(ro->ro_rt); 302161325Sjhb ro->ro_rt = (struct rtentry *)rn; 303160798Sjhb RT_LOCK(ro->ro_rt); 304146806Srwatson RT_ADDREF(ro->ro_rt); 305160798Sjhb RT_UNLOCK(ro->ro_rt); 306146806Srwatson} 307160798Sjhb 308146806Srwatsonextern int in6_inithead(void **head, int off); 309146806Srwatsonextern int in_inithead(void **head, int off); 310160798Sjhb 311146806Srwatsonint 312160798Sjhbrn4_mpath_inithead(void **head, int off) 313146806Srwatson{ 314160798Sjhb struct radix_node_head *rnh; 315146806Srwatson 316160798Sjhb hashjitter = arc4random(); 317146806Srwatson if (in_inithead(head, off) == 1) { 318160798Sjhb rnh = (struct radix_node_head *)*head; 319160798Sjhb rnh->rnh_multipath = 1; 320160798Sjhb return 1; 321146806Srwatson } else 322146806Srwatson return 0; 323146806Srwatson} 32469449Salfred 325160798Sjhb#ifdef INET6 326146806Srwatsonint 32769449Salfredrn6_mpath_inithead(void **head, int off) 328123750Speter{ 329160798Sjhb struct radix_node_head *rnh; 330146806Srwatson 33169449Salfred hashjitter = arc4random(); 332123750Speter if (in6_inithead(head, off) == 1) { 333160798Sjhb rnh = (struct radix_node_head *)*head; 334146806Srwatson rnh->rnh_multipath = 1; 335123750Speter return 1; 336146806Srwatson } else 337160798Sjhb return 0; 338146806Srwatson} 339160798Sjhb 340146806Srwatson#endif 341146806Srwatson