radix_mpath.c revision 178454
150477Speter/*	$KAME: radix_mpath.c,v 1.17 2004/11/08 10:29:39 itojun Exp $	*/
21817Sdg
31817Sdg/*
41541Srgrimes * Copyright (C) 2001 WIDE Project.
51541Srgrimes * All rights reserved.
61541Srgrimes *
7160798Sjhb * Redistribution and use in source and binary forms, with or without
81541Srgrimes * modification, are permitted provided that the following conditions
9146806Srwatson * are met:
10146806Srwatson * 1. Redistributions of source code must retain the above copyright
11146806Srwatson *    notice, this list of conditions and the following disclaimer.
12146806Srwatson * 2. Redistributions in binary form must reproduce the above copyright
13146806Srwatson *    notice, this list of conditions and the following disclaimer in the
14160798Sjhb *    documentation and/or other materials provided with the distribution.
15160798Sjhb * 3. Neither the name of the project nor the names of its contributors
1611294Sswallace *    may be used to endorse or promote products derived from this software
1710905Sbde *    without specific prior written permission.
181541Srgrimes *
1910905Sbde * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
2010905Sbde * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
211541Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
221541Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
231541Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
241541Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
251541Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2699855Salfred * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
271541Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
281541Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
291541Srgrimes * SUCH DAMAGE.
3069449Salfred * THE AUTHORS DO NOT GUARANTEE THAT THIS SOFTWARE DOES NOT INFRINGE
31160797Sjhb * ANY OTHERS' INTELLECTUAL PROPERTIES. IN NO EVENT SHALL THE AUTHORS
32160797Sjhb * BE LIABLE FOR ANY INFRINGEMENT OF ANY OTHERS' INTELLECTUAL
33104747Srwatson * PROPERTIES.
34104747Srwatson */
35123408Speter
36123408Speter#include <sys/cdefs.h>
371541Srgrimes__FBSDID("$FreeBSD: head/sys/net/radix_mpath.c 178454 2008-04-24 05:04:52Z qingli $");
381541Srgrimes
3911294Sswallace#include "opt_inet6.h"
4011294Sswallace
4111294Sswallace#include <sys/param.h>
4211294Sswallace#include <sys/systm.h>
431541Srgrimes#include <sys/malloc.h>
441541Srgrimes#include <sys/socket.h>
451541Srgrimes#include <sys/domain.h>
461541Srgrimes#include <sys/syslog.h>
471541Srgrimes#include <net/radix.h>
481541Srgrimes#include <net/radix_mpath.h>
49160798Sjhb#include <net/route.h>
50160798Sjhb#include <net/if.h>
51146806Srwatson#include <net/if_var.h>
52160798Sjhb
53160798Sjhb/*
54146806Srwatson * give some jitter to hash, to avoid synchronization between routers
55160798Sjhb */
56146806Srwatsonstatic u_int32_t hashjitter;
57160798Sjhb
5812216Sbdeint
5912216Sbdern_mpath_capable(struct radix_node_head *rnh)
6012216Sbde{
61160798Sjhb
62160798Sjhb	return rnh->rnh_multipath;
63146806Srwatson}
64146806Srwatson
65160798Sjhbstruct radix_node *
66160798Sjhbrn_mpath_next(struct radix_node *rn)
67160798Sjhb{
68146806Srwatson	struct radix_node *next;
69160798Sjhb
70160798Sjhb	if (!rn->rn_dupedkey)
71160798Sjhb		return NULL;
72160798Sjhb	next = rn->rn_dupedkey;
73160798Sjhb	if (rn->rn_mask == next->rn_mask)
74160798Sjhb		return next;
75146806Srwatson	else
76160798Sjhb		return NULL;
77146806Srwatson}
78160798Sjhb
79146806Srwatsonint
80160798Sjhbrn_mpath_count(struct radix_node *rn)
81160798Sjhb{
82146806Srwatson	int i;
8312216Sbde
84160798Sjhb	i = 1;
85160798Sjhb	while ((rn = rn_mpath_next(rn)) != NULL)
86160798Sjhb		i++;
87160798Sjhb	return i;
88160798Sjhb}
89146806Srwatson
90160798Sjhbstruct rtentry *
91146806Srwatsonrt_mpath_matchgate(struct rtentry *rt, struct sockaddr *gate)
92160798Sjhb{
93146806Srwatson	struct radix_node *rn;
94160798Sjhb
95146806Srwatson	if (!rn_mpath_next((struct radix_node *)rt))
96146806Srwatson		return rt;
97146806Srwatson
98160798Sjhb	if (!gate)
99146806Srwatson		return NULL;
100146806Srwatson
101160798Sjhb	/* beyond here, we use rn as the master copy */
102146806Srwatson	rn = (struct radix_node *)rt;
103146806Srwatson	do {
104160798Sjhb		rt = (struct rtentry *)rn;
105146806Srwatson		/*
106146806Srwatson		 * we are removing an address alias that has
107160798Sjhb		 * the same prefix as another address
108160798Sjhb		 * we need to compare the interface address because
109160798Sjhb		 * rt_gateway is a special sockadd_dl structure
110160798Sjhb		 */
111160798Sjhb		if (rt->rt_gateway->sa_family == AF_LINK) {
112160798Sjhb			if (!memcmp(rt->rt_ifa->ifa_addr, gate, gate->sa_len))
113160798Sjhb				break;
114160798Sjhb		} else {
115160798Sjhb			if (rt->rt_gateway->sa_len == gate->sa_len &&
116160798Sjhb			    !memcmp(rt->rt_gateway, gate, gate->sa_len))
117160798Sjhb				break;
118160798Sjhb		}
119146806Srwatson	} while ((rn = rn_mpath_next(rn)) != NULL);
120160798Sjhb
121146806Srwatson	return (struct rtentry *)rn;
122160798Sjhb}
123146806Srwatson
124146806Srwatson/*
125160798Sjhb * go through the chain and unlink "rt" from the list
126160798Sjhb * the caller will free "rt"
12721776Sbde */
12821776Sbdeint
12921776Sbdert_mpath_deldup(struct rtentry *headrt, struct rtentry *rt)
130160798Sjhb{
131146806Srwatson        struct radix_node *t, *tt;
132160798Sjhb
133160798Sjhb        if (!headrt || !rt)
134160798Sjhb            return (0);
135160798Sjhb        t = (struct radix_node *)headrt;
136146806Srwatson        tt = rn_mpath_next(t);
137160798Sjhb        while (tt) {
138146806Srwatson            if (tt == (struct radix_node *)rt) {
139160798Sjhb                t->rn_dupedkey = tt->rn_dupedkey;
140160798Sjhb                tt->rn_dupedkey = NULL;
141160798Sjhb    	        tt->rn_flags &= ~RNF_ACTIVE;
142160798Sjhb	        tt[1].rn_flags &= ~RNF_ACTIVE;
143146806Srwatson                return (1);
144160798Sjhb            }
145146806Srwatson            t = tt;
146160798Sjhb            tt = rn_mpath_next((struct radix_node *)t);
147146806Srwatson        }
148160798Sjhb        return (0);
149160798Sjhb}
150160798Sjhb
151146806Srwatson/*
152146806Srwatson * check if we have the same key/mask/gateway on the table already.
153160798Sjhb */
154146806Srwatsonint
155160798Sjhbrt_mpath_conflict(struct radix_node_head *rnh, struct rtentry *rt,
156146806Srwatson    struct sockaddr *netmask)
157160798Sjhb{
158146806Srwatson	struct radix_node *rn, *rn1;
159146806Srwatson	struct rtentry *rt1;
160160798Sjhb	char *p, *q, *eq;
161160798Sjhb	int same, l, skip;
162160798Sjhb
163146806Srwatson	rn = (struct radix_node *)rt;
164160798Sjhb	rn1 = rnh->rnh_lookup(rt_key(rt), netmask, rnh);
165146806Srwatson	if (!rn1 || rn1->rn_flags & RNF_ROOT)
166160798Sjhb		return 0;
167160798Sjhb
168146806Srwatson	/*
169160798Sjhb	 * unlike other functions we have in this file, we have to check
170146806Srwatson	 * all key/mask/gateway as rnh_lookup can match less specific entry.
171146806Srwatson	 */
172146806Srwatson	rt1 = (struct rtentry *)rn1;
173160798Sjhb
174146806Srwatson	/* compare key. */
175160798Sjhb	if (rt_key(rt1)->sa_len != rt_key(rt)->sa_len ||
176146806Srwatson	    bcmp(rt_key(rt1), rt_key(rt), rt_key(rt1)->sa_len))
177160798Sjhb		goto different;
178146806Srwatson
179160798Sjhb	/* key was the same.  compare netmask.  hairy... */
180160798Sjhb	if (rt_mask(rt1) && netmask) {
181160798Sjhb		skip = rnh->rnh_treetop->rn_offset;
182146806Srwatson		if (rt_mask(rt1)->sa_len > netmask->sa_len) {
183160798Sjhb			/*
184160798Sjhb			 * as rt_mask(rt1) is made optimal by radix.c,
185160798Sjhb			 * there must be some 1-bits on rt_mask(rt1)
186146806Srwatson			 * after netmask->sa_len.  therefore, in
187160798Sjhb			 * this case, the entries are different.
188146806Srwatson			 */
189146806Srwatson			if (rt_mask(rt1)->sa_len > skip)
190160798Sjhb				goto different;
191146806Srwatson			else {
192146806Srwatson				/* no bits to compare, i.e. same*/
193160798Sjhb				goto maskmatched;
194160798Sjhb			}
195146806Srwatson		}
196160798Sjhb
197123750Speter		l = rt_mask(rt1)->sa_len;
19812216Sbde		if (skip > l) {
199160798Sjhb			/* no bits to compare, i.e. same */
200146806Srwatson			goto maskmatched;
201146806Srwatson		}
202160798Sjhb		p = (char *)rt_mask(rt1);
203160798Sjhb		q = (char *)netmask;
204146806Srwatson		if (bcmp(p + skip, q + skip, l - skip))
205160798Sjhb			goto different;
206146806Srwatson		/*
207160798Sjhb		 * need to go through all the bit, as netmask is not
208146806Srwatson		 * optimal and can contain trailing 0s
209160798Sjhb		 */
210146806Srwatson		eq = (char *)netmask + netmask->sa_len;
211160798Sjhb		q += l;
212160798Sjhb		same = 1;
213146806Srwatson		while (eq > q)
214160798Sjhb			if (*q++) {
215146806Srwatson				same = 0;
216160798Sjhb				break;
217146806Srwatson			}
218160798Sjhb		if (!same)
219146806Srwatson			goto different;
220160798Sjhb	} else if (!rt_mask(rt1) && !netmask)
221146806Srwatson		; /* no mask to compare, i.e. same */
222160798Sjhb	else {
223146806Srwatson		/* one has mask and the other does not, different */
224160798Sjhb		goto different;
225146806Srwatson	}
226160798Sjhb
227160798Sjhbmaskmatched:
228160798Sjhb
22921776Sbde	/* key/mask were the same.  compare gateway for all multipaths */
23021776Sbde	do {
231160798Sjhb		rt1 = (struct rtentry *)rn1;
232146806Srwatson
233160798Sjhb		/* sanity: no use in comparing the same thing */
234146806Srwatson		if (rn1 == rn)
235160798Sjhb			continue;
236146806Srwatson
237146806Srwatson		if (rt1->rt_gateway->sa_family == AF_LINK) {
238160798Sjhb			if (rt1->rt_ifa->ifa_addr->sa_len != rt->rt_ifa->ifa_addr->sa_len ||
239146806Srwatson			    bcmp(rt1->rt_ifa->ifa_addr, rt->rt_ifa->ifa_addr,
240160798Sjhb			    rt1->rt_ifa->ifa_addr->sa_len))
241146806Srwatson				continue;
242160798Sjhb		} else {
243146806Srwatson			if (rt1->rt_gateway->sa_len != rt->rt_gateway->sa_len ||
244146806Srwatson			    bcmp(rt1->rt_gateway, rt->rt_gateway,
245160798Sjhb			    rt1->rt_gateway->sa_len))
246146806Srwatson				continue;
247160798Sjhb		}
248146806Srwatson
249160798Sjhb		/* all key/mask/gateway are the same.  conflicting entry. */
250146806Srwatson		return EEXIST;
251160798Sjhb	} while ((rn1 = rn_mpath_next(rn1)) != NULL);
252160798Sjhb
253160798Sjhbdifferent:
254146806Srwatson	return 0;
255146806Srwatson}
256146806Srwatson
257160798Sjhbvoid
258160798Sjhbrtalloc_mpath(struct route *ro, int hash)
259160798Sjhb{
260160798Sjhb	struct radix_node *rn0, *rn;
261160798Sjhb	int n;
262160798Sjhb
263160798Sjhb	/*
264160798Sjhb	 * XXX we don't attempt to lookup cached route again; what should
265146806Srwatson	 * be done for sendto(3) case?
266160798Sjhb	 */
267160798Sjhb	if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP))
268146806Srwatson		return;				 /* XXX */
269160798Sjhb	ro->ro_rt = rtalloc1(&ro->ro_dst, 1, 0UL);
270160798Sjhb
271160798Sjhb	/* if the route does not exist or it is not multipath, don't care */
272146806Srwatson	if (ro->ro_rt == NULL)
273146806Srwatson		return;
274160798Sjhb	if (rn_mpath_next((struct radix_node *)ro->ro_rt) == NULL) {
275146806Srwatson		RT_UNLOCK(ro->ro_rt);
276160798Sjhb		return;
277146806Srwatson	}
278160798Sjhb
279160798Sjhb	/* beyond here, we use rn as the master copy */
280160798Sjhb	rn0 = rn = (struct radix_node *)ro->ro_rt;
281146806Srwatson	n = rn_mpath_count(rn0);
282160798Sjhb
283146806Srwatson	/* gw selection by Modulo-N Hash (RFC2991) XXX need improvement? */
284160798Sjhb	hash += hashjitter;
285160798Sjhb	hash %= n;
286160798Sjhb	while (hash-- > 0 && rn) {
287146806Srwatson		/* stay within the multipath routes */
288160798Sjhb		if (rn->rn_dupedkey && rn->rn_mask != rn->rn_dupedkey->rn_mask)
289160798Sjhb			break;
290146806Srwatson		rn = rn->rn_dupedkey;
291146806Srwatson	}
2921541Srgrimes
2931541Srgrimes	/* XXX try filling rt_gwroute and avoid unreachable gw  */
2941541Srgrimes
2951541Srgrimes	/* if gw selection fails, use the first match (default) */
2961541Srgrimes	if (!rn) {
297146806Srwatson		RT_UNLOCK(ro->ro_rt);
298146806Srwatson		return;
299146806Srwatson	}
300146806Srwatson
30130740Sphk	rtfree(ro->ro_rt);
302161325Sjhb	ro->ro_rt = (struct rtentry *)rn;
303160798Sjhb	RT_LOCK(ro->ro_rt);
304146806Srwatson	RT_ADDREF(ro->ro_rt);
305160798Sjhb	RT_UNLOCK(ro->ro_rt);
306146806Srwatson}
307160798Sjhb
308146806Srwatsonextern int	in6_inithead(void **head, int off);
309146806Srwatsonextern int	in_inithead(void **head, int off);
310160798Sjhb
311146806Srwatsonint
312160798Sjhbrn4_mpath_inithead(void **head, int off)
313146806Srwatson{
314160798Sjhb	struct radix_node_head *rnh;
315146806Srwatson
316160798Sjhb	hashjitter = arc4random();
317146806Srwatson	if (in_inithead(head, off) == 1) {
318160798Sjhb		rnh = (struct radix_node_head *)*head;
319160798Sjhb		rnh->rnh_multipath = 1;
320160798Sjhb		return 1;
321146806Srwatson	} else
322146806Srwatson		return 0;
323146806Srwatson}
32469449Salfred
325160798Sjhb#ifdef INET6
326146806Srwatsonint
32769449Salfredrn6_mpath_inithead(void **head, int off)
328123750Speter{
329160798Sjhb	struct radix_node_head *rnh;
330146806Srwatson
33169449Salfred	hashjitter = arc4random();
332123750Speter	if (in6_inithead(head, off) == 1) {
333160798Sjhb		rnh = (struct radix_node_head *)*head;
334146806Srwatson		rnh->rnh_multipath = 1;
335123750Speter		return 1;
336146806Srwatson	} else
337160798Sjhb		return 0;
338146806Srwatson}
339160798Sjhb
340146806Srwatson#endif
341146806Srwatson