1/* 2 * Copyright (c) 2000,2007 Apple Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/*- 29 * Copyright (c) 1998 The NetBSD Foundation, Inc. 30 * All rights reserved. 31 * 32 * This code is derived from software contributed to The NetBSD Foundation 33 * by the 3am Software Foundry ("3am"). It was developed by Matt Thomas. 34 * 35 * Redistribution and use in source and binary forms, with or without 36 * modification, are permitted provided that the following conditions 37 * are met: 38 * 1. Redistributions of source code must retain the above copyright 39 * notice, this list of conditions and the following disclaimer. 40 * 2. Redistributions in binary form must reproduce the above copyright 41 * notice, this list of conditions and the following disclaimer in the 42 * documentation and/or other materials provided with the distribution. 43 * 3. All advertising materials mentioning features or use of this software 44 * must display the following acknowledgement: 45 * This product includes software developed by the NetBSD 46 * Foundation, Inc. and its contributors. 47 * 4. Neither the name of The NetBSD Foundation nor the names of its 48 * contributors may be used to endorse or promote products derived 49 * from this software without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 52 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 53 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 54 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 55 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 56 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 57 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 58 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 59 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 60 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 61 * POSSIBILITY OF SUCH DAMAGE. 62 * 63 * $FreeBSD: src/sys/netinet/ip_flow.c,v 1.9.2.1 2001/08/08 08:20:35 ru Exp $ 64 */ 65 66#include <sys/param.h> 67#include <sys/systm.h> 68#include <sys/malloc.h> 69#include <sys/mbuf.h> 70#include <sys/protosw.h> 71#include <sys/socket.h> 72#include <sys/kernel.h> 73 74#include <sys/sysctl.h> 75#include <libkern/OSAtomic.h> 76 77#include <net/if.h> 78#include <net/route.h> 79 80#include <netinet/in.h> 81#include <netinet/in_systm.h> 82#include <netinet/ip.h> 83#include <netinet/in_var.h> 84#include <netinet/ip_var.h> 85#include <netinet/ip_flow.h> 86#include <net/dlil.h> 87 88#define IPFLOW_TIMER (5 * PR_SLOWHZ) 89#define IPFLOW_HASHBITS 6 /* should not be a multiple of 8 */ 90#define IPFLOW_HASHSIZE (1 << IPFLOW_HASHBITS) 91static LIST_HEAD(ipflowhead, ipflow) ipflows[IPFLOW_HASHSIZE]; 92static int ipflow_inuse; 93#define IPFLOW_MAX 256 94 95#ifdef __APPLE__ 96#define M_IPFLOW M_TEMP 97#endif 98 99static int ipflow_active = 0; 100SYSCTL_INT(_net_inet_ip, IPCTL_FASTFORWARDING, fastforwarding, CTLFLAG_RW, 101 &ipflow_active, 0, "Enable flow-based IP forwarding"); 102 103#ifndef __APPLE__ 104static MALLOC_DEFINE(M_IPFLOW, "ip_flow", "IP flow"); 105#endif 106 107static unsigned 108ipflow_hash( 109 struct in_addr dst, 110 struct in_addr src, 111 unsigned tos) 112{ 113 unsigned hash = tos; 114 int idx; 115 for (idx = 0; idx < 32; idx += IPFLOW_HASHBITS) 116 hash += (dst.s_addr >> (32 - idx)) + (src.s_addr >> idx); 117 return hash & (IPFLOW_HASHSIZE-1); 118} 119 120static struct ipflow * 121ipflow_lookup( 122 const struct ip *ip) 123{ 124 unsigned hash; 125 struct ipflow *ipf; 126 127 hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos); 128 129 ipf = LIST_FIRST(&ipflows[hash]); 130 while (ipf != NULL) { 131 if (ip->ip_dst.s_addr == ipf->ipf_dst.s_addr 132 && ip->ip_src.s_addr == ipf->ipf_src.s_addr 133 && ip->ip_tos == ipf->ipf_tos) 134 break; 135 ipf = LIST_NEXT(ipf, ipf_next); 136 } 137 return ipf; 138} 139 140int 141ipflow_fastforward( 142 struct mbuf *m) 143{ 144 struct ip *ip; 145 struct ipflow *ipf; 146 struct rtentry *rt; 147 struct sockaddr *dst; 148 int error; 149 150 /* 151 * Are we forwarding packets? Big enough for an IP packet? 152 */ 153 if (!ipforwarding || !ipflow_active || m->m_len < sizeof(struct ip)) 154 return 0; 155 /* 156 * IP header with no option and valid version and length 157 */ 158 ip = mtod(m, struct ip *); 159 if (ip->ip_v != IPVERSION || ip->ip_hl != (sizeof(struct ip) >> 2) 160 || ntohs(ip->ip_len) > m->m_pkthdr.len) 161 return 0; 162 /* 163 * Find a flow. 164 */ 165 if ((ipf = ipflow_lookup(ip)) == NULL) 166 return 0; 167 168 /* 169 * Route and interface still up? 170 */ 171 rt = ipf->ipf_ro.ro_rt; 172 if ((rt->rt_flags & RTF_UP) == 0 || (rt->rt_ifp->if_flags & IFF_UP) == 0) 173 return 0; 174 175 /* 176 * Packet size OK? TTL? 177 */ 178 if (m->m_pkthdr.len > rt->rt_ifp->if_mtu || ip->ip_ttl <= IPTTLDEC) 179 return 0; 180 181 /* 182 * Everything checks out and so we can forward this packet. 183 * Modify the TTL and incrementally change the checksum. 184 */ 185 ip->ip_ttl -= IPTTLDEC; 186 if (ip->ip_sum >= htons(0xffff - (IPTTLDEC << 8))) { 187 ip->ip_sum += htons(IPTTLDEC << 8) + 1; 188 } else { 189 ip->ip_sum += htons(IPTTLDEC << 8); 190 } 191 192 /* 193 * Send the packet on its way. All we can get back is ENOBUFS 194 */ 195 ipf->ipf_uses++; 196 ipf->ipf_timer = IPFLOW_TIMER; 197 198 if (rt->rt_flags & RTF_GATEWAY) 199 dst = rt->rt_gateway; 200 else 201 dst = &ipf->ipf_ro.ro_dst; 202#ifdef __APPLE__ 203 /* Not sure the rt_dlt is valid here !! XXX */ 204 if ((error = dlil_output(rt->rt_ifp, PF_INET, m, (caddr_t) rt, dst, 0)) != 0) { 205 206#else 207 if ((error = (*rt->rt_ifp->if_output)(rt->rt_ifp, m, dst, rt)) != 0) { 208#endif 209 if (error == ENOBUFS) 210 ipf->ipf_dropped++; 211 else 212 ipf->ipf_errors++; 213 } 214 return 1; 215} 216 217static void 218ipflow_addstats( 219 struct ipflow *ipf) 220{ 221 ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses; 222 OSAddAtomic(ipf->ipf_errors + ipf->ipf_dropped, (SInt32*)&ipstat.ips_cantforward); 223 OSAddAtomic(ipf->ipf_uses, (SInt32*)&ipstat.ips_forward); 224 OSAddAtomic(ipf->ipf_uses, (SInt32*)&ipstat.ips_fastforward); 225} 226 227static void 228ipflow_free( 229 struct ipflow *ipf) 230{ 231 /* 232 * Remove the flow from the hash table (at elevated IPL). 233 * Once it's off the list, we can deal with it at normal 234 * network IPL. 235 */ 236 LIST_REMOVE(ipf, ipf_next); 237 ipflow_addstats(ipf); 238 rtfree(ipf->ipf_ro.ro_rt); 239 ipflow_inuse--; 240 FREE(ipf, M_IPFLOW); 241} 242 243static struct ipflow * 244ipflow_reap( 245 void) 246{ 247 struct ipflow *ipf, *maybe_ipf = NULL; 248 int idx; 249 250 for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) { 251 ipf = LIST_FIRST(&ipflows[idx]); 252 while (ipf != NULL) { 253 /* 254 * If this no longer points to a valid route 255 * reclaim it. 256 */ 257 if ((ipf->ipf_ro.ro_rt->rt_flags & RTF_UP) == 0) 258 goto done; 259 /* 260 * choose the one that's been least recently used 261 * or has had the least uses in the last 1.5 262 * intervals. 263 */ 264 if (maybe_ipf == NULL 265 || ipf->ipf_timer < maybe_ipf->ipf_timer 266 || (ipf->ipf_timer == maybe_ipf->ipf_timer 267 && ipf->ipf_last_uses + ipf->ipf_uses < 268 maybe_ipf->ipf_last_uses + 269 maybe_ipf->ipf_uses)) 270 maybe_ipf = ipf; 271 ipf = LIST_NEXT(ipf, ipf_next); 272 } 273 } 274 ipf = maybe_ipf; 275 done: 276 /* 277 * Remove the entry from the flow table. 278 */ 279 LIST_REMOVE(ipf, ipf_next); 280 ipflow_addstats(ipf); 281 rtfree(ipf->ipf_ro.ro_rt); 282 ipf->ipf_ro.ro_rt = NULL; 283 return ipf; 284} 285/* note: called under the ip_mutex lock */ 286void 287ipflow_slowtimo( 288 void) 289{ 290 struct ipflow *ipf; 291 int idx; 292 293 for (idx = 0; idx < IPFLOW_HASHSIZE; idx++) { 294 ipf = LIST_FIRST(&ipflows[idx]); 295 while (ipf != NULL) { 296 struct ipflow *next_ipf = LIST_NEXT(ipf, ipf_next); 297 if (--ipf->ipf_timer == 0) { 298 ipflow_free(ipf); 299 } else { 300 ipf->ipf_last_uses = ipf->ipf_uses; 301 ipf->ipf_ro.ro_rt->rt_use += ipf->ipf_uses; 302 OSAddAtomic(ipf->ipf_uses, (SInt32*)&ipstat.ips_forward); 303 OSAddAtomic(ipf->ipf_uses, (SInt32*)&ipstat.ips_fastforward); 304 ipstat.ips_forward += ipf->ipf_uses; 305 ipstat.ips_fastforward += ipf->ipf_uses; 306 ipf->ipf_uses = 0; 307 } 308 ipf = next_ipf; 309 } 310 } 311} 312 313void 314ipflow_create( 315 const struct route *ro, 316 struct mbuf *m) 317{ 318 const struct ip *const ip = mtod(m, struct ip *); 319 struct ipflow *ipf; 320 unsigned hash; 321 322 /* 323 * Don't create cache entries for ICMP messages. 324 */ 325 if (!ipflow_active || ip->ip_p == IPPROTO_ICMP) 326 return; 327 /* 328 * See if an existing flow struct exists. If so remove it from it's 329 * list and free the old route. If not, try to malloc a new one 330 * (if we aren't at our limit). 331 */ 332 ipf = ipflow_lookup(ip); 333 if (ipf == NULL) { 334 if (ipflow_inuse == IPFLOW_MAX) { 335 ipf = ipflow_reap(); 336 } else { 337 ipf = (struct ipflow *) _MALLOC(sizeof(*ipf), M_IPFLOW, 338 M_NOWAIT); 339 if (ipf == NULL) 340 return; 341 ipflow_inuse++; 342 } 343 bzero((caddr_t) ipf, sizeof(*ipf)); 344 } else { 345 LIST_REMOVE(ipf, ipf_next); 346 ipflow_addstats(ipf); 347 rtfree(ipf->ipf_ro.ro_rt); 348 ipf->ipf_ro.ro_rt = NULL; 349 ipf->ipf_uses = ipf->ipf_last_uses = 0; 350 ipf->ipf_errors = ipf->ipf_dropped = 0; 351 } 352 353 /* 354 * Fill in the updated information. 355 */ 356 lck_mtx_lock(rt_mtx); 357 ipf->ipf_ro = *ro; 358 rtref(ro->ro_rt); 359 lck_mtx_unlock(rt_mtx); 360 ipf->ipf_dst = ip->ip_dst; 361 ipf->ipf_src = ip->ip_src; 362 ipf->ipf_tos = ip->ip_tos; 363 ipf->ipf_timer = IPFLOW_TIMER; 364 /* 365 * Insert into the approriate bucket of the flow table. 366 */ 367 hash = ipflow_hash(ip->ip_dst, ip->ip_src, ip->ip_tos); 368 LIST_INSERT_HEAD(&ipflows[hash], ipf, ipf_next); 369} 370