1/* 2 * INET An implementation of the TCP/IP protocol suite for the LINUX 3 * operating system. INET is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * IPv4 Forwarding Information Base: FIB frontend. 7 * 8 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License 12 * as published by the Free Software Foundation; either version 13 * 2 of the License, or (at your option) any later version. 14 */ 15 16#include <linux/module.h> 17#include <asm/uaccess.h> 18#include <asm/system.h> 19#include <linux/bitops.h> 20#include <linux/capability.h> 21#include <linux/types.h> 22#include <linux/kernel.h> 23#include <linux/mm.h> 24#include <linux/string.h> 25#include <linux/socket.h> 26#include <linux/sockios.h> 27#include <linux/errno.h> 28#include <linux/in.h> 29#include <linux/inet.h> 30#include <linux/inetdevice.h> 31#include <linux/netdevice.h> 32#include <linux/if_addr.h> 33#include <linux/if_arp.h> 34#include <linux/skbuff.h> 35#include <linux/init.h> 36#include <linux/list.h> 37#include <linux/slab.h> 38 39#include <net/ip.h> 40#include <net/protocol.h> 41#include <net/route.h> 42#include <net/tcp.h> 43#include <net/sock.h> 44#include <net/arp.h> 45#include <net/ip_fib.h> 46#include <net/rtnetlink.h> 47 48#ifndef CONFIG_IP_MULTIPLE_TABLES 49 50static int __net_init fib4_rules_init(struct net *net) 51{ 52 struct fib_table *local_table, *main_table; 53 54 local_table = fib_hash_table(RT_TABLE_LOCAL); 55 if (local_table == NULL) 56 return -ENOMEM; 57 58 main_table = fib_hash_table(RT_TABLE_MAIN); 59 if (main_table == NULL) 60 goto fail; 61 62 hlist_add_head_rcu(&local_table->tb_hlist, 63 &net->ipv4.fib_table_hash[TABLE_LOCAL_INDEX]); 64 hlist_add_head_rcu(&main_table->tb_hlist, 65 &net->ipv4.fib_table_hash[TABLE_MAIN_INDEX]); 66 return 0; 67 68fail: 69 kfree(local_table); 70 return -ENOMEM; 71} 72#else 73 74struct fib_table *fib_new_table(struct net *net, u32 id) 75{ 76 struct fib_table *tb; 77 unsigned int h; 78 79 if (id == 0) 80 id = RT_TABLE_MAIN; 81 tb = fib_get_table(net, id); 82 if (tb) 83 return tb; 84 85 tb = fib_hash_table(id); 86 if (!tb) 87 return NULL; 88 h = id & (FIB_TABLE_HASHSZ - 1); 89 hlist_add_head_rcu(&tb->tb_hlist, &net->ipv4.fib_table_hash[h]); 90 return tb; 91} 92 93struct fib_table *fib_get_table(struct net *net, u32 id) 94{ 95 struct fib_table *tb; 96 struct hlist_node *node; 97 struct hlist_head *head; 98 unsigned int h; 99 100 if (id == 0) 101 id = RT_TABLE_MAIN; 102 h = id & (FIB_TABLE_HASHSZ - 1); 103 104 rcu_read_lock(); 105 head = &net->ipv4.fib_table_hash[h]; 106 hlist_for_each_entry_rcu(tb, node, head, tb_hlist) { 107 if (tb->tb_id == id) { 108 rcu_read_unlock(); 109 return tb; 110 } 111 } 112 rcu_read_unlock(); 113 return NULL; 114} 115#endif /* CONFIG_IP_MULTIPLE_TABLES */ 116 117void fib_select_default(struct net *net, 118 const struct flowi *flp, struct fib_result *res) 119{ 120 struct fib_table *tb; 121 int table = RT_TABLE_MAIN; 122#ifdef CONFIG_IP_MULTIPLE_TABLES 123 if (res->r == NULL || res->r->action != FR_ACT_TO_TBL) 124 return; 125 table = res->r->table; 126#endif 127 tb = fib_get_table(net, table); 128 if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) 129 fib_table_select_default(tb, flp, res); 130} 131 132static void fib_flush(struct net *net) 133{ 134 int flushed = 0; 135 struct fib_table *tb; 136 struct hlist_node *node; 137 struct hlist_head *head; 138 unsigned int h; 139 140 for (h = 0; h < FIB_TABLE_HASHSZ; h++) { 141 head = &net->ipv4.fib_table_hash[h]; 142 hlist_for_each_entry(tb, node, head, tb_hlist) 143 flushed += fib_table_flush(tb); 144 } 145 146 if (flushed) 147 rt_cache_flush(net, -1); 148} 149 150/* 151 * Find the first device with a given source address. 152 */ 153 154struct net_device * ip_dev_find(struct net *net, __be32 addr) 155{ 156 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; 157 struct fib_result res; 158 struct net_device *dev = NULL; 159 struct fib_table *local_table; 160 161#ifdef CONFIG_IP_MULTIPLE_TABLES 162 res.r = NULL; 163#endif 164 165 local_table = fib_get_table(net, RT_TABLE_LOCAL); 166 if (!local_table || fib_table_lookup(local_table, &fl, &res)) 167 return NULL; 168 if (res.type != RTN_LOCAL) 169 goto out; 170 dev = FIB_RES_DEV(res); 171 172 if (dev) 173 dev_hold(dev); 174out: 175 fib_res_put(&res); 176 return dev; 177} 178EXPORT_SYMBOL(ip_dev_find); 179 180/* 181 * Find address type as if only "dev" was present in the system. If 182 * on_dev is NULL then all interfaces are taken into consideration. 183 */ 184static inline unsigned __inet_dev_addr_type(struct net *net, 185 const struct net_device *dev, 186 __be32 addr) 187{ 188 struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; 189 struct fib_result res; 190 unsigned ret = RTN_BROADCAST; 191 struct fib_table *local_table; 192 193 if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr)) 194 return RTN_BROADCAST; 195 if (ipv4_is_multicast(addr)) 196 return RTN_MULTICAST; 197 198#ifdef CONFIG_IP_MULTIPLE_TABLES 199 res.r = NULL; 200#endif 201 202 local_table = fib_get_table(net, RT_TABLE_LOCAL); 203 if (local_table) { 204 ret = RTN_UNICAST; 205 if (!fib_table_lookup(local_table, &fl, &res)) { 206 if (!dev || dev == res.fi->fib_dev) 207 ret = res.type; 208 fib_res_put(&res); 209 } 210 } 211 return ret; 212} 213 214unsigned int inet_addr_type(struct net *net, __be32 addr) 215{ 216 return __inet_dev_addr_type(net, NULL, addr); 217} 218EXPORT_SYMBOL(inet_addr_type); 219 220unsigned int inet_dev_addr_type(struct net *net, const struct net_device *dev, 221 __be32 addr) 222{ 223 return __inet_dev_addr_type(net, dev, addr); 224} 225EXPORT_SYMBOL(inet_dev_addr_type); 226 227/* Given (packet source, input interface) and optional (dst, oif, tos): 228 - (main) check, that source is valid i.e. not broadcast or our local 229 address. 230 - figure out what "logical" interface this packet arrived 231 and calculate "specific destination" address. 232 - check, that packet arrived from expected physical interface. 233 */ 234 235int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, 236 struct net_device *dev, __be32 *spec_dst, 237 u32 *itag, u32 mark) 238{ 239 struct in_device *in_dev; 240 struct flowi fl = { .nl_u = { .ip4_u = 241 { .daddr = src, 242 .saddr = dst, 243 .tos = tos } }, 244 .mark = mark, 245 .iif = oif }; 246 247 struct fib_result res; 248 int no_addr, rpf, accept_local; 249 bool dev_match; 250 int ret; 251 struct net *net; 252 253 no_addr = rpf = accept_local = 0; 254 rcu_read_lock(); 255 in_dev = __in_dev_get_rcu(dev); 256 if (in_dev) { 257 no_addr = in_dev->ifa_list == NULL; 258 rpf = IN_DEV_RPFILTER(in_dev); 259 accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); 260 if (mark && !IN_DEV_SRC_VMARK(in_dev)) 261 fl.mark = 0; 262 } 263 rcu_read_unlock(); 264 265 if (in_dev == NULL) 266 goto e_inval; 267 268 net = dev_net(dev); 269 if (fib_lookup(net, &fl, &res)) 270 goto last_resort; 271 if (res.type != RTN_UNICAST) { 272 if (res.type != RTN_LOCAL || !accept_local) 273 goto e_inval_res; 274 } 275 *spec_dst = FIB_RES_PREFSRC(res); 276 fib_combine_itag(itag, &res); 277 dev_match = false; 278 279#ifdef CONFIG_IP_ROUTE_MULTIPATH 280 for (ret = 0; ret < res.fi->fib_nhs; ret++) { 281 struct fib_nh *nh = &res.fi->fib_nh[ret]; 282 283 if (nh->nh_dev == dev) { 284 dev_match = true; 285 break; 286 } 287 } 288#else 289 if (FIB_RES_DEV(res) == dev) 290 dev_match = true; 291#endif 292 if (dev_match) { 293 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 294 fib_res_put(&res); 295 return ret; 296 } 297 fib_res_put(&res); 298 if (no_addr) 299 goto last_resort; 300 if (rpf == 1) 301 goto e_rpf; 302 fl.oif = dev->ifindex; 303 304 ret = 0; 305 if (fib_lookup(net, &fl, &res) == 0) { 306 if (res.type == RTN_UNICAST) { 307 *spec_dst = FIB_RES_PREFSRC(res); 308 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; 309 } 310 fib_res_put(&res); 311 } 312 return ret; 313 314last_resort: 315 if (rpf) 316 goto e_rpf; 317 *spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); 318 *itag = 0; 319 return 0; 320 321e_inval_res: 322 fib_res_put(&res); 323e_inval: 324 return -EINVAL; 325e_rpf: 326 return -EXDEV; 327} 328 329static inline __be32 sk_extract_addr(struct sockaddr *addr) 330{ 331 return ((struct sockaddr_in *) addr)->sin_addr.s_addr; 332} 333 334static int put_rtax(struct nlattr *mx, int len, int type, u32 value) 335{ 336 struct nlattr *nla; 337 338 nla = (struct nlattr *) ((char *) mx + len); 339 nla->nla_type = type; 340 nla->nla_len = nla_attr_size(4); 341 *(u32 *) nla_data(nla) = value; 342 343 return len + nla_total_size(4); 344} 345 346static int rtentry_to_fib_config(struct net *net, int cmd, struct rtentry *rt, 347 struct fib_config *cfg) 348{ 349 __be32 addr; 350 int plen; 351 352 memset(cfg, 0, sizeof(*cfg)); 353 cfg->fc_nlinfo.nl_net = net; 354 355 if (rt->rt_dst.sa_family != AF_INET) 356 return -EAFNOSUPPORT; 357 358 /* 359 * Check mask for validity: 360 * a) it must be contiguous. 361 * b) destination must have all host bits clear. 362 * c) if application forgot to set correct family (AF_INET), 363 * reject request unless it is absolutely clear i.e. 364 * both family and mask are zero. 365 */ 366 plen = 32; 367 addr = sk_extract_addr(&rt->rt_dst); 368 if (!(rt->rt_flags & RTF_HOST)) { 369 __be32 mask = sk_extract_addr(&rt->rt_genmask); 370 371 if (rt->rt_genmask.sa_family != AF_INET) { 372 if (mask || rt->rt_genmask.sa_family) 373 return -EAFNOSUPPORT; 374 } 375 376 if (bad_mask(mask, addr)) 377 return -EINVAL; 378 379 plen = inet_mask_len(mask); 380 } 381 382 cfg->fc_dst_len = plen; 383 cfg->fc_dst = addr; 384 385 if (cmd != SIOCDELRT) { 386 cfg->fc_nlflags = NLM_F_CREATE; 387 cfg->fc_protocol = RTPROT_BOOT; 388 } 389 390 if (rt->rt_metric) 391 cfg->fc_priority = rt->rt_metric - 1; 392 393 if (rt->rt_flags & RTF_REJECT) { 394 cfg->fc_scope = RT_SCOPE_HOST; 395 cfg->fc_type = RTN_UNREACHABLE; 396 return 0; 397 } 398 399 cfg->fc_scope = RT_SCOPE_NOWHERE; 400 cfg->fc_type = RTN_UNICAST; 401 402 if (rt->rt_dev) { 403 char *colon; 404 struct net_device *dev; 405 char devname[IFNAMSIZ]; 406 407 if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1)) 408 return -EFAULT; 409 410 devname[IFNAMSIZ-1] = 0; 411 colon = strchr(devname, ':'); 412 if (colon) 413 *colon = 0; 414 dev = __dev_get_by_name(net, devname); 415 if (!dev) 416 return -ENODEV; 417 cfg->fc_oif = dev->ifindex; 418 if (colon) { 419 struct in_ifaddr *ifa; 420 struct in_device *in_dev = __in_dev_get_rtnl(dev); 421 if (!in_dev) 422 return -ENODEV; 423 *colon = ':'; 424 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) 425 if (strcmp(ifa->ifa_label, devname) == 0) 426 break; 427 if (ifa == NULL) 428 return -ENODEV; 429 cfg->fc_prefsrc = ifa->ifa_local; 430 } 431 } 432 433 addr = sk_extract_addr(&rt->rt_gateway); 434 if (rt->rt_gateway.sa_family == AF_INET && addr) { 435 cfg->fc_gw = addr; 436 if (rt->rt_flags & RTF_GATEWAY && 437 inet_addr_type(net, addr) == RTN_UNICAST) 438 cfg->fc_scope = RT_SCOPE_UNIVERSE; 439 } 440 441 if (cmd == SIOCDELRT) 442 return 0; 443 444 if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw) 445 return -EINVAL; 446 447 if (cfg->fc_scope == RT_SCOPE_NOWHERE) 448 cfg->fc_scope = RT_SCOPE_LINK; 449 450 if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) { 451 struct nlattr *mx; 452 int len = 0; 453 454 mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL); 455 if (mx == NULL) 456 return -ENOMEM; 457 458 if (rt->rt_flags & RTF_MTU) 459 len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40); 460 461 if (rt->rt_flags & RTF_WINDOW) 462 len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window); 463 464 if (rt->rt_flags & RTF_IRTT) 465 len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3); 466 467 cfg->fc_mx = mx; 468 cfg->fc_mx_len = len; 469 } 470 471 return 0; 472} 473 474/* 475 * Handle IP routing ioctl calls. These are used to manipulate the routing tables 476 */ 477 478int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) 479{ 480 struct fib_config cfg; 481 struct rtentry rt; 482 int err; 483 484 switch (cmd) { 485 case SIOCADDRT: /* Add a route */ 486 case SIOCDELRT: /* Delete a route */ 487 if (!capable(CAP_NET_ADMIN)) 488 return -EPERM; 489 490 if (copy_from_user(&rt, arg, sizeof(rt))) 491 return -EFAULT; 492 493 rtnl_lock(); 494 err = rtentry_to_fib_config(net, cmd, &rt, &cfg); 495 if (err == 0) { 496 struct fib_table *tb; 497 498 if (cmd == SIOCDELRT) { 499 tb = fib_get_table(net, cfg.fc_table); 500 if (tb) 501 err = fib_table_delete(tb, &cfg); 502 else 503 err = -ESRCH; 504 } else { 505 tb = fib_new_table(net, cfg.fc_table); 506 if (tb) 507 err = fib_table_insert(tb, &cfg); 508 else 509 err = -ENOBUFS; 510 } 511 512 /* allocated by rtentry_to_fib_config() */ 513 kfree(cfg.fc_mx); 514 } 515 rtnl_unlock(); 516 return err; 517 } 518 return -EINVAL; 519} 520 521const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = { 522 [RTA_DST] = { .type = NLA_U32 }, 523 [RTA_SRC] = { .type = NLA_U32 }, 524 [RTA_IIF] = { .type = NLA_U32 }, 525 [RTA_OIF] = { .type = NLA_U32 }, 526 [RTA_GATEWAY] = { .type = NLA_U32 }, 527 [RTA_PRIORITY] = { .type = NLA_U32 }, 528 [RTA_PREFSRC] = { .type = NLA_U32 }, 529 [RTA_METRICS] = { .type = NLA_NESTED }, 530 [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, 531 [RTA_FLOW] = { .type = NLA_U32 }, 532}; 533 534static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, 535 struct nlmsghdr *nlh, struct fib_config *cfg) 536{ 537 struct nlattr *attr; 538 int err, remaining; 539 struct rtmsg *rtm; 540 541 err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy); 542 if (err < 0) 543 goto errout; 544 545 memset(cfg, 0, sizeof(*cfg)); 546 547 rtm = nlmsg_data(nlh); 548 cfg->fc_dst_len = rtm->rtm_dst_len; 549 cfg->fc_tos = rtm->rtm_tos; 550 cfg->fc_table = rtm->rtm_table; 551 cfg->fc_protocol = rtm->rtm_protocol; 552 cfg->fc_scope = rtm->rtm_scope; 553 cfg->fc_type = rtm->rtm_type; 554 cfg->fc_flags = rtm->rtm_flags; 555 cfg->fc_nlflags = nlh->nlmsg_flags; 556 557 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; 558 cfg->fc_nlinfo.nlh = nlh; 559 cfg->fc_nlinfo.nl_net = net; 560 561 if (cfg->fc_type > RTN_MAX) { 562 err = -EINVAL; 563 goto errout; 564 } 565 566 nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) { 567 switch (nla_type(attr)) { 568 case RTA_DST: 569 cfg->fc_dst = nla_get_be32(attr); 570 break; 571 case RTA_OIF: 572 cfg->fc_oif = nla_get_u32(attr); 573 break; 574 case RTA_GATEWAY: 575 cfg->fc_gw = nla_get_be32(attr); 576 break; 577 case RTA_PRIORITY: 578 cfg->fc_priority = nla_get_u32(attr); 579 break; 580 case RTA_PREFSRC: 581 cfg->fc_prefsrc = nla_get_be32(attr); 582 break; 583 case RTA_METRICS: 584 cfg->fc_mx = nla_data(attr); 585 cfg->fc_mx_len = nla_len(attr); 586 break; 587 case RTA_MULTIPATH: 588 cfg->fc_mp = nla_data(attr); 589 cfg->fc_mp_len = nla_len(attr); 590 break; 591 case RTA_FLOW: 592 cfg->fc_flow = nla_get_u32(attr); 593 break; 594 case RTA_TABLE: 595 cfg->fc_table = nla_get_u32(attr); 596 break; 597 } 598 } 599 600 return 0; 601errout: 602 return err; 603} 604 605static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 606{ 607 struct net *net = sock_net(skb->sk); 608 struct fib_config cfg; 609 struct fib_table *tb; 610 int err; 611 612 err = rtm_to_fib_config(net, skb, nlh, &cfg); 613 if (err < 0) 614 goto errout; 615 616 tb = fib_get_table(net, cfg.fc_table); 617 if (tb == NULL) { 618 err = -ESRCH; 619 goto errout; 620 } 621 622 err = fib_table_delete(tb, &cfg); 623errout: 624 return err; 625} 626 627static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) 628{ 629 struct net *net = sock_net(skb->sk); 630 struct fib_config cfg; 631 struct fib_table *tb; 632 int err; 633 634 err = rtm_to_fib_config(net, skb, nlh, &cfg); 635 if (err < 0) 636 goto errout; 637 638 tb = fib_new_table(net, cfg.fc_table); 639 if (tb == NULL) { 640 err = -ENOBUFS; 641 goto errout; 642 } 643 644 err = fib_table_insert(tb, &cfg); 645errout: 646 return err; 647} 648 649static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) 650{ 651 struct net *net = sock_net(skb->sk); 652 unsigned int h, s_h; 653 unsigned int e = 0, s_e; 654 struct fib_table *tb; 655 struct hlist_node *node; 656 struct hlist_head *head; 657 int dumped = 0; 658 659 if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) && 660 ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED) 661 return ip_rt_dump(skb, cb); 662 663 s_h = cb->args[0]; 664 s_e = cb->args[1]; 665 666 for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { 667 e = 0; 668 head = &net->ipv4.fib_table_hash[h]; 669 hlist_for_each_entry(tb, node, head, tb_hlist) { 670 if (e < s_e) 671 goto next; 672 if (dumped) 673 memset(&cb->args[2], 0, sizeof(cb->args) - 674 2 * sizeof(cb->args[0])); 675 if (fib_table_dump(tb, skb, cb) < 0) 676 goto out; 677 dumped = 1; 678next: 679 e++; 680 } 681 } 682out: 683 cb->args[1] = e; 684 cb->args[0] = h; 685 686 return skb->len; 687} 688 689/* Prepare and feed intra-kernel routing request. 690 Really, it should be netlink message, but :-( netlink 691 can be not configured, so that we feed it directly 692 to fib engine. It is legal, because all events occur 693 only when netlink is already locked. 694 */ 695 696static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa) 697{ 698 struct net *net = dev_net(ifa->ifa_dev->dev); 699 struct fib_table *tb; 700 struct fib_config cfg = { 701 .fc_protocol = RTPROT_KERNEL, 702 .fc_type = type, 703 .fc_dst = dst, 704 .fc_dst_len = dst_len, 705 .fc_prefsrc = ifa->ifa_local, 706 .fc_oif = ifa->ifa_dev->dev->ifindex, 707 .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND, 708 .fc_nlinfo = { 709 .nl_net = net, 710 }, 711 }; 712 713 if (type == RTN_UNICAST) 714 tb = fib_new_table(net, RT_TABLE_MAIN); 715 else 716 tb = fib_new_table(net, RT_TABLE_LOCAL); 717 718 if (tb == NULL) 719 return; 720 721 cfg.fc_table = tb->tb_id; 722 723 if (type != RTN_LOCAL) 724 cfg.fc_scope = RT_SCOPE_LINK; 725 else 726 cfg.fc_scope = RT_SCOPE_HOST; 727 728 if (cmd == RTM_NEWROUTE) 729 fib_table_insert(tb, &cfg); 730 else 731 fib_table_delete(tb, &cfg); 732} 733 734void fib_add_ifaddr(struct in_ifaddr *ifa) 735{ 736 struct in_device *in_dev = ifa->ifa_dev; 737 struct net_device *dev = in_dev->dev; 738 struct in_ifaddr *prim = ifa; 739 __be32 mask = ifa->ifa_mask; 740 __be32 addr = ifa->ifa_local; 741 __be32 prefix = ifa->ifa_address&mask; 742 743 if (ifa->ifa_flags&IFA_F_SECONDARY) { 744 prim = inet_ifa_byprefix(in_dev, prefix, mask); 745 if (prim == NULL) { 746 printk(KERN_WARNING "fib_add_ifaddr: bug: prim == NULL\n"); 747 return; 748 } 749 } 750 751 fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim); 752 753 if (!(dev->flags&IFF_UP)) 754 return; 755 756 /* Add broadcast address, if it is explicitly assigned. */ 757 if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF)) 758 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 759 760 if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags&IFA_F_SECONDARY) && 761 (prefix != addr || ifa->ifa_prefixlen < 32)) { 762 fib_magic(RTM_NEWROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL : 763 RTN_UNICAST, prefix, ifa->ifa_prefixlen, prim); 764 765 /* Add network specific broadcasts, when it takes a sense */ 766 if (ifa->ifa_prefixlen < 31) { 767 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim); 768 fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix|~mask, 32, prim); 769 } 770 } 771} 772 773static void fib_del_ifaddr(struct in_ifaddr *ifa) 774{ 775 struct in_device *in_dev = ifa->ifa_dev; 776 struct net_device *dev = in_dev->dev; 777 struct in_ifaddr *ifa1; 778 struct in_ifaddr *prim = ifa; 779 __be32 brd = ifa->ifa_address|~ifa->ifa_mask; 780 __be32 any = ifa->ifa_address&ifa->ifa_mask; 781#define LOCAL_OK 1 782#define BRD_OK 2 783#define BRD0_OK 4 784#define BRD1_OK 8 785 unsigned ok = 0; 786 787 if (!(ifa->ifa_flags&IFA_F_SECONDARY)) 788 fib_magic(RTM_DELROUTE, dev->flags&IFF_LOOPBACK ? RTN_LOCAL : 789 RTN_UNICAST, any, ifa->ifa_prefixlen, prim); 790 else { 791 prim = inet_ifa_byprefix(in_dev, any, ifa->ifa_mask); 792 if (prim == NULL) { 793 printk(KERN_WARNING "fib_del_ifaddr: bug: prim == NULL\n"); 794 return; 795 } 796 } 797 798 /* Deletion is more complicated than add. 799 We should take care of not to delete too much :-) 800 801 Scan address list to be sure that addresses are really gone. 802 */ 803 804 for (ifa1 = in_dev->ifa_list; ifa1; ifa1 = ifa1->ifa_next) { 805 if (ifa->ifa_local == ifa1->ifa_local) 806 ok |= LOCAL_OK; 807 if (ifa->ifa_broadcast == ifa1->ifa_broadcast) 808 ok |= BRD_OK; 809 if (brd == ifa1->ifa_broadcast) 810 ok |= BRD1_OK; 811 if (any == ifa1->ifa_broadcast) 812 ok |= BRD0_OK; 813 } 814 815 if (!(ok&BRD_OK)) 816 fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim); 817 if (!(ok&BRD1_OK)) 818 fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim); 819 if (!(ok&BRD0_OK)) 820 fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim); 821 if (!(ok&LOCAL_OK)) { 822 fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim); 823 824 /* Check, that this local address finally disappeared. */ 825 if (inet_addr_type(dev_net(dev), ifa->ifa_local) != RTN_LOCAL) { 826 /* And the last, but not the least thing. 827 We must flush stray FIB entries. 828 829 First of all, we scan fib_info list searching 830 for stray nexthop entries, then ignite fib_flush. 831 */ 832 if (fib_sync_down_addr(dev_net(dev), ifa->ifa_local)) 833 fib_flush(dev_net(dev)); 834 } 835 } 836#undef LOCAL_OK 837#undef BRD_OK 838#undef BRD0_OK 839#undef BRD1_OK 840} 841 842static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb ) 843{ 844 845 struct fib_result res; 846 struct flowi fl = { .mark = frn->fl_mark, 847 .nl_u = { .ip4_u = { .daddr = frn->fl_addr, 848 .tos = frn->fl_tos, 849 .scope = frn->fl_scope } } }; 850 851#ifdef CONFIG_IP_MULTIPLE_TABLES 852 res.r = NULL; 853#endif 854 855 frn->err = -ENOENT; 856 if (tb) { 857 local_bh_disable(); 858 859 frn->tb_id = tb->tb_id; 860 frn->err = fib_table_lookup(tb, &fl, &res); 861 862 if (!frn->err) { 863 frn->prefixlen = res.prefixlen; 864 frn->nh_sel = res.nh_sel; 865 frn->type = res.type; 866 frn->scope = res.scope; 867 fib_res_put(&res); 868 } 869 local_bh_enable(); 870 } 871} 872 873static void nl_fib_input(struct sk_buff *skb) 874{ 875 struct net *net; 876 struct fib_result_nl *frn; 877 struct nlmsghdr *nlh; 878 struct fib_table *tb; 879 u32 pid; 880 881 net = sock_net(skb->sk); 882 nlh = nlmsg_hdr(skb); 883 if (skb->len < NLMSG_SPACE(0) || skb->len < nlh->nlmsg_len || 884 nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*frn))) 885 return; 886 887 skb = skb_clone(skb, GFP_KERNEL); 888 if (skb == NULL) 889 return; 890 nlh = nlmsg_hdr(skb); 891 892 frn = (struct fib_result_nl *) NLMSG_DATA(nlh); 893 tb = fib_get_table(net, frn->tb_id_in); 894 895 nl_fib_lookup(frn, tb); 896 897 pid = NETLINK_CB(skb).pid; /* pid of sending process */ 898 NETLINK_CB(skb).pid = 0; /* from kernel */ 899 NETLINK_CB(skb).dst_group = 0; /* unicast */ 900 netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT); 901} 902 903static int __net_init nl_fib_lookup_init(struct net *net) 904{ 905 struct sock *sk; 906 sk = netlink_kernel_create(net, NETLINK_FIB_LOOKUP, 0, 907 nl_fib_input, NULL, THIS_MODULE); 908 if (sk == NULL) 909 return -EAFNOSUPPORT; 910 net->ipv4.fibnl = sk; 911 return 0; 912} 913 914static void nl_fib_lookup_exit(struct net *net) 915{ 916 netlink_kernel_release(net->ipv4.fibnl); 917 net->ipv4.fibnl = NULL; 918} 919 920static void fib_disable_ip(struct net_device *dev, int force, int delay) 921{ 922 if (fib_sync_down_dev(dev, force)) 923 fib_flush(dev_net(dev)); 924 rt_cache_flush(dev_net(dev), delay); 925 arp_ifdown(dev); 926} 927 928static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr) 929{ 930 struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; 931 struct net_device *dev = ifa->ifa_dev->dev; 932 933 switch (event) { 934 case NETDEV_UP: 935 fib_add_ifaddr(ifa); 936#ifdef CONFIG_IP_ROUTE_MULTIPATH 937 fib_sync_up(dev); 938#endif 939 rt_cache_flush(dev_net(dev), -1); 940 break; 941 case NETDEV_DOWN: 942 fib_del_ifaddr(ifa); 943 if (ifa->ifa_dev->ifa_list == NULL) { 944 /* Last address was deleted from this interface. 945 Disable IP. 946 */ 947 fib_disable_ip(dev, 1, 0); 948 } else { 949 rt_cache_flush(dev_net(dev), -1); 950 } 951 break; 952 } 953 return NOTIFY_DONE; 954} 955 956static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) 957{ 958 struct net_device *dev = ptr; 959 struct in_device *in_dev = __in_dev_get_rtnl(dev); 960 961 if (event == NETDEV_UNREGISTER) { 962 fib_disable_ip(dev, 2, -1); 963 return NOTIFY_DONE; 964 } 965 966 if (!in_dev) 967 return NOTIFY_DONE; 968 969 switch (event) { 970 case NETDEV_UP: 971 for_ifa(in_dev) { 972 fib_add_ifaddr(ifa); 973 } endfor_ifa(in_dev); 974#ifdef CONFIG_IP_ROUTE_MULTIPATH 975 fib_sync_up(dev); 976#endif 977 rt_cache_flush(dev_net(dev), -1); 978 break; 979 case NETDEV_DOWN: 980 fib_disable_ip(dev, 0, 0); 981 break; 982 case NETDEV_CHANGEMTU: 983 case NETDEV_CHANGE: 984 rt_cache_flush(dev_net(dev), 0); 985 break; 986 case NETDEV_UNREGISTER_BATCH: 987 rt_cache_flush_batch(); 988 break; 989 } 990 return NOTIFY_DONE; 991} 992 993static struct notifier_block fib_inetaddr_notifier = { 994 .notifier_call = fib_inetaddr_event, 995}; 996 997static struct notifier_block fib_netdev_notifier = { 998 .notifier_call = fib_netdev_event, 999}; 1000 1001static int __net_init ip_fib_net_init(struct net *net) 1002{ 1003 int err; 1004 unsigned int i; 1005 1006 net->ipv4.fib_table_hash = kzalloc( 1007 sizeof(struct hlist_head)*FIB_TABLE_HASHSZ, GFP_KERNEL); 1008 if (net->ipv4.fib_table_hash == NULL) 1009 return -ENOMEM; 1010 1011 for (i = 0; i < FIB_TABLE_HASHSZ; i++) 1012 INIT_HLIST_HEAD(&net->ipv4.fib_table_hash[i]); 1013 1014 err = fib4_rules_init(net); 1015 if (err < 0) 1016 goto fail; 1017 return 0; 1018 1019fail: 1020 kfree(net->ipv4.fib_table_hash); 1021 return err; 1022} 1023 1024static void ip_fib_net_exit(struct net *net) 1025{ 1026 unsigned int i; 1027 1028#ifdef CONFIG_IP_MULTIPLE_TABLES 1029 fib4_rules_exit(net); 1030#endif 1031 1032 for (i = 0; i < FIB_TABLE_HASHSZ; i++) { 1033 struct fib_table *tb; 1034 struct hlist_head *head; 1035 struct hlist_node *node, *tmp; 1036 1037 head = &net->ipv4.fib_table_hash[i]; 1038 hlist_for_each_entry_safe(tb, node, tmp, head, tb_hlist) { 1039 hlist_del(node); 1040 fib_table_flush(tb); 1041 kfree(tb); 1042 } 1043 } 1044 kfree(net->ipv4.fib_table_hash); 1045} 1046 1047static int __net_init fib_net_init(struct net *net) 1048{ 1049 int error; 1050 1051 error = ip_fib_net_init(net); 1052 if (error < 0) 1053 goto out; 1054 error = nl_fib_lookup_init(net); 1055 if (error < 0) 1056 goto out_nlfl; 1057 error = fib_proc_init(net); 1058 if (error < 0) 1059 goto out_proc; 1060out: 1061 return error; 1062 1063out_proc: 1064 nl_fib_lookup_exit(net); 1065out_nlfl: 1066 ip_fib_net_exit(net); 1067 goto out; 1068} 1069 1070static void __net_exit fib_net_exit(struct net *net) 1071{ 1072 fib_proc_exit(net); 1073 nl_fib_lookup_exit(net); 1074 ip_fib_net_exit(net); 1075} 1076 1077static struct pernet_operations fib_net_ops = { 1078 .init = fib_net_init, 1079 .exit = fib_net_exit, 1080}; 1081 1082void __init ip_fib_init(void) 1083{ 1084 rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL); 1085 rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL); 1086 rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib); 1087 1088 register_pernet_subsys(&fib_net_ops); 1089 register_netdevice_notifier(&fib_netdev_notifier); 1090 register_inetaddr_notifier(&fib_inetaddr_notifier); 1091 1092 fib_hash_init(); 1093} 1094