1/* 2 * Anycast support for IPv6 3 * Linux INET6 implementation 4 * 5 * Authors: 6 * David L Stevens (dlstevens@us.ibm.com) 7 * 8 * based heavily on net/ipv6/mcast.c 9 * 10 * This program is free software; you can redistribute it and/or 11 * modify it under the terms of the GNU General Public License 12 * as published by the Free Software Foundation; either version 13 * 2 of the License, or (at your option) any later version. 14 */ 15 16#include <linux/capability.h> 17#include <linux/module.h> 18#include <linux/errno.h> 19#include <linux/types.h> 20#include <linux/random.h> 21#include <linux/string.h> 22#include <linux/socket.h> 23#include <linux/sockios.h> 24#include <linux/net.h> 25#include <linux/in6.h> 26#include <linux/netdevice.h> 27#include <linux/if_arp.h> 28#include <linux/route.h> 29#include <linux/init.h> 30#include <linux/proc_fs.h> 31#include <linux/seq_file.h> 32#include <linux/slab.h> 33 34#include <net/net_namespace.h> 35#include <net/sock.h> 36#include <net/snmp.h> 37 38#include <net/ipv6.h> 39#include <net/protocol.h> 40#include <net/if_inet6.h> 41#include <net/ndisc.h> 42#include <net/addrconf.h> 43#include <net/ip6_route.h> 44 45#include <net/checksum.h> 46 47static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr); 48 49/* Big ac list lock for all the sockets */ 50static DEFINE_RWLOCK(ipv6_sk_ac_lock); 51 52 53/* 54 * socket join an anycast group 55 */ 56 57int ipv6_sock_ac_join(struct sock *sk, int ifindex, struct in6_addr *addr) 58{ 59 struct ipv6_pinfo *np = inet6_sk(sk); 60 struct net_device *dev = NULL; 61 struct inet6_dev *idev; 62 struct ipv6_ac_socklist *pac; 63 struct net *net = sock_net(sk); 64 int ishost = !net->ipv6.devconf_all->forwarding; 65 int err = 0; 66 67 if (!capable(CAP_NET_ADMIN)) 68 return -EPERM; 69 if (ipv6_addr_is_multicast(addr)) 70 return -EINVAL; 71 if (ipv6_chk_addr(net, addr, NULL, 0)) 72 return -EINVAL; 73 74 pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL); 75 if (pac == NULL) 76 return -ENOMEM; 77 pac->acl_next = NULL; 78 ipv6_addr_copy(&pac->acl_addr, addr); 79 80 rcu_read_lock(); 81 if (ifindex == 0) { 82 struct rt6_info *rt; 83 84 rt = rt6_lookup(net, addr, NULL, 0, 0); 85 if (rt) { 86 dev = rt->rt6i_dev; 87 dst_release(&rt->dst); 88 } else if (ishost) { 89 err = -EADDRNOTAVAIL; 90 goto error; 91 } else { 92 /* router, no matching interface: just pick one */ 93 dev = dev_get_by_flags_rcu(net, IFF_UP, 94 IFF_UP | IFF_LOOPBACK); 95 } 96 } else 97 dev = dev_get_by_index_rcu(net, ifindex); 98 99 if (dev == NULL) { 100 err = -ENODEV; 101 goto error; 102 } 103 104 idev = __in6_dev_get(dev); 105 if (!idev) { 106 if (ifindex) 107 err = -ENODEV; 108 else 109 err = -EADDRNOTAVAIL; 110 goto error; 111 } 112 /* reset ishost, now that we have a specific device */ 113 ishost = !idev->cnf.forwarding; 114 115 pac->acl_ifindex = dev->ifindex; 116 117 if (!ipv6_chk_prefix(addr, dev)) { 118 if (ishost) 119 err = -EADDRNOTAVAIL; 120 if (err) 121 goto error; 122 } 123 124 err = ipv6_dev_ac_inc(dev, addr); 125 if (!err) { 126 write_lock_bh(&ipv6_sk_ac_lock); 127 pac->acl_next = np->ipv6_ac_list; 128 np->ipv6_ac_list = pac; 129 write_unlock_bh(&ipv6_sk_ac_lock); 130 pac = NULL; 131 } 132 133error: 134 rcu_read_unlock(); 135 if (pac) 136 sock_kfree_s(sk, pac, sizeof(*pac)); 137 return err; 138} 139 140/* 141 * socket leave an anycast group 142 */ 143int ipv6_sock_ac_drop(struct sock *sk, int ifindex, struct in6_addr *addr) 144{ 145 struct ipv6_pinfo *np = inet6_sk(sk); 146 struct net_device *dev; 147 struct ipv6_ac_socklist *pac, *prev_pac; 148 struct net *net = sock_net(sk); 149 150 write_lock_bh(&ipv6_sk_ac_lock); 151 prev_pac = NULL; 152 for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) { 153 if ((ifindex == 0 || pac->acl_ifindex == ifindex) && 154 ipv6_addr_equal(&pac->acl_addr, addr)) 155 break; 156 prev_pac = pac; 157 } 158 if (!pac) { 159 write_unlock_bh(&ipv6_sk_ac_lock); 160 return -ENOENT; 161 } 162 if (prev_pac) 163 prev_pac->acl_next = pac->acl_next; 164 else 165 np->ipv6_ac_list = pac->acl_next; 166 167 write_unlock_bh(&ipv6_sk_ac_lock); 168 169 rcu_read_lock(); 170 dev = dev_get_by_index_rcu(net, pac->acl_ifindex); 171 if (dev) 172 ipv6_dev_ac_dec(dev, &pac->acl_addr); 173 rcu_read_unlock(); 174 175 sock_kfree_s(sk, pac, sizeof(*pac)); 176 return 0; 177} 178 179void ipv6_sock_ac_close(struct sock *sk) 180{ 181 struct ipv6_pinfo *np = inet6_sk(sk); 182 struct net_device *dev = NULL; 183 struct ipv6_ac_socklist *pac; 184 struct net *net = sock_net(sk); 185 int prev_index; 186 187 write_lock_bh(&ipv6_sk_ac_lock); 188 pac = np->ipv6_ac_list; 189 np->ipv6_ac_list = NULL; 190 write_unlock_bh(&ipv6_sk_ac_lock); 191 192 prev_index = 0; 193 rcu_read_lock(); 194 while (pac) { 195 struct ipv6_ac_socklist *next = pac->acl_next; 196 197 if (pac->acl_ifindex != prev_index) { 198 dev = dev_get_by_index_rcu(net, pac->acl_ifindex); 199 prev_index = pac->acl_ifindex; 200 } 201 if (dev) 202 ipv6_dev_ac_dec(dev, &pac->acl_addr); 203 sock_kfree_s(sk, pac, sizeof(*pac)); 204 pac = next; 205 } 206 rcu_read_unlock(); 207} 208 209 210static void aca_put(struct ifacaddr6 *ac) 211{ 212 if (atomic_dec_and_test(&ac->aca_refcnt)) { 213 in6_dev_put(ac->aca_idev); 214 dst_release(&ac->aca_rt->dst); 215 kfree(ac); 216 } 217} 218 219/* 220 * device anycast group inc (add if not found) 221 */ 222int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr) 223{ 224 struct ifacaddr6 *aca; 225 struct inet6_dev *idev; 226 struct rt6_info *rt; 227 int err; 228 229 idev = in6_dev_get(dev); 230 231 if (idev == NULL) 232 return -EINVAL; 233 234 write_lock_bh(&idev->lock); 235 if (idev->dead) { 236 err = -ENODEV; 237 goto out; 238 } 239 240 for (aca = idev->ac_list; aca; aca = aca->aca_next) { 241 if (ipv6_addr_equal(&aca->aca_addr, addr)) { 242 aca->aca_users++; 243 err = 0; 244 goto out; 245 } 246 } 247 248 /* 249 * not found: create a new one. 250 */ 251 252 aca = kzalloc(sizeof(struct ifacaddr6), GFP_ATOMIC); 253 254 if (aca == NULL) { 255 err = -ENOMEM; 256 goto out; 257 } 258 259 rt = addrconf_dst_alloc(idev, addr, 1); 260 if (IS_ERR(rt)) { 261 kfree(aca); 262 err = PTR_ERR(rt); 263 goto out; 264 } 265 266 ipv6_addr_copy(&aca->aca_addr, addr); 267 aca->aca_idev = idev; 268 aca->aca_rt = rt; 269 aca->aca_users = 1; 270 /* aca_tstamp should be updated upon changes */ 271 aca->aca_cstamp = aca->aca_tstamp = jiffies; 272 atomic_set(&aca->aca_refcnt, 2); 273 spin_lock_init(&aca->aca_lock); 274 275 aca->aca_next = idev->ac_list; 276 idev->ac_list = aca; 277 write_unlock_bh(&idev->lock); 278 279 ip6_ins_rt(rt); 280 281 addrconf_join_solict(dev, &aca->aca_addr); 282 283 aca_put(aca); 284 return 0; 285out: 286 write_unlock_bh(&idev->lock); 287 in6_dev_put(idev); 288 return err; 289} 290 291/* 292 * device anycast group decrement 293 */ 294int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr) 295{ 296 struct ifacaddr6 *aca, *prev_aca; 297 298 write_lock_bh(&idev->lock); 299 prev_aca = NULL; 300 for (aca = idev->ac_list; aca; aca = aca->aca_next) { 301 if (ipv6_addr_equal(&aca->aca_addr, addr)) 302 break; 303 prev_aca = aca; 304 } 305 if (!aca) { 306 write_unlock_bh(&idev->lock); 307 return -ENOENT; 308 } 309 if (--aca->aca_users > 0) { 310 write_unlock_bh(&idev->lock); 311 return 0; 312 } 313 if (prev_aca) 314 prev_aca->aca_next = aca->aca_next; 315 else 316 idev->ac_list = aca->aca_next; 317 write_unlock_bh(&idev->lock); 318 addrconf_leave_solict(idev, &aca->aca_addr); 319 320 dst_hold(&aca->aca_rt->dst); 321 ip6_del_rt(aca->aca_rt); 322 323 aca_put(aca); 324 return 0; 325} 326 327/* called with rcu_read_lock() */ 328static int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr) 329{ 330 struct inet6_dev *idev = __in6_dev_get(dev); 331 332 if (idev == NULL) 333 return -ENODEV; 334 return __ipv6_dev_ac_dec(idev, addr); 335} 336 337/* 338 * check if the interface has this anycast address 339 * called with rcu_read_lock() 340 */ 341static int ipv6_chk_acast_dev(struct net_device *dev, struct in6_addr *addr) 342{ 343 struct inet6_dev *idev; 344 struct ifacaddr6 *aca; 345 346 idev = __in6_dev_get(dev); 347 if (idev) { 348 read_lock_bh(&idev->lock); 349 for (aca = idev->ac_list; aca; aca = aca->aca_next) 350 if (ipv6_addr_equal(&aca->aca_addr, addr)) 351 break; 352 read_unlock_bh(&idev->lock); 353 return aca != NULL; 354 } 355 return 0; 356} 357 358/* 359 * check if given interface (or any, if dev==0) has this anycast address 360 */ 361int ipv6_chk_acast_addr(struct net *net, struct net_device *dev, 362 struct in6_addr *addr) 363{ 364 int found = 0; 365 366 rcu_read_lock(); 367 if (dev) 368 found = ipv6_chk_acast_dev(dev, addr); 369 else 370 for_each_netdev_rcu(net, dev) 371 if (ipv6_chk_acast_dev(dev, addr)) { 372 found = 1; 373 break; 374 } 375 rcu_read_unlock(); 376 return found; 377} 378 379 380#ifdef CONFIG_PROC_FS 381struct ac6_iter_state { 382 struct seq_net_private p; 383 struct net_device *dev; 384 struct inet6_dev *idev; 385}; 386 387#define ac6_seq_private(seq) ((struct ac6_iter_state *)(seq)->private) 388 389static inline struct ifacaddr6 *ac6_get_first(struct seq_file *seq) 390{ 391 struct ifacaddr6 *im = NULL; 392 struct ac6_iter_state *state = ac6_seq_private(seq); 393 struct net *net = seq_file_net(seq); 394 395 state->idev = NULL; 396 for_each_netdev_rcu(net, state->dev) { 397 struct inet6_dev *idev; 398 idev = __in6_dev_get(state->dev); 399 if (!idev) 400 continue; 401 read_lock_bh(&idev->lock); 402 im = idev->ac_list; 403 if (im) { 404 state->idev = idev; 405 break; 406 } 407 read_unlock_bh(&idev->lock); 408 } 409 return im; 410} 411 412static struct ifacaddr6 *ac6_get_next(struct seq_file *seq, struct ifacaddr6 *im) 413{ 414 struct ac6_iter_state *state = ac6_seq_private(seq); 415 416 im = im->aca_next; 417 while (!im) { 418 if (likely(state->idev != NULL)) 419 read_unlock_bh(&state->idev->lock); 420 421 state->dev = next_net_device_rcu(state->dev); 422 if (!state->dev) { 423 state->idev = NULL; 424 break; 425 } 426 state->idev = __in6_dev_get(state->dev); 427 if (!state->idev) 428 continue; 429 read_lock_bh(&state->idev->lock); 430 im = state->idev->ac_list; 431 } 432 return im; 433} 434 435static struct ifacaddr6 *ac6_get_idx(struct seq_file *seq, loff_t pos) 436{ 437 struct ifacaddr6 *im = ac6_get_first(seq); 438 if (im) 439 while (pos && (im = ac6_get_next(seq, im)) != NULL) 440 --pos; 441 return pos ? NULL : im; 442} 443 444static void *ac6_seq_start(struct seq_file *seq, loff_t *pos) 445 __acquires(RCU) 446{ 447 rcu_read_lock(); 448 return ac6_get_idx(seq, *pos); 449} 450 451static void *ac6_seq_next(struct seq_file *seq, void *v, loff_t *pos) 452{ 453 struct ifacaddr6 *im = ac6_get_next(seq, v); 454 455 ++*pos; 456 return im; 457} 458 459static void ac6_seq_stop(struct seq_file *seq, void *v) 460 __releases(RCU) 461{ 462 struct ac6_iter_state *state = ac6_seq_private(seq); 463 464 if (likely(state->idev != NULL)) { 465 read_unlock_bh(&state->idev->lock); 466 state->idev = NULL; 467 } 468 rcu_read_unlock(); 469} 470 471static int ac6_seq_show(struct seq_file *seq, void *v) 472{ 473 struct ifacaddr6 *im = (struct ifacaddr6 *)v; 474 struct ac6_iter_state *state = ac6_seq_private(seq); 475 476 seq_printf(seq, "%-4d %-15s %pi6 %5d\n", 477 state->dev->ifindex, state->dev->name, 478 &im->aca_addr, im->aca_users); 479 return 0; 480} 481 482static const struct seq_operations ac6_seq_ops = { 483 .start = ac6_seq_start, 484 .next = ac6_seq_next, 485 .stop = ac6_seq_stop, 486 .show = ac6_seq_show, 487}; 488 489static int ac6_seq_open(struct inode *inode, struct file *file) 490{ 491 return seq_open_net(inode, file, &ac6_seq_ops, 492 sizeof(struct ac6_iter_state)); 493} 494 495static const struct file_operations ac6_seq_fops = { 496 .owner = THIS_MODULE, 497 .open = ac6_seq_open, 498 .read = seq_read, 499 .llseek = seq_lseek, 500 .release = seq_release_net, 501}; 502 503int __net_init ac6_proc_init(struct net *net) 504{ 505 if (!proc_net_fops_create(net, "anycast6", S_IRUGO, &ac6_seq_fops)) 506 return -ENOMEM; 507 508 return 0; 509} 510 511void ac6_proc_exit(struct net *net) 512{ 513 proc_net_remove(net, "anycast6"); 514} 515#endif 516