/*
 * This is a module which is used for queueing IPv6 packets and
 * communicating with userspace via netlink.
 *
 * (C) 2001 Fernando Anton, this code is GPL.
 * IPv64 Project - Work based in IPv64 draft by Arturo Azcorra.
 * Universidad Carlos III de Madrid - Leganes (Madrid) - Spain
 * Universidad Politecnica de Alcala de Henares - Alcala de H. (Madrid) - Spain
 * email: fanton@it.uc3m.es
 *
 * 2001-11-06: First try. Working with ip_queue.c for IPv4 and trying
 *             to adapt it to IPv6
 *             HEAVILY based in ipqueue.c by James Morris. It's just
 *             a little modified version of it, so he's nearly the
 *             real coder of this.
 *             Few changes needed, mainly the hard_routing code and
 *             the netlink socket protocol (we're NETLINK_IP6_FW).
 * 2002-06-25: Code cleanup. [JM: ported cleanup over from ip_queue.c]
 */
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/ipv6.h>
#include <linux/notifier.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/netlink.h>
#include <linux/spinlock.h>
#include <linux/brlock.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
#include <net/sock.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/netfilter_ipv4/ip_queue.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv6/ip6_tables.h>

#define IPQ_QMAX_DEFAULT 1024			/* default queue_maxlen */
#define IPQ_PROC_FS_NAME "ip6_queue"		/* /proc/net entry name */
#define NET_IPQ_QMAX 2088			/* sysctl binary id */
#define NET_IPQ_QMAX_NAME "ip6_queue_maxlen"	/* sysctl name */

/*
 * Source/destination addresses captured at queueing time for
 * LOCAL_OUT packets, so we can detect after a userspace mangle
 * whether the packet needs re-routing (see ipq_mangle_ipv6()).
 */
struct ipq_rt_info {
	struct in6_addr daddr;
	struct in6_addr saddr;
};

/*
 * One queued packet awaiting a userspace verdict.
 *
 * NOTE: 'list' must remain the first member — __ipq_find_entry()
 * casts a struct list_head * directly to struct ipq_queue_entry *.
 */
struct ipq_queue_entry {
	struct list_head list;		/* link on queue_list */
	struct nf_info *info;		/* netfilter reinjection info */
	struct sk_buff *skb;		/* the queued packet itself */
	struct ipq_rt_info rt_info;	/* only valid for NF_IP_LOCAL_OUT */
};

/* Match predicate for searching queue_list; 'data' is caller-defined. */
typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);

/*
 * Module-global state.  copy_mode/copy_range/peer_pid/queue_total and
 * queue_list are all protected by queue_lock; functions prefixed with
 * "__" expect the caller to already hold it.
 */
static unsigned char copy_mode = IPQ_COPY_NONE;
static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT;
static rwlock_t queue_lock = RW_LOCK_UNLOCKED;
static int peer_pid;			/* netlink pid of the userspace peer */
static unsigned int copy_range;		/* max payload bytes copied to peer */
static unsigned int queue_total;	/* current number of queued entries */
static struct sock *ipqnl;		/* kernel-side netlink socket */
static LIST_HEAD(queue_list);		/* pending entries, newest at head */
static DECLARE_MUTEX(ipqnl_sem);	/* serialises ipq_rcv_sk vs. cleanup */

/*
 * Hand the packet back to netfilter with the given verdict and free
 * the queue entry.  The entry must already be off queue_list.
 */
static void
ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
{
	nf_reinject(entry->skb, entry->info, verdict);
	kfree(entry);
}

/*
 * Link an entry at the head of queue_list, refusing with -ENOSPC once
 * queue_maxlen entries are pending.  Caller holds queue_lock (write).
 */
static inline int
__ipq_enqueue_entry(struct ipq_queue_entry *entry)
{
	if (queue_total >= queue_maxlen) {
		if (net_ratelimit())
			printk(KERN_WARNING "ip6_queue: full at %d entries, "
			       "dropping packet(s).\n", queue_total);
		return -ENOSPC;
	}
	list_add(&entry->list, &queue_list);
	queue_total++;
	return 0;
}

/*
 * Find and return a queued entry matched by cmpfn, or return the last
 * entry if cmpfn is NULL.  Walks tail-to-head, i.e. oldest entry first.
 * Caller holds queue_lock.
 */
static inline struct ipq_queue_entry *
__ipq_find_entry(ipq_cmpfn cmpfn, unsigned long data)
{
	struct list_head *p;

	list_for_each_prev(p, &queue_list) {
		/* valid because 'list' is the first member of the entry */
		struct ipq_queue_entry *entry = (struct ipq_queue_entry *)p;

		if (!cmpfn || cmpfn(entry, data))
			return entry;
	}
	return NULL;
}

/* Unlink an entry from queue_list.  Caller holds queue_lock (write). */
static inline void
__ipq_dequeue_entry(struct ipq_queue_entry *entry)
{
	list_del(&entry->list);
	queue_total--;
}

/*
 * Find-and-unlink combination of the two helpers above.
 * Caller holds queue_lock (write).
 */
static inline struct ipq_queue_entry *
__ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
{
	struct ipq_queue_entry *entry;

	entry = __ipq_find_entry(cmpfn, data);
	if (entry == NULL)
		return NULL;

	__ipq_dequeue_entry(entry);
	return entry;
}


/*
 * Drain the whole queue, reinjecting every packet with 'verdict'.
 * Caller holds queue_lock (write).
 */
static inline void
__ipq_flush(int verdict)
{
	struct ipq_queue_entry *entry;

	while ((entry = __ipq_find_dequeue_entry(NULL, 0)))
		ipq_issue_verdict(entry, verdict);
}

/*
 * Set the copy mode requested by userspace (IPQM_MODE message).
 * For IPQ_COPY_PACKET the payload copy range is clamped to 64K.
 * Caller holds queue_lock (write).
 */
static inline int
__ipq_set_mode(unsigned char mode, unsigned int range)
{
	int status = 0;

	switch(mode) {
	case IPQ_COPY_NONE:
	case IPQ_COPY_META:
		copy_mode = mode;
		copy_range = 0;
		break;

	case IPQ_COPY_PACKET:
		copy_mode = mode;
		copy_range = range;
		if (copy_range > 0xFFFF)
			copy_range = 0xFFFF;
		break;

	default:
		status = -EINVAL;

	}
	return status;
}

/*
 * Forget the userspace peer and drop everything it left queued.
 * Called when the peer's netlink socket goes away (ipq_rcv_nl_event).
 * Caller holds queue_lock (write).
 */
static inline void
__ipq_reset(void)
{
	peer_pid = 0;
	__ipq_set_mode(IPQ_COPY_NONE, 0);
	__ipq_flush(NF_DROP);
}

/* Locked wrapper around __ipq_find_dequeue_entry(). */
static struct ipq_queue_entry *
ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
{
	struct ipq_queue_entry *entry;

	write_lock_bh(&queue_lock);
	entry = __ipq_find_dequeue_entry(cmpfn, data);
	write_unlock_bh(&queue_lock);
	return entry;
}

/* Locked wrapper around __ipq_flush(). */
static void
ipq_flush(int verdict)
{
	write_lock_bh(&queue_lock);
	__ipq_flush(verdict);
	write_unlock_bh(&queue_lock);
}

/*
 * Build the IPQM_PACKET netlink message announcing 'entry' to the
 * userspace peer.  Copies packet metadata always, and up to
 * copy_range bytes of payload when copy_mode is IPQ_COPY_PACKET.
 * Returns a freshly allocated skb, or NULL with *errp set on failure.
 */
static struct sk_buff *
ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
{
	unsigned char *old_tail;
	size_t size = 0;
	size_t data_len = 0;
	struct sk_buff *skb;
	struct ipq_packet_msg *pmsg;
	struct nlmsghdr *nlh;

	/* snapshot copy_mode/copy_range under the lock */
	read_lock_bh(&queue_lock);

	switch (copy_mode) {
	case IPQ_COPY_META:
	case IPQ_COPY_NONE:
		size = NLMSG_SPACE(sizeof(*pmsg));
		data_len = 0;
		break;

	case IPQ_COPY_PACKET:
		if (copy_range == 0 || copy_range > entry->skb->len)
			data_len = entry->skb->len;
		else
			data_len = copy_range;

		size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
		break;

	default:
		*errp = -EINVAL;
		read_unlock_bh(&queue_lock);
		return NULL;
	}

	read_unlock_bh(&queue_lock);

	skb = alloc_skb(size, GFP_ATOMIC);
	if (!skb)
		goto nlmsg_failure;

	old_tail= skb->tail;
	/* NLMSG_PUT jumps to nlmsg_failure if the skb lacks room */
	nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
	pmsg = NLMSG_DATA(nlh);
	memset(pmsg, 0, sizeof(*pmsg));

	/* the kernel address of the entry doubles as the packet id
	 * userspace must echo back in its verdict (see id_cmp()) */
	pmsg->packet_id = (unsigned long )entry;
	pmsg->data_len = data_len;
	pmsg->timestamp_sec = entry->skb->stamp.tv_sec;
	pmsg->timestamp_usec = entry->skb->stamp.tv_usec;
	pmsg->mark = entry->skb->nfmark;
	pmsg->hook = entry->info->hook;
	pmsg->hw_protocol = entry->skb->protocol;

	if (entry->info->indev)
		strcpy(pmsg->indev_name, entry->info->indev->name);
	else
		pmsg->indev_name[0] = '\0';

	if (entry->info->outdev)
		strcpy(pmsg->outdev_name, entry->info->outdev->name);
	else
		pmsg->outdev_name[0] = '\0';

	if (entry->info->indev && entry->skb->dev) {
		pmsg->hw_type = entry->skb->dev->type;
		if (entry->skb->dev->hard_header_parse)
			pmsg->hw_addrlen =
				entry->skb->dev->hard_header_parse(entry->skb,
				                                   pmsg->hw_addr);
	}

	if (data_len)
		memcpy(pmsg->payload, entry->skb->data, data_len);

	nlh->nlmsg_len = skb->tail - old_tail;
	return skb;

nlmsg_failure:
	if (skb)
		kfree_skb(skb);
	*errp = -EINVAL;
	printk(KERN_ERR "ip6_queue: error creating packet message\n");
	return NULL;
}

/*
 * Netfilter queue handler (registered via nf_register_queue_handler):
 * wrap the packet in a queue entry, notify the userspace peer over
 * netlink, and park the entry on queue_list until a verdict arrives.
 * Returns 0 on success or a negative errno; on error the caller
 * (netfilter core) keeps ownership of skb/info, so only the entry
 * wrapper is freed here.
 */
static int
ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data)
{
	int status = -EINVAL;
	struct sk_buff *nskb;
	struct ipq_queue_entry *entry;

	if (copy_mode == IPQ_COPY_NONE)
		return -EAGAIN;

	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
	if (entry == NULL) {
		printk(KERN_ERR "ip6_queue: OOM in ipq_enqueue_packet()\n");
		return -ENOMEM;
	}

	entry->info = info;
	entry->skb = skb;

	/* remember the addresses so a userspace mangle can be detected
	 * and the packet re-routed (see ipq_mangle_ipv6()) */
	if (entry->info->hook == NF_IP_LOCAL_OUT) {
		struct ipv6hdr *iph = skb->nh.ipv6h;

		entry->rt_info.daddr = iph->daddr;
		entry->rt_info.saddr = iph->saddr;
	}

	nskb = ipq_build_packet_message(entry, &status);
	if (nskb == NULL)
		goto err_out_free;

	/* NOTE(review): netlink_unicast() is called with queue_lock held
	 * for writing; MSG_DONTWAIT keeps it non-blocking, but confirm
	 * this lock scope is intentional. */
	write_lock_bh(&queue_lock);

	if (!peer_pid)
		goto err_out_unlock;

	status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
	if (status < 0)
		goto err_out_unlock;

	status = __ipq_enqueue_entry(entry);
	if (status < 0)
		goto err_out_unlock;

	write_unlock_bh(&queue_lock);
	return status;

err_out_unlock:
	write_unlock_bh(&queue_lock);

err_out_free:
	kfree(entry);
	return status;
}

/*
 * Taken from net/ipv6/ip6_output.c
 *
 * We should use the one there, but is defined static
 * so we put this just here and let the things as
 * they are now.
 *
 * If that one is modified, this one should be modified too.
 *
 * Re-resolves the route for skb after its addresses were changed by
 * userspace; replaces skb->dst with the new route on success.
 */
static int
route6_me_harder(struct sk_buff *skb)
{
	struct ipv6hdr *iph = skb->nh.ipv6h;
	struct dst_entry *dst;
	struct flowi fl;

	fl.proto = iph->nexthdr;
	fl.fl6_dst = &iph->daddr;
	fl.fl6_src = &iph->saddr;
	fl.oif = skb->sk ? skb->sk->bound_dev_if : 0;
	fl.fl6_flowlabel = 0;
	fl.uli_u.ports.dport = 0;
	fl.uli_u.ports.sport = 0;

	dst = ip6_route_output(skb->sk, &fl);

	/* NOTE(review): on dst->error the new dst does not appear to be
	 * released here — verify against the ip6_output.c original. */
	if (dst->error) {
		if (net_ratelimit())
			printk(KERN_DEBUG "route6_me_harder: No more route.\n");
		return -EINVAL;
	}

	/* Drop old route. */
	dst_release(skb->dst);

	skb->dst = dst;
	return 0;
}

/*
 * Apply a replacement packet supplied by userspace in an IPQM_VERDICT
 * message: resize e->skb as needed, copy in the new payload, and
 * re-route the packet if the addresses changed on a LOCAL_OUT packet.
 * Returns 0 on success (or when data_len is too short to act on),
 * negative errno on failure.
 */
static int
ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
{
	int diff;
	struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload;

	/* too short to even contain an IPv6 header: silently ignore */
	if (v->data_len < sizeof(*user_iph))
		return 0;
	diff = v->data_len - e->skb->len;
	if (diff < 0)
		skb_trim(e->skb, v->data_len);
	else if (diff > 0) {
		if (v->data_len > 0xFFFF)
			return -EINVAL;
		if (diff > skb_tailroom(e->skb)) {
			/* not enough tailroom: reallocate a larger skb */
			struct sk_buff *newskb;

			newskb = skb_copy_expand(e->skb,
			                         skb_headroom(e->skb),
			                         diff,
			                         GFP_ATOMIC);
			if (newskb == NULL) {
				printk(KERN_WARNING "ip6_queue: OOM "
				      "in mangle, dropping packet\n");
				return -ENOMEM;
			}
			if (e->skb->sk)
				skb_set_owner_w(newskb, e->skb->sk);
			kfree_skb(e->skb);
			e->skb = newskb;
		}
		skb_put(e->skb, diff);
	}
	memcpy(e->skb->data, v->payload, v->data_len);
	e->skb->nfcache |= NFC_ALTERED;

	/*
	 * Extra routing may be needed on local out, as the QUEUE target
	 * never returns control to the table.
	 * Not a nice way to cmp, but works
	 */
	if (e->info->hook == NF_IP_LOCAL_OUT) {
		struct ipv6hdr *iph = e->skb->nh.ipv6h;
		if (ipv6_addr_cmp(&iph->daddr, &e->rt_info.daddr) ||
		    ipv6_addr_cmp(&iph->saddr, &e->rt_info.saddr))
			return route6_me_harder(e->skb);
	}
	return 0;
}

/*
 * Match a queued entry by packet id — the id is the entry's own
 * kernel address, as assigned in ipq_build_packet_message().
 */
static inline int
id_cmp(struct ipq_queue_entry *e, unsigned long id)
{
	return (id == (unsigned long )e);
}

/*
 * Handle an IPQM_VERDICT message from userspace: locate the referenced
 * entry, optionally apply a replacement payload, and reinject the
 * packet with the requested verdict (forced to NF_DROP if the mangle
 * fails).  'len' is the number of payload bytes following the
 * ipq_peer_msg header.
 */
static int
ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
{
	struct ipq_queue_entry *entry;

	if (vmsg->value > NF_MAX_VERDICT)
		return -EINVAL;

	entry = ipq_find_dequeue_entry(id_cmp, vmsg->id);
	if (entry == NULL)
		return -ENOENT;
	else {
		int verdict = vmsg->value;

		/* only mangle when the declared and actual payload
		 * lengths agree */
		if (vmsg->data_len && vmsg->data_len == len)
			if (ipq_mangle_ipv6(vmsg, entry) < 0)
				verdict = NF_DROP;

		ipq_issue_verdict(entry, verdict);
		return 0;
	}
}

/* Locked wrapper around __ipq_set_mode(). */
static int
ipq_set_mode(unsigned char mode, unsigned int range)
{
	int status;

	write_lock_bh(&queue_lock);
	status = __ipq_set_mode(mode, range);
	write_unlock_bh(&queue_lock);
	return status;
}

/*
 * Dispatch a validated message from the userspace peer:
 * IPQM_MODE sets the copy mode, IPQM_VERDICT delivers a verdict.
 * 'len' is the netlink payload length including the peer-msg header.
 */
static int
ipq_receive_peer(struct ipq_peer_msg *pmsg,
                 unsigned char type, unsigned int len)
{
	int status = 0;

	if (len < sizeof(*pmsg))
		return -EINVAL;

	switch (type) {
	case IPQM_MODE:
		status = ipq_set_mode(pmsg->msg.mode.value,
		                      pmsg->msg.mode.range);
		break;

	case IPQM_VERDICT:
		if (pmsg->msg.verdict.value > NF_MAX_VERDICT)
			status = -EINVAL;
		else
			status = ipq_set_verdict(&pmsg->msg.verdict,
			                         len - sizeof(*pmsg));
		break;
	default:
		status = -EINVAL;
	}
	return status;
}

/*
 * Match a queued entry whose input or output device has the given
 * ifindex — used to purge entries when a device goes down.
 */
static int
dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex)
{
	if (entry->info->indev)
		if (entry->info->indev->ifindex == ifindex)
			return 1;

	if (entry->info->outdev)
		if (entry->info->outdev->ifindex == ifindex)
			return 1;

	return 0;
}

/* Drop every queued packet that references the device 'ifindex'. */
static void
ipq_dev_drop(int ifindex)
{
	struct ipq_queue_entry *entry;

	while ((entry = ipq_find_dequeue_entry(dev_cmp, ifindex)) != NULL)
		ipq_issue_verdict(entry, NF_DROP);
}

/* NACK the sender with 'err' and bail out of ipq_rcv_skb(). */
#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)

/*
 * Validate and process one netlink skb from userspace: check header
 * sanity, message type, CAP_NET_ADMIN, and peer-pid ownership before
 * handing the payload to ipq_receive_peer().  The first valid sender
 * becomes the exclusive peer until its socket is released.
 */
static inline void
ipq_rcv_skb(struct sk_buff *skb)
{
	int status, type, pid, flags, nlmsglen, skblen;
	struct nlmsghdr *nlh;

	skblen = skb->len;
	if (skblen < sizeof(*nlh))
		return;

	nlh = (struct nlmsghdr *)skb->data;
	nlmsglen = nlh->nlmsg_len;
	if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
		return;

	pid = nlh->nlmsg_pid;
	flags = nlh->nlmsg_flags;

	if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
		RCV_SKB_FAIL(-EINVAL);

	/* NOTE(review): MSG_TRUNC is a recvmsg() flag, not an NLM_F_*
	 * flag — this test is inherited from ip_queue.c; confirm the
	 * intended constant. */
	if (flags & MSG_TRUNC)
		RCV_SKB_FAIL(-ECOMM);

	type = nlh->nlmsg_type;
	if (type < NLMSG_NOOP || type >= IPQM_MAX)
		RCV_SKB_FAIL(-EINVAL);

	/* netlink control messages (NOOP/ERROR/...) are ignored */
	if (type <= IPQM_BASE)
		return;

	if(!cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN))
		RCV_SKB_FAIL(-EPERM);

	write_lock_bh(&queue_lock);

	if (peer_pid) {
		if (peer_pid != pid) {
			write_unlock_bh(&queue_lock);
			RCV_SKB_FAIL(-EBUSY);
		}
	}
	else
		peer_pid = pid;

	write_unlock_bh(&queue_lock);

	status = ipq_receive_peer(NLMSG_DATA(nlh), type,
	                          skblen - NLMSG_LENGTH(0));
	if (status < 0)
		RCV_SKB_FAIL(status);

	if (flags & NLM_F_ACK)
		netlink_ack(skb, nlh, 0);
	return;
}

/*
 * data_ready callback of the kernel netlink socket: drain the receive
 * queue under ipqnl_sem (trylock, so cleanup can flush us out), then
 * loop in case more skbs arrived while we held the semaphore.
 */
static void
ipq_rcv_sk(struct sock *sk, int len)
{
	do {
		struct sk_buff *skb;

		if (down_trylock(&ipqnl_sem))
			return;

		while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) {
			ipq_rcv_skb(skb);
			kfree_skb(skb);
		}

		up(&ipqnl_sem);

	} while (ipqnl && ipqnl->receive_queue.qlen);
}

/* Netdevice notifier: drop any queued packets bound to a downed device. */
static int
ipq_rcv_dev_event(struct notifier_block *this,
                  unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	/* Drop any packets associated with the downed device */
	if (event == NETDEV_DOWN)
		ipq_dev_drop(dev->ifindex);
	return NOTIFY_DONE;
}

static struct notifier_block ipq_dev_notifier = {
	ipq_rcv_dev_event,
	NULL,
	0
};

/*
 * Netlink notifier: when our peer's NETLINK_IP6_FW socket is released,
 * reset the module state and drop everything it left queued.
 */
static int
ipq_rcv_nl_event(struct notifier_block *this,
                 unsigned long event, void *ptr)
{
	struct netlink_notify *n = ptr;

	if (event == NETLINK_URELEASE &&
	    n->protocol == NETLINK_IP6_FW && n->pid) {
		write_lock_bh(&queue_lock);
		if (n->pid == peer_pid)
			__ipq_reset();
		write_unlock_bh(&queue_lock);
	}
	return NOTIFY_DONE;
}

static struct notifier_block ipq_nl_notifier = {
	ipq_rcv_nl_event,
	NULL,
	0
};

/*
 * sysctl plumbing: exposes net.ipv6.ip6_queue_maxlen.
 *
 * NOTE(review): the sysctl writes sysctl_maxlen, while the queueing
 * path reads queue_maxlen — the two are never synchronised here;
 * confirm whether changing the sysctl is meant to take effect.
 */
static int sysctl_maxlen = IPQ_QMAX_DEFAULT;
static struct ctl_table_header *ipq_sysctl_header;

static ctl_table ipq_table[] = {
	{ NET_IPQ_QMAX, NET_IPQ_QMAX_NAME, &sysctl_maxlen,
	  sizeof(sysctl_maxlen), 0644, NULL, proc_dointvec },
	{ 0 }
};

static ctl_table ipq_dir_table[] = {
	{NET_IPV6, "ipv6", NULL, 0, 0555, ipq_table, 0, 0, 0, 0, 0},
	{ 0 }
};

static ctl_table ipq_root_table[] = {
	{CTL_NET, "net", NULL, 0, 0555, ipq_dir_table, 0, 0, 0, 0, 0},
	{ 0 }
};

/* /proc/net/ip6_queue read handler: dump the module's current state. */
static int
ipq_get_info(char *buffer, char **start, off_t offset, int length)
{
	int len;

	read_lock_bh(&queue_lock);

	len = sprintf(buffer,
	              "Peer PID          : %d\n"
	              "Copy mode         : %hu\n"
	              "Copy range        : %u\n"
	              "Queue length      : %u\n"
	              "Queue max. length : %u\n",
	              peer_pid,
	              copy_mode,
	              copy_range,
	              queue_total,
	              queue_maxlen);

	read_unlock_bh(&queue_lock);

	*start = buffer + offset;
	len -= offset;
	if (len > length)
		len = length;
	else if (len < 0)
		len = 0;
	return len;
}

/*
 * Combined module init/teardown.  With init != 0, register everything
 * (netlink notifier + socket, /proc entry, netdevice notifier, sysctl,
 * queue handler); with init == 0 fall through the same labels in
 * reverse to unwind.  The BR_NETPROTO_LOCK write lock/unlock pair acts
 * as a barrier so no softirq is still inside the queue handler before
 * the final flush.
 */
static int
init_or_cleanup(int init)
{
	int status = -ENOMEM;
	struct proc_dir_entry *proc;

	if (!init)
		goto cleanup;

	netlink_register_notifier(&ipq_nl_notifier);
	ipqnl = netlink_kernel_create(NETLINK_IP6_FW, ipq_rcv_sk);
	if (ipqnl == NULL) {
		printk(KERN_ERR "ip6_queue: failed to create netlink socket\n");
		goto cleanup_netlink_notifier;
	}

	proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info);
	if (proc)
		proc->owner = THIS_MODULE;
	else {
		printk(KERN_ERR "ip6_queue: failed to create proc entry\n");
		goto cleanup_ipqnl;
	}

	register_netdevice_notifier(&ipq_dev_notifier);
	ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0);

	status = nf_register_queue_handler(PF_INET6, ipq_enqueue_packet, NULL);
	if (status < 0) {
		printk(KERN_ERR "ip6_queue: failed to register queue handler\n");
		goto cleanup_sysctl;
	}
	return status;

cleanup:
	nf_unregister_queue_handler(PF_INET6);
	br_write_lock_bh(BR_NETPROTO_LOCK);
	br_write_unlock_bh(BR_NETPROTO_LOCK);
	ipq_flush(NF_DROP);

cleanup_sysctl:
	unregister_sysctl_table(ipq_sysctl_header);
	unregister_netdevice_notifier(&ipq_dev_notifier);
	proc_net_remove(IPQ_PROC_FS_NAME);

cleanup_ipqnl:
	sock_release(ipqnl->socket);
	/* the semaphore round-trip waits out any ipq_rcv_sk still running */
	down(&ipqnl_sem);
	up(&ipqnl_sem);

cleanup_netlink_notifier:
	netlink_unregister_notifier(&ipq_nl_notifier);
	return status;
}

/* Module entry point. */
static int __init init(void)
{

	return init_or_cleanup(1);
}

/* Module exit point. */
static void __exit fini(void)
{
	init_or_cleanup(0);
}

MODULE_DESCRIPTION("IPv6 packet queue handler");
MODULE_LICENSE("GPL");

module_init(init);
module_exit(fini);