1/* 2 * This is a module which is used for queueing IPv6 packets and 3 * communicating with userspace via netlink. 4 * 5 * (C) 2001 Fernando Anton, this code is GPL. 6 * IPv64 Project - Work based in IPv64 draft by Arturo Azcorra. 7 * Universidad Carlos III de Madrid - Leganes (Madrid) - Spain 8 * Universidad Politecnica de Alcala de Henares - Alcala de H. (Madrid) - Spain 9 * email: fanton@it.uc3m.es 10 * 11 * This program is free software; you can redistribute it and/or modify 12 * it under the terms of the GNU General Public License version 2 as 13 * published by the Free Software Foundation. 14 */ 15#include <linux/module.h> 16#include <linux/skbuff.h> 17#include <linux/init.h> 18#include <linux/ipv6.h> 19#include <linux/notifier.h> 20#include <linux/netdevice.h> 21#include <linux/netfilter.h> 22#include <linux/netlink.h> 23#include <linux/spinlock.h> 24#include <linux/sysctl.h> 25#include <linux/proc_fs.h> 26#include <linux/seq_file.h> 27#include <linux/mutex.h> 28#include <linux/slab.h> 29#include <net/net_namespace.h> 30#include <net/sock.h> 31#include <net/ipv6.h> 32#include <net/ip6_route.h> 33#include <net/netfilter/nf_queue.h> 34#include <linux/netfilter_ipv4/ip_queue.h> 35#include <linux/netfilter_ipv4/ip_tables.h> 36#include <linux/netfilter_ipv6/ip6_tables.h> 37 38#define IPQ_QMAX_DEFAULT 1024 39#define IPQ_PROC_FS_NAME "ip6_queue" 40#define NET_IPQ_QMAX_NAME "ip6_queue_maxlen" 41 42typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long); 43 44static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; 45static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; 46static DEFINE_SPINLOCK(queue_lock); 47static int peer_pid __read_mostly; 48static unsigned int copy_range __read_mostly; 49static unsigned int queue_total; 50static unsigned int queue_dropped = 0; 51static unsigned int queue_user_dropped = 0; 52static struct sock *ipqnl __read_mostly; 53static LIST_HEAD(queue_list); 54static DEFINE_MUTEX(ipqnl_mutex); 55 56static inline void 57__ipq_enqueue_entry(struct nf_queue_entry *entry) 58{ 59 list_add_tail(&entry->list, &queue_list); 60 queue_total++; 61} 62 63static inline int 64__ipq_set_mode(unsigned char mode, unsigned int range) 65{ 66 int status = 0; 67 68 switch(mode) { 69 case IPQ_COPY_NONE: 70 case IPQ_COPY_META: 71 copy_mode = mode; 72 copy_range = 0; 73 break; 74 75 case IPQ_COPY_PACKET: 76 if (range > 0xFFFF) 77 range = 0xFFFF; 78 copy_range = range; 79 copy_mode = mode; 80 break; 81 82 default: 83 status = -EINVAL; 84 85 } 86 return status; 87} 88 89static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data); 90 91static inline void 92__ipq_reset(void) 93{ 94 peer_pid = 0; 95 net_disable_timestamp(); 96 __ipq_set_mode(IPQ_COPY_NONE, 0); 97 __ipq_flush(NULL, 0); 98} 99 100static struct nf_queue_entry * 101ipq_find_dequeue_entry(unsigned long id) 102{ 103 struct nf_queue_entry *entry = NULL, *i; 104 105 spin_lock_bh(&queue_lock); 106 107 list_for_each_entry(i, &queue_list, list) { 108 if ((unsigned long)i == id) { 109 entry = i; 110 break; 111 } 112 } 113 114 if (entry) { 115 list_del(&entry->list); 116 queue_total--; 117 } 118 119 spin_unlock_bh(&queue_lock); 120 return entry; 121} 122 123static void 124__ipq_flush(ipq_cmpfn cmpfn, unsigned long data) 125{ 126 struct nf_queue_entry *entry, *next; 127 128 list_for_each_entry_safe(entry, next, &queue_list, list) { 129 if (!cmpfn || cmpfn(entry, data)) { 130 list_del(&entry->list); 131 queue_total--; 132 nf_reinject(entry, NF_DROP); 133 } 134 } 135} 136 137static void 138ipq_flush(ipq_cmpfn cmpfn, unsigned long data) 139{ 140 spin_lock_bh(&queue_lock); 141 __ipq_flush(cmpfn, data); 142 spin_unlock_bh(&queue_lock); 143} 144 145static struct sk_buff * 146ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) 147{ 148 sk_buff_data_t old_tail; 149 size_t size = 0; 150 size_t data_len = 0; 151 struct sk_buff *skb; 152 struct ipq_packet_msg *pmsg; 153 struct nlmsghdr *nlh; 154 struct timeval tv; 155 156 switch (ACCESS_ONCE(copy_mode)) { 157 case IPQ_COPY_META: 158 case IPQ_COPY_NONE: 159 size = NLMSG_SPACE(sizeof(*pmsg)); 160 break; 161 162 case IPQ_COPY_PACKET: 163 if (entry->skb->ip_summed == CHECKSUM_PARTIAL && 164 (*errp = skb_checksum_help(entry->skb))) 165 return NULL; 166 167 data_len = ACCESS_ONCE(copy_range); 168 if (data_len == 0 || data_len > entry->skb->len) 169 data_len = entry->skb->len; 170 171 size = NLMSG_SPACE(sizeof(*pmsg) + data_len); 172 break; 173 174 default: 175 *errp = -EINVAL; 176 return NULL; 177 } 178 179 skb = alloc_skb(size, GFP_ATOMIC); 180 if (!skb) 181 goto nlmsg_failure; 182 183 old_tail = skb->tail; 184 nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh)); 185 pmsg = NLMSG_DATA(nlh); 186 memset(pmsg, 0, sizeof(*pmsg)); 187 188 pmsg->packet_id = (unsigned long )entry; 189 pmsg->data_len = data_len; 190 tv = ktime_to_timeval(entry->skb->tstamp); 191 pmsg->timestamp_sec = tv.tv_sec; 192 pmsg->timestamp_usec = tv.tv_usec; 193 pmsg->mark = entry->skb->mark; 194 pmsg->hook = entry->hook; 195 pmsg->hw_protocol = entry->skb->protocol; 196 197 if (entry->indev) 198 strcpy(pmsg->indev_name, entry->indev->name); 199 else 200 pmsg->indev_name[0] = '\0'; 201 202 if (entry->outdev) 203 strcpy(pmsg->outdev_name, entry->outdev->name); 204 else 205 pmsg->outdev_name[0] = '\0'; 206 207 if (entry->indev && entry->skb->dev) { 208 pmsg->hw_type = entry->skb->dev->type; 209 pmsg->hw_addrlen = dev_parse_header(entry->skb, pmsg->hw_addr); 210 } 211 212 if (data_len) 213 if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len)) 214 BUG(); 215 216 nlh->nlmsg_len = skb->tail - old_tail; 217 return skb; 218 219nlmsg_failure: 220 *errp = -EINVAL; 221 printk(KERN_ERR "ip6_queue: error creating packet message\n"); 222 return NULL; 223} 224 225static int 226ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) 227{ 228 int status = -EINVAL; 229 struct sk_buff *nskb; 230 231 if (copy_mode == IPQ_COPY_NONE) 232 return -EAGAIN; 233 234 nskb = ipq_build_packet_message(entry, &status); 235 if (nskb == NULL) 236 return status; 237 238 spin_lock_bh(&queue_lock); 239 240 if (!peer_pid) 241 goto err_out_free_nskb; 242 243 if (queue_total >= queue_maxlen) { 244 queue_dropped++; 245 status = -ENOSPC; 246 if (net_ratelimit()) 247 printk (KERN_WARNING "ip6_queue: fill at %d entries, " 248 "dropping packet(s). Dropped: %d\n", queue_total, 249 queue_dropped); 250 goto err_out_free_nskb; 251 } 252 253 /* netlink_unicast will either free the nskb or attach it to a socket */ 254 status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT); 255 if (status < 0) { 256 queue_user_dropped++; 257 goto err_out_unlock; 258 } 259 260 __ipq_enqueue_entry(entry); 261 262 spin_unlock_bh(&queue_lock); 263 return status; 264 265err_out_free_nskb: 266 kfree_skb(nskb); 267 268err_out_unlock: 269 spin_unlock_bh(&queue_lock); 270 return status; 271} 272 273static int 274ipq_mangle_ipv6(ipq_verdict_msg_t *v, struct nf_queue_entry *e) 275{ 276 int diff; 277 struct ipv6hdr *user_iph = (struct ipv6hdr *)v->payload; 278 struct sk_buff *nskb; 279 280 if (v->data_len < sizeof(*user_iph)) 281 return 0; 282 diff = v->data_len - e->skb->len; 283 if (diff < 0) { 284 if (pskb_trim(e->skb, v->data_len)) 285 return -ENOMEM; 286 } else if (diff > 0) { 287 if (v->data_len > 0xFFFF) 288 return -EINVAL; 289 if (diff > skb_tailroom(e->skb)) { 290 nskb = skb_copy_expand(e->skb, skb_headroom(e->skb), 291 diff, GFP_ATOMIC); 292 if (!nskb) { 293 printk(KERN_WARNING "ip6_queue: OOM " 294 "in mangle, dropping packet\n"); 295 return -ENOMEM; 296 } 297 kfree_skb(e->skb); 298 e->skb = nskb; 299 } 300 skb_put(e->skb, diff); 301 } 302 if (!skb_make_writable(e->skb, v->data_len)) 303 return -ENOMEM; 304 skb_copy_to_linear_data(e->skb, v->payload, v->data_len); 305 e->skb->ip_summed = CHECKSUM_NONE; 306 307 return 0; 308} 309 310static int 311ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len) 312{ 313 struct nf_queue_entry *entry; 314 315 if (vmsg->value > NF_MAX_VERDICT) 316 return -EINVAL; 317 318 entry = ipq_find_dequeue_entry(vmsg->id); 319 if (entry == NULL) 320 return -ENOENT; 321 else { 322 int verdict = vmsg->value; 323 324 if (vmsg->data_len && vmsg->data_len == len) 325 if (ipq_mangle_ipv6(vmsg, entry) < 0) 326 verdict = NF_DROP; 327 328 nf_reinject(entry, verdict); 329 return 0; 330 } 331} 332 333static int 334ipq_set_mode(unsigned char mode, unsigned int range) 335{ 336 int status; 337 338 spin_lock_bh(&queue_lock); 339 status = __ipq_set_mode(mode, range); 340 spin_unlock_bh(&queue_lock); 341 return status; 342} 343 344static int 345ipq_receive_peer(struct ipq_peer_msg *pmsg, 346 unsigned char type, unsigned int len) 347{ 348 int status = 0; 349 350 if (len < sizeof(*pmsg)) 351 return -EINVAL; 352 353 switch (type) { 354 case IPQM_MODE: 355 status = ipq_set_mode(pmsg->msg.mode.value, 356 pmsg->msg.mode.range); 357 break; 358 359 case IPQM_VERDICT: 360 if (pmsg->msg.verdict.value > NF_MAX_VERDICT) 361 status = -EINVAL; 362 else 363 status = ipq_set_verdict(&pmsg->msg.verdict, 364 len - sizeof(*pmsg)); 365 break; 366 default: 367 status = -EINVAL; 368 } 369 return status; 370} 371 372static int 373dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) 374{ 375 if (entry->indev) 376 if (entry->indev->ifindex == ifindex) 377 return 1; 378 379 if (entry->outdev) 380 if (entry->outdev->ifindex == ifindex) 381 return 1; 382#ifdef CONFIG_BRIDGE_NETFILTER 383 if (entry->skb->nf_bridge) { 384 if (entry->skb->nf_bridge->physindev && 385 entry->skb->nf_bridge->physindev->ifindex == ifindex) 386 return 1; 387 if (entry->skb->nf_bridge->physoutdev && 388 entry->skb->nf_bridge->physoutdev->ifindex == ifindex) 389 return 1; 390 } 391#endif 392 return 0; 393} 394 395static void 396ipq_dev_drop(int ifindex) 397{ 398 ipq_flush(dev_cmp, ifindex); 399} 400 401#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) 402 403static inline void 404__ipq_rcv_skb(struct sk_buff *skb) 405{ 406 int status, type, pid, flags, nlmsglen, skblen; 407 struct nlmsghdr *nlh; 408 409 skblen = skb->len; 410 if (skblen < sizeof(*nlh)) 411 return; 412 413 nlh = nlmsg_hdr(skb); 414 nlmsglen = nlh->nlmsg_len; 415 if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen) 416 return; 417 418 pid = nlh->nlmsg_pid; 419 flags = nlh->nlmsg_flags; 420 421 if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI) 422 RCV_SKB_FAIL(-EINVAL); 423 424 if (flags & MSG_TRUNC) 425 RCV_SKB_FAIL(-ECOMM); 426 427 type = nlh->nlmsg_type; 428 if (type < NLMSG_NOOP || type >= IPQM_MAX) 429 RCV_SKB_FAIL(-EINVAL); 430 431 if (type <= IPQM_BASE) 432 return; 433 434 if (security_netlink_recv(skb, CAP_NET_ADMIN)) 435 RCV_SKB_FAIL(-EPERM); 436 437 spin_lock_bh(&queue_lock); 438 439 if (peer_pid) { 440 if (peer_pid != pid) { 441 spin_unlock_bh(&queue_lock); 442 RCV_SKB_FAIL(-EBUSY); 443 } 444 } else { 445 net_enable_timestamp(); 446 peer_pid = pid; 447 } 448 449 spin_unlock_bh(&queue_lock); 450 451 status = ipq_receive_peer(NLMSG_DATA(nlh), type, 452 nlmsglen - NLMSG_LENGTH(0)); 453 if (status < 0) 454 RCV_SKB_FAIL(status); 455 456 if (flags & NLM_F_ACK) 457 netlink_ack(skb, nlh, 0); 458} 459 460static void 461ipq_rcv_skb(struct sk_buff *skb) 462{ 463 mutex_lock(&ipqnl_mutex); 464 __ipq_rcv_skb(skb); 465 mutex_unlock(&ipqnl_mutex); 466} 467 468static int 469ipq_rcv_dev_event(struct notifier_block *this, 470 unsigned long event, void *ptr) 471{ 472 struct net_device *dev = ptr; 473 474 if (!net_eq(dev_net(dev), &init_net)) 475 return NOTIFY_DONE; 476 477 /* Drop any packets associated with the downed device */ 478 if (event == NETDEV_DOWN) 479 ipq_dev_drop(dev->ifindex); 480 return NOTIFY_DONE; 481} 482 483static struct notifier_block ipq_dev_notifier = { 484 .notifier_call = ipq_rcv_dev_event, 485}; 486 487static int 488ipq_rcv_nl_event(struct notifier_block *this, 489 unsigned long event, void *ptr) 490{ 491 struct netlink_notify *n = ptr; 492 493 if (event == NETLINK_URELEASE && n->protocol == NETLINK_IP6_FW) { 494 spin_lock_bh(&queue_lock); 495 if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid)) 496 __ipq_reset(); 497 spin_unlock_bh(&queue_lock); 498 } 499 return NOTIFY_DONE; 500} 501 502static struct notifier_block ipq_nl_notifier = { 503 .notifier_call = ipq_rcv_nl_event, 504}; 505 506#ifdef CONFIG_SYSCTL 507static struct ctl_table_header *ipq_sysctl_header; 508 509static ctl_table ipq_table[] = { 510 { 511 .procname = NET_IPQ_QMAX_NAME, 512 .data = &queue_maxlen, 513 .maxlen = sizeof(queue_maxlen), 514 .mode = 0644, 515 .proc_handler = proc_dointvec 516 }, 517 { } 518}; 519#endif 520 521#ifdef CONFIG_PROC_FS 522static int ip6_queue_show(struct seq_file *m, void *v) 523{ 524 spin_lock_bh(&queue_lock); 525 526 seq_printf(m, 527 "Peer PID : %d\n" 528 "Copy mode : %hu\n" 529 "Copy range : %u\n" 530 "Queue length : %u\n" 531 "Queue max. length : %u\n" 532 "Queue dropped : %u\n" 533 "Netfilter dropped : %u\n", 534 peer_pid, 535 copy_mode, 536 copy_range, 537 queue_total, 538 queue_maxlen, 539 queue_dropped, 540 queue_user_dropped); 541 542 spin_unlock_bh(&queue_lock); 543 return 0; 544} 545 546static int ip6_queue_open(struct inode *inode, struct file *file) 547{ 548 return single_open(file, ip6_queue_show, NULL); 549} 550 551static const struct file_operations ip6_queue_proc_fops = { 552 .open = ip6_queue_open, 553 .read = seq_read, 554 .llseek = seq_lseek, 555 .release = single_release, 556 .owner = THIS_MODULE, 557}; 558#endif 559 560static const struct nf_queue_handler nfqh = { 561 .name = "ip6_queue", 562 .outfn = &ipq_enqueue_packet, 563}; 564 565static int __init ip6_queue_init(void) 566{ 567 int status = -ENOMEM; 568 struct proc_dir_entry *proc __maybe_unused; 569 570 netlink_register_notifier(&ipq_nl_notifier); 571 ipqnl = netlink_kernel_create(&init_net, NETLINK_IP6_FW, 0, 572 ipq_rcv_skb, NULL, THIS_MODULE); 573 if (ipqnl == NULL) { 574 printk(KERN_ERR "ip6_queue: failed to create netlink socket\n"); 575 goto cleanup_netlink_notifier; 576 } 577 578#ifdef CONFIG_PROC_FS 579 proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net, 580 &ip6_queue_proc_fops); 581 if (!proc) { 582 printk(KERN_ERR "ip6_queue: failed to create proc entry\n"); 583 goto cleanup_ipqnl; 584 } 585#endif 586 register_netdevice_notifier(&ipq_dev_notifier); 587#ifdef CONFIG_SYSCTL 588 ipq_sysctl_header = register_sysctl_paths(net_ipv6_ctl_path, ipq_table); 589#endif 590 status = nf_register_queue_handler(NFPROTO_IPV6, &nfqh); 591 if (status < 0) { 592 printk(KERN_ERR "ip6_queue: failed to register queue handler\n"); 593 goto cleanup_sysctl; 594 } 595 return status; 596 597cleanup_sysctl: 598#ifdef CONFIG_SYSCTL 599 unregister_sysctl_table(ipq_sysctl_header); 600#endif 601 unregister_netdevice_notifier(&ipq_dev_notifier); 602 proc_net_remove(&init_net, IPQ_PROC_FS_NAME); 603 604cleanup_ipqnl: __maybe_unused 605 netlink_kernel_release(ipqnl); 606 mutex_lock(&ipqnl_mutex); 607 mutex_unlock(&ipqnl_mutex); 608 609cleanup_netlink_notifier: 610 netlink_unregister_notifier(&ipq_nl_notifier); 611 return status; 612} 613 614static void __exit ip6_queue_fini(void) 615{ 616 nf_unregister_queue_handlers(&nfqh); 617 618 ipq_flush(NULL, 0); 619 620#ifdef CONFIG_SYSCTL 621 unregister_sysctl_table(ipq_sysctl_header); 622#endif 623 unregister_netdevice_notifier(&ipq_dev_notifier); 624 proc_net_remove(&init_net, IPQ_PROC_FS_NAME); 625 626 netlink_kernel_release(ipqnl); 627 mutex_lock(&ipqnl_mutex); 628 mutex_unlock(&ipqnl_mutex); 629 630 netlink_unregister_notifier(&ipq_nl_notifier); 631} 632 633MODULE_DESCRIPTION("IPv6 packet queue handler"); 634MODULE_LICENSE("GPL"); 635MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_IP6_FW); 636 637module_init(ip6_queue_init); 638module_exit(ip6_queue_fini); 639