1/* 2 * This is a module which is used for queueing IPv4 packets and 3 * communicating with userspace via netlink. 4 * 5 * (C) 2000-2002 James Morris <jmorris@intercode.com.au> 6 * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org> 7 * 8 * This program is free software; you can redistribute it and/or modify 9 * it under the terms of the GNU General Public License version 2 as 10 * published by the Free Software Foundation. 11 */ 12#include <linux/module.h> 13#include <linux/skbuff.h> 14#include <linux/init.h> 15#include <linux/ip.h> 16#include <linux/notifier.h> 17#include <linux/netdevice.h> 18#include <linux/netfilter.h> 19#include <linux/netfilter_ipv4/ip_queue.h> 20#include <linux/netfilter_ipv4/ip_tables.h> 21#include <linux/netlink.h> 22#include <linux/spinlock.h> 23#include <linux/sysctl.h> 24#include <linux/proc_fs.h> 25#include <linux/seq_file.h> 26#include <linux/security.h> 27#include <linux/net.h> 28#include <linux/mutex.h> 29#include <linux/slab.h> 30#include <net/net_namespace.h> 31#include <net/sock.h> 32#include <net/route.h> 33#include <net/netfilter/nf_queue.h> 34#include <net/ip.h> 35 36#define IPQ_QMAX_DEFAULT 1024 37#define IPQ_PROC_FS_NAME "ip_queue" 38#define NET_IPQ_QMAX 2088 39#define NET_IPQ_QMAX_NAME "ip_queue_maxlen" 40 41typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long); 42 43static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; 44static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; 45static DEFINE_SPINLOCK(queue_lock); 46static int peer_pid __read_mostly; 47static unsigned int copy_range __read_mostly; 48static unsigned int queue_total; 49static unsigned int queue_dropped = 0; 50static unsigned int queue_user_dropped = 0; 51static struct sock *ipqnl __read_mostly; 52static LIST_HEAD(queue_list); 53static DEFINE_MUTEX(ipqnl_mutex); 54 55static inline void 56__ipq_enqueue_entry(struct nf_queue_entry *entry) 57{ 58 list_add_tail(&entry->list, &queue_list); 59 queue_total++; 60} 61 62static inline int 63__ipq_set_mode(unsigned char mode, unsigned int range) 64{ 65 int status = 0; 66 67 switch(mode) { 68 case IPQ_COPY_NONE: 69 case IPQ_COPY_META: 70 copy_mode = mode; 71 copy_range = 0; 72 break; 73 74 case IPQ_COPY_PACKET: 75 if (range > 0xFFFF) 76 range = 0xFFFF; 77 copy_range = range; 78 copy_mode = mode; 79 break; 80 81 default: 82 status = -EINVAL; 83 84 } 85 return status; 86} 87 88static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data); 89 90static inline void 91__ipq_reset(void) 92{ 93 peer_pid = 0; 94 net_disable_timestamp(); 95 __ipq_set_mode(IPQ_COPY_NONE, 0); 96 __ipq_flush(NULL, 0); 97} 98 99static struct nf_queue_entry * 100ipq_find_dequeue_entry(unsigned long id) 101{ 102 struct nf_queue_entry *entry = NULL, *i; 103 104 spin_lock_bh(&queue_lock); 105 106 list_for_each_entry(i, &queue_list, list) { 107 if ((unsigned long)i == id) { 108 entry = i; 109 break; 110 } 111 } 112 113 if (entry) { 114 list_del(&entry->list); 115 queue_total--; 116 } 117 118 spin_unlock_bh(&queue_lock); 119 return entry; 120} 121 122static void 123__ipq_flush(ipq_cmpfn cmpfn, unsigned long data) 124{ 125 struct nf_queue_entry *entry, *next; 126 127 list_for_each_entry_safe(entry, next, &queue_list, list) { 128 if (!cmpfn || cmpfn(entry, data)) { 129 list_del(&entry->list); 130 queue_total--; 131 nf_reinject(entry, NF_DROP); 132 } 133 } 134} 135 136static void 137ipq_flush(ipq_cmpfn cmpfn, unsigned long data) 138{ 139 spin_lock_bh(&queue_lock); 140 __ipq_flush(cmpfn, data); 141 spin_unlock_bh(&queue_lock); 142} 143 144static struct sk_buff * 145ipq_build_packet_message(struct nf_queue_entry *entry, int *errp) 146{ 147 sk_buff_data_t old_tail; 148 size_t size = 0; 149 size_t data_len = 0; 150 struct sk_buff *skb; 151 struct ipq_packet_msg *pmsg; 152 struct nlmsghdr *nlh; 153 struct timeval tv; 154 155 switch (ACCESS_ONCE(copy_mode)) { 156 case IPQ_COPY_META: 157 case IPQ_COPY_NONE: 158 size = NLMSG_SPACE(sizeof(*pmsg)); 159 break; 160 161 case IPQ_COPY_PACKET: 162 if (entry->skb->ip_summed == CHECKSUM_PARTIAL && 163 (*errp = skb_checksum_help(entry->skb))) 164 return NULL; 165 166 data_len = ACCESS_ONCE(copy_range); 167 if (data_len == 0 || data_len > entry->skb->len) 168 data_len = entry->skb->len; 169 170 size = NLMSG_SPACE(sizeof(*pmsg) + data_len); 171 break; 172 173 default: 174 *errp = -EINVAL; 175 return NULL; 176 } 177 178 skb = alloc_skb(size, GFP_ATOMIC); 179 if (!skb) 180 goto nlmsg_failure; 181 182 old_tail = skb->tail; 183 nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh)); 184 pmsg = NLMSG_DATA(nlh); 185 memset(pmsg, 0, sizeof(*pmsg)); 186 187 pmsg->packet_id = (unsigned long )entry; 188 pmsg->data_len = data_len; 189 tv = ktime_to_timeval(entry->skb->tstamp); 190 pmsg->timestamp_sec = tv.tv_sec; 191 pmsg->timestamp_usec = tv.tv_usec; 192 pmsg->mark = entry->skb->mark; 193 pmsg->hook = entry->hook; 194 pmsg->hw_protocol = entry->skb->protocol; 195 196 if (entry->indev) 197 strcpy(pmsg->indev_name, entry->indev->name); 198 else 199 pmsg->indev_name[0] = '\0'; 200 201 if (entry->outdev) 202 strcpy(pmsg->outdev_name, entry->outdev->name); 203 else 204 pmsg->outdev_name[0] = '\0'; 205 206 if (entry->indev && entry->skb->dev) { 207 pmsg->hw_type = entry->skb->dev->type; 208 pmsg->hw_addrlen = dev_parse_header(entry->skb, 209 pmsg->hw_addr); 210 } 211 212 if (data_len) 213 if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len)) 214 BUG(); 215 216 nlh->nlmsg_len = skb->tail - old_tail; 217 return skb; 218 219nlmsg_failure: 220 *errp = -EINVAL; 221 printk(KERN_ERR "ip_queue: error creating packet message\n"); 222 return NULL; 223} 224 225static int 226ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) 227{ 228 int status = -EINVAL; 229 struct sk_buff *nskb; 230 231 if (copy_mode == IPQ_COPY_NONE) 232 return -EAGAIN; 233 234 nskb = ipq_build_packet_message(entry, &status); 235 if (nskb == NULL) 236 return status; 237 238 spin_lock_bh(&queue_lock); 239 240 if (!peer_pid) 241 goto err_out_free_nskb; 242 243 if (queue_total >= queue_maxlen) { 244 queue_dropped++; 245 status = -ENOSPC; 246 if (net_ratelimit()) 247 printk (KERN_WARNING "ip_queue: full at %d entries, " 248 "dropping packets(s). Dropped: %d\n", queue_total, 249 queue_dropped); 250 goto err_out_free_nskb; 251 } 252 253 /* netlink_unicast will either free the nskb or attach it to a socket */ 254 status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT); 255 if (status < 0) { 256 queue_user_dropped++; 257 goto err_out_unlock; 258 } 259 260 __ipq_enqueue_entry(entry); 261 262 spin_unlock_bh(&queue_lock); 263 return status; 264 265err_out_free_nskb: 266 kfree_skb(nskb); 267 268err_out_unlock: 269 spin_unlock_bh(&queue_lock); 270 return status; 271} 272 273static int 274ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct nf_queue_entry *e) 275{ 276 int diff; 277 struct iphdr *user_iph = (struct iphdr *)v->payload; 278 struct sk_buff *nskb; 279 280 if (v->data_len < sizeof(*user_iph)) 281 return 0; 282 diff = v->data_len - e->skb->len; 283 if (diff < 0) { 284 if (pskb_trim(e->skb, v->data_len)) 285 return -ENOMEM; 286 } else if (diff > 0) { 287 if (v->data_len > 0xFFFF) 288 return -EINVAL; 289 if (diff > skb_tailroom(e->skb)) { 290 nskb = skb_copy_expand(e->skb, skb_headroom(e->skb), 291 diff, GFP_ATOMIC); 292 if (!nskb) { 293 printk(KERN_WARNING "ip_queue: error " 294 "in mangle, dropping packet\n"); 295 return -ENOMEM; 296 } 297 kfree_skb(e->skb); 298 e->skb = nskb; 299 } 300 skb_put(e->skb, diff); 301 } 302 if (!skb_make_writable(e->skb, v->data_len)) 303 return -ENOMEM; 304 skb_copy_to_linear_data(e->skb, v->payload, v->data_len); 305 e->skb->ip_summed = CHECKSUM_NONE; 306 307 return 0; 308} 309 310static int 311ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len) 312{ 313 struct nf_queue_entry *entry; 314 315 if (vmsg->value > NF_MAX_VERDICT) 316 return -EINVAL; 317 318 entry = ipq_find_dequeue_entry(vmsg->id); 319 if (entry == NULL) 320 return -ENOENT; 321 else { 322 int verdict = vmsg->value; 323 324 if (vmsg->data_len && vmsg->data_len == len) 325 if (ipq_mangle_ipv4(vmsg, entry) < 0) 326 verdict = NF_DROP; 327 328 nf_reinject(entry, verdict); 329 return 0; 330 } 331} 332 333static int 334ipq_set_mode(unsigned char mode, unsigned int range) 335{ 336 int status; 337 338 spin_lock_bh(&queue_lock); 339 status = __ipq_set_mode(mode, range); 340 spin_unlock_bh(&queue_lock); 341 return status; 342} 343 344static int 345ipq_receive_peer(struct ipq_peer_msg *pmsg, 346 unsigned char type, unsigned int len) 347{ 348 int status = 0; 349 350 if (len < sizeof(*pmsg)) 351 return -EINVAL; 352 353 switch (type) { 354 case IPQM_MODE: 355 status = ipq_set_mode(pmsg->msg.mode.value, 356 pmsg->msg.mode.range); 357 break; 358 359 case IPQM_VERDICT: 360 if (pmsg->msg.verdict.value > NF_MAX_VERDICT) 361 status = -EINVAL; 362 else 363 status = ipq_set_verdict(&pmsg->msg.verdict, 364 len - sizeof(*pmsg)); 365 break; 366 default: 367 status = -EINVAL; 368 } 369 return status; 370} 371 372static int 373dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) 374{ 375 if (entry->indev) 376 if (entry->indev->ifindex == ifindex) 377 return 1; 378 if (entry->outdev) 379 if (entry->outdev->ifindex == ifindex) 380 return 1; 381#ifdef CONFIG_BRIDGE_NETFILTER 382 if (entry->skb->nf_bridge) { 383 if (entry->skb->nf_bridge->physindev && 384 entry->skb->nf_bridge->physindev->ifindex == ifindex) 385 return 1; 386 if (entry->skb->nf_bridge->physoutdev && 387 entry->skb->nf_bridge->physoutdev->ifindex == ifindex) 388 return 1; 389 } 390#endif 391 return 0; 392} 393 394static void 395ipq_dev_drop(int ifindex) 396{ 397 ipq_flush(dev_cmp, ifindex); 398} 399 400#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) 401 402static inline void 403__ipq_rcv_skb(struct sk_buff *skb) 404{ 405 int status, type, pid, flags, nlmsglen, skblen; 406 struct nlmsghdr *nlh; 407 408 skblen = skb->len; 409 if (skblen < sizeof(*nlh)) 410 return; 411 412 nlh = nlmsg_hdr(skb); 413 nlmsglen = nlh->nlmsg_len; 414 if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen) 415 return; 416 417 pid = nlh->nlmsg_pid; 418 flags = nlh->nlmsg_flags; 419 420 if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI) 421 RCV_SKB_FAIL(-EINVAL); 422 423 if (flags & MSG_TRUNC) 424 RCV_SKB_FAIL(-ECOMM); 425 426 type = nlh->nlmsg_type; 427 if (type < NLMSG_NOOP || type >= IPQM_MAX) 428 RCV_SKB_FAIL(-EINVAL); 429 430 if (type <= IPQM_BASE) 431 return; 432 433 if (security_netlink_recv(skb, CAP_NET_ADMIN)) 434 RCV_SKB_FAIL(-EPERM); 435 436 spin_lock_bh(&queue_lock); 437 438 if (peer_pid) { 439 if (peer_pid != pid) { 440 spin_unlock_bh(&queue_lock); 441 RCV_SKB_FAIL(-EBUSY); 442 } 443 } else { 444 net_enable_timestamp(); 445 peer_pid = pid; 446 } 447 448 spin_unlock_bh(&queue_lock); 449 450 status = ipq_receive_peer(NLMSG_DATA(nlh), type, 451 nlmsglen - NLMSG_LENGTH(0)); 452 if (status < 0) 453 RCV_SKB_FAIL(status); 454 455 if (flags & NLM_F_ACK) 456 netlink_ack(skb, nlh, 0); 457} 458 459static void 460ipq_rcv_skb(struct sk_buff *skb) 461{ 462 mutex_lock(&ipqnl_mutex); 463 __ipq_rcv_skb(skb); 464 mutex_unlock(&ipqnl_mutex); 465} 466 467static int 468ipq_rcv_dev_event(struct notifier_block *this, 469 unsigned long event, void *ptr) 470{ 471 struct net_device *dev = ptr; 472 473 if (!net_eq(dev_net(dev), &init_net)) 474 return NOTIFY_DONE; 475 476 /* Drop any packets associated with the downed device */ 477 if (event == NETDEV_DOWN) 478 ipq_dev_drop(dev->ifindex); 479 return NOTIFY_DONE; 480} 481 482static struct notifier_block ipq_dev_notifier = { 483 .notifier_call = ipq_rcv_dev_event, 484}; 485 486static int 487ipq_rcv_nl_event(struct notifier_block *this, 488 unsigned long event, void *ptr) 489{ 490 struct netlink_notify *n = ptr; 491 492 if (event == NETLINK_URELEASE && n->protocol == NETLINK_FIREWALL) { 493 spin_lock_bh(&queue_lock); 494 if ((net_eq(n->net, &init_net)) && (n->pid == peer_pid)) 495 __ipq_reset(); 496 spin_unlock_bh(&queue_lock); 497 } 498 return NOTIFY_DONE; 499} 500 501static struct notifier_block ipq_nl_notifier = { 502 .notifier_call = ipq_rcv_nl_event, 503}; 504 505#ifdef CONFIG_SYSCTL 506static struct ctl_table_header *ipq_sysctl_header; 507 508static ctl_table ipq_table[] = { 509 { 510 .procname = NET_IPQ_QMAX_NAME, 511 .data = &queue_maxlen, 512 .maxlen = sizeof(queue_maxlen), 513 .mode = 0644, 514 .proc_handler = proc_dointvec 515 }, 516 { } 517}; 518#endif 519 520#ifdef CONFIG_PROC_FS 521static int ip_queue_show(struct seq_file *m, void *v) 522{ 523 spin_lock_bh(&queue_lock); 524 525 seq_printf(m, 526 "Peer PID : %d\n" 527 "Copy mode : %hu\n" 528 "Copy range : %u\n" 529 "Queue length : %u\n" 530 "Queue max. length : %u\n" 531 "Queue dropped : %u\n" 532 "Netlink dropped : %u\n", 533 peer_pid, 534 copy_mode, 535 copy_range, 536 queue_total, 537 queue_maxlen, 538 queue_dropped, 539 queue_user_dropped); 540 541 spin_unlock_bh(&queue_lock); 542 return 0; 543} 544 545static int ip_queue_open(struct inode *inode, struct file *file) 546{ 547 return single_open(file, ip_queue_show, NULL); 548} 549 550static const struct file_operations ip_queue_proc_fops = { 551 .open = ip_queue_open, 552 .read = seq_read, 553 .llseek = seq_lseek, 554 .release = single_release, 555 .owner = THIS_MODULE, 556}; 557#endif 558 559static const struct nf_queue_handler nfqh = { 560 .name = "ip_queue", 561 .outfn = &ipq_enqueue_packet, 562}; 563 564static int __init ip_queue_init(void) 565{ 566 int status = -ENOMEM; 567 struct proc_dir_entry *proc __maybe_unused; 568 569 netlink_register_notifier(&ipq_nl_notifier); 570 ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0, 571 ipq_rcv_skb, NULL, THIS_MODULE); 572 if (ipqnl == NULL) { 573 printk(KERN_ERR "ip_queue: failed to create netlink socket\n"); 574 goto cleanup_netlink_notifier; 575 } 576 577#ifdef CONFIG_PROC_FS 578 proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net, 579 &ip_queue_proc_fops); 580 if (!proc) { 581 printk(KERN_ERR "ip_queue: failed to create proc entry\n"); 582 goto cleanup_ipqnl; 583 } 584#endif 585 register_netdevice_notifier(&ipq_dev_notifier); 586#ifdef CONFIG_SYSCTL 587 ipq_sysctl_header = register_sysctl_paths(net_ipv4_ctl_path, ipq_table); 588#endif 589 status = nf_register_queue_handler(NFPROTO_IPV4, &nfqh); 590 if (status < 0) { 591 printk(KERN_ERR "ip_queue: failed to register queue handler\n"); 592 goto cleanup_sysctl; 593 } 594 return status; 595 596cleanup_sysctl: 597#ifdef CONFIG_SYSCTL 598 unregister_sysctl_table(ipq_sysctl_header); 599#endif 600 unregister_netdevice_notifier(&ipq_dev_notifier); 601 proc_net_remove(&init_net, IPQ_PROC_FS_NAME); 602cleanup_ipqnl: __maybe_unused 603 netlink_kernel_release(ipqnl); 604 mutex_lock(&ipqnl_mutex); 605 mutex_unlock(&ipqnl_mutex); 606 607cleanup_netlink_notifier: 608 netlink_unregister_notifier(&ipq_nl_notifier); 609 return status; 610} 611 612static void __exit ip_queue_fini(void) 613{ 614 nf_unregister_queue_handlers(&nfqh); 615 616 ipq_flush(NULL, 0); 617 618#ifdef CONFIG_SYSCTL 619 unregister_sysctl_table(ipq_sysctl_header); 620#endif 621 unregister_netdevice_notifier(&ipq_dev_notifier); 622 proc_net_remove(&init_net, IPQ_PROC_FS_NAME); 623 624 netlink_kernel_release(ipqnl); 625 mutex_lock(&ipqnl_mutex); 626 mutex_unlock(&ipqnl_mutex); 627 628 netlink_unregister_notifier(&ipq_nl_notifier); 629} 630 631MODULE_DESCRIPTION("IPv4 packet queue handler"); 632MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); 633MODULE_LICENSE("GPL"); 634MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_FIREWALL); 635 636module_init(ip_queue_init); 637module_exit(ip_queue_fini); 638