1/* 2 * This is a module which is used for queueing IPv4 packets and 3 * communicating with userspace via netlink. 4 * 5 * (C) 2000-2002 James Morris, this code is GPL. 6 * 7 * 2000-03-27: Simplified code (thanks to Andi Kleen for clues). 8 * 2000-05-20: Fixed notifier problems (following Miguel Freitas' report). 9 * 2000-06-19: Fixed so nfmark is copied to metadata (reported by Sebastian 10 * Zander). 11 * 2000-08-01: Added Nick Williams' MAC support. 12 * 2002-06-25: Code cleanup. 13 * 14 */ 15#include <linux/module.h> 16#include <linux/skbuff.h> 17#include <linux/init.h> 18#include <linux/ip.h> 19#include <linux/notifier.h> 20#include <linux/netdevice.h> 21#include <linux/netfilter.h> 22#include <linux/netfilter_ipv4/ip_queue.h> 23#include <linux/netfilter_ipv4/ip_tables.h> 24#include <linux/netlink.h> 25#include <linux/spinlock.h> 26#include <linux/brlock.h> 27#include <linux/sysctl.h> 28#include <linux/proc_fs.h> 29#include <net/sock.h> 30#include <net/route.h> 31 32#define IPQ_QMAX_DEFAULT 1024 33#define IPQ_PROC_FS_NAME "ip_queue" 34#define NET_IPQ_QMAX 2088 35#define NET_IPQ_QMAX_NAME "ip_queue_maxlen" 36 37struct ipq_rt_info { 38 __u8 tos; 39 __u32 daddr; 40 __u32 saddr; 41}; 42 43struct ipq_queue_entry { 44 struct list_head list; 45 struct nf_info *info; 46 struct sk_buff *skb; 47 struct ipq_rt_info rt_info; 48}; 49 50typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); 51 52static unsigned char copy_mode = IPQ_COPY_NONE; 53static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT; 54static rwlock_t queue_lock = RW_LOCK_UNLOCKED; 55static int peer_pid; 56static unsigned int copy_range; 57static unsigned int queue_total; 58static struct sock *ipqnl; 59static LIST_HEAD(queue_list); 60static DECLARE_MUTEX(ipqnl_sem); 61 62static void 63ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict) 64{ 65 nf_reinject(entry->skb, entry->info, verdict); 66 kfree(entry); 67} 68 69static inline int 70__ipq_enqueue_entry(struct ipq_queue_entry *entry) 71{ 72 if (queue_total >= queue_maxlen) { 73 if (net_ratelimit()) 74 printk(KERN_WARNING "ip_queue: full at %d entries, " 75 "dropping packet(s).\n", queue_total); 76 return -ENOSPC; 77 } 78 list_add(&entry->list, &queue_list); 79 queue_total++; 80 return 0; 81} 82 83/* 84 * Find and return a queued entry matched by cmpfn, or return the last 85 * entry if cmpfn is NULL. 86 */ 87static inline struct ipq_queue_entry * 88__ipq_find_entry(ipq_cmpfn cmpfn, unsigned long data) 89{ 90 struct list_head *p; 91 92 list_for_each_prev(p, &queue_list) { 93 struct ipq_queue_entry *entry = (struct ipq_queue_entry *)p; 94 95 if (!cmpfn || cmpfn(entry, data)) 96 return entry; 97 } 98 return NULL; 99} 100 101static inline void 102__ipq_dequeue_entry(struct ipq_queue_entry *entry) 103{ 104 list_del(&entry->list); 105 queue_total--; 106} 107 108static inline struct ipq_queue_entry * 109__ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data) 110{ 111 struct ipq_queue_entry *entry; 112 113 entry = __ipq_find_entry(cmpfn, data); 114 if (entry == NULL) 115 return NULL; 116 117 __ipq_dequeue_entry(entry); 118 return entry; 119} 120 121 122static inline void 123__ipq_flush(int verdict) 124{ 125 struct ipq_queue_entry *entry; 126 127 while ((entry = __ipq_find_dequeue_entry(NULL, 0))) 128 ipq_issue_verdict(entry, verdict); 129} 130 131static inline int 132__ipq_set_mode(unsigned char mode, unsigned int range) 133{ 134 int status = 0; 135 136 switch(mode) { 137 case IPQ_COPY_NONE: 138 case IPQ_COPY_META: 139 copy_mode = mode; 140 copy_range = 0; 141 break; 142 143 case IPQ_COPY_PACKET: 144 copy_mode = mode; 145 copy_range = range; 146 if (copy_range > 0xFFFF) 147 copy_range = 0xFFFF; 148 break; 149 150 default: 151 status = -EINVAL; 152 153 } 154 return status; 155} 156 157static inline void 158__ipq_reset(void) 159{ 160 peer_pid = 0; 161 __ipq_set_mode(IPQ_COPY_NONE, 0); 162 __ipq_flush(NF_DROP); 163} 164 165static struct ipq_queue_entry * 166ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data) 167{ 168 struct ipq_queue_entry *entry; 169 170 write_lock_bh(&queue_lock); 171 entry = __ipq_find_dequeue_entry(cmpfn, data); 172 write_unlock_bh(&queue_lock); 173 return entry; 174} 175 176static void 177ipq_flush(int verdict) 178{ 179 write_lock_bh(&queue_lock); 180 __ipq_flush(verdict); 181 write_unlock_bh(&queue_lock); 182} 183 184static struct sk_buff * 185ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) 186{ 187 unsigned char *old_tail; 188 size_t size = 0; 189 size_t data_len = 0; 190 struct sk_buff *skb; 191 struct ipq_packet_msg *pmsg; 192 struct nlmsghdr *nlh; 193 194 read_lock_bh(&queue_lock); 195 196 switch (copy_mode) { 197 case IPQ_COPY_META: 198 case IPQ_COPY_NONE: 199 size = NLMSG_SPACE(sizeof(*pmsg)); 200 data_len = 0; 201 break; 202 203 case IPQ_COPY_PACKET: 204 if (copy_range == 0 || copy_range > entry->skb->len) 205 data_len = entry->skb->len; 206 else 207 data_len = copy_range; 208 209 size = NLMSG_SPACE(sizeof(*pmsg) + data_len); 210 break; 211 212 default: 213 *errp = -EINVAL; 214 read_unlock_bh(&queue_lock); 215 return NULL; 216 } 217 218 read_unlock_bh(&queue_lock); 219 220 skb = alloc_skb(size, GFP_ATOMIC); 221 if (!skb) 222 goto nlmsg_failure; 223 224 old_tail= skb->tail; 225 nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh)); 226 pmsg = NLMSG_DATA(nlh); 227 memset(pmsg, 0, sizeof(*pmsg)); 228 229 pmsg->packet_id = (unsigned long )entry; 230 pmsg->data_len = data_len; 231 pmsg->timestamp_sec = entry->skb->stamp.tv_sec; 232 pmsg->timestamp_usec = entry->skb->stamp.tv_usec; 233 pmsg->mark = entry->skb->nfmark; 234 pmsg->hook = entry->info->hook; 235 pmsg->hw_protocol = entry->skb->protocol; 236 237 if (entry->info->indev) 238 strcpy(pmsg->indev_name, entry->info->indev->name); 239 else 240 pmsg->indev_name[0] = '\0'; 241 242 if (entry->info->outdev) 243 strcpy(pmsg->outdev_name, entry->info->outdev->name); 244 else 245 pmsg->outdev_name[0] = '\0'; 246 247 if (entry->info->indev && entry->skb->dev) { 248 pmsg->hw_type = entry->skb->dev->type; 249 if (entry->skb->dev->hard_header_parse) 250 pmsg->hw_addrlen = 251 entry->skb->dev->hard_header_parse(entry->skb, 252 pmsg->hw_addr); 253 } 254 255 if (data_len) 256 memcpy(pmsg->payload, entry->skb->data, data_len); 257 258 nlh->nlmsg_len = skb->tail - old_tail; 259 return skb; 260 261nlmsg_failure: 262 if (skb) 263 kfree_skb(skb); 264 *errp = -EINVAL; 265 printk(KERN_ERR "ip_queue: error creating packet message\n"); 266 return NULL; 267} 268 269static int 270ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, void *data) 271{ 272 int status = -EINVAL; 273 struct sk_buff *nskb; 274 struct ipq_queue_entry *entry; 275 276 if (copy_mode == IPQ_COPY_NONE) 277 return -EAGAIN; 278 279 entry = kmalloc(sizeof(*entry), GFP_ATOMIC); 280 if (entry == NULL) { 281 printk(KERN_ERR "ip_queue: OOM in ipq_enqueue_packet()\n"); 282 return -ENOMEM; 283 } 284 285 entry->info = info; 286 entry->skb = skb; 287 288 if (entry->info->hook == NF_IP_LOCAL_OUT) { 289 struct iphdr *iph = skb->nh.iph; 290 291 entry->rt_info.tos = iph->tos; 292 entry->rt_info.daddr = iph->daddr; 293 entry->rt_info.saddr = iph->saddr; 294 } 295 296 nskb = ipq_build_packet_message(entry, &status); 297 if (nskb == NULL) 298 goto err_out_free; 299 300 write_lock_bh(&queue_lock); 301 302 if (!peer_pid) 303 goto err_out_unlock; 304 305 status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT); 306 if (status < 0) 307 goto err_out_unlock; 308 309 status = __ipq_enqueue_entry(entry); 310 if (status < 0) 311 goto err_out_unlock; 312 313 write_unlock_bh(&queue_lock); 314 return status; 315 316err_out_unlock: 317 write_unlock_bh(&queue_lock); 318 319err_out_free: 320 kfree(entry); 321 return status; 322} 323 324static int 325ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) 326{ 327 int diff; 328 struct iphdr *user_iph = (struct iphdr *)v->payload; 329 330 if (v->data_len < sizeof(*user_iph)) 331 return 0; 332 diff = v->data_len - e->skb->len; 333 if (diff < 0) 334 skb_trim(e->skb, v->data_len); 335 else if (diff > 0) { 336 if (v->data_len > 0xFFFF) 337 return -EINVAL; 338 if (diff > skb_tailroom(e->skb)) { 339 struct sk_buff *newskb; 340 341 newskb = skb_copy_expand(e->skb, 342 skb_headroom(e->skb), 343 diff, 344 GFP_ATOMIC); 345 if (newskb == NULL) { 346 printk(KERN_WARNING "ip_queue: OOM " 347 "in mangle, dropping packet\n"); 348 return -ENOMEM; 349 } 350 if (e->skb->sk) 351 skb_set_owner_w(newskb, e->skb->sk); 352 kfree_skb(e->skb); 353 e->skb = newskb; 354 } 355 skb_put(e->skb, diff); 356 } 357 memcpy(e->skb->data, v->payload, v->data_len); 358 e->skb->nfcache |= NFC_ALTERED; 359 360 /* 361 * Extra routing may needed on local out, as the QUEUE target never 362 * returns control to the table. 363 */ 364 if (e->info->hook == NF_IP_LOCAL_OUT) { 365 struct iphdr *iph = e->skb->nh.iph; 366 367 if (!(iph->tos == e->rt_info.tos 368 && iph->daddr == e->rt_info.daddr 369 && iph->saddr == e->rt_info.saddr)) 370 return ip_route_me_harder(&e->skb); 371 } 372 return 0; 373} 374 375static inline int 376id_cmp(struct ipq_queue_entry *e, unsigned long id) 377{ 378 return (id == (unsigned long )e); 379} 380 381static int 382ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len) 383{ 384 struct ipq_queue_entry *entry; 385 386 if (vmsg->value > NF_MAX_VERDICT) 387 return -EINVAL; 388 389 entry = ipq_find_dequeue_entry(id_cmp, vmsg->id); 390 if (entry == NULL) 391 return -ENOENT; 392 else { 393 int verdict = vmsg->value; 394 395 if (vmsg->data_len && vmsg->data_len == len) 396 if (ipq_mangle_ipv4(vmsg, entry) < 0) 397 verdict = NF_DROP; 398 399 ipq_issue_verdict(entry, verdict); 400 return 0; 401 } 402} 403 404static int 405ipq_set_mode(unsigned char mode, unsigned int range) 406{ 407 int status; 408 409 write_lock_bh(&queue_lock); 410 status = __ipq_set_mode(mode, range); 411 write_unlock_bh(&queue_lock); 412 return status; 413} 414 415static int 416ipq_receive_peer(struct ipq_peer_msg *pmsg, 417 unsigned char type, unsigned int len) 418{ 419 int status = 0; 420 421 if (len < sizeof(*pmsg)) 422 return -EINVAL; 423 424 switch (type) { 425 case IPQM_MODE: 426 status = ipq_set_mode(pmsg->msg.mode.value, 427 pmsg->msg.mode.range); 428 break; 429 430 case IPQM_VERDICT: 431 if (pmsg->msg.verdict.value > NF_MAX_VERDICT) 432 status = -EINVAL; 433 else 434 status = ipq_set_verdict(&pmsg->msg.verdict, 435 len - sizeof(*pmsg)); 436 break; 437 default: 438 status = -EINVAL; 439 } 440 return status; 441} 442 443static int 444dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex) 445{ 446 if (entry->info->indev) 447 if (entry->info->indev->ifindex == ifindex) 448 return 1; 449 450 if (entry->info->outdev) 451 if (entry->info->outdev->ifindex == ifindex) 452 return 1; 453 454 return 0; 455} 456 457static void 458ipq_dev_drop(int ifindex) 459{ 460 struct ipq_queue_entry *entry; 461 462 while ((entry = ipq_find_dequeue_entry(dev_cmp, ifindex)) != NULL) 463 ipq_issue_verdict(entry, NF_DROP); 464} 465 466#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) 467 468static inline void 469ipq_rcv_skb(struct sk_buff *skb) 470{ 471 int status, type, pid, flags, nlmsglen, skblen; 472 struct nlmsghdr *nlh; 473 474 skblen = skb->len; 475 if (skblen < sizeof(*nlh)) 476 return; 477 478 nlh = (struct nlmsghdr *)skb->data; 479 nlmsglen = nlh->nlmsg_len; 480 if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen) 481 return; 482 483 pid = nlh->nlmsg_pid; 484 flags = nlh->nlmsg_flags; 485 486 if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI) 487 RCV_SKB_FAIL(-EINVAL); 488 489 if (flags & MSG_TRUNC) 490 RCV_SKB_FAIL(-ECOMM); 491 492 type = nlh->nlmsg_type; 493 if (type < NLMSG_NOOP || type >= IPQM_MAX) 494 RCV_SKB_FAIL(-EINVAL); 495 496 if (type <= IPQM_BASE) 497 return; 498 499 if(!cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN)) 500 RCV_SKB_FAIL(-EPERM); 501 502 write_lock_bh(&queue_lock); 503 504 if (peer_pid) { 505 if (peer_pid != pid) { 506 write_unlock_bh(&queue_lock); 507 RCV_SKB_FAIL(-EBUSY); 508 } 509 } 510 else 511 peer_pid = pid; 512 513 write_unlock_bh(&queue_lock); 514 515 status = ipq_receive_peer(NLMSG_DATA(nlh), type, 516 skblen - NLMSG_LENGTH(0)); 517 if (status < 0) 518 RCV_SKB_FAIL(status); 519 520 if (flags & NLM_F_ACK) 521 netlink_ack(skb, nlh, 0); 522 return; 523} 524 525static void 526ipq_rcv_sk(struct sock *sk, int len) 527{ 528 do { 529 struct sk_buff *skb; 530 531 if (down_trylock(&ipqnl_sem)) 532 return; 533 534 while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) { 535 ipq_rcv_skb(skb); 536 kfree_skb(skb); 537 } 538 539 up(&ipqnl_sem); 540 541 } while (ipqnl && ipqnl->receive_queue.qlen); 542} 543 544static int 545ipq_rcv_dev_event(struct notifier_block *this, 546 unsigned long event, void *ptr) 547{ 548 struct net_device *dev = ptr; 549 550 /* Drop any packets associated with the downed device */ 551 if (event == NETDEV_DOWN) 552 ipq_dev_drop(dev->ifindex); 553 return NOTIFY_DONE; 554} 555 556static struct notifier_block ipq_dev_notifier = { 557 ipq_rcv_dev_event, 558 NULL, 559 0 560}; 561 562static int 563ipq_rcv_nl_event(struct notifier_block *this, 564 unsigned long event, void *ptr) 565{ 566 struct netlink_notify *n = ptr; 567 568 if (event == NETLINK_URELEASE && 569 n->protocol == NETLINK_FIREWALL && n->pid) { 570 write_lock_bh(&queue_lock); 571 if (n->pid == peer_pid) 572 __ipq_reset(); 573 write_unlock_bh(&queue_lock); 574 } 575 return NOTIFY_DONE; 576} 577 578static struct notifier_block ipq_nl_notifier = { 579 ipq_rcv_nl_event, 580 NULL, 581 0 582}; 583 584static int sysctl_maxlen = IPQ_QMAX_DEFAULT; 585static struct ctl_table_header *ipq_sysctl_header; 586 587static ctl_table ipq_table[] = { 588 { NET_IPQ_QMAX, NET_IPQ_QMAX_NAME, &sysctl_maxlen, 589 sizeof(sysctl_maxlen), 0644, NULL, proc_dointvec }, 590 { 0 } 591}; 592 593static ctl_table ipq_dir_table[] = { 594 {NET_IPV4, "ipv4", NULL, 0, 0555, ipq_table, 0, 0, 0, 0, 0}, 595 { 0 } 596}; 597 598static ctl_table ipq_root_table[] = { 599 {CTL_NET, "net", NULL, 0, 0555, ipq_dir_table, 0, 0, 0, 0, 0}, 600 { 0 } 601}; 602 603static int 604ipq_get_info(char *buffer, char **start, off_t offset, int length) 605{ 606 int len; 607 608 read_lock_bh(&queue_lock); 609 610 len = sprintf(buffer, 611 "Peer PID : %d\n" 612 "Copy mode : %hu\n" 613 "Copy range : %u\n" 614 "Queue length : %u\n" 615 "Queue max. length : %u\n", 616 peer_pid, 617 copy_mode, 618 copy_range, 619 queue_total, 620 queue_maxlen); 621 622 read_unlock_bh(&queue_lock); 623 624 *start = buffer + offset; 625 len -= offset; 626 if (len > length) 627 len = length; 628 else if (len < 0) 629 len = 0; 630 return len; 631} 632 633static int 634init_or_cleanup(int init) 635{ 636 int status = -ENOMEM; 637 struct proc_dir_entry *proc; 638 639 if (!init) 640 goto cleanup; 641 642 netlink_register_notifier(&ipq_nl_notifier); 643 ipqnl = netlink_kernel_create(NETLINK_FIREWALL, ipq_rcv_sk); 644 if (ipqnl == NULL) { 645 printk(KERN_ERR "ip_queue: failed to create netlink socket\n"); 646 goto cleanup_netlink_notifier; 647 } 648 649 proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info); 650 if (proc) 651 proc->owner = THIS_MODULE; 652 else { 653 printk(KERN_ERR "ip_queue: failed to create proc entry\n"); 654 goto cleanup_ipqnl; 655 } 656 657 register_netdevice_notifier(&ipq_dev_notifier); 658 ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0); 659 660 status = nf_register_queue_handler(PF_INET, ipq_enqueue_packet, NULL); 661 if (status < 0) { 662 printk(KERN_ERR "ip_queue: failed to register queue handler\n"); 663 goto cleanup_sysctl; 664 } 665 return status; 666 667cleanup: 668 nf_unregister_queue_handler(PF_INET); 669 br_write_lock_bh(BR_NETPROTO_LOCK); 670 br_write_unlock_bh(BR_NETPROTO_LOCK); 671 ipq_flush(NF_DROP); 672 673cleanup_sysctl: 674 unregister_sysctl_table(ipq_sysctl_header); 675 unregister_netdevice_notifier(&ipq_dev_notifier); 676 proc_net_remove(IPQ_PROC_FS_NAME); 677 678cleanup_ipqnl: 679 sock_release(ipqnl->socket); 680 down(&ipqnl_sem); 681 up(&ipqnl_sem); 682 683cleanup_netlink_notifier: 684 netlink_unregister_notifier(&ipq_nl_notifier); 685 return status; 686} 687 688static int __init init(void) 689{ 690 691 return init_or_cleanup(1); 692} 693 694static void __exit fini(void) 695{ 696 init_or_cleanup(0); 697} 698 699MODULE_DESCRIPTION("IPv4 packet queue handler"); 700MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); 701MODULE_LICENSE("GPL"); 702 703module_init(init); 704module_exit(fini); 705