1/* 2 * IPVS An implementation of the IP virtual server support for the 3 * LINUX operating system. IPVS is now implemented as a module 4 * over the NetFilter framework. IPVS can be used to build a 5 * high-performance and highly available server based on a 6 * cluster of servers. 7 * 8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 9 * Peter Kese <peter.kese@ijs.si> 10 * Julian Anastasov <ja@ssi.bg> 11 * 12 * This program is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU General Public License 14 * as published by the Free Software Foundation; either version 15 * 2 of the License, or (at your option) any later version. 16 * 17 * Changes: 18 * 19 */ 20 21#define KMSG_COMPONENT "IPVS" 22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 23 24#include <linux/module.h> 25#include <linux/init.h> 26#include <linux/types.h> 27#include <linux/capability.h> 28#include <linux/fs.h> 29#include <linux/sysctl.h> 30#include <linux/proc_fs.h> 31#include <linux/workqueue.h> 32#include <linux/swap.h> 33#include <linux/seq_file.h> 34#include <linux/slab.h> 35 36#include <linux/netfilter.h> 37#include <linux/netfilter_ipv4.h> 38#include <linux/mutex.h> 39 40#include <net/net_namespace.h> 41#include <net/ip.h> 42#ifdef CONFIG_IP_VS_IPV6 43#include <net/ipv6.h> 44#include <net/ip6_route.h> 45#endif 46#include <net/route.h> 47#include <net/sock.h> 48#include <net/genetlink.h> 49 50#include <asm/uaccess.h> 51 52#include <net/ip_vs.h> 53 54/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. 
 */
static DEFINE_MUTEX(__ip_vs_mutex);

/* lock for service table */
static DEFINE_RWLOCK(__ip_vs_svc_lock);

/* lock for table with the real services */
static DEFINE_RWLOCK(__ip_vs_rs_lock);

/* lock for state and timeout tables */
static DEFINE_RWLOCK(__ip_vs_securetcp_lock);

/* lock for drop entry handling */
static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);

/* lock for drop packet handling */
static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);

/* 1/rate drop and drop-entry variables */
int ip_vs_drop_rate = 0;
int ip_vs_drop_counter = 0;
static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);

/* number of virtual services (IPv4 only, see ip_vs_add_service()) */
static int ip_vs_num_services = 0;

/* sysctl variables */
/* defense strategy states: written both from sysctl and from
 * update_defense_level(), under the corresponding locks above */
static int sysctl_ip_vs_drop_entry = 0;
static int sysctl_ip_vs_drop_packet = 0;
static int sysctl_ip_vs_secure_tcp = 0;
/* available-memory threshold (pages) below which defense engages */
static int sysctl_ip_vs_amemthresh = 1024;
static int sysctl_ip_vs_am_droprate = 10;
int sysctl_ip_vs_cache_bypass = 0;
int sysctl_ip_vs_expire_nodest_conn = 0;
int sysctl_ip_vs_expire_quiescent_template = 0;
int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
int sysctl_ip_vs_nat_icmp_send = 0;


#ifdef CONFIG_IP_VS_DEBUG
static int sysctl_ip_vs_debug_level = 0;

/* Return the current IPVS debug level (CONFIG_IP_VS_DEBUG builds only). */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif

#ifdef CONFIG_IP_VS_IPV6
/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way?
*/ 104static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr) 105{ 106 struct rt6_info *rt; 107 struct flowi fl = { 108 .oif = 0, 109 .nl_u = { 110 .ip6_u = { 111 .daddr = *addr, 112 .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } }, 113 }; 114 115 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); 116 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK)) 117 return 1; 118 119 return 0; 120} 121#endif 122/* 123 * update_defense_level is called from keventd and from sysctl, 124 * so it needs to protect itself from softirqs 125 */ 126static void update_defense_level(void) 127{ 128 struct sysinfo i; 129 static int old_secure_tcp = 0; 130 int availmem; 131 int nomem; 132 int to_change = -1; 133 134 /* we only count free and buffered memory (in pages) */ 135 si_meminfo(&i); 136 availmem = i.freeram + i.bufferram; 137 /* however in linux 2.5 the i.bufferram is total page cache size, 138 we need adjust it */ 139 /* si_swapinfo(&i); */ 140 /* availmem = availmem - (i.totalswap - i.freeswap); */ 141 142 nomem = (availmem < sysctl_ip_vs_amemthresh); 143 144 local_bh_disable(); 145 146 /* drop_entry */ 147 spin_lock(&__ip_vs_dropentry_lock); 148 switch (sysctl_ip_vs_drop_entry) { 149 case 0: 150 atomic_set(&ip_vs_dropentry, 0); 151 break; 152 case 1: 153 if (nomem) { 154 atomic_set(&ip_vs_dropentry, 1); 155 sysctl_ip_vs_drop_entry = 2; 156 } else { 157 atomic_set(&ip_vs_dropentry, 0); 158 } 159 break; 160 case 2: 161 if (nomem) { 162 atomic_set(&ip_vs_dropentry, 1); 163 } else { 164 atomic_set(&ip_vs_dropentry, 0); 165 sysctl_ip_vs_drop_entry = 1; 166 }; 167 break; 168 case 3: 169 atomic_set(&ip_vs_dropentry, 1); 170 break; 171 } 172 spin_unlock(&__ip_vs_dropentry_lock); 173 174 /* drop_packet */ 175 spin_lock(&__ip_vs_droppacket_lock); 176 switch (sysctl_ip_vs_drop_packet) { 177 case 0: 178 ip_vs_drop_rate = 0; 179 break; 180 case 1: 181 if (nomem) { 182 ip_vs_drop_rate = ip_vs_drop_counter 183 = sysctl_ip_vs_amemthresh / 184 
(sysctl_ip_vs_amemthresh-availmem); 185 sysctl_ip_vs_drop_packet = 2; 186 } else { 187 ip_vs_drop_rate = 0; 188 } 189 break; 190 case 2: 191 if (nomem) { 192 ip_vs_drop_rate = ip_vs_drop_counter 193 = sysctl_ip_vs_amemthresh / 194 (sysctl_ip_vs_amemthresh-availmem); 195 } else { 196 ip_vs_drop_rate = 0; 197 sysctl_ip_vs_drop_packet = 1; 198 } 199 break; 200 case 3: 201 ip_vs_drop_rate = sysctl_ip_vs_am_droprate; 202 break; 203 } 204 spin_unlock(&__ip_vs_droppacket_lock); 205 206 /* secure_tcp */ 207 write_lock(&__ip_vs_securetcp_lock); 208 switch (sysctl_ip_vs_secure_tcp) { 209 case 0: 210 if (old_secure_tcp >= 2) 211 to_change = 0; 212 break; 213 case 1: 214 if (nomem) { 215 if (old_secure_tcp < 2) 216 to_change = 1; 217 sysctl_ip_vs_secure_tcp = 2; 218 } else { 219 if (old_secure_tcp >= 2) 220 to_change = 0; 221 } 222 break; 223 case 2: 224 if (nomem) { 225 if (old_secure_tcp < 2) 226 to_change = 1; 227 } else { 228 if (old_secure_tcp >= 2) 229 to_change = 0; 230 sysctl_ip_vs_secure_tcp = 1; 231 } 232 break; 233 case 3: 234 if (old_secure_tcp < 2) 235 to_change = 1; 236 break; 237 } 238 old_secure_tcp = sysctl_ip_vs_secure_tcp; 239 if (to_change >= 0) 240 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1); 241 write_unlock(&__ip_vs_securetcp_lock); 242 243 local_bh_enable(); 244} 245 246 247/* 248 * Timer for checking the defense 249 */ 250#define DEFENSE_TIMER_PERIOD 1*HZ 251static void defense_work_handler(struct work_struct *work); 252static DECLARE_DELAYED_WORK(defense_work, defense_work_handler); 253 254static void defense_work_handler(struct work_struct *work) 255{ 256 update_defense_level(); 257 if (atomic_read(&ip_vs_dropentry)) 258 ip_vs_random_dropentry(); 259 260 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD); 261} 262 263int 264ip_vs_use_count_inc(void) 265{ 266 return try_module_get(THIS_MODULE); 267} 268 269void 270ip_vs_use_count_dec(void) 271{ 272 module_put(THIS_MODULE); 273} 274 275 276/* 277 * Hash table: for virtual service 
 *	lookups
 */
#define IP_VS_SVC_TAB_BITS 8
#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)

/* the service table hashed by <protocol, addr, port> */
static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
/* the service table hashed by fwmark */
static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];

/*
 *	Hash table: for real service lookups
 */
#define IP_VS_RTAB_BITS 4
#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)

static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];

/*
 *	Trash for destinations: holds removed dests that are still
 *	referenced by connections (see ip_vs_trash_get_dest()).
 */
static LIST_HEAD(ip_vs_dest_trash);

/*
 *	FTP & NULL virtual service counters
 */
static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);


/*
 *	Returns hash value for virtual service.
 *	IPv6 addresses are folded by XORing their four 32-bit words;
 *	the result mixes proto, folded address and port into
 *	[0, IP_VS_SVC_TAB_SIZE).
 */
static __inline__ unsigned
ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
		  __be16 port)
{
	register unsigned porth = ntohs(port);
	__be32 addr_fold = addr->ip;

#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6)
		addr_fold = addr->ip6[0]^addr->ip6[1]^
			    addr->ip6[2]^addr->ip6[3];
#endif

	return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
		& IP_VS_SVC_TAB_MASK;
}

/*
 *	Returns hash value of fwmark for virtual service lookup
 */
static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
{
	return fwmark & IP_VS_SVC_TAB_MASK;
}

/*
 *	Hashes a service in the ip_vs_svc_table by <proto,addr,port>
 *	or in the ip_vs_svc_fwm_table by fwmark.
 *	Should be called with locked tables.
 */
static int ip_vs_svc_hash(struct ip_vs_service *svc)
{
	unsigned hash;

	/* refuse to double-hash: would corrupt the list linkage */
	if (svc->flags & IP_VS_SVC_F_HASHED) {
		pr_err("%s(): request for already hashed, called from %pF\n",
		       __func__, __builtin_return_address(0));
		return 0;
	}

	if (svc->fwmark == 0) {
		/*
		 *  Hash it by <protocol,addr,port> in ip_vs_svc_table
		 */
		hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
					 svc->port);
		list_add(&svc->s_list, &ip_vs_svc_table[hash]);
	} else {
		/*
		 *  Hash it by fwmark in ip_vs_svc_fwm_table
		 */
		hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
		list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
	}

	svc->flags |= IP_VS_SVC_F_HASHED;
	/* increase its refcnt because it is referenced by the svc table */
	atomic_inc(&svc->refcnt);
	return 1;
}


/*
 *	Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
 *	Should be called with locked tables.
 *	Returns 0 if the service was not hashed, 1 on success.
 */
static int ip_vs_svc_unhash(struct ip_vs_service *svc)
{
	if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
		pr_err("%s(): request for unhash flagged, called from %pF\n",
		       __func__, __builtin_return_address(0));
		return 0;
	}

	if (svc->fwmark == 0) {
		/* Remove it from the ip_vs_svc_table table */
		list_del(&svc->s_list);
	} else {
		/* Remove it from the ip_vs_svc_fwm_table table */
		list_del(&svc->f_list);
	}

	svc->flags &= ~IP_VS_SVC_F_HASHED;
	/* drop the reference held by the table */
	atomic_dec(&svc->refcnt);
	return 1;
}


/*
 *	Get service by {proto,addr,port} in the service table.
 *	On a hit the service usecnt is incremented; callers must release it
 *	with ip_vs_service_put().  Caller holds __ip_vs_svc_lock.
 */
static inline struct ip_vs_service *
__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
		    __be16 vport)
{
	unsigned hash;
	struct ip_vs_service *svc;

	/* Check for "full" addressed entries */
	hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);

	list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
		if ((svc->af == af)
		    && ip_vs_addr_equal(af, &svc->addr, vaddr)
		    && (svc->port == vport)
		    && (svc->protocol == protocol)) {
			/* HIT */
			atomic_inc(&svc->usecnt);
			return svc;
		}
	}

	return NULL;
}


/*
 *	Get service by {fwmark} in the service table.
 *	Same usecnt/locking contract as __ip_vs_service_get().
 */
static inline struct ip_vs_service *
__ip_vs_svc_fwm_get(int af, __u32 fwmark)
{
	unsigned hash;
	struct ip_vs_service *svc;

	/* Check for fwmark addressed entries */
	hash = ip_vs_svc_fwm_hashkey(fwmark);

	list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
		if (svc->fwmark == fwmark && svc->af == af) {
			/* HIT */
			atomic_inc(&svc->usecnt);
			return svc;
		}
	}

	return NULL;
}

/*
 *	Public service lookup: tries fwmark first, then <proto,addr,port>,
 *	then the FTP control port (for FTP data connections), then the
 *	catch-all port-zero service.  Returns the service with usecnt
 *	incremented, or NULL.
 */
struct ip_vs_service *
ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
		  const union nf_inet_addr *vaddr, __be16 vport)
{
	struct ip_vs_service *svc;

	read_lock(&__ip_vs_svc_lock);

	/*
	 *	Check the table hashed by fwmark first
	 */
	if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
		goto out;

	/*
	 *	Check the table hashed by <protocol,addr,port>
	 *	for "full" addressed entries
	 */
	svc = __ip_vs_service_get(af, protocol, vaddr, vport);

	if (svc == NULL
	    && protocol == IPPROTO_TCP
	    && atomic_read(&ip_vs_ftpsvc_counter)
	    && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
		/*
		 * Check if ftp service entry exists, the packet
		 * might belong to FTP data connections.
		 */
		svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
	}

	if (svc == NULL
	    && atomic_read(&ip_vs_nullsvc_counter)) {
		/*
		 * Check if the catch-all port (port zero) exists
		 */
		svc = __ip_vs_service_get(af, protocol, vaddr, 0);
	}

  out:
	read_unlock(&__ip_vs_svc_lock);

	IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
		      fwmark, ip_vs_proto_name(protocol),
		      IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
		      svc ? "hit" : "not hit");

	return svc;
}


/* Bind @dest to @svc, taking a reference on the service. */
static inline void
__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
{
	atomic_inc(&svc->refcnt);
	dest->svc = svc;
}

/*
 * Unbind @dest from its service, dropping the reference taken in
 * __ip_vs_bind_svc(); frees the service when the last reference goes.
 */
static inline void
__ip_vs_unbind_svc(struct ip_vs_dest *dest)
{
	struct ip_vs_service *svc = dest->svc;

	dest->svc = NULL;
	if (atomic_dec_and_test(&svc->refcnt))
		kfree(svc);
}


/*
 *	Returns hash value for real service
 *	(same IPv6 address folding as ip_vs_svc_hashkey()).
 */
static inline unsigned ip_vs_rs_hashkey(int af,
					const union nf_inet_addr *addr,
					__be16 port)
{
	register unsigned porth = ntohs(port);
	__be32 addr_fold = addr->ip;

#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6)
		addr_fold = addr->ip6[0]^addr->ip6[1]^
			    addr->ip6[2]^addr->ip6[3];
#endif

	return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
		& IP_VS_RTAB_MASK;
}

/*
 *	Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
 *	should be called with locked tables.
 *	Returns 0 if already hashed (d_list non-empty), 1 on success.
 */
static int ip_vs_rs_hash(struct ip_vs_dest *dest)
{
	unsigned hash;

	if (!list_empty(&dest->d_list)) {
		return 0;
	}

	/*
	 *	Hash by proto,addr,port,
	 *	which are the parameters of the real service.
	 */
	hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);

	list_add(&dest->d_list, &ip_vs_rtable[hash]);

	return 1;
}

/*
 *	UNhashes ip_vs_dest from ip_vs_rtable.
 *	should be called with locked tables.
 */
static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
{
	/*
	 * Remove it from the ip_vs_rtable table.
	 * Re-initialize d_list so list_empty() reports "unhashed"
	 * (ip_vs_rs_hash() relies on that).
	 */
	if (!list_empty(&dest->d_list)) {
		list_del(&dest->d_list);
		INIT_LIST_HEAD(&dest->d_list);
	}

	return 1;
}

/*
 *	Lookup real service by <proto,addr,port> in the real service table.
 *	NOTE: the returned dest is NOT reference-counted here.
 */
struct ip_vs_dest *
ip_vs_lookup_real_service(int af, __u16 protocol,
			  const union nf_inet_addr *daddr,
			  __be16 dport)
{
	unsigned hash;
	struct ip_vs_dest *dest;

	/*
	 *	Check for "full" addressed entries
	 *	Return the first found entry
	 */
	hash = ip_vs_rs_hashkey(af, daddr, dport);

	read_lock(&__ip_vs_rs_lock);
	list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
		if ((dest->af == af)
		    && ip_vs_addr_equal(af, &dest->addr, daddr)
		    && (dest->port == dport)
		    && ((dest->protocol == protocol) ||
			dest->vfwmark)) {
			/* HIT: fwmark services match any protocol */
			read_unlock(&__ip_vs_rs_lock);
			return dest;
		}
	}
	read_unlock(&__ip_vs_rs_lock);

	return NULL;
}

/*
 *	Lookup destination by {addr,port} in the given service
 *	(linear scan of svc->destinations; no refcount taken).
 */
static struct ip_vs_dest *
ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
		  __be16 dport)
{
	struct ip_vs_dest *dest;

	/*
	 * Find the destination for the given service
	 */
	list_for_each_entry(dest, &svc->destinations, n_list) {
		if ((dest->af == svc->af)
		    && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
		    && (dest->port == dport)) {
			/* HIT */
			return dest;
		}
	}

	return NULL;
}

/*
 *	Find destination by {daddr,dport,vaddr,protocol}
 *	Created to be used in ip_vs_process_message() in
 *	the backup synchronization daemon. It finds the
 *	destination to be bound to the received connection
 *	on the backup.
 *
 *	ip_vs_lookup_real_service() looked promising, but
 *	seems not working as expected.
647 */ 648struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr, 649 __be16 dport, 650 const union nf_inet_addr *vaddr, 651 __be16 vport, __u16 protocol) 652{ 653 struct ip_vs_dest *dest; 654 struct ip_vs_service *svc; 655 656 svc = ip_vs_service_get(af, 0, protocol, vaddr, vport); 657 if (!svc) 658 return NULL; 659 dest = ip_vs_lookup_dest(svc, daddr, dport); 660 if (dest) 661 atomic_inc(&dest->refcnt); 662 ip_vs_service_put(svc); 663 return dest; 664} 665 666/* 667 * Lookup dest by {svc,addr,port} in the destination trash. 668 * The destination trash is used to hold the destinations that are removed 669 * from the service table but are still referenced by some conn entries. 670 * The reason to add the destination trash is when the dest is temporary 671 * down (either by administrator or by monitor program), the dest can be 672 * picked back from the trash, the remaining connections to the dest can 673 * continue, and the counting information of the dest is also useful for 674 * scheduling. 
 */
static struct ip_vs_dest *
ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
		     __be16 dport)
{
	struct ip_vs_dest *dest, *nxt;

	/*
	 * Find the destination in trash
	 */
	list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
		IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
			      "dest->refcnt=%d\n",
			      dest->vfwmark,
			      IP_VS_DBG_ADDR(svc->af, &dest->addr),
			      ntohs(dest->port),
			      atomic_read(&dest->refcnt));
		/* match dest address/port AND the virtual service it
		 * belonged to (by fwmark, or by vaddr/vport) */
		if (dest->af == svc->af &&
		    ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
		    dest->port == dport &&
		    dest->vfwmark == svc->fwmark &&
		    dest->protocol == svc->protocol &&
		    (svc->fwmark ||
		     (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
		      dest->vport == svc->port))) {
			/* HIT */
			return dest;
		}

		/*
		 * Try to purge the destination from trash if not referenced
		 * (refcnt == 1 means only the trash itself holds it,
		 * see __ip_vs_del_dest()).
		 */
		if (atomic_read(&dest->refcnt) == 1) {
			IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
				      "from trash\n",
				      dest->vfwmark,
				      IP_VS_DBG_ADDR(svc->af, &dest->addr),
				      ntohs(dest->port));
			list_del(&dest->n_list);
			ip_vs_dst_reset(dest);
			__ip_vs_unbind_svc(dest);
			kfree(dest);
		}
	}

	return NULL;
}


/*
 *	Clean up all the destinations in the trash
 *	Called by the ip_vs_control_cleanup()
 *
 *	When the ip_vs_control_clearup is activated by ipvs module exit,
 *	the service tables must have been flushed and all the connections
 *	are expired, and the refcnt of each destination in the trash must
 *	be 1, so we simply release them here.
 */
static void ip_vs_trash_cleanup(void)
{
	struct ip_vs_dest *dest, *nxt;

	list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
		list_del(&dest->n_list);
		ip_vs_dst_reset(dest);
		__ip_vs_unbind_svc(dest);
		kfree(dest);
	}
}


/* Reset the byte/packet counters of @stats under its lock. */
static void
ip_vs_zero_stats(struct ip_vs_stats *stats)
{
	spin_lock_bh(&stats->lock);

	memset(&stats->ustats, 0, sizeof(stats->ustats));
	ip_vs_zero_estimator(stats);

	spin_unlock_bh(&stats->lock);
}

/*
 *	Update a destination in the given service:
 *	apply weight/flags/thresholds from @udest, detect local-node
 *	forwarding, hash the dest into the real-service table for NAT,
 *	and (re)bind the dest to @svc.
 */
static void
__ip_vs_update_dest(struct ip_vs_service *svc,
		    struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
{
	int conn_flags;

	/* set the weight and the flags */
	atomic_set(&dest->weight, udest->weight);
	conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;

	/* check if local node and update the flags */
#ifdef CONFIG_IP_VS_IPV6
	if (svc->af == AF_INET6) {
		if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
			conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
				| IP_VS_CONN_F_LOCALNODE;
		}
	} else
#endif
		if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
			conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
				| IP_VS_CONN_F_LOCALNODE;
		}

	/* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
	if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
		conn_flags |= IP_VS_CONN_F_NOOUTPUT;
	} else {
		/*
		 *    Put the real service in ip_vs_rtable if not present.
		 *    For now only for NAT!
		 */
		write_lock_bh(&__ip_vs_rs_lock);
		ip_vs_rs_hash(dest);
		write_unlock_bh(&__ip_vs_rs_lock);
	}
	atomic_set(&dest->conn_flags, conn_flags);

	/* bind the service */
	if (!dest->svc) {
		__ip_vs_bind_svc(dest, svc);
	} else {
		if (dest->svc != svc) {
			/* rebinding to a different service: stats of the
			 * old service no longer apply */
			__ip_vs_unbind_svc(dest);
			ip_vs_zero_stats(&dest->stats);
			__ip_vs_bind_svc(dest, svc);
		}
	}

	/* set the dest status flags */
	dest->flags |= IP_VS_DEST_F_AVAILABLE;

	/* raising (or removing) the upper threshold clears overload */
	if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
		dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
	dest->u_threshold = udest->u_threshold;
	dest->l_threshold = udest->l_threshold;
}


/*
 *	Create a destination for the given service.
 *	Returns 0 and stores the new dest in *dest_p, or a -errno:
 *	-EINVAL for an unusable address, -ENOMEM on allocation failure.
 */
static int
ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
	       struct ip_vs_dest **dest_p)
{
	struct ip_vs_dest *dest;
	unsigned atype;

	EnterFunction(2);

#ifdef CONFIG_IP_VS_IPV6
	if (svc->af == AF_INET6) {
		/* accept unicast non-link-local addresses, or any
		 * address assigned to the local host */
		atype = ipv6_addr_type(&udest->addr.in6);
		if ((!(atype & IPV6_ADDR_UNICAST) ||
		     atype & IPV6_ADDR_LINKLOCAL) &&
		    !__ip_vs_addr_is_local_v6(&udest->addr.in6))
			return -EINVAL;
	} else
#endif
	{
		atype = inet_addr_type(&init_net, udest->addr.ip);
		if (atype != RTN_LOCAL && atype != RTN_UNICAST)
			return -EINVAL;
	}

	dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
	if (dest == NULL) {
		pr_err("%s(): no memory.\n", __func__);
		return -ENOMEM;
	}

	dest->af = svc->af;
	dest->protocol = svc->protocol;
	dest->vaddr = svc->addr;
	dest->vport = svc->port;
	dest->vfwmark = svc->fwmark;
	ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
	dest->port = udest->port;

	atomic_set(&dest->activeconns, 0);
	atomic_set(&dest->inactconns, 0);
	atomic_set(&dest->persistconns, 0);
	atomic_set(&dest->refcnt, 0);

	INIT_LIST_HEAD(&dest->d_list);
	spin_lock_init(&dest->dst_lock);
	spin_lock_init(&dest->stats.lock);
	__ip_vs_update_dest(svc, dest, udest);
	ip_vs_new_estimator(&dest->stats);

	*dest_p = dest;

	LeaveFunction(2);
	return 0;
}


/*
 *	Add a destination into an existing service.
 *	Revives a matching dest from the trash when possible; otherwise
 *	allocates a new one.  Returns 0 or -ERANGE/-EEXIST/-errno.
 */
static int
ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
	struct ip_vs_dest *dest;
	union nf_inet_addr daddr;
	__be16 dport = udest->port;
	int ret;

	EnterFunction(2);

	if (udest->weight < 0) {
		pr_err("%s(): server weight less than zero\n", __func__);
		return -ERANGE;
	}

	if (udest->l_threshold > udest->u_threshold) {
		pr_err("%s(): lower threshold is higher than upper threshold\n",
			__func__);
		return -ERANGE;
	}

	ip_vs_addr_copy(svc->af, &daddr, &udest->addr);

	/*
	 * Check if the dest already exists in the list
	 */
	dest = ip_vs_lookup_dest(svc, &daddr, dport);

	if (dest != NULL) {
		IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
		return -EEXIST;
	}

	/*
	 * Check if the dest already exists in the trash and
	 * is from the same service
	 */
	dest = ip_vs_trash_get_dest(svc, &daddr, dport);

	if (dest != NULL) {
		IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
			      "dest->refcnt=%d, service %u/%s:%u\n",
			      IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
			      atomic_read(&dest->refcnt),
			      dest->vfwmark,
			      IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
			      ntohs(dest->vport));

		__ip_vs_update_dest(svc, dest, udest);

		/*
		 * Get the destination from the trash
		 */
		list_del(&dest->n_list);

		ip_vs_new_estimator(&dest->stats);

		write_lock_bh(&__ip_vs_svc_lock);

		/*
		 * Wait until all other svc users go away.
		 */
		IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);

		list_add(&dest->n_list, &svc->destinations);
		svc->num_dests++;

		/* call the update_service function of its scheduler */
		if (svc->scheduler->update_service)
			svc->scheduler->update_service(svc);

		write_unlock_bh(&__ip_vs_svc_lock);
		return 0;
	}

	/*
	 * Allocate and initialize the dest structure
	 */
	ret = ip_vs_new_dest(svc, udest, &dest);
	if (ret) {
		return ret;
	}

	/*
	 * Add the dest entry into the list
	 * (the reference is held by the service's destination list)
	 */
	atomic_inc(&dest->refcnt);

	write_lock_bh(&__ip_vs_svc_lock);

	/*
	 * Wait until all other svc users go away.
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);

	list_add(&dest->n_list, &svc->destinations);
	svc->num_dests++;

	/* call the update_service function of its scheduler */
	if (svc->scheduler->update_service)
		svc->scheduler->update_service(svc);

	write_unlock_bh(&__ip_vs_svc_lock);

	LeaveFunction(2);

	return 0;
}


/*
 *	Edit a destination in the given service.
 *	Returns 0, or -ERANGE on bad weight/thresholds, -ENOENT when the
 *	destination does not exist in the service.
 */
static int
ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
	struct ip_vs_dest *dest;
	union nf_inet_addr daddr;
	__be16 dport = udest->port;

	EnterFunction(2);

	if (udest->weight < 0) {
		pr_err("%s(): server weight less than zero\n", __func__);
		return -ERANGE;
	}

	if (udest->l_threshold > udest->u_threshold) {
		pr_err("%s(): lower threshold is higher than upper threshold\n",
			__func__);
		return -ERANGE;
	}

	ip_vs_addr_copy(svc->af, &daddr, &udest->addr);

	/*
	 *  Lookup the destination list
	 */
	dest = ip_vs_lookup_dest(svc, &daddr, dport);

	if (dest == NULL) {
		IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
		return -ENOENT;
	}

	__ip_vs_update_dest(svc, dest, udest);

	write_lock_bh(&__ip_vs_svc_lock);

	/* Wait until all other svc users go away */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);

	/* call the update_service, because server weight may be changed */
	if (svc->scheduler->update_service)
		svc->scheduler->update_service(svc);

	write_unlock_bh(&__ip_vs_svc_lock);

	LeaveFunction(2);

	return 0;
}


/*
 *	Delete a destination (must be already unlinked from the service)
 */
static void __ip_vs_del_dest(struct ip_vs_dest *dest)
{
	ip_vs_kill_estimator(&dest->stats);

	/*
	 *  Remove it from the d-linked list with the real services.
	 */
	write_lock_bh(&__ip_vs_rs_lock);
	ip_vs_rs_unhash(dest);
	write_unlock_bh(&__ip_vs_rs_lock);

	/*
	 *  Decrease the refcnt of the dest, and free the dest
	 *  if nobody refers to it (refcnt=0). Otherwise, throw
	 *  the destination into the trash.
	 */
	if (atomic_dec_and_test(&dest->refcnt)) {
		ip_vs_dst_reset(dest);
		/* simply decrease svc->refcnt here, let the caller check
		   and release the service if nobody refers to it.
		   Only user context can release destination and service,
		   and only one user context can update virtual service at a
		   time, so the operation here is OK */
		atomic_dec(&dest->svc->refcnt);
		kfree(dest);
	} else {
		IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
			      "dest->refcnt=%d\n",
			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
			      ntohs(dest->port),
			      atomic_read(&dest->refcnt));
		/* the trash list itself holds one reference */
		list_add(&dest->n_list, &ip_vs_dest_trash);
		atomic_inc(&dest->refcnt);
	}
}


/*
 *	Unlink a destination from the given service.
 *	@svcupd: non-zero to notify the scheduler of the change.
 *	Caller holds __ip_vs_svc_lock for writing.
 */
static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
				struct ip_vs_dest *dest,
				int svcupd)
{
	dest->flags &= ~IP_VS_DEST_F_AVAILABLE;

	/*
	 *  Remove it from the d-linked destination list.
	 */
	list_del(&dest->n_list);
	svc->num_dests--;

	/*
	 *  Call the update_service function of its scheduler
	 */
	if (svcupd && svc->scheduler->update_service)
		svc->scheduler->update_service(svc);
}


/*
 *	Delete a destination server in the given service.
 *	Returns 0, or -ENOENT when the destination does not exist.
 */
static int
ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
	struct ip_vs_dest *dest;
	__be16 dport = udest->port;

	EnterFunction(2);

	dest = ip_vs_lookup_dest(svc, &udest->addr, dport);

	if (dest == NULL) {
		IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
		return -ENOENT;
	}

	write_lock_bh(&__ip_vs_svc_lock);

	/*
	 *	Wait until all other svc users go away.
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);

	/*
	 *	Unlink dest from the service
	 */
	__ip_vs_unlink_dest(svc, dest, 1);

	write_unlock_bh(&__ip_vs_svc_lock);

	/*
	 *	Delete the destination
	 */
	__ip_vs_del_dest(dest);

	LeaveFunction(2);

	return 0;
}


/*
 *	Add a service into the service hash table.
 *	On success stores the new service (usecnt already 1, held by the
 *	caller) in *svc_p and returns 0; on failure rolls back the
 *	scheduler binding and module refcount and returns a -errno.
 */
static int
ip_vs_add_service(struct ip_vs_service_user_kern *u,
		  struct ip_vs_service **svc_p)
{
	int ret = 0;
	struct ip_vs_scheduler *sched = NULL;
	struct ip_vs_service *svc = NULL;

	/* increase the module use count */
	ip_vs_use_count_inc();

	/* Lookup the scheduler by 'u->sched_name' */
	sched = ip_vs_scheduler_get(u->sched_name);
	if (sched == NULL) {
		pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
		ret = -ENOENT;
		goto out_mod_dec;
	}

#ifdef CONFIG_IP_VS_IPV6
	if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
		ret = -EINVAL;
		goto out_err;
	}
#endif

	svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
	if (svc == NULL) {
		IP_VS_DBG(1, "%s(): no memory\n", __func__);
		ret = -ENOMEM;
		goto out_err;
	}

	/* I'm the first user of the service */
	atomic_set(&svc->usecnt, 1);
	atomic_set(&svc->refcnt, 0);

	svc->af = u->af;
	svc->protocol = u->protocol;
	ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
	svc->port = u->port;
	svc->fwmark = u->fwmark;
	svc->flags = u->flags;
	svc->timeout = u->timeout * HZ;
	svc->netmask = u->netmask;

	INIT_LIST_HEAD(&svc->destinations);
	rwlock_init(&svc->sched_lock);
	spin_lock_init(&svc->stats.lock);

	/* Bind the scheduler */
	ret = ip_vs_bind_scheduler(svc, sched);
	if (ret)
		goto out_err;
	/* ownership of the scheduler ref moved to svc; don't put it
	 * again in the error path */
	sched = NULL;

	/* Update the virtual service counters */
	if (svc->port == FTPPORT)
		atomic_inc(&ip_vs_ftpsvc_counter);
	else if (svc->port == 0)
		atomic_inc(&ip_vs_nullsvc_counter);

	ip_vs_new_estimator(&svc->stats);

	/* Count only IPv4 services for old get/setsockopt interface */
	if (svc->af == AF_INET)
		ip_vs_num_services++;

	/* Hash the service into the service table */
	write_lock_bh(&__ip_vs_svc_lock);
	ip_vs_svc_hash(svc);
	write_unlock_bh(&__ip_vs_svc_lock);

	*svc_p = svc;
	return 0;

  out_err:
	if (svc != NULL) {
		if (svc->scheduler)
			ip_vs_unbind_scheduler(svc);
		if (svc->inc) {
			local_bh_disable();
			ip_vs_app_inc_put(svc->inc);
			local_bh_enable();
		}
		kfree(svc);
	}
	ip_vs_scheduler_put(sched);

  out_mod_dec:
	/* decrease the module use count */
	ip_vs_use_count_dec();

	return ret;
}


/*
 *	Edit a service and bind it with a new scheduler.
 *	Returns 0, -ENOENT when the scheduler module is missing, -EINVAL
 *	for a bad IPv6 netmask, or the scheduler (un)bind error.
 */
static int
ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
{
	struct ip_vs_scheduler *sched, *old_sched;
	int ret = 0;

	/*
	 * Lookup the scheduler, by 'u->sched_name'
	 */
	sched = ip_vs_scheduler_get(u->sched_name);
	if (sched == NULL) {
		pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
		return -ENOENT;
	}
	/* old_sched tracks which scheduler ref must be put on exit */
	old_sched = sched;

#ifdef CONFIG_IP_VS_IPV6
	if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
		ret = -EINVAL;
		goto out;
	}
#endif

	write_lock_bh(&__ip_vs_svc_lock);

	/*
	 * Wait until all other svc users go away.
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);

	/*
	 * Set the flags and timeout value
	 */
	svc->flags = u->flags | IP_VS_SVC_F_HASHED;
	svc->timeout = u->timeout * HZ;
	svc->netmask = u->netmask;

	old_sched = svc->scheduler;
	if (sched != old_sched) {
		/*
		 * Unbind the old scheduler
		 */
		if ((ret = ip_vs_unbind_scheduler(svc))) {
			old_sched = sched;
			goto out_unlock;
		}

		/*
		 * Bind the new scheduler
		 */
		if ((ret = ip_vs_bind_scheduler(svc, sched))) {
			/*
			 * If ip_vs_bind_scheduler fails, restore the old
			 * scheduler.
			 * The main reason of failure is out of memory.
			 *
			 * The question is if the old scheduler can be
			 * restored all the time. TODO: if it cannot be
			 * restored some time, we must delete the service,
			 * otherwise the system may crash.
			 */
			ip_vs_bind_scheduler(svc, old_sched);
			old_sched = sched;
			goto out_unlock;
		}
	}

  out_unlock:
	write_unlock_bh(&__ip_vs_svc_lock);
#ifdef CONFIG_IP_VS_IPV6
  out:
#endif

	if (old_sched)
		ip_vs_scheduler_put(old_sched);

	return ret;
}


/*
 *	Delete a service from the service list
 *	- The service must be unlinked, unlocked and not referenced!
 * - We are called under _bh lock
 */
static void __ip_vs_del_service(struct ip_vs_service *svc)
{
	struct ip_vs_dest *dest, *nxt;
	struct ip_vs_scheduler *old_sched;

	/* Count only IPv4 services for old get/setsockopt interface */
	if (svc->af == AF_INET)
		ip_vs_num_services--;

	/* Stop rate estimation on this service's stats */
	ip_vs_kill_estimator(&svc->stats);

	/* Unbind scheduler */
	old_sched = svc->scheduler;
	ip_vs_unbind_scheduler(svc);
	if (old_sched)
		ip_vs_scheduler_put(old_sched);

	/* Unbind app inc */
	if (svc->inc) {
		ip_vs_app_inc_put(svc->inc);
		svc->inc = NULL;
	}

	/*
	 *    Unlink the whole destination list
	 */
	list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
		__ip_vs_unlink_dest(svc, dest, 0);
		__ip_vs_del_dest(dest);
	}

	/*
	 *    Update the virtual service counters
	 */
	if (svc->port == FTPPORT)
		atomic_dec(&ip_vs_ftpsvc_counter);
	else if (svc->port == 0)
		atomic_dec(&ip_vs_nullsvc_counter);

	/*
	 *    Free the service if nobody refers to it.
	 *    NOTE(review): presumably a remaining refcnt holder frees it
	 *    later when it drops the last reference — confirm against
	 *    ip_vs_service_put()/refcnt users elsewhere in the file.
	 */
	if (atomic_read(&svc->refcnt) == 0)
		kfree(svc);

	/* decrease the module use count */
	ip_vs_use_count_dec();
}

/*
 *	Delete a service from the service list.
 *	Unhashes the service under the _bh write lock, spins until all
 *	concurrent users drop to a single reference, then tears it down.
 */
static int ip_vs_del_service(struct ip_vs_service *svc)
{
	if (svc == NULL)
		return -EEXIST;	/* NOTE(review): -ESRCH would read better, but
				 * callers may depend on this errno */

	/*
	 * Unhash it from the service table
	 */
	write_lock_bh(&__ip_vs_svc_lock);

	ip_vs_svc_unhash(svc);

	/*
	 * Wait until all the svc users go away.
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);

	__ip_vs_del_service(svc);

	write_unlock_bh(&__ip_vs_svc_lock);

	return 0;
}


/*
 *	Flush all the virtual services
 */
static int ip_vs_flush(void)
{
	int idx;
	struct ip_vs_service *svc, *nxt;

	/*
	 * Flush the service table hashed by <protocol,addr,port>
	 */
	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
			write_lock_bh(&__ip_vs_svc_lock);
			ip_vs_svc_unhash(svc);
			/*
			 * Wait until all the svc users go away.
1450 */ 1451 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); 1452 __ip_vs_del_service(svc); 1453 write_unlock_bh(&__ip_vs_svc_lock); 1454 } 1455 } 1456 1457 return 0; 1458} 1459 1460 1461/* 1462 * Zero counters in a service or all services 1463 */ 1464static int ip_vs_zero_service(struct ip_vs_service *svc) 1465{ 1466 struct ip_vs_dest *dest; 1467 1468 write_lock_bh(&__ip_vs_svc_lock); 1469 list_for_each_entry(dest, &svc->destinations, n_list) { 1470 ip_vs_zero_stats(&dest->stats); 1471 } 1472 ip_vs_zero_stats(&svc->stats); 1473 write_unlock_bh(&__ip_vs_svc_lock); 1474 return 0; 1475} 1476 1477static int ip_vs_zero_all(void) 1478{ 1479 int idx; 1480 struct ip_vs_service *svc; 1481 1482 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1483 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 1484 ip_vs_zero_service(svc); 1485 } 1486 } 1487 1488 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1489 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 1490 ip_vs_zero_service(svc); 1491 } 1492 } 1493 1494 ip_vs_zero_stats(&ip_vs_stats); 1495 return 0; 1496} 1497 1498 1499static int 1500proc_do_defense_mode(ctl_table *table, int write, 1501 void __user *buffer, size_t *lenp, loff_t *ppos) 1502{ 1503 int *valp = table->data; 1504 int val = *valp; 1505 int rc; 1506 1507 rc = proc_dointvec(table, write, buffer, lenp, ppos); 1508 if (write && (*valp != val)) { 1509 if ((*valp < 0) || (*valp > 3)) { 1510 /* Restore the correct value */ 1511 *valp = val; 1512 } else { 1513 update_defense_level(); 1514 } 1515 } 1516 return rc; 1517} 1518 1519 1520static int 1521proc_do_sync_threshold(ctl_table *table, int write, 1522 void __user *buffer, size_t *lenp, loff_t *ppos) 1523{ 1524 int *valp = table->data; 1525 int val[2]; 1526 int rc; 1527 1528 /* backup the value first */ 1529 memcpy(val, valp, sizeof(val)); 1530 1531 rc = proc_dointvec(table, write, buffer, lenp, ppos); 1532 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) { 1533 /* Restore the 
correct value */ 1534 memcpy(valp, val, sizeof(val)); 1535 } 1536 return rc; 1537} 1538 1539 1540/* 1541 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) 1542 */ 1543 1544static struct ctl_table vs_vars[] = { 1545 { 1546 .procname = "amemthresh", 1547 .data = &sysctl_ip_vs_amemthresh, 1548 .maxlen = sizeof(int), 1549 .mode = 0644, 1550 .proc_handler = proc_dointvec, 1551 }, 1552#ifdef CONFIG_IP_VS_DEBUG 1553 { 1554 .procname = "debug_level", 1555 .data = &sysctl_ip_vs_debug_level, 1556 .maxlen = sizeof(int), 1557 .mode = 0644, 1558 .proc_handler = proc_dointvec, 1559 }, 1560#endif 1561 { 1562 .procname = "am_droprate", 1563 .data = &sysctl_ip_vs_am_droprate, 1564 .maxlen = sizeof(int), 1565 .mode = 0644, 1566 .proc_handler = proc_dointvec, 1567 }, 1568 { 1569 .procname = "drop_entry", 1570 .data = &sysctl_ip_vs_drop_entry, 1571 .maxlen = sizeof(int), 1572 .mode = 0644, 1573 .proc_handler = proc_do_defense_mode, 1574 }, 1575 { 1576 .procname = "drop_packet", 1577 .data = &sysctl_ip_vs_drop_packet, 1578 .maxlen = sizeof(int), 1579 .mode = 0644, 1580 .proc_handler = proc_do_defense_mode, 1581 }, 1582 { 1583 .procname = "secure_tcp", 1584 .data = &sysctl_ip_vs_secure_tcp, 1585 .maxlen = sizeof(int), 1586 .mode = 0644, 1587 .proc_handler = proc_do_defense_mode, 1588 }, 1589 { 1590 .procname = "cache_bypass", 1591 .data = &sysctl_ip_vs_cache_bypass, 1592 .maxlen = sizeof(int), 1593 .mode = 0644, 1594 .proc_handler = proc_dointvec, 1595 }, 1596 { 1597 .procname = "expire_nodest_conn", 1598 .data = &sysctl_ip_vs_expire_nodest_conn, 1599 .maxlen = sizeof(int), 1600 .mode = 0644, 1601 .proc_handler = proc_dointvec, 1602 }, 1603 { 1604 .procname = "expire_quiescent_template", 1605 .data = &sysctl_ip_vs_expire_quiescent_template, 1606 .maxlen = sizeof(int), 1607 .mode = 0644, 1608 .proc_handler = proc_dointvec, 1609 }, 1610 { 1611 .procname = "sync_threshold", 1612 .data = &sysctl_ip_vs_sync_threshold, 1613 .maxlen = sizeof(sysctl_ip_vs_sync_threshold), 1614 .mode = 
0644, 1615 .proc_handler = proc_do_sync_threshold, 1616 }, 1617 { 1618 .procname = "nat_icmp_send", 1619 .data = &sysctl_ip_vs_nat_icmp_send, 1620 .maxlen = sizeof(int), 1621 .mode = 0644, 1622 .proc_handler = proc_dointvec, 1623 }, 1624 { } 1625}; 1626 1627const struct ctl_path net_vs_ctl_path[] = { 1628 { .procname = "net", }, 1629 { .procname = "ipv4", }, 1630 { .procname = "vs", }, 1631 { } 1632}; 1633EXPORT_SYMBOL_GPL(net_vs_ctl_path); 1634 1635static struct ctl_table_header * sysctl_header; 1636 1637#ifdef CONFIG_PROC_FS 1638 1639struct ip_vs_iter { 1640 struct list_head *table; 1641 int bucket; 1642}; 1643 1644/* 1645 * Write the contents of the VS rule table to a PROCfs file. 1646 * (It is kept just for backward compatibility) 1647 */ 1648static inline const char *ip_vs_fwd_name(unsigned flags) 1649{ 1650 switch (flags & IP_VS_CONN_F_FWD_MASK) { 1651 case IP_VS_CONN_F_LOCALNODE: 1652 return "Local"; 1653 case IP_VS_CONN_F_TUNNEL: 1654 return "Tunnel"; 1655 case IP_VS_CONN_F_DROUTE: 1656 return "Route"; 1657 default: 1658 return "Masq"; 1659 } 1660} 1661 1662 1663/* Get the Nth entry in the two lists */ 1664static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) 1665{ 1666 struct ip_vs_iter *iter = seq->private; 1667 int idx; 1668 struct ip_vs_service *svc; 1669 1670 /* look in hash by protocol */ 1671 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1672 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 1673 if (pos-- == 0){ 1674 iter->table = ip_vs_svc_table; 1675 iter->bucket = idx; 1676 return svc; 1677 } 1678 } 1679 } 1680 1681 /* keep looking in fwmark */ 1682 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1683 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 1684 if (pos-- == 0) { 1685 iter->table = ip_vs_svc_fwm_table; 1686 iter->bucket = idx; 1687 return svc; 1688 } 1689 } 1690 } 1691 1692 return NULL; 1693} 1694 1695static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos) 
1696__acquires(__ip_vs_svc_lock) 1697{ 1698 1699 read_lock_bh(&__ip_vs_svc_lock); 1700 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN; 1701} 1702 1703 1704static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) 1705{ 1706 struct list_head *e; 1707 struct ip_vs_iter *iter; 1708 struct ip_vs_service *svc; 1709 1710 ++*pos; 1711 if (v == SEQ_START_TOKEN) 1712 return ip_vs_info_array(seq,0); 1713 1714 svc = v; 1715 iter = seq->private; 1716 1717 if (iter->table == ip_vs_svc_table) { 1718 /* next service in table hashed by protocol */ 1719 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket]) 1720 return list_entry(e, struct ip_vs_service, s_list); 1721 1722 1723 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { 1724 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket], 1725 s_list) { 1726 return svc; 1727 } 1728 } 1729 1730 iter->table = ip_vs_svc_fwm_table; 1731 iter->bucket = -1; 1732 goto scan_fwmark; 1733 } 1734 1735 /* next service in hashed by fwmark */ 1736 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket]) 1737 return list_entry(e, struct ip_vs_service, f_list); 1738 1739 scan_fwmark: 1740 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { 1741 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket], 1742 f_list) 1743 return svc; 1744 } 1745 1746 return NULL; 1747} 1748 1749static void ip_vs_info_seq_stop(struct seq_file *seq, void *v) 1750__releases(__ip_vs_svc_lock) 1751{ 1752 read_unlock_bh(&__ip_vs_svc_lock); 1753} 1754 1755 1756static int ip_vs_info_seq_show(struct seq_file *seq, void *v) 1757{ 1758 if (v == SEQ_START_TOKEN) { 1759 seq_printf(seq, 1760 "IP Virtual Server version %d.%d.%d (size=%d)\n", 1761 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size); 1762 seq_puts(seq, 1763 "Prot LocalAddress:Port Scheduler Flags\n"); 1764 seq_puts(seq, 1765 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n"); 1766 } else { 1767 const struct ip_vs_service *svc = v; 1768 const struct ip_vs_iter 
*iter = seq->private; 1769 const struct ip_vs_dest *dest; 1770 1771 if (iter->table == ip_vs_svc_table) { 1772#ifdef CONFIG_IP_VS_IPV6 1773 if (svc->af == AF_INET6) 1774 seq_printf(seq, "%s [%pI6]:%04X %s ", 1775 ip_vs_proto_name(svc->protocol), 1776 &svc->addr.in6, 1777 ntohs(svc->port), 1778 svc->scheduler->name); 1779 else 1780#endif 1781 seq_printf(seq, "%s %08X:%04X %s %s ", 1782 ip_vs_proto_name(svc->protocol), 1783 ntohl(svc->addr.ip), 1784 ntohs(svc->port), 1785 svc->scheduler->name, 1786 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); 1787 } else { 1788 seq_printf(seq, "FWM %08X %s %s", 1789 svc->fwmark, svc->scheduler->name, 1790 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); 1791 } 1792 1793 if (svc->flags & IP_VS_SVC_F_PERSISTENT) 1794 seq_printf(seq, "persistent %d %08X\n", 1795 svc->timeout, 1796 ntohl(svc->netmask)); 1797 else 1798 seq_putc(seq, '\n'); 1799 1800 list_for_each_entry(dest, &svc->destinations, n_list) { 1801#ifdef CONFIG_IP_VS_IPV6 1802 if (dest->af == AF_INET6) 1803 seq_printf(seq, 1804 " -> [%pI6]:%04X" 1805 " %-7s %-6d %-10d %-10d\n", 1806 &dest->addr.in6, 1807 ntohs(dest->port), 1808 ip_vs_fwd_name(atomic_read(&dest->conn_flags)), 1809 atomic_read(&dest->weight), 1810 atomic_read(&dest->activeconns), 1811 atomic_read(&dest->inactconns)); 1812 else 1813#endif 1814 seq_printf(seq, 1815 " -> %08X:%04X " 1816 "%-7s %-6d %-10d %-10d\n", 1817 ntohl(dest->addr.ip), 1818 ntohs(dest->port), 1819 ip_vs_fwd_name(atomic_read(&dest->conn_flags)), 1820 atomic_read(&dest->weight), 1821 atomic_read(&dest->activeconns), 1822 atomic_read(&dest->inactconns)); 1823 1824 } 1825 } 1826 return 0; 1827} 1828 1829static const struct seq_operations ip_vs_info_seq_ops = { 1830 .start = ip_vs_info_seq_start, 1831 .next = ip_vs_info_seq_next, 1832 .stop = ip_vs_info_seq_stop, 1833 .show = ip_vs_info_seq_show, 1834}; 1835 1836static int ip_vs_info_open(struct inode *inode, struct file *file) 1837{ 1838 return seq_open_private(file, &ip_vs_info_seq_ops, 1839 
sizeof(struct ip_vs_iter)); 1840} 1841 1842static const struct file_operations ip_vs_info_fops = { 1843 .owner = THIS_MODULE, 1844 .open = ip_vs_info_open, 1845 .read = seq_read, 1846 .llseek = seq_lseek, 1847 .release = seq_release_private, 1848}; 1849 1850#endif 1851 1852struct ip_vs_stats ip_vs_stats = { 1853 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock), 1854}; 1855 1856#ifdef CONFIG_PROC_FS 1857static int ip_vs_stats_show(struct seq_file *seq, void *v) 1858{ 1859 1860/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 1861 seq_puts(seq, 1862 " Total Incoming Outgoing Incoming Outgoing\n"); 1863 seq_printf(seq, 1864 " Conns Packets Packets Bytes Bytes\n"); 1865 1866 spin_lock_bh(&ip_vs_stats.lock); 1867 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns, 1868 ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts, 1869 (unsigned long long) ip_vs_stats.ustats.inbytes, 1870 (unsigned long long) ip_vs_stats.ustats.outbytes); 1871 1872/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 1873 seq_puts(seq, 1874 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); 1875 seq_printf(seq,"%8X %8X %8X %16X %16X\n", 1876 ip_vs_stats.ustats.cps, 1877 ip_vs_stats.ustats.inpps, 1878 ip_vs_stats.ustats.outpps, 1879 ip_vs_stats.ustats.inbps, 1880 ip_vs_stats.ustats.outbps); 1881 spin_unlock_bh(&ip_vs_stats.lock); 1882 1883 return 0; 1884} 1885 1886static int ip_vs_stats_seq_open(struct inode *inode, struct file *file) 1887{ 1888 return single_open(file, ip_vs_stats_show, NULL); 1889} 1890 1891static const struct file_operations ip_vs_stats_fops = { 1892 .owner = THIS_MODULE, 1893 .open = ip_vs_stats_seq_open, 1894 .read = seq_read, 1895 .llseek = seq_lseek, 1896 .release = single_release, 1897}; 1898 1899#endif 1900 1901/* 1902 * Set timeout values for tcp tcpfin udp in the timeout_table. 
 */
/*
 *	Apply user-supplied (tcp, tcpfin, udp) timeouts, given in seconds.
 *	A zero value leaves the corresponding timeout unchanged; protocols
 *	not compiled in are skipped.  Always returns 0.
 */
static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
{
	IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
		  u->tcp_timeout,
		  u->tcp_fin_timeout,
		  u->udp_timeout);

#ifdef CONFIG_IP_VS_PROTO_TCP
	if (u->tcp_timeout) {
		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
			= u->tcp_timeout * HZ;
	}

	if (u->tcp_fin_timeout) {
		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
			= u->tcp_fin_timeout * HZ;
	}
#endif

#ifdef CONFIG_IP_VS_PROTO_UDP
	if (u->udp_timeout) {
		ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
			= u->udp_timeout * HZ;
	}
#endif
	return 0;
}


/* Offset of an IP_VS_SO_SET_* command into set_arglen[] */
#define SET_CMDID(cmd)		(cmd - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN		SVCDEST_ARG_LEN

/* Exact argument length required for each IP_VS_SO_SET_* command */
static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
	[SET_CMDID(IP_VS_SO_SET_ADD)]		= SERVICE_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_EDIT)]		= SERVICE_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_DEL)]		= SERVICE_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_FLUSH)]		= 0,
	[SET_CMDID(IP_VS_SO_SET_ADDDEST)]	= SVCDEST_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_DELDEST)]	= SVCDEST_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_EDITDEST)]	= SVCDEST_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_TIMEOUT)]	= TIMEOUT_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]	= DAEMON_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]	= DAEMON_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_ZERO)]		= SERVICE_ARG_LEN,
};

/*
 *	Convert the old-style (IPv4-only) sockopt service struct to the
 *	extended kernel-internal representation; af is forced to AF_INET.
 */
static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
				   struct ip_vs_service_user *usvc_compat)
{
	usvc->af		= AF_INET;
	usvc->protocol		= usvc_compat->protocol;
	usvc->addr.ip		= usvc_compat->addr;
	usvc->port		= usvc_compat->port;
	usvc->fwmark		= usvc_compat->fwmark;

	/* Deep copy of sched_name is not needed here */
	usvc->sched_name	= usvc_compat->sched_name;

	usvc->flags		= usvc_compat->flags;
	usvc->timeout		= usvc_compat->timeout;
	usvc->netmask		= usvc_compat->netmask;
}

/*
 *	Convert the old-style sockopt destination struct to the extended
 *	kernel-internal representation.
 */
static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
				    struct ip_vs_dest_user *udest_compat)
{
	udest->addr.ip		= udest_compat->addr;
	udest->port		= udest_compat->port;
	udest->conn_flags	= udest_compat->conn_flags;
	udest->weight		= udest_compat->weight;
	udest->u_threshold	= udest_compat->u_threshold;
	udest->l_threshold	= udest_compat->l_threshold;
}

/*
 *	setsockopt() entry point for the legacy IPVS interface.
 *	Validates command and argument length, copies the argument in,
 *	then dispatches on cmd under __ip_vs_mutex.
 */
static int
do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
{
	int ret;
	unsigned char arg[MAX_ARG_LEN];
	struct ip_vs_service_user *usvc_compat;
	struct ip_vs_service_user_kern usvc;
	struct ip_vs_service *svc;
	struct ip_vs_dest_user *udest_compat;
	struct ip_vs_dest_user_kern udest;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
		return -EINVAL;
	/* NOTE(review): len is unsigned, so the len < 0 test is always false;
	 * the len > MAX_ARG_LEN bound is the one doing the work. */
	if (len < 0 || len > MAX_ARG_LEN)
		return -EINVAL;
	if (len != set_arglen[SET_CMDID(cmd)]) {
		pr_err("set_ctl: len %u != %u\n",
		       len, set_arglen[SET_CMDID(cmd)]);
		return -EINVAL;
	}

	if (copy_from_user(arg, user, len) != 0)
		return -EFAULT;

	/* increase the module use count */
	ip_vs_use_count_inc();

	if (mutex_lock_interruptible(&__ip_vs_mutex)) {
		ret = -ERESTARTSYS;
		goto out_dec;
	}

	if (cmd == IP_VS_SO_SET_FLUSH) {
		/* Flush the virtual service */
		ret = ip_vs_flush();
		goto out_unlock;
	} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
		/* Set timeout values for (tcp tcpfin udp) */
2024 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg); 2025 goto out_unlock; 2026 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) { 2027 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; 2028 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid); 2029 goto out_unlock; 2030 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) { 2031 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; 2032 ret = stop_sync_thread(dm->state); 2033 goto out_unlock; 2034 } 2035 2036 usvc_compat = (struct ip_vs_service_user *)arg; 2037 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1); 2038 2039 /* We only use the new structs internally, so copy userspace compat 2040 * structs to extended internal versions */ 2041 ip_vs_copy_usvc_compat(&usvc, usvc_compat); 2042 ip_vs_copy_udest_compat(&udest, udest_compat); 2043 2044 if (cmd == IP_VS_SO_SET_ZERO) { 2045 /* if no service address is set, zero counters in all */ 2046 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) { 2047 ret = ip_vs_zero_all(); 2048 goto out_unlock; 2049 } 2050 } 2051 2052 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */ 2053 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP && 2054 usvc.protocol != IPPROTO_SCTP) { 2055 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n", 2056 usvc.protocol, &usvc.addr.ip, 2057 ntohs(usvc.port), usvc.sched_name); 2058 ret = -EFAULT; 2059 goto out_unlock; 2060 } 2061 2062 /* Lookup the exact service by <protocol, addr, port> or fwmark */ 2063 if (usvc.fwmark == 0) 2064 svc = __ip_vs_service_get(usvc.af, usvc.protocol, 2065 &usvc.addr, usvc.port); 2066 else 2067 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark); 2068 2069 if (cmd != IP_VS_SO_SET_ADD 2070 && (svc == NULL || svc->protocol != usvc.protocol)) { 2071 ret = -ESRCH; 2072 goto out_unlock; 2073 } 2074 2075 switch (cmd) { 2076 case IP_VS_SO_SET_ADD: 2077 if (svc != NULL) 2078 ret = -EEXIST; 2079 else 2080 ret = ip_vs_add_service(&usvc, &svc); 2081 break; 2082 
case IP_VS_SO_SET_EDIT: 2083 ret = ip_vs_edit_service(svc, &usvc); 2084 break; 2085 case IP_VS_SO_SET_DEL: 2086 ret = ip_vs_del_service(svc); 2087 if (!ret) 2088 goto out_unlock; 2089 break; 2090 case IP_VS_SO_SET_ZERO: 2091 ret = ip_vs_zero_service(svc); 2092 break; 2093 case IP_VS_SO_SET_ADDDEST: 2094 ret = ip_vs_add_dest(svc, &udest); 2095 break; 2096 case IP_VS_SO_SET_EDITDEST: 2097 ret = ip_vs_edit_dest(svc, &udest); 2098 break; 2099 case IP_VS_SO_SET_DELDEST: 2100 ret = ip_vs_del_dest(svc, &udest); 2101 break; 2102 default: 2103 ret = -EINVAL; 2104 } 2105 2106 if (svc) 2107 ip_vs_service_put(svc); 2108 2109 out_unlock: 2110 mutex_unlock(&__ip_vs_mutex); 2111 out_dec: 2112 /* decrease the module use count */ 2113 ip_vs_use_count_dec(); 2114 2115 return ret; 2116} 2117 2118 2119static void 2120ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src) 2121{ 2122 spin_lock_bh(&src->lock); 2123 memcpy(dst, &src->ustats, sizeof(*dst)); 2124 spin_unlock_bh(&src->lock); 2125} 2126 2127static void 2128ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) 2129{ 2130 dst->protocol = src->protocol; 2131 dst->addr = src->addr.ip; 2132 dst->port = src->port; 2133 dst->fwmark = src->fwmark; 2134 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name)); 2135 dst->flags = src->flags; 2136 dst->timeout = src->timeout / HZ; 2137 dst->netmask = src->netmask; 2138 dst->num_dests = src->num_dests; 2139 ip_vs_copy_stats(&dst->stats, &src->stats); 2140} 2141 2142static inline int 2143__ip_vs_get_service_entries(const struct ip_vs_get_services *get, 2144 struct ip_vs_get_services __user *uptr) 2145{ 2146 int idx, count=0; 2147 struct ip_vs_service *svc; 2148 struct ip_vs_service_entry entry; 2149 int ret = 0; 2150 2151 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2152 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 2153 /* Only expose IPv4 entries to old interface */ 2154 if (svc->af != AF_INET) 2155 continue; 2156 
2157 if (count >= get->num_services) 2158 goto out; 2159 memset(&entry, 0, sizeof(entry)); 2160 ip_vs_copy_service(&entry, svc); 2161 if (copy_to_user(&uptr->entrytable[count], 2162 &entry, sizeof(entry))) { 2163 ret = -EFAULT; 2164 goto out; 2165 } 2166 count++; 2167 } 2168 } 2169 2170 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2171 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 2172 /* Only expose IPv4 entries to old interface */ 2173 if (svc->af != AF_INET) 2174 continue; 2175 2176 if (count >= get->num_services) 2177 goto out; 2178 memset(&entry, 0, sizeof(entry)); 2179 ip_vs_copy_service(&entry, svc); 2180 if (copy_to_user(&uptr->entrytable[count], 2181 &entry, sizeof(entry))) { 2182 ret = -EFAULT; 2183 goto out; 2184 } 2185 count++; 2186 } 2187 } 2188 out: 2189 return ret; 2190} 2191 2192static inline int 2193__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, 2194 struct ip_vs_get_dests __user *uptr) 2195{ 2196 struct ip_vs_service *svc; 2197 union nf_inet_addr addr = { .ip = get->addr }; 2198 int ret = 0; 2199 2200 if (get->fwmark) 2201 svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark); 2202 else 2203 svc = __ip_vs_service_get(AF_INET, get->protocol, &addr, 2204 get->port); 2205 2206 if (svc) { 2207 int count = 0; 2208 struct ip_vs_dest *dest; 2209 struct ip_vs_dest_entry entry; 2210 2211 list_for_each_entry(dest, &svc->destinations, n_list) { 2212 if (count >= get->num_dests) 2213 break; 2214 2215 entry.addr = dest->addr.ip; 2216 entry.port = dest->port; 2217 entry.conn_flags = atomic_read(&dest->conn_flags); 2218 entry.weight = atomic_read(&dest->weight); 2219 entry.u_threshold = dest->u_threshold; 2220 entry.l_threshold = dest->l_threshold; 2221 entry.activeconns = atomic_read(&dest->activeconns); 2222 entry.inactconns = atomic_read(&dest->inactconns); 2223 entry.persistconns = atomic_read(&dest->persistconns); 2224 ip_vs_copy_stats(&entry.stats, &dest->stats); 2225 if (copy_to_user(&uptr->entrytable[count], 2226 &entry, 
sizeof(entry))) { 2227 ret = -EFAULT; 2228 break; 2229 } 2230 count++; 2231 } 2232 ip_vs_service_put(svc); 2233 } else 2234 ret = -ESRCH; 2235 return ret; 2236} 2237 2238static inline void 2239__ip_vs_get_timeouts(struct ip_vs_timeout_user *u) 2240{ 2241#ifdef CONFIG_IP_VS_PROTO_TCP 2242 u->tcp_timeout = 2243 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ; 2244 u->tcp_fin_timeout = 2245 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ; 2246#endif 2247#ifdef CONFIG_IP_VS_PROTO_UDP 2248 u->udp_timeout = 2249 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ; 2250#endif 2251} 2252 2253 2254#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL) 2255#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo)) 2256#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services)) 2257#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry)) 2258#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests)) 2259#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user)) 2260#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2) 2261 2262static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = { 2263 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64, 2264 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN, 2265 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN, 2266 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN, 2267 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN, 2268 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN, 2269 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN, 2270}; 2271 2272static int 2273do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) 2274{ 2275 unsigned char arg[128]; 2276 int ret = 0; 2277 unsigned int copylen; 2278 2279 if (!capable(CAP_NET_ADMIN)) 2280 return -EPERM; 2281 2282 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX) 2283 return -EINVAL; 2284 2285 if (*len < get_arglen[GET_CMDID(cmd)]) { 2286 pr_err("get_ctl: len %u < %u\n", 2287 *len, 
get_arglen[GET_CMDID(cmd)]); 2288 return -EINVAL; 2289 } 2290 2291 copylen = get_arglen[GET_CMDID(cmd)]; 2292 if (copylen > 128) 2293 return -EINVAL; 2294 2295 if (copy_from_user(arg, user, copylen) != 0) 2296 return -EFAULT; 2297 2298 if (mutex_lock_interruptible(&__ip_vs_mutex)) 2299 return -ERESTARTSYS; 2300 2301 switch (cmd) { 2302 case IP_VS_SO_GET_VERSION: 2303 { 2304 char buf[64]; 2305 2306 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)", 2307 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size); 2308 if (copy_to_user(user, buf, strlen(buf)+1) != 0) { 2309 ret = -EFAULT; 2310 goto out; 2311 } 2312 *len = strlen(buf)+1; 2313 } 2314 break; 2315 2316 case IP_VS_SO_GET_INFO: 2317 { 2318 struct ip_vs_getinfo info; 2319 info.version = IP_VS_VERSION_CODE; 2320 info.size = ip_vs_conn_tab_size; 2321 info.num_services = ip_vs_num_services; 2322 if (copy_to_user(user, &info, sizeof(info)) != 0) 2323 ret = -EFAULT; 2324 } 2325 break; 2326 2327 case IP_VS_SO_GET_SERVICES: 2328 { 2329 struct ip_vs_get_services *get; 2330 int size; 2331 2332 get = (struct ip_vs_get_services *)arg; 2333 size = sizeof(*get) + 2334 sizeof(struct ip_vs_service_entry) * get->num_services; 2335 if (*len != size) { 2336 pr_err("length: %u != %u\n", *len, size); 2337 ret = -EINVAL; 2338 goto out; 2339 } 2340 ret = __ip_vs_get_service_entries(get, user); 2341 } 2342 break; 2343 2344 case IP_VS_SO_GET_SERVICE: 2345 { 2346 struct ip_vs_service_entry *entry; 2347 struct ip_vs_service *svc; 2348 union nf_inet_addr addr; 2349 2350 entry = (struct ip_vs_service_entry *)arg; 2351 addr.ip = entry->addr; 2352 if (entry->fwmark) 2353 svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark); 2354 else 2355 svc = __ip_vs_service_get(AF_INET, entry->protocol, 2356 &addr, entry->port); 2357 if (svc) { 2358 ip_vs_copy_service(entry, svc); 2359 if (copy_to_user(user, entry, sizeof(*entry)) != 0) 2360 ret = -EFAULT; 2361 ip_vs_service_put(svc); 2362 } else 2363 ret = -ESRCH; 2364 } 2365 break; 2366 2367 case 
IP_VS_SO_GET_DESTS: 2368 { 2369 struct ip_vs_get_dests *get; 2370 int size; 2371 2372 get = (struct ip_vs_get_dests *)arg; 2373 size = sizeof(*get) + 2374 sizeof(struct ip_vs_dest_entry) * get->num_dests; 2375 if (*len != size) { 2376 pr_err("length: %u != %u\n", *len, size); 2377 ret = -EINVAL; 2378 goto out; 2379 } 2380 ret = __ip_vs_get_dest_entries(get, user); 2381 } 2382 break; 2383 2384 case IP_VS_SO_GET_TIMEOUT: 2385 { 2386 struct ip_vs_timeout_user t; 2387 2388 __ip_vs_get_timeouts(&t); 2389 if (copy_to_user(user, &t, sizeof(t)) != 0) 2390 ret = -EFAULT; 2391 } 2392 break; 2393 2394 case IP_VS_SO_GET_DAEMON: 2395 { 2396 struct ip_vs_daemon_user d[2]; 2397 2398 memset(&d, 0, sizeof(d)); 2399 if (ip_vs_sync_state & IP_VS_STATE_MASTER) { 2400 d[0].state = IP_VS_STATE_MASTER; 2401 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn)); 2402 d[0].syncid = ip_vs_master_syncid; 2403 } 2404 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) { 2405 d[1].state = IP_VS_STATE_BACKUP; 2406 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn)); 2407 d[1].syncid = ip_vs_backup_syncid; 2408 } 2409 if (copy_to_user(user, &d, sizeof(d)) != 0) 2410 ret = -EFAULT; 2411 } 2412 break; 2413 2414 default: 2415 ret = -EINVAL; 2416 } 2417 2418 out: 2419 mutex_unlock(&__ip_vs_mutex); 2420 return ret; 2421} 2422 2423 2424static struct nf_sockopt_ops ip_vs_sockopts = { 2425 .pf = PF_INET, 2426 .set_optmin = IP_VS_BASE_CTL, 2427 .set_optmax = IP_VS_SO_SET_MAX+1, 2428 .set = do_ip_vs_set_ctl, 2429 .get_optmin = IP_VS_BASE_CTL, 2430 .get_optmax = IP_VS_SO_GET_MAX+1, 2431 .get = do_ip_vs_get_ctl, 2432 .owner = THIS_MODULE, 2433}; 2434 2435/* 2436 * Generic Netlink interface 2437 */ 2438 2439/* IPVS genetlink family */ 2440static struct genl_family ip_vs_genl_family = { 2441 .id = GENL_ID_GENERATE, 2442 .hdrsize = 0, 2443 .name = IPVS_GENL_NAME, 2444 .version = IPVS_GENL_VERSION, 2445 .maxattr = IPVS_CMD_MAX, 2446}; 2447 2448/* Policy used for first-level command 
attributes */ 2449static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = { 2450 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED }, 2451 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED }, 2452 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED }, 2453 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 }, 2454 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 }, 2455 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 }, 2456}; 2457 2458/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */ 2459static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = { 2460 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 }, 2461 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING, 2462 .len = IP_VS_IFNAME_MAXLEN }, 2463 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 }, 2464}; 2465 2466/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */ 2467static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = { 2468 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 }, 2469 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 }, 2470 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY, 2471 .len = sizeof(union nf_inet_addr) }, 2472 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 }, 2473 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 }, 2474 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING, 2475 .len = IP_VS_SCHEDNAME_MAXLEN }, 2476 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY, 2477 .len = sizeof(struct ip_vs_flags) }, 2478 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 }, 2479 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 }, 2480 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED }, 2481}; 2482 2483/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */ 2484static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = { 2485 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY, 2486 .len = sizeof(union nf_inet_addr) }, 2487 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 }, 2488 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 }, 2489 
[IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 }, 2490 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 }, 2491 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 }, 2492 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 }, 2493 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 }, 2494 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 }, 2495 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED }, 2496}; 2497 2498static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, 2499 struct ip_vs_stats *stats) 2500{ 2501 struct nlattr *nl_stats = nla_nest_start(skb, container_type); 2502 if (!nl_stats) 2503 return -EMSGSIZE; 2504 2505 spin_lock_bh(&stats->lock); 2506 2507 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns); 2508 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts); 2509 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts); 2510 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes); 2511 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes); 2512 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps); 2513 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps); 2514 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps); 2515 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps); 2516 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps); 2517 2518 spin_unlock_bh(&stats->lock); 2519 2520 nla_nest_end(skb, nl_stats); 2521 2522 return 0; 2523 2524nla_put_failure: 2525 spin_unlock_bh(&stats->lock); 2526 nla_nest_cancel(skb, nl_stats); 2527 return -EMSGSIZE; 2528} 2529 2530static int ip_vs_genl_fill_service(struct sk_buff *skb, 2531 struct ip_vs_service *svc) 2532{ 2533 struct nlattr *nl_service; 2534 struct ip_vs_flags flags = { .flags = svc->flags, 2535 .mask = ~0 }; 2536 2537 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE); 2538 if (!nl_service) 2539 return -EMSGSIZE; 2540 2541 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af); 2542 2543 if (svc->fwmark) { 2544 
NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark); 2545 } else { 2546 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol); 2547 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr); 2548 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port); 2549 } 2550 2551 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name); 2552 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags); 2553 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ); 2554 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask); 2555 2556 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats)) 2557 goto nla_put_failure; 2558 2559 nla_nest_end(skb, nl_service); 2560 2561 return 0; 2562 2563nla_put_failure: 2564 nla_nest_cancel(skb, nl_service); 2565 return -EMSGSIZE; 2566} 2567 2568static int ip_vs_genl_dump_service(struct sk_buff *skb, 2569 struct ip_vs_service *svc, 2570 struct netlink_callback *cb) 2571{ 2572 void *hdr; 2573 2574 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 2575 &ip_vs_genl_family, NLM_F_MULTI, 2576 IPVS_CMD_NEW_SERVICE); 2577 if (!hdr) 2578 return -EMSGSIZE; 2579 2580 if (ip_vs_genl_fill_service(skb, svc) < 0) 2581 goto nla_put_failure; 2582 2583 return genlmsg_end(skb, hdr); 2584 2585nla_put_failure: 2586 genlmsg_cancel(skb, hdr); 2587 return -EMSGSIZE; 2588} 2589 2590static int ip_vs_genl_dump_services(struct sk_buff *skb, 2591 struct netlink_callback *cb) 2592{ 2593 int idx = 0, i; 2594 int start = cb->args[0]; 2595 struct ip_vs_service *svc; 2596 2597 mutex_lock(&__ip_vs_mutex); 2598 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { 2599 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { 2600 if (++idx <= start) 2601 continue; 2602 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { 2603 idx--; 2604 goto nla_put_failure; 2605 } 2606 } 2607 } 2608 2609 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { 2610 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { 2611 if (++idx <= start) 2612 continue; 2613 if 
(ip_vs_genl_dump_service(skb, svc, cb) < 0) { 2614 idx--; 2615 goto nla_put_failure; 2616 } 2617 } 2618 } 2619 2620nla_put_failure: 2621 mutex_unlock(&__ip_vs_mutex); 2622 cb->args[0] = idx; 2623 2624 return skb->len; 2625} 2626 2627static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, 2628 struct nlattr *nla, int full_entry) 2629{ 2630 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1]; 2631 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr; 2632 2633 /* Parse mandatory identifying service fields first */ 2634 if (nla == NULL || 2635 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy)) 2636 return -EINVAL; 2637 2638 nla_af = attrs[IPVS_SVC_ATTR_AF]; 2639 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL]; 2640 nla_addr = attrs[IPVS_SVC_ATTR_ADDR]; 2641 nla_port = attrs[IPVS_SVC_ATTR_PORT]; 2642 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK]; 2643 2644 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr)))) 2645 return -EINVAL; 2646 2647 memset(usvc, 0, sizeof(*usvc)); 2648 2649 usvc->af = nla_get_u16(nla_af); 2650#ifdef CONFIG_IP_VS_IPV6 2651 if (usvc->af != AF_INET && usvc->af != AF_INET6) 2652#else 2653 if (usvc->af != AF_INET) 2654#endif 2655 return -EAFNOSUPPORT; 2656 2657 if (nla_fwmark) { 2658 usvc->protocol = IPPROTO_TCP; 2659 usvc->fwmark = nla_get_u32(nla_fwmark); 2660 } else { 2661 usvc->protocol = nla_get_u16(nla_protocol); 2662 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr)); 2663 usvc->port = nla_get_u16(nla_port); 2664 usvc->fwmark = 0; 2665 } 2666 2667 /* If a full entry was requested, check for the additional fields */ 2668 if (full_entry) { 2669 struct nlattr *nla_sched, *nla_flags, *nla_timeout, 2670 *nla_netmask; 2671 struct ip_vs_flags flags; 2672 struct ip_vs_service *svc; 2673 2674 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME]; 2675 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS]; 2676 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT]; 2677 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK]; 2678 2679 if 
(!(nla_sched && nla_flags && nla_timeout && nla_netmask)) 2680 return -EINVAL; 2681 2682 nla_memcpy(&flags, nla_flags, sizeof(flags)); 2683 2684 /* prefill flags from service if it already exists */ 2685 if (usvc->fwmark) 2686 svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark); 2687 else 2688 svc = __ip_vs_service_get(usvc->af, usvc->protocol, 2689 &usvc->addr, usvc->port); 2690 if (svc) { 2691 usvc->flags = svc->flags; 2692 ip_vs_service_put(svc); 2693 } else 2694 usvc->flags = 0; 2695 2696 /* set new flags from userland */ 2697 usvc->flags = (usvc->flags & ~flags.mask) | 2698 (flags.flags & flags.mask); 2699 usvc->sched_name = nla_data(nla_sched); 2700 usvc->timeout = nla_get_u32(nla_timeout); 2701 usvc->netmask = nla_get_u32(nla_netmask); 2702 } 2703 2704 return 0; 2705} 2706 2707static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla) 2708{ 2709 struct ip_vs_service_user_kern usvc; 2710 int ret; 2711 2712 ret = ip_vs_genl_parse_service(&usvc, nla, 0); 2713 if (ret) 2714 return ERR_PTR(ret); 2715 2716 if (usvc.fwmark) 2717 return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark); 2718 else 2719 return __ip_vs_service_get(usvc.af, usvc.protocol, 2720 &usvc.addr, usvc.port); 2721} 2722 2723static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) 2724{ 2725 struct nlattr *nl_dest; 2726 2727 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST); 2728 if (!nl_dest) 2729 return -EMSGSIZE; 2730 2731 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr); 2732 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port); 2733 2734 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD, 2735 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK); 2736 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight)); 2737 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold); 2738 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold); 2739 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS, 2740 atomic_read(&dest->activeconns)); 2741 
NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS, 2742 atomic_read(&dest->inactconns)); 2743 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS, 2744 atomic_read(&dest->persistconns)); 2745 2746 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats)) 2747 goto nla_put_failure; 2748 2749 nla_nest_end(skb, nl_dest); 2750 2751 return 0; 2752 2753nla_put_failure: 2754 nla_nest_cancel(skb, nl_dest); 2755 return -EMSGSIZE; 2756} 2757 2758static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest, 2759 struct netlink_callback *cb) 2760{ 2761 void *hdr; 2762 2763 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 2764 &ip_vs_genl_family, NLM_F_MULTI, 2765 IPVS_CMD_NEW_DEST); 2766 if (!hdr) 2767 return -EMSGSIZE; 2768 2769 if (ip_vs_genl_fill_dest(skb, dest) < 0) 2770 goto nla_put_failure; 2771 2772 return genlmsg_end(skb, hdr); 2773 2774nla_put_failure: 2775 genlmsg_cancel(skb, hdr); 2776 return -EMSGSIZE; 2777} 2778 2779static int ip_vs_genl_dump_dests(struct sk_buff *skb, 2780 struct netlink_callback *cb) 2781{ 2782 int idx = 0; 2783 int start = cb->args[0]; 2784 struct ip_vs_service *svc; 2785 struct ip_vs_dest *dest; 2786 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1]; 2787 2788 mutex_lock(&__ip_vs_mutex); 2789 2790 /* Try to find the service for which to dump destinations */ 2791 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs, 2792 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy)) 2793 goto out_err; 2794 2795 svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]); 2796 if (IS_ERR(svc) || svc == NULL) 2797 goto out_err; 2798 2799 /* Dump the destinations */ 2800 list_for_each_entry(dest, &svc->destinations, n_list) { 2801 if (++idx <= start) 2802 continue; 2803 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) { 2804 idx--; 2805 goto nla_put_failure; 2806 } 2807 } 2808 2809nla_put_failure: 2810 cb->args[0] = idx; 2811 ip_vs_service_put(svc); 2812 2813out_err: 2814 mutex_unlock(&__ip_vs_mutex); 2815 2816 return skb->len; 2817} 2818 2819static int 
ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, 2820 struct nlattr *nla, int full_entry) 2821{ 2822 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1]; 2823 struct nlattr *nla_addr, *nla_port; 2824 2825 /* Parse mandatory identifying destination fields first */ 2826 if (nla == NULL || 2827 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy)) 2828 return -EINVAL; 2829 2830 nla_addr = attrs[IPVS_DEST_ATTR_ADDR]; 2831 nla_port = attrs[IPVS_DEST_ATTR_PORT]; 2832 2833 if (!(nla_addr && nla_port)) 2834 return -EINVAL; 2835 2836 memset(udest, 0, sizeof(*udest)); 2837 2838 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr)); 2839 udest->port = nla_get_u16(nla_port); 2840 2841 /* If a full entry was requested, check for the additional fields */ 2842 if (full_entry) { 2843 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh, 2844 *nla_l_thresh; 2845 2846 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD]; 2847 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT]; 2848 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH]; 2849 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH]; 2850 2851 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh)) 2852 return -EINVAL; 2853 2854 udest->conn_flags = nla_get_u32(nla_fwd) 2855 & IP_VS_CONN_F_FWD_MASK; 2856 udest->weight = nla_get_u32(nla_weight); 2857 udest->u_threshold = nla_get_u32(nla_u_thresh); 2858 udest->l_threshold = nla_get_u32(nla_l_thresh); 2859 } 2860 2861 return 0; 2862} 2863 2864static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state, 2865 const char *mcast_ifn, __be32 syncid) 2866{ 2867 struct nlattr *nl_daemon; 2868 2869 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON); 2870 if (!nl_daemon) 2871 return -EMSGSIZE; 2872 2873 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state); 2874 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn); 2875 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid); 2876 2877 nla_nest_end(skb, nl_daemon); 2878 2879 return 0; 2880 2881nla_put_failure: 2882 nla_nest_cancel(skb, 
nl_daemon); 2883 return -EMSGSIZE; 2884} 2885 2886static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state, 2887 const char *mcast_ifn, __be32 syncid, 2888 struct netlink_callback *cb) 2889{ 2890 void *hdr; 2891 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, 2892 &ip_vs_genl_family, NLM_F_MULTI, 2893 IPVS_CMD_NEW_DAEMON); 2894 if (!hdr) 2895 return -EMSGSIZE; 2896 2897 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid)) 2898 goto nla_put_failure; 2899 2900 return genlmsg_end(skb, hdr); 2901 2902nla_put_failure: 2903 genlmsg_cancel(skb, hdr); 2904 return -EMSGSIZE; 2905} 2906 2907static int ip_vs_genl_dump_daemons(struct sk_buff *skb, 2908 struct netlink_callback *cb) 2909{ 2910 mutex_lock(&__ip_vs_mutex); 2911 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { 2912 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, 2913 ip_vs_master_mcast_ifn, 2914 ip_vs_master_syncid, cb) < 0) 2915 goto nla_put_failure; 2916 2917 cb->args[0] = 1; 2918 } 2919 2920 if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) { 2921 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP, 2922 ip_vs_backup_mcast_ifn, 2923 ip_vs_backup_syncid, cb) < 0) 2924 goto nla_put_failure; 2925 2926 cb->args[1] = 1; 2927 } 2928 2929nla_put_failure: 2930 mutex_unlock(&__ip_vs_mutex); 2931 2932 return skb->len; 2933} 2934 2935static int ip_vs_genl_new_daemon(struct nlattr **attrs) 2936{ 2937 if (!(attrs[IPVS_DAEMON_ATTR_STATE] && 2938 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] && 2939 attrs[IPVS_DAEMON_ATTR_SYNC_ID])) 2940 return -EINVAL; 2941 2942 return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]), 2943 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), 2944 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID])); 2945} 2946 2947static int ip_vs_genl_del_daemon(struct nlattr **attrs) 2948{ 2949 if (!attrs[IPVS_DAEMON_ATTR_STATE]) 2950 return -EINVAL; 2951 2952 return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); 2953} 2954 2955static int 
ip_vs_genl_set_config(struct nlattr **attrs) 2956{ 2957 struct ip_vs_timeout_user t; 2958 2959 __ip_vs_get_timeouts(&t); 2960 2961 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]) 2962 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]); 2963 2964 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]) 2965 t.tcp_fin_timeout = 2966 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]); 2967 2968 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]) 2969 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]); 2970 2971 return ip_vs_set_timeout(&t); 2972} 2973 2974static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) 2975{ 2976 struct ip_vs_service *svc = NULL; 2977 struct ip_vs_service_user_kern usvc; 2978 struct ip_vs_dest_user_kern udest; 2979 int ret = 0, cmd; 2980 int need_full_svc = 0, need_full_dest = 0; 2981 2982 cmd = info->genlhdr->cmd; 2983 2984 mutex_lock(&__ip_vs_mutex); 2985 2986 if (cmd == IPVS_CMD_FLUSH) { 2987 ret = ip_vs_flush(); 2988 goto out; 2989 } else if (cmd == IPVS_CMD_SET_CONFIG) { 2990 ret = ip_vs_genl_set_config(info->attrs); 2991 goto out; 2992 } else if (cmd == IPVS_CMD_NEW_DAEMON || 2993 cmd == IPVS_CMD_DEL_DAEMON) { 2994 2995 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1]; 2996 2997 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] || 2998 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX, 2999 info->attrs[IPVS_CMD_ATTR_DAEMON], 3000 ip_vs_daemon_policy)) { 3001 ret = -EINVAL; 3002 goto out; 3003 } 3004 3005 if (cmd == IPVS_CMD_NEW_DAEMON) 3006 ret = ip_vs_genl_new_daemon(daemon_attrs); 3007 else 3008 ret = ip_vs_genl_del_daemon(daemon_attrs); 3009 goto out; 3010 } else if (cmd == IPVS_CMD_ZERO && 3011 !info->attrs[IPVS_CMD_ATTR_SERVICE]) { 3012 ret = ip_vs_zero_all(); 3013 goto out; 3014 } 3015 3016 /* All following commands require a service argument, so check if we 3017 * received a valid one. We need a full service specification when 3018 * adding / editing a service. Only identifying members otherwise. 
*/ 3019 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE) 3020 need_full_svc = 1; 3021 3022 ret = ip_vs_genl_parse_service(&usvc, 3023 info->attrs[IPVS_CMD_ATTR_SERVICE], 3024 need_full_svc); 3025 if (ret) 3026 goto out; 3027 3028 /* Lookup the exact service by <protocol, addr, port> or fwmark */ 3029 if (usvc.fwmark == 0) 3030 svc = __ip_vs_service_get(usvc.af, usvc.protocol, 3031 &usvc.addr, usvc.port); 3032 else 3033 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark); 3034 3035 /* Unless we're adding a new service, the service must already exist */ 3036 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) { 3037 ret = -ESRCH; 3038 goto out; 3039 } 3040 3041 /* Destination commands require a valid destination argument. For 3042 * adding / editing a destination, we need a full destination 3043 * specification. */ 3044 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST || 3045 cmd == IPVS_CMD_DEL_DEST) { 3046 if (cmd != IPVS_CMD_DEL_DEST) 3047 need_full_dest = 1; 3048 3049 ret = ip_vs_genl_parse_dest(&udest, 3050 info->attrs[IPVS_CMD_ATTR_DEST], 3051 need_full_dest); 3052 if (ret) 3053 goto out; 3054 } 3055 3056 switch (cmd) { 3057 case IPVS_CMD_NEW_SERVICE: 3058 if (svc == NULL) 3059 ret = ip_vs_add_service(&usvc, &svc); 3060 else 3061 ret = -EEXIST; 3062 break; 3063 case IPVS_CMD_SET_SERVICE: 3064 ret = ip_vs_edit_service(svc, &usvc); 3065 break; 3066 case IPVS_CMD_DEL_SERVICE: 3067 ret = ip_vs_del_service(svc); 3068 break; 3069 case IPVS_CMD_NEW_DEST: 3070 ret = ip_vs_add_dest(svc, &udest); 3071 break; 3072 case IPVS_CMD_SET_DEST: 3073 ret = ip_vs_edit_dest(svc, &udest); 3074 break; 3075 case IPVS_CMD_DEL_DEST: 3076 ret = ip_vs_del_dest(svc, &udest); 3077 break; 3078 case IPVS_CMD_ZERO: 3079 ret = ip_vs_zero_service(svc); 3080 break; 3081 default: 3082 ret = -EINVAL; 3083 } 3084 3085out: 3086 if (svc) 3087 ip_vs_service_put(svc); 3088 mutex_unlock(&__ip_vs_mutex); 3089 3090 return ret; 3091} 3092 3093static int ip_vs_genl_get_cmd(struct sk_buff 
*skb, struct genl_info *info) 3094{ 3095 struct sk_buff *msg; 3096 void *reply; 3097 int ret, cmd, reply_cmd; 3098 3099 cmd = info->genlhdr->cmd; 3100 3101 if (cmd == IPVS_CMD_GET_SERVICE) 3102 reply_cmd = IPVS_CMD_NEW_SERVICE; 3103 else if (cmd == IPVS_CMD_GET_INFO) 3104 reply_cmd = IPVS_CMD_SET_INFO; 3105 else if (cmd == IPVS_CMD_GET_CONFIG) 3106 reply_cmd = IPVS_CMD_SET_CONFIG; 3107 else { 3108 pr_err("unknown Generic Netlink command\n"); 3109 return -EINVAL; 3110 } 3111 3112 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 3113 if (!msg) 3114 return -ENOMEM; 3115 3116 mutex_lock(&__ip_vs_mutex); 3117 3118 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd); 3119 if (reply == NULL) 3120 goto nla_put_failure; 3121 3122 switch (cmd) { 3123 case IPVS_CMD_GET_SERVICE: 3124 { 3125 struct ip_vs_service *svc; 3126 3127 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]); 3128 if (IS_ERR(svc)) { 3129 ret = PTR_ERR(svc); 3130 goto out_err; 3131 } else if (svc) { 3132 ret = ip_vs_genl_fill_service(msg, svc); 3133 ip_vs_service_put(svc); 3134 if (ret) 3135 goto nla_put_failure; 3136 } else { 3137 ret = -ESRCH; 3138 goto out_err; 3139 } 3140 3141 break; 3142 } 3143 3144 case IPVS_CMD_GET_CONFIG: 3145 { 3146 struct ip_vs_timeout_user t; 3147 3148 __ip_vs_get_timeouts(&t); 3149#ifdef CONFIG_IP_VS_PROTO_TCP 3150 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout); 3151 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, 3152 t.tcp_fin_timeout); 3153#endif 3154#ifdef CONFIG_IP_VS_PROTO_UDP 3155 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout); 3156#endif 3157 3158 break; 3159 } 3160 3161 case IPVS_CMD_GET_INFO: 3162 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE); 3163 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE, 3164 ip_vs_conn_tab_size); 3165 break; 3166 } 3167 3168 genlmsg_end(msg, reply); 3169 ret = genlmsg_reply(msg, info); 3170 goto out; 3171 3172nla_put_failure: 3173 pr_err("not enough space in Netlink 
message\n"); 3174 ret = -EMSGSIZE; 3175 3176out_err: 3177 nlmsg_free(msg); 3178out: 3179 mutex_unlock(&__ip_vs_mutex); 3180 3181 return ret; 3182} 3183 3184 3185static struct genl_ops ip_vs_genl_ops[] __read_mostly = { 3186 { 3187 .cmd = IPVS_CMD_NEW_SERVICE, 3188 .flags = GENL_ADMIN_PERM, 3189 .policy = ip_vs_cmd_policy, 3190 .doit = ip_vs_genl_set_cmd, 3191 }, 3192 { 3193 .cmd = IPVS_CMD_SET_SERVICE, 3194 .flags = GENL_ADMIN_PERM, 3195 .policy = ip_vs_cmd_policy, 3196 .doit = ip_vs_genl_set_cmd, 3197 }, 3198 { 3199 .cmd = IPVS_CMD_DEL_SERVICE, 3200 .flags = GENL_ADMIN_PERM, 3201 .policy = ip_vs_cmd_policy, 3202 .doit = ip_vs_genl_set_cmd, 3203 }, 3204 { 3205 .cmd = IPVS_CMD_GET_SERVICE, 3206 .flags = GENL_ADMIN_PERM, 3207 .doit = ip_vs_genl_get_cmd, 3208 .dumpit = ip_vs_genl_dump_services, 3209 .policy = ip_vs_cmd_policy, 3210 }, 3211 { 3212 .cmd = IPVS_CMD_NEW_DEST, 3213 .flags = GENL_ADMIN_PERM, 3214 .policy = ip_vs_cmd_policy, 3215 .doit = ip_vs_genl_set_cmd, 3216 }, 3217 { 3218 .cmd = IPVS_CMD_SET_DEST, 3219 .flags = GENL_ADMIN_PERM, 3220 .policy = ip_vs_cmd_policy, 3221 .doit = ip_vs_genl_set_cmd, 3222 }, 3223 { 3224 .cmd = IPVS_CMD_DEL_DEST, 3225 .flags = GENL_ADMIN_PERM, 3226 .policy = ip_vs_cmd_policy, 3227 .doit = ip_vs_genl_set_cmd, 3228 }, 3229 { 3230 .cmd = IPVS_CMD_GET_DEST, 3231 .flags = GENL_ADMIN_PERM, 3232 .policy = ip_vs_cmd_policy, 3233 .dumpit = ip_vs_genl_dump_dests, 3234 }, 3235 { 3236 .cmd = IPVS_CMD_NEW_DAEMON, 3237 .flags = GENL_ADMIN_PERM, 3238 .policy = ip_vs_cmd_policy, 3239 .doit = ip_vs_genl_set_cmd, 3240 }, 3241 { 3242 .cmd = IPVS_CMD_DEL_DAEMON, 3243 .flags = GENL_ADMIN_PERM, 3244 .policy = ip_vs_cmd_policy, 3245 .doit = ip_vs_genl_set_cmd, 3246 }, 3247 { 3248 .cmd = IPVS_CMD_GET_DAEMON, 3249 .flags = GENL_ADMIN_PERM, 3250 .dumpit = ip_vs_genl_dump_daemons, 3251 }, 3252 { 3253 .cmd = IPVS_CMD_SET_CONFIG, 3254 .flags = GENL_ADMIN_PERM, 3255 .policy = ip_vs_cmd_policy, 3256 .doit = ip_vs_genl_set_cmd, 3257 }, 3258 { 3259 .cmd = 
IPVS_CMD_GET_CONFIG, 3260 .flags = GENL_ADMIN_PERM, 3261 .doit = ip_vs_genl_get_cmd, 3262 }, 3263 { 3264 .cmd = IPVS_CMD_GET_INFO, 3265 .flags = GENL_ADMIN_PERM, 3266 .doit = ip_vs_genl_get_cmd, 3267 }, 3268 { 3269 .cmd = IPVS_CMD_ZERO, 3270 .flags = GENL_ADMIN_PERM, 3271 .policy = ip_vs_cmd_policy, 3272 .doit = ip_vs_genl_set_cmd, 3273 }, 3274 { 3275 .cmd = IPVS_CMD_FLUSH, 3276 .flags = GENL_ADMIN_PERM, 3277 .doit = ip_vs_genl_set_cmd, 3278 }, 3279}; 3280 3281static int __init ip_vs_genl_register(void) 3282{ 3283 return genl_register_family_with_ops(&ip_vs_genl_family, 3284 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops)); 3285} 3286 3287static void ip_vs_genl_unregister(void) 3288{ 3289 genl_unregister_family(&ip_vs_genl_family); 3290} 3291 3292/* End of Generic Netlink interface definitions */ 3293 3294 3295int __init ip_vs_control_init(void) 3296{ 3297 int ret; 3298 int idx; 3299 3300 EnterFunction(2); 3301 3302 ret = nf_register_sockopt(&ip_vs_sockopts); 3303 if (ret) { 3304 pr_err("cannot register sockopt.\n"); 3305 return ret; 3306 } 3307 3308 ret = ip_vs_genl_register(); 3309 if (ret) { 3310 pr_err("cannot register Generic Netlink interface.\n"); 3311 nf_unregister_sockopt(&ip_vs_sockopts); 3312 return ret; 3313 } 3314 3315 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops); 3316 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops); 3317 3318 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars); 3319 3320 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */ 3321 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 3322 INIT_LIST_HEAD(&ip_vs_svc_table[idx]); 3323 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); 3324 } 3325 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) { 3326 INIT_LIST_HEAD(&ip_vs_rtable[idx]); 3327 } 3328 3329 ip_vs_new_estimator(&ip_vs_stats); 3330 3331 /* Hook the defense timer */ 3332 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD); 3333 3334 LeaveFunction(2); 3335 return 0; 3336} 3337 3338 

/*
 * Module teardown for the control interface: undoes ip_vs_control_init()
 * in roughly reverse order — stop the defense work, drop the global stats
 * estimator, remove sysctls and /proc entries, then unregister the
 * genetlink and sockopt frontends.
 */
void ip_vs_control_cleanup(void)
{
	EnterFunction(2);
	ip_vs_trash_cleanup();
	/* stop the self-rearming defense timer and wait for any instance
	 * still running before the rest of the state is torn down */
	cancel_rearming_delayed_work(&defense_work);
	cancel_work_sync(&defense_work.work);
	ip_vs_kill_estimator(&ip_vs_stats);
	unregister_sysctl_table(sysctl_header);
	proc_net_remove(&init_net, "ip_vs_stats");
	proc_net_remove(&init_net, "ip_vs");
	ip_vs_genl_unregister();
	nf_unregister_sockopt(&ip_vs_sockopts);
	LeaveFunction(2);
}