1/* 2 * IPVS An implementation of the IP virtual server support for the 3 * LINUX operating system. IPVS is now implemented as a module 4 * over the NetFilter framework. IPVS can be used to build a 5 * high-performance and highly available server based on a 6 * cluster of servers. 7 * 8 * Version: $Id: ip_vs_ctl.c,v 1.1.1.1 2007/08/03 18:53:51 Exp $ 9 * 10 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 11 * Peter Kese <peter.kese@ijs.si> 12 * Julian Anastasov <ja@ssi.bg> 13 * 14 * This program is free software; you can redistribute it and/or 15 * modify it under the terms of the GNU General Public License 16 * as published by the Free Software Foundation; either version 17 * 2 of the License, or (at your option) any later version. 18 * 19 * Changes: 20 * 21 */ 22 23#include <linux/module.h> 24#include <linux/init.h> 25#include <linux/types.h> 26#include <linux/capability.h> 27#include <linux/fs.h> 28#include <linux/sysctl.h> 29#include <linux/proc_fs.h> 30#include <linux/workqueue.h> 31#include <linux/swap.h> 32#include <linux/proc_fs.h> 33#include <linux/seq_file.h> 34 35#include <linux/netfilter.h> 36#include <linux/netfilter_ipv4.h> 37#include <linux/mutex.h> 38 39#include <net/ip.h> 40#include <net/route.h> 41#include <net/sock.h> 42 43#include <asm/uaccess.h> 44 45#include <net/ip_vs.h> 46 47/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */ 48static DEFINE_MUTEX(__ip_vs_mutex); 49 50/* lock for service table */ 51static DEFINE_RWLOCK(__ip_vs_svc_lock); 52 53/* lock for table with the real services */ 54static DEFINE_RWLOCK(__ip_vs_rs_lock); 55 56/* lock for state and timeout tables */ 57static DEFINE_RWLOCK(__ip_vs_securetcp_lock); 58 59/* lock for drop entry handling */ 60static DEFINE_SPINLOCK(__ip_vs_dropentry_lock); 61 62/* lock for drop packet handling */ 63static DEFINE_SPINLOCK(__ip_vs_droppacket_lock); 64 65/* 1/rate drop and drop-entry variables */ 66int ip_vs_drop_rate = 0; 67int ip_vs_drop_counter = 0; 68static atomic_t ip_vs_dropentry = ATOMIC_INIT(0); 69 70/* number of virtual services */ 71static int ip_vs_num_services = 0; 72 73/* sysctl variables */ 74static int sysctl_ip_vs_drop_entry = 0; 75static int sysctl_ip_vs_drop_packet = 0; 76static int sysctl_ip_vs_secure_tcp = 0; 77static int sysctl_ip_vs_amemthresh = 1024; 78static int sysctl_ip_vs_am_droprate = 10; 79int sysctl_ip_vs_cache_bypass = 0; 80int sysctl_ip_vs_expire_nodest_conn = 0; 81int sysctl_ip_vs_expire_quiescent_template = 0; 82int sysctl_ip_vs_sync_threshold[2] = { 3, 50 }; 83int sysctl_ip_vs_nat_icmp_send = 0; 84 85 86#ifdef CONFIG_IP_VS_DEBUG 87static int sysctl_ip_vs_debug_level = 0; 88 89int ip_vs_get_debug_level(void) 90{ 91 return sysctl_ip_vs_debug_level; 92} 93#endif 94 95/* 96 * update_defense_level is called from keventd and from sysctl, 97 * so it needs to protect itself from softirqs 98 */ 99static void update_defense_level(void) 100{ 101 struct sysinfo i; 102 static int old_secure_tcp = 0; 103 int availmem; 104 int nomem; 105 int to_change = -1; 106 107 /* we only count free and buffered memory (in pages) */ 108 si_meminfo(&i); 109 availmem = i.freeram + i.bufferram; 110 /* however in linux 2.5 the i.bufferram is total page cache size, 111 we need adjust it */ 112 /* si_swapinfo(&i); */ 113 /* availmem = availmem - (i.totalswap - i.freeswap); */ 114 115 nomem = (availmem < sysctl_ip_vs_amemthresh); 116 117 local_bh_disable(); 118 119 /* drop_entry */ 120 spin_lock(&__ip_vs_dropentry_lock); 121 switch (sysctl_ip_vs_drop_entry) { 122 case 0: 123 atomic_set(&ip_vs_dropentry, 0); 124 break; 125 case 1: 126 if (nomem) { 127 atomic_set(&ip_vs_dropentry, 1); 128 sysctl_ip_vs_drop_entry = 2; 129 } else { 130 atomic_set(&ip_vs_dropentry, 0); 131 } 132 break; 133 case 2: 134 if (nomem) { 135 atomic_set(&ip_vs_dropentry, 1); 136 } else { 137 atomic_set(&ip_vs_dropentry, 0); 138 sysctl_ip_vs_drop_entry = 1; 139 }; 140 break; 141 case 3: 142 atomic_set(&ip_vs_dropentry, 1); 143 break; 144 } 145 spin_unlock(&__ip_vs_dropentry_lock); 146 147 /* drop_packet */ 148 spin_lock(&__ip_vs_droppacket_lock); 149 switch (sysctl_ip_vs_drop_packet) { 150 case 0: 151 ip_vs_drop_rate = 0; 152 break; 153 case 1: 154 if (nomem) { 155 ip_vs_drop_rate = ip_vs_drop_counter 156 = sysctl_ip_vs_amemthresh / 157 (sysctl_ip_vs_amemthresh-availmem); 158 sysctl_ip_vs_drop_packet = 2; 159 } else { 160 ip_vs_drop_rate = 0; 161 } 162 break; 163 case 2: 164 if (nomem) { 165 ip_vs_drop_rate = ip_vs_drop_counter 166 = sysctl_ip_vs_amemthresh / 167 (sysctl_ip_vs_amemthresh-availmem); 168 } else { 169 ip_vs_drop_rate = 0; 170 sysctl_ip_vs_drop_packet = 1; 171 } 172 break; 173 case 3: 174 ip_vs_drop_rate = sysctl_ip_vs_am_droprate; 175 break; 176 } 177 spin_unlock(&__ip_vs_droppacket_lock); 178 179 /* secure_tcp */ 180 write_lock(&__ip_vs_securetcp_lock); 181 switch (sysctl_ip_vs_secure_tcp) { 182 case 0: 183 if (old_secure_tcp >= 2) 184 to_change = 0; 185 break; 186 case 1: 187 if (nomem) { 188 if (old_secure_tcp < 2) 189 to_change = 1; 190 sysctl_ip_vs_secure_tcp = 2; 191 } else { 192 if (old_secure_tcp >= 2) 193 to_change = 0; 194 } 195 break; 196 case 2: 197 if (nomem) { 198 if (old_secure_tcp < 2) 199 to_change = 1; 200 } else { 201 if (old_secure_tcp >= 2) 202 to_change = 0; 203 sysctl_ip_vs_secure_tcp = 1; 204 } 205 break; 206 case 3: 207 if (old_secure_tcp < 2) 208 to_change = 1; 209 break; 210 } 211 old_secure_tcp = sysctl_ip_vs_secure_tcp; 212 if (to_change >= 0) 213 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1); 214 write_unlock(&__ip_vs_securetcp_lock); 215 216 local_bh_enable(); 217} 218 219 220/* 221 * Timer for checking the defense 222 */ 223#define DEFENSE_TIMER_PERIOD 1*HZ 224static void defense_work_handler(struct work_struct *work); 225static DECLARE_DELAYED_WORK(defense_work, defense_work_handler); 226 227static void defense_work_handler(struct work_struct *work) 228{ 229 update_defense_level(); 230 if (atomic_read(&ip_vs_dropentry)) 231 ip_vs_random_dropentry(); 232 233 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD); 234} 235 236int 237ip_vs_use_count_inc(void) 238{ 239 return try_module_get(THIS_MODULE); 240} 241 242void 243ip_vs_use_count_dec(void) 244{ 245 module_put(THIS_MODULE); 246} 247 248 249/* 250 * Hash table: for virtual service lookups 251 */ 252#define IP_VS_SVC_TAB_BITS 8 253#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS) 254#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1) 255 256/* the service table hashed by <protocol, addr, port> */ 257static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; 258/* the service table hashed by fwmark */ 259static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; 260 261/* 262 * Hash table: for real service lookups 263 */ 264#define IP_VS_RTAB_BITS 4 265#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS) 266#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1) 267 268static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE]; 269 270/* 271 * Trash for destinations 272 */ 273static LIST_HEAD(ip_vs_dest_trash); 274 275/* 276 * FTP & NULL virtual service counters 277 */ 278static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0); 279static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0); 280 281 282/* 283 * Returns hash value for virtual service 284 */ 285static __inline__ unsigned 286ip_vs_svc_hashkey(unsigned proto, __be32 addr, __be16 port) 287{ 288 register unsigned porth = ntohs(port); 289 290 return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth) 291 & IP_VS_SVC_TAB_MASK; 292} 293 294/* 295 * Returns hash value of fwmark for virtual service lookup 296 */ 297static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark) 298{ 299 return fwmark & IP_VS_SVC_TAB_MASK; 300} 301 302/* 303 * Hashes a service in the ip_vs_svc_table by <proto,addr,port> 304 * or in the ip_vs_svc_fwm_table by fwmark. 305 * Should be called with locked tables. 306 */ 307static int ip_vs_svc_hash(struct ip_vs_service *svc) 308{ 309 unsigned hash; 310 311 if (svc->flags & IP_VS_SVC_F_HASHED) { 312 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, " 313 "called from %p\n", __builtin_return_address(0)); 314 return 0; 315 } 316 317 if (svc->fwmark == 0) { 318 /* 319 * Hash it by <protocol,addr,port> in ip_vs_svc_table 320 */ 321 hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port); 322 list_add(&svc->s_list, &ip_vs_svc_table[hash]); 323 } else { 324 /* 325 * Hash it by fwmark in ip_vs_svc_fwm_table 326 */ 327 hash = ip_vs_svc_fwm_hashkey(svc->fwmark); 328 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]); 329 } 330 331 svc->flags |= IP_VS_SVC_F_HASHED; 332 /* increase its refcnt because it is referenced by the svc table */ 333 atomic_inc(&svc->refcnt); 334 return 1; 335} 336 337 338/* 339 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table. 340 * Should be called with locked tables. 341 */ 342static int ip_vs_svc_unhash(struct ip_vs_service *svc) 343{ 344 if (!(svc->flags & IP_VS_SVC_F_HASHED)) { 345 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, " 346 "called from %p\n", __builtin_return_address(0)); 347 return 0; 348 } 349 350 if (svc->fwmark == 0) { 351 /* Remove it from the ip_vs_svc_table table */ 352 list_del(&svc->s_list); 353 } else { 354 /* Remove it from the ip_vs_svc_fwm_table table */ 355 list_del(&svc->f_list); 356 } 357 358 svc->flags &= ~IP_VS_SVC_F_HASHED; 359 atomic_dec(&svc->refcnt); 360 return 1; 361} 362 363 364/* 365 * Get service by {proto,addr,port} in the service table. 366 */ 367static __inline__ struct ip_vs_service * 368__ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport) 369{ 370 unsigned hash; 371 struct ip_vs_service *svc; 372 373 /* Check for "full" addressed entries */ 374 hash = ip_vs_svc_hashkey(protocol, vaddr, vport); 375 376 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){ 377 if ((svc->addr == vaddr) 378 && (svc->port == vport) 379 && (svc->protocol == protocol)) { 380 /* HIT */ 381 atomic_inc(&svc->usecnt); 382 return svc; 383 } 384 } 385 386 return NULL; 387} 388 389 390/* 391 * Get service by {fwmark} in the service table. 392 */ 393static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark) 394{ 395 unsigned hash; 396 struct ip_vs_service *svc; 397 398 /* Check for fwmark addressed entries */ 399 hash = ip_vs_svc_fwm_hashkey(fwmark); 400 401 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { 402 if (svc->fwmark == fwmark) { 403 /* HIT */ 404 atomic_inc(&svc->usecnt); 405 return svc; 406 } 407 } 408 409 return NULL; 410} 411 412struct ip_vs_service * 413ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport) 414{ 415 struct ip_vs_service *svc; 416 417 read_lock(&__ip_vs_svc_lock); 418 419 /* 420 * Check the table hashed by fwmark first 421 */ 422 if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark))) 423 goto out; 424 425 /* 426 * Check the table hashed by <protocol,addr,port> 427 * for "full" addressed entries 428 */ 429 svc = __ip_vs_service_get(protocol, vaddr, vport); 430 431 if (svc == NULL 432 && protocol == IPPROTO_TCP 433 && atomic_read(&ip_vs_ftpsvc_counter) 434 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) { 435 /* 436 * Check if ftp service entry exists, the packet 437 * might belong to FTP data connections. 438 */ 439 svc = __ip_vs_service_get(protocol, vaddr, FTPPORT); 440 } 441 442 if (svc == NULL 443 && atomic_read(&ip_vs_nullsvc_counter)) { 444 /* 445 * Check if the catch-all port (port zero) exists 446 */ 447 svc = __ip_vs_service_get(protocol, vaddr, 0); 448 } 449 450 out: 451 read_unlock(&__ip_vs_svc_lock); 452 453 IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n", 454 fwmark, ip_vs_proto_name(protocol), 455 NIPQUAD(vaddr), ntohs(vport), 456 svc?"hit":"not hit"); 457 458 return svc; 459} 460 461 462static inline void 463__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) 464{ 465 atomic_inc(&svc->refcnt); 466 dest->svc = svc; 467} 468 469static inline void 470__ip_vs_unbind_svc(struct ip_vs_dest *dest) 471{ 472 struct ip_vs_service *svc = dest->svc; 473 474 dest->svc = NULL; 475 if (atomic_dec_and_test(&svc->refcnt)) 476 kfree(svc); 477} 478 479 480/* 481 * Returns hash value for real service 482 */ 483static __inline__ unsigned ip_vs_rs_hashkey(__be32 addr, __be16 port) 484{ 485 register unsigned porth = ntohs(port); 486 487 return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth) 488 & IP_VS_RTAB_MASK; 489} 490 491/* 492 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>. 493 * should be called with locked tables. 494 */ 495static int ip_vs_rs_hash(struct ip_vs_dest *dest) 496{ 497 unsigned hash; 498 499 if (!list_empty(&dest->d_list)) { 500 return 0; 501 } 502 503 /* 504 * Hash by proto,addr,port, 505 * which are the parameters of the real service. 506 */ 507 hash = ip_vs_rs_hashkey(dest->addr, dest->port); 508 list_add(&dest->d_list, &ip_vs_rtable[hash]); 509 510 return 1; 511} 512 513/* 514 * UNhashes ip_vs_dest from ip_vs_rtable. 515 * should be called with locked tables. 516 */ 517static int ip_vs_rs_unhash(struct ip_vs_dest *dest) 518{ 519 /* 520 * Remove it from the ip_vs_rtable table. 521 */ 522 if (!list_empty(&dest->d_list)) { 523 list_del(&dest->d_list); 524 INIT_LIST_HEAD(&dest->d_list); 525 } 526 527 return 1; 528} 529 530/* 531 * Lookup real service by <proto,addr,port> in the real service table. 532 */ 533struct ip_vs_dest * 534ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport) 535{ 536 unsigned hash; 537 struct ip_vs_dest *dest; 538 539 /* 540 * Check for "full" addressed entries 541 * Return the first found entry 542 */ 543 hash = ip_vs_rs_hashkey(daddr, dport); 544 545 read_lock(&__ip_vs_rs_lock); 546 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) { 547 if ((dest->addr == daddr) 548 && (dest->port == dport) 549 && ((dest->protocol == protocol) || 550 dest->vfwmark)) { 551 /* HIT */ 552 read_unlock(&__ip_vs_rs_lock); 553 return dest; 554 } 555 } 556 read_unlock(&__ip_vs_rs_lock); 557 558 return NULL; 559} 560 561/* 562 * Lookup destination by {addr,port} in the given service 563 */ 564static struct ip_vs_dest * 565ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) 566{ 567 struct ip_vs_dest *dest; 568 569 /* 570 * Find the destination for the given service 571 */ 572 list_for_each_entry(dest, &svc->destinations, n_list) { 573 if ((dest->addr == daddr) && (dest->port == dport)) { 574 /* HIT */ 575 return dest; 576 } 577 } 578 579 return NULL; 580} 581 582 583/* 584 * Lookup dest by {svc,addr,port} in the destination trash. 585 * The destination trash is used to hold the destinations that are removed 586 * from the service table but are still referenced by some conn entries. 587 * The reason to add the destination trash is when the dest is temporary 588 * down (either by administrator or by monitor program), the dest can be 589 * picked back from the trash, the remaining connections to the dest can 590 * continue, and the counting information of the dest is also useful for 591 * scheduling. 592 */ 593static struct ip_vs_dest * 594ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) 595{ 596 struct ip_vs_dest *dest, *nxt; 597 598 /* 599 * Find the destination in trash 600 */ 601 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { 602 IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, " 603 "dest->refcnt=%d\n", 604 dest->vfwmark, 605 NIPQUAD(dest->addr), ntohs(dest->port), 606 atomic_read(&dest->refcnt)); 607 if (dest->addr == daddr && 608 dest->port == dport && 609 dest->vfwmark == svc->fwmark && 610 dest->protocol == svc->protocol && 611 (svc->fwmark || 612 (dest->vaddr == svc->addr && 613 dest->vport == svc->port))) { 614 /* HIT */ 615 return dest; 616 } 617 618 /* 619 * Try to purge the destination from trash if not referenced 620 */ 621 if (atomic_read(&dest->refcnt) == 1) { 622 IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u " 623 "from trash\n", 624 dest->vfwmark, 625 NIPQUAD(dest->addr), ntohs(dest->port)); 626 list_del(&dest->n_list); 627 ip_vs_dst_reset(dest); 628 __ip_vs_unbind_svc(dest); 629 kfree(dest); 630 } 631 } 632 633 return NULL; 634} 635 636 637/* 638 * Clean up all the destinations in the trash 639 * Called by the ip_vs_control_cleanup() 640 * 641 * When the ip_vs_control_clearup is activated by ipvs module exit, 642 * the service tables must have been flushed and all the connections 643 * are expired, and the refcnt of each destination in the trash must 644 * be 1, so we simply release them here. 645 */ 646static void ip_vs_trash_cleanup(void) 647{ 648 struct ip_vs_dest *dest, *nxt; 649 650 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { 651 list_del(&dest->n_list); 652 ip_vs_dst_reset(dest); 653 __ip_vs_unbind_svc(dest); 654 kfree(dest); 655 } 656} 657 658 659static void 660ip_vs_zero_stats(struct ip_vs_stats *stats) 661{ 662 spin_lock_bh(&stats->lock); 663 memset(stats, 0, (char *)&stats->lock - (char *)stats); 664 spin_unlock_bh(&stats->lock); 665 ip_vs_zero_estimator(stats); 666} 667 668/* 669 * Update a destination in the given service 670 */ 671static void 672__ip_vs_update_dest(struct ip_vs_service *svc, 673 struct ip_vs_dest *dest, struct ip_vs_dest_user *udest) 674{ 675 int conn_flags; 676 677 /* set the weight and the flags */ 678 atomic_set(&dest->weight, udest->weight); 679 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE; 680 681 /* check if local node and update the flags */ 682 if (inet_addr_type(udest->addr) == RTN_LOCAL) { 683 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK) 684 | IP_VS_CONN_F_LOCALNODE; 685 } 686 687 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ 688 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) { 689 conn_flags |= IP_VS_CONN_F_NOOUTPUT; 690 } else { 691 /* 692 * Put the real service in ip_vs_rtable if not present. 693 * For now only for NAT! 694 */ 695 write_lock_bh(&__ip_vs_rs_lock); 696 ip_vs_rs_hash(dest); 697 write_unlock_bh(&__ip_vs_rs_lock); 698 } 699 atomic_set(&dest->conn_flags, conn_flags); 700 701 /* bind the service */ 702 if (!dest->svc) { 703 __ip_vs_bind_svc(dest, svc); 704 } else { 705 if (dest->svc != svc) { 706 __ip_vs_unbind_svc(dest); 707 ip_vs_zero_stats(&dest->stats); 708 __ip_vs_bind_svc(dest, svc); 709 } 710 } 711 712 /* set the dest status flags */ 713 dest->flags |= IP_VS_DEST_F_AVAILABLE; 714 715 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold) 716 dest->flags &= ~IP_VS_DEST_F_OVERLOAD; 717 dest->u_threshold = udest->u_threshold; 718 dest->l_threshold = udest->l_threshold; 719} 720 721 722/* 723 * Create a destination for the given service 724 */ 725static int 726ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest, 727 struct ip_vs_dest **dest_p) 728{ 729 struct ip_vs_dest *dest; 730 unsigned atype; 731 732 EnterFunction(2); 733 734 atype = inet_addr_type(udest->addr); 735 if (atype != RTN_LOCAL && atype != RTN_UNICAST) 736 return -EINVAL; 737 738 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC); 739 if (dest == NULL) { 740 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n"); 741 return -ENOMEM; 742 } 743 744 dest->protocol = svc->protocol; 745 dest->vaddr = svc->addr; 746 dest->vport = svc->port; 747 dest->vfwmark = svc->fwmark; 748 dest->addr = udest->addr; 749 dest->port = udest->port; 750 751 atomic_set(&dest->activeconns, 0); 752 atomic_set(&dest->inactconns, 0); 753 atomic_set(&dest->persistconns, 0); 754 atomic_set(&dest->refcnt, 0); 755 756 INIT_LIST_HEAD(&dest->d_list); 757 spin_lock_init(&dest->dst_lock); 758 spin_lock_init(&dest->stats.lock); 759 __ip_vs_update_dest(svc, dest, udest); 760 ip_vs_new_estimator(&dest->stats); 761 762 *dest_p = dest; 763 764 LeaveFunction(2); 765 return 0; 766} 767 768 769/* 770 * Add a destination into an existing service 771 */ 772static int 773ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) 774{ 775 struct ip_vs_dest *dest; 776 __be32 daddr = udest->addr; 777 __be16 dport = udest->port; 778 int ret; 779 780 EnterFunction(2); 781 782 if (udest->weight < 0) { 783 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n"); 784 return -ERANGE; 785 } 786 787 if (udest->l_threshold > udest->u_threshold) { 788 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than " 789 "upper threshold\n"); 790 return -ERANGE; 791 } 792 793 /* 794 * Check if the dest already exists in the list 795 */ 796 dest = ip_vs_lookup_dest(svc, daddr, dport); 797 if (dest != NULL) { 798 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n"); 799 return -EEXIST; 800 } 801 802 /* 803 * Check if the dest already exists in the trash and 804 * is from the same service 805 */ 806 dest = ip_vs_trash_get_dest(svc, daddr, dport); 807 if (dest != NULL) { 808 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, " 809 "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n", 810 NIPQUAD(daddr), ntohs(dport), 811 atomic_read(&dest->refcnt), 812 dest->vfwmark, 813 NIPQUAD(dest->vaddr), 814 ntohs(dest->vport)); 815 __ip_vs_update_dest(svc, dest, udest); 816 817 /* 818 * Get the destination from the trash 819 */ 820 list_del(&dest->n_list); 821 822 ip_vs_new_estimator(&dest->stats); 823 824 write_lock_bh(&__ip_vs_svc_lock); 825 826 /* 827 * Wait until all other svc users go away. 828 */ 829 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); 830 831 list_add(&dest->n_list, &svc->destinations); 832 svc->num_dests++; 833 834 /* call the update_service function of its scheduler */ 835 svc->scheduler->update_service(svc); 836 837 write_unlock_bh(&__ip_vs_svc_lock); 838 return 0; 839 } 840 841 /* 842 * Allocate and initialize the dest structure 843 */ 844 ret = ip_vs_new_dest(svc, udest, &dest); 845 if (ret) { 846 return ret; 847 } 848 849 /* 850 * Add the dest entry into the list 851 */ 852 atomic_inc(&dest->refcnt); 853 854 write_lock_bh(&__ip_vs_svc_lock); 855 856 /* 857 * Wait until all other svc users go away. 858 */ 859 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); 860 861 list_add(&dest->n_list, &svc->destinations); 862 svc->num_dests++; 863 864 /* call the update_service function of its scheduler */ 865 svc->scheduler->update_service(svc); 866 867 write_unlock_bh(&__ip_vs_svc_lock); 868 869 LeaveFunction(2); 870 871 return 0; 872} 873 874 875/* 876 * Edit a destination in the given service 877 */ 878static int 879ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) 880{ 881 struct ip_vs_dest *dest; 882 __be32 daddr = udest->addr; 883 __be16 dport = udest->port; 884 885 EnterFunction(2); 886 887 if (udest->weight < 0) { 888 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n"); 889 return -ERANGE; 890 } 891 892 if (udest->l_threshold > udest->u_threshold) { 893 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than " 894 "upper threshold\n"); 895 return -ERANGE; 896 } 897 898 /* 899 * Lookup the destination list 900 */ 901 dest = ip_vs_lookup_dest(svc, daddr, dport); 902 if (dest == NULL) { 903 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n"); 904 return -ENOENT; 905 } 906 907 __ip_vs_update_dest(svc, dest, udest); 908 909 write_lock_bh(&__ip_vs_svc_lock); 910 911 /* Wait until all other svc users go away */ 912 while (atomic_read(&svc->usecnt) > 1) {}; 913 914 /* call the update_service, because server weight may be changed */ 915 svc->scheduler->update_service(svc); 916 917 write_unlock_bh(&__ip_vs_svc_lock); 918 919 LeaveFunction(2); 920 921 return 0; 922} 923 924 925/* 926 * Delete a destination (must be already unlinked from the service) 927 */ 928static void __ip_vs_del_dest(struct ip_vs_dest *dest) 929{ 930 ip_vs_kill_estimator(&dest->stats); 931 932 /* 933 * Remove it from the d-linked list with the real services. 934 */ 935 write_lock_bh(&__ip_vs_rs_lock); 936 ip_vs_rs_unhash(dest); 937 write_unlock_bh(&__ip_vs_rs_lock); 938 939 /* 940 * Decrease the refcnt of the dest, and free the dest 941 * if nobody refers to it (refcnt=0). Otherwise, throw 942 * the destination into the trash. 943 */ 944 if (atomic_dec_and_test(&dest->refcnt)) { 945 ip_vs_dst_reset(dest); 946 /* simply decrease svc->refcnt here, let the caller check 947 and release the service if nobody refers to it. 948 Only user context can release destination and service, 949 and only one user context can update virtual service at a 950 time, so the operation here is OK */ 951 atomic_dec(&dest->svc->refcnt); 952 kfree(dest); 953 } else { 954 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, " 955 "dest->refcnt=%d\n", 956 NIPQUAD(dest->addr), ntohs(dest->port), 957 atomic_read(&dest->refcnt)); 958 list_add(&dest->n_list, &ip_vs_dest_trash); 959 atomic_inc(&dest->refcnt); 960 } 961} 962 963 964/* 965 * Unlink a destination from the given service 966 */ 967static void __ip_vs_unlink_dest(struct ip_vs_service *svc, 968 struct ip_vs_dest *dest, 969 int svcupd) 970{ 971 dest->flags &= ~IP_VS_DEST_F_AVAILABLE; 972 973 /* 974 * Remove it from the d-linked destination list. 975 */ 976 list_del(&dest->n_list); 977 svc->num_dests--; 978 if (svcupd) { 979 /* 980 * Call the update_service function of its scheduler 981 */ 982 svc->scheduler->update_service(svc); 983 } 984} 985 986 987/* 988 * Delete a destination server in the given service 989 */ 990static int 991ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest) 992{ 993 struct ip_vs_dest *dest; 994 __be32 daddr = udest->addr; 995 __be16 dport = udest->port; 996 997 EnterFunction(2); 998 999 dest = ip_vs_lookup_dest(svc, daddr, dport); 1000 if (dest == NULL) { 1001 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n"); 1002 return -ENOENT; 1003 } 1004 1005 write_lock_bh(&__ip_vs_svc_lock); 1006 1007 /* 1008 * Wait until all other svc users go away. 1009 */ 1010 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); 1011 1012 /* 1013 * Unlink dest from the service 1014 */ 1015 __ip_vs_unlink_dest(svc, dest, 1); 1016 1017 write_unlock_bh(&__ip_vs_svc_lock); 1018 1019 /* 1020 * Delete the destination 1021 */ 1022 __ip_vs_del_dest(dest); 1023 1024 LeaveFunction(2); 1025 1026 return 0; 1027} 1028 1029 1030/* 1031 * Add a service into the service hash table 1032 */ 1033static int 1034ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p) 1035{ 1036 int ret = 0; 1037 struct ip_vs_scheduler *sched = NULL; 1038 struct ip_vs_service *svc = NULL; 1039 1040 /* increase the module use count */ 1041 ip_vs_use_count_inc(); 1042 1043 /* Lookup the scheduler by 'u->sched_name' */ 1044 sched = ip_vs_scheduler_get(u->sched_name); 1045 if (sched == NULL) { 1046 IP_VS_INFO("Scheduler module ip_vs_%s not found\n", 1047 u->sched_name); 1048 ret = -ENOENT; 1049 goto out_mod_dec; 1050 } 1051 1052 svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC); 1053 if (svc == NULL) { 1054 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n"); 1055 ret = -ENOMEM; 1056 goto out_err; 1057 } 1058 1059 /* I'm the first user of the service */ 1060 atomic_set(&svc->usecnt, 1); 1061 atomic_set(&svc->refcnt, 0); 1062 1063 svc->protocol = u->protocol; 1064 svc->addr = u->addr; 1065 svc->port = u->port; 1066 svc->fwmark = u->fwmark; 1067 svc->flags = u->flags; 1068 svc->timeout = u->timeout * HZ; 1069 svc->netmask = u->netmask; 1070 1071 INIT_LIST_HEAD(&svc->destinations); 1072 rwlock_init(&svc->sched_lock); 1073 spin_lock_init(&svc->stats.lock); 1074 1075 /* Bind the scheduler */ 1076 ret = ip_vs_bind_scheduler(svc, sched); 1077 if (ret) 1078 goto out_err; 1079 sched = NULL; 1080 1081 /* Update the virtual service counters */ 1082 if (svc->port == FTPPORT) 1083 atomic_inc(&ip_vs_ftpsvc_counter); 1084 else if (svc->port == 0) 1085 atomic_inc(&ip_vs_nullsvc_counter); 1086 1087 ip_vs_new_estimator(&svc->stats); 1088 ip_vs_num_services++; 1089 1090 /* Hash the service into the service table */ 1091 write_lock_bh(&__ip_vs_svc_lock); 1092 ip_vs_svc_hash(svc); 1093 write_unlock_bh(&__ip_vs_svc_lock); 1094 1095 *svc_p = svc; 1096 return 0; 1097 1098 out_err: 1099 if (svc != NULL) { 1100 if (svc->scheduler) 1101 ip_vs_unbind_scheduler(svc); 1102 if (svc->inc) { 1103 local_bh_disable(); 1104 ip_vs_app_inc_put(svc->inc); 1105 local_bh_enable(); 1106 } 1107 kfree(svc); 1108 } 1109 ip_vs_scheduler_put(sched); 1110 1111 out_mod_dec: 1112 /* decrease the module use count */ 1113 ip_vs_use_count_dec(); 1114 1115 return ret; 1116} 1117 1118 1119/* 1120 * Edit a service and bind it with a new scheduler 1121 */ 1122static int 1123ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u) 1124{ 1125 struct ip_vs_scheduler *sched, *old_sched; 1126 int ret = 0; 1127 1128 /* 1129 * Lookup the scheduler, by 'u->sched_name' 1130 */ 1131 sched = ip_vs_scheduler_get(u->sched_name); 1132 if (sched == NULL) { 1133 IP_VS_INFO("Scheduler module ip_vs_%s not found\n", 1134 u->sched_name); 1135 return -ENOENT; 1136 } 1137 old_sched = sched; 1138 1139 write_lock_bh(&__ip_vs_svc_lock); 1140 1141 /* 1142 * Wait until all other svc users go away. 1143 */ 1144 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); 1145 1146 /* 1147 * Set the flags and timeout value 1148 */ 1149 svc->flags = u->flags | IP_VS_SVC_F_HASHED; 1150 svc->timeout = u->timeout * HZ; 1151 svc->netmask = u->netmask; 1152 1153 old_sched = svc->scheduler; 1154 if (sched != old_sched) { 1155 /* 1156 * Unbind the old scheduler 1157 */ 1158 if ((ret = ip_vs_unbind_scheduler(svc))) { 1159 old_sched = sched; 1160 goto out; 1161 } 1162 1163 /* 1164 * Bind the new scheduler 1165 */ 1166 if ((ret = ip_vs_bind_scheduler(svc, sched))) { 1167 /* 1168 * If ip_vs_bind_scheduler fails, restore the old 1169 * scheduler. 1170 * The main reason of failure is out of memory. 1171 * 1172 * The question is if the old scheduler can be 1173 * restored all the time. TODO: if it cannot be 1174 * restored some time, we must delete the service, 1175 * otherwise the system may crash. 1176 */ 1177 ip_vs_bind_scheduler(svc, old_sched); 1178 old_sched = sched; 1179 goto out; 1180 } 1181 } 1182 1183 out: 1184 write_unlock_bh(&__ip_vs_svc_lock); 1185 1186 if (old_sched) 1187 ip_vs_scheduler_put(old_sched); 1188 1189 return ret; 1190} 1191 1192 1193/* 1194 * Delete a service from the service list 1195 * - The service must be unlinked, unlocked and not referenced! 1196 * - We are called under _bh lock 1197 */ 1198static void __ip_vs_del_service(struct ip_vs_service *svc) 1199{ 1200 struct ip_vs_dest *dest, *nxt; 1201 struct ip_vs_scheduler *old_sched; 1202 1203 ip_vs_num_services--; 1204 ip_vs_kill_estimator(&svc->stats); 1205 1206 /* Unbind scheduler */ 1207 old_sched = svc->scheduler; 1208 ip_vs_unbind_scheduler(svc); 1209 if (old_sched) 1210 ip_vs_scheduler_put(old_sched); 1211 1212 /* Unbind app inc */ 1213 if (svc->inc) { 1214 ip_vs_app_inc_put(svc->inc); 1215 svc->inc = NULL; 1216 } 1217 1218 /* 1219 * Unlink the whole destination list 1220 */ 1221 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) { 1222 __ip_vs_unlink_dest(svc, dest, 0); 1223 __ip_vs_del_dest(dest); 1224 } 1225 1226 /* 1227 * Update the virtual service counters 1228 */ 1229 if (svc->port == FTPPORT) 1230 atomic_dec(&ip_vs_ftpsvc_counter); 1231 else if (svc->port == 0) 1232 atomic_dec(&ip_vs_nullsvc_counter); 1233 1234 /* 1235 * Free the service if nobody refers to it 1236 */ 1237 if (atomic_read(&svc->refcnt) == 0) 1238 kfree(svc); 1239 1240 /* decrease the module use count */ 1241 ip_vs_use_count_dec(); 1242} 1243 1244/* 1245 * Delete a service from the service list 1246 */ 1247static int ip_vs_del_service(struct ip_vs_service *svc) 1248{ 1249 if (svc == NULL) 1250 return -EEXIST; 1251 1252 /* 1253 * Unhash it from the service table 1254 */ 1255 write_lock_bh(&__ip_vs_svc_lock); 1256 1257 ip_vs_svc_unhash(svc); 1258 1259 /* 1260 * Wait until all the svc users go away. 1261 */ 1262 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); 1263 1264 __ip_vs_del_service(svc); 1265 1266 write_unlock_bh(&__ip_vs_svc_lock); 1267 1268 return 0; 1269} 1270 1271 1272/* 1273 * Flush all the virtual services 1274 */ 1275static int ip_vs_flush(void) 1276{ 1277 int idx; 1278 struct ip_vs_service *svc, *nxt; 1279 1280 /* 1281 * Flush the service table hashed by <protocol,addr,port> 1282 */ 1283 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1284 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) { 1285 write_lock_bh(&__ip_vs_svc_lock); 1286 ip_vs_svc_unhash(svc); 1287 /* 1288 * Wait until all the svc users go away. 1289 */ 1290 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); 1291 __ip_vs_del_service(svc); 1292 write_unlock_bh(&__ip_vs_svc_lock); 1293 } 1294 } 1295 1296 /* 1297 * Flush the service table hashed by fwmark 1298 */ 1299 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1300 list_for_each_entry_safe(svc, nxt, 1301 &ip_vs_svc_fwm_table[idx], f_list) { 1302 write_lock_bh(&__ip_vs_svc_lock); 1303 ip_vs_svc_unhash(svc); 1304 /* 1305 * Wait until all the svc users go away. 1306 */ 1307 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0); 1308 __ip_vs_del_service(svc); 1309 write_unlock_bh(&__ip_vs_svc_lock); 1310 } 1311 } 1312 1313 return 0; 1314} 1315 1316 1317/* 1318 * Zero counters in a service or all services 1319 */ 1320static int ip_vs_zero_service(struct ip_vs_service *svc) 1321{ 1322 struct ip_vs_dest *dest; 1323 1324 write_lock_bh(&__ip_vs_svc_lock); 1325 list_for_each_entry(dest, &svc->destinations, n_list) { 1326 ip_vs_zero_stats(&dest->stats); 1327 } 1328 ip_vs_zero_stats(&svc->stats); 1329 write_unlock_bh(&__ip_vs_svc_lock); 1330 return 0; 1331} 1332 1333static int ip_vs_zero_all(void) 1334{ 1335 int idx; 1336 struct ip_vs_service *svc; 1337 1338 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1339 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 1340 ip_vs_zero_service(svc); 1341 } 1342 } 1343 1344 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1345 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 1346 ip_vs_zero_service(svc); 1347 } 1348 } 1349 1350 ip_vs_zero_stats(&ip_vs_stats); 1351 return 0; 1352} 1353 1354 1355static int 1356proc_do_defense_mode(ctl_table *table, int write, struct file * filp, 1357 void __user *buffer, size_t *lenp, loff_t *ppos) 1358{ 1359 int *valp = table->data; 1360 int val = *valp; 1361 int rc; 1362 1363 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos); 1364 if (write && (*valp != val)) { 1365 if ((*valp < 0) || (*valp > 3)) { 1366 /* Restore the correct value */ 1367 *valp = val; 1368 } else { 1369 update_defense_level(); 1370 } 1371 } 1372 return rc; 1373} 1374 1375 1376static int 1377proc_do_sync_threshold(ctl_table *table, int write, struct file *filp, 1378 void __user *buffer, size_t *lenp, loff_t *ppos) 1379{ 1380 int *valp = table->data; 1381 int val[2]; 1382 int rc; 1383 1384 /* backup the value first */ 1385 memcpy(val, valp, sizeof(val)); 1386 1387 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos); 1388 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) { 1389 /* Restore the correct value */ 1390 memcpy(valp, val, sizeof(val)); 1391 } 1392 return rc; 1393} 1394 1395 1396/* 1397 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) 1398 */ 1399 1400static struct ctl_table vs_vars[] = { 1401 { 1402 .ctl_name = NET_IPV4_VS_AMEMTHRESH, 1403 .procname = "amemthresh", 1404 .data = &sysctl_ip_vs_amemthresh, 1405 .maxlen = sizeof(int), 1406 .mode = 0644, 1407 .proc_handler = &proc_dointvec, 1408 }, 1409#ifdef CONFIG_IP_VS_DEBUG 1410 { 1411 .ctl_name = NET_IPV4_VS_DEBUG_LEVEL, 1412 .procname = "debug_level", 1413 .data = &sysctl_ip_vs_debug_level, 1414 .maxlen = sizeof(int), 1415 .mode = 0644, 1416 .proc_handler = &proc_dointvec, 1417 }, 1418#endif 1419 { 1420 .ctl_name = NET_IPV4_VS_AMDROPRATE, 1421 .procname = "am_droprate", 1422 .data = &sysctl_ip_vs_am_droprate, 1423 .maxlen = sizeof(int), 1424 .mode = 0644, 1425 .proc_handler = &proc_dointvec, 1426 }, 1427 { 1428 .ctl_name = NET_IPV4_VS_DROP_ENTRY, 1429 .procname = "drop_entry", 1430 .data = &sysctl_ip_vs_drop_entry, 1431 .maxlen = sizeof(int), 1432 .mode = 0644, 1433 .proc_handler = &proc_do_defense_mode, 1434 }, 1435 { 1436 .ctl_name = NET_IPV4_VS_DROP_PACKET, 1437 .procname = "drop_packet", 1438 .data = &sysctl_ip_vs_drop_packet, 1439 .maxlen = sizeof(int), 1440 .mode = 0644, 1441 .proc_handler = &proc_do_defense_mode, 1442 }, 1443 { 1444 .ctl_name = NET_IPV4_VS_SECURE_TCP, 1445 .procname = "secure_tcp", 1446 .data = &sysctl_ip_vs_secure_tcp, 1447 .maxlen = sizeof(int), 1448 .mode = 0644, 1449 .proc_handler = &proc_do_defense_mode, 1450 }, 1451 { 1452 .ctl_name = NET_IPV4_VS_CACHE_BYPASS, 1453 .procname = "cache_bypass", 1454 .data = &sysctl_ip_vs_cache_bypass, 1455 .maxlen = sizeof(int), 1456 .mode = 0644, 1457 .proc_handler = &proc_dointvec, 1458 }, 1459 { 1460 .ctl_name = NET_IPV4_VS_EXPIRE_NODEST_CONN, 1461 .procname = "expire_nodest_conn", 1462 .data = &sysctl_ip_vs_expire_nodest_conn, 1463 .maxlen = sizeof(int), 1464 .mode = 0644, 1465 .proc_handler = &proc_dointvec, 1466 }, 1467 { 1468 .ctl_name = NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE, 1469 .procname = "expire_quiescent_template", 1470 .data = &sysctl_ip_vs_expire_quiescent_template, 1471 .maxlen = sizeof(int), 1472 .mode = 0644, 1473 .proc_handler = &proc_dointvec, 1474 }, 1475 { 1476 .ctl_name = NET_IPV4_VS_SYNC_THRESHOLD, 1477 .procname = "sync_threshold", 1478 .data = &sysctl_ip_vs_sync_threshold, 1479 .maxlen = sizeof(sysctl_ip_vs_sync_threshold), 1480 .mode = 0644, 1481 .proc_handler = &proc_do_sync_threshold, 1482 }, 1483 { 1484 .ctl_name = NET_IPV4_VS_NAT_ICMP_SEND, 1485 .procname = "nat_icmp_send", 1486 .data = &sysctl_ip_vs_nat_icmp_send, 1487 .maxlen = sizeof(int), 1488 .mode = 0644, 1489 .proc_handler = &proc_dointvec, 1490 }, 1491 { .ctl_name = 0 } 1492}; 1493 1494static ctl_table vs_table[] = { 1495 { 1496 .ctl_name = NET_IPV4_VS, 1497 .procname = "vs", 1498 .mode = 0555, 1499 .child = vs_vars 1500 }, 1501 { .ctl_name = 0 } 1502}; 1503 1504static ctl_table ipvs_ipv4_table[] = { 1505 { 1506 .ctl_name = NET_IPV4, 1507 .procname = "ipv4", 1508 .mode = 0555, 1509 .child = vs_table, 1510 }, 1511 { .ctl_name = 0 } 1512}; 1513 1514static ctl_table vs_root_table[] = { 1515 { 1516 .ctl_name = CTL_NET, 1517 .procname = "net", 1518 .mode = 0555, 1519 .child = ipvs_ipv4_table, 1520 }, 1521 { .ctl_name = 0 } 1522}; 1523 1524static struct ctl_table_header * sysctl_header; 1525 1526#ifdef CONFIG_PROC_FS 1527 1528struct ip_vs_iter { 1529 struct list_head *table; 1530 int bucket; 1531}; 1532 1533/* 1534 * Write the contents of the VS rule table to a PROCfs file. 1535 * (It is kept just for backward compatibility) 1536 */ 1537static inline const char *ip_vs_fwd_name(unsigned flags) 1538{ 1539 switch (flags & IP_VS_CONN_F_FWD_MASK) { 1540 case IP_VS_CONN_F_LOCALNODE: 1541 return "Local"; 1542 case IP_VS_CONN_F_TUNNEL: 1543 return "Tunnel"; 1544 case IP_VS_CONN_F_DROUTE: 1545 return "Route"; 1546 default: 1547 return "Masq"; 1548 } 1549} 1550 1551 1552/* Get the Nth entry in the two lists */ 1553static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) 1554{ 1555 struct ip_vs_iter *iter = seq->private; 1556 int idx; 1557 struct ip_vs_service *svc; 1558 1559 /* look in hash by protocol */ 1560 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1561 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 1562 if (pos-- == 0){ 1563 iter->table = ip_vs_svc_table; 1564 iter->bucket = idx; 1565 return svc; 1566 } 1567 } 1568 } 1569 1570 /* keep looking in fwmark */ 1571 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1572 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 1573 if (pos-- == 0) { 1574 iter->table = ip_vs_svc_fwm_table; 1575 iter->bucket = idx; 1576 return svc; 1577 } 1578 } 1579 } 1580 1581 return NULL; 1582} 1583 1584static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos) 1585{ 1586 1587 read_lock_bh(&__ip_vs_svc_lock); 1588 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN; 1589} 1590 1591 1592static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) 1593{ 1594 struct list_head *e; 1595 struct ip_vs_iter *iter; 1596 struct ip_vs_service *svc; 1597 1598 ++*pos; 1599 if (v == SEQ_START_TOKEN) 1600 return ip_vs_info_array(seq,0); 1601 1602 svc = v; 1603 iter = seq->private; 1604 1605 if (iter->table == ip_vs_svc_table) { 1606 /* next service in table hashed by protocol */ 1607 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket]) 1608 return list_entry(e, struct ip_vs_service, s_list); 1609 1610 1611 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { 1612 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket], 1613 s_list) { 1614 return svc; 1615 } 1616 } 1617 1618 iter->table = ip_vs_svc_fwm_table; 1619 iter->bucket = -1; 1620 goto scan_fwmark; 1621 } 1622 1623 /* next service in hashed by fwmark */ 1624 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket]) 1625 return list_entry(e, struct ip_vs_service, f_list); 1626 1627 scan_fwmark: 1628 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { 1629 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket], 1630 f_list) 1631 return svc; 1632 } 1633 1634 return NULL; 1635} 1636 1637static void ip_vs_info_seq_stop(struct seq_file *seq, void *v) 1638{ 1639 read_unlock_bh(&__ip_vs_svc_lock); 1640} 1641 1642 1643static int ip_vs_info_seq_show(struct seq_file *seq, void *v) 1644{ 1645 if (v == SEQ_START_TOKEN) { 1646 seq_printf(seq, 1647 "IP Virtual Server version %d.%d.%d (size=%d)\n", 1648 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE); 1649 seq_puts(seq, 1650 "Prot LocalAddress:Port Scheduler Flags\n"); 1651 seq_puts(seq, 1652 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n"); 1653 } else { 1654 const struct ip_vs_service *svc = v; 1655 const struct ip_vs_iter *iter = seq->private; 1656 const struct ip_vs_dest *dest; 1657 1658 if (iter->table == ip_vs_svc_table) 1659 seq_printf(seq, "%s %08X:%04X %s ", 1660 ip_vs_proto_name(svc->protocol), 1661 ntohl(svc->addr), 1662 ntohs(svc->port), 1663 svc->scheduler->name); 1664 else 1665 seq_printf(seq, "FWM %08X %s ", 1666 svc->fwmark, svc->scheduler->name); 1667 1668 if (svc->flags & IP_VS_SVC_F_PERSISTENT) 1669 seq_printf(seq, "persistent %d %08X\n", 1670 svc->timeout, 1671 ntohl(svc->netmask)); 1672 else 1673 seq_putc(seq, '\n'); 1674 1675 list_for_each_entry(dest, &svc->destinations, n_list) { 1676 seq_printf(seq, 1677 " -> %08X:%04X %-7s %-6d %-10d %-10d\n", 1678 ntohl(dest->addr), ntohs(dest->port), 1679 ip_vs_fwd_name(atomic_read(&dest->conn_flags)), 1680 atomic_read(&dest->weight), 1681 atomic_read(&dest->activeconns), 1682 atomic_read(&dest->inactconns)); 1683 } 1684 } 1685 return 0; 1686} 1687 1688static struct seq_operations ip_vs_info_seq_ops = { 1689 .start = ip_vs_info_seq_start, 1690 .next = ip_vs_info_seq_next, 1691 .stop = ip_vs_info_seq_stop, 1692 .show = ip_vs_info_seq_show, 1693}; 1694 1695static int ip_vs_info_open(struct inode *inode, struct file *file) 1696{ 1697 struct seq_file *seq; 1698 int rc = -ENOMEM; 1699 struct ip_vs_iter *s = kzalloc(sizeof(*s), GFP_KERNEL); 1700 1701 if (!s) 1702 goto out; 1703 1704 rc = seq_open(file, &ip_vs_info_seq_ops); 1705 if (rc) 1706 goto out_kfree; 1707 1708 seq = file->private_data; 1709 seq->private = s; 1710out: 1711 return rc; 1712out_kfree: 1713 kfree(s); 1714 goto out; 1715} 1716 1717static const struct file_operations ip_vs_info_fops = { 1718 .owner = THIS_MODULE, 1719 .open = ip_vs_info_open, 1720 .read = seq_read, 1721 .llseek = seq_lseek, 1722 .release = seq_release_private, 1723}; 1724 1725#endif 1726 1727struct ip_vs_stats ip_vs_stats; 1728 1729#ifdef CONFIG_PROC_FS 1730static int ip_vs_stats_show(struct seq_file *seq, void *v) 1731{ 1732 1733/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 1734 seq_puts(seq, 1735 " Total Incoming Outgoing Incoming Outgoing\n"); 1736 seq_printf(seq, 1737 " Conns Packets Packets Bytes Bytes\n"); 1738 1739 spin_lock_bh(&ip_vs_stats.lock); 1740 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns, 1741 ip_vs_stats.inpkts, ip_vs_stats.outpkts, 1742 (unsigned long long) ip_vs_stats.inbytes, 1743 (unsigned long long) ip_vs_stats.outbytes); 1744 1745/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 1746 seq_puts(seq, 1747 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); 1748 seq_printf(seq,"%8X %8X %8X %16X %16X\n", 1749 ip_vs_stats.cps, 1750 ip_vs_stats.inpps, 1751 ip_vs_stats.outpps, 1752 ip_vs_stats.inbps, 1753 ip_vs_stats.outbps); 1754 spin_unlock_bh(&ip_vs_stats.lock); 1755 1756 return 0; 1757} 1758 1759static int ip_vs_stats_seq_open(struct inode *inode, struct file *file) 1760{ 1761 return single_open(file, ip_vs_stats_show, NULL); 1762} 1763 1764static const struct file_operations ip_vs_stats_fops = { 1765 .owner = THIS_MODULE, 1766 .open = ip_vs_stats_seq_open, 1767 .read = seq_read, 1768 .llseek = seq_lseek, 1769 .release = single_release, 1770}; 1771 1772#endif 1773 1774/* 1775 * Set timeout values for tcp tcpfin udp in the timeout_table. 1776 */ 1777static int ip_vs_set_timeout(struct ip_vs_timeout_user *u) 1778{ 1779 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n", 1780 u->tcp_timeout, 1781 u->tcp_fin_timeout, 1782 u->udp_timeout); 1783 1784#ifdef CONFIG_IP_VS_PROTO_TCP 1785 if (u->tcp_timeout) { 1786 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] 1787 = u->tcp_timeout * HZ; 1788 } 1789 1790 if (u->tcp_fin_timeout) { 1791 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] 1792 = u->tcp_fin_timeout * HZ; 1793 } 1794#endif 1795 1796#ifdef CONFIG_IP_VS_PROTO_UDP 1797 if (u->udp_timeout) { 1798 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] 1799 = u->udp_timeout * HZ; 1800 } 1801#endif 1802 return 0; 1803} 1804 1805 1806#define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL) 1807#define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user)) 1808#define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \ 1809 sizeof(struct ip_vs_dest_user)) 1810#define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user)) 1811#define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user)) 1812#define MAX_ARG_LEN SVCDEST_ARG_LEN 1813 1814static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = { 1815 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN, 1816 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN, 1817 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN, 1818 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0, 1819 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN, 1820 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN, 1821 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN, 1822 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN, 1823 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN, 1824 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN, 1825 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN, 1826}; 1827 1828static int 1829do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) 1830{ 1831 int ret; 1832 unsigned char arg[MAX_ARG_LEN]; 1833 struct ip_vs_service_user *usvc; 1834 struct ip_vs_service *svc; 1835 struct ip_vs_dest_user *udest; 1836 1837 if (!capable(CAP_NET_ADMIN)) 1838 return -EPERM; 1839 1840 if (len != set_arglen[SET_CMDID(cmd)]) { 1841 IP_VS_ERR("set_ctl: len %u != %u\n", 1842 len, set_arglen[SET_CMDID(cmd)]); 1843 return -EINVAL; 1844 } 1845 1846 if (copy_from_user(arg, user, len) != 0) 1847 return -EFAULT; 1848 1849 /* increase the module use count */ 1850 ip_vs_use_count_inc(); 1851 1852 if (mutex_lock_interruptible(&__ip_vs_mutex)) { 1853 ret = -ERESTARTSYS; 1854 goto out_dec; 1855 } 1856 1857 if (cmd == IP_VS_SO_SET_FLUSH) { 1858 /* Flush the virtual service */ 1859 ret = ip_vs_flush(); 1860 goto out_unlock; 1861 } else if (cmd == IP_VS_SO_SET_TIMEOUT) { 1862 /* Set timeout values for (tcp tcpfin udp) */ 1863 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg); 1864 goto out_unlock; 1865 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) { 1866 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; 1867 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid); 1868 goto out_unlock; 1869 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) { 1870 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; 1871 ret = stop_sync_thread(dm->state); 1872 goto out_unlock; 1873 } 1874 1875 usvc = (struct ip_vs_service_user *)arg; 1876 udest = (struct ip_vs_dest_user *)(usvc + 1); 1877 1878 if (cmd == IP_VS_SO_SET_ZERO) { 1879 /* if no service address is set, zero counters in all */ 1880 if (!usvc->fwmark && !usvc->addr && !usvc->port) { 1881 ret = ip_vs_zero_all(); 1882 goto out_unlock; 1883 } 1884 } 1885 1886 /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */ 1887 if (usvc->protocol!=IPPROTO_TCP && usvc->protocol!=IPPROTO_UDP) { 1888 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n", 1889 usvc->protocol, NIPQUAD(usvc->addr), 1890 ntohs(usvc->port), usvc->sched_name); 1891 ret = -EFAULT; 1892 goto out_unlock; 1893 } 1894 1895 /* Lookup the exact service by <protocol, addr, port> or fwmark */ 1896 if (usvc->fwmark == 0) 1897 svc = __ip_vs_service_get(usvc->protocol, 1898 usvc->addr, usvc->port); 1899 else 1900 svc = __ip_vs_svc_fwm_get(usvc->fwmark); 1901 1902 if (cmd != IP_VS_SO_SET_ADD 1903 && (svc == NULL || svc->protocol != usvc->protocol)) { 1904 ret = -ESRCH; 1905 goto out_unlock; 1906 } 1907 1908 switch (cmd) { 1909 case IP_VS_SO_SET_ADD: 1910 if (svc != NULL) 1911 ret = -EEXIST; 1912 else 1913 ret = ip_vs_add_service(usvc, &svc); 1914 break; 1915 case IP_VS_SO_SET_EDIT: 1916 ret = ip_vs_edit_service(svc, usvc); 1917 break; 1918 case IP_VS_SO_SET_DEL: 1919 ret = ip_vs_del_service(svc); 1920 if (!ret) 1921 goto out_unlock; 1922 break; 1923 case IP_VS_SO_SET_ZERO: 1924 ret = ip_vs_zero_service(svc); 1925 break; 1926 case IP_VS_SO_SET_ADDDEST: 1927 ret = ip_vs_add_dest(svc, udest); 1928 break; 1929 case IP_VS_SO_SET_EDITDEST: 1930 ret = ip_vs_edit_dest(svc, udest); 1931 break; 1932 case IP_VS_SO_SET_DELDEST: 1933 ret = ip_vs_del_dest(svc, udest); 1934 break; 1935 default: 1936 ret = -EINVAL; 1937 } 1938 1939 if (svc) 1940 ip_vs_service_put(svc); 1941 1942 out_unlock: 1943 mutex_unlock(&__ip_vs_mutex); 1944 out_dec: 1945 /* decrease the module use count */ 1946 ip_vs_use_count_dec(); 1947 1948 return ret; 1949} 1950 1951 1952static void 1953ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src) 1954{ 1955 spin_lock_bh(&src->lock); 1956 memcpy(dst, src, (char*)&src->lock - (char*)src); 1957 spin_unlock_bh(&src->lock); 1958} 1959 1960static void 1961ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) 1962{ 1963 dst->protocol = src->protocol; 1964 dst->addr = src->addr; 1965 dst->port = src->port; 1966 dst->fwmark = src->fwmark; 1967 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name)); 1968 dst->flags = src->flags; 1969 dst->timeout = src->timeout / HZ; 1970 dst->netmask = src->netmask; 1971 dst->num_dests = src->num_dests; 1972 ip_vs_copy_stats(&dst->stats, &src->stats); 1973} 1974 1975static inline int 1976__ip_vs_get_service_entries(const struct ip_vs_get_services *get, 1977 struct ip_vs_get_services __user *uptr) 1978{ 1979 int idx, count=0; 1980 struct ip_vs_service *svc; 1981 struct ip_vs_service_entry entry; 1982 int ret = 0; 1983 1984 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1985 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 1986 if (count >= get->num_services) 1987 goto out; 1988 memset(&entry, 0, sizeof(entry)); 1989 ip_vs_copy_service(&entry, svc); 1990 if (copy_to_user(&uptr->entrytable[count], 1991 &entry, sizeof(entry))) { 1992 ret = -EFAULT; 1993 goto out; 1994 } 1995 count++; 1996 } 1997 } 1998 1999 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2000 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 2001 if (count >= get->num_services) 2002 goto out; 2003 memset(&entry, 0, sizeof(entry)); 2004 ip_vs_copy_service(&entry, svc); 2005 if (copy_to_user(&uptr->entrytable[count], 2006 &entry, sizeof(entry))) { 2007 ret = -EFAULT; 2008 goto out; 2009 } 2010 count++; 2011 } 2012 } 2013 out: 2014 return ret; 2015} 2016 2017static inline int 2018__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, 2019 struct ip_vs_get_dests __user *uptr) 2020{ 2021 struct ip_vs_service *svc; 2022 int ret = 0; 2023 2024 if (get->fwmark) 2025 svc = __ip_vs_svc_fwm_get(get->fwmark); 2026 else 2027 svc = __ip_vs_service_get(get->protocol, 2028 get->addr, get->port); 2029 if (svc) { 2030 int count = 0; 2031 struct ip_vs_dest *dest; 2032 struct ip_vs_dest_entry entry; 2033 2034 list_for_each_entry(dest, &svc->destinations, n_list) { 2035 if (count >= get->num_dests) 2036 break; 2037 2038 entry.addr = dest->addr; 2039 entry.port = dest->port; 2040 entry.conn_flags = atomic_read(&dest->conn_flags); 2041 entry.weight = atomic_read(&dest->weight); 2042 entry.u_threshold = dest->u_threshold; 2043 entry.l_threshold = dest->l_threshold; 2044 entry.activeconns = atomic_read(&dest->activeconns); 2045 entry.inactconns = atomic_read(&dest->inactconns); 2046 entry.persistconns = atomic_read(&dest->persistconns); 2047 ip_vs_copy_stats(&entry.stats, &dest->stats); 2048 if (copy_to_user(&uptr->entrytable[count], 2049 &entry, sizeof(entry))) { 2050 ret = -EFAULT; 2051 break; 2052 } 2053 count++; 2054 } 2055 ip_vs_service_put(svc); 2056 } else 2057 ret = -ESRCH; 2058 return ret; 2059} 2060 2061static inline void 2062__ip_vs_get_timeouts(struct ip_vs_timeout_user *u) 2063{ 2064#ifdef CONFIG_IP_VS_PROTO_TCP 2065 u->tcp_timeout = 2066 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ; 2067 u->tcp_fin_timeout = 2068 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ; 2069#endif 2070#ifdef CONFIG_IP_VS_PROTO_UDP 2071 u->udp_timeout = 2072 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ; 2073#endif 2074} 2075 2076 2077#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL) 2078#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo)) 2079#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services)) 2080#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry)) 2081#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests)) 2082#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user)) 2083#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2) 2084 2085static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = { 2086 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64, 2087 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN, 2088 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN, 2089 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN, 2090 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN, 2091 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN, 2092 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN, 2093}; 2094 2095static int 2096do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) 2097{ 2098 unsigned char arg[128]; 2099 int ret = 0; 2100 2101 if (!capable(CAP_NET_ADMIN)) 2102 return -EPERM; 2103 2104 if (*len < get_arglen[GET_CMDID(cmd)]) { 2105 IP_VS_ERR("get_ctl: len %u < %u\n", 2106 *len, get_arglen[GET_CMDID(cmd)]); 2107 return -EINVAL; 2108 } 2109 2110 if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0) 2111 return -EFAULT; 2112 2113 if (mutex_lock_interruptible(&__ip_vs_mutex)) 2114 return -ERESTARTSYS; 2115 2116 switch (cmd) { 2117 case IP_VS_SO_GET_VERSION: 2118 { 2119 char buf[64]; 2120 2121 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)", 2122 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE); 2123 if (copy_to_user(user, buf, strlen(buf)+1) != 0) { 2124 ret = -EFAULT; 2125 goto out; 2126 } 2127 *len = strlen(buf)+1; 2128 } 2129 break; 2130 2131 case IP_VS_SO_GET_INFO: 2132 { 2133 struct ip_vs_getinfo info; 2134 info.version = IP_VS_VERSION_CODE; 2135 info.size = IP_VS_CONN_TAB_SIZE; 2136 info.num_services = ip_vs_num_services; 2137 if (copy_to_user(user, &info, sizeof(info)) != 0) 2138 ret = -EFAULT; 2139 } 2140 break; 2141 2142 case IP_VS_SO_GET_SERVICES: 2143 { 2144 struct ip_vs_get_services *get; 2145 int size; 2146 2147 get = (struct ip_vs_get_services *)arg; 2148 size = sizeof(*get) + 2149 sizeof(struct ip_vs_service_entry) * get->num_services; 2150 if (*len != size) { 2151 IP_VS_ERR("length: %u != %u\n", *len, size); 2152 ret = -EINVAL; 2153 goto out; 2154 } 2155 ret = __ip_vs_get_service_entries(get, user); 2156 } 2157 break; 2158 2159 case IP_VS_SO_GET_SERVICE: 2160 { 2161 struct ip_vs_service_entry *entry; 2162 struct ip_vs_service *svc; 2163 2164 entry = (struct ip_vs_service_entry *)arg; 2165 if (entry->fwmark) 2166 svc = __ip_vs_svc_fwm_get(entry->fwmark); 2167 else 2168 svc = __ip_vs_service_get(entry->protocol, 2169 entry->addr, entry->port); 2170 if (svc) { 2171 ip_vs_copy_service(entry, svc); 2172 if (copy_to_user(user, entry, sizeof(*entry)) != 0) 2173 ret = -EFAULT; 2174 ip_vs_service_put(svc); 2175 } else 2176 ret = -ESRCH; 2177 } 2178 break; 2179 2180 case IP_VS_SO_GET_DESTS: 2181 { 2182 struct ip_vs_get_dests *get; 2183 int size; 2184 2185 get = (struct ip_vs_get_dests *)arg; 2186 size = sizeof(*get) + 2187 sizeof(struct ip_vs_dest_entry) * get->num_dests; 2188 if (*len != size) { 2189 IP_VS_ERR("length: %u != %u\n", *len, size); 2190 ret = -EINVAL; 2191 goto out; 2192 } 2193 ret = __ip_vs_get_dest_entries(get, user); 2194 } 2195 break; 2196 2197 case IP_VS_SO_GET_TIMEOUT: 2198 { 2199 struct ip_vs_timeout_user t; 2200 2201 __ip_vs_get_timeouts(&t); 2202 if (copy_to_user(user, &t, sizeof(t)) != 0) 2203 ret = -EFAULT; 2204 } 2205 break; 2206 2207 case IP_VS_SO_GET_DAEMON: 2208 { 2209 struct ip_vs_daemon_user d[2]; 2210 2211 memset(&d, 0, sizeof(d)); 2212 if (ip_vs_sync_state & IP_VS_STATE_MASTER) { 2213 d[0].state = IP_VS_STATE_MASTER; 2214 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn)); 2215 d[0].syncid = ip_vs_master_syncid; 2216 } 2217 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) { 2218 d[1].state = IP_VS_STATE_BACKUP; 2219 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn)); 2220 d[1].syncid = ip_vs_backup_syncid; 2221 } 2222 if (copy_to_user(user, &d, sizeof(d)) != 0) 2223 ret = -EFAULT; 2224 } 2225 break; 2226 2227 default: 2228 ret = -EINVAL; 2229 } 2230 2231 out: 2232 mutex_unlock(&__ip_vs_mutex); 2233 return ret; 2234} 2235 2236 2237static struct nf_sockopt_ops ip_vs_sockopts = { 2238 .pf = PF_INET, 2239 .set_optmin = IP_VS_BASE_CTL, 2240 .set_optmax = IP_VS_SO_SET_MAX+1, 2241 .set = do_ip_vs_set_ctl, 2242 .get_optmin = IP_VS_BASE_CTL, 2243 .get_optmax = IP_VS_SO_GET_MAX+1, 2244 .get = do_ip_vs_get_ctl, 2245}; 2246 2247 2248int ip_vs_control_init(void) 2249{ 2250 int ret; 2251 int idx; 2252 2253 EnterFunction(2); 2254 2255 ret = nf_register_sockopt(&ip_vs_sockopts); 2256 if (ret) { 2257 IP_VS_ERR("cannot register sockopt.\n"); 2258 return ret; 2259 } 2260 2261 proc_net_fops_create("ip_vs", 0, &ip_vs_info_fops); 2262 proc_net_fops_create("ip_vs_stats",0, &ip_vs_stats_fops); 2263 2264 sysctl_header = register_sysctl_table(vs_root_table); 2265 2266 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */ 2267 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2268 INIT_LIST_HEAD(&ip_vs_svc_table[idx]); 2269 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]); 2270 } 2271 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) { 2272 INIT_LIST_HEAD(&ip_vs_rtable[idx]); 2273 } 2274 2275 memset(&ip_vs_stats, 0, sizeof(ip_vs_stats)); 2276 spin_lock_init(&ip_vs_stats.lock); 2277 ip_vs_new_estimator(&ip_vs_stats); 2278 2279 /* Hook the defense timer */ 2280 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD); 2281 2282 LeaveFunction(2); 2283 return 0; 2284} 2285 2286 2287void ip_vs_control_cleanup(void) 2288{ 2289 EnterFunction(2); 2290 ip_vs_trash_cleanup(); 2291 cancel_rearming_delayed_work(&defense_work); 2292 cancel_work_sync(&defense_work.work); 2293 ip_vs_kill_estimator(&ip_vs_stats); 2294 unregister_sysctl_table(sysctl_header); 2295 proc_net_remove("ip_vs_stats"); 2296 proc_net_remove("ip_vs"); 2297 nf_unregister_sockopt(&ip_vs_sockopts); 2298 LeaveFunction(2); 2299} 2300