• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /netgear-R7000-V1.0.7.12_1.2.5/components/opensource/linux/linux-2.6.36/net/netfilter/ipvs/
1/*
2 * IPVS         An implementation of the IP virtual server support for the
3 *              LINUX operating system.  IPVS is now implemented as a module
4 *              over the NetFilter framework. IPVS can be used to build a
5 *              high-performance and highly available server based on a
6 *              cluster of servers.
7 *
8 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
9 *              Peter Kese <peter.kese@ijs.si>
10 *              Julian Anastasov <ja@ssi.bg>
11 *
12 *              This program is free software; you can redistribute it and/or
13 *              modify it under the terms of the GNU General Public License
14 *              as published by the Free Software Foundation; either version
15 *              2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
21#define KMSG_COMPONENT "IPVS"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/types.h>
27#include <linux/capability.h>
28#include <linux/fs.h>
29#include <linux/sysctl.h>
30#include <linux/proc_fs.h>
31#include <linux/workqueue.h>
32#include <linux/swap.h>
33#include <linux/seq_file.h>
34#include <linux/slab.h>
35
36#include <linux/netfilter.h>
37#include <linux/netfilter_ipv4.h>
38#include <linux/mutex.h>
39
40#include <net/net_namespace.h>
41#include <net/ip.h>
42#ifdef CONFIG_IP_VS_IPV6
43#include <net/ipv6.h>
44#include <net/ip6_route.h>
45#endif
46#include <net/route.h>
47#include <net/sock.h>
48#include <net/genetlink.h>
49
50#include <asm/uaccess.h>
51
52#include <net/ip_vs.h>
53
/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
static DEFINE_MUTEX(__ip_vs_mutex);

/* lock for service table */
static DEFINE_RWLOCK(__ip_vs_svc_lock);

/* lock for table with the real services */
static DEFINE_RWLOCK(__ip_vs_rs_lock);

/* lock for state and timeout tables */
static DEFINE_RWLOCK(__ip_vs_securetcp_lock);

/* lock for drop entry handling */
static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);

/* lock for drop packet handling */
static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);

/* 1/rate drop and drop-entry variables.
 * drop_rate/counter implement "drop 1 packet out of every drop_rate";
 * both are recomputed in update_defense_level(). */
int ip_vs_drop_rate = 0;
int ip_vs_drop_counter = 0;
/* boolean flag: when set, random connection entries may be dropped
 * (consumed by defense_work_handler()) */
static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);

/* number of virtual services */
static int ip_vs_num_services = 0;

/* sysctl variables */
static int sysctl_ip_vs_drop_entry = 0;
static int sysctl_ip_vs_drop_packet = 0;
static int sysctl_ip_vs_secure_tcp = 0;
/* available-memory threshold (in pages) below which defense kicks in */
static int sysctl_ip_vs_amemthresh = 1024;
/* fixed drop rate used when drop_packet mode is 3 (always drop) */
static int sysctl_ip_vs_am_droprate = 10;
int sysctl_ip_vs_cache_bypass = 0;
int sysctl_ip_vs_expire_nodest_conn = 0;
int sysctl_ip_vs_expire_quiescent_template = 0;
int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
int sysctl_ip_vs_nat_icmp_send = 0;
92
#ifdef CONFIG_IP_VS_DEBUG
/* current debug verbosity; presumably exported as a sysctl elsewhere
 * in this file (name follows the sysctl_* convention) — confirm there */
static int sysctl_ip_vs_debug_level = 0;

/* Accessor for the debug level, for use by code in other files. */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif
101
#ifdef CONFIG_IP_VS_IPV6
/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way?
 *
 * Returns 1 if @addr is configured on the local host (the route lookup
 * resolves to the loopback device), 0 otherwise.
 */
static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
{
	struct rt6_info *rt;
	struct flowi fl = {
		.oif = 0,
		.nl_u = {
			.ip6_u = {
				.daddr = *addr,
				.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
	};
	int local = 0;

	/*
	 * ip6_route_output() returns a dst_entry with a reference held.
	 * The original code returned without dropping it, leaking a
	 * routing cache entry on every call.
	 */
	rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
	if (rt) {
		if (rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
			local = 1;
		dst_release(&rt->dst);
	}

	return local;
}
#endif
122/*
123 *	update_defense_level is called from keventd and from sysctl,
124 *	so it needs to protect itself from softirqs
125 */
126static void update_defense_level(void)
127{
128	struct sysinfo i;
129	static int old_secure_tcp = 0;
130	int availmem;
131	int nomem;
132	int to_change = -1;
133
134	/* we only count free and buffered memory (in pages) */
135	si_meminfo(&i);
136	availmem = i.freeram + i.bufferram;
137	/* however in linux 2.5 the i.bufferram is total page cache size,
138	   we need adjust it */
139	/* si_swapinfo(&i); */
140	/* availmem = availmem - (i.totalswap - i.freeswap); */
141
142	nomem = (availmem < sysctl_ip_vs_amemthresh);
143
144	local_bh_disable();
145
146	/* drop_entry */
147	spin_lock(&__ip_vs_dropentry_lock);
148	switch (sysctl_ip_vs_drop_entry) {
149	case 0:
150		atomic_set(&ip_vs_dropentry, 0);
151		break;
152	case 1:
153		if (nomem) {
154			atomic_set(&ip_vs_dropentry, 1);
155			sysctl_ip_vs_drop_entry = 2;
156		} else {
157			atomic_set(&ip_vs_dropentry, 0);
158		}
159		break;
160	case 2:
161		if (nomem) {
162			atomic_set(&ip_vs_dropentry, 1);
163		} else {
164			atomic_set(&ip_vs_dropentry, 0);
165			sysctl_ip_vs_drop_entry = 1;
166		};
167		break;
168	case 3:
169		atomic_set(&ip_vs_dropentry, 1);
170		break;
171	}
172	spin_unlock(&__ip_vs_dropentry_lock);
173
174	/* drop_packet */
175	spin_lock(&__ip_vs_droppacket_lock);
176	switch (sysctl_ip_vs_drop_packet) {
177	case 0:
178		ip_vs_drop_rate = 0;
179		break;
180	case 1:
181		if (nomem) {
182			ip_vs_drop_rate = ip_vs_drop_counter
183				= sysctl_ip_vs_amemthresh /
184				(sysctl_ip_vs_amemthresh-availmem);
185			sysctl_ip_vs_drop_packet = 2;
186		} else {
187			ip_vs_drop_rate = 0;
188		}
189		break;
190	case 2:
191		if (nomem) {
192			ip_vs_drop_rate = ip_vs_drop_counter
193				= sysctl_ip_vs_amemthresh /
194				(sysctl_ip_vs_amemthresh-availmem);
195		} else {
196			ip_vs_drop_rate = 0;
197			sysctl_ip_vs_drop_packet = 1;
198		}
199		break;
200	case 3:
201		ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
202		break;
203	}
204	spin_unlock(&__ip_vs_droppacket_lock);
205
206	/* secure_tcp */
207	write_lock(&__ip_vs_securetcp_lock);
208	switch (sysctl_ip_vs_secure_tcp) {
209	case 0:
210		if (old_secure_tcp >= 2)
211			to_change = 0;
212		break;
213	case 1:
214		if (nomem) {
215			if (old_secure_tcp < 2)
216				to_change = 1;
217			sysctl_ip_vs_secure_tcp = 2;
218		} else {
219			if (old_secure_tcp >= 2)
220				to_change = 0;
221		}
222		break;
223	case 2:
224		if (nomem) {
225			if (old_secure_tcp < 2)
226				to_change = 1;
227		} else {
228			if (old_secure_tcp >= 2)
229				to_change = 0;
230			sysctl_ip_vs_secure_tcp = 1;
231		}
232		break;
233	case 3:
234		if (old_secure_tcp < 2)
235			to_change = 1;
236		break;
237	}
238	old_secure_tcp = sysctl_ip_vs_secure_tcp;
239	if (to_change >= 0)
240		ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
241	write_unlock(&__ip_vs_securetcp_lock);
242
243	local_bh_enable();
244}
245
246
/*
 *	Timer for checking the defense
 */
#define DEFENSE_TIMER_PERIOD	1*HZ
static void defense_work_handler(struct work_struct *work);
static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);

/*
 * Periodic worker (every DEFENSE_TIMER_PERIOD): refresh the defense
 * strategies and, when entry dropping is active, drop some random
 * connection entries.  Re-arms itself at the end.
 */
static void defense_work_handler(struct work_struct *work)
{
	update_defense_level();
	if (atomic_read(&ip_vs_dropentry))
		ip_vs_random_dropentry();

	schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
}
262
/*
 * Pin the IPVS module while user-space operates on it.
 * Returns nonzero on success, 0 if the module is going away.
 */
int
ip_vs_use_count_inc(void)
{
	return try_module_get(THIS_MODULE);
}
268
/*
 * Drop the module reference taken by ip_vs_use_count_inc().
 */
void
ip_vs_use_count_dec(void)
{
	module_put(THIS_MODULE);
}
274
275
/*
 *	Hash table: for virtual service lookups
 */
#define IP_VS_SVC_TAB_BITS 8
#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)

/* the service table hashed by <protocol, addr, port> */
static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
/* the service table hashed by fwmark */
static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];

/*
 *	Hash table: for real service lookups
 */
#define IP_VS_RTAB_BITS 4
#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)

/* real servers hashed by <addr, port> (only NAT dests are hashed here,
 * see __ip_vs_update_dest()) */
static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];

/*
 *	Trash for destinations: dests removed from a service but still
 *	referenced by existing connection entries.
 */
static LIST_HEAD(ip_vs_dest_trash);

/*
 *	FTP & NULL virtual service counters; used as cheap guards for the
 *	fallback lookups in ip_vs_service_get().
 */
static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
307
308
309/*
310 *	Returns hash value for virtual service
311 */
312static __inline__ unsigned
313ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
314		  __be16 port)
315{
316	register unsigned porth = ntohs(port);
317	__be32 addr_fold = addr->ip;
318
319#ifdef CONFIG_IP_VS_IPV6
320	if (af == AF_INET6)
321		addr_fold = addr->ip6[0]^addr->ip6[1]^
322			    addr->ip6[2]^addr->ip6[3];
323#endif
324
325	return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
326		& IP_VS_SVC_TAB_MASK;
327}
328
/*
 *	Returns hash value of fwmark for virtual service lookup:
 *	simply the low IP_VS_SVC_TAB_BITS bits of the firewall mark.
 */
static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
{
	return fwmark & IP_VS_SVC_TAB_MASK;
}
336
/*
 *	Hashes a service in the ip_vs_svc_table by <proto,addr,port>
 *	or in the ip_vs_svc_fwm_table by fwmark.
 *	Should be called with locked tables.
 *	Returns 1 on success, 0 if the service was already hashed.
 */
static int ip_vs_svc_hash(struct ip_vs_service *svc)
{
	unsigned hash;

	/* the HASHED flag is the authoritative "already in a table" marker */
	if (svc->flags & IP_VS_SVC_F_HASHED) {
		pr_err("%s(): request for already hashed, called from %pF\n",
		       __func__, __builtin_return_address(0));
		return 0;
	}

	if (svc->fwmark == 0) {
		/*
		 *  Hash it by <protocol,addr,port> in ip_vs_svc_table
		 */
		hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
					 svc->port);
		list_add(&svc->s_list, &ip_vs_svc_table[hash]);
	} else {
		/*
		 *  Hash it by fwmark in ip_vs_svc_fwm_table
		 */
		hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
		list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
	}

	svc->flags |= IP_VS_SVC_F_HASHED;
	/* increase its refcnt because it is referenced by the svc table */
	atomic_inc(&svc->refcnt);
	return 1;
}
372
373
/*
 *	Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
 *	Should be called with locked tables.
 *	Returns 1 on success, 0 if the service was not hashed.
 */
static int ip_vs_svc_unhash(struct ip_vs_service *svc)
{
	if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
		pr_err("%s(): request for unhash flagged, called from %pF\n",
		       __func__, __builtin_return_address(0));
		return 0;
	}

	/* a service lives in exactly one of the two tables, selected by
	 * whether it was configured with a fwmark */
	if (svc->fwmark == 0) {
		/* Remove it from the ip_vs_svc_table table */
		list_del(&svc->s_list);
	} else {
		/* Remove it from the ip_vs_svc_fwm_table table */
		list_del(&svc->f_list);
	}

	svc->flags &= ~IP_VS_SVC_F_HASHED;
	/* drop the reference the table held (see ip_vs_svc_hash()) */
	atomic_dec(&svc->refcnt);
	return 1;
}
398
399
400/*
401 *	Get service by {proto,addr,port} in the service table.
402 */
403static inline struct ip_vs_service *
404__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
405		    __be16 vport)
406{
407	unsigned hash;
408	struct ip_vs_service *svc;
409
410	/* Check for "full" addressed entries */
411	hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
412
413	list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
414		if ((svc->af == af)
415		    && ip_vs_addr_equal(af, &svc->addr, vaddr)
416		    && (svc->port == vport)
417		    && (svc->protocol == protocol)) {
418			/* HIT */
419			atomic_inc(&svc->usecnt);
420			return svc;
421		}
422	}
423
424	return NULL;
425}
426
427
428/*
429 *	Get service by {fwmark} in the service table.
430 */
431static inline struct ip_vs_service *
432__ip_vs_svc_fwm_get(int af, __u32 fwmark)
433{
434	unsigned hash;
435	struct ip_vs_service *svc;
436
437	/* Check for fwmark addressed entries */
438	hash = ip_vs_svc_fwm_hashkey(fwmark);
439
440	list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
441		if (svc->fwmark == fwmark && svc->af == af) {
442			/* HIT */
443			atomic_inc(&svc->usecnt);
444			return svc;
445		}
446	}
447
448	return NULL;
449}
450
/*
 * Look up a virtual service.  fwmark-based services take precedence;
 * otherwise match on <protocol,vaddr,vport>, with two fallbacks:
 * the FTP control service (the packet may belong to an FTP data
 * connection) and the port-zero catch-all service.  On a hit the
 * service's usecnt is raised; the caller releases it with
 * ip_vs_service_put() (see ip_vs_find_dest()).
 */
struct ip_vs_service *
ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
		  const union nf_inet_addr *vaddr, __be16 vport)
{
	struct ip_vs_service *svc;

	read_lock(&__ip_vs_svc_lock);

	/*
	 *	Check the table hashed by fwmark first
	 */
	if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
		goto out;

	/*
	 *	Check the table hashed by <protocol,addr,port>
	 *	for "full" addressed entries
	 */
	svc = __ip_vs_service_get(af, protocol, vaddr, vport);

	if (svc == NULL
	    && protocol == IPPROTO_TCP
	    && atomic_read(&ip_vs_ftpsvc_counter)
	    && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
		/*
		 * Check if ftp service entry exists, the packet
		 * might belong to FTP data connections.
		 */
		svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
	}

	if (svc == NULL
	    && atomic_read(&ip_vs_nullsvc_counter)) {
		/*
		 * Check if the catch-all port (port zero) exists
		 */
		svc = __ip_vs_service_get(af, protocol, vaddr, 0);
	}

  out:
	read_unlock(&__ip_vs_svc_lock);

	IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
		      fwmark, ip_vs_proto_name(protocol),
		      IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
		      svc ? "hit" : "not hit");

	return svc;
}
500
501
/*
 * Bind a destination to a service: set the back-pointer and take a
 * service reference on the destination's behalf.
 */
static inline void
__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
{
	atomic_inc(&svc->refcnt);
	dest->svc = svc;
}
508
/*
 * Drop a destination's reference to its service; free the service when
 * this was the last reference.
 */
static inline void
__ip_vs_unbind_svc(struct ip_vs_dest *dest)
{
	struct ip_vs_service *svc = dest->svc;

	dest->svc = NULL;
	if (atomic_dec_and_test(&svc->refcnt))
		kfree(svc);
}
518
519
520/*
521 *	Returns hash value for real service
522 */
523static inline unsigned ip_vs_rs_hashkey(int af,
524					    const union nf_inet_addr *addr,
525					    __be16 port)
526{
527	register unsigned porth = ntohs(port);
528	__be32 addr_fold = addr->ip;
529
530#ifdef CONFIG_IP_VS_IPV6
531	if (af == AF_INET6)
532		addr_fold = addr->ip6[0]^addr->ip6[1]^
533			    addr->ip6[2]^addr->ip6[3];
534#endif
535
536	return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
537		& IP_VS_RTAB_MASK;
538}
539
/*
 *	Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
 *	should be called with locked tables.
 *	Returns 1 on success, 0 if the dest was already hashed.
 */
static int ip_vs_rs_hash(struct ip_vs_dest *dest)
{
	unsigned hash;

	/* a non-empty d_list means the dest is already in the table
	 * (ip_vs_rs_unhash() re-initializes d_list on removal) */
	if (!list_empty(&dest->d_list)) {
		return 0;
	}

	/*
	 *	Hash by proto,addr,port,
	 *	which are the parameters of the real service.
	 */
	hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);

	list_add(&dest->d_list, &ip_vs_rtable[hash]);

	return 1;
}
562
563/*
564 *	UNhashes ip_vs_dest from ip_vs_rtable.
565 *	should be called with locked tables.
566 */
567static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
568{
569	/*
570	 * Remove it from the ip_vs_rtable table.
571	 */
572	if (!list_empty(&dest->d_list)) {
573		list_del(&dest->d_list);
574		INIT_LIST_HEAD(&dest->d_list);
575	}
576
577	return 1;
578}
579
/*
 *	Lookup real service by <proto,addr,port> in the real service table.
 *	Returns the dest without taking a reference — NOTE(review): the
 *	pointer is returned after the lock is dropped; callers must cope.
 */
struct ip_vs_dest *
ip_vs_lookup_real_service(int af, __u16 protocol,
			  const union nf_inet_addr *daddr,
			  __be16 dport)
{
	unsigned hash;
	struct ip_vs_dest *dest;

	/*
	 *	Check for "full" addressed entries
	 *	Return the first found entry
	 */
	hash = ip_vs_rs_hashkey(af, daddr, dport);

	read_lock(&__ip_vs_rs_lock);
	list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
		if ((dest->af == af)
		    && ip_vs_addr_equal(af, &dest->addr, daddr)
		    && (dest->port == dport)
		    /* fwmark services accept any protocol */
		    && ((dest->protocol == protocol) ||
			dest->vfwmark)) {
			/* HIT */
			read_unlock(&__ip_vs_rs_lock);
			return dest;
		}
	}
	read_unlock(&__ip_vs_rs_lock);

	return NULL;
}
613
614/*
615 *	Lookup destination by {addr,port} in the given service
616 */
617static struct ip_vs_dest *
618ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
619		  __be16 dport)
620{
621	struct ip_vs_dest *dest;
622
623	/*
624	 * Find the destination for the given service
625	 */
626	list_for_each_entry(dest, &svc->destinations, n_list) {
627		if ((dest->af == svc->af)
628		    && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
629		    && (dest->port == dport)) {
630			/* HIT */
631			return dest;
632		}
633	}
634
635	return NULL;
636}
637
/*
 * Find destination by {daddr,dport,vaddr,protocol}
 * Created to be used in ip_vs_process_message() in
 * the backup synchronization daemon. It finds the
 * destination to be bound to the received connection
 * on the backup.
 *
 * ip_vs_lookup_real_service() looked promising, but
 * seems not working as expected.
 *
 * On success the dest is returned with its refcnt raised; the caller
 * owns that reference.
 */
struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
				   __be16 dport,
				   const union nf_inet_addr *vaddr,
				   __be16 vport, __u16 protocol)
{
	struct ip_vs_dest *dest;
	struct ip_vs_service *svc;

	/* resolve the virtual service first (fwmark 0 = no fwmark match) */
	svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
	if (!svc)
		return NULL;
	dest = ip_vs_lookup_dest(svc, daddr, dport);
	/* pin the dest before dropping the service use-reference */
	if (dest)
		atomic_inc(&dest->refcnt);
	ip_vs_service_put(svc);
	return dest;
}
665
/*
 *  Lookup dest by {svc,addr,port} in the destination trash.
 *  The destination trash is used to hold the destinations that are removed
 *  from the service table but are still referenced by some conn entries.
 *  The reason to add the destination trash is when the dest is temporary
 *  down (either by administrator or by monitor program), the dest can be
 *  picked back from the trash, the remaining connections to the dest can
 *  continue, and the counting information of the dest is also useful for
 *  scheduling.
 *
 *  As a side effect, unreferenced trash entries encountered during the
 *  scan are purged (hence the _safe iteration).
 */
static struct ip_vs_dest *
ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
		     __be16 dport)
{
	struct ip_vs_dest *dest, *nxt;

	/*
	 * Find the destination in trash
	 */
	list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
		IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
			      "dest->refcnt=%d\n",
			      dest->vfwmark,
			      IP_VS_DBG_ADDR(svc->af, &dest->addr),
			      ntohs(dest->port),
			      atomic_read(&dest->refcnt));
		/* the dest must match both the real-server side and the
		 * virtual side (fwmark or vaddr:vport) of the service */
		if (dest->af == svc->af &&
		    ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
		    dest->port == dport &&
		    dest->vfwmark == svc->fwmark &&
		    dest->protocol == svc->protocol &&
		    (svc->fwmark ||
		     (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
		      dest->vport == svc->port))) {
			/* HIT */
			return dest;
		}

		/*
		 * Try to purge the destination from trash if not referenced
		 * (refcnt == 1 means only the trash itself holds it)
		 */
		if (atomic_read(&dest->refcnt) == 1) {
			IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
				      "from trash\n",
				      dest->vfwmark,
				      IP_VS_DBG_ADDR(svc->af, &dest->addr),
				      ntohs(dest->port));
			list_del(&dest->n_list);
			ip_vs_dst_reset(dest);
			__ip_vs_unbind_svc(dest);
			kfree(dest);
		}
	}

	return NULL;
}
722
723
/*
 *  Clean up all the destinations in the trash
 *  Called by the ip_vs_control_cleanup()
 *
 *  When the ip_vs_control_clearup is activated by ipvs module exit,
 *  the service tables must have been flushed and all the connections
 *  are expired, and the refcnt of each destination in the trash must
 *  be 1, so we simply release them here.
 */
static void ip_vs_trash_cleanup(void)
{
	struct ip_vs_dest *dest, *nxt;

	/* _safe iteration: each entry is freed as we go */
	list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
		list_del(&dest->n_list);
		ip_vs_dst_reset(dest);
		__ip_vs_unbind_svc(dest);
		kfree(dest);
	}
}
744
745
/*
 * Reset a stats block (counters and its rate estimator) to zero,
 * under the stats lock.
 */
static void
ip_vs_zero_stats(struct ip_vs_stats *stats)
{
	spin_lock_bh(&stats->lock);

	memset(&stats->ustats, 0, sizeof(stats->ustats));
	ip_vs_zero_estimator(stats);

	spin_unlock_bh(&stats->lock);
}
756
/*
 *	Update a destination in the given service: weight, forwarding
 *	flags and thresholds from @udest; binds the dest to @svc and,
 *	for NAT dests, hashes it into the real-service table.
 */
static void
__ip_vs_update_dest(struct ip_vs_service *svc,
		    struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
{
	int conn_flags;

	/* set the weight and the flags */
	atomic_set(&dest->weight, udest->weight);
	/* new connections start counted as inactive */
	conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;

	/* check if local node and update the flags: a locally-configured
	 * address forces the LOCALNODE forwarding method */
#ifdef CONFIG_IP_VS_IPV6
	if (svc->af == AF_INET6) {
		if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
			conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
				| IP_VS_CONN_F_LOCALNODE;
		}
	} else
#endif
		if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
			conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
				| IP_VS_CONN_F_LOCALNODE;
		}

	/* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
	if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
		conn_flags |= IP_VS_CONN_F_NOOUTPUT;
	} else {
		/*
		 *    Put the real service in ip_vs_rtable if not present.
		 *    For now only for NAT!
		 */
		write_lock_bh(&__ip_vs_rs_lock);
		ip_vs_rs_hash(dest);
		write_unlock_bh(&__ip_vs_rs_lock);
	}
	atomic_set(&dest->conn_flags, conn_flags);

	/* bind the service */
	if (!dest->svc) {
		__ip_vs_bind_svc(dest, svc);
	} else {
		if (dest->svc != svc) {
			/* dest moved to another service: rebind and
			 * restart its statistics from zero */
			__ip_vs_unbind_svc(dest);
			ip_vs_zero_stats(&dest->stats);
			__ip_vs_bind_svc(dest, svc);
		}
	}

	/* set the dest status flags */
	dest->flags |= IP_VS_DEST_F_AVAILABLE;

	/* a removed (0) or raised upper threshold clears overload state */
	if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
		dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
	dest->u_threshold = udest->u_threshold;
	dest->l_threshold = udest->l_threshold;
}
817
818
/*
 *	Create a destination for the given service.
 *	Validates the address type, allocates and initializes the dest,
 *	applies @udest via __ip_vs_update_dest() and starts its rate
 *	estimator.  Returns 0 and stores the dest in *dest_p, or a
 *	negative errno.
 */
static int
ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
	       struct ip_vs_dest **dest_p)
{
	struct ip_vs_dest *dest;
	unsigned atype;

	EnterFunction(2);

#ifdef CONFIG_IP_VS_IPV6
	if (svc->af == AF_INET6) {
		/* accept unicast non-link-local addresses, or any address
		 * configured locally on this host */
		atype = ipv6_addr_type(&udest->addr.in6);
		if ((!(atype & IPV6_ADDR_UNICAST) ||
			atype & IPV6_ADDR_LINKLOCAL) &&
			!__ip_vs_addr_is_local_v6(&udest->addr.in6))
			return -EINVAL;
	} else
#endif
	{
		atype = inet_addr_type(&init_net, udest->addr.ip);
		if (atype != RTN_LOCAL && atype != RTN_UNICAST)
			return -EINVAL;
	}

	dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
	if (dest == NULL) {
		pr_err("%s(): no memory.\n", __func__);
		return -ENOMEM;
	}

	/* copy the virtual-service identity into the dest so it can be
	 * matched later even from the trash */
	dest->af = svc->af;
	dest->protocol = svc->protocol;
	dest->vaddr = svc->addr;
	dest->vport = svc->port;
	dest->vfwmark = svc->fwmark;
	ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
	dest->port = udest->port;

	atomic_set(&dest->activeconns, 0);
	atomic_set(&dest->inactconns, 0);
	atomic_set(&dest->persistconns, 0);
	/* refcnt starts at 0; the caller takes the first reference */
	atomic_set(&dest->refcnt, 0);

	INIT_LIST_HEAD(&dest->d_list);
	spin_lock_init(&dest->dst_lock);
	spin_lock_init(&dest->stats.lock);
	__ip_vs_update_dest(svc, dest, udest);
	ip_vs_new_estimator(&dest->stats);

	*dest_p = dest;

	LeaveFunction(2);
	return 0;
}
876
877
/*
 *	Add a destination into an existing service.
 *	The dest may be resurrected from the trash (keeping its counters)
 *	or newly allocated.  Returns 0 or a negative errno.
 */
static int
ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
	struct ip_vs_dest *dest;
	union nf_inet_addr daddr;
	__be16 dport = udest->port;
	int ret;

	EnterFunction(2);

	/* validate user-supplied parameters */
	if (udest->weight < 0) {
		pr_err("%s(): server weight less than zero\n", __func__);
		return -ERANGE;
	}

	if (udest->l_threshold > udest->u_threshold) {
		pr_err("%s(): lower threshold is higher than upper threshold\n",
			__func__);
		return -ERANGE;
	}

	ip_vs_addr_copy(svc->af, &daddr, &udest->addr);

	/*
	 * Check if the dest already exists in the list
	 */
	dest = ip_vs_lookup_dest(svc, &daddr, dport);

	if (dest != NULL) {
		IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
		return -EEXIST;
	}

	/*
	 * Check if the dest already exists in the trash and
	 * is from the same service
	 */
	dest = ip_vs_trash_get_dest(svc, &daddr, dport);

	if (dest != NULL) {
		IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
			      "dest->refcnt=%d, service %u/%s:%u\n",
			      IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
			      atomic_read(&dest->refcnt),
			      dest->vfwmark,
			      IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
			      ntohs(dest->vport));

		/* refresh weight/flags/thresholds from the new request */
		__ip_vs_update_dest(svc, dest, udest);

		/*
		 * Get the destination from the trash
		 */
		list_del(&dest->n_list);

		/* restart the rate estimator for the resurrected dest */
		ip_vs_new_estimator(&dest->stats);

		write_lock_bh(&__ip_vs_svc_lock);

		/*
		 * Wait until all other svc users go away.
		 */
		IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);

		list_add(&dest->n_list, &svc->destinations);
		svc->num_dests++;

		/* call the update_service function of its scheduler */
		if (svc->scheduler->update_service)
			svc->scheduler->update_service(svc);

		write_unlock_bh(&__ip_vs_svc_lock);
		return 0;
	}

	/*
	 * Allocate and initialize the dest structure
	 */
	ret = ip_vs_new_dest(svc, udest, &dest);
	if (ret) {
		return ret;
	}

	/*
	 * Add the dest entry into the list
	 * (the reference taken here is owned by the service's list)
	 */
	atomic_inc(&dest->refcnt);

	write_lock_bh(&__ip_vs_svc_lock);

	/*
	 * Wait until all other svc users go away.
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);

	list_add(&dest->n_list, &svc->destinations);
	svc->num_dests++;

	/* call the update_service function of its scheduler */
	if (svc->scheduler->update_service)
		svc->scheduler->update_service(svc);

	write_unlock_bh(&__ip_vs_svc_lock);

	LeaveFunction(2);

	return 0;
}
989
990
/*
 *	Edit a destination in the given service
 *	(weight, forwarding flags, thresholds from @udest).
 *	Returns 0 or a negative errno.
 */
static int
ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
	struct ip_vs_dest *dest;
	union nf_inet_addr daddr;
	__be16 dport = udest->port;

	EnterFunction(2);

	/* validate user-supplied parameters */
	if (udest->weight < 0) {
		pr_err("%s(): server weight less than zero\n", __func__);
		return -ERANGE;
	}

	if (udest->l_threshold > udest->u_threshold) {
		pr_err("%s(): lower threshold is higher than upper threshold\n",
			__func__);
		return -ERANGE;
	}

	ip_vs_addr_copy(svc->af, &daddr, &udest->addr);

	/*
	 *  Lookup the destination list
	 */
	dest = ip_vs_lookup_dest(svc, &daddr, dport);

	if (dest == NULL) {
		IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
		return -ENOENT;
	}

	__ip_vs_update_dest(svc, dest, udest);

	write_lock_bh(&__ip_vs_svc_lock);

	/* Wait until all other svc users go away */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);

	/* call the update_service, because server weight may be changed */
	if (svc->scheduler->update_service)
		svc->scheduler->update_service(svc);

	write_unlock_bh(&__ip_vs_svc_lock);

	LeaveFunction(2);

	return 0;
}
1043
1044
/*
 *	Delete a destination (must be already unlinked from the service).
 *	Frees it if unreferenced, otherwise parks it in the trash.
 */
static void __ip_vs_del_dest(struct ip_vs_dest *dest)
{
	/* stop its rate estimator first */
	ip_vs_kill_estimator(&dest->stats);

	/*
	 *  Remove it from the d-linked list with the real services.
	 */
	write_lock_bh(&__ip_vs_rs_lock);
	ip_vs_rs_unhash(dest);
	write_unlock_bh(&__ip_vs_rs_lock);

	/*
	 *  Decrease the refcnt of the dest, and free the dest
	 *  if nobody refers to it (refcnt=0). Otherwise, throw
	 *  the destination into the trash.
	 */
	if (atomic_dec_and_test(&dest->refcnt)) {
		ip_vs_dst_reset(dest);
		/* simply decrease svc->refcnt here, let the caller check
		   and release the service if nobody refers to it.
		   Only user context can release destination and service,
		   and only one user context can update virtual service at a
		   time, so the operation here is OK */
		atomic_dec(&dest->svc->refcnt);
		kfree(dest);
	} else {
		IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
			      "dest->refcnt=%d\n",
			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
			      ntohs(dest->port),
			      atomic_read(&dest->refcnt));
		list_add(&dest->n_list, &ip_vs_dest_trash);
		/* the trash list itself holds one reference */
		atomic_inc(&dest->refcnt);
	}
}
1083
1084
/*
 *	Unlink a destination from the given service and notify the
 *	scheduler (when @svcupd is set).  Caller holds the svc lock.
 */
static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
				struct ip_vs_dest *dest,
				int svcupd)
{
	/* mark the server unusable for new connections */
	dest->flags &= ~IP_VS_DEST_F_AVAILABLE;

	/*
	 *  Remove it from the d-linked destination list.
	 */
	list_del(&dest->n_list);
	svc->num_dests--;

	/*
	 *  Call the update_service function of its scheduler
	 */
	if (svcupd && svc->scheduler->update_service)
			svc->scheduler->update_service(svc);
}
1106
1107
/*
 *	Delete a destination server in the given service.
 *	Returns 0, or -ENOENT if the dest is not in the service.
 */
static int
ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
	struct ip_vs_dest *dest;
	__be16 dport = udest->port;

	EnterFunction(2);

	dest = ip_vs_lookup_dest(svc, &udest->addr, dport);

	if (dest == NULL) {
		IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
		return -ENOENT;
	}

	write_lock_bh(&__ip_vs_svc_lock);

	/*
	 *	Wait until all other svc users go away.
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);

	/*
	 *	Unlink dest from the service
	 */
	__ip_vs_unlink_dest(svc, dest, 1);

	write_unlock_bh(&__ip_vs_svc_lock);

	/*
	 *	Delete the destination
	 *	(free it or move it to the trash, outside the svc lock)
	 */
	__ip_vs_del_dest(dest);

	LeaveFunction(2);

	return 0;
}
1149
1150
/*
 *	Add a service into the service hash table.
 *	Allocates the service, binds its scheduler and hashes it.
 *	On success *svc_p is set and the module use count stays raised
 *	(dropped again when the service is deleted).  Returns 0 or a
 *	negative errno.
 */
static int
ip_vs_add_service(struct ip_vs_service_user_kern *u,
		  struct ip_vs_service **svc_p)
{
	int ret = 0;
	struct ip_vs_scheduler *sched = NULL;
	struct ip_vs_service *svc = NULL;

	/* increase the module use count */
	ip_vs_use_count_inc();

	/* Lookup the scheduler by 'u->sched_name' */
	sched = ip_vs_scheduler_get(u->sched_name);
	if (sched == NULL) {
		pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
		ret = -ENOENT;
		goto out_mod_dec;
	}

#ifdef CONFIG_IP_VS_IPV6
	/* an IPv6 netmask is a prefix length, 1..128 */
	if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
		ret = -EINVAL;
		goto out_err;
	}
#endif

	svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
	if (svc == NULL) {
		IP_VS_DBG(1, "%s(): no memory\n", __func__);
		ret = -ENOMEM;
		goto out_err;
	}

	/* I'm the first user of the service */
	atomic_set(&svc->usecnt, 1);
	atomic_set(&svc->refcnt, 0);

	svc->af = u->af;
	svc->protocol = u->protocol;
	ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
	svc->port = u->port;
	svc->fwmark = u->fwmark;
	svc->flags = u->flags;
	svc->timeout = u->timeout * HZ;
	svc->netmask = u->netmask;

	INIT_LIST_HEAD(&svc->destinations);
	rwlock_init(&svc->sched_lock);
	spin_lock_init(&svc->stats.lock);

	/* Bind the scheduler */
	ret = ip_vs_bind_scheduler(svc, sched);
	if (ret)
		goto out_err;
	/* the scheduler reference is now owned by svc; don't put it twice */
	sched = NULL;

	/* Update the virtual service counters */
	if (svc->port == FTPPORT)
		atomic_inc(&ip_vs_ftpsvc_counter);
	else if (svc->port == 0)
		atomic_inc(&ip_vs_nullsvc_counter);

	ip_vs_new_estimator(&svc->stats);

	/* Count only IPv4 services for old get/setsockopt interface */
	if (svc->af == AF_INET)
		ip_vs_num_services++;

	/* Hash the service into the service table */
	write_lock_bh(&__ip_vs_svc_lock);
	ip_vs_svc_hash(svc);
	write_unlock_bh(&__ip_vs_svc_lock);

	*svc_p = svc;
	return 0;

  out_err:
	if (svc != NULL) {
		if (svc->scheduler)
			ip_vs_unbind_scheduler(svc);
		if (svc->inc) {
			local_bh_disable();
			ip_vs_app_inc_put(svc->inc);
			local_bh_enable();
		}
		kfree(svc);
	}
	ip_vs_scheduler_put(sched);

  out_mod_dec:
	/* decrease the module use count */
	ip_vs_use_count_dec();

	return ret;
}
1249
1250
1251/*
1252 *	Edit a service and bind it with a new scheduler
1253 */
/*
 * Modify an existing virtual service @svc according to @u: flags,
 * timeout, netmask and (possibly) the scheduler.  Returns 0 or a
 * negative errno.
 */
static int
ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
{
	struct ip_vs_scheduler *sched, *old_sched;
	int ret = 0;

	/*
	 * Lookup the scheduler, by 'u->sched_name'
	 */
	sched = ip_vs_scheduler_get(u->sched_name);
	if (sched == NULL) {
		pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
		return -ENOENT;
	}
	/* old_sched tracks the one scheduler reference that must be
	 * released on exit; start with the newly acquired one so the
	 * early-error path below drops it. */
	old_sched = sched;

#ifdef CONFIG_IP_VS_IPV6
	/* For IPv6 the netmask is a prefix length and must be in [1, 128] */
	if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
		ret = -EINVAL;
		goto out;
	}
#endif

	write_lock_bh(&__ip_vs_svc_lock);

	/*
	 * Wait until all other svc users go away.
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);

	/*
	 * Set the flags and timeout value
	 */
	svc->flags = u->flags | IP_VS_SVC_F_HASHED;
	svc->timeout = u->timeout * HZ;
	svc->netmask = u->netmask;

	old_sched = svc->scheduler;
	if (sched != old_sched) {
		/*
		 * Unbind the old scheduler
		 */
		if ((ret = ip_vs_unbind_scheduler(svc))) {
			old_sched = sched;
			goto out_unlock;
		}

		/*
		 * Bind the new scheduler
		 */
		if ((ret = ip_vs_bind_scheduler(svc, sched))) {
			/*
			 * If ip_vs_bind_scheduler fails, restore the old
			 * scheduler.
			 * The main reason of failure is out of memory.
			 *
			 * The question is if the old scheduler can be
			 * restored all the time. TODO: if it cannot be
			 * restored some time, we must delete the service,
			 * otherwise the system may crash.
			 */
			ip_vs_bind_scheduler(svc, old_sched);
			old_sched = sched;
			goto out_unlock;
		}
	}

  out_unlock:
	write_unlock_bh(&__ip_vs_svc_lock);
#ifdef CONFIG_IP_VS_IPV6
  out:
#endif

	/* Drop whichever scheduler reference is no longer needed */
	if (old_sched)
		ip_vs_scheduler_put(old_sched);

	return ret;
}
1332
1333
1334/*
1335 *	Delete a service from the service list
1336 *	- The service must be unlinked, unlocked and not referenced!
1337 *	- We are called under _bh lock
1338 */
static void __ip_vs_del_service(struct ip_vs_service *svc)
{
	struct ip_vs_dest *dest, *nxt;
	struct ip_vs_scheduler *old_sched;

	/* Count only IPv4 services for old get/setsockopt interface */
	if (svc->af == AF_INET)
		ip_vs_num_services--;

	/* Stop rate estimation before tearing the service down */
	ip_vs_kill_estimator(&svc->stats);

	/* Unbind scheduler */
	old_sched = svc->scheduler;
	ip_vs_unbind_scheduler(svc);
	if (old_sched)
		ip_vs_scheduler_put(old_sched);

	/* Unbind app inc */
	if (svc->inc) {
		ip_vs_app_inc_put(svc->inc);
		svc->inc = NULL;
	}

	/*
	 *    Unlink the whole destination list
	 */
	list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
		__ip_vs_unlink_dest(svc, dest, 0);
		__ip_vs_del_dest(dest);
	}

	/*
	 *    Update the virtual service counters
	 */
	if (svc->port == FTPPORT)
		atomic_dec(&ip_vs_ftpsvc_counter);
	else if (svc->port == 0)
		atomic_dec(&ip_vs_nullsvc_counter);

	/*
	 *    Free the service if nobody refers to it
	 */
	if (atomic_read(&svc->refcnt) == 0)
		kfree(svc);

	/* decrease the module use count */
	ip_vs_use_count_dec();
}
1387
1388/*
1389 *	Delete a service from the service list
1390 */
/*
 * Unhash @svc from the service table, wait until all other users have
 * dropped it, then destroy it.  Returns 0, or -EEXIST when @svc is NULL
 * (defensive only; callers normally reject a missing service earlier).
 */
static int ip_vs_del_service(struct ip_vs_service *svc)
{
	if (svc == NULL)
		return -EEXIST;

	/*
	 * Unhash it from the service table
	 */
	write_lock_bh(&__ip_vs_svc_lock);

	ip_vs_svc_unhash(svc);

	/*
	 * Wait until all the svc users go away.
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);

	__ip_vs_del_service(svc);

	write_unlock_bh(&__ip_vs_svc_lock);

	return 0;
}
1414
1415
1416/*
1417 *	Flush all the virtual services
1418 */
/*
 * Destroy every virtual service in both hash tables.  Always returns 0.
 * Unlike ip_vs_del_service() we wait for usecnt to drop to 0 (not 1)
 * because the flush path holds no reference of its own on the service.
 */
static int ip_vs_flush(void)
{
	int idx;
	struct ip_vs_service *svc, *nxt;

	/*
	 * Flush the service table hashed by <protocol,addr,port>
	 */
	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
			write_lock_bh(&__ip_vs_svc_lock);
			ip_vs_svc_unhash(svc);
			/*
			 * Wait until all the svc users go away.
			 */
			IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
			__ip_vs_del_service(svc);
			write_unlock_bh(&__ip_vs_svc_lock);
		}
	}

	/*
	 * Flush the service table hashed by fwmark
	 */
	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry_safe(svc, nxt,
					 &ip_vs_svc_fwm_table[idx], f_list) {
			write_lock_bh(&__ip_vs_svc_lock);
			ip_vs_svc_unhash(svc);
			/*
			 * Wait until all the svc users go away.
			 */
			IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
			__ip_vs_del_service(svc);
			write_unlock_bh(&__ip_vs_svc_lock);
		}
	}

	return 0;
}
1459
1460
1461/*
1462 *	Zero counters in a service or all services
1463 */
1464static int ip_vs_zero_service(struct ip_vs_service *svc)
1465{
1466	struct ip_vs_dest *dest;
1467
1468	write_lock_bh(&__ip_vs_svc_lock);
1469	list_for_each_entry(dest, &svc->destinations, n_list) {
1470		ip_vs_zero_stats(&dest->stats);
1471	}
1472	ip_vs_zero_stats(&svc->stats);
1473	write_unlock_bh(&__ip_vs_svc_lock);
1474	return 0;
1475}
1476
1477static int ip_vs_zero_all(void)
1478{
1479	int idx;
1480	struct ip_vs_service *svc;
1481
1482	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1483		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1484			ip_vs_zero_service(svc);
1485		}
1486	}
1487
1488	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1489		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1490			ip_vs_zero_service(svc);
1491		}
1492	}
1493
1494	ip_vs_zero_stats(&ip_vs_stats);
1495	return 0;
1496}
1497
1498
1499static int
1500proc_do_defense_mode(ctl_table *table, int write,
1501		     void __user *buffer, size_t *lenp, loff_t *ppos)
1502{
1503	int *valp = table->data;
1504	int val = *valp;
1505	int rc;
1506
1507	rc = proc_dointvec(table, write, buffer, lenp, ppos);
1508	if (write && (*valp != val)) {
1509		if ((*valp < 0) || (*valp > 3)) {
1510			/* Restore the correct value */
1511			*valp = val;
1512		} else {
1513			update_defense_level();
1514		}
1515	}
1516	return rc;
1517}
1518
1519
1520static int
1521proc_do_sync_threshold(ctl_table *table, int write,
1522		       void __user *buffer, size_t *lenp, loff_t *ppos)
1523{
1524	int *valp = table->data;
1525	int val[2];
1526	int rc;
1527
1528	/* backup the value first */
1529	memcpy(val, valp, sizeof(val));
1530
1531	rc = proc_dointvec(table, write, buffer, lenp, ppos);
1532	if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1533		/* Restore the correct value */
1534		memcpy(valp, val, sizeof(val));
1535	}
1536	return rc;
1537}
1538
1539
1540/*
1541 *	IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1542 */
1543
static struct ctl_table vs_vars[] = {
	{
		.procname	= "amemthresh",
		.data		= &sysctl_ip_vs_amemthresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#ifdef CONFIG_IP_VS_DEBUG
	{
		.procname	= "debug_level",
		.data		= &sysctl_ip_vs_debug_level,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
#endif
	{
		.procname	= "am_droprate",
		.data		= &sysctl_ip_vs_am_droprate,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	/* The three defense-mode knobs share a validating handler that
	 * limits them to 0..3 and refreshes the defense level. */
	{
		.procname	= "drop_entry",
		.data		= &sysctl_ip_vs_drop_entry,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_defense_mode,
	},
	{
		.procname	= "drop_packet",
		.data		= &sysctl_ip_vs_drop_packet,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_defense_mode,
	},
	{
		.procname	= "secure_tcp",
		.data		= &sysctl_ip_vs_secure_tcp,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_defense_mode,
	},
	{
		.procname	= "cache_bypass",
		.data		= &sysctl_ip_vs_cache_bypass,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "expire_nodest_conn",
		.data		= &sysctl_ip_vs_expire_nodest_conn,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "expire_quiescent_template",
		.data		= &sysctl_ip_vs_expire_quiescent_template,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	/* An int pair {threshold, period}; note maxlen is the whole array */
	{
		.procname	= "sync_threshold",
		.data		= &sysctl_ip_vs_sync_threshold,
		.maxlen		= sizeof(sysctl_ip_vs_sync_threshold),
		.mode		= 0644,
		.proc_handler	= proc_do_sync_threshold,
	},
	{
		.procname	= "nat_icmp_send",
		.data		= &sysctl_ip_vs_nat_icmp_send,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};
1626
/* /proc/sys path "net/ipv4/vs" under which the table above is placed */
const struct ctl_path net_vs_ctl_path[] = {
	{ .procname = "net", },
	{ .procname = "ipv4", },
	{ .procname = "vs", },
	{ }
};
EXPORT_SYMBOL_GPL(net_vs_ctl_path);

/* Handle from sysctl registration — presumably kept for unregistration
 * at module exit; registration is outside this part of the file. */
static struct ctl_table_header * sysctl_header;
1636
1637#ifdef CONFIG_PROC_FS
1638
/*
 * Cursor for the /proc seq_file walk over the two service hash tables:
 * 'table' points at whichever table (by-protocol or by-fwmark) is being
 * scanned, 'bucket' is the current hash bucket index.
 */
struct ip_vs_iter {
	struct list_head *table;
	int bucket;
};
1643
1644/*
1645 *	Write the contents of the VS rule table to a PROCfs file.
1646 *	(It is kept just for backward compatibility)
1647 */
1648static inline const char *ip_vs_fwd_name(unsigned flags)
1649{
1650	switch (flags & IP_VS_CONN_F_FWD_MASK) {
1651	case IP_VS_CONN_F_LOCALNODE:
1652		return "Local";
1653	case IP_VS_CONN_F_TUNNEL:
1654		return "Tunnel";
1655	case IP_VS_CONN_F_DROUTE:
1656		return "Route";
1657	default:
1658		return "Masq";
1659	}
1660}
1661
1662
1663/* Get the Nth entry in the two lists */
/* Get the Nth entry in the two lists */
static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
{
	struct ip_vs_iter *iter = seq->private;
	int idx;
	struct ip_vs_service *svc;

	/* look in hash by protocol */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
			/* pos counts down across both tables; hitting 0
			 * means this is the requested entry */
			if (pos-- == 0){
				iter->table = ip_vs_svc_table;
				iter->bucket = idx;
				return svc;
			}
		}
	}

	/* keep looking in fwmark */
	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
			if (pos-- == 0) {
				iter->table = ip_vs_svc_fwm_table;
				iter->bucket = idx;
				return svc;
			}
		}
	}

	/* pos is past the end of both tables */
	return NULL;
}
1694
/* seq_file .start: take the table lock for the whole walk; position 0
 * yields the header token, any other position the (pos-1)-th service. */
static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(__ip_vs_svc_lock)
{

	read_lock_bh(&__ip_vs_svc_lock);
	return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
}
1702
1703
/* seq_file .next: advance from service @v to the next service, walking
 * the by-protocol table first and then the by-fwmark table. */
static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct list_head *e;
	struct ip_vs_iter *iter;
	struct ip_vs_service *svc;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip_vs_info_array(seq,0);

	svc = v;
	iter = seq->private;

	if (iter->table == ip_vs_svc_table) {
		/* next service in table hashed by protocol */
		if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
			return list_entry(e, struct ip_vs_service, s_list);


		/* bucket exhausted: the loop body returns immediately, so
		 * this just fetches the first entry of the next non-empty
		 * bucket */
		while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
			list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
					    s_list) {
				return svc;
			}
		}

		/* by-protocol table exhausted: switch to the fwmark table,
		 * starting before its first bucket */
		iter->table = ip_vs_svc_fwm_table;
		iter->bucket = -1;
		goto scan_fwmark;
	}

	/* next service in hashed by fwmark */
	if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
		return list_entry(e, struct ip_vs_service, f_list);

 scan_fwmark:
	while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
		/* again: return the first entry of the next non-empty bucket */
		list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
				    f_list)
			return svc;
	}

	return NULL;
}
1748
/* seq_file .stop: release the lock taken in ip_vs_info_seq_start() */
static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
__releases(__ip_vs_svc_lock)
{
	read_unlock_bh(&__ip_vs_svc_lock);
}
1754
1755
1756static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1757{
1758	if (v == SEQ_START_TOKEN) {
1759		seq_printf(seq,
1760			"IP Virtual Server version %d.%d.%d (size=%d)\n",
1761			NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1762		seq_puts(seq,
1763			 "Prot LocalAddress:Port Scheduler Flags\n");
1764		seq_puts(seq,
1765			 "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1766	} else {
1767		const struct ip_vs_service *svc = v;
1768		const struct ip_vs_iter *iter = seq->private;
1769		const struct ip_vs_dest *dest;
1770
1771		if (iter->table == ip_vs_svc_table) {
1772#ifdef CONFIG_IP_VS_IPV6
1773			if (svc->af == AF_INET6)
1774				seq_printf(seq, "%s  [%pI6]:%04X %s ",
1775					   ip_vs_proto_name(svc->protocol),
1776					   &svc->addr.in6,
1777					   ntohs(svc->port),
1778					   svc->scheduler->name);
1779			else
1780#endif
1781				seq_printf(seq, "%s  %08X:%04X %s %s ",
1782					   ip_vs_proto_name(svc->protocol),
1783					   ntohl(svc->addr.ip),
1784					   ntohs(svc->port),
1785					   svc->scheduler->name,
1786					   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1787		} else {
1788			seq_printf(seq, "FWM  %08X %s %s",
1789				   svc->fwmark, svc->scheduler->name,
1790				   (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1791		}
1792
1793		if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1794			seq_printf(seq, "persistent %d %08X\n",
1795				svc->timeout,
1796				ntohl(svc->netmask));
1797		else
1798			seq_putc(seq, '\n');
1799
1800		list_for_each_entry(dest, &svc->destinations, n_list) {
1801#ifdef CONFIG_IP_VS_IPV6
1802			if (dest->af == AF_INET6)
1803				seq_printf(seq,
1804					   "  -> [%pI6]:%04X"
1805					   "      %-7s %-6d %-10d %-10d\n",
1806					   &dest->addr.in6,
1807					   ntohs(dest->port),
1808					   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1809					   atomic_read(&dest->weight),
1810					   atomic_read(&dest->activeconns),
1811					   atomic_read(&dest->inactconns));
1812			else
1813#endif
1814				seq_printf(seq,
1815					   "  -> %08X:%04X      "
1816					   "%-7s %-6d %-10d %-10d\n",
1817					   ntohl(dest->addr.ip),
1818					   ntohs(dest->port),
1819					   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1820					   atomic_read(&dest->weight),
1821					   atomic_read(&dest->activeconns),
1822					   atomic_read(&dest->inactconns));
1823
1824		}
1825	}
1826	return 0;
1827}
1828
/* seq_file operations backing /proc/net/ip_vs */
static const struct seq_operations ip_vs_info_seq_ops = {
	.start = ip_vs_info_seq_start,
	.next  = ip_vs_info_seq_next,
	.stop  = ip_vs_info_seq_stop,
	.show  = ip_vs_info_seq_show,
};
1835
/* open: allocate a zeroed ip_vs_iter as seq_file private cursor */
static int ip_vs_info_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ip_vs_info_seq_ops,
			sizeof(struct ip_vs_iter));
}
1841
/* file operations for /proc/net/ip_vs */
static const struct file_operations ip_vs_info_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip_vs_info_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_private,
};
1849
1850#endif
1851
/* Global IPVS statistics, guarded by their own spinlock */
struct ip_vs_stats ip_vs_stats = {
	.lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
};
1855
1856#ifdef CONFIG_PROC_FS
/* Render /proc/net/ip_vs_stats: totals then rates, all in hex
 * (historical output format preserved for userspace compatibility). */
static int ip_vs_stats_show(struct seq_file *seq, void *v)
{

/*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
	seq_puts(seq,
		 "   Total Incoming Outgoing         Incoming         Outgoing\n");
	seq_printf(seq,
		   "   Conns  Packets  Packets            Bytes            Bytes\n");

	/* snapshot under the stats lock so the row is self-consistent */
	spin_lock_bh(&ip_vs_stats.lock);
	seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
		   ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
		   (unsigned long long) ip_vs_stats.ustats.inbytes,
		   (unsigned long long) ip_vs_stats.ustats.outbytes);

/*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
	seq_puts(seq,
		   " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
	seq_printf(seq,"%8X %8X %8X %16X %16X\n",
			ip_vs_stats.ustats.cps,
			ip_vs_stats.ustats.inpps,
			ip_vs_stats.ustats.outpps,
			ip_vs_stats.ustats.inbps,
			ip_vs_stats.ustats.outbps);
	spin_unlock_bh(&ip_vs_stats.lock);

	return 0;
}
1885
/* open: single-record seq_file, no private iterator needed */
static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open(file, ip_vs_stats_show, NULL);
}
1890
/* file operations for /proc/net/ip_vs_stats */
static const struct file_operations ip_vs_stats_fops = {
	.owner = THIS_MODULE,
	.open = ip_vs_stats_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
1898
1899#endif
1900
1901/*
1902 *	Set timeout values for tcp tcpfin udp in the timeout_table.
1903 */
/*
 * Apply the user-supplied protocol timeouts (seconds).  A value of 0
 * leaves the corresponding timeout unchanged.  Always returns 0.
 */
static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
{
	IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
		  u->tcp_timeout,
		  u->tcp_fin_timeout,
		  u->udp_timeout);

#ifdef CONFIG_IP_VS_PROTO_TCP
	if (u->tcp_timeout) {
		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
			= u->tcp_timeout * HZ;
	}

	if (u->tcp_fin_timeout) {
		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
			= u->tcp_fin_timeout * HZ;
	}
#endif

#ifdef CONFIG_IP_VS_PROTO_UDP
	if (u->udp_timeout) {
		ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
			= u->udp_timeout * HZ;
	}
#endif
	return 0;
}
1931
1932
/* Expected argument sizes for each IP_VS_SO_SET_* sockopt command,
 * indexed by command offset from IP_VS_BASE_CTL.  do_ip_vs_set_ctl()
 * rejects any request whose length does not match exactly. */
#define SET_CMDID(cmd)		(cmd - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN		SVCDEST_ARG_LEN

static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
	[SET_CMDID(IP_VS_SO_SET_ADD)]		= SERVICE_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_EDIT)]		= SERVICE_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_DEL)]		= SERVICE_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_FLUSH)]		= 0,
	[SET_CMDID(IP_VS_SO_SET_ADDDEST)]	= SVCDEST_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_DELDEST)]	= SVCDEST_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_EDITDEST)]	= SVCDEST_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_TIMEOUT)]	= TIMEOUT_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]	= DAEMON_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]	= DAEMON_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_ZERO)]		= SERVICE_ARG_LEN,
};
1954
1955static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
1956				  struct ip_vs_service_user *usvc_compat)
1957{
1958	usvc->af		= AF_INET;
1959	usvc->protocol		= usvc_compat->protocol;
1960	usvc->addr.ip		= usvc_compat->addr;
1961	usvc->port		= usvc_compat->port;
1962	usvc->fwmark		= usvc_compat->fwmark;
1963
1964	/* Deep copy of sched_name is not needed here */
1965	usvc->sched_name	= usvc_compat->sched_name;
1966
1967	usvc->flags		= usvc_compat->flags;
1968	usvc->timeout		= usvc_compat->timeout;
1969	usvc->netmask		= usvc_compat->netmask;
1970}
1971
1972static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
1973				   struct ip_vs_dest_user *udest_compat)
1974{
1975	udest->addr.ip		= udest_compat->addr;
1976	udest->port		= udest_compat->port;
1977	udest->conn_flags	= udest_compat->conn_flags;
1978	udest->weight		= udest_compat->weight;
1979	udest->u_threshold	= udest_compat->u_threshold;
1980	udest->l_threshold	= udest_compat->l_threshold;
1981}
1982
/*
 * setsockopt() entry point for the legacy IPVS control interface.
 * Validates the command and argument length, copies the argument from
 * userspace, then dispatches to the service/destination/daemon
 * operations under __ip_vs_mutex.  Returns 0 or a negative errno.
 */
static int
do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
{
	int ret;
	unsigned char arg[MAX_ARG_LEN];
	struct ip_vs_service_user *usvc_compat;
	struct ip_vs_service_user_kern usvc;
	struct ip_vs_service *svc;
	struct ip_vs_dest_user *udest_compat;
	struct ip_vs_dest_user_kern udest;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
		return -EINVAL;
	/* NOTE(review): len is unsigned, so "len < 0" can never be true;
	 * the upper-bound check alone is what protects arg[] here */
	if (len < 0 || len >  MAX_ARG_LEN)
		return -EINVAL;
	if (len != set_arglen[SET_CMDID(cmd)]) {
		pr_err("set_ctl: len %u != %u\n",
		       len, set_arglen[SET_CMDID(cmd)]);
		return -EINVAL;
	}

	if (copy_from_user(arg, user, len) != 0)
		return -EFAULT;

	/* increase the module use count */
	ip_vs_use_count_inc();

	if (mutex_lock_interruptible(&__ip_vs_mutex)) {
		ret = -ERESTARTSYS;
		goto out_dec;
	}

	/* Commands that do not reference a particular service */
	if (cmd == IP_VS_SO_SET_FLUSH) {
		/* Flush the virtual service */
		ret = ip_vs_flush();
		goto out_unlock;
	} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
		/* Set timeout values for (tcp tcpfin udp) */
		ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
		goto out_unlock;
	} else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
		ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
		goto out_unlock;
	} else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
		struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
		ret = stop_sync_thread(dm->state);
		goto out_unlock;
	}

	usvc_compat = (struct ip_vs_service_user *)arg;
	udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);

	/* We only use the new structs internally, so copy userspace compat
	 * structs to extended internal versions */
	ip_vs_copy_usvc_compat(&usvc, usvc_compat);
	ip_vs_copy_udest_compat(&udest, udest_compat);

	if (cmd == IP_VS_SO_SET_ZERO) {
		/* if no service address is set, zero counters in all */
		if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
			ret = ip_vs_zero_all();
			goto out_unlock;
		}
	}

	/* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
	if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
	    usvc.protocol != IPPROTO_SCTP) {
		pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
		       usvc.protocol, &usvc.addr.ip,
		       ntohs(usvc.port), usvc.sched_name);
		ret = -EFAULT;
		goto out_unlock;
	}

	/* Lookup the exact service by <protocol, addr, port> or fwmark.
	 * On success the lookup takes a reference that is dropped below. */
	if (usvc.fwmark == 0)
		svc = __ip_vs_service_get(usvc.af, usvc.protocol,
					  &usvc.addr, usvc.port);
	else
		svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);

	/* Every command except ADD requires an existing, matching service */
	if (cmd != IP_VS_SO_SET_ADD
	    && (svc == NULL || svc->protocol != usvc.protocol)) {
		ret = -ESRCH;
		goto out_unlock;
	}

	switch (cmd) {
	case IP_VS_SO_SET_ADD:
		if (svc != NULL)
			ret = -EEXIST;
		else
			ret = ip_vs_add_service(&usvc, &svc);
		break;
	case IP_VS_SO_SET_EDIT:
		ret = ip_vs_edit_service(svc, &usvc);
		break;
	case IP_VS_SO_SET_DEL:
		ret = ip_vs_del_service(svc);
		/* on success the service is gone: skip the final put */
		if (!ret)
			goto out_unlock;
		break;
	case IP_VS_SO_SET_ZERO:
		ret = ip_vs_zero_service(svc);
		break;
	case IP_VS_SO_SET_ADDDEST:
		ret = ip_vs_add_dest(svc, &udest);
		break;
	case IP_VS_SO_SET_EDITDEST:
		ret = ip_vs_edit_dest(svc, &udest);
		break;
	case IP_VS_SO_SET_DELDEST:
		ret = ip_vs_del_dest(svc, &udest);
		break;
	default:
		ret = -EINVAL;
	}

	/* drop the reference taken by the lookup (or by ADD) */
	if (svc)
		ip_vs_service_put(svc);

  out_unlock:
	mutex_unlock(&__ip_vs_mutex);
  out_dec:
	/* decrease the module use count */
	ip_vs_use_count_dec();

	return ret;
}
2117
2118
/* Take a consistent snapshot of the user-visible counters of @src */
static void
ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
{
	spin_lock_bh(&src->lock);
	memcpy(dst, &src->ustats, sizeof(*dst));
	spin_unlock_bh(&src->lock);
}
2126
/* Fill a legacy (IPv4-only) service entry from the kernel service;
 * timeout is converted back from jiffies to seconds for userspace. */
static void
ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
{
	dst->protocol = src->protocol;
	dst->addr = src->addr.ip;
	dst->port = src->port;
	dst->fwmark = src->fwmark;
	strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
	dst->flags = src->flags;
	dst->timeout = src->timeout / HZ;
	dst->netmask = src->netmask;
	dst->num_dests = src->num_dests;
	ip_vs_copy_stats(&dst->stats, &src->stats);
}
2141
/*
 * Copy up to get->num_services service entries to userspace for the
 * legacy IP_VS_SO_GET_SERVICES sockopt, walking both hash tables.
 * Only IPv4 services are reported.  Returns 0 or -EFAULT.
 */
static inline int
__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
			    struct ip_vs_get_services __user *uptr)
{
	int idx, count=0;
	struct ip_vs_service *svc;
	struct ip_vs_service_entry entry;
	int ret = 0;

	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
			/* Only expose IPv4 entries to old interface */
			if (svc->af != AF_INET)
				continue;

			if (count >= get->num_services)
				goto out;
			/* zeroed first so no stack padding reaches userspace */
			memset(&entry, 0, sizeof(entry));
			ip_vs_copy_service(&entry, svc);
			if (copy_to_user(&uptr->entrytable[count],
					 &entry, sizeof(entry))) {
				ret = -EFAULT;
				goto out;
			}
			count++;
		}
	}

	for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
		list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
			/* Only expose IPv4 entries to old interface */
			if (svc->af != AF_INET)
				continue;

			if (count >= get->num_services)
				goto out;
			memset(&entry, 0, sizeof(entry));
			ip_vs_copy_service(&entry, svc);
			if (copy_to_user(&uptr->entrytable[count],
					 &entry, sizeof(entry))) {
				ret = -EFAULT;
				goto out;
			}
			count++;
		}
	}
  out:
	return ret;
}
2191
2192static inline int
2193__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2194			 struct ip_vs_get_dests __user *uptr)
2195{
2196	struct ip_vs_service *svc;
2197	union nf_inet_addr addr = { .ip = get->addr };
2198	int ret = 0;
2199
2200	if (get->fwmark)
2201		svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
2202	else
2203		svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
2204					  get->port);
2205
2206	if (svc) {
2207		int count = 0;
2208		struct ip_vs_dest *dest;
2209		struct ip_vs_dest_entry entry;
2210
2211		list_for_each_entry(dest, &svc->destinations, n_list) {
2212			if (count >= get->num_dests)
2213				break;
2214
2215			entry.addr = dest->addr.ip;
2216			entry.port = dest->port;
2217			entry.conn_flags = atomic_read(&dest->conn_flags);
2218			entry.weight = atomic_read(&dest->weight);
2219			entry.u_threshold = dest->u_threshold;
2220			entry.l_threshold = dest->l_threshold;
2221			entry.activeconns = atomic_read(&dest->activeconns);
2222			entry.inactconns = atomic_read(&dest->inactconns);
2223			entry.persistconns = atomic_read(&dest->persistconns);
2224			ip_vs_copy_stats(&entry.stats, &dest->stats);
2225			if (copy_to_user(&uptr->entrytable[count],
2226					 &entry, sizeof(entry))) {
2227				ret = -EFAULT;
2228				break;
2229			}
2230			count++;
2231		}
2232		ip_vs_service_put(svc);
2233	} else
2234		ret = -ESRCH;
2235	return ret;
2236}
2237
/* Report the current protocol timeouts in seconds (converted back from
 * jiffies); fields for protocols compiled out are left untouched. */
static inline void
__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
{
#ifdef CONFIG_IP_VS_PROTO_TCP
	u->tcp_timeout =
		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
	u->tcp_fin_timeout =
		ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
	u->udp_timeout =
		ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
#endif
}
2252
2253
/* Minimum argument sizes for each IP_VS_SO_GET_* sockopt command,
 * indexed by command offset from IP_VS_BASE_CTL.  Unlike the set path,
 * do_ip_vs_get_ctl() only requires *len to be at least this large. */
#define GET_CMDID(cmd)		(cmd - IP_VS_BASE_CTL)
#define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
#define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)

static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
	[GET_CMDID(IP_VS_SO_GET_VERSION)]	= 64,
	[GET_CMDID(IP_VS_SO_GET_INFO)]		= GET_INFO_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_SERVICES)]	= GET_SERVICES_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_SERVICE)]	= GET_SERVICE_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_DESTS)]		= GET_DESTS_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_TIMEOUT)]	= GET_TIMEOUT_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_DAEMON)]	= GET_DAEMON_ARG_LEN,
};
2271
2272static int
2273do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2274{
2275	unsigned char arg[128];
2276	int ret = 0;
2277	unsigned int copylen;
2278
2279	if (!capable(CAP_NET_ADMIN))
2280		return -EPERM;
2281
2282	if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2283		return -EINVAL;
2284
2285	if (*len < get_arglen[GET_CMDID(cmd)]) {
2286		pr_err("get_ctl: len %u < %u\n",
2287		       *len, get_arglen[GET_CMDID(cmd)]);
2288		return -EINVAL;
2289	}
2290
2291	copylen = get_arglen[GET_CMDID(cmd)];
2292	if (copylen > 128)
2293		return -EINVAL;
2294
2295	if (copy_from_user(arg, user, copylen) != 0)
2296		return -EFAULT;
2297
2298	if (mutex_lock_interruptible(&__ip_vs_mutex))
2299		return -ERESTARTSYS;
2300
2301	switch (cmd) {
2302	case IP_VS_SO_GET_VERSION:
2303	{
2304		char buf[64];
2305
2306		sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2307			NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2308		if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2309			ret = -EFAULT;
2310			goto out;
2311		}
2312		*len = strlen(buf)+1;
2313	}
2314	break;
2315
2316	case IP_VS_SO_GET_INFO:
2317	{
2318		struct ip_vs_getinfo info;
2319		info.version = IP_VS_VERSION_CODE;
2320		info.size = ip_vs_conn_tab_size;
2321		info.num_services = ip_vs_num_services;
2322		if (copy_to_user(user, &info, sizeof(info)) != 0)
2323			ret = -EFAULT;
2324	}
2325	break;
2326
2327	case IP_VS_SO_GET_SERVICES:
2328	{
2329		struct ip_vs_get_services *get;
2330		int size;
2331
2332		get = (struct ip_vs_get_services *)arg;
2333		size = sizeof(*get) +
2334			sizeof(struct ip_vs_service_entry) * get->num_services;
2335		if (*len != size) {
2336			pr_err("length: %u != %u\n", *len, size);
2337			ret = -EINVAL;
2338			goto out;
2339		}
2340		ret = __ip_vs_get_service_entries(get, user);
2341	}
2342	break;
2343
2344	case IP_VS_SO_GET_SERVICE:
2345	{
2346		struct ip_vs_service_entry *entry;
2347		struct ip_vs_service *svc;
2348		union nf_inet_addr addr;
2349
2350		entry = (struct ip_vs_service_entry *)arg;
2351		addr.ip = entry->addr;
2352		if (entry->fwmark)
2353			svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
2354		else
2355			svc = __ip_vs_service_get(AF_INET, entry->protocol,
2356						  &addr, entry->port);
2357		if (svc) {
2358			ip_vs_copy_service(entry, svc);
2359			if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2360				ret = -EFAULT;
2361			ip_vs_service_put(svc);
2362		} else
2363			ret = -ESRCH;
2364	}
2365	break;
2366
2367	case IP_VS_SO_GET_DESTS:
2368	{
2369		struct ip_vs_get_dests *get;
2370		int size;
2371
2372		get = (struct ip_vs_get_dests *)arg;
2373		size = sizeof(*get) +
2374			sizeof(struct ip_vs_dest_entry) * get->num_dests;
2375		if (*len != size) {
2376			pr_err("length: %u != %u\n", *len, size);
2377			ret = -EINVAL;
2378			goto out;
2379		}
2380		ret = __ip_vs_get_dest_entries(get, user);
2381	}
2382	break;
2383
2384	case IP_VS_SO_GET_TIMEOUT:
2385	{
2386		struct ip_vs_timeout_user t;
2387
2388		__ip_vs_get_timeouts(&t);
2389		if (copy_to_user(user, &t, sizeof(t)) != 0)
2390			ret = -EFAULT;
2391	}
2392	break;
2393
2394	case IP_VS_SO_GET_DAEMON:
2395	{
2396		struct ip_vs_daemon_user d[2];
2397
2398		memset(&d, 0, sizeof(d));
2399		if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2400			d[0].state = IP_VS_STATE_MASTER;
2401			strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
2402			d[0].syncid = ip_vs_master_syncid;
2403		}
2404		if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2405			d[1].state = IP_VS_STATE_BACKUP;
2406			strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
2407			d[1].syncid = ip_vs_backup_syncid;
2408		}
2409		if (copy_to_user(user, &d, sizeof(d)) != 0)
2410			ret = -EFAULT;
2411	}
2412	break;
2413
2414	default:
2415		ret = -EINVAL;
2416	}
2417
2418  out:
2419	mutex_unlock(&__ip_vs_mutex);
2420	return ret;
2421}
2422
2423
/* Netfilter sockopt registration for the legacy [gs]etsockopt control
 * interface.  The +1 on the max values suggests the optmax bound is
 * exclusive -- matches how the min/max pair is used elsewhere with
 * nf_register_sockopt(); confirm against the nf_sockopt_ops contract. */
static struct nf_sockopt_ops ip_vs_sockopts = {
	.pf		= PF_INET,
	.set_optmin	= IP_VS_BASE_CTL,
	.set_optmax	= IP_VS_SO_SET_MAX+1,
	.set		= do_ip_vs_set_ctl,
	.get_optmin	= IP_VS_BASE_CTL,
	.get_optmax	= IP_VS_SO_GET_MAX+1,
	.get		= do_ip_vs_get_ctl,
	.owner		= THIS_MODULE,
};
2434
2435/*
2436 * Generic Netlink interface
2437 */
2438
2439/* IPVS genetlink family */
/* IPVS genetlink family */
static struct genl_family ip_vs_genl_family = {
	.id		= GENL_ID_GENERATE,	/* let genetlink pick the id */
	.hdrsize	= 0,			/* no family-private header */
	.name		= IPVS_GENL_NAME,
	.version	= IPVS_GENL_VERSION,
	.maxattr	= IPVS_CMD_MAX,
};
2447
2448/* Policy used for first-level command attributes */
/* Policy used for first-level command attributes: nested service, dest
 * and daemon specifications plus the three global timeout settings. */
static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
	[IPVS_CMD_ATTR_SERVICE]		= { .type = NLA_NESTED },
	[IPVS_CMD_ATTR_DEST]		= { .type = NLA_NESTED },
	[IPVS_CMD_ATTR_DAEMON]		= { .type = NLA_NESTED },
	[IPVS_CMD_ATTR_TIMEOUT_TCP]	= { .type = NLA_U32 },
	[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]	= { .type = NLA_U32 },
	[IPVS_CMD_ATTR_TIMEOUT_UDP]	= { .type = NLA_U32 },
};
2457
2458/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
	[IPVS_DAEMON_ATTR_STATE]	= { .type = NLA_U32 },
	/* NUL-terminated interface name, bounded by IP_VS_IFNAME_MAXLEN */
	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
					    .len = IP_VS_IFNAME_MAXLEN },
	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
};
2465
2466/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
	[IPVS_SVC_ATTR_AF]		= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_PROTOCOL]	= { .type = NLA_U16 },
	/* raw address blob; size bounded by the largest v4/v6 union */
	[IPVS_SVC_ATTR_ADDR]		= { .type = NLA_BINARY,
					    .len = sizeof(union nf_inet_addr) },
	[IPVS_SVC_ATTR_PORT]		= { .type = NLA_U16 },
	[IPVS_SVC_ATTR_FWMARK]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_SCHED_NAME]	= { .type = NLA_NUL_STRING,
					    .len = IP_VS_SCHEDNAME_MAXLEN },
	/* flags + mask pair, see struct ip_vs_flags */
	[IPVS_SVC_ATTR_FLAGS]		= { .type = NLA_BINARY,
					    .len = sizeof(struct ip_vs_flags) },
	[IPVS_SVC_ATTR_TIMEOUT]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_NETMASK]		= { .type = NLA_U32 },
	[IPVS_SVC_ATTR_STATS]		= { .type = NLA_NESTED },
};
2482
2483/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
	[IPVS_DEST_ATTR_ADDR]		= { .type = NLA_BINARY,
					    .len = sizeof(union nf_inet_addr) },
	[IPVS_DEST_ATTR_PORT]		= { .type = NLA_U16 },
	[IPVS_DEST_ATTR_FWD_METHOD]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_WEIGHT]		= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_U_THRESH]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_L_THRESH]	= { .type = NLA_U32 },
	/* the three connection counters below are read-only (dump output) */
	[IPVS_DEST_ATTR_ACTIVE_CONNS]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_INACT_CONNS]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_PERSIST_CONNS]	= { .type = NLA_U32 },
	[IPVS_DEST_ATTR_STATS]		= { .type = NLA_NESTED },
};
2497
/*
 * Emit an IPVS_STATS_ATTR_* nest of type @container_type describing
 * @stats into @skb.  Returns 0, or -EMSGSIZE when the skb tail room is
 * exhausted (the partially written nest is cancelled).
 */
static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
				 struct ip_vs_stats *stats)
{
	struct nlattr *nl_stats = nla_nest_start(skb, container_type);
	if (!nl_stats)
		return -EMSGSIZE;

	/* take the stats lock so all counters form a consistent snapshot;
	 * note the NLA_PUT* macros jump to nla_put_failure on overflow,
	 * which is why the unlock is duplicated on that path */
	spin_lock_bh(&stats->lock);

	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
	NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
	NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
	NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);

	spin_unlock_bh(&stats->lock);

	nla_nest_end(skb, nl_stats);

	return 0;

nla_put_failure:
	spin_unlock_bh(&stats->lock);
	nla_nest_cancel(skb, nl_stats);
	return -EMSGSIZE;
}
2529
/*
 * Emit an IPVS_CMD_ATTR_SERVICE nest describing @svc (identity,
 * scheduler, flags, timeout, netmask and nested stats) into @skb.
 * Returns 0 or -EMSGSIZE on skb overflow.
 */
static int ip_vs_genl_fill_service(struct sk_buff *skb,
				   struct ip_vs_service *svc)
{
	struct nlattr *nl_service;
	/* report every flag bit as significant (mask = ~0) */
	struct ip_vs_flags flags = { .flags = svc->flags,
				     .mask = ~0 };

	nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
	if (!nl_service)
		return -EMSGSIZE;

	/* NLA_PUT* macros jump to nla_put_failure when the skb is full */
	NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);

	/* a service is identified by fwmark OR by <protocol, addr, port> */
	if (svc->fwmark) {
		NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
	} else {
		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
		NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
		NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
	}

	NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
	NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
	/* timeout is kept in jiffies internally, exported in seconds */
	NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
	NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);

	if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
		goto nla_put_failure;

	nla_nest_end(skb, nl_service);

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nl_service);
	return -EMSGSIZE;
}
2567
2568static int ip_vs_genl_dump_service(struct sk_buff *skb,
2569				   struct ip_vs_service *svc,
2570				   struct netlink_callback *cb)
2571{
2572	void *hdr;
2573
2574	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2575			  &ip_vs_genl_family, NLM_F_MULTI,
2576			  IPVS_CMD_NEW_SERVICE);
2577	if (!hdr)
2578		return -EMSGSIZE;
2579
2580	if (ip_vs_genl_fill_service(skb, svc) < 0)
2581		goto nla_put_failure;
2582
2583	return genlmsg_end(skb, hdr);
2584
2585nla_put_failure:
2586	genlmsg_cancel(skb, hdr);
2587	return -EMSGSIZE;
2588}
2589
2590static int ip_vs_genl_dump_services(struct sk_buff *skb,
2591				    struct netlink_callback *cb)
2592{
2593	int idx = 0, i;
2594	int start = cb->args[0];
2595	struct ip_vs_service *svc;
2596
2597	mutex_lock(&__ip_vs_mutex);
2598	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2599		list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2600			if (++idx <= start)
2601				continue;
2602			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2603				idx--;
2604				goto nla_put_failure;
2605			}
2606		}
2607	}
2608
2609	for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2610		list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2611			if (++idx <= start)
2612				continue;
2613			if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2614				idx--;
2615				goto nla_put_failure;
2616			}
2617		}
2618	}
2619
2620nla_put_failure:
2621	mutex_unlock(&__ip_vs_mutex);
2622	cb->args[0] = idx;
2623
2624	return skb->len;
2625}
2626
/*
 * Parse the nested IPVS_CMD_ATTR_SERVICE attribute @nla into @usvc.
 * With @full_entry == 0 only the identifying fields are required
 * (af + fwmark, or af + protocol/addr/port); with @full_entry != 0 the
 * scheduler name, flags, timeout and netmask must also be present.
 * Returns 0, -EINVAL on malformed input, or -EAFNOSUPPORT for an
 * unsupported address family.
 */
static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
				    struct nlattr *nla, int full_entry)
{
	struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
	struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;

	/* Parse mandatory identifying service fields first */
	if (nla == NULL ||
	    nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
		return -EINVAL;

	nla_af		= attrs[IPVS_SVC_ATTR_AF];
	nla_protocol	= attrs[IPVS_SVC_ATTR_PROTOCOL];
	nla_addr	= attrs[IPVS_SVC_ATTR_ADDR];
	nla_port	= attrs[IPVS_SVC_ATTR_PORT];
	nla_fwmark	= attrs[IPVS_SVC_ATTR_FWMARK];

	/* either a fwmark or the complete <protocol, addr, port> triple
	 * must identify the service */
	if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
		return -EINVAL;

	memset(usvc, 0, sizeof(*usvc));

	usvc->af = nla_get_u16(nla_af);
#ifdef CONFIG_IP_VS_IPV6
	if (usvc->af != AF_INET && usvc->af != AF_INET6)
#else
	if (usvc->af != AF_INET)
#endif
		return -EAFNOSUPPORT;

	if (nla_fwmark) {
		usvc->protocol = IPPROTO_TCP;
		usvc->fwmark = nla_get_u32(nla_fwmark);
	} else {
		usvc->protocol = nla_get_u16(nla_protocol);
		nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
		usvc->port = nla_get_u16(nla_port);
		usvc->fwmark = 0;
	}

	/* If a full entry was requested, check for the additional fields */
	if (full_entry) {
		struct nlattr *nla_sched, *nla_flags, *nla_timeout,
			      *nla_netmask;
		struct ip_vs_flags flags;
		struct ip_vs_service *svc;

		nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
		nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
		nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
		nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];

		if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
			return -EINVAL;

		nla_memcpy(&flags, nla_flags, sizeof(flags));

		/* prefill flags from service if it already exists */
		if (usvc->fwmark)
			svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
		else
			svc = __ip_vs_service_get(usvc->af, usvc->protocol,
						  &usvc->addr, usvc->port);
		if (svc) {
			usvc->flags = svc->flags;
			ip_vs_service_put(svc);
		} else
			usvc->flags = 0;

		/* set new flags from userland: only the bits selected by
		 * flags.mask are taken from the request */
		usvc->flags = (usvc->flags & ~flags.mask) |
			      (flags.flags & flags.mask);
		/* sched_name points into the netlink message itself, not a
		 * copy -- it must be consumed before the message is freed */
		usvc->sched_name = nla_data(nla_sched);
		usvc->timeout = nla_get_u32(nla_timeout);
		usvc->netmask = nla_get_u32(nla_netmask);
	}

	return 0;
}
2706
2707static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2708{
2709	struct ip_vs_service_user_kern usvc;
2710	int ret;
2711
2712	ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2713	if (ret)
2714		return ERR_PTR(ret);
2715
2716	if (usvc.fwmark)
2717		return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
2718	else
2719		return __ip_vs_service_get(usvc.af, usvc.protocol,
2720					   &usvc.addr, usvc.port);
2721}
2722
/*
 * Emit an IPVS_CMD_ATTR_DEST nest describing @dest (address, port,
 * forwarding method, weight, thresholds, connection counters and nested
 * stats) into @skb.  Returns 0 or -EMSGSIZE on skb overflow.
 */
static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
{
	struct nlattr *nl_dest;

	nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
	if (!nl_dest)
		return -EMSGSIZE;

	/* NLA_PUT* macros jump to nla_put_failure when the skb is full */
	NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
	NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);

	/* only the forwarding-method bits of conn_flags are exported */
	NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
		    atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
	NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
	NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
	NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
	NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
		    atomic_read(&dest->activeconns));
	NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
		    atomic_read(&dest->inactconns));
	NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
		    atomic_read(&dest->persistconns));

	if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
		goto nla_put_failure;

	nla_nest_end(skb, nl_dest);

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nl_dest);
	return -EMSGSIZE;
}
2757
2758static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2759				struct netlink_callback *cb)
2760{
2761	void *hdr;
2762
2763	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2764			  &ip_vs_genl_family, NLM_F_MULTI,
2765			  IPVS_CMD_NEW_DEST);
2766	if (!hdr)
2767		return -EMSGSIZE;
2768
2769	if (ip_vs_genl_fill_dest(skb, dest) < 0)
2770		goto nla_put_failure;
2771
2772	return genlmsg_end(skb, hdr);
2773
2774nla_put_failure:
2775	genlmsg_cancel(skb, hdr);
2776	return -EMSGSIZE;
2777}
2778
2779static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2780				 struct netlink_callback *cb)
2781{
2782	int idx = 0;
2783	int start = cb->args[0];
2784	struct ip_vs_service *svc;
2785	struct ip_vs_dest *dest;
2786	struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2787
2788	mutex_lock(&__ip_vs_mutex);
2789
2790	/* Try to find the service for which to dump destinations */
2791	if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2792			IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2793		goto out_err;
2794
2795	svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2796	if (IS_ERR(svc) || svc == NULL)
2797		goto out_err;
2798
2799	/* Dump the destinations */
2800	list_for_each_entry(dest, &svc->destinations, n_list) {
2801		if (++idx <= start)
2802			continue;
2803		if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2804			idx--;
2805			goto nla_put_failure;
2806		}
2807	}
2808
2809nla_put_failure:
2810	cb->args[0] = idx;
2811	ip_vs_service_put(svc);
2812
2813out_err:
2814	mutex_unlock(&__ip_vs_mutex);
2815
2816	return skb->len;
2817}
2818
2819static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
2820				 struct nlattr *nla, int full_entry)
2821{
2822	struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2823	struct nlattr *nla_addr, *nla_port;
2824
2825	/* Parse mandatory identifying destination fields first */
2826	if (nla == NULL ||
2827	    nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2828		return -EINVAL;
2829
2830	nla_addr	= attrs[IPVS_DEST_ATTR_ADDR];
2831	nla_port	= attrs[IPVS_DEST_ATTR_PORT];
2832
2833	if (!(nla_addr && nla_port))
2834		return -EINVAL;
2835
2836	memset(udest, 0, sizeof(*udest));
2837
2838	nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2839	udest->port = nla_get_u16(nla_port);
2840
2841	/* If a full entry was requested, check for the additional fields */
2842	if (full_entry) {
2843		struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2844			      *nla_l_thresh;
2845
2846		nla_fwd		= attrs[IPVS_DEST_ATTR_FWD_METHOD];
2847		nla_weight	= attrs[IPVS_DEST_ATTR_WEIGHT];
2848		nla_u_thresh	= attrs[IPVS_DEST_ATTR_U_THRESH];
2849		nla_l_thresh	= attrs[IPVS_DEST_ATTR_L_THRESH];
2850
2851		if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2852			return -EINVAL;
2853
2854		udest->conn_flags = nla_get_u32(nla_fwd)
2855				    & IP_VS_CONN_F_FWD_MASK;
2856		udest->weight = nla_get_u32(nla_weight);
2857		udest->u_threshold = nla_get_u32(nla_u_thresh);
2858		udest->l_threshold = nla_get_u32(nla_l_thresh);
2859	}
2860
2861	return 0;
2862}
2863
/*
 * Emit an IPVS_CMD_ATTR_DAEMON nest describing one sync daemon (state,
 * multicast interface, sync id) into @skb.  Returns 0 or -EMSGSIZE.
 */
static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
				  const char *mcast_ifn, __be32 syncid)
{
	struct nlattr *nl_daemon;

	nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
	if (!nl_daemon)
		return -EMSGSIZE;

	/* NLA_PUT* macros jump to nla_put_failure when the skb is full */
	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
	NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
	NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);

	nla_nest_end(skb, nl_daemon);

	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nl_daemon);
	return -EMSGSIZE;
}
2885
2886static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2887				  const char *mcast_ifn, __be32 syncid,
2888				  struct netlink_callback *cb)
2889{
2890	void *hdr;
2891	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2892			  &ip_vs_genl_family, NLM_F_MULTI,
2893			  IPVS_CMD_NEW_DAEMON);
2894	if (!hdr)
2895		return -EMSGSIZE;
2896
2897	if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2898		goto nla_put_failure;
2899
2900	return genlmsg_end(skb, hdr);
2901
2902nla_put_failure:
2903	genlmsg_cancel(skb, hdr);
2904	return -EMSGSIZE;
2905}
2906
2907static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2908				   struct netlink_callback *cb)
2909{
2910	mutex_lock(&__ip_vs_mutex);
2911	if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2912		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2913					   ip_vs_master_mcast_ifn,
2914					   ip_vs_master_syncid, cb) < 0)
2915			goto nla_put_failure;
2916
2917		cb->args[0] = 1;
2918	}
2919
2920	if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
2921		if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
2922					   ip_vs_backup_mcast_ifn,
2923					   ip_vs_backup_syncid, cb) < 0)
2924			goto nla_put_failure;
2925
2926		cb->args[1] = 1;
2927	}
2928
2929nla_put_failure:
2930	mutex_unlock(&__ip_vs_mutex);
2931
2932	return skb->len;
2933}
2934
2935static int ip_vs_genl_new_daemon(struct nlattr **attrs)
2936{
2937	if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
2938	      attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
2939	      attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
2940		return -EINVAL;
2941
2942	return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
2943				 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
2944				 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
2945}
2946
2947static int ip_vs_genl_del_daemon(struct nlattr **attrs)
2948{
2949	if (!attrs[IPVS_DAEMON_ATTR_STATE])
2950		return -EINVAL;
2951
2952	return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
2953}
2954
2955static int ip_vs_genl_set_config(struct nlattr **attrs)
2956{
2957	struct ip_vs_timeout_user t;
2958
2959	__ip_vs_get_timeouts(&t);
2960
2961	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
2962		t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
2963
2964	if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
2965		t.tcp_fin_timeout =
2966			nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
2967
2968	if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
2969		t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
2970
2971	return ip_vs_set_timeout(&t);
2972}
2973
/*
 * Generic Netlink .doit handler shared by all state-changing IPVS
 * commands (flush, set-config, daemon start/stop, service and
 * destination add/edit/delete, counter zeroing).  All work is
 * serialised under __ip_vs_mutex.  Returns 0 or a negative errno.
 */
static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
{
	struct ip_vs_service *svc = NULL;
	struct ip_vs_service_user_kern usvc;
	struct ip_vs_dest_user_kern udest;
	int ret = 0, cmd;
	int need_full_svc = 0, need_full_dest = 0;

	cmd = info->genlhdr->cmd;

	mutex_lock(&__ip_vs_mutex);

	/* commands that do not take a service argument are handled first */
	if (cmd == IPVS_CMD_FLUSH) {
		ret = ip_vs_flush();
		goto out;
	} else if (cmd == IPVS_CMD_SET_CONFIG) {
		ret = ip_vs_genl_set_config(info->attrs);
		goto out;
	} else if (cmd == IPVS_CMD_NEW_DAEMON ||
		   cmd == IPVS_CMD_DEL_DAEMON) {

		struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];

		if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
		    nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
				     info->attrs[IPVS_CMD_ATTR_DAEMON],
				     ip_vs_daemon_policy)) {
			ret = -EINVAL;
			goto out;
		}

		if (cmd == IPVS_CMD_NEW_DAEMON)
			ret = ip_vs_genl_new_daemon(daemon_attrs);
		else
			ret = ip_vs_genl_del_daemon(daemon_attrs);
		goto out;
	} else if (cmd == IPVS_CMD_ZERO &&
		   !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
		/* ZERO without a service argument zeroes everything */
		ret = ip_vs_zero_all();
		goto out;
	}

	/* All following commands require a service argument, so check if we
	 * received a valid one. We need a full service specification when
	 * adding / editing a service. Only identifying members otherwise. */
	if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
		need_full_svc = 1;

	ret = ip_vs_genl_parse_service(&usvc,
				       info->attrs[IPVS_CMD_ATTR_SERVICE],
				       need_full_svc);
	if (ret)
		goto out;

	/* Lookup the exact service by <protocol, addr, port> or fwmark */
	if (usvc.fwmark == 0)
		svc = __ip_vs_service_get(usvc.af, usvc.protocol,
					  &usvc.addr, usvc.port);
	else
		svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);

	/* Unless we're adding a new service, the service must already exist */
	if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
		ret = -ESRCH;
		goto out;
	}

	/* Destination commands require a valid destination argument. For
	 * adding / editing a destination, we need a full destination
	 * specification. */
	if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
	    cmd == IPVS_CMD_DEL_DEST) {
		if (cmd != IPVS_CMD_DEL_DEST)
			need_full_dest = 1;

		ret = ip_vs_genl_parse_dest(&udest,
					    info->attrs[IPVS_CMD_ATTR_DEST],
					    need_full_dest);
		if (ret)
			goto out;
	}

	switch (cmd) {
	case IPVS_CMD_NEW_SERVICE:
		if (svc == NULL)
			ret = ip_vs_add_service(&usvc, &svc);
		else
			ret = -EEXIST;
		break;
	case IPVS_CMD_SET_SERVICE:
		ret = ip_vs_edit_service(svc, &usvc);
		break;
	case IPVS_CMD_DEL_SERVICE:
		ret = ip_vs_del_service(svc);
		break;
	case IPVS_CMD_NEW_DEST:
		ret = ip_vs_add_dest(svc, &udest);
		break;
	case IPVS_CMD_SET_DEST:
		ret = ip_vs_edit_dest(svc, &udest);
		break;
	case IPVS_CMD_DEL_DEST:
		ret = ip_vs_del_dest(svc, &udest);
		break;
	case IPVS_CMD_ZERO:
		ret = ip_vs_zero_service(svc);
		break;
	default:
		ret = -EINVAL;
	}

out:
	/* drop the reference taken by the lookup (or by ip_vs_add_service) */
	if (svc)
		ip_vs_service_put(svc);
	mutex_unlock(&__ip_vs_mutex);

	return ret;
}
3092
/*
 * Generic Netlink .doit handler for the non-dump GET commands
 * (GET_SERVICE, GET_CONFIG, GET_INFO).  Builds a single reply message
 * and sends it back to the requester.  Returns 0 or a negative errno.
 */
static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
{
	struct sk_buff *msg;
	void *reply;
	int ret, cmd, reply_cmd;

	cmd = info->genlhdr->cmd;

	/* map each GET command to the command id carried in the reply */
	if (cmd == IPVS_CMD_GET_SERVICE)
		reply_cmd = IPVS_CMD_NEW_SERVICE;
	else if (cmd == IPVS_CMD_GET_INFO)
		reply_cmd = IPVS_CMD_SET_INFO;
	else if (cmd == IPVS_CMD_GET_CONFIG)
		reply_cmd = IPVS_CMD_SET_CONFIG;
	else {
		pr_err("unknown Generic Netlink command\n");
		return -EINVAL;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	mutex_lock(&__ip_vs_mutex);

	reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
	if (reply == NULL)
		goto nla_put_failure;

	switch (cmd) {
	case IPVS_CMD_GET_SERVICE:
	{
		struct ip_vs_service *svc;

		svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
		if (IS_ERR(svc)) {
			ret = PTR_ERR(svc);
			goto out_err;
		} else if (svc) {
			ret = ip_vs_genl_fill_service(msg, svc);
			ip_vs_service_put(svc);
			if (ret)
				goto nla_put_failure;
		} else {
			ret = -ESRCH;
			goto out_err;
		}

		break;
	}

	case IPVS_CMD_GET_CONFIG:
	{
		struct ip_vs_timeout_user t;

		__ip_vs_get_timeouts(&t);
		/* only timeouts for protocols compiled in are reported;
		 * NLA_PUT_U32 jumps to nla_put_failure on overflow */
#ifdef CONFIG_IP_VS_PROTO_TCP
		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
			    t.tcp_fin_timeout);
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
		NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
#endif

		break;
	}

	case IPVS_CMD_GET_INFO:
		NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
		NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
			    ip_vs_conn_tab_size);
		break;
	}

	genlmsg_end(msg, reply);
	ret = genlmsg_reply(msg, info);
	goto out;

nla_put_failure:
	pr_err("not enough space in Netlink message\n");
	ret = -EMSGSIZE;

out_err:
	/* error paths free the unsent reply skb; success path handed it
	 * off via genlmsg_reply() */
	nlmsg_free(msg);
out:
	mutex_unlock(&__ip_vs_mutex);

	return ret;
}
3183
3184
/* Generic Netlink operation table.  Every command requires
 * CAP_NET_ADMIN (GENL_ADMIN_PERM).  All state-changing commands share
 * ip_vs_genl_set_cmd(); GET_SERVICE/GET_CONFIG/GET_INFO reply through
 * ip_vs_genl_get_cmd(); GET_SERVICE, GET_DEST and GET_DAEMON also (or
 * only) support multipart dump requests. */
static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
	{
		.cmd	= IPVS_CMD_NEW_SERVICE,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_SET_SERVICE,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_DEL_SERVICE,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_GET_SERVICE,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_get_cmd,
		.dumpit	= ip_vs_genl_dump_services,
		.policy	= ip_vs_cmd_policy,
	},
	{
		.cmd	= IPVS_CMD_NEW_DEST,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_SET_DEST,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_DEL_DEST,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		/* dump-only: destinations of one service */
		.cmd	= IPVS_CMD_GET_DEST,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.dumpit	= ip_vs_genl_dump_dests,
	},
	{
		.cmd	= IPVS_CMD_NEW_DAEMON,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_DEL_DAEMON,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		/* dump-only: master/backup sync daemon state */
		.cmd	= IPVS_CMD_GET_DAEMON,
		.flags	= GENL_ADMIN_PERM,
		.dumpit	= ip_vs_genl_dump_daemons,
	},
	{
		.cmd	= IPVS_CMD_SET_CONFIG,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_GET_CONFIG,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_get_cmd,
	},
	{
		.cmd	= IPVS_CMD_GET_INFO,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_get_cmd,
	},
	{
		.cmd	= IPVS_CMD_ZERO,
		.flags	= GENL_ADMIN_PERM,
		.policy	= ip_vs_cmd_policy,
		.doit	= ip_vs_genl_set_cmd,
	},
	{
		.cmd	= IPVS_CMD_FLUSH,
		.flags	= GENL_ADMIN_PERM,
		.doit	= ip_vs_genl_set_cmd,
	},
};
3280
3281static int __init ip_vs_genl_register(void)
3282{
3283	return genl_register_family_with_ops(&ip_vs_genl_family,
3284		ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
3285}
3286
/* Unregister the IPVS genetlink family (ops go away with the family). */
static void ip_vs_genl_unregister(void)
{
	genl_unregister_family(&ip_vs_genl_family);
}
3291
3292/* End of Generic Netlink interface definitions */
3293
3294
/*
 * Initialise the IPVS control interface: register the sockopt and
 * Generic Netlink frontends, create the /proc entries and sysctls,
 * set up the service/route hash tables, attach the global stats
 * estimator and arm the periodic defense work.  Returns 0 or a
 * negative errno (the sockopt registration is rolled back if the
 * genetlink registration fails).
 */
int __init ip_vs_control_init(void)
{
	int ret;
	int idx;

	EnterFunction(2);

	ret = nf_register_sockopt(&ip_vs_sockopts);
	if (ret) {
		pr_err("cannot register sockopt.\n");
		return ret;
	}

	ret = ip_vs_genl_register();
	if (ret) {
		pr_err("cannot register Generic Netlink interface.\n");
		nf_unregister_sockopt(&ip_vs_sockopts);
		return ret;
	}

	/* NOTE(review): the proc and sysctl registrations below are not
	 * error-checked -- matches the original code */
	proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
	proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);

	sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);

	/* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
	for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
		INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
		INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
	}
	for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
		INIT_LIST_HEAD(&ip_vs_rtable[idx]);
	}

	ip_vs_new_estimator(&ip_vs_stats);

	/* Hook the defense timer */
	schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);

	LeaveFunction(2);
	return 0;
}
3337
3338
/*
 * Tear down the control interface, roughly in the reverse order of
 * ip_vs_control_init().
 */
void ip_vs_control_cleanup(void)
{
	EnterFunction(2);
	ip_vs_trash_cleanup();
	/* stop the periodic defense work and wait for a running instance
	 * to finish before the state it touches is freed */
	cancel_rearming_delayed_work(&defense_work);
	cancel_work_sync(&defense_work.work);
	ip_vs_kill_estimator(&ip_vs_stats);
	unregister_sysctl_table(sysctl_header);
	proc_net_remove(&init_net, "ip_vs_stats");
	proc_net_remove(&init_net, "ip_vs");
	ip_vs_genl_unregister();
	nf_unregister_sockopt(&ip_vs_sockopts);
	LeaveFunction(2);
}
3353