/*
 * ip_vs_xmit.c: various packet transmitters for IPVS
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *              Julian Anastasov <ja@ssi.bg>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Changes:
 *
 */

#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/tcp.h>                  /* for tcphdr */
#include <net/ip.h>
#include <net/tcp.h>                    /* for csum_tcpudp_magic */
#include <net/udp.h>
#include <net/icmp.h>                   /* for icmp_send */
#include <net/route.h>                  /* for ip_route_output */
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <linux/icmpv6.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_conntrack.h>
#include <linux/netfilter_ipv4.h>

#include <net/ip_vs.h>


/*
 *      Destination cache to speed up outgoing route lookup
 */
static inline void
__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst)
{
	struct dst_entry *old_dst;

	old_dst = dest->dst_cache;
	dest->dst_cache = dst;
	dest->dst_rtos = rtos;
	dst_release(old_dst);
}

static inline struct dst_entry *
__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie)
{
	struct dst_entry *dst = dest->dst_cache;

	if (!dst)
		return NULL;
	if ((dst->obsolete
	     || (dest->af == AF_INET && rtos != dest->dst_rtos)) &&
	    dst->ops->check(dst, cookie) == NULL) {
		dest->dst_cache = NULL;
		dst_release(dst);
		return NULL;
	}
	dst_hold(dst);
	return dst;
}
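
/*
 * Reference counting for the cache above: __ip_vs_dst_check() returns the
 * cached entry with an extra reference taken via dst_hold(), so every
 * successful caller must eventually drop it again (ip_rt_put()/dst_release()).
 * __ip_vs_dst_set() consumes the reference handed to it and releases the
 * previously cached entry; the lookup paths below therefore publish a fresh
 * route with dst_clone(&rt->dst), keeping one reference in dest->dst_cache
 * and returning the other to the transmitter.
 */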

static struct rtable *
__ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
{
	struct rtable *rt;			/* Route to the other host */
	struct ip_vs_dest *dest = cp->dest;

	if (dest) {
		spin_lock(&dest->dst_lock);
		if (!(rt = (struct rtable *)
		      __ip_vs_dst_check(dest, rtos, 0))) {
			struct flowi fl = {
				.oif = 0,
				.nl_u = {
					.ip4_u = {
						.daddr = dest->addr.ip,
						.saddr = 0,
						.tos = rtos, } },
			};

			if (ip_route_output_key(&init_net, &rt, &fl)) {
				spin_unlock(&dest->dst_lock);
				IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
					     &dest->addr.ip);
				return NULL;
			}
			__ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst));
			IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n",
				  &dest->addr.ip,
				  atomic_read(&rt->dst.__refcnt), rtos);
		}
		spin_unlock(&dest->dst_lock);
	} else {
		struct flowi fl = {
			.oif = 0,
			.nl_u = {
				.ip4_u = {
					.daddr = cp->daddr.ip,
					.saddr = 0,
					.tos = rtos, } },
		};

		if (ip_route_output_key(&init_net, &rt, &fl)) {
			IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n",
				     &cp->daddr.ip);
			return NULL;
		}
	}

	return rt;
}

#ifdef CONFIG_IP_VS_IPV6
static struct rt6_info *
__ip_vs_get_out_rt_v6(struct ip_vs_conn *cp)
{
	struct rt6_info *rt;			/* Route to the other host */
	struct ip_vs_dest *dest = cp->dest;

	if (dest) {
		spin_lock(&dest->dst_lock);
		rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0, 0);
		if (!rt) {
			struct flowi fl = {
				.oif = 0,
				.nl_u = {
					.ip6_u = {
						.daddr = dest->addr.in6,
						.saddr = {
							.s6_addr32 =
								{ 0, 0, 0, 0 },
						},
					},
				},
			};

			rt = (struct rt6_info *)ip6_route_output(&init_net,
								 NULL, &fl);
			if (!rt) {
				spin_unlock(&dest->dst_lock);
				IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n",
					     &dest->addr.in6);
				return NULL;
			}
			__ip_vs_dst_set(dest, 0, dst_clone(&rt->dst));
			IP_VS_DBG(10, "new dst %pI6, refcnt=%d\n",
				  &dest->addr.in6,
				  atomic_read(&rt->dst.__refcnt));
		}
		spin_unlock(&dest->dst_lock);
	} else {
		struct flowi fl = {
			.oif = 0,
			.nl_u = {
				.ip6_u = {
					.daddr = cp->daddr.in6,
					.saddr = {
						.s6_addr32 = { 0, 0, 0, 0 },
					},
				},
			},
		};

		rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
		if (!rt) {
			IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n",
				     &cp->daddr.in6);
			return NULL;
		}
	}

	return rt;
}
#endif


/*
 *	Release dest->dst_cache before a dest is removed
 */
void
ip_vs_dst_reset(struct ip_vs_dest *dest)
{
	struct dst_entry *old_dst;

	old_dst = dest->dst_cache;
	dest->dst_cache = NULL;
	dst_release(old_dst);
}

#define IP_VS_XMIT(pf, skb, rt)				\
do {							\
	(skb)->ipvs_property = 1;			\
	skb_forward_csum(skb);				\
	NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL,	\
		(rt)->dst.dev, dst_output);		\
} while (0)
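
/*
 * IP_VS_XMIT marks the skb as IPVS-owned (ipvs_property), so the IPVS
 * netfilter hooks do not process it a second time, resets checksum state
 * that would no longer be valid for a forwarded packet, and re-injects the
 * packet at the LOCAL_OUT hook so that it leaves via dst_output() on the
 * device of the route attached to the skb.
 */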


/*
 *      NULL transmitter (do nothing except return NF_ACCEPT)
 */
int
ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		struct ip_vs_protocol *pp)
{
	/* we do not touch skb and do not need pskb ptr */
	return NF_ACCEPT;
}


/*
 *      Bypass transmitter
 *      Let packets bypass the destination when the destination is not
 *      available; it may only be used in a transparent cache cluster.
 */
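/*
 * In the bypass case there is no real server to forward to: the packet is
 * simply routed towards its original destination address (iph->daddr below)
 * instead of being redirected, so the client ends up talking to the origin
 * directly.  This path is typically enabled through the cache_bypass sysctl
 * when all servers of a transparent cache cluster are unavailable.
 */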
int
ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp)
{
	struct rtable *rt;			/* Route to the other host */
	struct iphdr  *iph = ip_hdr(skb);
	u8     tos = iph->tos;
	int    mtu;
	struct flowi fl = {
		.oif = 0,
		.nl_u = {
			.ip4_u = {
				.daddr = iph->daddr,
				.saddr = 0,
				.tos = RT_TOS(tos), } },
	};

	EnterFunction(10);

	if (ip_route_output_key(&init_net, &rt, &fl)) {
		IP_VS_DBG_RL("%s(): ip_route_output error, dest: %pI4\n",
			     __func__, &iph->daddr);
		goto tx_error_icmp;
	}

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
		ip_rt_put(rt);
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error;
	}

	/*
	 * Call ip_send_check because we are not sure it is called
	 * after ip_defrag. Is copy-on-write needed?
	 */
	if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
		ip_rt_put(rt);
		return NF_STOLEN;
	}
	ip_send_check(ip_hdr(skb));

	/* drop old route */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(NFPROTO_IPV4, skb, rt);

	LeaveFunction(10);
	return NF_STOLEN;

 tx_error_icmp:
	dst_link_failure(skb);
 tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}

#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		     struct ip_vs_protocol *pp)
{
	struct rt6_info *rt;			/* Route to the other host */
	struct ipv6hdr  *iph = ipv6_hdr(skb);
	int    mtu;
	struct flowi fl = {
		.oif = 0,
		.nl_u = {
			.ip6_u = {
				.daddr = iph->daddr,
				.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
	};

	EnterFunction(10);

	rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
	if (!rt) {
		IP_VS_DBG_RL("%s(): ip6_route_output error, dest: %pI6\n",
			     __func__, &iph->daddr);
		goto tx_error_icmp;
	}

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if (skb->len > mtu) {
		dst_release(&rt->dst);
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error;
	}

	/*
	 * Call ip_send_check because we are not sure it is called
	 * after ip_defrag. Is copy-on-write needed?
	 */
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (unlikely(skb == NULL)) {
		dst_release(&rt->dst);
		return NF_STOLEN;
	}

	/* drop old route */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(NFPROTO_IPV6, skb, rt);

	LeaveFunction(10);
	return NF_STOLEN;

 tx_error_icmp:
	dst_link_failure(skb);
 tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
#endif

void
ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin)
{
	struct nf_conn *ct = (struct nf_conn *)skb->nfct;
	struct nf_conntrack_tuple new_tuple;

	if (ct == NULL || nf_ct_is_untracked(ct) || nf_ct_is_confirmed(ct))
		return;

	/*
	 * The connection is not yet in the hashtable, so we update it.
	 * CIP->VIP will remain the same, so leave the tuple in
	 * IP_CT_DIR_ORIGINAL untouched.  When the reply comes back from the
	 * real-server we will see RIP->DIP.
	 */
	new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
	if (outin)
		new_tuple.src.u3 = cp->daddr;
	else
		new_tuple.dst.u3 = cp->vaddr;
	/*
	 * This will also take care of UDP and other protocols.
	 */
	if (outin)
		new_tuple.src.u.tcp.port = cp->dport;
	else
		new_tuple.dst.u.tcp.port = cp->vport;
	nf_conntrack_alter_reply(ct, &new_tuple);
}
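
/*
 * Worked example of the rewrite above for the DNAT (outin == 1) case: a
 * connection CIP:cport -> VIP:vport that is scheduled to real server
 * RIP:dport starts with the conntrack reply tuple VIP:vport -> CIP:cport;
 * after nf_conntrack_alter_reply() it becomes RIP:dport -> CIP:cport, so
 * the answers coming back from the real server match the same conntrack
 * entry as the original request.
 */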

/*
 *      NAT transmitter (only for outside-to-inside nat forwarding)
 *      Not used for related ICMP
 */
int
ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
	       struct ip_vs_protocol *pp)
{
	struct rtable *rt;		/* Route to the other host */
	int mtu;
	struct iphdr *iph = ip_hdr(skb);

	EnterFunction(10);

	/* check if it is a connection of no-client-port */
	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
		__be16 _pt, *p;
		p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt);
		if (p == NULL)
			goto tx_error;
		ip_vs_conn_fill_cport(cp, *p);
		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
	}

	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
		goto tx_error_icmp;

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
		ip_rt_put(rt);
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
		IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
		goto tx_error;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, sizeof(struct iphdr)))
		goto tx_error_put;

	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		goto tx_error_put;

	/* drop old route */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/* mangle the packet */
	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
		goto tx_error;
	ip_hdr(skb)->daddr = cp->daddr.ip;
	ip_send_check(ip_hdr(skb));

	IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");

	ip_vs_update_conntrack(skb, cp, 1);


	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(NFPROTO_IPV4, skb, rt);

	LeaveFunction(10);
	return NF_STOLEN;

  tx_error_icmp:
	dst_link_failure(skb);
  tx_error:
	LeaveFunction(10);
	kfree_skb(skb);
	return NF_STOLEN;
  tx_error_put:
	ip_rt_put(rt);
	goto tx_error;
}

#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp)
{
	struct rt6_info *rt;		/* Route to the other host */
	int mtu;

	EnterFunction(10);

	/* check if it is a connection of no-client-port */
	if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) {
		__be16 _pt, *p;
		p = skb_header_pointer(skb, sizeof(struct ipv6hdr),
				       sizeof(_pt), &_pt);
		if (p == NULL)
			goto tx_error;
		ip_vs_conn_fill_cport(cp, *p);
		IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p));
	}

	rt = __ip_vs_get_out_rt_v6(cp);
	if (!rt)
		goto tx_error_icmp;

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if (skb->len > mtu) {
		dst_release(&rt->dst);
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP_VS_DBG_RL_PKT(0, pp, skb, 0,
				 "ip_vs_nat_xmit_v6(): frag needed for");
		goto tx_error;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, sizeof(struct ipv6hdr)))
		goto tx_error_put;

	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		goto tx_error_put;

	/* drop old route */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/* mangle the packet */
	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
		goto tx_error;
	ipv6_hdr(skb)->daddr = cp->daddr.in6;

	IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");

	ip_vs_update_conntrack(skb, cp, 1);


	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(NFPROTO_IPV6, skb, rt);

	LeaveFunction(10);
	return NF_STOLEN;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	LeaveFunction(10);
	kfree_skb(skb);
	return NF_STOLEN;
tx_error_put:
	dst_release(&rt->dst);
	goto tx_error;
}
#endif


/*
 *   IP Tunneling transmitter
 *
 *   This function encapsulates the packet in a new IP packet whose
 *   destination will be set to cp->daddr. Most of the code in this
 *   function is taken from ipip.c.
 *
 *   It is used in a VS/TUN cluster. The load balancer selects a real
 *   server from the cluster based on a scheduling algorithm,
 *   encapsulates the request packet and forwards it to the selected
 *   server. For example, all real servers are configured with
 *   "ifconfig tunl0 <Virtual IP Address> up". When the server receives
 *   the encapsulated packet, it decapsulates the packet, processes
 *   the request and returns the response packets directly to the client
 *   without passing through the load balancer. This can greatly increase
 *   the scalability of the virtual server.
 *
 *   Used for ANY protocol
 */
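/*
 * The encapsulation performed below, sketched for a TCP request from client
 * CIP to virtual service VIP that is tunnelled to real server RIP:
 *
 *   before:  [ IP  CIP -> VIP | TCP ... ]
 *   after:   [ IP  src -> RIP, proto IPPROTO_IPIP | IP  CIP -> VIP | TCP ... ]
 *
 * The inner packet is left untouched; only a new outer IPv4 header is pushed
 * in front of it, with addresses taken from the route to the real server
 * (rt->rt_src/rt->rt_dst).  The real server must therefore accept IPIP
 * packets and own the VIP locally (e.g. on a tunnel device, as in the
 * ifconfig example above) in order to decapsulate and process the request.
 */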
int
ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		  struct ip_vs_protocol *pp)
{
	struct rtable *rt;			/* Route to the other host */
	struct net_device *tdev;		/* Device to other host */
	struct iphdr  *old_iph = ip_hdr(skb);
	u8     tos = old_iph->tos;
	__be16 df = old_iph->frag_off;
	sk_buff_data_t old_transport_header = skb->transport_header;
	struct iphdr  *iph;			/* Our new IP header */
	unsigned int max_headroom;		/* The extra header space needed */
	int    mtu;

	EnterFunction(10);

	if (skb->protocol != htons(ETH_P_IP)) {
		IP_VS_DBG_RL("%s(): protocol error, "
			     "ETH_P_IP: %d, skb protocol: %d\n",
			     __func__, htons(ETH_P_IP), skb->protocol);
		goto tx_error;
	}

	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos))))
		goto tx_error_icmp;

	tdev = rt->dst.dev;

	mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
	if (mtu < 68) {
		ip_rt_put(rt);
		IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__);
		goto tx_error;
	}
	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);

	df |= (old_iph->frag_off & htons(IP_DF));

	if ((old_iph->frag_off & htons(IP_DF))
	    && mtu < ntohs(old_iph->tot_len)) {
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
		ip_rt_put(rt);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr);

	if (skb_headroom(skb) < max_headroom
	    || skb_cloned(skb) || skb_shared(skb)) {
		struct sk_buff *new_skb =
			skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			ip_rt_put(rt);
			kfree_skb(skb);
			IP_VS_ERR_RL("%s(): no memory\n", __func__);
			return NF_STOLEN;
		}
		kfree_skb(skb);
		skb = new_skb;
		old_iph = ip_hdr(skb);
	}

	skb->transport_header = old_transport_header;

	/* fix old IP header checksum */
	ip_send_check(old_iph);

	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	/* drop old route */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/*
	 *	Push down and install the IPIP header.
	 */
	iph			=	ip_hdr(skb);
	iph->version		=	4;
	iph->ihl		=	sizeof(struct iphdr)>>2;
	iph->frag_off		=	df;
	iph->protocol		=	IPPROTO_IPIP;
	iph->tos		=	tos;
	iph->daddr		=	rt->rt_dst;
	iph->saddr		=	rt->rt_src;
	iph->ttl		=	old_iph->ttl;
	ip_select_ident(iph, &rt->dst, NULL);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	ip_local_out(skb);

	LeaveFunction(10);

	return NF_STOLEN;

  tx_error_icmp:
	dst_link_failure(skb);
  tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}

#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		     struct ip_vs_protocol *pp)
{
	struct rt6_info *rt;		/* Route to the other host */
	struct net_device *tdev;	/* Device to other host */
	struct ipv6hdr  *old_iph = ipv6_hdr(skb);
	sk_buff_data_t old_transport_header = skb->transport_header;
	struct ipv6hdr  *iph;		/* Our new IP header */
	unsigned int max_headroom;	/* The extra header space needed */
	int    mtu;

	EnterFunction(10);

	if (skb->protocol != htons(ETH_P_IPV6)) {
		IP_VS_DBG_RL("%s(): protocol error, "
			     "ETH_P_IPV6: %d, skb protocol: %d\n",
			     __func__, htons(ETH_P_IPV6), skb->protocol);
		goto tx_error;
	}

	rt = __ip_vs_get_out_rt_v6(cp);
	if (!rt)
		goto tx_error_icmp;

	tdev = rt->dst.dev;

	mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr);
	/* TODO IPv6: do we need this check in IPv6? */
	if (mtu < 1280) {
		dst_release(&rt->dst);
		IP_VS_DBG_RL("%s(): mtu less than 1280\n", __func__);
		goto tx_error;
	}
	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);

	if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		dst_release(&rt->dst);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error;
	}

	/*
	 * Okay, now see if we can stuff it in the buffer as-is.
	 */
	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr);

	if (skb_headroom(skb) < max_headroom
	    || skb_cloned(skb) || skb_shared(skb)) {
		struct sk_buff *new_skb =
			skb_realloc_headroom(skb, max_headroom);
		if (!new_skb) {
			dst_release(&rt->dst);
			kfree_skb(skb);
			IP_VS_ERR_RL("%s(): no memory\n", __func__);
			return NF_STOLEN;
		}
		kfree_skb(skb);
		skb = new_skb;
		old_iph = ipv6_hdr(skb);
	}

	skb->transport_header = old_transport_header;

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));

	/* drop old route */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/*
	 *	Push down and install the IPIP header.
	 */
	iph			=	ipv6_hdr(skb);
	iph->version		=	6;
	iph->nexthdr		=	IPPROTO_IPV6;
	iph->payload_len	=	old_iph->payload_len;
	be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
	iph->priority		=	old_iph->priority;
	memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
	iph->daddr		=	rt->rt6i_dst.addr;
	iph->saddr		=	cp->vaddr.in6; /* rt->rt6i_src.addr; */
	iph->hop_limit		=	old_iph->hop_limit;

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	ip6_local_out(skb);

	LeaveFunction(10);

	return NF_STOLEN;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
#endif


/*
 *      Direct Routing transmitter
 *      Used for ANY protocol
 */
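/*
 * In direct routing the IP header is not modified at all: the packet keeps
 * the VIP as its destination address and is only re-routed towards the
 * chosen real server (see __ip_vs_get_out_rt() above), which effectively
 * changes just the link-layer destination.  The real server is expected to
 * have the VIP configured on a non-ARPing interface so that it accepts such
 * packets and replies to the client directly.
 */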
int
ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
	      struct ip_vs_protocol *pp)
{
	struct rtable *rt;			/* Route to the other host */
	struct iphdr  *iph = ip_hdr(skb);
	int    mtu;

	EnterFunction(10);

	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos))))
		goto tx_error_icmp;

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
		ip_rt_put(rt);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error;
	}

	/*
	 * Call ip_send_check because we are not sure it is called
	 * after ip_defrag. Is copy-on-write needed?
	 */
	if (unlikely((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)) {
		ip_rt_put(rt);
		return NF_STOLEN;
	}
	ip_send_check(ip_hdr(skb));

	/* drop old route */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(NFPROTO_IPV4, skb, rt);

	LeaveFunction(10);
	return NF_STOLEN;

  tx_error_icmp:
	dst_link_failure(skb);
  tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}

#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		 struct ip_vs_protocol *pp)
{
	struct rt6_info *rt;			/* Route to the other host */
	int    mtu;

	EnterFunction(10);

	rt = __ip_vs_get_out_rt_v6(cp);
	if (!rt)
		goto tx_error_icmp;

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if (skb->len > mtu) {
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		dst_release(&rt->dst);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error;
	}

	/*
	 * Call ip_send_check because we are not sure it is called
	 * after ip_defrag. Is copy-on-write needed?
	 */
	skb = skb_share_check(skb, GFP_ATOMIC);
	if (unlikely(skb == NULL)) {
		dst_release(&rt->dst);
		return NF_STOLEN;
	}

	/* drop old route */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(NFPROTO_IPV6, skb, rt);

	LeaveFunction(10);
	return NF_STOLEN;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	kfree_skb(skb);
	LeaveFunction(10);
	return NF_STOLEN;
}
#endif


/*
 *	ICMP packet transmitter
 *	called by the ip_vs_in_icmp
 */
int
ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
		struct ip_vs_protocol *pp, int offset)
{
	struct rtable	*rt;	/* Route to the other host */
	int mtu;
	int rc;

	EnterFunction(10);

	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
	   forwarded directly here, because there is no need to
	   translate address/port back */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
		if (cp->packet_xmit)
			rc = cp->packet_xmit(skb, cp, pp);
		else
			rc = NF_ACCEPT;
		/* do not touch skb anymore */
		atomic_inc(&cp->in_pkts);
		goto out;
	}

	/*
	 * mangle and send the packet here (only for VS/NAT)
	 */

	if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos))))
		goto tx_error_icmp;

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
		ip_rt_put(rt);
		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, offset))
		goto tx_error_put;

	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		goto tx_error_put;

	/* drop the old route when skb is not shared */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	ip_vs_nat_icmp(skb, pp, cp, 0);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(NFPROTO_IPV4, skb, rt);

	rc = NF_STOLEN;
	goto out;

  tx_error_icmp:
	dst_link_failure(skb);
  tx_error:
	dev_kfree_skb(skb);
	rc = NF_STOLEN;
  out:
	LeaveFunction(10);
	return rc;
  tx_error_put:
	ip_rt_put(rt);
	goto tx_error;
}

#ifdef CONFIG_IP_VS_IPV6
int
ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
		struct ip_vs_protocol *pp, int offset)
{
	struct rt6_info	*rt;	/* Route to the other host */
	int mtu;
	int rc;

	EnterFunction(10);

	/* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be
	   forwarded directly here, because there is no need to
	   translate address/port back */
	if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
		if (cp->packet_xmit)
			rc = cp->packet_xmit(skb, cp, pp);
		else
			rc = NF_ACCEPT;
		/* do not touch skb anymore */
		atomic_inc(&cp->in_pkts);
		goto out;
	}

	/*
	 * mangle and send the packet here (only for VS/NAT)
	 */

	rt = __ip_vs_get_out_rt_v6(cp);
	if (!rt)
		goto tx_error_icmp;

	/* MTU checking */
	mtu = dst_mtu(&rt->dst);
	if (skb->len > mtu) {
		dst_release(&rt->dst);
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP_VS_DBG_RL("%s(): frag needed\n", __func__);
		goto tx_error;
	}

	/* copy-on-write the packet before mangling it */
	if (!skb_make_writable(skb, offset))
		goto tx_error_put;

	if (skb_cow(skb, rt->dst.dev->hard_header_len))
		goto tx_error_put;

	/* drop the old route when skb is not shared */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	ip_vs_nat_icmp_v6(skb, pp, cp, 0);

	/* Another hack: avoid icmp_send in ip_fragment */
	skb->local_df = 1;

	IP_VS_XMIT(NFPROTO_IPV6, skb, rt);

	rc = NF_STOLEN;
	goto out;

tx_error_icmp:
	dst_link_failure(skb);
tx_error:
	dev_kfree_skb(skb);
	rc = NF_STOLEN;
out:
	LeaveFunction(10);
	return rc;
tx_error_put:
	dst_release(&rt->dst);
	goto tx_error;
}
#endif