1/*
2 * ip_vs_app.c: Application module support for IPVS
3 *
4 * Version:     $Id: ip_vs_app.c,v 1.1.1.1 2007/08/03 18:53:51 Exp $
5 *
6 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
7 *
8 *              This program is free software; you can redistribute it and/or
9 *              modify it under the terms of the GNU General Public License
10 *              as published by the Free Software Foundation; either version
11 *              2 of the License, or (at your option) any later version.
12 *
13 * Most code here is taken from ip_masq_app.c in kernel 2.2. The difference
14 * is that ip_vs_app module handles the reverse direction (incoming requests
15 * and outgoing responses).
16 *
17 *		IP_MASQ_APP application masquerading module
18 *
19 * Author:	Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar>
20 *
21 */
22
23#include <linux/module.h>
24#include <linux/kernel.h>
25#include <linux/skbuff.h>
26#include <linux/in.h>
27#include <linux/ip.h>
28#include <net/protocol.h>
29#include <net/tcp.h>
30#include <asm/system.h>
31#include <linux/stat.h>
32#include <linux/proc_fs.h>
33#include <linux/seq_file.h>
34#include <linux/mutex.h>
35
36#include <net/ip_vs.h>
37
/*
 * List of registered ipvs applications, and the mutex taken in this file
 * around every walk or modification of that list and of the per-application
 * incarnation lists.
 */
static LIST_HEAD(ip_vs_app_list);
static DEFINE_MUTEX(__ip_vs_app_mutex);

/* registration entry points made available to application helper modules */
EXPORT_SYMBOL(register_ip_vs_app);
EXPORT_SYMBOL(unregister_ip_vs_app);
EXPORT_SYMBOL(register_ip_vs_app_inc);
45
46
47/*
48 *	Get an ip_vs_app object
49 */
50static inline int ip_vs_app_get(struct ip_vs_app *app)
51{
52	/* test and get the module atomically */
53	if (app->module)
54		return try_module_get(app->module);
55	else
56		return 1;
57}
58
59
60static inline void ip_vs_app_put(struct ip_vs_app *app)
61{
62	if (app->module)
63		module_put(app->module);
64}
65
66
67/*
68 *	Allocate/initialize app incarnation and register it in proto apps.
69 */
70static int
71ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
72{
73	struct ip_vs_protocol *pp;
74	struct ip_vs_app *inc;
75	int ret;
76
77	if (!(pp = ip_vs_proto_get(proto)))
78		return -EPROTONOSUPPORT;
79
80	if (!pp->unregister_app)
81		return -EOPNOTSUPP;
82
83	inc = kmemdup(app, sizeof(*inc), GFP_KERNEL);
84	if (!inc)
85		return -ENOMEM;
86	INIT_LIST_HEAD(&inc->p_list);
87	INIT_LIST_HEAD(&inc->incs_list);
88	inc->app = app;
89	inc->port = htons(port);
90	atomic_set(&inc->usecnt, 0);
91
92	if (app->timeouts) {
93		inc->timeout_table =
94			ip_vs_create_timeout_table(app->timeouts,
95						   app->timeouts_size);
96		if (!inc->timeout_table) {
97			ret = -ENOMEM;
98			goto out;
99		}
100	}
101
102	ret = pp->register_app(inc);
103	if (ret)
104		goto out;
105
106	list_add(&inc->a_list, &app->incs_list);
107	IP_VS_DBG(9, "%s application %s:%u registered\n",
108		  pp->name, inc->name, inc->port);
109
110	return 0;
111
112  out:
113	kfree(inc->timeout_table);
114	kfree(inc);
115	return ret;
116}
117
118
119/*
120 *	Release app incarnation
121 */
122static void
123ip_vs_app_inc_release(struct ip_vs_app *inc)
124{
125	struct ip_vs_protocol *pp;
126
127	if (!(pp = ip_vs_proto_get(inc->protocol)))
128		return;
129
130	if (pp->unregister_app)
131		pp->unregister_app(inc);
132
133	IP_VS_DBG(9, "%s App %s:%u unregistered\n",
134		  pp->name, inc->name, inc->port);
135
136	list_del(&inc->a_list);
137
138	kfree(inc->timeout_table);
139	kfree(inc);
140}
141
142
143/*
144 *	Get reference to app inc (only called from softirq)
145 *
146 */
147int ip_vs_app_inc_get(struct ip_vs_app *inc)
148{
149	int result;
150
151	atomic_inc(&inc->usecnt);
152	if (unlikely((result = ip_vs_app_get(inc->app)) != 1))
153		atomic_dec(&inc->usecnt);
154	return result;
155}
156
157
158/*
159 *	Put the app inc (only called from timer or net softirq)
160 */
161void ip_vs_app_inc_put(struct ip_vs_app *inc)
162{
163	ip_vs_app_put(inc->app);
164	atomic_dec(&inc->usecnt);
165}
166
167
168/*
169 *	Register an application incarnation in protocol applications
170 */
171int
172register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port)
173{
174	int result;
175
176	mutex_lock(&__ip_vs_app_mutex);
177
178	result = ip_vs_app_inc_new(app, proto, port);
179
180	mutex_unlock(&__ip_vs_app_mutex);
181
182	return result;
183}
184
185
186/*
187 *	ip_vs_app registration routine
188 */
189int register_ip_vs_app(struct ip_vs_app *app)
190{
191	/* increase the module use count */
192	ip_vs_use_count_inc();
193
194	mutex_lock(&__ip_vs_app_mutex);
195
196	list_add(&app->a_list, &ip_vs_app_list);
197
198	mutex_unlock(&__ip_vs_app_mutex);
199
200	return 0;
201}
202
203
204/*
205 *	ip_vs_app unregistration routine
206 *	We are sure there are no app incarnations attached to services
207 */
208void unregister_ip_vs_app(struct ip_vs_app *app)
209{
210	struct ip_vs_app *inc, *nxt;
211
212	mutex_lock(&__ip_vs_app_mutex);
213
214	list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {
215		ip_vs_app_inc_release(inc);
216	}
217
218	list_del(&app->a_list);
219
220	mutex_unlock(&__ip_vs_app_mutex);
221
222	/* decrease the module use count */
223	ip_vs_use_count_dec();
224}
225
226
227/*
228 *	Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
229 */
230int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp)
231{
232	return pp->app_conn_bind(cp);
233}
234
235
236/*
237 *	Unbind cp from application incarnation (called by cp destructor)
238 */
239void ip_vs_unbind_app(struct ip_vs_conn *cp)
240{
241	struct ip_vs_app *inc = cp->app;
242
243	if (!inc)
244		return;
245
246	if (inc->unbind_conn)
247		inc->unbind_conn(inc, cp);
248	if (inc->done_conn)
249		inc->done_conn(inc, cp);
250	ip_vs_app_inc_put(inc);
251	cp->app = NULL;
252}
253
254
255/*
256 *	Fixes th->seq based on ip_vs_seq info.
257 */
258static inline void vs_fix_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
259{
260	__u32 seq = ntohl(th->seq);
261
262	/*
263	 *	Adjust seq with delta-offset for all packets after
264	 *	the most recent resized pkt seq and with previous_delta offset
265	 *	for all packets	before most recent resized pkt seq.
266	 */
267	if (vseq->delta || vseq->previous_delta) {
268		if(after(seq, vseq->init_seq)) {
269			th->seq = htonl(seq + vseq->delta);
270			IP_VS_DBG(9, "vs_fix_seq(): added delta (%d) to seq\n",
271				  vseq->delta);
272		} else {
273			th->seq = htonl(seq + vseq->previous_delta);
274			IP_VS_DBG(9, "vs_fix_seq(): added previous_delta "
275				  "(%d) to seq\n", vseq->previous_delta);
276		}
277	}
278}
279
280
281/*
282 *	Fixes th->ack_seq based on ip_vs_seq info.
283 */
284static inline void
285vs_fix_ack_seq(const struct ip_vs_seq *vseq, struct tcphdr *th)
286{
287	__u32 ack_seq = ntohl(th->ack_seq);
288
289	/*
290	 * Adjust ack_seq with delta-offset for
291	 * the packets AFTER most recent resized pkt has caused a shift
292	 * for packets before most recent resized pkt, use previous_delta
293	 */
294	if (vseq->delta || vseq->previous_delta) {
295		/* since ack_seq is the number of octet that is expected
296		   to receive next, so compare it with init_seq+delta */
297		if(after(ack_seq, vseq->init_seq+vseq->delta)) {
298			th->ack_seq = htonl(ack_seq - vseq->delta);
299			IP_VS_DBG(9, "vs_fix_ack_seq(): subtracted delta "
300				  "(%d) from ack_seq\n", vseq->delta);
301
302		} else {
303			th->ack_seq = htonl(ack_seq - vseq->previous_delta);
304			IP_VS_DBG(9, "vs_fix_ack_seq(): subtracted "
305				  "previous_delta (%d) from ack_seq\n",
306				  vseq->previous_delta);
307		}
308	}
309}
310
311
312/*
313 *	Updates ip_vs_seq if pkt has been resized
314 *	Assumes already checked proto==IPPROTO_TCP and diff!=0.
315 */
316static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
317				 unsigned flag, __u32 seq, int diff)
318{
319	/* spinlock is to keep updating cp->flags atomic */
320	spin_lock(&cp->lock);
321	if (!(cp->flags & flag) || after(seq, vseq->init_seq)) {
322		vseq->previous_delta = vseq->delta;
323		vseq->delta += diff;
324		vseq->init_seq = seq;
325		cp->flags |= flag;
326	}
327	spin_unlock(&cp->lock);
328}
329
330static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb,
331				  struct ip_vs_app *app)
332{
333	int diff;
334	const unsigned int tcp_offset = ip_hdrlen(*pskb);
335	struct tcphdr *th;
336	__u32 seq;
337
338	if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
339		return 0;
340
341	th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset);
342
343	/*
344	 *	Remember seq number in case this pkt gets resized
345	 */
346	seq = ntohl(th->seq);
347
348	/*
349	 *	Fix seq stuff if flagged as so.
350	 */
351	if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
352		vs_fix_seq(&cp->out_seq, th);
353	if (cp->flags & IP_VS_CONN_F_IN_SEQ)
354		vs_fix_ack_seq(&cp->in_seq, th);
355
356	/*
357	 *	Call private output hook function
358	 */
359	if (app->pkt_out == NULL)
360		return 1;
361
362	if (!app->pkt_out(app, cp, pskb, &diff))
363		return 0;
364
365	/*
366	 *	Update ip_vs seq stuff if len has changed.
367	 */
368	if (diff != 0)
369		vs_seq_update(cp, &cp->out_seq,
370			      IP_VS_CONN_F_OUT_SEQ, seq, diff);
371
372	return 1;
373}
374
375/*
376 *	Output pkt hook. Will call bound ip_vs_app specific function
377 *	called by ipvs packet handler, assumes previously checked cp!=NULL
378 *	returns false if it can't handle packet (oom)
379 */
380int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb)
381{
382	struct ip_vs_app *app;
383
384	/*
385	 *	check if application module is bound to
386	 *	this ip_vs_conn.
387	 */
388	if ((app = cp->app) == NULL)
389		return 1;
390
391	/* TCP is complicated */
392	if (cp->protocol == IPPROTO_TCP)
393		return app_tcp_pkt_out(cp, pskb, app);
394
395	/*
396	 *	Call private output hook function
397	 */
398	if (app->pkt_out == NULL)
399		return 1;
400
401	return app->pkt_out(app, cp, pskb, NULL);
402}
403
404
405static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb,
406				 struct ip_vs_app *app)
407{
408	int diff;
409	const unsigned int tcp_offset = ip_hdrlen(*pskb);
410	struct tcphdr *th;
411	__u32 seq;
412
413	if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
414		return 0;
415
416	th = (struct tcphdr *)(skb_network_header(*pskb) + tcp_offset);
417
418	/*
419	 *	Remember seq number in case this pkt gets resized
420	 */
421	seq = ntohl(th->seq);
422
423	/*
424	 *	Fix seq stuff if flagged as so.
425	 */
426	if (cp->flags & IP_VS_CONN_F_IN_SEQ)
427		vs_fix_seq(&cp->in_seq, th);
428	if (cp->flags & IP_VS_CONN_F_OUT_SEQ)
429		vs_fix_ack_seq(&cp->out_seq, th);
430
431	/*
432	 *	Call private input hook function
433	 */
434	if (app->pkt_in == NULL)
435		return 1;
436
437	if (!app->pkt_in(app, cp, pskb, &diff))
438		return 0;
439
440	/*
441	 *	Update ip_vs seq stuff if len has changed.
442	 */
443	if (diff != 0)
444		vs_seq_update(cp, &cp->in_seq,
445			      IP_VS_CONN_F_IN_SEQ, seq, diff);
446
447	return 1;
448}
449
450/*
451 *	Input pkt hook. Will call bound ip_vs_app specific function
452 *	called by ipvs packet handler, assumes previously checked cp!=NULL.
453 *	returns false if can't handle packet (oom).
454 */
455int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb)
456{
457	struct ip_vs_app *app;
458
459	/*
460	 *	check if application module is bound to
461	 *	this ip_vs_conn.
462	 */
463	if ((app = cp->app) == NULL)
464		return 1;
465
466	/* TCP is complicated */
467	if (cp->protocol == IPPROTO_TCP)
468		return app_tcp_pkt_in(cp, pskb, app);
469
470	/*
471	 *	Call private input hook function
472	 */
473	if (app->pkt_in == NULL)
474		return 1;
475
476	return app->pkt_in(app, cp, pskb, NULL);
477}
478
479
480#ifdef CONFIG_PROC_FS
481/*
482 *	/proc/net/ip_vs_app entry function
483 */
484
485static struct ip_vs_app *ip_vs_app_idx(loff_t pos)
486{
487	struct ip_vs_app *app, *inc;
488
489	list_for_each_entry(app, &ip_vs_app_list, a_list) {
490		list_for_each_entry(inc, &app->incs_list, a_list) {
491			if (pos-- == 0)
492				return inc;
493		}
494	}
495	return NULL;
496
497}
498
499static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
500{
501	mutex_lock(&__ip_vs_app_mutex);
502
503	return *pos ? ip_vs_app_idx(*pos - 1) : SEQ_START_TOKEN;
504}
505
506static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
507{
508	struct ip_vs_app *inc, *app;
509	struct list_head *e;
510
511	++*pos;
512	if (v == SEQ_START_TOKEN)
513		return ip_vs_app_idx(0);
514
515	inc = v;
516	app = inc->app;
517
518	if ((e = inc->a_list.next) != &app->incs_list)
519		return list_entry(e, struct ip_vs_app, a_list);
520
521	/* go on to next application */
522	for (e = app->a_list.next; e != &ip_vs_app_list; e = e->next) {
523		app = list_entry(e, struct ip_vs_app, a_list);
524		list_for_each_entry(inc, &app->incs_list, a_list) {
525			return inc;
526		}
527	}
528	return NULL;
529}
530
531static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
532{
533	mutex_unlock(&__ip_vs_app_mutex);
534}
535
536static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
537{
538	if (v == SEQ_START_TOKEN)
539		seq_puts(seq, "prot port    usecnt name\n");
540	else {
541		const struct ip_vs_app *inc = v;
542
543		seq_printf(seq, "%-3s  %-7u %-6d %-17s\n",
544			   ip_vs_proto_name(inc->protocol),
545			   ntohs(inc->port),
546			   atomic_read(&inc->usecnt),
547			   inc->name);
548	}
549	return 0;
550}
551
552static struct seq_operations ip_vs_app_seq_ops = {
553	.start = ip_vs_app_seq_start,
554	.next  = ip_vs_app_seq_next,
555	.stop  = ip_vs_app_seq_stop,
556	.show  = ip_vs_app_seq_show,
557};
558
559static int ip_vs_app_open(struct inode *inode, struct file *file)
560{
561	return seq_open(file, &ip_vs_app_seq_ops);
562}
563
564static const struct file_operations ip_vs_app_fops = {
565	.owner	 = THIS_MODULE,
566	.open	 = ip_vs_app_open,
567	.read	 = seq_read,
568	.llseek  = seq_lseek,
569	.release = seq_release,
570};
571#endif
572
573
574/*
575 *	Replace a segment of data with a new segment
576 */
577int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
578		      char *o_buf, int o_len, char *n_buf, int n_len)
579{
580	int diff;
581	int o_offset;
582	int o_left;
583
584	EnterFunction(9);
585
586	diff = n_len - o_len;
587	o_offset = o_buf - (char *)skb->data;
588	/* The length of left data after o_buf+o_len in the skb data */
589	o_left = skb->len - (o_offset + o_len);
590
591	if (diff <= 0) {
592		memmove(o_buf + n_len, o_buf + o_len, o_left);
593		memcpy(o_buf, n_buf, n_len);
594		skb_trim(skb, skb->len + diff);
595	} else if (diff <= skb_tailroom(skb)) {
596		skb_put(skb, diff);
597		memmove(o_buf + n_len, o_buf + o_len, o_left);
598		memcpy(o_buf, n_buf, n_len);
599	} else {
600		if (pskb_expand_head(skb, skb_headroom(skb), diff, pri))
601			return -ENOMEM;
602		skb_put(skb, diff);
603		memmove(skb->data + o_offset + n_len,
604			skb->data + o_offset + o_len, o_left);
605		skb_copy_to_linear_data_offset(skb, o_offset, n_buf, n_len);
606	}
607
608	/* must update the iph total length here */
609	ip_hdr(skb)->tot_len = htons(skb->len);
610
611	LeaveFunction(9);
612	return 0;
613}
614
615
616int ip_vs_app_init(void)
617{
618	/* we will replace it with proc_net_ipvs_create() soon */
619	proc_net_fops_create("ip_vs_app", 0, &ip_vs_app_fops);
620	return 0;
621}
622
623
/*
 *	Remove the "ip_vs_app" proc entry created by ip_vs_app_init().
 */
void ip_vs_app_cleanup(void)
{
	proc_net_remove("ip_vs_app");
}
628