1/*-
2 * ng_tcpmss.c
3 *
4 * Copyright (c) 2004, Alexey Popov <lollypop@flexuser.ru>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice unmodified, this list of conditions, and the following
12 *    disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 *
29 * This software includes fragments of the following programs:
30 *	tcpmssd		Ruslan Ermilov <ru@FreeBSD.org>
31 *
32 * $FreeBSD$
33 */
34
/*
 * This node is a netgraph tool for working around PMTUD problems. It acts
 * as a filter for IP packets. If configured, it reduces the MSS of TCP SYN
 * packets.
 *
 * Configuration is done by sending an NGM_TCPMSS_CONFIG message. The
 * message sets up a filter for packets arriving on hook 'inHook'. The
 * packet's TCP MSS field is lowered to the 'maxMSS' parameter and the
 * resulting packet is sent to 'outHook'.
 *
 * XXX: statistics are not updated atomically, so they may break on SMP.
 */
47
48#include <sys/param.h>
49#include <sys/systm.h>
50#include <sys/endian.h>
51#include <sys/errno.h>
52#include <sys/kernel.h>
53#include <sys/malloc.h>
54#include <sys/mbuf.h>
55
56#include <netinet/in.h>
57#include <netinet/in_systm.h>
58#include <netinet/ip.h>
59#include <netinet/tcp.h>
60
61#include <netgraph/ng_message.h>
62#include <netgraph/netgraph.h>
63#include <netgraph/ng_parse.h>
64#include <netgraph/ng_tcpmss.h>
65
66/* Per hook info. */
67typedef struct {
68	hook_p				outHook;
69	struct ng_tcpmss_hookstat	stats;
70} *hpriv_p;
71
72/* Netgraph methods. */
73static ng_constructor_t	ng_tcpmss_constructor;
74static ng_rcvmsg_t	ng_tcpmss_rcvmsg;
75static ng_newhook_t	ng_tcpmss_newhook;
76static ng_rcvdata_t	ng_tcpmss_rcvdata;
77static ng_disconnect_t	ng_tcpmss_disconnect;
78
79static int correct_mss(struct tcphdr *, int, uint16_t, int);
80
81/* Parse type for struct ng_tcpmss_hookstat. */
82static const struct ng_parse_struct_field ng_tcpmss_hookstat_type_fields[]
83	= NG_TCPMSS_HOOKSTAT_INFO;
84static const struct ng_parse_type ng_tcpmss_hookstat_type = {
85	&ng_parse_struct_type,
86	&ng_tcpmss_hookstat_type_fields
87};
88
89/* Parse type for struct ng_tcpmss_config. */
90static const struct ng_parse_struct_field ng_tcpmss_config_type_fields[]
91	= NG_TCPMSS_CONFIG_INFO;
92static const struct ng_parse_type ng_tcpmss_config_type = {
93	&ng_parse_struct_type,
94	ng_tcpmss_config_type_fields
95};
96
97/* List of commands and how to convert arguments to/from ASCII. */
98static const struct ng_cmdlist ng_tcpmss_cmds[] = {
99	{
100	  NGM_TCPMSS_COOKIE,
101	  NGM_TCPMSS_GET_STATS,
102	  "getstats",
103	  &ng_parse_hookbuf_type,
104	  &ng_tcpmss_hookstat_type
105	},
106	{
107	  NGM_TCPMSS_COOKIE,
108	  NGM_TCPMSS_CLR_STATS,
109	  "clrstats",
110	  &ng_parse_hookbuf_type,
111	  NULL
112	},
113	{
114	  NGM_TCPMSS_COOKIE,
115	  NGM_TCPMSS_GETCLR_STATS,
116	  "getclrstats",
117	  &ng_parse_hookbuf_type,
118	  &ng_tcpmss_hookstat_type
119	},
120	{
121	  NGM_TCPMSS_COOKIE,
122	  NGM_TCPMSS_CONFIG,
123	  "config",
124	  &ng_tcpmss_config_type,
125	  NULL
126	},
127	{ 0 }
128};
129
130/* Netgraph type descriptor. */
131static struct ng_type ng_tcpmss_typestruct = {
132	.version =	NG_ABI_VERSION,
133	.name =		NG_TCPMSS_NODE_TYPE,
134	.constructor =	ng_tcpmss_constructor,
135	.rcvmsg =	ng_tcpmss_rcvmsg,
136	.newhook =	ng_tcpmss_newhook,
137	.rcvdata =	ng_tcpmss_rcvdata,
138	.disconnect =	ng_tcpmss_disconnect,
139	.cmdlist =	ng_tcpmss_cmds,
140};
141
142NETGRAPH_INIT(tcpmss, &ng_tcpmss_typestruct);
143
/*
 * Store an error code in the local variable "error" and jump to the
 * enclosing function's "done" label.  The body is wrapped in
 * do { } while (0) so that "ERROUT(x);" is exactly one statement and
 * stays correct inside unbraced if/else bodies.
 */
#define	ERROUT(x)	do { error = (x); goto done; } while (0)
145
146/*
147 * Node constructor. No special actions required.
148 */
149static int
150ng_tcpmss_constructor(node_p node)
151{
152	return (0);
153}
154
155/*
156 * Add a hook. Any unique name is OK.
157 */
158static int
159ng_tcpmss_newhook(node_p node, hook_p hook, const char *name)
160{
161	hpriv_p priv;
162
163	priv = malloc(sizeof(*priv), M_NETGRAPH, M_NOWAIT | M_ZERO);
164	if (priv == NULL)
165		return (ENOMEM);
166
167	NG_HOOK_SET_PRIVATE(hook, priv);
168
169	return (0);
170}
171
172/*
173 * Receive a control message.
174 */
175static int
176ng_tcpmss_rcvmsg
177(node_p node, item_p item, hook_p lasthook)
178{
179	struct ng_mesg *msg, *resp = NULL;
180	int error = 0;
181
182	NGI_GET_MSG(item, msg);
183
184	switch (msg->header.typecookie) {
185	case NGM_TCPMSS_COOKIE:
186		switch (msg->header.cmd) {
187		case NGM_TCPMSS_GET_STATS:
188		case NGM_TCPMSS_CLR_STATS:
189		case NGM_TCPMSS_GETCLR_STATS:
190		    {
191			hook_p hook;
192			hpriv_p priv;
193
194			/* Check that message is long enough. */
195			if (msg->header.arglen != NG_HOOKSIZ)
196				ERROUT(EINVAL);
197
198			/* Find this hook. */
199			hook = ng_findhook(node, (char *)msg->data);
200			if (hook == NULL)
201				ERROUT(ENOENT);
202
203			priv = NG_HOOK_PRIVATE(hook);
204
205			/* Create response. */
206			if (msg->header.cmd != NGM_TCPMSS_CLR_STATS) {
207				NG_MKRESPONSE(resp, msg,
208				    sizeof(struct ng_tcpmss_hookstat), M_NOWAIT);
209				if (resp == NULL)
210					ERROUT(ENOMEM);
211				bcopy(&priv->stats, resp->data,
212				    sizeof(struct ng_tcpmss_hookstat));
213			}
214
215			if (msg->header.cmd != NGM_TCPMSS_GET_STATS)
216				bzero(&priv->stats,
217				    sizeof(struct ng_tcpmss_hookstat));
218			break;
219		    }
220		case NGM_TCPMSS_CONFIG:
221		    {
222			struct ng_tcpmss_config *set;
223			hook_p in, out;
224			hpriv_p priv;
225
226			/* Check that message is long enough. */
227			if (msg->header.arglen !=
228			    sizeof(struct ng_tcpmss_config))
229				ERROUT(EINVAL);
230
231			set = (struct ng_tcpmss_config *)msg->data;
232			in = ng_findhook(node, set->inHook);
233			out = ng_findhook(node, set->outHook);
234			if (in == NULL || out == NULL)
235				ERROUT(ENOENT);
236
237			/* Configure MSS hack. */
238			priv = NG_HOOK_PRIVATE(in);
239			priv->outHook = out;
240			priv->stats.maxMSS = set->maxMSS;
241
242			break;
243 		    }
244		default:
245			error = EINVAL;
246			break;
247		}
248		break;
249	default:
250		error = EINVAL;
251		break;
252	}
253
254done:
255	NG_RESPOND_MSG(error, node, item, resp);
256	NG_FREE_MSG(msg);
257
258	return (error);
259}
260
261/*
262 * Receive data on a hook, and hack MSS.
263 *
264 */
265static int
266ng_tcpmss_rcvdata(hook_p hook, item_p item)
267{
268	hpriv_p priv = NG_HOOK_PRIVATE(hook);
269	struct mbuf *m = NULL;
270	struct ip *ip;
271	struct tcphdr *tcp;
272	int iphlen, tcphlen, pktlen;
273	int pullup_len = 0;
274	int error = 0;
275
276	/* Drop packets if filter is not configured on this hook. */
277	if (priv->outHook == NULL)
278		goto done;
279
280	NGI_GET_M(item, m);
281
282	/* Update stats on incoming hook. */
283	pktlen = m->m_pkthdr.len;
284	priv->stats.Octets += pktlen;
285	priv->stats.Packets++;
286
287	/* Check whether we configured to fix MSS. */
288	if (priv->stats.maxMSS == 0)
289		goto send;
290
291#define	M_CHECK(length) do {					\
292	pullup_len += length;					\
293	if ((m)->m_pkthdr.len < pullup_len)			\
294		goto send;					\
295	if ((m)->m_len < pullup_len &&				\
296	   (((m) = m_pullup((m), pullup_len)) == NULL))		\
297		ERROUT(ENOBUFS);				\
298	} while (0)
299
300	/* Check mbuf packet size and arrange for IP header. */
301	M_CHECK(sizeof(struct ip));
302	ip = mtod(m, struct ip *);
303
304	/* Check IP version. */
305	if (ip->ip_v != IPVERSION)
306		ERROUT(EINVAL);
307
308	/* Check IP header length. */
309	iphlen = ip->ip_hl << 2;
310	if (iphlen < sizeof(struct ip) || iphlen > pktlen )
311		ERROUT(EINVAL);
312
313        /* Check if it is TCP. */
314	if (!(ip->ip_p == IPPROTO_TCP))
315		goto send;
316
317	/* Check mbuf packet size and arrange for IP+TCP header */
318	M_CHECK(iphlen - sizeof(struct ip) + sizeof(struct tcphdr));
319	ip = mtod(m, struct ip *);
320	tcp = (struct tcphdr *)((caddr_t )ip + iphlen);
321
322	/* Check TCP header length. */
323	tcphlen = tcp->th_off << 2;
324	if (tcphlen < sizeof(struct tcphdr) || tcphlen > pktlen - iphlen)
325		ERROUT(EINVAL);
326
327	/* Check SYN packet and has options. */
328	if (!(tcp->th_flags & TH_SYN) || tcphlen == sizeof(struct tcphdr))
329		goto send;
330
331	/* Update SYN stats. */
332	priv->stats.SYNPkts++;
333
334	M_CHECK(tcphlen - sizeof(struct tcphdr));
335	ip = mtod(m, struct ip *);
336	tcp = (struct tcphdr *)((caddr_t )ip + iphlen);
337
338#undef	M_CHECK
339
340	/* Fix MSS and update stats. */
341	if (correct_mss(tcp, tcphlen, priv->stats.maxMSS,
342	    m->m_pkthdr.csum_flags))
343		priv->stats.FixedPkts++;
344
345send:
346	/* Deliver frame out destination hook. */
347	NG_FWD_NEW_DATA(error, item, priv->outHook, m);
348
349	return (error);
350
351done:
352	NG_FREE_ITEM(item);
353	NG_FREE_M(m);
354
355	return (error);
356}
357
358/*
359 * Hook disconnection.
360 * We must check all hooks, since they may reference this one.
361 */
362static int
363ng_tcpmss_disconnect(hook_p hook)
364{
365	node_p node = NG_HOOK_NODE(hook);
366	hook_p hook2;
367
368	LIST_FOREACH(hook2, &node->nd_hooks, hk_hooks) {
369		hpriv_p priv = NG_HOOK_PRIVATE(hook2);
370
371		if (priv->outHook == hook)
372			priv->outHook = NULL;
373	}
374
375	free(NG_HOOK_PRIVATE(hook), M_NETGRAPH);
376
377	if (NG_NODE_NUMHOOKS(NG_HOOK_NODE(hook)) == 0)
378		ng_rmnode_self(NG_HOOK_NODE(hook));
379
380	return (0);
381}
382
383/*
384 * Code from tcpmssd.
385 */
386
/*-
 * The following macro is used to update an
 * internet checksum.  "acc" is a 32-bit
 * accumulation of all the changes to the
 * checksum (adding in old 16-bit words and
 * subtracting out new words), and "cksum"
 * is the checksum value to be updated.
 *
 * Both arguments must be modifiable lvalues and are evaluated more than
 * once: "acc" is clobbered and "cksum" receives the new checksum.
 * The macro expands to a single statement without a trailing semicolon,
 * so it can be used safely in unbraced if/else bodies.
 */
#define TCPMSS_ADJUST_CHECKSUM(acc, cksum) do {			\
	(acc) += (cksum);					\
	if ((acc) < 0) {					\
		(acc) = -(acc);					\
		/* Fold the carries back into 16 bits. */	\
		(acc) = ((acc) >> 16) + ((acc) & 0xffff);	\
		(acc) += (acc) >> 16;				\
		(cksum) = (uint16_t)~(acc);			\
	} else {						\
		(acc) = ((acc) >> 16) + ((acc) & 0xffff);	\
		(acc) += (acc) >> 16;				\
		(cksum) = (uint16_t)(acc);			\
	}							\
} while (0)
408
409static int
410correct_mss(struct tcphdr *tc, int hlen, uint16_t maxmss, int flags)
411{
412	int olen, optlen;
413	u_char *opt;
414	int accumulate;
415	int res = 0;
416	uint16_t sum;
417
418	for (olen = hlen - sizeof(struct tcphdr), opt = (u_char *)(tc + 1);
419	     olen > 0; olen -= optlen, opt += optlen) {
420		if (*opt == TCPOPT_EOL)
421			break;
422		else if (*opt == TCPOPT_NOP)
423			optlen = 1;
424		else {
425			optlen = *(opt + 1);
426			if (optlen <= 0 || optlen > olen)
427				break;
428			if (*opt == TCPOPT_MAXSEG) {
429				if (optlen != TCPOLEN_MAXSEG)
430					continue;
431				accumulate = be16dec(opt + 2);
432				if (accumulate > maxmss) {
433					if ((flags & CSUM_TCP) == 0) {
434						accumulate -= maxmss;
435						sum = be16dec(&tc->th_sum);
436						TCPMSS_ADJUST_CHECKSUM(accumulate, sum);
437						be16enc(&tc->th_sum, sum);
438					}
439					be16enc(opt + 2, maxmss);
440					res = 1;
441				}
442			}
443		}
444	}
445	return (res);
446}
447