cc.c revision 215395
1/*-
2 * Copyright (c) 2007-2008
3 *	Swinburne University of Technology, Melbourne, Australia.
4 * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
5 * Copyright (c) 2010 The FreeBSD Foundation
6 * All rights reserved.
7 *
8 * This software was developed at the Centre for Advanced Internet
9 * Architectures, Swinburne University, by Lawrence Stewart and James Healy,
10 * made possible in part by a grant from the Cisco University Research Program
11 * Fund at Community Foundation Silicon Valley.
12 *
13 * Portions of this software were developed at the Centre for Advanced
14 * Internet Architectures, Swinburne University of Technology, Melbourne,
15 * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions
19 * are met:
20 * 1. Redistributions of source code must retain the above copyright
21 *    notice, this list of conditions and the following disclaimer.
22 * 2. Redistributions in binary form must reproduce the above copyright
23 *    notice, this list of conditions and the following disclaimer in the
24 *    documentation and/or other materials provided with the distribution.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 */
38
39/*
40 * This software was first released in 2007 by James Healy and Lawrence Stewart
41 * whilst working on the NewTCP research project at Swinburne University's
42 * Centre for Advanced Internet Architectures, Melbourne, Australia, which was
43 * made possible in part by a grant from the Cisco University Research Program
44 * Fund at Community Foundation Silicon Valley. More details are available at:
45 *   http://caia.swin.edu.au/urp/newtcp/
46 */
47
48#include <sys/cdefs.h>
49__FBSDID("$FreeBSD: head/sys/netinet/cc/cc.c 215395 2010-11-16 09:34:31Z lstewart $");
50
51#include <sys/param.h>
52#include <sys/kernel.h>
53#include <sys/libkern.h>
54#include <sys/lock.h>
55#include <sys/malloc.h>
56#include <sys/module.h>
57#include <sys/mutex.h>
58#include <sys/queue.h>
59#include <sys/rwlock.h>
60#include <sys/sbuf.h>
61#include <sys/socket.h>
62#include <sys/socketvar.h>
63#include <sys/sysctl.h>
64
65#include <net/if.h>
66#include <net/if_var.h>
67
68#include <netinet/cc.h>
69#include <netinet/in.h>
70#include <netinet/in_pcb.h>
71#include <netinet/tcp_var.h>
72
73#include <netinet/cc/cc_module.h>
74
75/*
76 * List of available cc algorithms on the current system. First element
77 * is used as the system default CC algorithm.
78 */
79struct cc_head cc_list = STAILQ_HEAD_INITIALIZER(cc_list);
80
81/* Protects the cc_list TAILQ. */
82struct rwlock cc_list_lock;
83
84VNET_DEFINE(struct cc_algo *, default_cc_ptr) = &newreno_cc_algo;
85
86/*
87 * Sysctl handler to show and change the default CC algorithm.
88 */
89static int
90cc_default_algo(SYSCTL_HANDLER_ARGS)
91{
92	char default_cc[TCP_CA_NAME_MAX];
93	struct cc_algo *funcs;
94	int err, found;
95
96	err = found = 0;
97
98	if (req->newptr == NULL) {
99		/* Just print the current default. */
100		CC_LIST_RLOCK();
101		strlcpy(default_cc, CC_DEFAULT()->name, TCP_CA_NAME_MAX);
102		CC_LIST_RUNLOCK();
103		err = sysctl_handle_string(oidp, default_cc, 1, req);
104	} else {
105		/* Find algo with specified name and set it to default. */
106		CC_LIST_RLOCK();
107		STAILQ_FOREACH(funcs, &cc_list, entries) {
108			if (strncmp((char *)req->newptr, funcs->name,
109			    TCP_CA_NAME_MAX) == 0) {
110				found = 1;
111				V_default_cc_ptr = funcs;
112			}
113		}
114		CC_LIST_RUNLOCK();
115
116		if (!found)
117			err = ESRCH;
118	}
119
120	return (err);
121}
122
123/*
124 * Sysctl handler to display the list of available CC algorithms.
125 */
126static int
127cc_list_available(SYSCTL_HANDLER_ARGS)
128{
129	struct cc_algo *algo;
130	struct sbuf *s;
131	int err, first;
132
133	err = 0;
134	first = 1;
135	s = sbuf_new(NULL, NULL, TCP_CA_NAME_MAX, SBUF_AUTOEXTEND);
136
137	if (s == NULL)
138		return (ENOMEM);
139
140	CC_LIST_RLOCK();
141	STAILQ_FOREACH(algo, &cc_list, entries) {
142		err = sbuf_printf(s, first ? "%s" : ", %s", algo->name);
143		if (err)
144			break;
145		first = 0;
146	}
147	CC_LIST_RUNLOCK();
148
149	if (!err) {
150		sbuf_finish(s);
151		err = sysctl_handle_string(oidp, sbuf_data(s), 1, req);
152	}
153
154	sbuf_delete(s);
155	return (err);
156}
157
158/*
159 * Reset the default CC algo to NewReno for any netstack which is using the algo
160 * that is about to go away as its default.
161 */
162static void
163cc_checkreset_default(struct cc_algo *remove_cc)
164{
165	VNET_ITERATOR_DECL(vnet_iter);
166
167	CC_LIST_LOCK_ASSERT();
168
169	VNET_LIST_RLOCK_NOSLEEP();
170	VNET_FOREACH(vnet_iter) {
171		CURVNET_SET(vnet_iter);
172		if (strncmp(CC_DEFAULT()->name, remove_cc->name,
173		    TCP_CA_NAME_MAX) == 0)
174			V_default_cc_ptr = &newreno_cc_algo;
175		CURVNET_RESTORE();
176	}
177	VNET_LIST_RUNLOCK_NOSLEEP();
178}
179
180/*
181 * Initialise CC subsystem on system boot.
182 */
183static void
184cc_init(void)
185{
186	CC_LIST_LOCK_INIT();
187	STAILQ_INIT(&cc_list);
188}
189
190/*
191 * Returns non-zero on success, 0 on failure.
192 */
193int
194cc_deregister_algo(struct cc_algo *remove_cc)
195{
196	struct cc_algo *funcs, *tmpfuncs;
197	int err;
198
199	err = ENOENT;
200
201	/* Never allow newreno to be deregistered. */
202	if (&newreno_cc_algo == remove_cc)
203		return (EPERM);
204
205	/* Remove algo from cc_list so that new connections can't use it. */
206	CC_LIST_WLOCK();
207	STAILQ_FOREACH_SAFE(funcs, &cc_list, entries, tmpfuncs) {
208		if (funcs == remove_cc) {
209			cc_checkreset_default(remove_cc);
210			STAILQ_REMOVE(&cc_list, funcs, cc_algo, entries);
211			err = 0;
212			break;
213		}
214	}
215	CC_LIST_WUNLOCK();
216
217	if (!err)
218		/*
219		 * XXXLAS:
220		 * - We may need to handle non-zero return values in future.
221		 * - If we add CC framework support for protocols other than
222		 *   TCP, we may want a more generic way to handle this step.
223		 */
224		tcp_ccalgounload(remove_cc);
225
226	return (err);
227}
228
229/*
230 * Returns 0 on success, non-zero on failure.
231 */
232int
233cc_register_algo(struct cc_algo *add_cc)
234{
235	struct cc_algo *funcs;
236	int err;
237
238	err = 0;
239
240	/*
241	 * Iterate over list of registered CC algorithms and make sure
242	 * we're not trying to add a duplicate.
243	 */
244	CC_LIST_WLOCK();
245	STAILQ_FOREACH(funcs, &cc_list, entries) {
246		if (funcs == add_cc || strncmp(funcs->name, add_cc->name,
247		    TCP_CA_NAME_MAX) == 0)
248			err = EEXIST;
249	}
250
251	if (!err)
252		STAILQ_INSERT_TAIL(&cc_list, add_cc, entries);
253
254	CC_LIST_WUNLOCK();
255
256	return (err);
257}
258
259/*
260 * Handles kld related events. Returns 0 on success, non-zero on failure.
261 */
262int
263cc_modevent(module_t mod, int event_type, void *data)
264{
265	struct cc_algo *algo;
266	int err;
267
268	err = 0;
269	algo = (struct cc_algo *)data;
270
271	switch(event_type) {
272	case MOD_LOAD:
273		if (algo->mod_init != NULL)
274			err = algo->mod_init();
275		if (!err)
276			err = cc_register_algo(algo);
277		break;
278
279	case MOD_QUIESCE:
280	case MOD_SHUTDOWN:
281	case MOD_UNLOAD:
282		err = cc_deregister_algo(algo);
283		if (!err && algo->mod_destroy != NULL)
284			algo->mod_destroy();
285		if (err == ENOENT)
286			err = 0;
287		break;
288
289	default:
290		err = EINVAL;
291		break;
292	}
293
294	return (err);
295}
296
297SYSINIT(cc, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, cc_init, NULL);
298
299/* Declare sysctl tree and populate it. */
300SYSCTL_NODE(_net_inet_tcp, OID_AUTO, cc, CTLFLAG_RW, NULL,
301    "congestion control related settings");
302
303SYSCTL_VNET_PROC(_net_inet_tcp_cc, OID_AUTO, algorithm, CTLTYPE_STRING|CTLFLAG_RW,
304    NULL, 0, cc_default_algo, "A", "default congestion control algorithm");
305
306SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, available, CTLTYPE_STRING|CTLFLAG_RD,
307    NULL, 0, cc_list_available, "A",
308    "list available congestion control algorithms");
309