1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2007-2008
5 *	Swinburne University of Technology, Melbourne, Australia.
6 * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
7 * Copyright (c) 2010 The FreeBSD Foundation
8 * All rights reserved.
9 *
10 * This software was developed at the Centre for Advanced Internet
11 * Architectures, Swinburne University of Technology, by Lawrence Stewart and
12 * James Healy, made possible in part by a grant from the Cisco University
13 * Research Program Fund at Community Foundation Silicon Valley.
14 *
15 * Portions of this software were developed at the Centre for Advanced
16 * Internet Architectures, Swinburne University of Technology, Melbourne,
17 * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
18 *
19 * Redistribution and use in source and binary forms, with or without
20 * modification, are permitted provided that the following conditions
21 * are met:
22 * 1. Redistributions of source code must retain the above copyright
23 *    notice, this list of conditions and the following disclaimer.
24 * 2. Redistributions in binary form must reproduce the above copyright
25 *    notice, this list of conditions and the following disclaimer in the
26 *    documentation and/or other materials provided with the distribution.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 */
40
41/*
42 * This software was first released in 2007 by James Healy and Lawrence Stewart
43 * whilst working on the NewTCP research project at Swinburne University of
44 * Technology's Centre for Advanced Internet Architectures, Melbourne,
45 * Australia, which was made possible in part by a grant from the Cisco
46 * University Research Program Fund at Community Foundation Silicon Valley.
47 * More details are available at:
48 *   http://caia.swin.edu.au/urp/newtcp/
49 */
50
51#include <sys/cdefs.h>
52__FBSDID("$FreeBSD$");
53
54#include <sys/param.h>
55#include <sys/kernel.h>
56#include <sys/libkern.h>
57#include <sys/lock.h>
58#include <sys/malloc.h>
59#include <sys/module.h>
60#include <sys/mutex.h>
61#include <sys/queue.h>
62#include <sys/rwlock.h>
63#include <sys/sbuf.h>
64#include <sys/socket.h>
65#include <sys/socketvar.h>
66#include <sys/sysctl.h>
67
68#include <net/vnet.h>
69
70#include <netinet/in.h>
71#include <netinet/in_pcb.h>
72#include <netinet/tcp.h>
73#include <netinet/tcp_var.h>
74#include <netinet/cc/cc.h>
75
76#include <netinet/cc/cc_module.h>
77
/*
 * List of available cc algorithms on the current system. Entries are appended
 * by cc_register_algo() and removed by cc_deregister_algo().
 *
 * NOTE(review): the handlers in this file track the default algorithm through
 * the per-vnet default_cc_ptr rather than through list position; confirm
 * against the CC_DEFAULT() definition before relying on list order.
 */
struct cc_head cc_list = STAILQ_HEAD_INITIALIZER(cc_list);

/* Protects the cc_list STAILQ. */
struct rwlock cc_list_lock;

/* Per-vnet pointer to the default CC algorithm; starts out as NewReno. */
VNET_DEFINE(struct cc_algo *, default_cc_ptr) = &newreno_cc_algo;
88
89/*
90 * Sysctl handler to show and change the default CC algorithm.
91 */
92static int
93cc_default_algo(SYSCTL_HANDLER_ARGS)
94{
95	char default_cc[TCP_CA_NAME_MAX];
96	struct cc_algo *funcs;
97	int error;
98
99	/* Get the current default: */
100	CC_LIST_RLOCK();
101	strlcpy(default_cc, CC_DEFAULT()->name, sizeof(default_cc));
102	CC_LIST_RUNLOCK();
103
104	error = sysctl_handle_string(oidp, default_cc, sizeof(default_cc), req);
105
106	/* Check for error or no change */
107	if (error != 0 || req->newptr == NULL)
108		goto done;
109
110	error = ESRCH;
111
112	/* Find algo with specified name and set it to default. */
113	CC_LIST_RLOCK();
114	STAILQ_FOREACH(funcs, &cc_list, entries) {
115		if (strncmp(default_cc, funcs->name, sizeof(default_cc)))
116			continue;
117		V_default_cc_ptr = funcs;
118		error = 0;
119		break;
120	}
121	CC_LIST_RUNLOCK();
122done:
123	return (error);
124}
125
126/*
127 * Sysctl handler to display the list of available CC algorithms.
128 */
129static int
130cc_list_available(SYSCTL_HANDLER_ARGS)
131{
132	struct cc_algo *algo;
133	struct sbuf *s;
134	int err, first, nalgos;
135
136	err = nalgos = 0;
137	first = 1;
138
139	CC_LIST_RLOCK();
140	STAILQ_FOREACH(algo, &cc_list, entries) {
141		nalgos++;
142	}
143	CC_LIST_RUNLOCK();
144
145	s = sbuf_new(NULL, NULL, nalgos * TCP_CA_NAME_MAX, SBUF_FIXEDLEN);
146
147	if (s == NULL)
148		return (ENOMEM);
149
150	/*
151	 * It is theoretically possible for the CC list to have grown in size
152	 * since the call to sbuf_new() and therefore for the sbuf to be too
153	 * small. If this were to happen (incredibly unlikely), the sbuf will
154	 * reach an overflow condition, sbuf_printf() will return an error and
155	 * the sysctl will fail gracefully.
156	 */
157	CC_LIST_RLOCK();
158	STAILQ_FOREACH(algo, &cc_list, entries) {
159		err = sbuf_printf(s, first ? "%s" : ", %s", algo->name);
160		if (err) {
161			/* Sbuf overflow condition. */
162			err = EOVERFLOW;
163			break;
164		}
165		first = 0;
166	}
167	CC_LIST_RUNLOCK();
168
169	if (!err) {
170		sbuf_finish(s);
171		err = sysctl_handle_string(oidp, sbuf_data(s), 0, req);
172	}
173
174	sbuf_delete(s);
175	return (err);
176}
177
/*
 * Reset the default CC algo to NewReno for any netstack which is using the algo
 * that is about to go away as its default.
 *
 * The comparison is done by algorithm name, once per vnet. The only caller in
 * this file, cc_deregister_algo(), invokes this with the cc_list write lock
 * held.
 */
static void
cc_checkreset_default(struct cc_algo *remove_cc)
{
	VNET_ITERATOR_DECL(vnet_iter);

	/* The caller is expected to hold the cc_list lock. */
	CC_LIST_LOCK_ASSERT();

	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		/* Switch to this vnet so the V_ accessors refer to its state. */
		CURVNET_SET(vnet_iter);
		if (strncmp(CC_DEFAULT()->name, remove_cc->name,
		    TCP_CA_NAME_MAX) == 0)
			V_default_cc_ptr = &newreno_cc_algo;
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
}
199
/*
 * Initialise CC subsystem on system boot.
 *
 * Sets up the lock protecting cc_list and (re)initialises the list head.
 * Registered to run through the SYSINIT() declaration later in this file.
 */
static void
cc_init(void)
{
	CC_LIST_LOCK_INIT();
	STAILQ_INIT(&cc_list);
}
209
210/*
211 * Returns non-zero on success, 0 on failure.
212 */
213int
214cc_deregister_algo(struct cc_algo *remove_cc)
215{
216	struct cc_algo *funcs, *tmpfuncs;
217	int err;
218
219	err = ENOENT;
220
221	/* Never allow newreno to be deregistered. */
222	if (&newreno_cc_algo == remove_cc)
223		return (EPERM);
224
225	/* Remove algo from cc_list so that new connections can't use it. */
226	CC_LIST_WLOCK();
227	STAILQ_FOREACH_SAFE(funcs, &cc_list, entries, tmpfuncs) {
228		if (funcs == remove_cc) {
229			cc_checkreset_default(remove_cc);
230			STAILQ_REMOVE(&cc_list, funcs, cc_algo, entries);
231			err = 0;
232			break;
233		}
234	}
235	CC_LIST_WUNLOCK();
236
237	if (!err)
238		/*
239		 * XXXLAS:
240		 * - We may need to handle non-zero return values in future.
241		 * - If we add CC framework support for protocols other than
242		 *   TCP, we may want a more generic way to handle this step.
243		 */
244		tcp_ccalgounload(remove_cc);
245
246	return (err);
247}
248
249/*
250 * Returns 0 on success, non-zero on failure.
251 */
252int
253cc_register_algo(struct cc_algo *add_cc)
254{
255	struct cc_algo *funcs;
256	int err;
257
258	err = 0;
259
260	/*
261	 * Iterate over list of registered CC algorithms and make sure
262	 * we're not trying to add a duplicate.
263	 */
264	CC_LIST_WLOCK();
265	STAILQ_FOREACH(funcs, &cc_list, entries) {
266		if (funcs == add_cc || strncmp(funcs->name, add_cc->name,
267		    TCP_CA_NAME_MAX) == 0)
268			err = EEXIST;
269	}
270
271	if (!err)
272		STAILQ_INSERT_TAIL(&cc_list, add_cc, entries);
273
274	CC_LIST_WUNLOCK();
275
276	return (err);
277}
278
279/*
280 * Handles kld related events. Returns 0 on success, non-zero on failure.
281 */
282int
283cc_modevent(module_t mod, int event_type, void *data)
284{
285	struct cc_algo *algo;
286	int err;
287
288	err = 0;
289	algo = (struct cc_algo *)data;
290
291	switch(event_type) {
292	case MOD_LOAD:
293		if (algo->mod_init != NULL)
294			err = algo->mod_init();
295		if (!err)
296			err = cc_register_algo(algo);
297		break;
298
299	case MOD_QUIESCE:
300	case MOD_SHUTDOWN:
301	case MOD_UNLOAD:
302		err = cc_deregister_algo(algo);
303		if (!err && algo->mod_destroy != NULL)
304			algo->mod_destroy();
305		if (err == ENOENT)
306			err = 0;
307		break;
308
309	default:
310		err = EINVAL;
311		break;
312	}
313
314	return (err);
315}
316
/* Run cc_init() first within the SI_SUB_PROTO_IFATTACHDOMAIN boot stage. */
SYSINIT(cc, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, cc_init, NULL);

/* Declare sysctl tree and populate it. */
SYSCTL_NODE(_net_inet_tcp, OID_AUTO, cc, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
    "Congestion control related settings");

/* Per-vnet, read/write: name of the default algorithm (cc_default_algo()). */
SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, algorithm,
    CTLFLAG_VNET | CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE,
    NULL, 0, cc_default_algo, "A",
    "Default congestion control algorithm");

/* Read-only: names of all registered algorithms (cc_list_available()). */
SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, available,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE,
    NULL, 0, cc_list_available, "A",
    "List available congestion control algorithms");

/* Per-vnet, read/write integer knob for ABE; initialised to 0. */
VNET_DEFINE(int, cc_do_abe) = 0;
SYSCTL_INT(_net_inet_tcp_cc, OID_AUTO, abe, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(cc_do_abe), 0,
    "Enable draft-ietf-tcpm-alternativebackoff-ecn (TCP Alternative Backoff with ECN)");

/* Per-vnet, read/write integer knob, see description; initialised to 0. */
VNET_DEFINE(int, cc_abe_frlossreduce) = 0;
SYSCTL_INT(_net_inet_tcp_cc, OID_AUTO, abe_frlossreduce, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(cc_abe_frlossreduce), 0,
    "Apply standard beta instead of ABE-beta during ECN-signalled congestion "
    "recovery episodes if loss also needs to be repaired");
343