Deleted Added
full compact
cc.c (294535) cc.c (294931)
1/*-
2 * Copyright (c) 2007-2008
3 * Swinburne University of Technology, Melbourne, Australia.
4 * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
5 * Copyright (c) 2010 The FreeBSD Foundation
6 * All rights reserved.
7 *
8 * This software was developed at the Centre for Advanced Internet
9 * Architectures, Swinburne University of Technology, by Lawrence Stewart and
10 * James Healy, made possible in part by a grant from the Cisco University
11 * Research Program Fund at Community Foundation Silicon Valley.
12 *
13 * Portions of this software were developed at the Centre for Advanced
14 * Internet Architectures, Swinburne University of Technology, Melbourne,
15 * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions
19 * are met:
20 * 1. Redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer.
22 * 2. Redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 */
38
39/*
40 * This software was first released in 2007 by James Healy and Lawrence Stewart
41 * whilst working on the NewTCP research project at Swinburne University of
42 * Technology's Centre for Advanced Internet Architectures, Melbourne,
43 * Australia, which was made possible in part by a grant from the Cisco
44 * University Research Program Fund at Community Foundation Silicon Valley.
45 * More details are available at:
46 * http://caia.swin.edu.au/urp/newtcp/
47 */
48
49#include <sys/cdefs.h>
1/*-
2 * Copyright (c) 2007-2008
3 * Swinburne University of Technology, Melbourne, Australia.
4 * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org>
5 * Copyright (c) 2010 The FreeBSD Foundation
6 * All rights reserved.
7 *
8 * This software was developed at the Centre for Advanced Internet
9 * Architectures, Swinburne University of Technology, by Lawrence Stewart and
10 * James Healy, made possible in part by a grant from the Cisco University
11 * Research Program Fund at Community Foundation Silicon Valley.
12 *
13 * Portions of this software were developed at the Centre for Advanced
14 * Internet Architectures, Swinburne University of Technology, Melbourne,
15 * Australia by David Hayes under sponsorship from the FreeBSD Foundation.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions
19 * are met:
20 * 1. Redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer.
22 * 2. Redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 */
38
39/*
40 * This software was first released in 2007 by James Healy and Lawrence Stewart
41 * whilst working on the NewTCP research project at Swinburne University of
42 * Technology's Centre for Advanced Internet Architectures, Melbourne,
43 * Australia, which was made possible in part by a grant from the Cisco
44 * University Research Program Fund at Community Foundation Silicon Valley.
45 * More details are available at:
46 * http://caia.swin.edu.au/urp/newtcp/
47 */
48
49#include <sys/cdefs.h>
50__FBSDID("$FreeBSD: head/sys/netinet/cc/cc.c 294535 2016-01-21 22:34:51Z glebius $");
50__FBSDID("$FreeBSD: head/sys/netinet/cc/cc.c 294931 2016-01-27 17:59:39Z glebius $");
51
52#include <sys/param.h>
53#include <sys/kernel.h>
54#include <sys/libkern.h>
55#include <sys/lock.h>
56#include <sys/malloc.h>
57#include <sys/module.h>
58#include <sys/mutex.h>
59#include <sys/queue.h>
60#include <sys/rwlock.h>
61#include <sys/sbuf.h>
62#include <sys/socket.h>
63#include <sys/socketvar.h>
64#include <sys/sysctl.h>
65
66#include <net/vnet.h>
67
68#include <netinet/in.h>
69#include <netinet/in_pcb.h>
70#include <netinet/tcp.h>
71#include <netinet/tcp_var.h>
51
52#include <sys/param.h>
53#include <sys/kernel.h>
54#include <sys/libkern.h>
55#include <sys/lock.h>
56#include <sys/malloc.h>
57#include <sys/module.h>
58#include <sys/mutex.h>
59#include <sys/queue.h>
60#include <sys/rwlock.h>
61#include <sys/sbuf.h>
62#include <sys/socket.h>
63#include <sys/socketvar.h>
64#include <sys/sysctl.h>
65
66#include <net/vnet.h>
67
68#include <netinet/in.h>
69#include <netinet/in_pcb.h>
70#include <netinet/tcp.h>
71#include <netinet/tcp_var.h>
72#include <netinet/tcp_cc.h>
72#include <netinet/cc/cc.h>
73
74#include <netinet/cc/cc_module.h>
75
76/*
77 * List of available cc algorithms on the current system. First element
78 * is used as the system default CC algorithm.
79 */
/*
 * NOTE(review): cc_default_algo() in this file changes the default by
 * storing to V_default_cc_ptr rather than by reordering this list; confirm
 * whether the "first element" remark above is still accurate.
 */
80struct cc_head cc_list = STAILQ_HEAD_INITIALIZER(cc_list);
81
82/* Protects the cc_list STAILQ. */
83struct rwlock cc_list_lock;
84
/*
 * Default CC algorithm pointer, defined through VNET_DEFINE and accessed in
 * this file as V_default_cc_ptr. It initially points at NewReno.
 */
85VNET_DEFINE(struct cc_algo *, default_cc_ptr) = &newreno_cc_algo;
86
87/*
88 * Sysctl handler to show and change the default CC algorithm.
89 */
90static int
91cc_default_algo(SYSCTL_HANDLER_ARGS)
92{
93 char default_cc[TCP_CA_NAME_MAX];
94 struct cc_algo *funcs;
95 int error;
96
97 /* Get the current default: */
98 CC_LIST_RLOCK();
99 strlcpy(default_cc, CC_DEFAULT()->name, sizeof(default_cc));
100 CC_LIST_RUNLOCK();
101
102 error = sysctl_handle_string(oidp, default_cc, sizeof(default_cc), req);
103
104 /* Check for error or no change */
105 if (error != 0 || req->newptr == NULL)
106 goto done;
107
108 error = ESRCH;
109
110 /* Find algo with specified name and set it to default. */
111 CC_LIST_RLOCK();
112 STAILQ_FOREACH(funcs, &cc_list, entries) {
113 if (strncmp(default_cc, funcs->name, sizeof(default_cc)))
114 continue;
115 V_default_cc_ptr = funcs;
116 error = 0;
117 break;
118 }
119 CC_LIST_RUNLOCK();
120done:
121 return (error);
122}
123
124/*
125 * Sysctl handler to display the list of available CC algorithms.
126 */
127static int
128cc_list_available(SYSCTL_HANDLER_ARGS)
129{
130 struct cc_algo *algo;
131 struct sbuf *s;
132 int err, first, nalgos;
133
134 err = nalgos = 0;
135 first = 1;
136
137 CC_LIST_RLOCK();
138 STAILQ_FOREACH(algo, &cc_list, entries) {
139 nalgos++;
140 }
141 CC_LIST_RUNLOCK();
142
143 s = sbuf_new(NULL, NULL, nalgos * TCP_CA_NAME_MAX, SBUF_FIXEDLEN);
144
145 if (s == NULL)
146 return (ENOMEM);
147
148 /*
149 * It is theoretically possible for the CC list to have grown in size
150 * since the call to sbuf_new() and therefore for the sbuf to be too
151 * small. If this were to happen (incredibly unlikely), the sbuf will
152 * reach an overflow condition, sbuf_printf() will return an error and
153 * the sysctl will fail gracefully.
154 */
155 CC_LIST_RLOCK();
156 STAILQ_FOREACH(algo, &cc_list, entries) {
157 err = sbuf_printf(s, first ? "%s" : ", %s", algo->name);
158 if (err) {
159 /* Sbuf overflow condition. */
160 err = EOVERFLOW;
161 break;
162 }
163 first = 0;
164 }
165 CC_LIST_RUNLOCK();
166
167 if (!err) {
168 sbuf_finish(s);
169 err = sysctl_handle_string(oidp, sbuf_data(s), 0, req);
170 }
171
172 sbuf_delete(s);
173 return (err);
174}
175
176/*
177 * Reset the default CC algo to NewReno for any netstack which is using the algo
178 * that is about to go away as its default.
179 */
180static void
181cc_checkreset_default(struct cc_algo *remove_cc)
182{
183 VNET_ITERATOR_DECL(vnet_iter);
184
185 CC_LIST_LOCK_ASSERT();
186
187 VNET_LIST_RLOCK_NOSLEEP();
188 VNET_FOREACH(vnet_iter) {
189 CURVNET_SET(vnet_iter);
190 if (strncmp(CC_DEFAULT()->name, remove_cc->name,
191 TCP_CA_NAME_MAX) == 0)
192 V_default_cc_ptr = &newreno_cc_algo;
193 CURVNET_RESTORE();
194 }
195 VNET_LIST_RUNLOCK_NOSLEEP();
196}
197
198/*
199 * Initialise CC subsystem on system boot.
200 */
201static void
202cc_init(void)
203{
204 CC_LIST_LOCK_INIT();
205 STAILQ_INIT(&cc_list);
206}
207
208/*
209 * Returns non-zero on success, 0 on failure.
210 */
211int
212cc_deregister_algo(struct cc_algo *remove_cc)
213{
214 struct cc_algo *funcs, *tmpfuncs;
215 int err;
216
217 err = ENOENT;
218
219 /* Never allow newreno to be deregistered. */
220 if (&newreno_cc_algo == remove_cc)
221 return (EPERM);
222
223 /* Remove algo from cc_list so that new connections can't use it. */
224 CC_LIST_WLOCK();
225 STAILQ_FOREACH_SAFE(funcs, &cc_list, entries, tmpfuncs) {
226 if (funcs == remove_cc) {
227 cc_checkreset_default(remove_cc);
228 STAILQ_REMOVE(&cc_list, funcs, cc_algo, entries);
229 err = 0;
230 break;
231 }
232 }
233 CC_LIST_WUNLOCK();
234
235 if (!err)
236 /*
237 * XXXLAS:
238 * - We may need to handle non-zero return values in future.
239 * - If we add CC framework support for protocols other than
240 * TCP, we may want a more generic way to handle this step.
241 */
242 tcp_ccalgounload(remove_cc);
243
244 return (err);
245}
246
247/*
248 * Returns 0 on success, non-zero on failure.
249 */
250int
251cc_register_algo(struct cc_algo *add_cc)
252{
253 struct cc_algo *funcs;
254 int err;
255
256 err = 0;
257
258 /*
259 * Iterate over list of registered CC algorithms and make sure
260 * we're not trying to add a duplicate.
261 */
262 CC_LIST_WLOCK();
263 STAILQ_FOREACH(funcs, &cc_list, entries) {
264 if (funcs == add_cc || strncmp(funcs->name, add_cc->name,
265 TCP_CA_NAME_MAX) == 0)
266 err = EEXIST;
267 }
268
269 if (!err)
270 STAILQ_INSERT_TAIL(&cc_list, add_cc, entries);
271
272 CC_LIST_WUNLOCK();
273
274 return (err);
275}
276
277/*
278 * Handles kld related events. Returns 0 on success, non-zero on failure.
279 */
280int
281cc_modevent(module_t mod, int event_type, void *data)
282{
283 struct cc_algo *algo;
284 int err;
285
286 err = 0;
287 algo = (struct cc_algo *)data;
288
289 switch(event_type) {
290 case MOD_LOAD:
291 if (algo->mod_init != NULL)
292 err = algo->mod_init();
293 if (!err)
294 err = cc_register_algo(algo);
295 break;
296
297 case MOD_QUIESCE:
298 case MOD_SHUTDOWN:
299 case MOD_UNLOAD:
300 err = cc_deregister_algo(algo);
301 if (!err && algo->mod_destroy != NULL)
302 algo->mod_destroy();
303 if (err == ENOENT)
304 err = 0;
305 break;
306
307 default:
308 err = EINVAL;
309 break;
310 }
311
312 return (err);
313}
314
/* Arrange for cc_init() to be invoked through the SYSINIT mechanism. */
315SYSINIT(cc, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, cc_init, NULL);
316
317/* Declare sysctl tree and populate it. */
318SYSCTL_NODE(_net_inet_tcp, OID_AUTO, cc, CTLFLAG_RW, NULL,
319 "congestion control related settings");
320
/*
 * "algorithm" OID under the cc node: read/write string handled by
 * cc_default_algo(), flagged CTLFLAG_VNET.
 */
321SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, algorithm,
322 CTLFLAG_VNET | CTLTYPE_STRING | CTLFLAG_RW,
323 NULL, 0, cc_default_algo, "A", "default congestion control algorithm");
324
/*
 * "available" OID under the cc node: read-only string produced by
 * cc_list_available().
 */
325SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, available, CTLTYPE_STRING|CTLFLAG_RD,
326 NULL, 0, cc_list_available, "A",
327 "list available congestion control algorithms");
73
74#include <netinet/cc/cc_module.h>
75
76/*
77 * List of available cc algorithms on the current system. First element
78 * is used as the system default CC algorithm.
79 */
/*
 * NOTE(review): cc_default_algo() in this file changes the default by
 * storing to V_default_cc_ptr rather than by reordering this list; confirm
 * whether the "first element" remark above is still accurate.
 */
80struct cc_head cc_list = STAILQ_HEAD_INITIALIZER(cc_list);
81
82/* Protects the cc_list STAILQ. */
83struct rwlock cc_list_lock;
84
/*
 * Default CC algorithm pointer, defined through VNET_DEFINE and accessed in
 * this file as V_default_cc_ptr. It initially points at NewReno.
 */
85VNET_DEFINE(struct cc_algo *, default_cc_ptr) = &newreno_cc_algo;
86
87/*
88 * Sysctl handler to show and change the default CC algorithm.
89 */
90static int
91cc_default_algo(SYSCTL_HANDLER_ARGS)
92{
93 char default_cc[TCP_CA_NAME_MAX];
94 struct cc_algo *funcs;
95 int error;
96
97 /* Get the current default: */
98 CC_LIST_RLOCK();
99 strlcpy(default_cc, CC_DEFAULT()->name, sizeof(default_cc));
100 CC_LIST_RUNLOCK();
101
102 error = sysctl_handle_string(oidp, default_cc, sizeof(default_cc), req);
103
104 /* Check for error or no change */
105 if (error != 0 || req->newptr == NULL)
106 goto done;
107
108 error = ESRCH;
109
110 /* Find algo with specified name and set it to default. */
111 CC_LIST_RLOCK();
112 STAILQ_FOREACH(funcs, &cc_list, entries) {
113 if (strncmp(default_cc, funcs->name, sizeof(default_cc)))
114 continue;
115 V_default_cc_ptr = funcs;
116 error = 0;
117 break;
118 }
119 CC_LIST_RUNLOCK();
120done:
121 return (error);
122}
123
124/*
125 * Sysctl handler to display the list of available CC algorithms.
126 */
127static int
128cc_list_available(SYSCTL_HANDLER_ARGS)
129{
130 struct cc_algo *algo;
131 struct sbuf *s;
132 int err, first, nalgos;
133
134 err = nalgos = 0;
135 first = 1;
136
137 CC_LIST_RLOCK();
138 STAILQ_FOREACH(algo, &cc_list, entries) {
139 nalgos++;
140 }
141 CC_LIST_RUNLOCK();
142
143 s = sbuf_new(NULL, NULL, nalgos * TCP_CA_NAME_MAX, SBUF_FIXEDLEN);
144
145 if (s == NULL)
146 return (ENOMEM);
147
148 /*
149 * It is theoretically possible for the CC list to have grown in size
150 * since the call to sbuf_new() and therefore for the sbuf to be too
151 * small. If this were to happen (incredibly unlikely), the sbuf will
152 * reach an overflow condition, sbuf_printf() will return an error and
153 * the sysctl will fail gracefully.
154 */
155 CC_LIST_RLOCK();
156 STAILQ_FOREACH(algo, &cc_list, entries) {
157 err = sbuf_printf(s, first ? "%s" : ", %s", algo->name);
158 if (err) {
159 /* Sbuf overflow condition. */
160 err = EOVERFLOW;
161 break;
162 }
163 first = 0;
164 }
165 CC_LIST_RUNLOCK();
166
167 if (!err) {
168 sbuf_finish(s);
169 err = sysctl_handle_string(oidp, sbuf_data(s), 0, req);
170 }
171
172 sbuf_delete(s);
173 return (err);
174}
175
176/*
177 * Reset the default CC algo to NewReno for any netstack which is using the algo
178 * that is about to go away as its default.
179 */
180static void
181cc_checkreset_default(struct cc_algo *remove_cc)
182{
183 VNET_ITERATOR_DECL(vnet_iter);
184
185 CC_LIST_LOCK_ASSERT();
186
187 VNET_LIST_RLOCK_NOSLEEP();
188 VNET_FOREACH(vnet_iter) {
189 CURVNET_SET(vnet_iter);
190 if (strncmp(CC_DEFAULT()->name, remove_cc->name,
191 TCP_CA_NAME_MAX) == 0)
192 V_default_cc_ptr = &newreno_cc_algo;
193 CURVNET_RESTORE();
194 }
195 VNET_LIST_RUNLOCK_NOSLEEP();
196}
197
198/*
199 * Initialise CC subsystem on system boot.
200 */
201static void
202cc_init(void)
203{
204 CC_LIST_LOCK_INIT();
205 STAILQ_INIT(&cc_list);
206}
207
208/*
209 * Returns non-zero on success, 0 on failure.
210 */
211int
212cc_deregister_algo(struct cc_algo *remove_cc)
213{
214 struct cc_algo *funcs, *tmpfuncs;
215 int err;
216
217 err = ENOENT;
218
219 /* Never allow newreno to be deregistered. */
220 if (&newreno_cc_algo == remove_cc)
221 return (EPERM);
222
223 /* Remove algo from cc_list so that new connections can't use it. */
224 CC_LIST_WLOCK();
225 STAILQ_FOREACH_SAFE(funcs, &cc_list, entries, tmpfuncs) {
226 if (funcs == remove_cc) {
227 cc_checkreset_default(remove_cc);
228 STAILQ_REMOVE(&cc_list, funcs, cc_algo, entries);
229 err = 0;
230 break;
231 }
232 }
233 CC_LIST_WUNLOCK();
234
235 if (!err)
236 /*
237 * XXXLAS:
238 * - We may need to handle non-zero return values in future.
239 * - If we add CC framework support for protocols other than
240 * TCP, we may want a more generic way to handle this step.
241 */
242 tcp_ccalgounload(remove_cc);
243
244 return (err);
245}
246
247/*
248 * Returns 0 on success, non-zero on failure.
249 */
250int
251cc_register_algo(struct cc_algo *add_cc)
252{
253 struct cc_algo *funcs;
254 int err;
255
256 err = 0;
257
258 /*
259 * Iterate over list of registered CC algorithms and make sure
260 * we're not trying to add a duplicate.
261 */
262 CC_LIST_WLOCK();
263 STAILQ_FOREACH(funcs, &cc_list, entries) {
264 if (funcs == add_cc || strncmp(funcs->name, add_cc->name,
265 TCP_CA_NAME_MAX) == 0)
266 err = EEXIST;
267 }
268
269 if (!err)
270 STAILQ_INSERT_TAIL(&cc_list, add_cc, entries);
271
272 CC_LIST_WUNLOCK();
273
274 return (err);
275}
276
277/*
278 * Handles kld related events. Returns 0 on success, non-zero on failure.
279 */
280int
281cc_modevent(module_t mod, int event_type, void *data)
282{
283 struct cc_algo *algo;
284 int err;
285
286 err = 0;
287 algo = (struct cc_algo *)data;
288
289 switch(event_type) {
290 case MOD_LOAD:
291 if (algo->mod_init != NULL)
292 err = algo->mod_init();
293 if (!err)
294 err = cc_register_algo(algo);
295 break;
296
297 case MOD_QUIESCE:
298 case MOD_SHUTDOWN:
299 case MOD_UNLOAD:
300 err = cc_deregister_algo(algo);
301 if (!err && algo->mod_destroy != NULL)
302 algo->mod_destroy();
303 if (err == ENOENT)
304 err = 0;
305 break;
306
307 default:
308 err = EINVAL;
309 break;
310 }
311
312 return (err);
313}
314
/* Arrange for cc_init() to be invoked through the SYSINIT mechanism. */
315SYSINIT(cc, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, cc_init, NULL);
316
317/* Declare sysctl tree and populate it. */
318SYSCTL_NODE(_net_inet_tcp, OID_AUTO, cc, CTLFLAG_RW, NULL,
319 "congestion control related settings");
320
/*
 * "algorithm" OID under the cc node: read/write string handled by
 * cc_default_algo(), flagged CTLFLAG_VNET.
 */
321SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, algorithm,
322 CTLFLAG_VNET | CTLTYPE_STRING | CTLFLAG_RW,
323 NULL, 0, cc_default_algo, "A", "default congestion control algorithm");
324
/*
 * "available" OID under the cc node: read-only string produced by
 * cc_list_available().
 */
325SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, available, CTLTYPE_STRING|CTLFLAG_RD,
326 NULL, 0, cc_list_available, "A",
327 "list available congestion control algorithms");