/* cc.c revision 215395 */
1/*- 2 * Copyright (c) 2007-2008 3 * Swinburne University of Technology, Melbourne, Australia. 4 * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org> 5 * Copyright (c) 2010 The FreeBSD Foundation 6 * All rights reserved. 7 * 8 * This software was developed at the Centre for Advanced Internet 9 * Architectures, Swinburne University, by Lawrence Stewart and James Healy, 10 * made possible in part by a grant from the Cisco University Research Program 11 * Fund at Community Foundation Silicon Valley. 12 * 13 * Portions of this software were developed at the Centre for Advanced 14 * Internet Architectures, Swinburne University of Technology, Melbourne, 15 * Australia by David Hayes under sponsorship from the FreeBSD Foundation. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions 19 * are met: 20 * 1. Redistributions of source code must retain the above copyright 21 * notice, this list of conditions and the following disclaimer. 22 * 2. Redistributions in binary form must reproduce the above copyright 23 * notice, this list of conditions and the following disclaimer in the 24 * documentation and/or other materials provided with the distribution. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39/* 40 * This software was first released in 2007 by James Healy and Lawrence Stewart 41 * whilst working on the NewTCP research project at Swinburne University's 42 * Centre for Advanced Internet Architectures, Melbourne, Australia, which was 43 * made possible in part by a grant from the Cisco University Research Program 44 * Fund at Community Foundation Silicon Valley. More details are available at: 45 * http://caia.swin.edu.au/urp/newtcp/ 46 */ 47 48#include <sys/cdefs.h> 49__FBSDID("$FreeBSD: head/sys/netinet/cc/cc.c 215395 2010-11-16 09:34:31Z lstewart $"); 50 51#include <sys/param.h> 52#include <sys/kernel.h> 53#include <sys/libkern.h> 54#include <sys/lock.h> 55#include <sys/malloc.h> 56#include <sys/module.h> 57#include <sys/mutex.h> 58#include <sys/queue.h> 59#include <sys/rwlock.h> 60#include <sys/sbuf.h> 61#include <sys/socket.h> 62#include <sys/socketvar.h> 63#include <sys/sysctl.h> 64 65#include <net/if.h> 66#include <net/if_var.h> 67 68#include <netinet/cc.h> 69#include <netinet/in.h> 70#include <netinet/in_pcb.h> 71#include <netinet/tcp_var.h> 72 73#include <netinet/cc/cc_module.h> 74 75/* 76 * List of available cc algorithms on the current system. First element 77 * is used as the system default CC algorithm. 78 */ 79struct cc_head cc_list = STAILQ_HEAD_INITIALIZER(cc_list); 80 81/* Protects the cc_list TAILQ. 
*/ 82struct rwlock cc_list_lock; 83 84VNET_DEFINE(struct cc_algo *, default_cc_ptr) = &newreno_cc_algo; 85 86/* 87 * Sysctl handler to show and change the default CC algorithm. 88 */ 89static int 90cc_default_algo(SYSCTL_HANDLER_ARGS) 91{ 92 char default_cc[TCP_CA_NAME_MAX]; 93 struct cc_algo *funcs; 94 int err, found; 95 96 err = found = 0; 97 98 if (req->newptr == NULL) { 99 /* Just print the current default. */ 100 CC_LIST_RLOCK(); 101 strlcpy(default_cc, CC_DEFAULT()->name, TCP_CA_NAME_MAX); 102 CC_LIST_RUNLOCK(); 103 err = sysctl_handle_string(oidp, default_cc, 1, req); 104 } else { 105 /* Find algo with specified name and set it to default. */ 106 CC_LIST_RLOCK(); 107 STAILQ_FOREACH(funcs, &cc_list, entries) { 108 if (strncmp((char *)req->newptr, funcs->name, 109 TCP_CA_NAME_MAX) == 0) { 110 found = 1; 111 V_default_cc_ptr = funcs; 112 } 113 } 114 CC_LIST_RUNLOCK(); 115 116 if (!found) 117 err = ESRCH; 118 } 119 120 return (err); 121} 122 123/* 124 * Sysctl handler to display the list of available CC algorithms. 125 */ 126static int 127cc_list_available(SYSCTL_HANDLER_ARGS) 128{ 129 struct cc_algo *algo; 130 struct sbuf *s; 131 int err, first; 132 133 err = 0; 134 first = 1; 135 s = sbuf_new(NULL, NULL, TCP_CA_NAME_MAX, SBUF_AUTOEXTEND); 136 137 if (s == NULL) 138 return (ENOMEM); 139 140 CC_LIST_RLOCK(); 141 STAILQ_FOREACH(algo, &cc_list, entries) { 142 err = sbuf_printf(s, first ? "%s" : ", %s", algo->name); 143 if (err) 144 break; 145 first = 0; 146 } 147 CC_LIST_RUNLOCK(); 148 149 if (!err) { 150 sbuf_finish(s); 151 err = sysctl_handle_string(oidp, sbuf_data(s), 1, req); 152 } 153 154 sbuf_delete(s); 155 return (err); 156} 157 158/* 159 * Reset the default CC algo to NewReno for any netstack which is using the algo 160 * that is about to go away as its default. 
161 */ 162static void 163cc_checkreset_default(struct cc_algo *remove_cc) 164{ 165 VNET_ITERATOR_DECL(vnet_iter); 166 167 CC_LIST_LOCK_ASSERT(); 168 169 VNET_LIST_RLOCK_NOSLEEP(); 170 VNET_FOREACH(vnet_iter) { 171 CURVNET_SET(vnet_iter); 172 if (strncmp(CC_DEFAULT()->name, remove_cc->name, 173 TCP_CA_NAME_MAX) == 0) 174 V_default_cc_ptr = &newreno_cc_algo; 175 CURVNET_RESTORE(); 176 } 177 VNET_LIST_RUNLOCK_NOSLEEP(); 178} 179 180/* 181 * Initialise CC subsystem on system boot. 182 */ 183static void 184cc_init(void) 185{ 186 CC_LIST_LOCK_INIT(); 187 STAILQ_INIT(&cc_list); 188} 189 190/* 191 * Returns non-zero on success, 0 on failure. 192 */ 193int 194cc_deregister_algo(struct cc_algo *remove_cc) 195{ 196 struct cc_algo *funcs, *tmpfuncs; 197 int err; 198 199 err = ENOENT; 200 201 /* Never allow newreno to be deregistered. */ 202 if (&newreno_cc_algo == remove_cc) 203 return (EPERM); 204 205 /* Remove algo from cc_list so that new connections can't use it. */ 206 CC_LIST_WLOCK(); 207 STAILQ_FOREACH_SAFE(funcs, &cc_list, entries, tmpfuncs) { 208 if (funcs == remove_cc) { 209 cc_checkreset_default(remove_cc); 210 STAILQ_REMOVE(&cc_list, funcs, cc_algo, entries); 211 err = 0; 212 break; 213 } 214 } 215 CC_LIST_WUNLOCK(); 216 217 if (!err) 218 /* 219 * XXXLAS: 220 * - We may need to handle non-zero return values in future. 221 * - If we add CC framework support for protocols other than 222 * TCP, we may want a more generic way to handle this step. 223 */ 224 tcp_ccalgounload(remove_cc); 225 226 return (err); 227} 228 229/* 230 * Returns 0 on success, non-zero on failure. 231 */ 232int 233cc_register_algo(struct cc_algo *add_cc) 234{ 235 struct cc_algo *funcs; 236 int err; 237 238 err = 0; 239 240 /* 241 * Iterate over list of registered CC algorithms and make sure 242 * we're not trying to add a duplicate. 
243 */ 244 CC_LIST_WLOCK(); 245 STAILQ_FOREACH(funcs, &cc_list, entries) { 246 if (funcs == add_cc || strncmp(funcs->name, add_cc->name, 247 TCP_CA_NAME_MAX) == 0) 248 err = EEXIST; 249 } 250 251 if (!err) 252 STAILQ_INSERT_TAIL(&cc_list, add_cc, entries); 253 254 CC_LIST_WUNLOCK(); 255 256 return (err); 257} 258 259/* 260 * Handles kld related events. Returns 0 on success, non-zero on failure. 261 */ 262int 263cc_modevent(module_t mod, int event_type, void *data) 264{ 265 struct cc_algo *algo; 266 int err; 267 268 err = 0; 269 algo = (struct cc_algo *)data; 270 271 switch(event_type) { 272 case MOD_LOAD: 273 if (algo->mod_init != NULL) 274 err = algo->mod_init(); 275 if (!err) 276 err = cc_register_algo(algo); 277 break; 278 279 case MOD_QUIESCE: 280 case MOD_SHUTDOWN: 281 case MOD_UNLOAD: 282 err = cc_deregister_algo(algo); 283 if (!err && algo->mod_destroy != NULL) 284 algo->mod_destroy(); 285 if (err == ENOENT) 286 err = 0; 287 break; 288 289 default: 290 err = EINVAL; 291 break; 292 } 293 294 return (err); 295} 296 297SYSINIT(cc, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, cc_init, NULL); 298 299/* Declare sysctl tree and populate it. */ 300SYSCTL_NODE(_net_inet_tcp, OID_AUTO, cc, CTLFLAG_RW, NULL, 301 "congestion control related settings"); 302 303SYSCTL_VNET_PROC(_net_inet_tcp_cc, OID_AUTO, algorithm, CTLTYPE_STRING|CTLFLAG_RW, 304 NULL, 0, cc_default_algo, "A", "default congestion control algorithm"); 305 306SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, available, CTLTYPE_STRING|CTLFLAG_RD, 307 NULL, 0, cc_list_available, "A", 308 "list available congestion control algorithms"); 309