/* cc.c revision 215392 */
1/*- 2 * Copyright (c) 2007-2008 3 * Swinburne University of Technology, Melbourne, Australia. 4 * Copyright (c) 2009-2010 Lawrence Stewart <lstewart@freebsd.org> 5 * Copyright (c) 2010 The FreeBSD Foundation 6 * All rights reserved. 7 * 8 * This software was developed at the Centre for Advanced Internet 9 * Architectures, Swinburne University, by Lawrence Stewart and James Healy, 10 * made possible in part by a grant from the Cisco University Research Program 11 * Fund at Community Foundation Silicon Valley. 12 * 13 * Portions of this software were developed at the Centre for Advanced 14 * Internet Architectures, Swinburne University of Technology, Melbourne, 15 * Australia by David Hayes under sponsorship from the FreeBSD Foundation. 16 * 17 * Redistribution and use in source and binary forms, with or without 18 * modification, are permitted provided that the following conditions 19 * are met: 20 * 1. Redistributions of source code must retain the above copyright 21 * notice, this list of conditions and the following disclaimer. 22 * 2. Redistributions in binary form must reproduce the above copyright 23 * notice, this list of conditions and the following disclaimer in the 24 * documentation and/or other materials provided with the distribution. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 */ 38 39/* 40 * This software was first released in 2007 by James Healy and Lawrence Stewart 41 * whilst working on the NewTCP research project at Swinburne University's 42 * Centre for Advanced Internet Architectures, Melbourne, Australia, which was 43 * made possible in part by a grant from the Cisco University Research Program 44 * Fund at Community Foundation Silicon Valley. More details are available at: 45 * http://caia.swin.edu.au/urp/newtcp/ 46 */ 47 48#include <sys/cdefs.h> 49__FBSDID("$FreeBSD: head/sys/netinet/cc/cc.c 215392 2010-11-16 08:30:39Z lstewart $"); 50 51#include <sys/param.h> 52#include <sys/kernel.h> 53#include <sys/libkern.h> 54#include <sys/lock.h> 55#include <sys/malloc.h> 56#include <sys/module.h> 57#include <sys/mutex.h> 58#include <sys/queue.h> 59#include <sys/rwlock.h> 60#include <sys/sbuf.h> 61#include <sys/socket.h> 62#include <sys/socketvar.h> 63#include <sys/sysctl.h> 64 65#include <net/if.h> 66#include <net/if_var.h> 67 68#include <netinet/cc.h> 69#include <netinet/in.h> 70#include <netinet/in_pcb.h> 71#include <netinet/tcp_var.h> 72 73#include <netinet/cc/cc_module.h> 74 75/* 76 * List of available cc algorithms on the current system. First element 77 * is used as the system default CC algorithm. 78 */ 79struct cc_head cc_list = STAILQ_HEAD_INITIALIZER(cc_list); 80 81/* Protects the cc_list TAILQ. */ 82struct rwlock cc_list_lock; 83 84/* 85 * Set the default CC algorithm to new_default. 
The default is identified 86 * by being the first element in the cc_list TAILQ. 87 */ 88static void 89cc_set_default(struct cc_algo *new_default) 90{ 91 CC_LIST_WLOCK_ASSERT(); 92 93 /* 94 * Make the requested system default CC algorithm the first element in 95 * the list if it isn't already. 96 */ 97 if (new_default != CC_DEFAULT()) { 98 STAILQ_REMOVE(&cc_list, new_default, cc_algo, entries); 99 STAILQ_INSERT_HEAD(&cc_list, new_default, entries); 100 } 101} 102 103/* 104 * Sysctl handler to show and change the default CC algorithm. 105 */ 106static int 107cc_default_algo(SYSCTL_HANDLER_ARGS) 108{ 109 struct cc_algo *funcs; 110 int err, found; 111 112 err = found = 0; 113 114 if (req->newptr == NULL) { 115 char default_cc[TCP_CA_NAME_MAX]; 116 117 /* Just print the current default. */ 118 CC_LIST_RLOCK(); 119 strlcpy(default_cc, CC_DEFAULT()->name, TCP_CA_NAME_MAX); 120 CC_LIST_RUNLOCK(); 121 err = sysctl_handle_string(oidp, default_cc, 1, req); 122 } else { 123 /* Find algo with specified name and set it to default. */ 124 CC_LIST_WLOCK(); 125 STAILQ_FOREACH(funcs, &cc_list, entries) { 126 if (strncmp((char *)req->newptr, funcs->name, 127 TCP_CA_NAME_MAX) == 0) { 128 found = 1; 129 cc_set_default(funcs); 130 } 131 } 132 CC_LIST_WUNLOCK(); 133 134 if (!found) 135 err = ESRCH; 136 } 137 138 return (err); 139} 140 141/* 142 * Sysctl handler to display the list of available CC algorithms. 143 */ 144static int 145cc_list_available(SYSCTL_HANDLER_ARGS) 146{ 147 struct cc_algo *algo; 148 struct sbuf *s; 149 int err, first; 150 151 err = 0; 152 first = 1; 153 s = sbuf_new(NULL, NULL, TCP_CA_NAME_MAX, SBUF_AUTOEXTEND); 154 155 if (s == NULL) 156 return (ENOMEM); 157 158 CC_LIST_RLOCK(); 159 STAILQ_FOREACH(algo, &cc_list, entries) { 160 err = sbuf_printf(s, first ? 
"%s" : ", %s", algo->name); 161 if (err) 162 break; 163 first = 0; 164 } 165 CC_LIST_RUNLOCK(); 166 167 if (!err) { 168 sbuf_finish(s); 169 err = sysctl_handle_string(oidp, sbuf_data(s), 1, req); 170 } 171 172 sbuf_delete(s); 173 return (err); 174} 175 176/* 177 * Initialise CC subsystem on system boot. 178 */ 179static void 180cc_init(void) 181{ 182 CC_LIST_LOCK_INIT(); 183 STAILQ_INIT(&cc_list); 184} 185 186/* 187 * Returns non-zero on success, 0 on failure. 188 */ 189int 190cc_deregister_algo(struct cc_algo *remove_cc) 191{ 192 struct cc_algo *funcs, *tmpfuncs; 193 int err; 194 195 err = ENOENT; 196 197 /* Never allow newreno to be deregistered. */ 198 if (&newreno_cc_algo == remove_cc) 199 return (EPERM); 200 201 /* Remove algo from cc_list so that new connections can't use it. */ 202 CC_LIST_WLOCK(); 203 STAILQ_FOREACH_SAFE(funcs, &cc_list, entries, tmpfuncs) { 204 if (funcs == remove_cc) { 205 /* 206 * If we're removing the current system default, 207 * reset the default to newreno. 208 */ 209 if (strncmp(CC_DEFAULT()->name, remove_cc->name, 210 TCP_CA_NAME_MAX) == 0) 211 cc_set_default(&newreno_cc_algo); 212 213 STAILQ_REMOVE(&cc_list, funcs, cc_algo, entries); 214 err = 0; 215 break; 216 } 217 } 218 CC_LIST_WUNLOCK(); 219 220 if (!err) 221 /* 222 * XXXLAS: 223 * - We may need to handle non-zero return values in future. 224 * - If we add CC framework support for protocols other than 225 * TCP, we may want a more generic way to handle this step. 226 */ 227 tcp_ccalgounload(remove_cc); 228 229 return (err); 230} 231 232/* 233 * Returns 0 on success, non-zero on failure. 234 */ 235int 236cc_register_algo(struct cc_algo *add_cc) 237{ 238 struct cc_algo *funcs; 239 int err; 240 241 err = 0; 242 243 /* 244 * Iterate over list of registered CC algorithms and make sure 245 * we're not trying to add a duplicate. 
246 */ 247 CC_LIST_WLOCK(); 248 STAILQ_FOREACH(funcs, &cc_list, entries) { 249 if (funcs == add_cc || strncmp(funcs->name, add_cc->name, 250 TCP_CA_NAME_MAX) == 0) 251 err = EEXIST; 252 } 253 254 if (!err) 255 STAILQ_INSERT_TAIL(&cc_list, add_cc, entries); 256 257 CC_LIST_WUNLOCK(); 258 259 return (err); 260} 261 262/* 263 * Handles kld related events. Returns 0 on success, non-zero on failure. 264 */ 265int 266cc_modevent(module_t mod, int event_type, void *data) 267{ 268 struct cc_algo *algo; 269 int err; 270 271 err = 0; 272 algo = (struct cc_algo *)data; 273 274 switch(event_type) { 275 case MOD_LOAD: 276 if (algo->mod_init != NULL) 277 err = algo->mod_init(); 278 if (!err) 279 err = cc_register_algo(algo); 280 break; 281 282 case MOD_QUIESCE: 283 case MOD_SHUTDOWN: 284 case MOD_UNLOAD: 285 err = cc_deregister_algo(algo); 286 if (!err && algo->mod_destroy != NULL) 287 algo->mod_destroy(); 288 if (err == ENOENT) 289 err = 0; 290 break; 291 292 default: 293 err = EINVAL; 294 break; 295 } 296 297 return (err); 298} 299 300SYSINIT(cc, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_FIRST, cc_init, NULL); 301 302/* Declare sysctl tree and populate it. */ 303SYSCTL_NODE(_net_inet_tcp, OID_AUTO, cc, CTLFLAG_RW, NULL, 304 "congestion control related settings"); 305 306SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, algorithm, CTLTYPE_STRING|CTLFLAG_RW, 307 NULL, 0, cc_default_algo, "A", "default congestion control algorithm"); 308 309SYSCTL_PROC(_net_inet_tcp_cc, OID_AUTO, available, CTLTYPE_STRING|CTLFLAG_RD, 310 NULL, 0, cc_list_available, "A", 311 "list available congestion control algorithms"); 312