/* wqinput.c, revision 1.7 */
/*	$NetBSD: wqinput.c,v 1.7 2020/02/01 12:54:51 riastradh Exp $	*/

/*-
 * Copyright (c) 2017 Internet Initiative Japan Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
27 */ 28 29#ifdef _KERNEL_OPT 30#include "opt_net_mpsafe.h" 31#endif 32 33#include <sys/param.h> 34#include <sys/kmem.h> 35#include <sys/mbuf.h> 36#include <sys/protosw.h> 37#include <sys/socketvar.h> 38#include <sys/syslog.h> 39#include <sys/workqueue.h> 40#include <sys/atomic.h> 41#include <sys/queue.h> 42#include <sys/percpu.h> 43#include <sys/sysctl.h> 44 45#include <net/if.h> 46#include <netinet/wqinput.h> 47 48#define WQINPUT_LIST_MAXLEN IFQ_MAXLEN 49 50struct wqinput_work { 51 struct mbuf *ww_mbuf; 52 int ww_off; 53 int ww_proto; 54 struct wqinput_work *ww_next; 55}; 56 57struct wqinput_worklist { 58 /* 59 * XXX: TAILQ cannot be used because TAILQ_INIT memories the address 60 * of percpu data while percpu(9) may move percpu data during bootup. 61 */ 62 struct wqinput_work *wwl_head; 63 struct wqinput_work *wwl_tail; 64 unsigned int wwl_len; 65 uint64_t wwl_dropped; 66 struct work wwl_work; 67 bool wwl_wq_is_active; 68}; 69 70struct wqinput { 71 struct workqueue *wqi_wq; 72 struct pool wqi_work_pool; 73 struct percpu *wqi_worklists; /* struct wqinput_worklist */ 74 void (*wqi_input)(struct mbuf *, int, int); 75}; 76 77static void wqinput_work(struct work *, void *); 78static void wqinput_sysctl_setup(const char *, struct wqinput *); 79 80static void 81wqinput_drops(void *p, void *arg, struct cpu_info *ci __unused) 82{ 83 struct wqinput_worklist **const wwlp = p; 84 struct wqinput_worklist *const wwl = *wwlp; 85 uint64_t *sum = arg; 86 87 *sum += wwl->wwl_dropped; 88} 89 90static int 91wqinput_sysctl_drops_handler(SYSCTLFN_ARGS) 92{ 93 struct sysctlnode node; 94 struct wqinput *wqi; 95 uint64_t sum = 0; 96 int error; 97 98 node = *rnode; 99 wqi = node.sysctl_data; 100 101 percpu_foreach(wqi->wqi_worklists, wqinput_drops, &sum); 102 103 node.sysctl_data = ∑ 104 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 105 if (error != 0 || newp == NULL) 106 return error; 107 108 return 0; 109} 110 111static void 112wqinput_sysctl_setup(const char *name, struct wqinput *wqi) 
{
	const struct sysctlnode *cnode, *rnode;
	int error;

	/* net.wqinput */
	error = sysctl_createv(NULL, 0, NULL, &rnode,
	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "wqinput",
	    SYSCTL_DESCR("workqueue-based pr_input controls"),
	    NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL);
	if (error != 0)
		goto bad;

	/* net.wqinput.<name> */
	error = sysctl_createv(NULL, 0, &rnode, &rnode,
	    CTLFLAG_PERMANENT, CTLTYPE_NODE, name,
	    SYSCTL_DESCR("Protocol controls for workqueue-based pr_input"),
	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
	if (error != 0)
		goto bad;

	/* net.wqinput.<name>.inputq */
	error = sysctl_createv(NULL, 0, &rnode, &rnode,
	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "inputq",
	    SYSCTL_DESCR("wqinput input queue controls"),
	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
	if (error != 0)
		goto bad;

	/* net.wqinput.<name>.inputq.drops: handler sums per-CPU counters */
	error = sysctl_createv(NULL, 0, &rnode, &cnode,
	    CTLFLAG_PERMANENT, CTLTYPE_QUAD, "drops",
	    SYSCTL_DESCR("Total packets dropped due to full input queue"),
	    wqinput_sysctl_drops_handler, 0, (void *)wqi, 0, CTL_CREATE, CTL_EOL);
	if (error != 0)
		goto bad;

	return;
bad:
	log(LOG_ERR, "%s: could not create a sysctl node for %s\n",
	    __func__, name);
	return;
}

/*
 * Return the current CPU's worklist.  Must be paired with
 * wqinput_percpu_putref(); the percpu reference is held in between.
 * Note the extra indirection: the percpu slot holds a pointer to the
 * separately-allocated worklist.
 */
static struct wqinput_worklist *
wqinput_percpu_getref(percpu_t *pc)
{

	return *(struct wqinput_worklist **)percpu_getref(pc);
}

/*
 * Release the reference taken by wqinput_percpu_getref().
 */
static void
wqinput_percpu_putref(percpu_t *pc)
{

	percpu_putref(pc);
}

/*
 * percpu(9) constructor: allocate a zeroed worklist for one CPU and
 * store its address in the percpu slot.  Keeping only a pointer in
 * percpu data means the worklist itself never moves (cf. the XXX
 * comment on struct wqinput_worklist).
 */
static void
wqinput_percpu_init_cpu(void *p, void *arg __unused, struct cpu_info *ci __unused)
{
	struct wqinput_worklist **wwlp = p;

	*wwlp = kmem_zalloc(sizeof(**wwlp), KM_SLEEP);
}

/*
 * Create a wqinput instance that defers invocations of func, a pr_input
 * routine, to a per-CPU workqueue.  name is used for the workqueue, the
 * work pool and the sysctl subtree.
 */
struct wqinput *
wqinput_create(const char *name, void (*func)(struct mbuf *, int, int))
{
	struct wqinput *wqi;
	int error;
	char namebuf[32];

	snprintf(namebuf, sizeof(namebuf), "%s_wqinput", name);

	wqi = kmem_alloc(sizeof(*wqi), KM_SLEEP);

	/* Continued on the following lines: PRI_SOFTNET/IPL_SOFTNET, MPSAFE+PERCPU */
	error = workqueue_create(&wqi->wqi_wq, namebuf, wqinput_work, wqi,
	    PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE|WQ_PERCPU);
	if (error != 0)
		panic("%s: workqueue_create failed (%d)\n", __func__, error);
	pool_init(&wqi->wqi_work_pool, sizeof(struct wqinput_work), 0, 0, 0,
	    name, NULL, IPL_SOFTNET);
	wqi->wqi_worklists = percpu_create(sizeof(struct wqinput_worklist *),
	    wqinput_percpu_init_cpu, NULL, NULL);
	wqi->wqi_input = func;

	wqinput_sysctl_setup(name, wqi);

	return wqi;
}

/*
 * Dequeue and return the work at the head of the given worklist, or
 * NULL if the list is empty.  Keeps wwl_len and wwl_tail consistent.
 */
static struct wqinput_work *
wqinput_work_get(struct wqinput_worklist *wwl)
{
	struct wqinput_work *work;

	/* Must be called at IPL_SOFTNET */

	work = wwl->wwl_head;
	if (work != NULL) {
		KASSERTMSG(wwl->wwl_len > 0, "wwl->wwl_len=%d", wwl->wwl_len);
		wwl->wwl_len--;
		wwl->wwl_head = work->ww_next;
		work->ww_next = NULL;

		/* List became empty: the tail pointer must be reset too */
		if (wwl->wwl_head == NULL)
			wwl->wwl_tail = NULL;
	} else {
		KASSERT(wwl->wwl_len == 0);
	}

	return work;
}

/*
 * Workqueue handler: drain the current CPU's worklist, feeding each
 * queued packet to the real pr_input routine with softnet_lock (and,
 * unless NET_MPSAFE, the kernel lock) held.
 */
static void
wqinput_work(struct work *wk, void *arg)
{
	struct wqinput *wqi = arg;
	struct wqinput_work *work;
	struct wqinput_worklist *wwl;
	int s;

	/* Users expect to run at IPL_SOFTNET */
	s = splsoftnet();
	/* This also prevents LWP migrations between CPUs */
	wwl = wqinput_percpu_getref(wqi->wqi_worklists);

	/* We can allow enqueuing another work at this point */
	wwl->wwl_wq_is_active = false;

	while ((work = wqinput_work_get(wwl)) != NULL) {
		mutex_enter(softnet_lock);
		KERNEL_LOCK_UNLESS_NET_MPSAFE();
		wqi->wqi_input(work->ww_mbuf, work->ww_off, work->ww_proto);
		KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
		mutex_exit(softnet_lock);

		/* The work is done with; return it to the pool */
		pool_put(&wqi->wqi_work_pool, work);
	}

	wqinput_percpu_putref(wqi->wqi_worklists);
	splx(s);
}

/*
 * Append a work to the tail of the given worklist.
 */
static void
wqinput_work_put(struct wqinput_worklist *wwl, struct wqinput_work *work)
{

	if (wwl->wwl_tail != NULL) {
		wwl->wwl_tail->ww_next = work;
	} else {
		/* Empty list: head is assigned on the continuation line */
		wwl->wwl_head =
 work;
	}
	wwl->wwl_tail = work;
	wwl->wwl_len++;
}

/*
 * Queue a packet for deferred processing by wqi's pr_input routine.
 * The mbuf goes on the current CPU's worklist; the workqueue handler is
 * scheduled unless one is already pending (wwl_wq_is_active).  If the
 * list is full (WQINPUT_LIST_MAXLEN) or no work structure is available,
 * the packet is freed and counted in wwl_dropped.
 */
void
wqinput_input(struct wqinput *wqi, struct mbuf *m, int off, int proto)
{
	struct wqinput_work *work;
	struct wqinput_worklist *wwl;

	wwl = wqinput_percpu_getref(wqi->wqi_worklists);

	/* Prevent too much work and mbuf from being queued */
	if (wwl->wwl_len >= WQINPUT_LIST_MAXLEN) {
		wwl->wwl_dropped++;
		m_freem(m);
		goto out;
	}

	/* PR_NOWAIT: dropping is preferable to sleeping here */
	work = pool_get(&wqi->wqi_work_pool, PR_NOWAIT);
	if (work == NULL) {
		wwl->wwl_dropped++;
		m_freem(m);
		goto out;
	}
	work->ww_mbuf = m;
	work->ww_off = off;
	work->ww_proto = proto;
	work->ww_next = NULL;

	wqinput_work_put(wwl, work);

	/* Avoid enqueuing another work when one is already enqueued */
	if (wwl->wwl_wq_is_active)
		goto out;
	wwl->wwl_wq_is_active = true;

	workqueue_enqueue(wqi->wqi_wq, &wwl->wwl_work, NULL);
out:
	wqinput_percpu_putref(wqi->wqi_worklists);
}