1/* $NetBSD: wqinput.c,v 1.8 2020/02/07 12:35:33 thorpej Exp $ */ 2 3/*- 4 * Copyright (c) 2017 Internet Initiative Japan Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 
 */

/*
 * wqinput: deferred protocol input processing via per-CPU workqueues.
 *
 * Packets handed to wqinput_input() are appended to a per-CPU list and
 * later passed to the registered input routine from a workqueue worker
 * running at IPL_SOFTNET.  Drop counters are exported through sysctl as
 * net.wqinput.<name>.inputq.drops.
 */

#ifdef _KERNEL_OPT
#include "opt_net_mpsafe.h"
#endif

#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/socketvar.h>
#include <sys/syslog.h>
#include <sys/workqueue.h>
#include <sys/atomic.h>
#include <sys/queue.h>
#include <sys/percpu.h>
#include <sys/sysctl.h>
#include <sys/xcall.h>

#include <net/if.h>
#include <netinet/wqinput.h>

/* Upper bound on the number of pending packets per CPU. */
#define WQINPUT_LIST_MAXLEN	IFQ_MAXLEN

/*
 * One deferred input invocation: the packet plus the (off, proto)
 * arguments that will be passed to the input routine.
 */
struct wqinput_work {
	struct mbuf	*ww_mbuf;	/* packet to process */
	int		 ww_off;	/* offset argument for the handler */
	int		 ww_proto;	/* protocol argument for the handler */
	struct wqinput_work *ww_next;	/* singly-linked list linkage */
};

/*
 * Per-CPU list of pending work items.  Accessed only from the owning
 * CPU (via percpu_getref) at IPL_SOFTNET.
 */
struct wqinput_worklist {
	/*
	 * XXX: TAILQ cannot be used because TAILQ_INIT memories the address
	 * of percpu data while percpu(9) may move percpu data during bootup.
	 */
	struct wqinput_work *wwl_head;	/* oldest pending work */
	struct wqinput_work *wwl_tail;	/* newest pending work */
	unsigned int	wwl_len;	/* current list length */
	uint64_t	wwl_dropped;	/* packets dropped (full list / no mem) */
	struct work	wwl_work;	/* workqueue handle for this CPU */
	bool		wwl_wq_is_active; /* wwl_work is currently enqueued */
};

/* One wqinput instance per protocol that defers its input routine. */
struct wqinput {
	struct workqueue *wqi_wq;	/* per-CPU softnet workqueue */
	struct pool	wqi_work_pool;	/* pool of struct wqinput_work */
	struct percpu	*wqi_worklists;	/* struct wqinput_worklist * */
	void (*wqi_input)(struct mbuf *, int, int); /* real input routine */
};

static void wqinput_work(struct work *, void *);
static void wqinput_sysctl_setup(const char *, struct wqinput *);

/*
 * percpu_foreach_xcall() callback: add this CPU's drop counter into
 * *arg (a uint64_t accumulator).  Runs on each CPU in turn.
 */
static void
wqinput_drops(void *p, void *arg, struct cpu_info *ci __unused)
{
	struct wqinput_worklist **const wwlp = p;
	struct wqinput_worklist *const wwl = *wwlp;
	uint64_t *sum = arg;

	*sum += wwl->wwl_dropped;
}

/*
 * sysctl handler for the read-only "drops" node: sum the per-CPU drop
 * counters and report the total.
 */
static int
wqinput_sysctl_drops_handler(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	struct wqinput *wqi;
	uint64_t sum = 0;
	int error;

	node = *rnode;
	wqi = node.sysctl_data;

	/*
	 * Read each CPU's counter on that CPU via a high-priority xcall
	 * so we see consistent values without extra locking.
	 */
	percpu_foreach_xcall(wqi->wqi_worklists, XC_HIGHPRI_IPL(IPL_SOFTNET),
	    wqinput_drops, &sum);

	node.sysctl_data = &sum;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error != 0 || newp == NULL)
		return error;

	return 0;
}

/*
 * Create the sysctl subtree net.wqinput.<name>.inputq.drops for one
 * wqinput instance.  On failure only a log message is emitted; the
 * instance itself remains usable.
 */
static void
wqinput_sysctl_setup(const char *name, struct wqinput *wqi)
{
	const struct sysctlnode *cnode, *rnode;
	int error;

	error = sysctl_createv(NULL, 0, NULL, &rnode,
	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "wqinput",
	    SYSCTL_DESCR("workqueue-based pr_input controls"),
	    NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL);
	if (error != 0)
		goto bad;

	error = sysctl_createv(NULL, 0, &rnode, &rnode,
	    CTLFLAG_PERMANENT, CTLTYPE_NODE, name,
	    SYSCTL_DESCR("Protocol controls for workqueue-based pr_input"),
	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
	if (error != 0)
		goto bad;

	error = sysctl_createv(NULL, 0, &rnode, &rnode,
	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "inputq",
	    SYSCTL_DESCR("wqinput input queue controls"),
	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
	if (error != 0)
		goto bad;

	error = sysctl_createv(NULL, 0, &rnode, &cnode,
	    CTLFLAG_PERMANENT, CTLTYPE_QUAD, "drops",
	    SYSCTL_DESCR("Total packets dropped due to full input queue"),
	    wqinput_sysctl_drops_handler, 0, (void *)wqi, 0, CTL_CREATE, CTL_EOL);
	if (error != 0)
		goto bad;

	return;
bad:
	log(LOG_ERR, "%s: could not create a sysctl node for %s\n",
	    __func__, name);
	return;
}

/*
 * Fetch the current CPU's worklist.  percpu_getref() also disables
 * LWP migration, so the caller stays on this CPU until the matching
 * wqinput_percpu_putref().  The percpu data holds a pointer to the
 * worklist (not the worklist itself) — see the XXX above.
 */
static struct wqinput_worklist *
wqinput_percpu_getref(percpu_t *pc)
{

	return *(struct wqinput_worklist **)percpu_getref(pc);
}

/* Release the reference taken by wqinput_percpu_getref(). */
static void
wqinput_percpu_putref(percpu_t *pc)
{

	percpu_putref(pc);
}

/*
 * percpu_create() constructor: allocate one zeroed worklist for a CPU
 * and store its pointer in the percpu slot.
 */
static void
wqinput_percpu_init_cpu(void *p, void *arg __unused, struct cpu_info *ci __unused)
{
	struct wqinput_worklist **wwlp = p;

	*wwlp = kmem_zalloc(sizeof(**wwlp), KM_SLEEP);
}

/*
 * Create a wqinput instance that defers calls to func.  name is used
 * for the workqueue/pool/sysctl names.  There is no destroy routine in
 * this revision; instances live for the lifetime of the kernel.
 */
struct wqinput *
wqinput_create(const char *name, void (*func)(struct mbuf *, int, int))
{
	struct wqinput *wqi;
	int error;
	char namebuf[32];

	snprintf(namebuf, sizeof(namebuf), "%s_wqinput", name);

	wqi = kmem_alloc(sizeof(*wqi), KM_SLEEP);

	error = workqueue_create(&wqi->wqi_wq, namebuf, wqinput_work, wqi,
	    PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE|WQ_PERCPU);
	if (error != 0)
		panic("%s: workqueue_create failed (%d)\n", __func__, error);
	pool_init(&wqi->wqi_work_pool, sizeof(struct wqinput_work), 0, 0, 0,
	    name, NULL, IPL_SOFTNET);
	wqi->wqi_worklists = percpu_create(sizeof(struct wqinput_worklist *),
	    wqinput_percpu_init_cpu, NULL, NULL);
	wqi->wqi_input = func;

	wqinput_sysctl_setup(name, wqi);

	return wqi;
}

/*
 * Dequeue the oldest work item from wwl, or return NULL if the list is
 * empty.  The list invariant (len matches head/tail) is asserted.
 */
static struct wqinput_work *
wqinput_work_get(struct wqinput_worklist *wwl)
{
	struct wqinput_work *work;

	/* Must be called at IPL_SOFTNET */

	work = wwl->wwl_head;
	if (work != NULL) {
		KASSERTMSG(wwl->wwl_len > 0, "wwl->wwl_len=%d", wwl->wwl_len);
		wwl->wwl_len--;
		wwl->wwl_head = work->ww_next;
		work->ww_next = NULL;

		if (wwl->wwl_head == NULL)
			wwl->wwl_tail = NULL;
	} else {
		KASSERT(wwl->wwl_len == 0);
	}

	return work;
}

/*
 * Workqueue worker: drain this CPU's worklist, calling the real input
 * routine for each queued packet under softnet_lock (and the kernel
 * lock unless NET_MPSAFE).
 */
static void
wqinput_work(struct work *wk, void *arg)
{
	struct wqinput *wqi = arg;
	struct wqinput_work *work;
	struct wqinput_worklist *wwl;
	int s;

	/* Users expect to run at IPL_SOFTNET */
	s = splsoftnet();
	/* This also prevents LWP migrations between CPUs */
	wwl = wqinput_percpu_getref(wqi->wqi_worklists);

	/* We can allow enqueuing another work at this point */
	wwl->wwl_wq_is_active = false;

	while ((work = wqinput_work_get(wwl)) != NULL) {
		mutex_enter(softnet_lock);
		KERNEL_LOCK_UNLESS_NET_MPSAFE();
		wqi->wqi_input(work->ww_mbuf, work->ww_off, work->ww_proto);
		KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
		mutex_exit(softnet_lock);

		pool_put(&wqi->wqi_work_pool, work);
	}

	wqinput_percpu_putref(wqi->wqi_worklists);
	splx(s);
}

/* Append work to the tail of wwl's singly-linked list. */
static void
wqinput_work_put(struct wqinput_worklist *wwl, struct wqinput_work *work)
{

	if (wwl->wwl_tail != NULL) {
		wwl->wwl_tail->ww_next = work;
	} else {
		wwl->wwl_head = work;
	}
	wwl->wwl_tail = work;
	wwl->wwl_len++;
}

/*
 * Entry point: queue packet m (with off/proto arguments) for deferred
 * processing by wqi's input routine on the current CPU.  The packet is
 * dropped (and counted) if the per-CPU list is full or no work item
 * can be allocated.  Takes ownership of m in all cases.
 */
void
wqinput_input(struct wqinput *wqi, struct mbuf *m, int off, int proto)
{
	struct wqinput_work *work;
	struct wqinput_worklist *wwl;

	wwl = wqinput_percpu_getref(wqi->wqi_worklists);

	/* Prevent too much work and mbuf from being queued */
	if (wwl->wwl_len >= WQINPUT_LIST_MAXLEN) {
		wwl->wwl_dropped++;
		m_freem(m);
		goto out;
	}

	work = pool_get(&wqi->wqi_work_pool, PR_NOWAIT);
	if (work == NULL) {
		wwl->wwl_dropped++;
		m_freem(m);
		goto out;
	}
	work->ww_mbuf = m;
	work->ww_off = off;
	work->ww_proto = proto;
	work->ww_next = NULL;

	wqinput_work_put(wwl, work);

	/* Avoid enqueuing another work when one is already enqueued */
	if (wwl->wwl_wq_is_active)
		goto out;
	wwl->wwl_wq_is_active = true;

	workqueue_enqueue(wqi->wqi_wq, &wwl->wwl_work, NULL);
out:
	wqinput_percpu_putref(wqi->wqi_worklists);
}