/*	$NetBSD: wqinput.c,v 1.5 2018/08/10 07:20:59 msaitoh Exp $	*/

/*-
 * Copyright (c) 2017 Internet Initiative Japan Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
27 */ 28 29#ifdef _KERNEL_OPT 30#include "opt_net_mpsafe.h" 31#endif 32 33#include <sys/param.h> 34#include <sys/kmem.h> 35#include <sys/mbuf.h> 36#include <sys/protosw.h> 37#include <sys/socketvar.h> 38#include <sys/syslog.h> 39#include <sys/workqueue.h> 40#include <sys/atomic.h> 41#include <sys/queue.h> 42#include <sys/percpu.h> 43#include <sys/sysctl.h> 44 45#include <net/if.h> 46#include <netinet/wqinput.h> 47 48#define WQINPUT_LIST_MAXLEN IFQ_MAXLEN 49 50struct wqinput_work { 51 struct mbuf *ww_mbuf; 52 int ww_off; 53 int ww_proto; 54 struct wqinput_work *ww_next; 55}; 56 57struct wqinput_worklist { 58 /* 59 * XXX: TAILQ cannot be used because TAILQ_INIT memories the address 60 * of percpu data while percpu(9) may move percpu data during bootup. 61 */ 62 struct wqinput_work *wwl_head; 63 struct wqinput_work *wwl_tail; 64 unsigned int wwl_len; 65 uint64_t wwl_dropped; 66 struct work wwl_work; 67 bool wwl_wq_is_active; 68}; 69 70struct wqinput { 71 struct workqueue *wqi_wq; 72 struct pool wqi_work_pool; 73 struct percpu *wqi_worklists; /* struct wqinput_worklist */ 74 void (*wqi_input)(struct mbuf *, int, int); 75}; 76 77static void wqinput_work(struct work *, void *); 78static void wqinput_sysctl_setup(const char *, struct wqinput *); 79 80static void 81wqinput_drops(void *p, void *arg, struct cpu_info *ci __unused) 82{ 83 struct wqinput_worklist *const wwl = p; 84 uint64_t *sum = arg; 85 86 *sum += wwl->wwl_dropped; 87} 88 89static int 90wqinput_sysctl_drops_handler(SYSCTLFN_ARGS) 91{ 92 struct sysctlnode node; 93 struct wqinput *wqi; 94 uint64_t sum = 0; 95 int error; 96 97 node = *rnode; 98 wqi = node.sysctl_data; 99 100 percpu_foreach(wqi->wqi_worklists, wqinput_drops, &sum); 101 102 node.sysctl_data = ∑ 103 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 104 if (error != 0 || newp == NULL) 105 return error; 106 107 return 0; 108} 109 110static void 111wqinput_sysctl_setup(const char *name, struct wqinput *wqi) 112{ 113 const struct sysctlnode *cnode, *rnode; 
114 int error; 115 116 error = sysctl_createv(NULL, 0, NULL, &rnode, 117 CTLFLAG_PERMANENT, CTLTYPE_NODE, "wqinput", 118 SYSCTL_DESCR("workqueue-based pr_input controls"), 119 NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL); 120 if (error != 0) 121 goto bad; 122 123 error = sysctl_createv(NULL, 0, &rnode, &rnode, 124 CTLFLAG_PERMANENT, CTLTYPE_NODE, name, 125 SYSCTL_DESCR("Protocol controls for workqueue-based pr_input"), 126 NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL); 127 if (error != 0) 128 goto bad; 129 130 error = sysctl_createv(NULL, 0, &rnode, &rnode, 131 CTLFLAG_PERMANENT, CTLTYPE_NODE, "inputq", 132 SYSCTL_DESCR("wqinput input queue controls"), 133 NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL); 134 if (error != 0) 135 goto bad; 136 137 error = sysctl_createv(NULL, 0, &rnode, &cnode, 138 CTLFLAG_PERMANENT, CTLTYPE_QUAD, "drops", 139 SYSCTL_DESCR("Total packets dropped due to full input queue"), 140 wqinput_sysctl_drops_handler, 0, (void *)wqi, 0, CTL_CREATE, CTL_EOL); 141 if (error != 0) 142 goto bad; 143 144 return; 145bad: 146 log(LOG_ERR, "%s: could not create a sysctl node for %s\n", 147 __func__, name); 148 return; 149} 150 151struct wqinput * 152wqinput_create(const char *name, void (*func)(struct mbuf *, int, int)) 153{ 154 struct wqinput *wqi; 155 int error; 156 char namebuf[32]; 157 158 snprintf(namebuf, sizeof(namebuf), "%s_wqinput", name); 159 160 wqi = kmem_alloc(sizeof(*wqi), KM_SLEEP); 161 162 error = workqueue_create(&wqi->wqi_wq, namebuf, wqinput_work, wqi, 163 PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE|WQ_PERCPU); 164 if (error != 0) 165 panic("%s: workqueue_create failed (%d)\n", __func__, error); 166 pool_init(&wqi->wqi_work_pool, sizeof(struct wqinput_work), 0, 0, 0, 167 name, NULL, IPL_SOFTNET); 168 wqi->wqi_worklists = percpu_alloc(sizeof(struct wqinput_worklist)); 169 wqi->wqi_input = func; 170 171 wqinput_sysctl_setup(name, wqi); 172 173 return wqi; 174} 175 176static struct wqinput_work * 177wqinput_work_get(struct wqinput_worklist *wwl) 178{ 179 
struct wqinput_work *work; 180 181 /* Must be called at IPL_SOFTNET */ 182 183 work = wwl->wwl_head; 184 if (work != NULL) { 185 KASSERTMSG(wwl->wwl_len > 0, "wwl->wwl_len=%d", wwl->wwl_len); 186 wwl->wwl_len--; 187 wwl->wwl_head = work->ww_next; 188 work->ww_next = NULL; 189 190 if (wwl->wwl_head == NULL) 191 wwl->wwl_tail = NULL; 192 } else { 193 KASSERT(wwl->wwl_len == 0); 194 } 195 196 return work; 197} 198 199static void 200wqinput_work(struct work *wk, void *arg) 201{ 202 struct wqinput *wqi = arg; 203 struct wqinput_work *work; 204 struct wqinput_worklist *wwl; 205 int s; 206 207 /* Users expect to run at IPL_SOFTNET */ 208 s = splsoftnet(); 209 /* This also prevents LWP migrations between CPUs */ 210 wwl = percpu_getref(wqi->wqi_worklists); 211 212 /* We can allow enqueuing another work at this point */ 213 wwl->wwl_wq_is_active = false; 214 215 while ((work = wqinput_work_get(wwl)) != NULL) { 216 mutex_enter(softnet_lock); 217 KERNEL_LOCK_UNLESS_NET_MPSAFE(); 218 wqi->wqi_input(work->ww_mbuf, work->ww_off, work->ww_proto); 219 KERNEL_UNLOCK_UNLESS_NET_MPSAFE(); 220 mutex_exit(softnet_lock); 221 222 pool_put(&wqi->wqi_work_pool, work); 223 } 224 225 percpu_putref(wqi->wqi_worklists); 226 splx(s); 227} 228 229static void 230wqinput_work_put(struct wqinput_worklist *wwl, struct wqinput_work *work) 231{ 232 233 if (wwl->wwl_tail != NULL) { 234 wwl->wwl_tail->ww_next = work; 235 } else { 236 wwl->wwl_head = work; 237 } 238 wwl->wwl_tail = work; 239 wwl->wwl_len++; 240} 241 242void 243wqinput_input(struct wqinput *wqi, struct mbuf *m, int off, int proto) 244{ 245 struct wqinput_work *work; 246 struct wqinput_worklist *wwl; 247 248 wwl = percpu_getref(wqi->wqi_worklists); 249 250 /* Prevent too much work and mbuf from being queued */ 251 if (wwl->wwl_len >= WQINPUT_LIST_MAXLEN) { 252 wwl->wwl_dropped++; 253 m_freem(m); 254 goto out; 255 } 256 257 work = pool_get(&wqi->wqi_work_pool, PR_NOWAIT); 258 if (work == NULL) { 259 wwl->wwl_dropped++; 260 m_freem(m); 261 
goto out; 262 } 263 work->ww_mbuf = m; 264 work->ww_off = off; 265 work->ww_proto = proto; 266 work->ww_next = NULL; 267 268 wqinput_work_put(wwl, work); 269 270 /* Avoid enqueuing another work when one is already enqueued */ 271 if (wwl->wwl_wq_is_active) 272 goto out; 273 wwl->wwl_wq_is_active = true; 274 275 workqueue_enqueue(wqi->wqi_wq, &wwl->wwl_work, NULL); 276out: 277 percpu_putref(wqi->wqi_worklists); 278} 279