/*	$NetBSD: wqinput.c,v 1.5 2018/08/10 07:20:59 msaitoh Exp $	*/

/*-
 * Copyright (c) 2017 Internet Initiative Japan Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

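/*
 * wqinput: defer protocol input (pr_input) processing to a per-CPU
 * workqueue.
 *
 * A protocol creates an instance with wqinput_create() and feeds packets
 * to it with wqinput_input(), which queues each packet on a per-CPU list
 * and schedules the workqueue.  The workqueue handler later drains the
 * list at IPL_SOFTNET and calls the wrapped pr_input routine.  Packets
 * that cannot be queued are dropped and counted in
 * net.wqinput.<name>.inputq.drops.
 *
 * Usage sketch, for a hypothetical protocol "foo" with input routine
 * foo_input():
 *
 *	static struct wqinput *foo_wqinput;
 *
 *	foo_wqinput = wqinput_create("foo", foo_input);
 *	...
 *	wqinput_input(foo_wqinput, m, off, proto);
 */
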
#ifdef _KERNEL_OPT
#include "opt_net_mpsafe.h"
#endif

#include <sys/param.h>
#include <sys/kmem.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/socketvar.h>
#include <sys/syslog.h>
#include <sys/workqueue.h>
#include <sys/atomic.h>
#include <sys/queue.h>
#include <sys/percpu.h>
#include <sys/sysctl.h>

#include <net/if.h>
#include <netinet/wqinput.h>

#define WQINPUT_LIST_MAXLEN	IFQ_MAXLEN

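/*
 * A single deferred pr_input call: the mbuf to be processed together with
 * the offset and protocol arguments, linked into a per-CPU list.
 */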
struct wqinput_work {
	struct mbuf	*ww_mbuf;
	int		ww_off;
	int		ww_proto;
	struct wqinput_work *ww_next;
};

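/*
 * Per-CPU list of pending wqinput_work items, with a counter of dropped
 * packets and the handle used to enqueue the list to the workqueue.
 */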
struct wqinput_worklist {
	/*
	 * XXX: TAILQ cannot be used because TAILQ_INIT records the address
	 * of percpu data, while percpu(9) may move percpu data during bootup.
	 */
	struct wqinput_work *wwl_head;
	struct wqinput_work *wwl_tail;
	unsigned int	wwl_len;
	uint64_t	wwl_dropped;
	struct work	wwl_work;
	bool		wwl_wq_is_active;
};

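/*
 * A wqinput instance: the per-CPU workqueue, the pool backing work items,
 * the per-CPU work lists and the wrapped pr_input routine.
 */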
struct wqinput {
	struct workqueue *wqi_wq;
	struct pool	wqi_work_pool;
	struct percpu	*wqi_worklists; /* struct wqinput_worklist */
	void		(*wqi_input)(struct mbuf *, int, int);
};

static void wqinput_work(struct work *, void *);
static void wqinput_sysctl_setup(const char *, struct wqinput *);

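/*
 * wqinput_drops: percpu_foreach(9) callback that accumulates the per-CPU
 * drop counters into the uint64_t pointed to by arg.
 */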
static void
wqinput_drops(void *p, void *arg, struct cpu_info *ci __unused)
{
	struct wqinput_worklist *const wwl = p;
	uint64_t *sum = arg;

	*sum += wwl->wwl_dropped;
}

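/*
 * wqinput_sysctl_drops_handler: sysctl handler that reports the total
 * number of dropped packets summed over all CPUs.
 */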
static int
wqinput_sysctl_drops_handler(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	struct wqinput *wqi;
	uint64_t sum = 0;
	int error;

	node = *rnode;
	wqi = node.sysctl_data;

	percpu_foreach(wqi->wqi_worklists, wqinput_drops, &sum);

	node.sysctl_data = &sum;
	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error != 0 || newp == NULL)
		return error;

	return 0;
}

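/*
 * wqinput_sysctl_setup: create the read-only sysctl node
 * net.wqinput.<name>.inputq.drops for the given instance.
 */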
static void
wqinput_sysctl_setup(const char *name, struct wqinput *wqi)
{
	const struct sysctlnode *cnode, *rnode;
	int error;

	error = sysctl_createv(NULL, 0, NULL, &rnode,
	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "wqinput",
	    SYSCTL_DESCR("workqueue-based pr_input controls"),
	    NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL);
	if (error != 0)
		goto bad;

	error = sysctl_createv(NULL, 0, &rnode, &rnode,
	    CTLFLAG_PERMANENT, CTLTYPE_NODE, name,
	    SYSCTL_DESCR("Protocol controls for workqueue-based pr_input"),
	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
	if (error != 0)
		goto bad;

	error = sysctl_createv(NULL, 0, &rnode, &rnode,
	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "inputq",
	    SYSCTL_DESCR("wqinput input queue controls"),
	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
	if (error != 0)
		goto bad;

	error = sysctl_createv(NULL, 0, &rnode, &cnode,
	    CTLFLAG_PERMANENT, CTLTYPE_QUAD, "drops",
	    SYSCTL_DESCR("Total packets dropped due to full input queue"),
	    wqinput_sysctl_drops_handler, 0, (void *)wqi, 0,
	    CTL_CREATE, CTL_EOL);
	if (error != 0)
		goto bad;

	return;
bad:
	log(LOG_ERR, "%s: could not create a sysctl node for %s\n",
	    __func__, name);
	return;
}

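/*
 * wqinput_create: create a wqinput instance that defers calls of the given
 * pr_input routine (func) to a per-CPU workqueue named "<name>_wqinput".
 */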
struct wqinput *
wqinput_create(const char *name, void (*func)(struct mbuf *, int, int))
{
	struct wqinput *wqi;
	int error;
	char namebuf[32];

	snprintf(namebuf, sizeof(namebuf), "%s_wqinput", name);

	wqi = kmem_alloc(sizeof(*wqi), KM_SLEEP);

	error = workqueue_create(&wqi->wqi_wq, namebuf, wqinput_work, wqi,
	    PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE|WQ_PERCPU);
	if (error != 0)
		panic("%s: workqueue_create failed (%d)\n", __func__, error);
	pool_init(&wqi->wqi_work_pool, sizeof(struct wqinput_work), 0, 0, 0,
	    name, NULL, IPL_SOFTNET);
	wqi->wqi_worklists = percpu_alloc(sizeof(struct wqinput_worklist));
	wqi->wqi_input = func;

	wqinput_sysctl_setup(name, wqi);

	return wqi;
}

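/*
 * wqinput_work_get: dequeue one work item from the head of the per-CPU
 * list, or return NULL if the list is empty.
 */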
static struct wqinput_work *
wqinput_work_get(struct wqinput_worklist *wwl)
{
	struct wqinput_work *work;

	/* Must be called at IPL_SOFTNET */

	work = wwl->wwl_head;
	if (work != NULL) {
		KASSERTMSG(wwl->wwl_len > 0, "wwl->wwl_len=%u", wwl->wwl_len);
		wwl->wwl_len--;
		wwl->wwl_head = work->ww_next;
		work->ww_next = NULL;

		if (wwl->wwl_head == NULL)
			wwl->wwl_tail = NULL;
	} else {
		KASSERT(wwl->wwl_len == 0);
	}

	return work;
}

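/*
 * wqinput_work: workqueue handler that drains the current CPU's work list
 * and passes each queued packet to the wrapped pr_input routine.
 */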
static void
wqinput_work(struct work *wk, void *arg)
{
	struct wqinput *wqi = arg;
	struct wqinput_work *work;
	struct wqinput_worklist *wwl;
	int s;

	/* The wrapped pr_input routines expect to run at IPL_SOFTNET */
	s = splsoftnet();
	/* This also prevents LWP migrations between CPUs */
	wwl = percpu_getref(wqi->wqi_worklists);

	/* We can allow enqueuing another work item at this point */
	wwl->wwl_wq_is_active = false;

	while ((work = wqinput_work_get(wwl)) != NULL) {
		mutex_enter(softnet_lock);
		KERNEL_LOCK_UNLESS_NET_MPSAFE();
		wqi->wqi_input(work->ww_mbuf, work->ww_off, work->ww_proto);
		KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
		mutex_exit(softnet_lock);

		pool_put(&wqi->wqi_work_pool, work);
	}

	percpu_putref(wqi->wqi_worklists);
	splx(s);
}

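/*
 * wqinput_work_put: append a work item to the tail of the per-CPU list.
 */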
static void
wqinput_work_put(struct wqinput_worklist *wwl, struct wqinput_work *work)
{

	if (wwl->wwl_tail != NULL) {
		wwl->wwl_tail->ww_next = work;
	} else {
		wwl->wwl_head = work;
	}
	wwl->wwl_tail = work;
	wwl->wwl_len++;
}

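/*
 * wqinput_input: queue a received packet for deferred processing on the
 * current CPU's workqueue.  If the per-CPU list is full or no work item
 * can be allocated, the packet is dropped and counted.
 */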
void
wqinput_input(struct wqinput *wqi, struct mbuf *m, int off, int proto)
{
	struct wqinput_work *work;
	struct wqinput_worklist *wwl;

	wwl = percpu_getref(wqi->wqi_worklists);

	/* Prevent too many work items and mbufs from being queued */
	if (wwl->wwl_len >= WQINPUT_LIST_MAXLEN) {
		wwl->wwl_dropped++;
		m_freem(m);
		goto out;
	}

	work = pool_get(&wqi->wqi_work_pool, PR_NOWAIT);
	if (work == NULL) {
		wwl->wwl_dropped++;
		m_freem(m);
		goto out;
	}
	work->ww_mbuf = m;
	work->ww_off = off;
	work->ww_proto = proto;
	work->ww_next = NULL;

	wqinput_work_put(wwl, work);

	/* Avoid enqueuing another workqueue work when one is already enqueued */
	if (wwl->wwl_wq_is_active)
		goto out;
	wwl->wwl_wq_is_active = true;

	workqueue_enqueue(wqi->wqi_wq, &wwl->wwl_work, NULL);
out:
	percpu_putref(wqi->wqi_worklists);
}