1/*	$NetBSD: wqinput.c,v 1.8 2020/02/07 12:35:33 thorpej Exp $	*/
2
3/*-
4 * Copyright (c) 2017 Internet Initiative Japan Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#ifdef _KERNEL_OPT
30#include "opt_net_mpsafe.h"
31#endif
32
33#include <sys/param.h>
34#include <sys/kmem.h>
35#include <sys/mbuf.h>
36#include <sys/protosw.h>
37#include <sys/socketvar.h>
38#include <sys/syslog.h>
39#include <sys/workqueue.h>
40#include <sys/atomic.h>
41#include <sys/queue.h>
42#include <sys/percpu.h>
43#include <sys/sysctl.h>
44#include <sys/xcall.h>
45
46#include <net/if.h>
47#include <netinet/wqinput.h>
48
/*
 * Maximum number of work items that may sit on one worklist;
 * wqinput_input() drops the packet once wwl_len reaches this value.
 */
#define WQINPUT_LIST_MAXLEN	IFQ_MAXLEN
50
/*
 * One queued input request: the arguments saved by wqinput_input() for a
 * later call to the wqi_input function from wqinput_work().
 */
struct wqinput_work {
	struct mbuf	*ww_mbuf;	/* packet to hand to wqi_input */
	int		ww_off;		/* "off" argument for wqi_input */
	int		ww_proto;	/* "proto" argument for wqi_input */
	struct wqinput_work *ww_next;	/* next item on the worklist, or NULL */
};
57
/*
 * A singly-linked FIFO of pending work items plus its bookkeeping.
 * One instance is allocated for each percpu(9) slot by
 * wqinput_percpu_init_cpu().
 */
struct wqinput_worklist {
	/*
	 * XXX: TAILQ cannot be used because TAILQ_INIT remembers the address
	 * of percpu data while percpu(9) may move percpu data during bootup.
	 */
	struct wqinput_work *wwl_head;	/* oldest queued item, or NULL */
	struct wqinput_work *wwl_tail;	/* newest queued item, or NULL */
	unsigned int	wwl_len;	/* number of items on the list */
	uint64_t	wwl_dropped;	/* packets dropped by wqinput_input() */
	struct work	wwl_work;	/* handle passed to workqueue_enqueue() */
	/*
	 * Set when wwl_work is enqueued and cleared when wqinput_work()
	 * starts running, so that wwl_work is not enqueued twice.
	 */
	bool		wwl_wq_is_active;
};
70
/*
 * Per-protocol state created by wqinput_create(): the workqueue, the pool
 * that work items are allocated from, the per-CPU worklists and the input
 * function that is eventually called for every queued packet.
 */
struct wqinput {
	struct workqueue *wqi_wq;	/* runs wqinput_work() */
	struct pool	wqi_work_pool;	/* allocator for struct wqinput_work */
	struct percpu	*wqi_worklists; /* struct wqinput_worklist * per slot */
	void    	(*wqi_input)(struct mbuf *, int, int); /* deferred input function */
};
77
/* Forward declarations for functions used before their definitions. */
static void wqinput_work(struct work *, void *);
static void wqinput_sysctl_setup(const char *, struct wqinput *);
80
81static void
82wqinput_drops(void *p, void *arg, struct cpu_info *ci __unused)
83{
84	struct wqinput_worklist **const wwlp = p;
85	struct wqinput_worklist *const wwl = *wwlp;
86	uint64_t *sum = arg;
87
88	*sum += wwl->wwl_dropped;
89}
90
91static int
92wqinput_sysctl_drops_handler(SYSCTLFN_ARGS)
93{
94	struct sysctlnode node;
95	struct wqinput *wqi;
96	uint64_t sum = 0;
97	int error;
98
99	node = *rnode;
100	wqi = node.sysctl_data;
101
102	percpu_foreach_xcall(wqi->wqi_worklists, XC_HIGHPRI_IPL(IPL_SOFTNET),
103	    wqinput_drops, &sum);
104
105	node.sysctl_data = &sum;
106	error = sysctl_lookup(SYSCTLFN_CALL(&node));
107	if (error != 0 || newp == NULL)
108		return error;
109
110	return 0;
111}
112
113static void
114wqinput_sysctl_setup(const char *name, struct wqinput *wqi)
115{
116	const struct sysctlnode *cnode, *rnode;
117	int error;
118
119	error = sysctl_createv(NULL, 0, NULL, &rnode,
120	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "wqinput",
121	    SYSCTL_DESCR("workqueue-based pr_input controls"),
122	    NULL, 0, NULL, 0, CTL_NET, CTL_CREATE, CTL_EOL);
123	if (error != 0)
124		goto bad;
125
126	error = sysctl_createv(NULL, 0, &rnode, &rnode,
127	    CTLFLAG_PERMANENT, CTLTYPE_NODE, name,
128	    SYSCTL_DESCR("Protocol controls for workqueue-based pr_input"),
129	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
130	if (error != 0)
131		goto bad;
132
133	error = sysctl_createv(NULL, 0, &rnode, &rnode,
134	    CTLFLAG_PERMANENT, CTLTYPE_NODE, "inputq",
135	    SYSCTL_DESCR("wqinput input queue controls"),
136	    NULL, 0, NULL, 0, CTL_CREATE, CTL_EOL);
137	if (error != 0)
138		goto bad;
139
140	error = sysctl_createv(NULL, 0, &rnode, &cnode,
141	    CTLFLAG_PERMANENT, CTLTYPE_QUAD, "drops",
142	    SYSCTL_DESCR("Total packets dropped due to full input queue"),
143	    wqinput_sysctl_drops_handler, 0, (void *)wqi, 0, CTL_CREATE, CTL_EOL);
144	if (error != 0)
145		goto bad;
146
147	return;
148bad:
149	log(LOG_ERR, "%s: could not create a sysctl node for %s\n",
150	    __func__, name);
151	return;
152}
153
154static struct wqinput_worklist *
155wqinput_percpu_getref(percpu_t *pc)
156{
157
158	return *(struct wqinput_worklist **)percpu_getref(pc);
159}
160
/*
 * Release the percpu(9) reference taken by wqinput_percpu_getref().
 */
static void
wqinput_percpu_putref(percpu_t *pc)
{

	percpu_putref(pc);
}
167
168static void
169wqinput_percpu_init_cpu(void *p, void *arg __unused, struct cpu_info *ci __unused)
170{
171	struct wqinput_worklist **wwlp = p;
172
173	*wwlp = kmem_zalloc(sizeof(**wwlp), KM_SLEEP);
174}
175
176struct wqinput *
177wqinput_create(const char *name, void (*func)(struct mbuf *, int, int))
178{
179	struct wqinput *wqi;
180	int error;
181	char namebuf[32];
182
183	snprintf(namebuf, sizeof(namebuf), "%s_wqinput", name);
184
185	wqi = kmem_alloc(sizeof(*wqi), KM_SLEEP);
186
187	error = workqueue_create(&wqi->wqi_wq, namebuf, wqinput_work, wqi,
188	    PRI_SOFTNET, IPL_SOFTNET, WQ_MPSAFE|WQ_PERCPU);
189	if (error != 0)
190		panic("%s: workqueue_create failed (%d)\n", __func__, error);
191	pool_init(&wqi->wqi_work_pool, sizeof(struct wqinput_work), 0, 0, 0,
192	    name, NULL, IPL_SOFTNET);
193	wqi->wqi_worklists = percpu_create(sizeof(struct wqinput_worklist *),
194	    wqinput_percpu_init_cpu, NULL, NULL);
195	wqi->wqi_input = func;
196
197	wqinput_sysctl_setup(name, wqi);
198
199	return wqi;
200}
201
202static struct wqinput_work *
203wqinput_work_get(struct wqinput_worklist *wwl)
204{
205	struct wqinput_work *work;
206
207	/* Must be called at IPL_SOFTNET */
208
209	work = wwl->wwl_head;
210	if (work != NULL) {
211		KASSERTMSG(wwl->wwl_len > 0, "wwl->wwl_len=%d", wwl->wwl_len);
212		wwl->wwl_len--;
213		wwl->wwl_head = work->ww_next;
214		work->ww_next = NULL;
215
216		if (wwl->wwl_head == NULL)
217			wwl->wwl_tail = NULL;
218	} else {
219		KASSERT(wwl->wwl_len == 0);
220	}
221
222	return work;
223}
224
225static void
226wqinput_work(struct work *wk, void *arg)
227{
228	struct wqinput *wqi = arg;
229	struct wqinput_work *work;
230	struct wqinput_worklist *wwl;
231	int s;
232
233	/* Users expect to run at IPL_SOFTNET */
234	s = splsoftnet();
235	/* This also prevents LWP migrations between CPUs */
236	wwl = wqinput_percpu_getref(wqi->wqi_worklists);
237
238	/* We can allow enqueuing another work at this point */
239	wwl->wwl_wq_is_active = false;
240
241	while ((work = wqinput_work_get(wwl)) != NULL) {
242		mutex_enter(softnet_lock);
243		KERNEL_LOCK_UNLESS_NET_MPSAFE();
244		wqi->wqi_input(work->ww_mbuf, work->ww_off, work->ww_proto);
245		KERNEL_UNLOCK_UNLESS_NET_MPSAFE();
246		mutex_exit(softnet_lock);
247
248		pool_put(&wqi->wqi_work_pool, work);
249	}
250
251	wqinput_percpu_putref(wqi->wqi_worklists);
252	splx(s);
253}
254
255static void
256wqinput_work_put(struct wqinput_worklist *wwl, struct wqinput_work *work)
257{
258
259	if (wwl->wwl_tail != NULL) {
260		wwl->wwl_tail->ww_next = work;
261	} else {
262		wwl->wwl_head = work;
263	}
264	wwl->wwl_tail = work;
265	wwl->wwl_len++;
266}
267
268void
269wqinput_input(struct wqinput *wqi, struct mbuf *m, int off, int proto)
270{
271	struct wqinput_work *work;
272	struct wqinput_worklist *wwl;
273
274	wwl = wqinput_percpu_getref(wqi->wqi_worklists);
275
276	/* Prevent too much work and mbuf from being queued */
277	if (wwl->wwl_len >= WQINPUT_LIST_MAXLEN) {
278		wwl->wwl_dropped++;
279		m_freem(m);
280		goto out;
281	}
282
283	work = pool_get(&wqi->wqi_work_pool, PR_NOWAIT);
284	if (work == NULL) {
285		wwl->wwl_dropped++;
286		m_freem(m);
287		goto out;
288	}
289	work->ww_mbuf = m;
290	work->ww_off = off;
291	work->ww_proto = proto;
292	work->ww_next = NULL;
293
294	wqinput_work_put(wwl, work);
295
296	/* Avoid enqueuing another work when one is already enqueued */
297	if (wwl->wwl_wq_is_active)
298		goto out;
299	wwl->wwl_wq_is_active = true;
300
301	workqueue_enqueue(wqi->wqi_wq, &wwl->wwl_work, NULL);
302out:
303	wqinput_percpu_putref(wqi->wqi_worklists);
304}
305