/*	$OpenBSD: ifq.c,v 1.53 2023/11/10 15:51:24 bluhm Exp $ */

/*
 * Copyright (c) 2015 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bpfilter.h"
#include "kstat.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/socket.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/sysctl.h>

#include <net/if.h>
#include <net/if_var.h>

#if NBPFILTER > 0
#include <net/bpf.h>
#endif

#if NKSTAT > 0
#include <sys/kstat.h>
#endif

/*
 * priq glue
 */
unsigned int	 priq_idx(unsigned int, const struct mbuf *);
struct mbuf	*priq_enq(struct ifqueue *, struct mbuf *);
struct mbuf	*priq_deq_begin(struct ifqueue *, void **);
void		 priq_deq_commit(struct ifqueue *, struct mbuf *, void *);
void		 priq_purge(struct ifqueue *, struct mbuf_list *);

void		*priq_alloc(unsigned int, void *);
void		 priq_free(unsigned int, void *);

const struct ifq_ops priq_ops = {
	priq_idx,
	priq_enq,
	priq_deq_begin,
	priq_deq_commit,
	priq_purge,
	priq_alloc,
	priq_free,
};

const struct ifq_ops * const ifq_priq_ops = &priq_ops;

/*
 * priq internal structures
 */

struct priq {
	struct mbuf_list	 pq_lists[IFQ_NQUEUES];
};

/*
 * ifqueue serialiser
 */

void	ifq_start_task(void *);
void	ifq_restart_task(void *);
void	ifq_barrier_task(void *);
void	ifq_bundle_task(void *);

static inline void
ifq_run_start(struct ifqueue *ifq)
{
	ifq_serialize(ifq, &ifq->ifq_start);
}

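/*
 * ifq_serialize() queues a task on the ifq and, if no other CPU is
 * currently acting as the serialiser, runs the pending tasks itself.
 * Each task is copied out of the list and executed with ifq_task_mtx
 * released, so only one CPU at a time runs the start/restart work and
 * no lock is held across the call into the driver.
 */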
void
ifq_serialize(struct ifqueue *ifq, struct task *t)
{
	struct task work;

	if (ISSET(t->t_flags, TASK_ONQUEUE))
		return;

	mtx_enter(&ifq->ifq_task_mtx);
	if (!ISSET(t->t_flags, TASK_ONQUEUE)) {
		SET(t->t_flags, TASK_ONQUEUE);
		TAILQ_INSERT_TAIL(&ifq->ifq_task_list, t, t_entry);
	}

	if (ifq->ifq_serializer == NULL) {
		ifq->ifq_serializer = curcpu();

		while ((t = TAILQ_FIRST(&ifq->ifq_task_list)) != NULL) {
			TAILQ_REMOVE(&ifq->ifq_task_list, t, t_entry);
			CLR(t->t_flags, TASK_ONQUEUE);
			work = *t; /* copy to caller to avoid races */

			mtx_leave(&ifq->ifq_task_mtx);

			(*work.t_func)(work.t_arg);

			mtx_enter(&ifq->ifq_task_mtx);
		}

		ifq->ifq_serializer = NULL;
	}
	mtx_leave(&ifq->ifq_task_mtx);
}

int
ifq_is_serialized(struct ifqueue *ifq)
{
	return (ifq->ifq_serializer == curcpu());
}

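/*
 * ifq_start() either kicks the start routine straight away or defers
 * it to the softnet bundle task: once enough packets are queued
 * (if_txmit, or the ifq maxlen if that is smaller) the queue is run
 * immediately, otherwise the work is bundled so several small
 * enqueues can be batched before the driver is started.
 */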
void
ifq_start(struct ifqueue *ifq)
{
	if (ifq_len(ifq) >= min(ifq->ifq_if->if_txmit, ifq->ifq_maxlen)) {
		task_del(ifq->ifq_softnet, &ifq->ifq_bundle);
		ifq_run_start(ifq);
	} else
		task_add(ifq->ifq_softnet, &ifq->ifq_bundle);
}

void
ifq_start_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	if (!ISSET(ifp->if_flags, IFF_RUNNING) ||
	    ifq_empty(ifq) || ifq_is_oactive(ifq))
		return;

	ifp->if_qstart(ifq);
}

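/*
 * ifq_set_oactive() marks the queue as "output active", i.e. the
 * driver cannot take more packets for now.  ifq_oactives counts how
 * often this happened and is exported via kstat.
 */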
void
ifq_set_oactive(struct ifqueue *ifq)
{
	if (ifq->ifq_oactive)
		return;

	mtx_enter(&ifq->ifq_mtx);
	if (!ifq->ifq_oactive) {
		ifq->ifq_oactive = 1;
		ifq->ifq_oactives++;
	}
	mtx_leave(&ifq->ifq_mtx);
}

void
ifq_restart_task(void *p)
{
	struct ifqueue *ifq = p;
	struct ifnet *ifp = ifq->ifq_if;

	ifq_clr_oactive(ifq);
	ifp->if_qstart(ifq);
}

void
ifq_bundle_task(void *p)
{
	struct ifqueue *ifq = p;

	ifq_run_start(ifq);
}

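/*
 * ifq_barrier() waits until every task queued on the ifq serialiser
 * before this point has run.  If nothing is being serialised there is
 * nothing to wait for; otherwise a task that signals the cond is
 * pushed through the serialiser and waited on.
 */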
void
ifq_barrier(struct ifqueue *ifq)
{
	struct cond c = COND_INITIALIZER();
	struct task t = TASK_INITIALIZER(ifq_barrier_task, &c);

	task_del(ifq->ifq_softnet, &ifq->ifq_bundle);

	if (ifq->ifq_serializer == NULL)
		return;

	ifq_serialize(ifq, &t);

	cond_wait(&c, "ifqbar");
}

void
ifq_barrier_task(void *p)
{
	struct cond *c = p;

	cond_signal(c);
}

/*
 * ifqueue mbuf queue API
 */

#if NKSTAT > 0
struct ifq_kstat_data {
	struct kstat_kv kd_packets;
	struct kstat_kv kd_bytes;
	struct kstat_kv kd_qdrops;
	struct kstat_kv kd_errors;
	struct kstat_kv kd_qlen;
	struct kstat_kv kd_maxqlen;
	struct kstat_kv kd_oactive;
	struct kstat_kv kd_oactives;
};

static const struct ifq_kstat_data ifq_kstat_tpl = {
	KSTAT_KV_UNIT_INITIALIZER("packets",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("bytes",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES),
	KSTAT_KV_UNIT_INITIALIZER("qdrops",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("errors",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("qlen",
	    KSTAT_KV_T_UINT32, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("maxqlen",
	    KSTAT_KV_T_UINT32, KSTAT_KV_U_PACKETS),
	KSTAT_KV_INITIALIZER("oactive", KSTAT_KV_T_BOOL),
	KSTAT_KV_INITIALIZER("oactives", KSTAT_KV_T_COUNTER32),
};

int
ifq_kstat_copy(struct kstat *ks, void *dst)
{
	struct ifqueue *ifq = ks->ks_softc;
	struct ifq_kstat_data *kd = dst;

	*kd = ifq_kstat_tpl;
	kstat_kv_u64(&kd->kd_packets) = ifq->ifq_packets;
	kstat_kv_u64(&kd->kd_bytes) = ifq->ifq_bytes;
	kstat_kv_u64(&kd->kd_qdrops) = ifq->ifq_qdrops;
	kstat_kv_u64(&kd->kd_errors) = ifq->ifq_errors;
	kstat_kv_u32(&kd->kd_qlen) = ifq->ifq_len;
	kstat_kv_u32(&kd->kd_maxqlen) = ifq->ifq_maxlen;
	kstat_kv_bool(&kd->kd_oactive) = ifq->ifq_oactive;
	kstat_kv_u32(&kd->kd_oactives) = ifq->ifq_oactives;

	return (0);
}
#endif

void
ifq_init(struct ifqueue *ifq, struct ifnet *ifp, unsigned int idx)
{
	ifq->ifq_if = ifp;
	ifq->ifq_softnet = net_tq(idx);
	ifq->ifq_softc = NULL;

	mtx_init(&ifq->ifq_mtx, IPL_NET);

	/* default to priq */
	ifq->ifq_ops = &priq_ops;
	ifq->ifq_q = priq_ops.ifqop_alloc(idx, NULL);

	ml_init(&ifq->ifq_free);
	ifq->ifq_len = 0;

	ifq->ifq_packets = 0;
	ifq->ifq_bytes = 0;
	ifq->ifq_qdrops = 0;
	ifq->ifq_errors = 0;
	ifq->ifq_mcasts = 0;

	mtx_init(&ifq->ifq_task_mtx, IPL_NET);
	TAILQ_INIT(&ifq->ifq_task_list);
	ifq->ifq_serializer = NULL;
	task_set(&ifq->ifq_bundle, ifq_bundle_task, ifq);

	task_set(&ifq->ifq_start, ifq_start_task, ifq);
	task_set(&ifq->ifq_restart, ifq_restart_task, ifq);

	if (ifq->ifq_maxlen == 0)
		ifq_init_maxlen(ifq, IFQ_MAXLEN);

	ifq->ifq_idx = idx;

#if NKSTAT > 0
	/* XXX xname vs driver name and unit */
	ifq->ifq_kstat = kstat_create(ifp->if_xname, 0,
	    "txq", ifq->ifq_idx, KSTAT_T_KV, 0);
	KASSERT(ifq->ifq_kstat != NULL);
	kstat_set_mutex(ifq->ifq_kstat, &ifq->ifq_mtx);
	ifq->ifq_kstat->ks_softc = ifq;
	ifq->ifq_kstat->ks_datalen = sizeof(ifq_kstat_tpl);
	ifq->ifq_kstat->ks_copy = ifq_kstat_copy;
	kstat_install(ifq->ifq_kstat);
#endif
}

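/*
 * ifq_attach() switches the queueing discipline of an ifq.  The old
 * discipline is purged and the packets it held are re-enqueued on the
 * new one; anything the new discipline refuses to take is dropped and
 * accounted as a qdrop.
 */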
void
ifq_attach(struct ifqueue *ifq, const struct ifq_ops *newops, void *opsarg)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf_list free_ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;
	const struct ifq_ops *oldops;
	void *newq, *oldq;

	newq = newops->ifqop_alloc(ifq->ifq_idx, opsarg);

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_len = 0;

	oldops = ifq->ifq_ops;
	oldq = ifq->ifq_q;

	ifq->ifq_ops = newops;
	ifq->ifq_q = newq;

	while ((m = ml_dequeue(&ml)) != NULL) {
		m = ifq->ifq_ops->ifqop_enq(ifq, m);
		if (m != NULL) {
			ifq->ifq_qdrops++;
			ml_enqueue(&free_ml, m);
		} else
			ifq->ifq_len++;
	}
	mtx_leave(&ifq->ifq_mtx);

	oldops->ifqop_free(ifq->ifq_idx, oldq);

	ml_purge(&free_ml);
}

void
ifq_destroy(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();

#if NKSTAT > 0
	kstat_destroy(ifq->ifq_kstat);
#endif

	NET_ASSERT_UNLOCKED();
	if (!task_del(ifq->ifq_softnet, &ifq->ifq_bundle))
		taskq_barrier(ifq->ifq_softnet);

	/* don't need to lock because this is the last use of the ifq */

	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	ifq->ifq_ops->ifqop_free(ifq->ifq_idx, ifq->ifq_q);

	ml_purge(&ml);
}

void
ifq_add_data(struct ifqueue *ifq, struct if_data *data)
{
	mtx_enter(&ifq->ifq_mtx);
	data->ifi_opackets += ifq->ifq_packets;
	data->ifi_obytes += ifq->ifq_bytes;
	data->ifi_oqdrops += ifq->ifq_qdrops;
	data->ifi_omcasts += ifq->ifq_mcasts;
	/* ifp->if_data.ifi_oerrors */
	mtx_leave(&ifq->ifq_mtx);
}

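/*
 * ifq_enqueue() hands an mbuf to the current discipline.  The
 * discipline may accept the packet (returns NULL), accept it but drop
 * another packet to make room (returns the victim), or reject it
 * (returns the mbuf it was given).  Rejection is reported to the
 * caller as ENOBUFS; any returned mbuf is freed here.
 */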
int
ifq_enqueue(struct ifqueue *ifq, struct mbuf *m)
{
	struct mbuf *dm;

	mtx_enter(&ifq->ifq_mtx);
	dm = ifq->ifq_ops->ifqop_enq(ifq, m);
	if (dm != m) {
		ifq->ifq_packets++;
		ifq->ifq_bytes += m->m_pkthdr.len;
		if (ISSET(m->m_flags, M_MCAST))
			ifq->ifq_mcasts++;
	}

	if (dm == NULL)
		ifq->ifq_len++;
	else
		ifq->ifq_qdrops++;
	mtx_leave(&ifq->ifq_mtx);

	if (dm != NULL)
		m_freem(dm);

	return (dm == m ? ENOBUFS : 0);
}

static inline void
ifq_deq_enter(struct ifqueue *ifq)
{
	mtx_enter(&ifq->ifq_mtx);
}

static inline void
ifq_deq_leave(struct ifqueue *ifq)
{
	struct mbuf_list ml;

	ml = ifq->ifq_free;
	ml_init(&ifq->ifq_free);

	mtx_leave(&ifq->ifq_mtx);

	if (!ml_empty(&ml))
		ml_purge(&ml);
}

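/*
 * Packets are dequeued in two steps so a driver can look at the next
 * packet and put it back with ifq_deq_rollback() if it does not fit
 * on the hardware ring, or take it for good with ifq_deq_commit().
 * The discipline's cookie travels in m->m_pkthdr.ph_cookie between
 * the two calls.  A start routine would, roughly, look like this
 * (ring_has_space() stands in for a driver specific check):
 *
 *	for (;;) {
 *		m = ifq_deq_begin(ifq);
 *		if (m == NULL)
 *			break;
 *		if (!ring_has_space(sc)) {
 *			ifq_deq_rollback(ifq, m);
 *			break;
 *		}
 *		ifq_deq_commit(ifq, m);
 *		...
 *	}
 */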
struct mbuf *
ifq_deq_begin(struct ifqueue *ifq)
{
	struct mbuf *m = NULL;
	void *cookie;

	ifq_deq_enter(ifq);
	if (ifq->ifq_len == 0 ||
	    (m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie)) == NULL) {
		ifq_deq_leave(ifq);
		return (NULL);
	}

	m->m_pkthdr.ph_cookie = cookie;

	return (m);
}

void
ifq_deq_commit(struct ifqueue *ifq, struct mbuf *m)
{
	void *cookie;

	KASSERT(m != NULL);
	cookie = m->m_pkthdr.ph_cookie;

	ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
	ifq->ifq_len--;
	ifq_deq_leave(ifq);
}

void
ifq_deq_rollback(struct ifqueue *ifq, struct mbuf *m)
{
	KASSERT(m != NULL);

	ifq_deq_leave(ifq);
}

struct mbuf *
ifq_dequeue(struct ifqueue *ifq)
{
	struct mbuf *m;

	m = ifq_deq_begin(ifq);
	if (m == NULL)
		return (NULL);

	ifq_deq_commit(ifq, m);

	return (m);
}

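/*
 * ifq_deq_sleep() dequeues a packet, sleeping on the ifq until one is
 * available.  With nbio set an empty queue returns EWOULDBLOCK
 * instead; the sleep also ends with EIO once *alive indicates the
 * interface is gone, and *sleeping lets the caller see how many
 * threads are currently waiting.
 */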
int
ifq_deq_sleep(struct ifqueue *ifq, struct mbuf **mp, int nbio, int priority,
    const char *wmesg, volatile unsigned int *sleeping,
    volatile unsigned int *alive)
{
	struct mbuf *m;
	void *cookie;
	int error = 0;

	ifq_deq_enter(ifq);
	if (ifq->ifq_len == 0 && nbio)
		error = EWOULDBLOCK;
	else {
		for (;;) {
			m = ifq->ifq_ops->ifqop_deq_begin(ifq, &cookie);
			if (m != NULL) {
				ifq->ifq_ops->ifqop_deq_commit(ifq, m, cookie);
				ifq->ifq_len--;
				*mp = m;
				break;
			}

			(*sleeping)++;
			error = msleep_nsec(ifq, &ifq->ifq_mtx,
			    priority, wmesg, INFSLP);
			(*sleeping)--;
			if (error != 0)
				break;
			if (!(*alive)) {
				error = EIO;
				break;
			}
		}
	}
	ifq_deq_leave(ifq);

	return (error);
}

int
ifq_hdatalen(struct ifqueue *ifq)
{
	struct mbuf *m;
	int len = 0;

	if (ifq_empty(ifq))
		return (0);

	m = ifq_deq_begin(ifq);
	if (m != NULL) {
		len = m->m_pkthdr.len;
		ifq_deq_rollback(ifq, m);
	}

	return (len);
}

void
ifq_init_maxlen(struct ifqueue *ifq, unsigned int maxlen)
{
	/* this is not MP safe, use only during attach */
	ifq->ifq_maxlen = maxlen;
}

unsigned int
ifq_purge(struct ifqueue *ifq)
{
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	unsigned int rv;

	mtx_enter(&ifq->ifq_mtx);
	ifq->ifq_ops->ifqop_purge(ifq, &ml);
	rv = ifq->ifq_len;
	ifq->ifq_len = 0;
	ifq->ifq_qdrops += rv;
	mtx_leave(&ifq->ifq_mtx);

	KASSERT(rv == ml_len(&ml));

	ml_purge(&ml);

	return (rv);
}

void *
ifq_q_enter(struct ifqueue *ifq, const struct ifq_ops *ops)
{
	mtx_enter(&ifq->ifq_mtx);
	if (ifq->ifq_ops == ops)
		return (ifq->ifq_q);

	mtx_leave(&ifq->ifq_mtx);

	return (NULL);
}

void
ifq_q_leave(struct ifqueue *ifq, void *q)
{
	KASSERT(q == ifq->ifq_q);
	mtx_leave(&ifq->ifq_mtx);
}

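/*
 * ifq_mfreem() and ifq_mfreeml() let a queueing discipline drop
 * packets while the ifq mutex is held.  The mbufs are stashed on
 * ifq_free and only freed once the mutex has been released in
 * ifq_deq_leave().
 */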
void
ifq_mfreem(struct ifqueue *ifq, struct mbuf *m)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len--;
	ifq->ifq_qdrops++;
	ml_enqueue(&ifq->ifq_free, m);
}

void
ifq_mfreeml(struct ifqueue *ifq, struct mbuf_list *ml)
{
	MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);

	ifq->ifq_len -= ml_len(ml);
	ifq->ifq_qdrops += ml_len(ml);
	ml_enlist(&ifq->ifq_free, ml);
}

/*
 * ifiq
 */

#if NKSTAT > 0
struct ifiq_kstat_data {
	struct kstat_kv kd_packets;
	struct kstat_kv kd_bytes;
	struct kstat_kv kd_fdrops;
	struct kstat_kv kd_qdrops;
	struct kstat_kv kd_errors;
	struct kstat_kv kd_qlen;

	struct kstat_kv kd_enqueues;
	struct kstat_kv kd_dequeues;
};

static const struct ifiq_kstat_data ifiq_kstat_tpl = {
	KSTAT_KV_UNIT_INITIALIZER("packets",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("bytes",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_BYTES),
	KSTAT_KV_UNIT_INITIALIZER("fdrops",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("qdrops",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("errors",
	    KSTAT_KV_T_COUNTER64, KSTAT_KV_U_PACKETS),
	KSTAT_KV_UNIT_INITIALIZER("qlen",
	    KSTAT_KV_T_UINT32, KSTAT_KV_U_PACKETS),

	KSTAT_KV_INITIALIZER("enqueues",
	    KSTAT_KV_T_COUNTER64),
	KSTAT_KV_INITIALIZER("dequeues",
	    KSTAT_KV_T_COUNTER64),
};

int
ifiq_kstat_copy(struct kstat *ks, void *dst)
{
	struct ifiqueue *ifiq = ks->ks_softc;
	struct ifiq_kstat_data *kd = dst;

	*kd = ifiq_kstat_tpl;
	kstat_kv_u64(&kd->kd_packets) = ifiq->ifiq_packets;
	kstat_kv_u64(&kd->kd_bytes) = ifiq->ifiq_bytes;
	kstat_kv_u64(&kd->kd_fdrops) = ifiq->ifiq_fdrops;
	kstat_kv_u64(&kd->kd_qdrops) = ifiq->ifiq_qdrops;
	kstat_kv_u64(&kd->kd_errors) = ifiq->ifiq_errors;
	kstat_kv_u32(&kd->kd_qlen) = ml_len(&ifiq->ifiq_ml);

	kstat_kv_u64(&kd->kd_enqueues) = ifiq->ifiq_enqueues;
	kstat_kv_u64(&kd->kd_dequeues) = ifiq->ifiq_dequeues;

	return (0);
}
#endif

static void	ifiq_process(void *);

void
ifiq_init(struct ifiqueue *ifiq, struct ifnet *ifp, unsigned int idx)
{
	ifiq->ifiq_if = ifp;
	ifiq->ifiq_softnet = net_tq(idx);
	ifiq->ifiq_softc = NULL;

	mtx_init(&ifiq->ifiq_mtx, IPL_NET);
	ml_init(&ifiq->ifiq_ml);
	task_set(&ifiq->ifiq_task, ifiq_process, ifiq);
	ifiq->ifiq_pressure = 0;

	ifiq->ifiq_packets = 0;
	ifiq->ifiq_bytes = 0;
	ifiq->ifiq_fdrops = 0;
	ifiq->ifiq_qdrops = 0;
	ifiq->ifiq_errors = 0;

	ifiq->ifiq_idx = idx;

#if NKSTAT > 0
	/* XXX xname vs driver name and unit */
	ifiq->ifiq_kstat = kstat_create(ifp->if_xname, 0,
	    "rxq", ifiq->ifiq_idx, KSTAT_T_KV, 0);
	KASSERT(ifiq->ifiq_kstat != NULL);
	kstat_set_mutex(ifiq->ifiq_kstat, &ifiq->ifiq_mtx);
	ifiq->ifiq_kstat->ks_softc = ifiq;
	ifiq->ifiq_kstat->ks_datalen = sizeof(ifiq_kstat_tpl);
	ifiq->ifiq_kstat->ks_copy = ifiq_kstat_copy;
	kstat_install(ifiq->ifiq_kstat);
#endif
}

void
ifiq_destroy(struct ifiqueue *ifiq)
{
#if NKSTAT > 0
	kstat_destroy(ifiq->ifiq_kstat);
#endif

	NET_ASSERT_UNLOCKED();
	if (!task_del(ifiq->ifiq_softnet, &ifiq->ifiq_task))
		taskq_barrier(ifiq->ifiq_softnet);

	/* don't need to lock because this is the last use of the ifiq */
	ml_purge(&ifiq->ifiq_ml);
}

unsigned int ifiq_maxlen_drop = 2048 * 5;
unsigned int ifiq_maxlen_return = 2048 * 3;

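/*
 * ifiq_input() takes a list of packets from a driver's interrupt
 * handler, runs them past bpf, and appends them to the ifiq for the
 * softnet task to process.  The return value is a backpressure hint:
 * non-zero means more than ifiq_maxlen_return packets are already
 * pending, so the driver can slow its input down; once more than
 * ifiq_maxlen_drop packets are pending, new input is dropped outright.
 */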
int
ifiq_input(struct ifiqueue *ifiq, struct mbuf_list *ml)
{
	struct ifnet *ifp = ifiq->ifiq_if;
	struct mbuf *m;
	uint64_t packets;
	uint64_t bytes = 0;
	uint64_t fdrops = 0;
	unsigned int len;
#if NBPFILTER > 0
	caddr_t if_bpf;
#endif

	if (ml_empty(ml))
		return (0);

	MBUF_LIST_FOREACH(ml, m) {
		m->m_pkthdr.ph_ifidx = ifp->if_index;
		m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
		bytes += m->m_pkthdr.len;
	}
	packets = ml_len(ml);

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		struct mbuf_list ml0 = *ml;

		ml_init(ml);

		while ((m = ml_dequeue(&ml0)) != NULL) {
			if ((*ifp->if_bpf_mtap)(if_bpf, m, BPF_DIRECTION_IN)) {
				m_freem(m);
				fdrops++;
			} else
				ml_enqueue(ml, m);
		}

		if (ml_empty(ml)) {
			mtx_enter(&ifiq->ifiq_mtx);
			ifiq->ifiq_packets += packets;
			ifiq->ifiq_bytes += bytes;
			ifiq->ifiq_fdrops += fdrops;
			mtx_leave(&ifiq->ifiq_mtx);

			return (0);
		}
	}
#endif

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_packets += packets;
	ifiq->ifiq_bytes += bytes;
	ifiq->ifiq_fdrops += fdrops;

	len = ml_len(&ifiq->ifiq_ml);
	if (__predict_true(!ISSET(ifp->if_xflags, IFXF_MONITOR))) {
		if (len > ifiq_maxlen_drop)
			ifiq->ifiq_qdrops += ml_len(ml);
		else {
			ifiq->ifiq_enqueues++;
			ml_enlist(&ifiq->ifiq_ml, ml);
		}
	}
	mtx_leave(&ifiq->ifiq_mtx);

	if (ml_empty(ml))
		task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);
	else
		ml_purge(ml);

	return (len > ifiq_maxlen_return);
}

void
ifiq_add_data(struct ifiqueue *ifiq, struct if_data *data)
{
	mtx_enter(&ifiq->ifiq_mtx);
	data->ifi_ipackets += ifiq->ifiq_packets;
	data->ifi_ibytes += ifiq->ifiq_bytes;
	data->ifi_iqdrops += ifiq->ifiq_qdrops;
	mtx_leave(&ifiq->ifiq_mtx);
}

int
ifiq_enqueue(struct ifiqueue *ifiq, struct mbuf *m)
{
	struct ifnet *ifp = ifiq->ifiq_if;
#if NBPFILTER > 0
	caddr_t if_bpf = ifp->if_bpf;
#endif

	m->m_pkthdr.ph_ifidx = ifp->if_index;
	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;

#if NBPFILTER > 0
	if_bpf = ifp->if_bpf;
	if (if_bpf) {
		if ((*ifp->if_bpf_mtap)(if_bpf, m, BPF_DIRECTION_IN)) {
			mtx_enter(&ifiq->ifiq_mtx);
			ifiq->ifiq_packets++;
			ifiq->ifiq_bytes += m->m_pkthdr.len;
			ifiq->ifiq_fdrops++;
			mtx_leave(&ifiq->ifiq_mtx);

			m_freem(m);
			return (0);
		}
	}
#endif

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_packets++;
	ifiq->ifiq_bytes += m->m_pkthdr.len;
	ifiq->ifiq_enqueues++;
	ml_enqueue(&ifiq->ifiq_ml, m);
	mtx_leave(&ifiq->ifiq_mtx);

	task_add(ifiq->ifiq_softnet, &ifiq->ifiq_task);

	return (0);
}

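/*
 * ifiq_process() runs from the softnet taskq.  It takes the whole
 * pending mbuf list off the ifiq in one go and feeds it to the
 * network stack via if_input_process().
 */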
static void
ifiq_process(void *arg)
{
	struct ifiqueue *ifiq = arg;
	struct mbuf_list ml;

	if (ifiq_empty(ifiq))
		return;

	mtx_enter(&ifiq->ifiq_mtx);
	ifiq->ifiq_dequeues++;
	ml = ifiq->ifiq_ml;
	ml_init(&ifiq->ifiq_ml);
	mtx_leave(&ifiq->ifiq_mtx);

	if_input_process(ifiq->ifiq_if, &ml);
}

int
net_ifiq_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
    void *newp, size_t newlen)
{
	int error = EOPNOTSUPP;
/* pressure is disabled for 6.6-release */
#if 0
	int val;

	if (namelen != 1)
		return (EISDIR);

	switch (name[0]) {
	case NET_LINK_IFRXQ_PRESSURE_RETURN:
		val = ifiq_pressure_return;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &val);
		if (error != 0)
			return (error);
		if (val < 1 || val > ifiq_pressure_drop)
			return (EINVAL);
		ifiq_pressure_return = val;
		break;
	case NET_LINK_IFRXQ_PRESSURE_DROP:
		val = ifiq_pressure_drop;
		error = sysctl_int(oldp, oldlenp, newp, newlen, &val);
		if (error != 0)
			return (error);
		if (ifiq_pressure_return > val)
			return (EINVAL);
		ifiq_pressure_drop = val;
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
#endif

	return (error);
}

/*
 * priq implementation
 */

unsigned int
priq_idx(unsigned int nqueues, const struct mbuf *m)
{
	unsigned int flow = 0;

	if (ISSET(m->m_pkthdr.csum_flags, M_FLOWID))
		flow = m->m_pkthdr.ph_flowid;

	return (flow % nqueues);
}

void *
priq_alloc(unsigned int idx, void *null)
{
	struct priq *pq;
	int i;

	pq = malloc(sizeof(struct priq), M_DEVBUF, M_WAITOK);
	for (i = 0; i < IFQ_NQUEUES; i++)
		ml_init(&pq->pq_lists[i]);
	return (pq);
}

void
priq_free(unsigned int idx, void *pq)
{
	free(pq, M_DEVBUF, sizeof(struct priq));
}

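/*
 * priq_enq() appends the mbuf to the list matching its pf.prio.  When
 * the ifq is full, a packet from a strictly lower priority list is
 * dropped to make room; if there is no such packet, the new mbuf
 * itself is rejected and handed back to the caller.
 */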
struct mbuf *
priq_enq(struct ifqueue *ifq, struct mbuf *m)
{
	struct priq *pq;
	struct mbuf_list *pl;
	struct mbuf *n = NULL;
	unsigned int prio;

	pq = ifq->ifq_q;
	KASSERT(m->m_pkthdr.pf.prio <= IFQ_MAXPRIO);

	/* Find a lower priority queue to drop from */
	if (ifq_len(ifq) >= ifq->ifq_maxlen) {
		for (prio = 0; prio < m->m_pkthdr.pf.prio; prio++) {
			pl = &pq->pq_lists[prio];
			if (ml_len(pl) > 0) {
				n = ml_dequeue(pl);
				goto enqueue;
			}
		}
		/*
		 * There's no lower priority queue that we can
		 * drop from so don't enqueue this one.
		 */
		return (m);
	}

 enqueue:
	pl = &pq->pq_lists[m->m_pkthdr.pf.prio];
	ml_enqueue(pl, m);

	return (n);
}

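/*
 * Dequeue starts at the highest priority list and works down; the
 * chosen list is passed back through the cookie so priq_deq_commit()
 * can pop the same mbuf without searching again.
 */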
struct mbuf *
priq_deq_begin(struct ifqueue *ifq, void **cookiep)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);
	struct mbuf *m;

	do {
		pl = &pq->pq_lists[--prio];
		m = MBUF_LIST_FIRST(pl);
		if (m != NULL) {
			*cookiep = pl;
			return (m);
		}
	} while (prio > 0);

	return (NULL);
}

void
priq_deq_commit(struct ifqueue *ifq, struct mbuf *m, void *cookie)
{
	struct mbuf_list *pl = cookie;

	KASSERT(MBUF_LIST_FIRST(pl) == m);

	ml_dequeue(pl);
}

void
priq_purge(struct ifqueue *ifq, struct mbuf_list *ml)
{
	struct priq *pq = ifq->ifq_q;
	struct mbuf_list *pl;
	unsigned int prio = nitems(pq->pq_lists);

	do {
		pl = &pq->pq_lists[--prio];
		ml_enlist(ml, pl);
	} while (prio > 0);
}