/*
 * Copyright (C) 2011-2014 Matteo Landi, Luigi Rizzo. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */


/*
 * $FreeBSD: releng/10.3/sys/dev/netmap/netmap.c 281706 2015-04-18 21:22:26Z rpaulo $
 *
 * This module supports memory mapped access to network devices,
 * see netmap(4).
 *
 * The module uses a large memory pool allocated by the kernel
 * and accessible as mmapped memory by multiple userspace threads/processes.
 * The memory pool contains packet buffers and "netmap rings",
 * i.e. user-accessible copies of the interface's queues.
 *
 * Access to the network card works like this (see the sketch after this list):
 * 1. a process/thread issues one or more open() on /dev/netmap, to create
 *    select()able file descriptors on which events are reported.
 * 2. on each descriptor, the process issues an ioctl() to identify
 *    the interface that should report events to the file descriptor.
 * 3. on each descriptor, the process issues an mmap() request to
 *    map the shared memory region within the process' address space.
 *    The list of interesting queues is indicated by a location in
 *    the shared memory region.
 * 4. using the functions in the netmap(4) userspace API, a process
 *    can look up the occupation state of a queue, access memory buffers,
 *    and retrieve received packets or enqueue packets to transmit.
 * 5. using some ioctl()s the process can synchronize the userspace view
 *    of the queue with the actual status in the kernel. This includes both
 *    receiving the notification of new packets, and transmitting new
 *    packets on the output interface.
 * 6. select() or poll() can be used to wait for events on individual
 *    transmit or receive queues (or all queues for a given interface).
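 *
 * As an illustration only (not a complete program), the typical userspace
 * sequence for steps 1-6, using the public API from net/netmap.h and
 * net/netmap_user.h, looks roughly like this ("em0" and the single-ring
 * usage are just placeholders):
 *
 *	struct nmreq req;
 *	struct netmap_if *nifp;
 *	struct netmap_ring *txring;
 *	struct pollfd pfd;
 *	void *mem;
 *	int fd = open("/dev/netmap", O_RDWR);		// step 1
 *
 *	bzero(&req, sizeof(req));
 *	req.nr_version = NETMAP_API;
 *	strlcpy(req.nr_name, "em0", sizeof(req.nr_name));
 *	ioctl(fd, NIOCREGIF, &req);			// step 2
 *	mem = mmap(0, req.nr_memsize, PROT_READ | PROT_WRITE,
 *	    MAP_SHARED, fd, 0);				// step 3
 *	nifp = NETMAP_IF(mem, req.nr_offset);		// step 4
 *	txring = NETMAP_TXRING(nifp, 0);
 *	... fill tx slots, then advance txring->head and txring->cur ...
 *	ioctl(fd, NIOCTXSYNC, NULL);			// step 5
 *	pfd.fd = fd;
 *	pfd.events = POLLOUT;
 *	poll(&pfd, 1, -1);				// step 6
 *
 * See netmap(4) and the example programs shipped with netmap for the
 * complete interface.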
 *

		SYNCHRONIZATION (USER)

The netmap rings and data structures may be shared among multiple
user threads or even independent processes.
Any synchronization among those threads/processes is delegated
to the threads themselves. Only one thread at a time can be in
a system call on the same netmap ring. The OS does not enforce
this and only guarantees against system crashes in case of
invalid usage.

		LOCKING (INTERNAL)

Within the kernel, access to the netmap rings is protected as follows:

- a spinlock on each ring, to handle producer/consumer races on
  RX rings attached to the host stack (against multiple host
  threads writing from the host stack to the same ring),
  and on 'destination' rings attached to a VALE switch
  (i.e. RX rings in VALE ports, and TX rings in NIC/host ports),
  protecting multiple active senders for the same destination.

- an atomic variable to guarantee that there is at most one
  instance of *_*xsync() on the ring at any time (see the sketch
  at the end of this list).
  For rings connected to user file
  descriptors, an atomic_test_and_set() protects this, and the
  lock on the ring is not actually used.
  For NIC RX rings connected to a VALE switch, an atomic_test_and_set()
  is also used to prevent multiple executions (the driver might indeed
  already guarantee this).
  For NIC TX rings connected to a VALE switch, the lock arbitrates
  access to the queue (both when allocating buffers and when pushing
  them out).

- *xsync() should be protected against initializations of the card.
  On FreeBSD most devices have the reset routine protected by
  a RING lock (ixgbe, igb, em) or core lock (re); lem is missing
  the RING protection on rx_reset(), which should be added.

  On linux there is an external lock on the tx path, which probably
  also arbitrates access to the reset routine. XXX to be revised

- a per-interface core_lock protecting access from the host stack
  while interfaces may be detached from netmap mode.
  XXX there should be no need for this lock if we detach the interfaces
  only while they are down.
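
As a rough illustration (the real entry points use the nm_kr_get()/
nm_kr_put() helpers found later in this file, plus extra checks for
stopped rings), the "one sync at a time" guarantee is the usual
try-lock pattern on kring->nr_busy:

	if (NM_ATOMIC_TEST_AND_SET(&kring->nr_busy)) {
		/* another thread is already in *xsync() on this kring */
		return EBUSY;			/* or sleep and retry */
	}
	error = kring->nm_sync(kring, flags);	/* the actual *xsync() */
	NM_ATOMIC_CLEAR(&kring->nr_busy);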


--- VALE SWITCH ---

NMG_LOCK() serializes all modifications to switches and ports.
A switch cannot be deleted until all ports are gone.

For each switch, an SX lock (RWlock on linux) protects
deletion of ports. When configuring or deleting a port, the
lock is acquired in exclusive mode (after holding NMG_LOCK).
When forwarding, the lock is acquired in shared mode (without NMG_LOCK).
The lock is held throughout the entire forwarding cycle,
during which the thread may incur a page fault.
Hence it is important that sleepable shared locks are used.

On the rx ring, the per-port lock is grabbed initially to reserve
a number of slots in the ring, then the lock is released,
packets are copied from source to destination, and then
the lock is acquired again and the receive ring is updated.
(A similar thing is done on the tx ring for NIC and host stack
ports attached to the switch; see the sketch below.)
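
In rough pseudocode (illustrative steps only; the real implementation
lives in netmap_vale.c and keeps its lease bookkeeping in the krings'
tailroom area), the reserve/copy/update cycle on a destination ring is:

	mtx_lock(&kring->q_lock);
	reserve n slots after the last outstanding lease;  /* bookkeeping only */
	mtx_unlock(&kring->q_lock);

	copy the packets into the reserved slots;	/* may page fault */

	mtx_lock(&kring->q_lock);
	mark the lease complete;
	if all earlier leases are complete, advance nr_hwtail;
	mtx_unlock(&kring->q_lock);
	notify the destination port;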

 */


/* --- internals ----
 *
 * Roadmap to the code that implements the above.
 *
 * > 1. a process/thread issues one or more open() on /dev/netmap, to create
 * >    select()able file descriptors on which events are reported.
 *
 *  	Internally, we allocate a netmap_priv_d structure, which will be
 *  	initialized on ioctl(NIOCREGIF).
 *
 *      os-specific:
 *  	    FreeBSD: netmap_open (netmap_freebsd.c). The priv is
 *  		     per-thread.
 *  	    linux:   linux_netmap_open (netmap_linux.c). The priv is
 *  		     per-open.
 *
 * > 2. on each descriptor, the process issues an ioctl() to identify
 * >    the interface that should report events to the file descriptor.
 *
 * 	Implemented by netmap_ioctl(), NIOCREGIF case, with nmr->nr_cmd==0.
 * 	Most important things happen in netmap_get_na() and
 * 	netmap_do_regif(), called from there. Additional details can be
 * 	found in the comments above those functions.
 *
 * 	In all cases, this action creates/takes-a-reference-to a
 * 	netmap_*_adapter describing the port, and allocates a netmap_if
 * 	and all necessary netmap rings, filling them with netmap buffers.
 *
 *      In this phase, the sync callbacks for each ring are set (these are used
 *      in steps 5 and 6 below).  The callbacks depend on the type of adapter.
 *      The adapter creation/initialization code puts them in the
 * 	netmap_adapter (fields na->nm_txsync and na->nm_rxsync).  Then, they
 * 	are copied from there to the netmap_krings during netmap_do_regif(), by
 * 	the nm_krings_create() callback.  All the nm_krings_create callbacks
 * 	actually call netmap_krings_create() to perform this and the other
 * 	common stuff. netmap_krings_create() also takes care of the host rings,
 * 	if needed, by setting their sync callbacks appropriately.
 *
 * 	Additional actions depend on the kind of netmap_adapter that has been
 * 	registered:
 *
 * 	- netmap_hw_adapter:  	     [netmap.c]
 * 	     This is a system netdev/ifp with native netmap support.
 * 	     The ifp is detached from the host stack by redirecting:
 * 	       - transmissions (from the network stack) to netmap_transmit()
 * 	       - receive notifications to the nm_notify() callback for
 * 	         this adapter. The callback is normally netmap_notify(), unless
 * 	         the ifp is attached to a bridge using bwrap, in which case it
 * 	         is netmap_bwrap_intr_notify().
 *
 * 	- netmap_generic_adapter:      [netmap_generic.c]
 * 	      A system netdev/ifp without native netmap support.
 *
 * 	(the decision about native/non-native support is taken in
 * 	 netmap_get_hw_na(), called by netmap_get_na())
 *
 * 	- netmap_vp_adapter 		[netmap_vale.c]
 * 	      Returned by netmap_get_bdg_na().
 * 	      This is a persistent or ephemeral VALE port. Ephemeral ports
 * 	      are created on the fly if they don't already exist, and are
 * 	      always attached to a bridge.
 * 	      Persistent VALE ports must be created separately, and are
 * 	      then attached like normal NICs. The NIOCREGIF we are examining
 * 	      will find them only if they had previously been created and
 * 	      attached (see VALE_CTL below).
 *
 * 	- netmap_pipe_adapter 	      [netmap_pipe.c]
 * 	      Returned by netmap_get_pipe_na().
 * 	      Both pipe ends are created, if they didn't already exist.
 *
 * 	- netmap_monitor_adapter      [netmap_monitor.c]
 * 	      Returned by netmap_get_monitor_na().
 * 	      If successful, the nm_sync callbacks of the monitored adapter
 * 	      will be intercepted by the returned monitor.
 *
 * 	- netmap_bwrap_adapter	      [netmap_vale.c]
 * 	      Cannot be obtained in this way, see VALE_CTL below.
 *
 *
 * 	os-specific:
 * 	    linux: we first go through linux_netmap_ioctl() to
 * 	           adapt the FreeBSD interface to the linux one.
 *
 *
 * > 3. on each descriptor, the process issues an mmap() request to
 * >    map the shared memory region within the process' address space.
 * >    The list of interesting queues is indicated by a location in
 * >    the shared memory region.
 *
 *      os-specific:
 *  	    FreeBSD: netmap_mmap_single (netmap_freebsd.c).
 *  	    linux:   linux_netmap_mmap (netmap_linux.c).
 *
 * > 4. using the functions in the netmap(4) userspace API, a process
 * >    can look up the occupation state of a queue, access memory buffers,
 * >    and retrieve received packets or enqueue packets to transmit.
 *
 * 	these actions do not involve the kernel.
 *
 * > 5. using some ioctl()s the process can synchronize the userspace view
 * >    of the queue with the actual status in the kernel. This includes both
 * >    receiving the notification of new packets, and transmitting new
 * >    packets on the output interface.
 *
 * 	These are implemented in netmap_ioctl(), NIOCTXSYNC and NIOCRXSYNC
 * 	cases. They invoke the nm_sync callbacks on the netmap_kring
 * 	structures, as initialized in step 2 and maybe later modified
 * 	by a monitor. Monitors, however, will always call the original
 * 	callback before doing anything else.
 *
 *
 * > 6. select() or poll() can be used to wait for events on individual
 * >    transmit or receive queues (or all queues for a given interface).
 *
 * 	Implemented in netmap_poll(). This will call the same nm_sync()
 * 	callbacks as in step 5 above.
 *
 * 	os-specific:
 * 		linux: we first go through linux_netmap_poll() to adapt
 * 		       the FreeBSD interface to the linux one.
 *
 *
 *  ----  VALE_CTL -----
 *
 *  VALE switches are controlled by issuing a NIOCREGIF with a non-null
 *  nr_cmd in the nmreq structure. These subcommands are handled by
 *  netmap_bdg_ctl() in netmap_vale.c. Persistent VALE ports are created
 *  and destroyed by issuing the NETMAP_BDG_NEWIF and NETMAP_BDG_DELIF
 *  subcommands, respectively.
 *
 *  Any network interface known to the system (including a persistent VALE
 *  port) can be attached to a VALE switch by issuing the
 *  NETMAP_BDG_ATTACH subcommand. After the attachment, persistent VALE ports
 *  look exactly like ephemeral VALE ports (as created in step 2 above).  The
 *  attachment of other interfaces, instead, requires the creation of a
 *  netmap_bwrap_adapter.  Moreover, the attached interface must be put in
 *  netmap mode. This may require the creation of a netmap_generic_adapter if
 *  we have no native support for the interface, or if generic adapters have
 *  been forced by sysctl.  (See the example nmreq sketch after this section.)
 *
 *  Both persistent VALE ports and bwraps are handled by netmap_get_bdg_na(),
 *  called by nm_bdg_ctl_attach(), and discriminated by the nm_bdg_attach()
 *  callback.  In the case of the bwrap, the callback creates the
 *  netmap_bwrap_adapter.  The initialization of the bwrap is then
 *  completed by calling netmap_do_regif() on it, in the nm_bdg_ctl()
 *  callback (netmap_bwrap_bdg_ctl in netmap_vale.c).
 *  A generic adapter for the wrapped ifp will be created if needed, when
 *  netmap_get_bdg_na() calls netmap_get_hw_na().
 *
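 *  As an illustration (interface names are placeholders; error handling
 *  omitted), attaching interface em0 to the VALE switch "valeA" from
 *  userspace is just a NIOCREGIF with the subcommand in nr_cmd:
 *
 *	struct nmreq req;
 *
 *	bzero(&req, sizeof(req));
 *	req.nr_version = NETMAP_API;
 *	strlcpy(req.nr_name, "valeA:em0", sizeof(req.nr_name));
 *	req.nr_cmd = NETMAP_BDG_ATTACH;
 *	ioctl(fd, NIOCREGIF, &req);	/* fd is an open /dev/netmap */
 *
 *  NETMAP_BDG_DETACH, NETMAP_BDG_NEWIF and NETMAP_BDG_DELIF follow the
 *  same pattern (the vale-ctl example program in the netmap distribution
 *  wraps exactly this kind of request).
 *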
 *
 *  ---- DATAPATHS -----
 *
 *              -= SYSTEM DEVICE WITH NATIVE SUPPORT =-
 *
 *    na == NA(ifp) == netmap_hw_adapter created in DEVICE_netmap_attach()
 *
 *    - tx from netmap userspace:
 *	 concurrently:
 *           1) ioctl(NIOCTXSYNC)/netmap_poll() in process context
 *                kring->nm_sync() == DEVICE_netmap_txsync()
 *           2) device interrupt handler
 *                na->nm_notify()  == netmap_notify()
 *    - rx from netmap userspace:
 *       concurrently:
 *           1) ioctl(NIOCRXSYNC)/netmap_poll() in process context
 *                kring->nm_sync() == DEVICE_netmap_rxsync()
 *           2) device interrupt handler
 *                na->nm_notify()  == netmap_notify()
 *    - tx from host stack
 *       concurrently:
 *           1) host stack
 *                netmap_transmit()
 *                  na->nm_notify  == netmap_notify()
 *           2) ioctl(NIOCRXSYNC)/netmap_poll() in process context
 *                kring->nm_sync() == netmap_rxsync_from_host_compat
 *                  netmap_rxsync_from_host(na, NULL, NULL)
 *    - tx to host stack
 *           ioctl(NIOCTXSYNC)/netmap_poll() in process context
 *             kring->nm_sync() == netmap_txsync_to_host_compat
 *               netmap_txsync_to_host(na)
 *                 NM_SEND_UP()
 *                   FreeBSD: na->if_input() == ?? XXX
 *                   linux: netif_rx() with NM_MAGIC_PRIORITY_RX
 *
 *
 *
 *               -= SYSTEM DEVICE WITH GENERIC SUPPORT =-
 *
 *
 *
 *                           -= VALE PORT =-
 *
 *
 *
 *                           -= NETMAP PIPE =-
 *
 *
 *
 *  -= SYSTEM DEVICE WITH NATIVE SUPPORT, CONNECTED TO VALE, NO HOST RINGS =-
 *
 *
 *
 *  -= SYSTEM DEVICE WITH NATIVE SUPPORT, CONNECTED TO VALE, WITH HOST RINGS =-
 *
 *
 *
 *  -= SYSTEM DEVICE WITH GENERIC SUPPORT, CONNECTED TO VALE, NO HOST RINGS =-
 *
 *
 *
 *  -= SYSTEM DEVICE WITH GENERIC SUPPORT, CONNECTED TO VALE, WITH HOST RINGS =-
 *
 *
 *
 */

/*
 * OS-specific code that is used only within this file.
 * Other OS-specific code that must be accessed by drivers
 * is present in netmap_kern.h
 */

#if defined(__FreeBSD__)
#include <sys/cdefs.h> /* prerequisite */
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>	/* defines used in kernel.h */
#include <sys/kernel.h>	/* types used in module initialization */
#include <sys/conf.h>	/* cdevsw struct, UID, GID */
#include <sys/filio.h>	/* FIONBIO */
#include <sys/sockio.h>
#include <sys/socketvar.h>	/* struct socket */
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/rwlock.h>
#include <sys/socket.h> /* sockaddrs */
#include <sys/selinfo.h>
#include <sys/sysctl.h>
#include <sys/jail.h>
#include <net/vnet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/bpf.h>		/* BIOCIMMEDIATE */
#include <machine/bus.h>	/* bus_dmamap_* */
#include <sys/endian.h>
#include <sys/refcount.h>


/* reduce conditional code */
// linux API, used for the knlist in FreeBSD
/* use a private mutex for the knlist */
#define init_waitqueue_head(x) do {			\
	struct mtx *m = &(x)->m;			\
	mtx_init(m, "nm_kn_lock", NULL, MTX_DEF);	\
	knlist_init_mtx(&(x)->si.si_note, m);		\
    } while (0)

#define OS_selrecord(a, b)	selrecord(a, &((b)->si))
#define OS_selwakeup(a, b)	freebsd_selwakeup(a, b)

#elif defined(linux)

#include "bsd_glue.h"



#elif defined(__APPLE__)

#warning OSX support is only partial
#include "osx_glue.h"

#else

#error	Unsupported platform

#endif /* unsupported */

/*
 * common headers
 */
#include <net/netmap.h>
#include <dev/netmap/netmap_kern.h>
#include <dev/netmap/netmap_mem2.h>


MALLOC_DEFINE(M_NETMAP, "netmap", "Network memory map");

/*
 * The following variables are used by the drivers and replicate
 * fields in the global memory pool. They only refer to buffers
 * used by physical interfaces.
 */
u_int netmap_total_buffers;
u_int netmap_buf_size;
char *netmap_buffer_base;	/* also address of an invalid buffer */

/* user-controlled variables */
int netmap_verbose;

static int netmap_no_timestamp; /* don't timestamp on rxsync */

SYSCTL_NODE(_dev, OID_AUTO, netmap, CTLFLAG_RW, 0, "Netmap args");
SYSCTL_INT(_dev_netmap, OID_AUTO, verbose,
    CTLFLAG_RW, &netmap_verbose, 0, "Verbose mode");
SYSCTL_INT(_dev_netmap, OID_AUTO, no_timestamp,
    CTLFLAG_RW, &netmap_no_timestamp, 0, "no_timestamp");
int netmap_mitigate = 1;
SYSCTL_INT(_dev_netmap, OID_AUTO, mitigate, CTLFLAG_RW, &netmap_mitigate, 0, "");
int netmap_no_pendintr = 1;
SYSCTL_INT(_dev_netmap, OID_AUTO, no_pendintr,
    CTLFLAG_RW, &netmap_no_pendintr, 0, "Always look for new received packets.");
int netmap_txsync_retry = 2;
SYSCTL_INT(_dev_netmap, OID_AUTO, txsync_retry, CTLFLAG_RW,
    &netmap_txsync_retry, 0, "Number of txsync loops in bridge's flush.");

int netmap_adaptive_io = 0;
SYSCTL_INT(_dev_netmap, OID_AUTO, adaptive_io, CTLFLAG_RW,
    &netmap_adaptive_io, 0, "Adaptive I/O on paravirt");

int netmap_flags = 0;	/* debug flags */
int netmap_fwd = 0;	/* force transparent mode */
int netmap_mmap_unreg = 0; /* allow mmap of unregistered fds */

/*
 * netmap_admode selects the netmap mode to use.
 * Invalid values are reset to NETMAP_ADMODE_BEST
 */
enum { NETMAP_ADMODE_BEST = 0,	/* use native, fallback to generic */
	NETMAP_ADMODE_NATIVE,	/* either native or none */
	NETMAP_ADMODE_GENERIC,	/* force generic */
	NETMAP_ADMODE_LAST };
static int netmap_admode = NETMAP_ADMODE_BEST;

int netmap_generic_mit = 100*1000;   /* Generic mitigation interval in nanoseconds. */
int netmap_generic_ringsize = 1024;   /* Generic ringsize. */
int netmap_generic_rings = 1;   /* number of queues in generic. */

SYSCTL_INT(_dev_netmap, OID_AUTO, flags, CTLFLAG_RW, &netmap_flags, 0, "");
SYSCTL_INT(_dev_netmap, OID_AUTO, fwd, CTLFLAG_RW, &netmap_fwd, 0, "");
SYSCTL_INT(_dev_netmap, OID_AUTO, mmap_unreg, CTLFLAG_RW, &netmap_mmap_unreg, 0, "");
SYSCTL_INT(_dev_netmap, OID_AUTO, admode, CTLFLAG_RW, &netmap_admode, 0, "");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_mit, CTLFLAG_RW, &netmap_generic_mit, 0, "");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW, &netmap_generic_ringsize, 0, "");
SYSCTL_INT(_dev_netmap, OID_AUTO, generic_rings, CTLFLAG_RW, &netmap_generic_rings, 0, "");

NMG_LOCK_T	netmap_global_lock;


static void
nm_kr_get(struct netmap_kring *kr)
{
	while (NM_ATOMIC_TEST_AND_SET(&kr->nr_busy))
		tsleep(kr, 0, "NM_KR_GET", 4);
}


/*
 * Mark the ring as stopped, and run through the locks
 * to make sure other users get to see it.
 */
static void
netmap_disable_ring(struct netmap_kring *kr)
{
	kr->nkr_stopped = 1;
	nm_kr_get(kr);
	mtx_lock(&kr->q_lock);
	mtx_unlock(&kr->q_lock);
	nm_kr_put(kr);
}

/* stop or enable a single tx ring */
void
netmap_set_txring(struct netmap_adapter *na, u_int ring_id, int stopped)
{
	if (stopped)
		netmap_disable_ring(na->tx_rings + ring_id);
	else
		na->tx_rings[ring_id].nkr_stopped = 0;
	/* Notify that the stopped state has changed. This is currently
	 * only used by bwrap to propagate the state to its own krings.
	 * (see netmap_bwrap_intr_notify).
	 */
	na->nm_notify(na, ring_id, NR_TX, NAF_DISABLE_NOTIFY);
}

/* stop or enable a single rx ring */
void
netmap_set_rxring(struct netmap_adapter *na, u_int ring_id, int stopped)
{
	if (stopped)
		netmap_disable_ring(na->rx_rings + ring_id);
	else
		na->rx_rings[ring_id].nkr_stopped = 0;
	/* Notify that the stopped state has changed. This is currently
	 * only used by bwrap to propagate the state to its own krings.
	 * (see netmap_bwrap_intr_notify).
	 */
	na->nm_notify(na, ring_id, NR_RX, NAF_DISABLE_NOTIFY);
}


/* stop or enable all the rings of na */
void
netmap_set_all_rings(struct netmap_adapter *na, int stopped)
{
	int i;
	u_int ntx, nrx;

	if (!nm_netmap_on(na))
		return;

	ntx = netmap_real_tx_rings(na);
	nrx = netmap_real_rx_rings(na);

	for (i = 0; i < ntx; i++) {
		netmap_set_txring(na, i, stopped);
	}

	for (i = 0; i < nrx; i++) {
		netmap_set_rxring(na, i, stopped);
	}
}

/*
 * Convenience function used in drivers.  Waits for current txsync()s/rxsync()s
 * to finish and prevents any new one from starting.  Call this before turning
 * netmap mode off, or before removing the hardware rings (e.g., on module
 * unload).  As a rule of thumb for linux drivers, this should be placed near
 * each napi_disable().
 */
void
netmap_disable_all_rings(struct ifnet *ifp)
{
	netmap_set_all_rings(NA(ifp), 1 /* stopped */);
}

/*
 * Convenience function used in drivers.  Re-enables rxsync and txsync on the
 * adapter's rings.  In linux drivers, this should be placed near each
 * napi_enable().
 */
void
netmap_enable_all_rings(struct ifnet *ifp)
{
	netmap_set_all_rings(NA(ifp), 0 /* enabled */);
}


/*
 * generic bound-checking function
 */
u_int
nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg)
{
	u_int oldv = *v;
	const char *op = NULL;

	if (dflt < lo)
		dflt = lo;
	if (dflt > hi)
		dflt = hi;
	if (oldv < lo) {
		*v = dflt;
		op = "Bump";
	} else if (oldv > hi) {
		*v = hi;
		op = "Clamp";
	}
	if (op && msg)
		printf("%s %s to %d (was %d)\n", op, msg, *v, oldv);
	return *v;
}
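
/*
 * Example (illustrative only, the values are made up): callers can
 * sanity-check a user tunable before using it, e.g.
 *
 *	nm_bound_var(&netmap_generic_ringsize, 1024, 64, 16384,
 *	    "generic_ringsize");
 *
 * which clamps the sysctl-controlled value into [64, 16384] and logs
 * the adjustment.
 */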


/*
 * packet-dump function, user-supplied or static buffer.
 * The destination buffer must be at least 30+4*len bytes long.
 */
const char *
nm_dump_buf(char *p, int len, int lim, char *dst)
{
	static char _dst[8192];
	int i, j, i0;
	static char hex[] = "0123456789abcdef";
	char *o;	/* output position */

#define P_HI(x)	hex[((x) & 0xf0)>>4]
#define P_LO(x)	hex[((x) & 0xf)]
#define P_C(x)	((x) >= 0x20 && (x) <= 0x7e ? (x) : '.')
	if (!dst)
		dst = _dst;
	if (lim <= 0 || lim > len)
		lim = len;
	o = dst;
	sprintf(o, "buf 0x%p len %d lim %d\n", p, len, lim);
	o += strlen(o);
	/* hexdump routine */
	for (i = 0; i < lim; ) {
		sprintf(o, "%5d: ", i);
		o += strlen(o);
		memset(o, ' ', 48);
		i0 = i;
		for (j=0; j < 16 && i < lim; i++, j++) {
			o[j*3] = P_HI(p[i]);
			o[j*3+1] = P_LO(p[i]);
		}
		i = i0;
		for (j=0; j < 16 && i < lim; i++, j++)
			o[j + 48] = P_C(p[i]);
		o[j+48] = '\n';
		o += j+49;
	}
	*o = '\0';
#undef P_HI
#undef P_LO
#undef P_C
	return dst;
}


/*
 * Fetch configuration from the device, to cope with dynamic
 * reconfigurations after loading the module.
 */
/* call with NMG_LOCK held */
int
netmap_update_config(struct netmap_adapter *na)
{
	u_int txr, txd, rxr, rxd;

	txr = txd = rxr = rxd = 0;
	if (na->nm_config == NULL ||
	    na->nm_config(na, &txr, &txd, &rxr, &rxd)) {
		/* take whatever we had at init time */
		txr = na->num_tx_rings;
		txd = na->num_tx_desc;
		rxr = na->num_rx_rings;
		rxd = na->num_rx_desc;
	}

	if (na->num_tx_rings == txr && na->num_tx_desc == txd &&
	    na->num_rx_rings == rxr && na->num_rx_desc == rxd)
		return 0; /* nothing changed */
	if (netmap_verbose || na->active_fds > 0) {
		D("stored config %s: txring %d x %d, rxring %d x %d",
			na->name,
			na->num_tx_rings, na->num_tx_desc,
			na->num_rx_rings, na->num_rx_desc);
		D("new config %s: txring %d x %d, rxring %d x %d",
			na->name, txr, txd, rxr, rxd);
	}
	if (na->active_fds == 0) {
		D("configuration changed (but fine)");
		na->num_tx_rings = txr;
		na->num_tx_desc = txd;
		na->num_rx_rings = rxr;
		na->num_rx_desc = rxd;
		return 0;
	}
	D("configuration changed while active, this is bad...");
	return 1;
}

/* kring->nm_sync callback for the host tx ring */
static int
netmap_txsync_to_host_compat(struct netmap_kring *kring, int flags)
{
	(void)flags; /* unused */
	netmap_txsync_to_host(kring->na);
	return 0;
}

/* kring->nm_sync callback for the host rx ring */
static int
netmap_rxsync_from_host_compat(struct netmap_kring *kring, int flags)
{
	(void)flags; /* unused */
	netmap_rxsync_from_host(kring->na, NULL, NULL);
	return 0;
}



/* create the krings array and initialize the fields common to all adapters.
 * The array layout is this:
 *
 *                    +----------+
 * na->tx_rings ----->|          | \
 *                    |          |  } na->num_tx_rings
 *                    |          | /
 *                    +----------+
 *                    |          |    host tx kring
 * na->rx_rings ----> +----------+
 *                    |          | \
 *                    |          |  } na->num_rx_rings
 *                    |          | /
 *                    +----------+
 *                    |          |    host rx kring
 *                    +----------+
 * na->tailroom ----->|          | \
 *                    |          |  } tailroom bytes
 *                    |          | /
 *                    +----------+
 *
 * Note: for compatibility, host krings are created even when not needed.
 * The tailroom space is currently used by vale ports for allocating leases.
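 *
 * For example, with this layout the host rings are always reachable as
 *
 *	na->tx_rings[na->num_tx_rings]	// host TX kring
 *	na->rx_rings[na->num_rx_rings]	// host RX kring
 *
 * which is the convention used by netmap_txsync_to_host() and
 * netmap_rxsync_from_host() below.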
 */
/* call with NMG_LOCK held */
int
netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
{
	u_int i, len, ndesc;
	struct netmap_kring *kring;
	u_int ntx, nrx;

	/* account for the (possibly fake) host rings */
	ntx = na->num_tx_rings + 1;
	nrx = na->num_rx_rings + 1;

	len = (ntx + nrx) * sizeof(struct netmap_kring) + tailroom;

	na->tx_rings = malloc((size_t)len, M_DEVBUF, M_NOWAIT | M_ZERO);
	if (na->tx_rings == NULL) {
		D("Cannot allocate krings");
		return ENOMEM;
	}
	na->rx_rings = na->tx_rings + ntx;

	/*
	 * All fields in krings are 0 except the ones initialized below,
	 * but better be explicit on important kring fields.
	 */
	ndesc = na->num_tx_desc;
	for (i = 0; i < ntx; i++) { /* Transmit rings */
		kring = &na->tx_rings[i];
		bzero(kring, sizeof(*kring));
		kring->na = na;
		kring->ring_id = i;
		kring->nkr_num_slots = ndesc;
		if (i < na->num_tx_rings) {
			kring->nm_sync = na->nm_txsync;
		} else if (i == na->num_tx_rings) {
			kring->nm_sync = netmap_txsync_to_host_compat;
		}
		/*
		 * IMPORTANT: Always keep one slot empty.
		 */
		kring->rhead = kring->rcur = kring->nr_hwcur = 0;
		kring->rtail = kring->nr_hwtail = ndesc - 1;
		snprintf(kring->name, sizeof(kring->name) - 1, "%s TX%d", na->name, i);
		ND("ktx %s h %d c %d t %d",
			kring->name, kring->rhead, kring->rcur, kring->rtail);
		mtx_init(&kring->q_lock, "nm_txq_lock", NULL, MTX_DEF);
		init_waitqueue_head(&kring->si);
	}

	ndesc = na->num_rx_desc;
	for (i = 0; i < nrx; i++) { /* Receive rings */
		kring = &na->rx_rings[i];
		bzero(kring, sizeof(*kring));
		kring->na = na;
		kring->ring_id = i;
		kring->nkr_num_slots = ndesc;
		if (i < na->num_rx_rings) {
			kring->nm_sync = na->nm_rxsync;
		} else if (i == na->num_rx_rings) {
			kring->nm_sync = netmap_rxsync_from_host_compat;
		}
		kring->rhead = kring->rcur = kring->nr_hwcur = 0;
		kring->rtail = kring->nr_hwtail = 0;
		snprintf(kring->name, sizeof(kring->name) - 1, "%s RX%d", na->name, i);
		ND("krx %s h %d c %d t %d",
			kring->name, kring->rhead, kring->rcur, kring->rtail);
		mtx_init(&kring->q_lock, "nm_rxq_lock", NULL, MTX_DEF);
		init_waitqueue_head(&kring->si);
	}
	init_waitqueue_head(&na->tx_si);
	init_waitqueue_head(&na->rx_si);

	na->tailroom = na->rx_rings + nrx;

	return 0;
}


#ifdef __FreeBSD__
static void
netmap_knlist_destroy(NM_SELINFO_T *si)
{
	/* XXX kqueue(9) needed; these will mirror knlist_init. */
	knlist_delete(&si->si.si_note, curthread, 0 /* not locked */);
	knlist_destroy(&si->si.si_note);
	/* now we don't need the mutex anymore */
	mtx_destroy(&si->m);
}
#endif /* __FreeBSD__ */


/* undo the actions performed by netmap_krings_create */
/* call with NMG_LOCK held */
void
netmap_krings_delete(struct netmap_adapter *na)
{
	struct netmap_kring *kring = na->tx_rings;

	/* we rely on the krings layout described above */
	for ( ; kring != na->tailroom; kring++) {
		mtx_destroy(&kring->q_lock);
		netmap_knlist_destroy(&kring->si);
	}
	free(na->tx_rings, M_DEVBUF);
	na->tx_rings = na->rx_rings = na->tailroom = NULL;
}


/*
 * Destructor for NIC ports. They also have an mbuf queue
 * on the rings connected to the host so we need to purge
 * them first.
 */
/* call with NMG_LOCK held */
static void
netmap_hw_krings_delete(struct netmap_adapter *na)
{
	struct mbq *q = &na->rx_rings[na->num_rx_rings].rx_queue;

	ND("destroy sw mbq with len %d", mbq_len(q));
	mbq_purge(q);
	mbq_safe_destroy(q);
	netmap_krings_delete(na);
}


/* create a new netmap_if for a newly registered fd.
 * If this is the first registration of the adapter,
 * also create the netmap rings and their in-kernel view,
 * the netmap krings.
 */
/* call with NMG_LOCK held */
static struct netmap_if*
netmap_if_new(struct netmap_adapter *na)
{
	struct netmap_if *nifp;

	if (netmap_update_config(na)) {
		/* configuration mismatch, report and fail */
		return NULL;
	}

	if (na->active_fds)	/* already registered */
		goto final;

	/* create and init the krings arrays.
	 * Depending on the adapter, this may also create
	 * the netmap rings themselves
	 */
	if (na->nm_krings_create(na))
		return NULL;

	/* create all missing netmap rings */
	if (netmap_mem_rings_create(na))
		goto cleanup;

final:

	/* in all cases, create a new netmap if */
	nifp = netmap_mem_if_new(na);
	if (nifp == NULL)
		goto cleanup;

	return (nifp);

cleanup:

	if (na->active_fds == 0) {
		netmap_mem_rings_delete(na);
		na->nm_krings_delete(na);
	}

	return NULL;
}


/* grab a reference to the memory allocator, if we don't have one already.  The
 * reference is taken from the netmap_adapter registered with the priv.
 */
/* call with NMG_LOCK held */
static int
netmap_get_memory_locked(struct netmap_priv_d* p)
{
	struct netmap_mem_d *nmd;
	int error = 0;

	if (p->np_na == NULL) {
		if (!netmap_mmap_unreg)
			return ENODEV;
		/* for compatibility with older versions of the API
		 * we use the global allocator when no interface has been
		 * registered
		 */
		nmd = &nm_mem;
	} else {
		nmd = p->np_na->nm_mem;
	}
	if (p->np_mref == NULL) {
		error = netmap_mem_finalize(nmd, p->np_na);
		if (!error)
			p->np_mref = nmd;
	} else if (p->np_mref != nmd) {
		/* a virtual port has been registered, but previous
		 * syscalls already used the global allocator.
		 * We cannot continue
		 */
		error = ENODEV;
	}
	return error;
}


/* call with NMG_LOCK *not* held */
int
netmap_get_memory(struct netmap_priv_d* p)
{
	int error;
	NMG_LOCK();
	error = netmap_get_memory_locked(p);
	NMG_UNLOCK();
	return error;
}


/* call with NMG_LOCK held */
static int
netmap_have_memory_locked(struct netmap_priv_d* p)
{
	return p->np_mref != NULL;
}


/* call with NMG_LOCK held */
static void
netmap_drop_memory_locked(struct netmap_priv_d* p)
{
	if (p->np_mref) {
		netmap_mem_deref(p->np_mref, p->np_na);
		p->np_mref = NULL;
	}
}


/*
 * Call nm_register(ifp,0) to stop netmap mode on the interface and
 * revert to normal operation.
 * The second argument is the nifp to work on. In some cases it is
 * not attached yet to the netmap_priv_d so we need to pass it as
 * a separate argument.
 */
/* call with NMG_LOCK held */
static void
netmap_do_unregif(struct netmap_priv_d *priv, struct netmap_if *nifp)
{
	struct netmap_adapter *na = priv->np_na;

	NMG_LOCK_ASSERT();
	na->active_fds--;
	if (na->active_fds <= 0) {	/* last instance */

		if (netmap_verbose)
			D("deleting last instance for %s", na->name);
		/*
		 * (TO CHECK) This function is only called
		 * when the last reference to this file descriptor goes
		 * away. This means we cannot have any pending poll()
		 * or interrupt routine operating on the structure.
		 * XXX The file may be closed in a thread while
		 * another thread is using it.
		 * Linux keeps the file opened until the last reference
		 * by any outstanding ioctl/poll or mmap is gone.
		 * FreeBSD does not track mmap()s (but we do) and
		 * wakes up any sleeping poll(). Need to check what
		 * happens if the close() occurs while a concurrent
		 * syscall is running.
		 */
		na->nm_register(na, 0); /* off, clear flags */
		/* Wake up any sleeping threads. netmap_poll will
		 * then return POLLERR
		 * XXX The wake up now must happen during *_down(), when
		 * we order all activities to stop. -gl
		 */
		netmap_knlist_destroy(&na->tx_si);
		netmap_knlist_destroy(&na->rx_si);

		/* delete rings and buffers */
		netmap_mem_rings_delete(na);
		na->nm_krings_delete(na);
	}
	/* delete the nifp */
	netmap_mem_if_delete(na, nifp);
}

/* call with NMG_LOCK held */
static __inline int
nm_tx_si_user(struct netmap_priv_d *priv)
{
	return (priv->np_na != NULL &&
		(priv->np_txqlast - priv->np_txqfirst > 1));
}

/* call with NMG_LOCK held */
static __inline int
nm_rx_si_user(struct netmap_priv_d *priv)
{
	return (priv->np_na != NULL &&
		(priv->np_rxqlast - priv->np_rxqfirst > 1));
}


/*
 * Destructor of the netmap_priv_d, called when the fd has
 * no active open() and mmap(). Also called in error paths.
 *
 * returns 1 if this is the last instance and we can free priv
 */
/* call with NMG_LOCK held */
int
netmap_dtor_locked(struct netmap_priv_d *priv)
{
	struct netmap_adapter *na = priv->np_na;

#ifdef __FreeBSD__
	/*
	 * np_refcount is the number of active mmaps on
	 * this file descriptor
	 */
	if (--priv->np_refcount > 0) {
		return 0;
	}
#endif /* __FreeBSD__ */
	if (!na) {
	    return 1; //XXX is it correct?
	}
	netmap_do_unregif(priv, priv->np_nifp);
	priv->np_nifp = NULL;
	netmap_drop_memory_locked(priv);
	if (priv->np_na) {
		if (nm_tx_si_user(priv))
			na->tx_si_users--;
		if (nm_rx_si_user(priv))
			na->rx_si_users--;
		netmap_adapter_put(na);
		priv->np_na = NULL;
	}
	return 1;
}


/* call with NMG_LOCK *not* held */
void
netmap_dtor(void *data)
{
	struct netmap_priv_d *priv = data;
	int last_instance;

	NMG_LOCK();
	last_instance = netmap_dtor_locked(priv);
	NMG_UNLOCK();
	if (last_instance) {
		bzero(priv, sizeof(*priv));	/* for safety */
		free(priv, M_DEVBUF);
	}
}




/*
 * Handlers for synchronization of the queues from/to the host.
 * Netmap has two operating modes:
 * - in the default mode, the rings connected to the host stack are
 *   just another ring pair managed by userspace;
 * - in transparent mode (XXX to be defined) incoming packets
 *   (from the host or the NIC) are marked as NS_FORWARD upon
 *   arrival, and the user application has a chance to reset the
 *   flag for packets that should be dropped.
 *   On the RXSYNC or poll(), packets in RX rings between
 *   kring->nr_hwcur and ring->cur with NS_FORWARD still set are moved
 *   to the other side.
 * The transfer NIC --> host is relatively easy, just encapsulate
 * into mbufs and we are done. The host --> NIC side is slightly
 * harder because there might not be room in the tx ring so it
 * might take a while before releasing the buffer.
 */
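
/*
 * As a userspace illustration of the NS_FORWARD mechanism (a sketch only;
 * see netmap(4) for the authoritative API): an application scanning the
 * host RX ring can ask the kernel to push selected packets to the NIC TX
 * rings by setting NS_FORWARD before releasing the slots, e.g.
 *
 *	// ri is the index of the host ring, i.e. the one past the hw rings
 *	struct netmap_ring *r = NETMAP_RXRING(nifp, ri);
 *	uint32_t i;
 *
 *	for (i = r->head; i != r->tail; i = nm_ring_next(r, i)) {
 *		struct netmap_slot *slot = &r->slot[i];
 *
 *		if (!keep_for_application(slot))	// hypothetical filter
 *			slot->flags |= NS_FORWARD;	// pass it to the NIC
 *	}
 *	r->head = r->cur = r->tail;		// release the slots
 *	ioctl(fd, NIOCRXSYNC, NULL);		// forwarding happens here
 *
 * (NETMAP_RXRING() and nm_ring_next() come from net/netmap_user.h;
 * keep_for_application() is a placeholder. The netmap_fwd sysctl above
 * forces the same behaviour for all slots.)
 */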
1121251139Sluigi
1122245836Sluigi/*
1123245836Sluigi * pass a chain of buffers to the host stack as coming from 'dst'
1124262151Sluigi * We do not need to lock because the queue is private.
1125245836Sluigi */
1126227614Sluigistatic void
1127262151Sluiginetmap_send_up(struct ifnet *dst, struct mbq *q)
1128227614Sluigi{
1129245836Sluigi	struct mbuf *m;
1130227614Sluigi
1131245836Sluigi	/* send packets up, outside the lock */
1132262151Sluigi	while ((m = mbq_dequeue(q)) != NULL) {
1133245836Sluigi		if (netmap_verbose & NM_VERB_HOST)
1134245836Sluigi			D("sending up pkt %p size %d", m, MBUF_LEN(m));
1135245836Sluigi		NM_SEND_UP(dst, m);
1136228280Sluigi	}
1137262151Sluigi	mbq_destroy(q);
1138245836Sluigi}
1139227614Sluigi
1140245836Sluigi
1141245836Sluigi/*
1142245836Sluigi * put a copy of the buffers marked NS_FORWARD into an mbuf chain.
1143262151Sluigi * Take packets from hwcur to ring->head marked NS_FORWARD (or forced)
1144262151Sluigi * and pass them up. Drop remaining packets in the unlikely event
1145262151Sluigi * of an mbuf shortage.
1146245836Sluigi */
1147245836Sluigistatic void
1148245836Sluiginetmap_grab_packets(struct netmap_kring *kring, struct mbq *q, int force)
1149245836Sluigi{
1150262151Sluigi	u_int const lim = kring->nkr_num_slots - 1;
1151262151Sluigi	u_int const head = kring->ring->head;
1152262151Sluigi	u_int n;
1153262151Sluigi	struct netmap_adapter *na = kring->na;
1154245836Sluigi
1155262151Sluigi	for (n = kring->nr_hwcur; n != head; n = nm_next(n, lim)) {
1156262151Sluigi		struct mbuf *m;
1157245836Sluigi		struct netmap_slot *slot = &kring->ring->slot[n];
1158227614Sluigi
1159245836Sluigi		if ((slot->flags & NS_FORWARD) == 0 && !force)
1160245836Sluigi			continue;
1161270252Sluigi		if (slot->len < 14 || slot->len > NETMAP_BUF_SIZE(na)) {
1162262151Sluigi			RD(5, "bad pkt at %d len %d", n, slot->len);
1163227614Sluigi			continue;
1164227614Sluigi		}
1165245836Sluigi		slot->flags &= ~NS_FORWARD; // XXX needed ?
1166262151Sluigi		/* XXX TODO: adapt to the case of a multisegment packet */
1167270252Sluigi		m = m_devget(NMB(na, slot), slot->len, 0, na->ifp, NULL);
1168227614Sluigi
1169227614Sluigi		if (m == NULL)
1170227614Sluigi			break;
1171262151Sluigi		mbq_enqueue(q, m);
1172227614Sluigi	}
1173245836Sluigi}
1174245836Sluigi
1175251139Sluigi
1176245836Sluigi/*
1177262151Sluigi * Send to the NIC rings packets marked NS_FORWARD between
1178262151Sluigi * kring->nr_hwcur and kring->rhead
1179262151Sluigi * Called under kring->rx_queue.lock on the sw rx ring,
1180245836Sluigi */
1181262151Sluigistatic u_int
1182245836Sluiginetmap_sw_to_nic(struct netmap_adapter *na)
1183245836Sluigi{
1184245836Sluigi	struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
1185262151Sluigi	struct netmap_slot *rxslot = kring->ring->slot;
1186262151Sluigi	u_int i, rxcur = kring->nr_hwcur;
1187262151Sluigi	u_int const head = kring->rhead;
1188262151Sluigi	u_int const src_lim = kring->nkr_num_slots - 1;
1189262151Sluigi	u_int sent = 0;
1190245836Sluigi
1191262151Sluigi	/* scan rings to find space, then fill as much as possible */
1192262151Sluigi	for (i = 0; i < na->num_tx_rings; i++) {
1193262151Sluigi		struct netmap_kring *kdst = &na->tx_rings[i];
1194262151Sluigi		struct netmap_ring *rdst = kdst->ring;
1195262151Sluigi		u_int const dst_lim = kdst->nkr_num_slots - 1;
1196245836Sluigi
1197262151Sluigi		/* XXX do we trust ring or kring->rcur,rtail ? */
1198262151Sluigi		for (; rxcur != head && !nm_ring_empty(rdst);
1199262151Sluigi		     rxcur = nm_next(rxcur, src_lim) ) {
1200245836Sluigi			struct netmap_slot *src, *dst, tmp;
1201262151Sluigi			u_int dst_cur = rdst->cur;
1202262151Sluigi
1203262151Sluigi			src = &rxslot[rxcur];
1204262151Sluigi			if ((src->flags & NS_FORWARD) == 0 && !netmap_fwd)
1205262151Sluigi				continue;
1206262151Sluigi
1207262151Sluigi			sent++;
1208262151Sluigi
1209262151Sluigi			dst = &rdst->slot[dst_cur];
1210262151Sluigi
1211245836Sluigi			tmp = *src;
1212262151Sluigi
1213245836Sluigi			src->buf_idx = dst->buf_idx;
1214245836Sluigi			src->flags = NS_BUF_CHANGED;
1215245836Sluigi
1216245836Sluigi			dst->buf_idx = tmp.buf_idx;
1217245836Sluigi			dst->len = tmp.len;
1218245836Sluigi			dst->flags = NS_BUF_CHANGED;
1219245836Sluigi
1220270252Sluigi			rdst->cur = nm_next(dst_cur, dst_lim);
1221245836Sluigi		}
1222262151Sluigi		/* if (sent) XXX txsync ? */
1223245836Sluigi	}
1224262151Sluigi	return sent;
1225245836Sluigi}
1226245836Sluigi
1227251139Sluigi
1228245836Sluigi/*
1229262151Sluigi * netmap_txsync_to_host() passes packets up. We are called from a
1230245836Sluigi * system call in user process context, and the only contention
1231245836Sluigi * can be among multiple user threads erroneously calling
1232245836Sluigi * this routine concurrently.
1233245836Sluigi */
1234262151Sluigivoid
1235262151Sluiginetmap_txsync_to_host(struct netmap_adapter *na)
1236245836Sluigi{
1237245836Sluigi	struct netmap_kring *kring = &na->tx_rings[na->num_tx_rings];
1238245836Sluigi	struct netmap_ring *ring = kring->ring;
1239262151Sluigi	u_int const lim = kring->nkr_num_slots - 1;
1240262151Sluigi	u_int const head = kring->rhead;
1241262151Sluigi	struct mbq q;
1242245836Sluigi
1243262151Sluigi	/* Take packets from hwcur to head and pass them up.
1244262151Sluigi	 * Force ring->cur = head, since netmap_grab_packets() stops at head.
1245245836Sluigi	 * If no buffers are available we give up. At the end of the loop,
1246245836Sluigi	 * the queue is drained in all cases.
1247245836Sluigi	 */
1248262151Sluigi	mbq_init(&q);
1249262151Sluigi	ring->cur = head;
1250262151Sluigi	netmap_grab_packets(kring, &q, 1 /* force */);
1251262151Sluigi	ND("have %d pkts in queue", mbq_len(&q));
1252262151Sluigi	kring->nr_hwcur = head;
1253262151Sluigi	kring->nr_hwtail = head + lim;
1254262151Sluigi	if (kring->nr_hwtail > lim)
1255262151Sluigi		kring->nr_hwtail -= lim + 1;
1256262151Sluigi	nm_txsync_finalize(kring);
1257227614Sluigi
1258262151Sluigi	netmap_send_up(na->ifp, &q);
1259227614Sluigi}
1260227614Sluigi
1261251139Sluigi
1262227614Sluigi/*
1263228280Sluigi * rxsync backend for packets coming from the host stack.
1264262151Sluigi * They have been put in kring->rx_queue by netmap_transmit().
1265262151Sluigi * We protect access to the kring using kring->rx_queue.lock
1266228280Sluigi *
1267270252Sluigi * This routine also does the selrecord if called from the poll handler
1268270252Sluigi * (we know because td != NULL).
1269270252Sluigi *
1270270252Sluigi * NOTE: on linux, selrecord() is defined as a macro and uses pwait
1271270252Sluigi *     as an additional hidden argument.
1272262151Sluigi * Returns the number of packets delivered to the tx queues in
1273262151Sluigi * transparent mode, or a negative value on error.
1274227614Sluigi */
1275262151Sluigiint
1276262151Sluiginetmap_rxsync_from_host(struct netmap_adapter *na, struct thread *td, void *pwait)
1277227614Sluigi{
1278234227Sluigi	struct netmap_kring *kring = &na->rx_rings[na->num_rx_rings];
1279227614Sluigi	struct netmap_ring *ring = kring->ring;
1280262151Sluigi	u_int nm_i, n;
1281262151Sluigi	u_int const lim = kring->nkr_num_slots - 1;
1282262151Sluigi	u_int const head = kring->rhead;
1283262151Sluigi	int ret = 0;
1284262151Sluigi	struct mbq *q = &kring->rx_queue;
1285227614Sluigi
1286238837Sluigi	(void)pwait;	/* disable unused warnings */
1287262151Sluigi	(void)td;
1288262151Sluigi
1289267282Sluigi	mbq_lock(q);
1290262151Sluigi
1291262151Sluigi	/* First part: import newly received packets */
1292262151Sluigi	n = mbq_len(q);
1293262151Sluigi	if (n) { /* grab packets from the queue */
1294262151Sluigi		struct mbuf *m;
1295262151Sluigi		uint32_t stop_i;
1296262151Sluigi
1297262151Sluigi		nm_i = kring->nr_hwtail;
1298262151Sluigi		stop_i = nm_prev(nm_i, lim);
1299267282Sluigi		while (nm_i != stop_i && (m = mbq_dequeue(q)) != NULL) {
1300262151Sluigi			int len = MBUF_LEN(m);
1301262151Sluigi			struct netmap_slot *slot = &ring->slot[nm_i];
1302262151Sluigi
1303270252Sluigi			m_copydata(m, 0, len, NMB(na, slot));
1304262151Sluigi			ND("nm %d len %d", nm_i, len);
1305262151Sluigi			if (netmap_verbose)
1306270252Sluigi				D("%s", nm_dump_buf(NMB(na, slot), len, 128, NULL));
1307262151Sluigi
1308262151Sluigi			slot->len = len;
1309262151Sluigi			slot->flags = kring->nkr_slot_flags;
1310262151Sluigi			nm_i = nm_next(nm_i, lim);
1311270252Sluigi			m_freem(m);
1312232238Sluigi		}
1313262151Sluigi		kring->nr_hwtail = nm_i;
1314232238Sluigi	}
1315262151Sluigi
1316262151Sluigi	/*
1317262151Sluigi	 * Second part: skip past packets that userspace has released.
1318262151Sluigi	 */
1319262151Sluigi	nm_i = kring->nr_hwcur;
1320262151Sluigi	if (nm_i != head) { /* something was released */
1321262151Sluigi		if (netmap_fwd || kring->ring->flags & NR_FORWARD)
1322262151Sluigi			ret = netmap_sw_to_nic(na);
1323262151Sluigi		kring->nr_hwcur = head;
1324262151Sluigi	}
1325262151Sluigi
1326262151Sluigi	nm_rxsync_finalize(kring);
1327262151Sluigi
1328270252Sluigi	/* access copies of cur,tail in the kring */
1329270252Sluigi	if (kring->rcur == kring->rtail && td) /* no bufs available */
1330278779Sluigi		OS_selrecord(td, &kring->si);
1331270252Sluigi
1332267282Sluigi	mbq_unlock(q);
1333262151Sluigi	return ret;
1334227614Sluigi}
1335227614Sluigi
1336227614Sluigi
1337262151Sluigi/* Get a netmap adapter for the port.
1338251139Sluigi *
1339262151Sluigi * If it is possible to satisfy the request, return 0
1340262151Sluigi * with *na containing the netmap adapter found.
1341262151Sluigi * Otherwise return an error code, with *na containing NULL.
1342251139Sluigi *
1343262151Sluigi * When the port is attached to a bridge, we always return
1344262151Sluigi * EBUSY.
1345262151Sluigi * Otherwise, if the port is already bound to a file descriptor,
1346262151Sluigi * then we unconditionally return the existing adapter into *na.
1347262151Sluigi * In all the other cases, we return (into *na) either native,
1348262151Sluigi * generic or NULL, according to the following table:
1349262151Sluigi *
1350262151Sluigi *					native_support
1351262151Sluigi * active_fds   dev.netmap.admode         YES     NO
1352262151Sluigi * -------------------------------------------------------
1353262151Sluigi *    >0              *                 NA(ifp) NA(ifp)
1354262151Sluigi *
1355262151Sluigi *     0        NETMAP_ADMODE_BEST      NATIVE  GENERIC
1356262151Sluigi *     0        NETMAP_ADMODE_NATIVE    NATIVE   NULL
1357262151Sluigi *     0        NETMAP_ADMODE_GENERIC   GENERIC GENERIC
1358262151Sluigi *
1359227614Sluigi */
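/*
 * Example reading of the table above (illustrative only): with
 * active_fds == 0, dev.netmap.admode == NETMAP_ADMODE_BEST and an
 * interface without native support, the GENERIC column applies and a
 * generic adapter is created on demand; with native support, the
 * existing NATIVE adapter NA(ifp) is returned instead.
 */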
1360262151Sluigi
1361262151Sluigiint
1362262151Sluiginetmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na)
1363227614Sluigi{
1364262151Sluigi	/* generic support */
1365262151Sluigi	int i = netmap_admode;	/* Take a snapshot. */
1366262151Sluigi	int error = 0;
1367262151Sluigi	struct netmap_adapter *prev_na;
1368262151Sluigi	struct netmap_generic_adapter *gna;
1369238812Sluigi
1370262151Sluigi	*na = NULL; /* default */
1371238812Sluigi
1372262151Sluigi	/* reset in case of invalid value */
1373262151Sluigi	if (i < NETMAP_ADMODE_BEST || i >= NETMAP_ADMODE_LAST)
1374262151Sluigi		i = netmap_admode = NETMAP_ADMODE_BEST;
1375262151Sluigi
1376262151Sluigi	if (NETMAP_CAPABLE(ifp)) {
1377270252Sluigi		prev_na = NA(ifp);
1378262151Sluigi		/* If an adapter already exists, return it if
1379262151Sluigi		 * there are active file descriptors or if
1380262151Sluigi		 * netmap is not forced to use generic
1381262151Sluigi		 * adapters.
1382251139Sluigi		 */
1383270252Sluigi		if (NETMAP_OWNED_BY_ANY(prev_na)
1384270252Sluigi			|| i != NETMAP_ADMODE_GENERIC
1385270252Sluigi			|| prev_na->na_flags & NAF_FORCE_NATIVE
1386270252Sluigi#ifdef WITH_PIPES
1387270252Sluigi			/* ugly, but we cannot allow an adapter switch
1388270252Sluigi			 * if some pipe is referring to this one
1389270252Sluigi			 */
1390270252Sluigi			|| prev_na->na_next_pipe > 0
1391270252Sluigi#endif
1392270252Sluigi		) {
1393270252Sluigi			*na = prev_na;
1394262151Sluigi			return 0;
1395262151Sluigi		}
1396262151Sluigi	}
1397251139Sluigi
1398262151Sluigi	/* If there isn't native support and netmap is not allowed
1399262151Sluigi	 * to use generic adapters, we cannot satisfy the request.
1400262151Sluigi	 */
1401262151Sluigi	if (!NETMAP_CAPABLE(ifp) && i == NETMAP_ADMODE_NATIVE)
1402262151Sluigi		return EOPNOTSUPP;
1403251139Sluigi
1404262151Sluigi	/* Otherwise, create a generic adapter and return it,
1405262151Sluigi	 * saving the previously used netmap adapter, if any.
1406262151Sluigi	 *
1407262151Sluigi	 * Note that here 'prev_na', if not NULL, MUST be a
1408262151Sluigi	 * native adapter, and CANNOT be a generic one. This is
1409262151Sluigi	 * true because generic adapters are created on demand, and
1410262151Sluigi	 * destroyed when not used anymore. Therefore, if the adapter
1411262151Sluigi	 * currently attached to an interface 'ifp' is generic, it
1412262151Sluigi	 * must be that
1413262151Sluigi	 * (NA(ifp)->active_fds > 0 || NETMAP_OWNED_BY_KERN(NA(ifp))).
1414262151Sluigi	 * Consequently, if NA(ifp) is generic, we will enter one of
1415262151Sluigi	 * the branches above. This ensures that we never override
1416262151Sluigi	 * a generic adapter with another generic adapter.
1417227614Sluigi	 */
1418262151Sluigi	prev_na = NA(ifp);
1419262151Sluigi	error = generic_netmap_attach(ifp);
1420262151Sluigi	if (error)
1421262151Sluigi		return error;
1422262151Sluigi
1423262151Sluigi	*na = NA(ifp);
1424262151Sluigi	gna = (struct netmap_generic_adapter*)NA(ifp);
1425262151Sluigi	gna->prev = prev_na; /* save old na */
1426262151Sluigi	if (prev_na != NULL) {
1427262151Sluigi		ifunit_ref(ifp->if_xname);
1428262151Sluigi		// XXX add a refcount ?
1429262151Sluigi		netmap_adapter_get(prev_na);
1430251139Sluigi	}
1431262151Sluigi	ND("Created generic NA %p (prev %p)", gna, gna->prev);
1432262151Sluigi
1433262151Sluigi	return 0;
1434227614Sluigi}
1435227614Sluigi
1436227614Sluigi
1437227614Sluigi/*
1438262151Sluigi * MUST BE CALLED UNDER NMG_LOCK()
1439262151Sluigi *
1440262151Sluigi * Get a refcounted reference to a netmap adapter attached
1441262151Sluigi * to the interface specified by nmr.
1442262151Sluigi * This is always called in the execution of an ioctl().
1443262151Sluigi *
1444262151Sluigi * Return ENXIO if the interface specified by the request does
1445262151Sluigi * not exist, ENOTSUP if netmap is not supported by the interface,
1446262151Sluigi * EBUSY if the interface is already attached to a bridge,
1447262151Sluigi * EINVAL if parameters are invalid, ENOMEM if needed resources
1448262151Sluigi * could not be allocated.
1449262151Sluigi * If successful, hold a reference to the netmap adapter.
1450262151Sluigi *
1451262151Sluigi * No reference is kept on the real interface, which may then
1452262151Sluigi * disappear at any time.
1453262151Sluigi */
1454262151Sluigiint
1455262151Sluiginetmap_get_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
1456262151Sluigi{
1457262151Sluigi	struct ifnet *ifp = NULL;
1458262151Sluigi	int error = 0;
1459262151Sluigi	struct netmap_adapter *ret = NULL;
1460262151Sluigi
1461262151Sluigi	*na = NULL;     /* default return value */
1462262151Sluigi
1463262151Sluigi	NMG_LOCK_ASSERT();
1464262151Sluigi
1465270252Sluigi	/* we cascade through all possible types of netmap adapter.
1466270252Sluigi	 * All netmap_get_*_na() functions return an error and an na,
1467270252Sluigi	 * with the following combinations:
1468270252Sluigi	 *
1469270252Sluigi	 * error    na
1470270252Sluigi	 *   0	   NULL		type doesn't match
1471270252Sluigi	 *  !0	   NULL		type matches, but na creation/lookup failed
1472270252Sluigi	 *   0	  !NULL		type matches and na created/found
1473270252Sluigi	 *  !0    !NULL		impossible
1474270252Sluigi	 */
1475270252Sluigi
1476270252Sluigi	/* try to see if this is a monitor port */
1477270252Sluigi	error = netmap_get_monitor_na(nmr, na, create);
1478270252Sluigi	if (error || *na != NULL)
1479270252Sluigi		return error;
1480270252Sluigi
1481270252Sluigi	/* try to see if this is a pipe port */
1482262151Sluigi	error = netmap_get_pipe_na(nmr, na, create);
1483262151Sluigi	if (error || *na != NULL)
1484262151Sluigi		return error;
1485262151Sluigi
1486270252Sluigi	/* try to see if this is a bridge port */
1487262151Sluigi	error = netmap_get_bdg_na(nmr, na, create);
1488262151Sluigi	if (error)
1489262151Sluigi		return error;
1490262151Sluigi
1491262151Sluigi	if (*na != NULL) /* valid match in netmap_get_bdg_na() */
1492262151Sluigi		goto pipes;
1493262151Sluigi
1494267282Sluigi	/*
1495267282Sluigi	 * This must be a hardware na, lookup the name in the system.
1496267282Sluigi	 * Note that by hardware we actually mean "it shows up in ifconfig".
1497267282Sluigi	 * This may still be a tap, a veth/epair, or even a
1498267282Sluigi	 * persistent VALE port.
1499267282Sluigi	 */
1500262151Sluigi	ifp = ifunit_ref(nmr->nr_name);
1501262151Sluigi	if (ifp == NULL) {
1502262151Sluigi		return ENXIO;
1503262151Sluigi	}
1504262151Sluigi
1505262151Sluigi	error = netmap_get_hw_na(ifp, &ret);
1506262151Sluigi	if (error)
1507262151Sluigi		goto out;
1508262151Sluigi
1509262151Sluigi	*na = ret;
1510262151Sluigi	netmap_adapter_get(ret);
1511262151Sluigi
1512262151Sluigipipes:
1513267282Sluigi	/*
1514267282Sluigi	 * If we are opening a pipe whose parent was not in netmap mode,
1515267282Sluigi	 * we have to allocate the pipe array now.
1516267282Sluigi	 * XXX get rid of this clumsiness (2014-03-15)
1517267282Sluigi	 */
1518262151Sluigi	error = netmap_pipe_alloc(*na, nmr);
1519262151Sluigi
1520262151Sluigiout:
1521262151Sluigi	if (error && ret != NULL)
1522262151Sluigi		netmap_adapter_put(ret);
1523262151Sluigi
1524262151Sluigi	if (ifp)
1525267282Sluigi		if_rele(ifp); /* allow live unloading of drivers modules */
1526262151Sluigi
1527262151Sluigi	return error;
1528262151Sluigi}
1529262151Sluigi
1530262151Sluigi
1531262151Sluigi/*
1532262151Sluigi * validate parameters on entry for *_txsync()
1533262151Sluigi * Returns ring->head if ok, or something >= kring->nkr_num_slots
1534262151Sluigi * in case of error.
1535262151Sluigi *
1536262151Sluigi * rhead, rcur and rtail (= hwtail) are the values stored in the previous round.
1537262151Sluigi * hwcur is the next packet to send to the ring.
1538262151Sluigi *
1539262151Sluigi * We want
1540262151Sluigi *    hwcur <= *rhead <= head <= cur <= tail = *rtail <= hwtail
1541262151Sluigi *
1542262151Sluigi * hwcur, rhead, rtail and hwtail are reliable
1543262151Sluigi */
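/*
 * Worked example (a sketch, not extra validation): with
 * nkr_num_slots = 1024, rhead = 100 and rtail = 500, userspace may move
 * head anywhere in [100..500] and cur anywhere in [head..500].
 * With a wrapped ring, e.g. rtail = 50 and rhead = 100, head must stay
 * outside (50..100); for head = 120, cur may be in [120..1023] or [0..50].
 */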
1544262151Sluigiu_int
1545262151Sluiginm_txsync_prologue(struct netmap_kring *kring)
1546262151Sluigi{
1547262151Sluigi	struct netmap_ring *ring = kring->ring;
1548262151Sluigi	u_int head = ring->head; /* read only once */
1549262151Sluigi	u_int cur = ring->cur; /* read only once */
1550262151Sluigi	u_int n = kring->nkr_num_slots;
1551262151Sluigi
1552262151Sluigi	ND(5, "%s kcur %d ktail %d head %d cur %d tail %d",
1553262151Sluigi		kring->name,
1554262151Sluigi		kring->nr_hwcur, kring->nr_hwtail,
1555262151Sluigi		ring->head, ring->cur, ring->tail);
1556262151Sluigi#if 1 /* kernel sanity checks; but we can trust the kring. */
1557262151Sluigi	if (kring->nr_hwcur >= n || kring->rhead >= n ||
1558262151Sluigi	    kring->rtail >= n ||  kring->nr_hwtail >= n)
1559262151Sluigi		goto error;
1560262151Sluigi#endif /* kernel sanity checks */
1561262151Sluigi	/*
1562262151Sluigi	 * user sanity checks. We only use 'cur',
1563262151Sluigi	 * A, B, ... are possible positions for cur:
1564262151Sluigi	 *
1565262151Sluigi	 *  0    A  cur   B  tail  C  n-1
1566262151Sluigi	 *  0    D  tail  E  cur   F  n-1
1567262151Sluigi	 *
1568262151Sluigi	 * B, F, D are valid. A, C, E are wrong
1569262151Sluigi	 */
1570262151Sluigi	if (kring->rtail >= kring->rhead) {
1571262151Sluigi		/* want rhead <= head <= rtail */
1572262151Sluigi		if (head < kring->rhead || head > kring->rtail)
1573262151Sluigi			goto error;
1574262151Sluigi		/* and also head <= cur <= rtail */
1575262151Sluigi		if (cur < head || cur > kring->rtail)
1576262151Sluigi			goto error;
1577262151Sluigi	} else { /* here rtail < rhead */
1578262151Sluigi		/* we need head outside rtail .. rhead */
1579262151Sluigi		if (head > kring->rtail && head < kring->rhead)
1580262151Sluigi			goto error;
1581262151Sluigi
1582262151Sluigi		/* two cases now: head <= rtail or head >= rhead  */
1583262151Sluigi		if (head <= kring->rtail) {
1584262151Sluigi			/* want head <= cur <= rtail */
1585262151Sluigi			if (cur < head || cur > kring->rtail)
1586262151Sluigi				goto error;
1587262151Sluigi		} else { /* head >= rhead */
1588262151Sluigi			/* cur must be outside rtail..head */
1589262151Sluigi			if (cur > kring->rtail && cur < head)
1590262151Sluigi				goto error;
1591262151Sluigi		}
1592262151Sluigi	}
1593262151Sluigi	if (ring->tail != kring->rtail) {
1594262151Sluigi		RD(5, "tail overwritten was %d need %d",
1595262151Sluigi			ring->tail, kring->rtail);
1596262151Sluigi		ring->tail = kring->rtail;
1597262151Sluigi	}
1598262151Sluigi	kring->rhead = head;
1599262151Sluigi	kring->rcur = cur;
1600262151Sluigi	return head;
1601262151Sluigi
1602262151Sluigierror:
1603262151Sluigi	RD(5, "%s kring error: hwcur %d rcur %d hwtail %d cur %d tail %d",
1604262151Sluigi		kring->name,
1605262151Sluigi		kring->nr_hwcur,
1606262151Sluigi		kring->rcur, kring->nr_hwtail,
1607262151Sluigi		cur, ring->tail);
1608262151Sluigi	return n;
1609262151Sluigi}
1610262151Sluigi
1611262151Sluigi
1612262151Sluigi/*
1613262151Sluigi * validate parameters on entry for *_rxsync()
1614262151Sluigi * Returns ring->head if ok, kring->nkr_num_slots on error.
1615262151Sluigi *
1616262151Sluigi * For a valid configuration,
1617262151Sluigi * hwcur <= head <= cur <= tail <= hwtail
1618262151Sluigi *
1619262151Sluigi * We only consider head and cur.
1620262151Sluigi * hwcur and hwtail are reliable.
1621262151Sluigi *
1622262151Sluigi */
1623262151Sluigiu_int
1624262151Sluiginm_rxsync_prologue(struct netmap_kring *kring)
1625262151Sluigi{
1626262151Sluigi	struct netmap_ring *ring = kring->ring;
1627262151Sluigi	uint32_t const n = kring->nkr_num_slots;
1628262151Sluigi	uint32_t head, cur;
1629262151Sluigi
1630262151Sluigi	ND("%s kc %d kt %d h %d c %d t %d",
1631262151Sluigi		kring->name,
1632262151Sluigi		kring->nr_hwcur, kring->nr_hwtail,
1633262151Sluigi		ring->head, ring->cur, ring->tail);
1634262151Sluigi	/*
1635262151Sluigi	 * Before storing the new values, we should check that they do not
1636262151Sluigi	 * move backwards. However:
1637262151Sluigi	 * - head is not an issue because the previous value is hwcur;
1638262151Sluigi	 * - cur could in principle go back, but it does not matter
1639262151Sluigi	 *   because we are processing a brand new rxsync().
1640262151Sluigi	 */
1641262151Sluigi	cur = kring->rcur = ring->cur;	/* read only once */
1642262151Sluigi	head = kring->rhead = ring->head;	/* read only once */
1643262151Sluigi#if 1 /* kernel sanity checks */
1644262151Sluigi	if (kring->nr_hwcur >= n || kring->nr_hwtail >= n)
1645262151Sluigi		goto error;
1646262151Sluigi#endif /* kernel sanity checks */
1647262151Sluigi	/* user sanity checks */
1648262151Sluigi	if (kring->nr_hwtail >= kring->nr_hwcur) {
1649262151Sluigi		/* want hwcur <= rhead <= hwtail */
1650262151Sluigi		if (head < kring->nr_hwcur || head > kring->nr_hwtail)
1651262151Sluigi			goto error;
1652262151Sluigi		/* and also rhead <= rcur <= hwtail */
1653262151Sluigi		if (cur < head || cur > kring->nr_hwtail)
1654262151Sluigi			goto error;
1655262151Sluigi	} else {
1656262151Sluigi		/* we need rhead outside hwtail..hwcur */
1657262151Sluigi		if (head < kring->nr_hwcur && head > kring->nr_hwtail)
1658262151Sluigi			goto error;
1659262151Sluigi		/* two cases now: head <= hwtail or head >= hwcur  */
1660262151Sluigi		if (head <= kring->nr_hwtail) {
1661262151Sluigi			/* want head <= cur <= hwtail */
1662262151Sluigi			if (cur < head || cur > kring->nr_hwtail)
1663262151Sluigi				goto error;
1664262151Sluigi		} else {
1665262151Sluigi			/* cur must be outside hwtail..head */
1666262151Sluigi			if (cur < head && cur > kring->nr_hwtail)
1667262151Sluigi				goto error;
1668262151Sluigi		}
1669262151Sluigi	}
1670262151Sluigi	if (ring->tail != kring->rtail) {
1671262151Sluigi		RD(5, "%s tail overwritten was %d need %d",
1672262151Sluigi			kring->name,
1673262151Sluigi			ring->tail, kring->rtail);
1674262151Sluigi		ring->tail = kring->rtail;
1675262151Sluigi	}
1676262151Sluigi	return head;
1677262151Sluigi
1678262151Sluigierror:
1679262151Sluigi	RD(5, "kring error: hwcur %d rcur %d hwtail %d head %d cur %d tail %d",
1680262151Sluigi		kring->nr_hwcur,
1681262151Sluigi		kring->rcur, kring->nr_hwtail,
1682262151Sluigi		kring->rhead, kring->rcur, ring->tail);
1683262151Sluigi	return n;
1684262151Sluigi}
1685262151Sluigi
1686262151Sluigi
1687262151Sluigi/*
1688227614Sluigi * Error routine called when txsync/rxsync detects an error.
1689262151Sluigi * Can't do much more than resetting head = cur = hwcur, tail = hwtail.
1690227614Sluigi * Return 1 on reinit.
1691228276Sluigi *
1692228276Sluigi * This routine is only called by the upper half of the kernel.
1693228276Sluigi * It only reads hwcur (which is changed only by the upper half, too)
1694262151Sluigi * and hwtail (which may be changed by the lower half, but only on
1695228276Sluigi * a tx ring and only to increase it, so any error will be recovered
1696228276Sluigi * on the next call). For the above, we don't strictly need to call
1697228276Sluigi * it under lock.
1698227614Sluigi */
1699227614Sluigiint
1700227614Sluiginetmap_ring_reinit(struct netmap_kring *kring)
1701227614Sluigi{
1702227614Sluigi	struct netmap_ring *ring = kring->ring;
1703227614Sluigi	u_int i, lim = kring->nkr_num_slots - 1;
1704227614Sluigi	int errors = 0;
1705227614Sluigi
1706262151Sluigi	// XXX KASSERT nm_kr_tryget
1707270252Sluigi	RD(10, "called for %s", kring->name);
1708262151Sluigi	// XXX probably wrong to trust userspace
1709262151Sluigi	kring->rhead = ring->head;
1710262151Sluigi	kring->rcur  = ring->cur;
1711262151Sluigi	kring->rtail = ring->tail;
1712262151Sluigi
1713227614Sluigi	if (ring->cur > lim)
1714227614Sluigi		errors++;
1715262151Sluigi	if (ring->head > lim)
1716262151Sluigi		errors++;
1717262151Sluigi	if (ring->tail > lim)
1718262151Sluigi		errors++;
1719227614Sluigi	for (i = 0; i <= lim; i++) {
1720227614Sluigi		u_int idx = ring->slot[i].buf_idx;
1721227614Sluigi		u_int len = ring->slot[i].len;
1722227614Sluigi		if (idx < 2 || idx >= netmap_total_buffers) {
1723262151Sluigi			RD(5, "bad index at slot %d idx %d len %d ", i, idx, len);
1724227614Sluigi			ring->slot[i].buf_idx = 0;
1725227614Sluigi			ring->slot[i].len = 0;
1726270252Sluigi		} else if (len > NETMAP_BUF_SIZE(kring->na)) {
1727227614Sluigi			ring->slot[i].len = 0;
1728262151Sluigi			RD(5, "bad len at slot %d idx %d len %d", i, idx, len);
1729227614Sluigi		}
1730227614Sluigi	}
1731227614Sluigi	if (errors) {
1732241719Sluigi		RD(10, "total %d errors", errors);
1733262151Sluigi		RD(10, "%s reinit, cur %d -> %d tail %d -> %d",
1734262151Sluigi			kring->name,
1735227614Sluigi			ring->cur, kring->nr_hwcur,
1736262151Sluigi			ring->tail, kring->nr_hwtail);
1737262151Sluigi		ring->head = kring->rhead = kring->nr_hwcur;
1738262151Sluigi		ring->cur  = kring->rcur  = kring->nr_hwcur;
1739262151Sluigi		ring->tail = kring->rtail = kring->nr_hwtail;
1740227614Sluigi	}
1741227614Sluigi	return (errors ? 1 : 0);
1742227614Sluigi}
1743227614Sluigi
1744270252Sluigi/* interpret the ringid and flags fields of an nmreq, by translating them
1745270252Sluigi * into a pair of intervals of ring indices:
1746270252Sluigi *
1747270252Sluigi * [priv->np_txqfirst, priv->np_txqlast) and
1748270252Sluigi * [priv->np_rxqfirst, priv->np_rxqlast)
1749270252Sluigi *
1750227614Sluigi */
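/*
 * Worked example (illustrative): on an adapter with 4 tx and 4 rx rings,
 * NR_REG_ALL_NIC gives tx [0,4) and rx [0,4); NR_REG_ONE_NIC with ring
 * id 2 gives tx [2,3) and rx [2,3); NR_REG_SW selects only the host
 * rings, i.e. tx [4,5) and rx [4,5).
 */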
1751270252Sluigiint
1752270252Sluiginetmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags)
1753227614Sluigi{
1754262151Sluigi	struct netmap_adapter *na = priv->np_na;
1755262151Sluigi	u_int j, i = ringid & NETMAP_RING_MASK;
1756262151Sluigi	u_int reg = flags & NR_REG_MASK;
1757227614Sluigi
1758262151Sluigi	if (reg == NR_REG_DEFAULT) {
1759262151Sluigi		/* convert from old ringid to flags */
1760262151Sluigi		if (ringid & NETMAP_SW_RING) {
1761262151Sluigi			reg = NR_REG_SW;
1762262151Sluigi		} else if (ringid & NETMAP_HW_RING) {
1763262151Sluigi			reg = NR_REG_ONE_NIC;
1764262151Sluigi		} else {
1765262151Sluigi			reg = NR_REG_ALL_NIC;
1766262151Sluigi		}
1767262151Sluigi		D("deprecated API, old ringid 0x%x -> ringid %x reg %d", ringid, i, reg);
1768227614Sluigi	}
1769262151Sluigi	switch (reg) {
1770262151Sluigi	case NR_REG_ALL_NIC:
1771262151Sluigi	case NR_REG_PIPE_MASTER:
1772262151Sluigi	case NR_REG_PIPE_SLAVE:
1773262151Sluigi		priv->np_txqfirst = 0;
1774262151Sluigi		priv->np_txqlast = na->num_tx_rings;
1775262151Sluigi		priv->np_rxqfirst = 0;
1776262151Sluigi		priv->np_rxqlast = na->num_rx_rings;
1777262151Sluigi		ND("%s %d %d", "ALL/PIPE",
1778262151Sluigi			priv->np_rxqfirst, priv->np_rxqlast);
1779262151Sluigi		break;
1780262151Sluigi	case NR_REG_SW:
1781262151Sluigi	case NR_REG_NIC_SW:
1782262151Sluigi		if (!(na->na_flags & NAF_HOST_RINGS)) {
1783262151Sluigi			D("host rings not supported");
1784262151Sluigi			return EINVAL;
1785262151Sluigi		}
1786262151Sluigi		priv->np_txqfirst = (reg == NR_REG_SW ?
1787262151Sluigi			na->num_tx_rings : 0);
1788262151Sluigi		priv->np_txqlast = na->num_tx_rings + 1;
1789262151Sluigi		priv->np_rxqfirst = (reg == NR_REG_SW ?
1790262151Sluigi			na->num_rx_rings : 0);
1791262151Sluigi		priv->np_rxqlast = na->num_rx_rings + 1;
1792262151Sluigi		ND("%s %d %d", reg == NR_REG_SW ? "SW" : "NIC+SW",
1793262151Sluigi			priv->np_rxqfirst, priv->np_rxqlast);
1794262151Sluigi		break;
1795262151Sluigi	case NR_REG_ONE_NIC:
1796262151Sluigi		if (i >= na->num_tx_rings && i >= na->num_rx_rings) {
1797262151Sluigi			D("invalid ring id %d", i);
1798262151Sluigi			return EINVAL;
1799262151Sluigi		}
1800262151Sluigi		/* if not enough rings, use the first one */
1801262151Sluigi		j = i;
1802262151Sluigi		if (j >= na->num_tx_rings)
1803262151Sluigi			j = 0;
1804262151Sluigi		priv->np_txqfirst = j;
1805262151Sluigi		priv->np_txqlast = j + 1;
1806262151Sluigi		j = i;
1807262151Sluigi		if (j >= na->num_rx_rings)
1808262151Sluigi			j = 0;
1809262151Sluigi		priv->np_rxqfirst = j;
1810262151Sluigi		priv->np_rxqlast = j + 1;
1811262151Sluigi		break;
1812262151Sluigi	default:
1813262151Sluigi		D("invalid regif type %d", reg);
1814262151Sluigi		return EINVAL;
1815227614Sluigi	}
1816262151Sluigi	priv->np_flags = (flags & ~NR_REG_MASK) | reg;
1817270252Sluigi
1818262151Sluigi	if (netmap_verbose) {
1819267282Sluigi		D("%s: tx [%d,%d) rx [%d,%d) id %d",
1820270252Sluigi			na->name,
1821262151Sluigi			priv->np_txqfirst,
1822262151Sluigi			priv->np_txqlast,
1823262151Sluigi			priv->np_rxqfirst,
1824262151Sluigi			priv->np_rxqlast,
1825262151Sluigi			i);
1826262151Sluigi	}
1827227614Sluigi	return 0;
1828227614Sluigi}
1829227614Sluigi
1830270252Sluigi
1831227614Sluigi/*
1832270252Sluigi * Set the ring ID. For devices with a single queue, a request
1833270252Sluigi * for all rings is the same as a single ring.
1834270252Sluigi */
1835270252Sluigistatic int
1836270252Sluiginetmap_set_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags)
1837270252Sluigi{
1838270252Sluigi	struct netmap_adapter *na = priv->np_na;
1839270252Sluigi	int error;
1840270252Sluigi
1841270252Sluigi	error = netmap_interp_ringid(priv, ringid, flags);
1842270252Sluigi	if (error) {
1843270252Sluigi		return error;
1844270252Sluigi	}
1845270252Sluigi
1846270252Sluigi	priv->np_txpoll = (ringid & NETMAP_NO_TX_POLL) ? 0 : 1;
1847270252Sluigi
1848270252Sluigi	/* optimization: count the users registered for more than
1849270252Sluigi	 * one ring, which are the ones sleeping on the global queue.
1850270252Sluigi	 * The default netmap_notify() callback will then
1851270252Sluigi	 * avoid signaling the global queue if nobody is using it
1852270252Sluigi	 */
1853270252Sluigi	if (nm_tx_si_user(priv))
1854270252Sluigi		na->tx_si_users++;
1855270252Sluigi	if (nm_rx_si_user(priv))
1856270252Sluigi		na->rx_si_users++;
1857270252Sluigi	return 0;
1858270252Sluigi}
1859270252Sluigi
1860270252Sluigi/*
1861251139Sluigi * Possibly move the interface to netmap mode.
1862251139Sluigi * On success it returns a pointer to the netmap_if, otherwise NULL.
1863262151Sluigi * This must be called with NMG_LOCK held.
1864270252Sluigi *
1865270252Sluigi * The following na callbacks are called in the process:
1866270252Sluigi *
1867270252Sluigi * na->nm_config()			[by netmap_update_config]
1868270252Sluigi * (get current number and size of rings)
1869270252Sluigi *
1870270252Sluigi *  	We have a generic one for linux (netmap_linux_config).
1871270252Sluigi *  	The bwrap has to override this, since it has to forward
1872270252Sluigi *  	the request to the wrapped adapter (netmap_bwrap_config).
1873270252Sluigi *
1874270252Sluigi *    	XXX netmap_if_new calls this again (2014-03-15)
1875270252Sluigi *
1876270252Sluigi * na->nm_krings_create()		[by netmap_if_new]
1877270252Sluigi * (create and init the krings array)
1878270252Sluigi *
1879270252Sluigi * 	One of the following:
1880270252Sluigi *
1881270252Sluigi *	* netmap_hw_krings_create, 			(hw ports)
1882270252Sluigi *		creates the standard layout for the krings
1883270252Sluigi * 		and adds the mbq (used for the host rings).
1884270252Sluigi *
1885270252Sluigi * 	* netmap_vp_krings_create			(VALE ports)
1886270252Sluigi * 		add leases and scratchpads
1887270252Sluigi *
1888270252Sluigi * 	* netmap_pipe_krings_create			(pipes)
1889270252Sluigi * 		create the krings and rings of both ends and
1890270252Sluigi * 		cross-link them
1891270252Sluigi *
1892270252Sluigi *      * netmap_monitor_krings_create 			(monitors)
1893270252Sluigi *      	avoid allocating the mbq
1894270252Sluigi *
1895270252Sluigi *      * netmap_bwrap_krings_create			(bwraps)
1896270252Sluigi *      	create both the brap krings array,
1897270252Sluigi *      	the krings array of the wrapped adapter, and
1898270252Sluigi *      	(if needed) the fake array for the host adapter
1899270252Sluigi *
1900270252Sluigi * na->nm_register(, 1)
1901270252Sluigi * (put the adapter in netmap mode)
1902270252Sluigi *
1903270252Sluigi * 	This may be one of the following:
1904270252Sluigi * 	(XXX these should be either all *_register or all *_reg 2014-03-15)
1905270252Sluigi *
1906270252Sluigi * 	* netmap_hw_register				(hw ports)
1907270252Sluigi * 		checks that the ifp is still there, then calls
1908270252Sluigi * 		the hardware specific callback;
1909270252Sluigi *
1910270252Sluigi * 	* netmap_vp_reg					(VALE ports)
1911270252Sluigi *		If the port is connected to a bridge,
1912270252Sluigi *		set the NAF_NETMAP_ON flag under the
1913270252Sluigi *		bridge write lock.
1914270252Sluigi *
1915270252Sluigi *	* netmap_pipe_reg				(pipes)
1916270252Sluigi *		inform the other pipe end that it is no
1917270252Sluigi *		longer responsible for the lifetime of this
1918270252Sluigi *		pipe end
1919270252Sluigi *
1920270252Sluigi *	* netmap_monitor_reg				(monitors)
1921270252Sluigi *		intercept the sync callbacks of the monitored
1922270252Sluigi *		rings
1923270252Sluigi *
1924270252Sluigi *	* netmap_bwrap_register				(bwraps)
1925270252Sluigi *		cross-link the bwrap and hwna rings,
1926270252Sluigi *		forward the request to the hwna, override
1927270252Sluigi *		the hwna notify callback (to get the frames
1928270252Sluigi *		coming from outside go through the bridge).
1929270252Sluigi *
1930270252Sluigi * XXX maybe netmap_if_new() should be merged with this (2014-03-15).
1931270252Sluigi *
1932251139Sluigi */
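/*
 * Condensed call flow for the common case (a sketch derived from the
 * description above and the code below; error handling omitted):
 *
 *	netmap_do_regif(priv, na, ringid, flags)
 *		netmap_update_config(na)	-> na->nm_config()
 *		netmap_set_ringid(priv, ringid, flags)
 *		netmap_get_memory_locked(priv)	   (only if not done yet)
 *		netmap_if_new(na)		-> na->nm_krings_create()
 *		na->nm_register(na, 1)		   (only for the first user)
 */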
1933262151Sluigistruct netmap_if *
1934262151Sluiginetmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
1935262151Sluigi	uint16_t ringid, uint32_t flags, int *err)
1936251139Sluigi{
1937251139Sluigi	struct netmap_if *nifp = NULL;
1938262151Sluigi	int error, need_mem = 0;
1939251139Sluigi
1940262151Sluigi	NMG_LOCK_ASSERT();
1941251139Sluigi	/* ring configuration may have changed, fetch from the card */
1942251139Sluigi	netmap_update_config(na);
1943262151Sluigi	priv->np_na = na;     /* store the reference */
1944262151Sluigi	error = netmap_set_ringid(priv, ringid, flags);
1945251139Sluigi	if (error)
1946251139Sluigi		goto out;
1947262151Sluigi	/* ensure allocators are ready */
1948262151Sluigi	need_mem = !netmap_have_memory_locked(priv);
1949262151Sluigi	if (need_mem) {
1950262151Sluigi		error = netmap_get_memory_locked(priv);
1951262151Sluigi		ND("get_memory returned %d", error);
1952262151Sluigi		if (error)
1953262151Sluigi			goto out;
1954262151Sluigi	}
1955267282Sluigi	/* Allocate a netmap_if and, if necessary, all the netmap_ring's */
1956270252Sluigi	nifp = netmap_if_new(na);
1957251139Sluigi	if (nifp == NULL) { /* allocation failed */
1958251139Sluigi		error = ENOMEM;
1959262151Sluigi		goto out;
1960262151Sluigi	}
1961262151Sluigi	na->active_fds++;
1962270252Sluigi	if (!nm_netmap_on(na)) {
1963270252Sluigi		/* Netmap not active, set the card in netmap mode
1964251139Sluigi		 * and make it use the shared buffers.
1965251139Sluigi		 */
1966267282Sluigi		/* cache the allocator info in the na */
1967270252Sluigi		na->na_lut = netmap_mem_get_lut(na->nm_mem);
1968262151Sluigi		ND("%p->na_lut == %p", na, na->na_lut);
1969270252Sluigi		na->na_lut_objtotal = netmap_mem_get_buftotal(na->nm_mem);
1970270252Sluigi		na->na_lut_objsize = netmap_mem_get_bufsize(na->nm_mem);
1971262151Sluigi		error = na->nm_register(na, 1); /* mode on */
1972251139Sluigi		if (error) {
1973262151Sluigi			netmap_do_unregif(priv, nifp);
1974251139Sluigi			nifp = NULL;
1975251139Sluigi		}
1976251139Sluigi	}
1977251139Sluigiout:
1978251139Sluigi	*err = error;
1979251139Sluigi	if (error) {
1980267282Sluigi		/* we should drop the allocator, but only
1981267282Sluigi		 * if we were the ones who grabbed it
1982267282Sluigi		 */
1983262151Sluigi		if (need_mem)
1984262151Sluigi			netmap_drop_memory_locked(priv);
1985270252Sluigi		priv->np_na = NULL;
1986251139Sluigi	}
1987262151Sluigi	if (nifp != NULL) {
1988262151Sluigi		/*
1989262151Sluigi		 * advertise that the interface is ready by setting np_nifp.
1990262151Sluigi		 * The barrier is needed because readers (poll and *SYNC)
1991262151Sluigi		 * check for priv->np_nifp != NULL without locking
1992251139Sluigi		 */
1993262151Sluigi		wmb(); /* make sure previous writes are visible to all CPUs */
1994262151Sluigi		priv->np_nifp = nifp;
1995251139Sluigi	}
1996262151Sluigi	return nifp;
1997251139Sluigi}
1998251139Sluigi
1999251139Sluigi
2000251139Sluigi
2001251139Sluigi/*
2002227614Sluigi * ioctl(2) support for the "netmap" device.
2003227614Sluigi *
2004227614Sluigi * Following a list of accepted commands:
2005227614Sluigi * - NIOCGINFO
2006227614Sluigi * - SIOCGIFADDR	just for convenience
2007227614Sluigi * - NIOCREGIF
2008227614Sluigi * - NIOCTXSYNC
2009227614Sluigi * - NIOCRXSYNC
2010227614Sluigi *
2011227614Sluigi * Return 0 on success, errno otherwise.
2012227614Sluigi */
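/*
 * Minimal userspace usage sketch (illustrative only; error handling and
 * the ring access helpers from the netmap(4) headers are omitted, and
 * "em0" is just a placeholder interface name):
 *
 *	struct nmreq req;
 *
 *	memset(&req, 0, sizeof(req));
 *	strncpy(req.nr_name, "em0", sizeof(req.nr_name));
 *	req.nr_version = NETMAP_API;
 *	fd = open("/dev/netmap", O_RDWR);
 *	ioctl(fd, NIOCREGIF, &req);		// bind fd to the interface
 *	mem = mmap(NULL, req.nr_memsize, PROT_READ | PROT_WRITE,
 *	    MAP_SHARED, fd, 0);			// map the shared region
 *	...					// fill/drain the rings
 *	ioctl(fd, NIOCTXSYNC, NULL);		// flush pending transmissions
 */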
2013262151Sluigiint
2014238912Sluiginetmap_ioctl(struct cdev *dev, u_long cmd, caddr_t data,
2015238912Sluigi	int fflag, struct thread *td)
2016227614Sluigi{
2017227614Sluigi	struct netmap_priv_d *priv = NULL;
2018227614Sluigi	struct nmreq *nmr = (struct nmreq *) data;
2019262151Sluigi	struct netmap_adapter *na = NULL;
2020227614Sluigi	int error;
2021262151Sluigi	u_int i, qfirst, qlast;
2022227614Sluigi	struct netmap_if *nifp;
2023262151Sluigi	struct netmap_kring *krings;
2024227614Sluigi
2025238912Sluigi	(void)dev;	/* UNUSED */
2026238912Sluigi	(void)fflag;	/* UNUSED */
2027238812Sluigi
2028262151Sluigi	if (cmd == NIOCGINFO || cmd == NIOCREGIF) {
2029262151Sluigi		/* truncate name */
2030262151Sluigi		nmr->nr_name[sizeof(nmr->nr_name) - 1] = '\0';
2031262151Sluigi		if (nmr->nr_version != NETMAP_API) {
2032262151Sluigi			D("API mismatch for %s got %d need %d",
2033262151Sluigi				nmr->nr_name,
2034262151Sluigi				nmr->nr_version, NETMAP_API);
2035262151Sluigi			nmr->nr_version = NETMAP_API;
2036262151Sluigi		}
2037262151Sluigi		if (nmr->nr_version < NETMAP_MIN_API ||
2038262151Sluigi		    nmr->nr_version > NETMAP_MAX_API) {
2039262151Sluigi			return EINVAL;
2040262151Sluigi		}
2041262151Sluigi	}
2042228276Sluigi	CURVNET_SET(TD_TO_VNET(td));
2043228276Sluigi
2044227614Sluigi	error = devfs_get_cdevpriv((void **)&priv);
2045241719Sluigi	if (error) {
2046228276Sluigi		CURVNET_RESTORE();
2047241719Sluigi		/* XXX ENOENT should be impossible, since the priv
2048241719Sluigi		 * is now created in the open */
2049241719Sluigi		return (error == ENOENT ? ENXIO : error);
2050228276Sluigi	}
2051227614Sluigi
2052227614Sluigi	switch (cmd) {
2053227614Sluigi	case NIOCGINFO:		/* return capabilities etc */
2054251139Sluigi		if (nmr->nr_cmd == NETMAP_BDG_LIST) {
2055251139Sluigi			error = netmap_bdg_ctl(nmr, NULL);
2056251139Sluigi			break;
2057251139Sluigi		}
2058262151Sluigi
2059262151Sluigi		NMG_LOCK();
2060262151Sluigi		do {
2061262151Sluigi			/* memsize is always valid */
2062262151Sluigi			struct netmap_mem_d *nmd = &nm_mem;
2063262151Sluigi			u_int memflags;
2064262151Sluigi
2065262151Sluigi			if (nmr->nr_name[0] != '\0') {
2066262151Sluigi				/* get a refcount */
2067262151Sluigi				error = netmap_get_na(nmr, &na, 1 /* create */);
2068262151Sluigi				if (error)
2069262151Sluigi					break;
2070262151Sluigi				nmd = na->nm_mem; /* get memory allocator */
2071262151Sluigi			}
2072262151Sluigi
2073262151Sluigi			error = netmap_mem_get_info(nmd, &nmr->nr_memsize, &memflags,
2074262151Sluigi				&nmr->nr_arg2);
2075262151Sluigi			if (error)
2076262151Sluigi				break;
2077262151Sluigi			if (na == NULL) /* only memory info */
2078262151Sluigi				break;
2079262151Sluigi			nmr->nr_offset = 0;
2080262151Sluigi			nmr->nr_rx_slots = nmr->nr_tx_slots = 0;
2081262151Sluigi			netmap_update_config(na);
2082262151Sluigi			nmr->nr_rx_rings = na->num_rx_rings;
2083262151Sluigi			nmr->nr_tx_rings = na->num_tx_rings;
2084262151Sluigi			nmr->nr_rx_slots = na->num_rx_desc;
2085262151Sluigi			nmr->nr_tx_slots = na->num_tx_desc;
2086262151Sluigi			netmap_adapter_put(na);
2087262151Sluigi		} while (0);
2088262151Sluigi		NMG_UNLOCK();
2089227614Sluigi		break;
2090227614Sluigi
2091227614Sluigi	case NIOCREGIF:
2092251139Sluigi		/* possibly attach/detach NIC and VALE switch */
2093251139Sluigi		i = nmr->nr_cmd;
2094262151Sluigi		if (i == NETMAP_BDG_ATTACH || i == NETMAP_BDG_DETACH
2095270252Sluigi				|| i == NETMAP_BDG_VNET_HDR
2096270252Sluigi				|| i == NETMAP_BDG_NEWIF
2097270252Sluigi				|| i == NETMAP_BDG_DELIF) {
2098251139Sluigi			error = netmap_bdg_ctl(nmr, NULL);
2099251139Sluigi			break;
2100251139Sluigi		} else if (i != 0) {
2101251139Sluigi			D("nr_cmd must be 0 not %d", i);
2102251139Sluigi			error = EINVAL;
2103251139Sluigi			break;
2104251139Sluigi		}
2105251139Sluigi
2106241719Sluigi		/* protect access to priv from concurrent NIOCREGIF */
2107262151Sluigi		NMG_LOCK();
2108262151Sluigi		do {
2109262151Sluigi			u_int memflags;
2110227614Sluigi
2111262151Sluigi			if (priv->np_na != NULL) {	/* thread already registered */
2112262151Sluigi				error = EBUSY;
2113262151Sluigi				break;
2114262151Sluigi			}
2115262151Sluigi			/* find the interface and a reference */
2116262151Sluigi			error = netmap_get_na(nmr, &na, 1 /* create */); /* keep reference */
2117262151Sluigi			if (error)
2118262151Sluigi				break;
2119262151Sluigi			if (NETMAP_OWNED_BY_KERN(na)) {
2120262151Sluigi				netmap_adapter_put(na);
2121262151Sluigi				error = EBUSY;
2122262151Sluigi				break;
2123262151Sluigi			}
2124262151Sluigi			nifp = netmap_do_regif(priv, na, nmr->nr_ringid, nmr->nr_flags, &error);
2125262151Sluigi			if (!nifp) {    /* reg. failed, release priv and ref */
2126262151Sluigi				netmap_adapter_put(na);
2127262151Sluigi				priv->np_nifp = NULL;
2128262151Sluigi				break;
2129262151Sluigi			}
2130262151Sluigi			priv->np_td = td; // XXX kqueue, debugging only
2131227614Sluigi
2132262151Sluigi			/* return the offset of the netmap_if object */
2133262151Sluigi			nmr->nr_rx_rings = na->num_rx_rings;
2134262151Sluigi			nmr->nr_tx_rings = na->num_tx_rings;
2135262151Sluigi			nmr->nr_rx_slots = na->num_rx_desc;
2136262151Sluigi			nmr->nr_tx_slots = na->num_tx_desc;
2137262151Sluigi			error = netmap_mem_get_info(na->nm_mem, &nmr->nr_memsize, &memflags,
2138262151Sluigi				&nmr->nr_arg2);
2139262151Sluigi			if (error) {
2140262151Sluigi				netmap_adapter_put(na);
2141262151Sluigi				break;
2142262151Sluigi			}
2143262151Sluigi			if (memflags & NETMAP_MEM_PRIVATE) {
2144262151Sluigi				*(uint32_t *)(uintptr_t)&nifp->ni_flags |= NI_PRIV_MEM;
2145262151Sluigi			}
2146262151Sluigi			priv->np_txsi = (priv->np_txqlast - priv->np_txqfirst > 1) ?
2147262151Sluigi				&na->tx_si : &na->tx_rings[priv->np_txqfirst].si;
2148262151Sluigi			priv->np_rxsi = (priv->np_rxqlast - priv->np_rxqfirst > 1) ?
2149262151Sluigi				&na->rx_si : &na->rx_rings[priv->np_rxqfirst].si;
2150227614Sluigi
2151262151Sluigi			if (nmr->nr_arg3) {
2152262151Sluigi				D("requested %d extra buffers", nmr->nr_arg3);
2153262151Sluigi				nmr->nr_arg3 = netmap_extra_alloc(na,
2154262151Sluigi					&nifp->ni_bufs_head, nmr->nr_arg3);
2155262151Sluigi				D("got %d extra buffers", nmr->nr_arg3);
2156262151Sluigi			}
2157262151Sluigi			nmr->nr_offset = netmap_mem_if_offset(na->nm_mem, nifp);
2158262151Sluigi		} while (0);
2159262151Sluigi		NMG_UNLOCK();
2160241719Sluigi		break;
2161241719Sluigi
2162241719Sluigi	case NIOCTXSYNC:
2163241719Sluigi	case NIOCRXSYNC:
2164241719Sluigi		nifp = priv->np_nifp;
2165241719Sluigi
2166241719Sluigi		if (nifp == NULL) {
2167228276Sluigi			error = ENXIO;
2168228276Sluigi			break;
2169228276Sluigi		}
2170278779Sluigi		mb(); /* make sure following reads are not from cache */
2171227614Sluigi
2172262151Sluigi		na = priv->np_na;      /* we have a reference */
2173227614Sluigi
2174262151Sluigi		if (na == NULL) {
2175262151Sluigi			D("Internal error: nifp != NULL && na == NULL");
2176262151Sluigi			error = ENXIO;
2177262151Sluigi			break;
2178262151Sluigi		}
2179241719Sluigi
2180270252Sluigi		if (!nm_netmap_on(na)) {
2181228276Sluigi			error = ENXIO;
2182228276Sluigi			break;
2183228276Sluigi		}
2184241719Sluigi
2185262151Sluigi		if (cmd == NIOCTXSYNC) {
2186262151Sluigi			krings = na->tx_rings;
2187262151Sluigi			qfirst = priv->np_txqfirst;
2188262151Sluigi			qlast = priv->np_txqlast;
2189262151Sluigi		} else {
2190262151Sluigi			krings = na->rx_rings;
2191262151Sluigi			qfirst = priv->np_rxqfirst;
2192262151Sluigi			qlast = priv->np_rxqlast;
2193227614Sluigi		}
2194227614Sluigi
2195262151Sluigi		for (i = qfirst; i < qlast; i++) {
2196262151Sluigi			struct netmap_kring *kring = krings + i;
2197262151Sluigi			if (nm_kr_tryget(kring)) {
2198262151Sluigi				error = EBUSY;
2199262151Sluigi				goto out;
2200262151Sluigi			}
2201234174Sluigi			if (cmd == NIOCTXSYNC) {
2202234174Sluigi				if (netmap_verbose & NM_VERB_TXSYNC)
2203234174Sluigi					D("pre txsync ring %d cur %d hwcur %d",
2204234174Sluigi					    i, kring->ring->cur,
2205234174Sluigi					    kring->nr_hwcur);
2206262151Sluigi				if (nm_txsync_prologue(kring) >= kring->nkr_num_slots) {
2207262151Sluigi					netmap_ring_reinit(kring);
2208262151Sluigi				} else {
2209262151Sluigi					kring->nm_sync(kring, NAF_FORCE_RECLAIM);
2210262151Sluigi				}
2211234174Sluigi				if (netmap_verbose & NM_VERB_TXSYNC)
2212234174Sluigi					D("post txsync ring %d cur %d hwcur %d",
2213234174Sluigi					    i, kring->ring->cur,
2214234174Sluigi					    kring->nr_hwcur);
2215234174Sluigi			} else {
2216262151Sluigi				kring->nm_sync(kring, NAF_FORCE_READ);
2217234174Sluigi				microtime(&na->rx_rings[i].ring->ts);
2218234174Sluigi			}
2219262151Sluigi			nm_kr_put(kring);
2220227614Sluigi		}
2221227614Sluigi
2222234174Sluigi		break;
2223227614Sluigi
2224270252Sluigi	case NIOCCONFIG:
2225270252Sluigi		error = netmap_bdg_config(nmr);
2226270252Sluigi		break;
2227238812Sluigi#ifdef __FreeBSD__
2228262151Sluigi	case FIONBIO:
2229262151Sluigi	case FIOASYNC:
2230262151Sluigi		ND("FIONBIO/FIOASYNC are no-ops");
2231262151Sluigi		break;
2232262151Sluigi
2233227614Sluigi	case BIOCIMMEDIATE:
2234227614Sluigi	case BIOCGHDRCMPLT:
2235227614Sluigi	case BIOCSHDRCMPLT:
2236227614Sluigi	case BIOCSSEESENT:
2237227614Sluigi		D("ignore BIOCIMMEDIATE/BIOCSHDRCMPLT/BIOCSHDRCMPLT/BIOCSSEESENT");
2238227614Sluigi		break;
2239227614Sluigi
2240231881Sluigi	default:	/* allow device-specific ioctls */
2241227614Sluigi	    {
2242272604Sluigi		struct ifnet *ifp = ifunit_ref(nmr->nr_name);
2243272604Sluigi		if (ifp == NULL) {
2244272604Sluigi			error = ENXIO;
2245272604Sluigi		} else {
2246272604Sluigi			struct socket so;
2247262151Sluigi
2248272604Sluigi			bzero(&so, sizeof(so));
2249272604Sluigi			so.so_vnet = ifp->if_vnet;
2250272604Sluigi			// so->so_proto not null.
2251272604Sluigi			error = ifioctl(&so, cmd, data, td);
2252272604Sluigi			if_rele(ifp);
2253262151Sluigi		}
2254231881Sluigi		break;
2255227614Sluigi	    }
2256238812Sluigi
2257238812Sluigi#else /* linux */
2258238812Sluigi	default:
2259238812Sluigi		error = EOPNOTSUPP;
2260238812Sluigi#endif /* linux */
2261227614Sluigi	}
2262262151Sluigiout:
2263227614Sluigi
2264228276Sluigi	CURVNET_RESTORE();
2265227614Sluigi	return (error);
2266227614Sluigi}
2267227614Sluigi
2268227614Sluigi
2269227614Sluigi/*
2270227614Sluigi * select(2) and poll(2) handlers for the "netmap" device.
2271227614Sluigi *
2272227614Sluigi * Can be called for one or more queues.
2273227614Sluigi * Return the event mask corresponding to ready events.
2274227614Sluigi * If there are no ready events, do a selrecord on either individual
2275262151Sluigi * selinfo or on the global one.
2276227614Sluigi * Device-dependent parts (locking and sync of tx/rx rings)
2277227614Sluigi * are done through callbacks.
2278238812Sluigi *
2279238837Sluigi * On linux, the arguments are really pwait, the poll table, and 'td' is a struct file *.
2280238837Sluigi * The first one is remapped to pwait as selrecord() uses the name as a
2281238837Sluigi * hidden argument.
2282227614Sluigi */
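/*
 * Userspace counterpart, for reference (a sketch; fd is assumed to be
 * already bound with NIOCREGIF and the region mmapped as shown earlier):
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };
 *
 *	poll(&pfd, 1, 1000);	// wait up to 1s for ready rx/tx slots
 *	// on POLLIN/POLLOUT the rings in the shared region are up to date
 */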
2283262151Sluigiint
2284238837Sluiginetmap_poll(struct cdev *dev, int events, struct thread *td)
2285227614Sluigi{
2286227614Sluigi	struct netmap_priv_d *priv = NULL;
2287227614Sluigi	struct netmap_adapter *na;
2288227614Sluigi	struct netmap_kring *kring;
2289262151Sluigi	u_int i, check_all_tx, check_all_rx, want_tx, want_rx, revents = 0;
2290262151Sluigi	struct mbq q;		/* packets from hw queues to host stack */
2291238837Sluigi	void *pwait = dev;	/* linux compatibility */
2292262151Sluigi	int is_kevent = 0;
2293227614Sluigi
2294262151Sluigi	/*
2295262151Sluigi	 * In order to avoid nested locks, we need to "double check"
2296262151Sluigi	 * txsync and rxsync if we decide to do a selrecord().
2297262151Sluigi	 * retry_tx (and retry_rx, later) prevent looping forever.
2298262151Sluigi	 */
2299262151Sluigi	int retry_tx = 1, retry_rx = 1;
2300262151Sluigi
2301238837Sluigi	(void)pwait;
2302262151Sluigi	mbq_init(&q);
2303238837Sluigi
2304262151Sluigi	/*
2305262151Sluigi	 * XXX kevent has curthread->td_fpop == NULL,
2306262151Sluigi	 * so devfs_get_cdevpriv() fails. We circumvent this by passing
2307262151Sluigi	 * priv as the first argument, which is also useful to avoid
2308262151Sluigi	 * the selrecord() calls, which are not necessary in that case.
2309262151Sluigi	 */
2310262151Sluigi	if (devfs_get_cdevpriv((void **)&priv) != 0) {
2311262151Sluigi		is_kevent = 1;
2312262151Sluigi		if (netmap_verbose)
2313262151Sluigi			D("called from kevent");
2314262151Sluigi		priv = (struct netmap_priv_d *)dev;
2315262151Sluigi	}
2316262151Sluigi	if (priv == NULL)
2317227614Sluigi		return POLLERR;
2318227614Sluigi
2319241719Sluigi	if (priv->np_nifp == NULL) {
2320241719Sluigi		D("No if registered");
2321241719Sluigi		return POLLERR;
2322241719Sluigi	}
2323241719Sluigi	rmb(); /* make sure following reads are not from cache */
2324241719Sluigi
2325262151Sluigi	na = priv->np_na;
2326262151Sluigi
2327270252Sluigi	if (!nm_netmap_on(na))
2328227614Sluigi		return POLLERR;
2329227614Sluigi
2330227614Sluigi	if (netmap_verbose & 0x8000)
2331270252Sluigi		D("device %s events 0x%x", na->name, events);
2332227614Sluigi	want_tx = events & (POLLOUT | POLLWRNORM);
2333227614Sluigi	want_rx = events & (POLLIN | POLLRDNORM);
2334227614Sluigi
2335227614Sluigi
2336227614Sluigi	/*
2337262151Sluigi	 * check_all_{tx|rx} are set if the card has more than one queue AND
2338262151Sluigi	 * the file descriptor is bound to all of them. If so, we sleep on
2339262151Sluigi	 * the "global" selinfo, otherwise we sleep on individual selinfo
2340262151Sluigi	 * (FreeBSD only allows two selinfo's per file descriptor).
2341262151Sluigi	 * The interrupt routine in the driver wakes one or the other
2342262151Sluigi	 * (or both) depending on which clients are active.
2343227614Sluigi	 *
2344227614Sluigi	 * rxsync() is only called if we run out of buffers on a POLLIN.
2345227614Sluigi	 * txsync() is called if we run out of buffers on POLLOUT, or
2346227614Sluigi	 * there are pending packets to send. The latter can be disabled
2347227614Sluigi	 * passing NETMAP_NO_TX_POLL in the NIOCREG call.
2348227614Sluigi	 */
2349262151Sluigi	check_all_tx = nm_tx_si_user(priv);
2350262151Sluigi	check_all_rx = nm_rx_si_user(priv);
2351227614Sluigi
2352227614Sluigi	/*
2353262151Sluigi	 * We start with a lock-free round, which is cheap if we have
2354262151Sluigi	 * slots available. If this fails, then lock and call the sync
2355227614Sluigi	 * routines.
2356227614Sluigi	 */
2357262151Sluigi	for (i = priv->np_rxqfirst; want_rx && i < priv->np_rxqlast; i++) {
2358232238Sluigi		kring = &na->rx_rings[i];
2359262151Sluigi		/* XXX compare ring->cur and kring->tail */
2360262151Sluigi		if (!nm_ring_empty(kring->ring)) {
2361232238Sluigi			revents |= want_rx;
2362232238Sluigi			want_rx = 0;	/* also breaks the loop */
2363227614Sluigi		}
2364232238Sluigi	}
2365262151Sluigi	for (i = priv->np_txqfirst; want_tx && i < priv->np_txqlast; i++) {
2366232238Sluigi		kring = &na->tx_rings[i];
2367262151Sluigi		/* XXX compare ring->cur and kring->tail */
2368262151Sluigi		if (!nm_ring_empty(kring->ring)) {
2369232238Sluigi			revents |= want_tx;
2370232238Sluigi			want_tx = 0;	/* also breaks the loop */
2371227614Sluigi		}
2372232238Sluigi	}
2373227614Sluigi
2374227614Sluigi	/*
2375262151Sluigi	 * If we want to push packets out (priv->np_txpoll) or
2376262151Sluigi	 * want_tx is still set, we must issue txsync calls
2377262151Sluigi	 * (on all rings, to avoid that the tx rings stall).
2378262151Sluigi	 * XXX should also check cur != hwcur on the tx rings.
2379262151Sluigi	 * Fortunately, normal tx mode has np_txpoll set.
2380227614Sluigi	 */
2381227614Sluigi	if (priv->np_txpoll || want_tx) {
2382262151Sluigi		/*
2383262151Sluigi		 * The first round checks if anyone is ready, if not
2384262151Sluigi		 * do a selrecord and another round to handle races.
2385262151Sluigi		 * want_tx goes to 0 if any space is found, and is
2386262151Sluigi		 * used to skip rings with no pending transmissions.
2387262151Sluigi		 */
2388245836Sluigiflush_tx:
2389262151Sluigi		for (i = priv->np_txqfirst; i < priv->np_txqlast; i++) {
2390262151Sluigi			int found = 0;
2391262151Sluigi
2392227614Sluigi			kring = &na->tx_rings[i];
2393227614Sluigi			if (!want_tx && kring->ring->cur == kring->nr_hwcur)
2394227614Sluigi				continue;
2395262151Sluigi			/* only one thread does txsync */
2396262151Sluigi			if (nm_kr_tryget(kring)) {
2397267282Sluigi				/* either busy or stopped
2398267282Sluigi				 * XXX if the ring is stopped, sleeping would
2399267282Sluigi				 * be better. In current code, however, we only
2400267282Sluigi				 * stop the rings for brief intervals (2014-03-14)
2401267282Sluigi				 */
2402262151Sluigi				if (netmap_verbose)
2403262151Sluigi					RD(2, "%p lost race on txring %d, ok",
2404262151Sluigi					    priv, i);
2405262151Sluigi				continue;
2406227614Sluigi			}
2407262151Sluigi			if (nm_txsync_prologue(kring) >= kring->nkr_num_slots) {
2408262151Sluigi				netmap_ring_reinit(kring);
2409227614Sluigi				revents |= POLLERR;
2410262151Sluigi			} else {
2411262151Sluigi				if (kring->nm_sync(kring, 0))
2412262151Sluigi					revents |= POLLERR;
2413262151Sluigi			}
2414227614Sluigi
2415262151Sluigi			/*
2416262151Sluigi			 * If we found new slots, notify potential
2417262151Sluigi			 * listeners on the same ring.
2418262151Sluigi			 * Since we just did a txsync, look at the copies
2419262151Sluigi			 * of cur,tail in the kring.
2420262151Sluigi			 */
2421262151Sluigi			found = kring->rcur != kring->rtail;
2422262151Sluigi			nm_kr_put(kring);
2423262151Sluigi			if (found) { /* notify other listeners */
2424262151Sluigi				revents |= want_tx;
2425262151Sluigi				want_tx = 0;
2426262151Sluigi				na->nm_notify(na, i, NR_TX, 0);
2427227614Sluigi			}
2428227614Sluigi		}
2429262151Sluigi		if (want_tx && retry_tx && !is_kevent) {
2430278779Sluigi			OS_selrecord(td, check_all_tx ?
2431262151Sluigi			    &na->tx_si : &na->tx_rings[priv->np_txqfirst].si);
2432262151Sluigi			retry_tx = 0;
2433262151Sluigi			goto flush_tx;
2434262151Sluigi		}
2435227614Sluigi	}
2436227614Sluigi
2437227614Sluigi	/*
2438262151Sluigi	 * If want_rx is still set scan receive rings.
2439227614Sluigi	 * Do it on all rings because otherwise we starve.
2440227614Sluigi	 */
2441227614Sluigi	if (want_rx) {
2442262151Sluigi		int send_down = 0; /* transparent mode */
2443267282Sluigi		/* two rounds here for race avoidance */
2444262151Sluigido_retry_rx:
2445262151Sluigi		for (i = priv->np_rxqfirst; i < priv->np_rxqlast; i++) {
2446262151Sluigi			int found = 0;
2447262151Sluigi
2448227614Sluigi			kring = &na->rx_rings[i];
2449262151Sluigi
2450262151Sluigi			if (nm_kr_tryget(kring)) {
2451262151Sluigi				if (netmap_verbose)
2452262151Sluigi					RD(2, "%p lost race on rxring %d, ok",
2453262151Sluigi					    priv, i);
2454262151Sluigi				continue;
2455227614Sluigi			}
2456262151Sluigi
2457262151Sluigi			/*
2458262151Sluigi			 * transparent mode support: collect packets
2459262151Sluigi			 * from the rxring(s).
2460270252Sluigi			 * XXX NR_FORWARD should only be read on
2461270252Sluigi			 * physical or NIC ports
2462262151Sluigi			 */
2463245836Sluigi			if (netmap_fwd || kring->ring->flags & NR_FORWARD) {
2464245836Sluigi				ND(10, "forwarding some buffers up %d to %d",
2465245836Sluigi				    kring->nr_hwcur, kring->ring->cur);
2466245836Sluigi				netmap_grab_packets(kring, &q, netmap_fwd);
2467245836Sluigi			}
2468227614Sluigi
2469262151Sluigi			if (kring->nm_sync(kring, 0))
2470227614Sluigi				revents |= POLLERR;
2471231198Sluigi			if (netmap_no_timestamp == 0 ||
2472231198Sluigi					kring->ring->flags & NR_TIMESTAMP) {
2473227614Sluigi				microtime(&kring->ring->ts);
2474231198Sluigi			}
2475262151Sluigi			/* after an rxsync we can use kring->rcur, rtail */
2476262151Sluigi			found = kring->rcur != kring->rtail;
2477262151Sluigi			nm_kr_put(kring);
2478262151Sluigi			if (found) {
2479227614Sluigi				revents |= want_rx;
2480262151Sluigi				retry_rx = 0;
2481262151Sluigi				na->nm_notify(na, i, NR_RX, 0);
2482262151Sluigi			}
2483227614Sluigi		}
2484245836Sluigi
2485262151Sluigi		/* transparent mode XXX only during first pass ? */
2486262151Sluigi		if (na->na_flags & NAF_HOST_RINGS) {
2487262151Sluigi			kring = &na->rx_rings[na->num_rx_rings];
2488270252Sluigi			if (check_all_rx
2489270252Sluigi			    && (netmap_fwd || kring->ring->flags & NR_FORWARD)) {
2490270252Sluigi				/* XXX fix to use kring fields */
2491270252Sluigi				if (nm_ring_empty(kring->ring))
2492270252Sluigi					send_down = netmap_rxsync_from_host(na, td, dev);
2493270252Sluigi				if (!nm_ring_empty(kring->ring))
2494270252Sluigi					revents |= want_rx;
2495262151Sluigi			}
2496245836Sluigi		}
2497262151Sluigi
2498262151Sluigi		if (retry_rx && !is_kevent)
2499278779Sluigi			OS_selrecord(td, check_all_rx ?
2500262151Sluigi			    &na->rx_si : &na->rx_rings[priv->np_rxqfirst].si);
2501262151Sluigi		if (send_down > 0 || retry_rx) {
2502262151Sluigi			retry_rx = 0;
2503262151Sluigi			if (send_down)
2504262151Sluigi				goto flush_tx; /* and retry_rx */
2505262151Sluigi			else
2506262151Sluigi				goto do_retry_rx;
2507262151Sluigi		}
2508245836Sluigi	}
2509245836Sluigi
2510262151Sluigi	/*
2511262151Sluigi	 * Transparent mode: marked bufs on rx rings between
2512262151Sluigi	 * kring->nr_hwcur and ring->head
2513262151Sluigi	 * are passed to the other endpoint.
2514267282Sluigi	 *
2515262151Sluigi	 * In this mode we also scan the sw rxring, which in
2516262151Sluigi	 * turn passes packets up.
2517262151Sluigi	 *
2518262151Sluigi	 * XXX Transparent mode at the moment requires binding all
2519262151Sluigi	 * rings to a single file descriptor.
2520262151Sluigi	 */
2521262151Sluigi
2522270252Sluigi	if (q.head && na->ifp != NULL)
2523262151Sluigi		netmap_send_up(na->ifp, &q);
2524227614Sluigi
2525227614Sluigi	return (revents);
2526227614Sluigi}
2527227614Sluigi
2528227614Sluigi
2529262151Sluigi/*-------------------- driver support routines -------------------*/
2530251139Sluigi
2531262151Sluigistatic int netmap_hw_krings_create(struct netmap_adapter *);
2532262151Sluigi
2533267282Sluigi/* default notify callback */
2534262151Sluigistatic int
2535262151Sluiginetmap_notify(struct netmap_adapter *na, u_int n_ring,
2536262151Sluigi	enum txrx tx, int flags)
2537231594Sluigi{
2538262151Sluigi	struct netmap_kring *kring;
2539231594Sluigi
2540262151Sluigi	if (tx == NR_TX) {
2541262151Sluigi		kring = na->tx_rings + n_ring;
2542262151Sluigi		OS_selwakeup(&kring->si, PI_NET);
2543267282Sluigi		/* optimization: avoid a wake up on the global
2544267282Sluigi		 * queue if nobody has registered for more
2545267282Sluigi		 * than one ring
2546267282Sluigi		 */
2547262151Sluigi		if (na->tx_si_users > 0)
2548262151Sluigi			OS_selwakeup(&na->tx_si, PI_NET);
2549262151Sluigi	} else {
2550262151Sluigi		kring = na->rx_rings + n_ring;
2551262151Sluigi		OS_selwakeup(&kring->si, PI_NET);
2552267282Sluigi		/* optimization: same as above */
2553262151Sluigi		if (na->rx_si_users > 0)
2554262151Sluigi			OS_selwakeup(&na->rx_si, PI_NET);
2555262151Sluigi	}
2556262151Sluigi	return 0;
2557262151Sluigi}
2558231594Sluigi
2559231594Sluigi
2560267282Sluigi/* called by all routines that create netmap_adapters.
2561267282Sluigi * Attach na to the ifp (if any) and provide defaults
2562267282Sluigi * for optional callbacks. Defaults assume that we
2563267282Sluigi * are creating a hardware netmap_adapter.
2564267282Sluigi */
2565262151Sluigiint
2566262151Sluiginetmap_attach_common(struct netmap_adapter *na)
2567262151Sluigi{
2568262151Sluigi	struct ifnet *ifp = na->ifp;
2569231594Sluigi
2570262151Sluigi	if (na->num_tx_rings == 0 || na->num_rx_rings == 0) {
2571262151Sluigi		D("%s: invalid rings tx %d rx %d",
2572270252Sluigi			na->name, na->num_tx_rings, na->num_rx_rings);
2573262151Sluigi		return EINVAL;
2574262151Sluigi	}
2575270252Sluigi	/* ifp is NULL for virtual adapters (bwrap, non-persistent VALE ports,
2576270252Sluigi	 * pipes, monitors). For bwrap we actually have a non-null ifp for
2577270252Sluigi	 * use by the external modules, but that is set after this
2578270252Sluigi	 * function has been called.
2579270252Sluigi	 * XXX this is ugly, maybe split this function in two (2014-03-14)
2580270252Sluigi	 */
2581270252Sluigi	if (ifp != NULL) {
2582270252Sluigi		WNA(ifp) = na;
2583231594Sluigi
2584262151Sluigi		/* the following is only needed for adapters using the host port.
2585262151Sluigi		 * XXX do we have something similar for Linux?
2586262151Sluigi		 */
2587262151Sluigi#ifdef __FreeBSD__
2588270252Sluigi		na->if_input = ifp->if_input; /* for netmap_send_up */
2589262151Sluigi#endif /* __FreeBSD__ */
2590231594Sluigi
2591270252Sluigi		NETMAP_SET_CAPABLE(ifp);
2592270252Sluigi	}
2593262151Sluigi	if (na->nm_krings_create == NULL) {
2594267282Sluigi		/* we assume that we have been called by a driver,
2595267282Sluigi		 * since other port types all provide their own
2596267282Sluigi		 * nm_krings_create
2597267282Sluigi		 */
2598262151Sluigi		na->nm_krings_create = netmap_hw_krings_create;
2599262151Sluigi		na->nm_krings_delete = netmap_hw_krings_delete;
2600262151Sluigi	}
2601262151Sluigi	if (na->nm_notify == NULL)
2602262151Sluigi		na->nm_notify = netmap_notify;
2603262151Sluigi	na->active_fds = 0;
2604231594Sluigi
2605262151Sluigi	if (na->nm_mem == NULL)
2606270252Sluigi		/* use the global allocator */
2607262151Sluigi		na->nm_mem = &nm_mem;
2608270252Sluigi	if (na->nm_bdg_attach == NULL)
2609270252Sluigi		/* no special nm_bdg_attach callback. On VALE
2610270252Sluigi		 * attach, we need to interpose a bwrap
2611270252Sluigi		 */
2612270252Sluigi		na->nm_bdg_attach = netmap_bwrap_attach;
2613262151Sluigi	return 0;
2614262151Sluigi}
2615262151Sluigi
2616262151Sluigi
2617267282Sluigi/* standard cleanup, called by all destructors */
2618262151Sluigivoid
2619262151Sluiginetmap_detach_common(struct netmap_adapter *na)
2620262151Sluigi{
2621267282Sluigi	if (na->ifp != NULL)
2622262151Sluigi		WNA(na->ifp) = NULL; /* XXX do we need this? */
2623262151Sluigi
2624262151Sluigi	if (na->tx_rings) { /* XXX should not happen */
2625262151Sluigi		D("freeing leftover tx_rings");
2626262151Sluigi		na->nm_krings_delete(na);
2627231594Sluigi	}
2628262151Sluigi	netmap_pipe_dealloc(na);
2629262151Sluigi	if (na->na_flags & NAF_MEM_OWNER)
2630262151Sluigi		netmap_mem_private_delete(na->nm_mem);
2631262151Sluigi	bzero(na, sizeof(*na));
2632262151Sluigi	free(na, M_DEVBUF);
2633231594Sluigi}
2634231594Sluigi
2635270252Sluigi/* Wrapper for the register callback provided by hardware drivers.
2636270252Sluigi * na->ifp == NULL means that the driver module has been
2637270252Sluigi * unloaded, so we cannot call into it.
2638270252Sluigi * Note that module unloading, in our patched linux drivers,
2639270252Sluigi * happens under NMG_LOCK and after having stopped all the
2640270252Sluigi * nic rings (see netmap_detach). This provides sufficient
2641270252Sluigi * protection for the other driver-provided callbacks
2642270252Sluigi * (i.e., nm_config and nm_*xsync), which therefore don't need
2643270252Sluigi * to be wrapped.
2644270252Sluigi */
2645270252Sluigistatic int
2646270252Sluiginetmap_hw_register(struct netmap_adapter *na, int onoff)
2647270252Sluigi{
2648270252Sluigi	struct netmap_hw_adapter *hwna =
2649270252Sluigi		(struct netmap_hw_adapter*)na;
2650231594Sluigi
2651270252Sluigi	if (na->ifp == NULL)
2652270252Sluigi		return onoff ? ENXIO : 0;
2653270252Sluigi
2654270252Sluigi	return hwna->nm_hw_register(na, onoff);
2655270252Sluigi}
2656270252Sluigi
2657270252Sluigi
2658231594Sluigi/*
2659227614Sluigi * Initialize a ``netmap_adapter`` object created by a driver on attach.
2660227614Sluigi * We allocate a block of memory with room for a struct netmap_adapter
2661227614Sluigi * plus two sets of N+2 struct netmap_kring (where N is the number
2662227614Sluigi * of hardware rings):
2663227614Sluigi * krings	0..N-1	are for the hardware queues.
2664227614Sluigi * kring	N	is for the host stack queue
2665262151Sluigi * kring	N+1	is only used for the selinfo for all queues. // XXX still true ?
2666227614Sluigi * Return 0 on success, ENOMEM otherwise.
2667227614Sluigi */
2668227614Sluigiint
2669262151Sluiginetmap_attach(struct netmap_adapter *arg)
2670227614Sluigi{
2671262151Sluigi	struct netmap_hw_adapter *hwna = NULL;
2672262151Sluigi	// XXX when is arg == NULL ?
2673245835Sluigi	struct ifnet *ifp = arg ? arg->ifp : NULL;
2674227614Sluigi
2675245835Sluigi	if (arg == NULL || ifp == NULL)
2676245835Sluigi		goto fail;
2677262151Sluigi	hwna = malloc(sizeof(*hwna), M_DEVBUF, M_NOWAIT | M_ZERO);
2678262151Sluigi	if (hwna == NULL)
2679245835Sluigi		goto fail;
2680262151Sluigi	hwna->up = *arg;
2681262151Sluigi	hwna->up.na_flags |= NAF_HOST_RINGS;
2682270252Sluigi	strncpy(hwna->up.name, ifp->if_xname, sizeof(hwna->up.name));
2683270252Sluigi	hwna->nm_hw_register = hwna->up.nm_register;
2684270252Sluigi	hwna->up.nm_register = netmap_hw_register;
2685262151Sluigi	if (netmap_attach_common(&hwna->up)) {
2686262151Sluigi		free(hwna, M_DEVBUF);
2687262151Sluigi		goto fail;
2688227614Sluigi	}
2689262151Sluigi	netmap_adapter_get(&hwna->up);
2690262151Sluigi
2691232238Sluigi#ifdef linux
2692251139Sluigi	if (ifp->netdev_ops) {
2693251139Sluigi		/* prepare a clone of the netdev ops */
2694251139Sluigi#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 28)
2695262151Sluigi		hwna->nm_ndo.ndo_start_xmit = ifp->netdev_ops;
2696251139Sluigi#else
2697262151Sluigi		hwna->nm_ndo = *ifp->netdev_ops;
2698251139Sluigi#endif
2699238812Sluigi	}
2700262151Sluigi	hwna->nm_ndo.ndo_start_xmit = linux_netmap_start_xmit;
2701270252Sluigi	if (ifp->ethtool_ops) {
2702270252Sluigi		hwna->nm_eto = *ifp->ethtool_ops;
2703270252Sluigi	}
2704270252Sluigi	hwna->nm_eto.set_ringparam = linux_netmap_set_ringparam;
2705270252Sluigi#ifdef ETHTOOL_SCHANNELS
2706270252Sluigi	hwna->nm_eto.set_channels = linux_netmap_set_channels;
2707270252Sluigi#endif
2708270252Sluigi	if (arg->nm_config == NULL) {
2709270252Sluigi		hwna->up.nm_config = netmap_linux_config;
2710270252Sluigi	}
2711262151Sluigi#endif /* linux */
2712262151Sluigi
2713281706Srpaulo#ifdef __FreeBSD__
2714281706Srpaulo	if_printf(ifp, "netmap queues/slots: TX %d/%d, RX %d/%d\n",
2715281706Srpaulo	    hwna->up.num_tx_rings, hwna->up.num_tx_desc,
2716281706Srpaulo	    hwna->up.num_rx_rings, hwna->up.num_rx_desc);
2717281706Srpaulo#else
2718267282Sluigi	D("success for %s tx %d/%d rx %d/%d queues/slots",
2719270252Sluigi		hwna->up.name,
2720267282Sluigi		hwna->up.num_tx_rings, hwna->up.num_tx_desc,
2721267282Sluigi		hwna->up.num_rx_rings, hwna->up.num_rx_desc
2722267282Sluigi		);
2723281706Srpaulo#endif
2724245835Sluigi	return 0;
2725227614Sluigi
2726245835Sluigifail:
2727262151Sluigi	D("fail, arg %p ifp %p na %p", arg, ifp, hwna);
2728267282Sluigi	if (ifp)
2729267282Sluigi		netmap_detach(ifp);
2730262151Sluigi	return (hwna ? EINVAL : ENOMEM);
2731227614Sluigi}
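
/*
 * Illustrative sketch (not compiled): the typical attach path of a
 * netmap-aware NIC driver. Names prefixed with foo_ and the softc
 * fields are hypothetical; the callback prototypes are assumed to
 * match the in-tree drivers of this netmap version (nm_register(na,
 * onoff), nm_txsync(kring, flags), nm_rxsync(kring, flags)). The
 * driver calls netmap_attach() once at device attach time and
 * netmap_detach(ifp) from its detach routine.
 */
#if 0
static void
foo_netmap_attach(struct foo_softc *sc)
{
	struct netmap_adapter na;

	bzero(&na, sizeof(na));
	na.ifp = sc->ifp;
	na.num_tx_desc = sc->num_tx_desc;
	na.num_rx_desc = sc->num_rx_desc;
	na.num_tx_rings = na.num_rx_rings = sc->num_queues;
	na.nm_register = foo_netmap_reg;
	na.nm_txsync = foo_netmap_txsync;
	na.nm_rxsync = foo_netmap_rxsync;
	/* netmap_attach() copies na into a netmap_hw_adapter, installs
	 * the netmap_hw_register() wrapper seen above and fills in the
	 * remaining defaults via netmap_attach_common(), so the local
	 * structure can live on the stack.
	 */
	netmap_attach(&na);
}
#endif /* 0 */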
2732227614Sluigi
2733227614Sluigi
2734227614Sluigivoid
2735262151SluigiNM_DBG(netmap_adapter_get)(struct netmap_adapter *na)
2736227614Sluigi{
2737262151Sluigi	if (!na) {
2738262151Sluigi		return;
2739262151Sluigi	}
2740227614Sluigi
2741262151Sluigi	refcount_acquire(&na->na_refcount);
2742262151Sluigi}
2743262151Sluigi
2744262151Sluigi
2745262151Sluigi/* returns 1 iff the netmap_adapter is destroyed */
2746262151Sluigiint
2747262151SluigiNM_DBG(netmap_adapter_put)(struct netmap_adapter *na)
2748262151Sluigi{
2749227614Sluigi	if (!na)
2750262151Sluigi		return 1;
2751227614Sluigi
2752262151Sluigi	if (!refcount_release(&na->na_refcount))
2753262151Sluigi		return 0;
2754239149Semaste
2755262151Sluigi	if (na->nm_dtor)
2756262151Sluigi		na->nm_dtor(na);
2757262151Sluigi
2758262151Sluigi	netmap_detach_common(na);
2759262151Sluigi
2760262151Sluigi	return 1;
2761262151Sluigi}
2762262151Sluigi
2763267282Sluigi/* nm_krings_create callback for all hardware native adapters */
2764262151Sluigiint
2765262151Sluiginetmap_hw_krings_create(struct netmap_adapter *na)
2766262151Sluigi{
2767262151Sluigi	int ret = netmap_krings_create(na, 0);
2768262151Sluigi	if (ret == 0) {
2769262151Sluigi		/* initialize the mbq for the sw rx ring */
2770262151Sluigi		mbq_safe_init(&na->rx_rings[na->num_rx_rings].rx_queue);
2771262151Sluigi		ND("initialized sw rx queue %d", na->num_rx_rings);
2772245835Sluigi	}
2773262151Sluigi	return ret;
2774227614Sluigi}
2775227614Sluigi
2776227614Sluigi
2777251139Sluigi
2778262151Sluigi/*
2779267282Sluigi * Called on module unload by netmap-enabled drivers
2780262151Sluigi */
2781262151Sluigivoid
2782262151Sluiginetmap_detach(struct ifnet *ifp)
2783251139Sluigi{
2784262151Sluigi	struct netmap_adapter *na = NA(ifp);
2785251139Sluigi
2786262151Sluigi	if (!na)
2787262151Sluigi		return;
2788251139Sluigi
2789262151Sluigi	NMG_LOCK();
2790262151Sluigi	netmap_disable_all_rings(ifp);
2791262151Sluigi	if (!netmap_adapter_put(na)) {
2792262151Sluigi		/* someone is still using the adapter,
2793262151Sluigi		 * tell them that the interface is gone
2794262151Sluigi		 */
2795262151Sluigi		na->ifp = NULL;
2796270252Sluigi		// XXX also clear NAF_NATIVE_ON ?
2797270252Sluigi		na->na_flags &= ~NAF_NETMAP_ON;
2798262151Sluigi		/* give them a chance to notice */
2799262151Sluigi		netmap_enable_all_rings(ifp);
2800262151Sluigi	}
2801262151Sluigi	NMG_UNLOCK();
2802251139Sluigi}
2803251139Sluigi
2804251139Sluigi
2805227614Sluigi/*
2806228280Sluigi * Intercept packets from the network stack and pass them
2807228280Sluigi * to netmap as incoming packets on the 'software' ring.
2808262151Sluigi *
2809262151Sluigi * We only store packets in a bounded mbq and then copy them
2810262151Sluigi * in the relevant rxsync routine.
2811262151Sluigi *
2812262151Sluigi * We rely on the OS to make sure that the ifp and na do not go
2813262151Sluigi * away (typically the caller checks for IFF_DRV_RUNNING or the like).
2814262151Sluigi * In nm_register() or whenever there is a reinitialization,
2815262151Sluigi * we make sure to make the mode change visible here.
2816227614Sluigi */
2817227614Sluigiint
2818262151Sluiginetmap_transmit(struct ifnet *ifp, struct mbuf *m)
2819227614Sluigi{
2820227614Sluigi	struct netmap_adapter *na = NA(ifp);
2821262151Sluigi	struct netmap_kring *kring;
2822262151Sluigi	u_int len = MBUF_LEN(m);
2823262151Sluigi	u_int error = ENOBUFS;
2824262151Sluigi	struct mbq *q;
2825262151Sluigi	int space;
2826227614Sluigi
2827262151Sluigi	// XXX [Linux] we do not need this lock
2828262151Sluigi	// if we follow the down/configure/up protocol -gl
2829262151Sluigi	// mtx_lock(&na->core_lock);
2830262151Sluigi
2831270252Sluigi	if (!nm_netmap_on(na)) {
2832270252Sluigi		D("%s not in netmap mode anymore", na->name);
2833262151Sluigi		error = ENXIO;
2834262151Sluigi		goto done;
2835250107Sluigi	}
2836251139Sluigi
2837262151Sluigi	kring = &na->rx_rings[na->num_rx_rings];
2838262151Sluigi	q = &kring->rx_queue;
2839262151Sluigi
2840262151Sluigi	// XXX reconsider long packets if we handle fragments
2841270252Sluigi	if (len > NETMAP_BUF_SIZE(na)) { /* too long for us */
2842270252Sluigi		D("%s from_host, drop packet size %d > %d", na->name,
2843270252Sluigi			len, NETMAP_BUF_SIZE(na));
2844262151Sluigi		goto done;
2845227614Sluigi	}
2846227614Sluigi
2847262151Sluigi	/* protect against rxsync_from_host(), netmap_sw_to_nic()
2848262151Sluigi	 * and maybe other instances of netmap_transmit (the latter
2849262151Sluigi	 * not possible on Linux).
2850262151Sluigi	 * Also avoid overflowing the queue.
2851262151Sluigi	 */
2852267282Sluigi	mbq_lock(q);
2853262151Sluigi
2854262151Sluigi	space = kring->nr_hwtail - kring->nr_hwcur;
2855262151Sluigi	if (space < 0)
2856262151Sluigi		space += kring->nkr_num_slots;
2857262151Sluigi	if (space + mbq_len(q) >= kring->nkr_num_slots - 1) { // XXX
2858262151Sluigi		RD(10, "%s full hwcur %d hwtail %d qlen %d len %d m %p",
2859270252Sluigi			na->name, kring->nr_hwcur, kring->nr_hwtail, mbq_len(q),
2860262151Sluigi			len, m);
2861262151Sluigi	} else {
2862262151Sluigi		mbq_enqueue(q, m);
2863262151Sluigi		ND(10, "%s %d bufs in queue len %d m %p",
2864270252Sluigi			na->name, mbq_len(q), len, m);
2865262151Sluigi		/* notify outside the lock */
2866262151Sluigi		m = NULL;
2867262151Sluigi		error = 0;
2868262151Sluigi	}
2869267282Sluigi	mbq_unlock(q);
2870262151Sluigi
2871227614Sluigidone:
2872262151Sluigi	if (m)
2873262151Sluigi		m_freem(m);
2874262151Sluigi	/* unconditionally wake up listeners */
2875262151Sluigi	na->nm_notify(na, na->num_rx_rings, NR_RX, 0);
2876267282Sluigi	/* this is normally netmap_notify(), but for NICs
2877267282Sluigi	 * connected to a bridge it is netmap_bwrap_intr_notify(),
2878267282Sluigi	 * which possibly forwards the frames through the switch
2879267282Sluigi	 */
2880227614Sluigi
2881227614Sluigi	return (error);
2882227614Sluigi}
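
/*
 * Illustrative sketch (not compiled): on FreeBSD the interception that
 * feeds netmap_transmit() is installed when the adapter enters netmap
 * mode, roughly as done by nm_set_native_flags() in netmap_kern.h.
 * The field names below are assumed from that header; this is a sketch
 * of the mechanism, not a verbatim copy.
 */
#if 0
	na->if_transmit = ifp->if_transmit;	/* save the stack's path */
	ifp->if_transmit = netmap_transmit;	/* divert it to netmap */
#endif /* 0 */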
2883227614Sluigi
2884227614Sluigi
2885227614Sluigi/*
2886227614Sluigi * netmap_reset() is called by the driver routines when reinitializing
2887227614Sluigi * a ring. The driver is in charge of locking to protect the kring.
2888262151Sluigi * If native netmap mode is not set just return NULL.
2889227614Sluigi */
2890227614Sluigistruct netmap_slot *
2891262151Sluiginetmap_reset(struct netmap_adapter *na, enum txrx tx, u_int n,
2892227614Sluigi	u_int new_cur)
2893227614Sluigi{
2894227614Sluigi	struct netmap_kring *kring;
2895228276Sluigi	int new_hwofs, lim;
2896227614Sluigi
2897270252Sluigi	if (!nm_native_on(na)) {
2898270252Sluigi		ND("interface not in native netmap mode");
2899227614Sluigi		return NULL;	/* nothing to reinitialize */
2900262151Sluigi	}
2901227614Sluigi
2902262151Sluigi	/* XXX note: in the new scheme, we are not guaranteed to be
2903262151Sluigi	 * under lock (e.g. when called on a device reset).
2904262151Sluigi	 * In this case, we should set a flag and not trust the
2905262151Sluigi	 * values too much. In practice: TODO
2906262151Sluigi	 * - set a RESET flag somewhere in the kring
2907262151Sluigi	 * - do the processing in a conservative way
2908262151Sluigi	 * - let the *sync() routines fix things up at the end.
2909262151Sluigi	 */
2910232238Sluigi	if (tx == NR_TX) {
2911241719Sluigi		if (n >= na->num_tx_rings)
2912241719Sluigi			return NULL;
2913232238Sluigi		kring = na->tx_rings + n;
2914262151Sluigi		// XXX check whether we should use hwcur or rcur
2915228276Sluigi		new_hwofs = kring->nr_hwcur - new_cur;
2916232238Sluigi	} else {
2917241719Sluigi		if (n >= na->num_rx_rings)
2918241719Sluigi			return NULL;
2919232238Sluigi		kring = na->rx_rings + n;
2920262151Sluigi		new_hwofs = kring->nr_hwtail - new_cur;
2921232238Sluigi	}
2922232238Sluigi	lim = kring->nkr_num_slots - 1;
2923228276Sluigi	if (new_hwofs > lim)
2924228276Sluigi		new_hwofs -= lim + 1;
2925228276Sluigi
2926262151Sluigi	/* Always set the new offset value and realign the ring. */
2927262151Sluigi	if (netmap_verbose)
2928262151Sluigi	    D("%s %s%d hwofs %d -> %d, hwtail %d -> %d",
2929270252Sluigi		na->name,
2930262151Sluigi		tx == NR_TX ? "TX" : "RX", n,
2931262151Sluigi		kring->nkr_hwofs, new_hwofs,
2932262151Sluigi		kring->nr_hwtail,
2933262151Sluigi		tx == NR_TX ? lim : kring->nr_hwtail);
2934228276Sluigi	kring->nkr_hwofs = new_hwofs;
2935262151Sluigi	if (tx == NR_TX) {
2936262151Sluigi		kring->nr_hwtail = kring->nr_hwcur + lim;
2937262151Sluigi		if (kring->nr_hwtail > lim)
2938262151Sluigi			kring->nr_hwtail -= lim + 1;
2939262151Sluigi	}
2940228276Sluigi
2941238812Sluigi#if 0 // def linux
2942238812Sluigi	/* XXX check that the mappings are correct */
2943238812Sluigi	/* need ring_nr, adapter->pdev, direction */
2944238812Sluigi	buffer_info->dma = dma_map_single(&pdev->dev, addr, adapter->rx_buffer_len, DMA_FROM_DEVICE);
2945238812Sluigi	if (dma_mapping_error(&adapter->pdev->dev, buffer_info->dma)) {
2946238812Sluigi		D("error mapping rx netmap buffer %d", i);
2947238812Sluigi		// XXX fix error handling
2948238812Sluigi	}
2949238812Sluigi
2950238812Sluigi#endif /* linux */
2951227614Sluigi	/*
2952262151Sluigi	 * Wakeup on the individual and global selwait
2953228276Sluigi	 * We do the wakeup here, but the ring is not yet reconfigured.
2954228276Sluigi	 * However, we are under lock so there are no races.
2955227614Sluigi	 */
2956262151Sluigi	na->nm_notify(na, n, tx, 0);
2957227614Sluigi	return kring->ring->slot;
2958227614Sluigi}
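
/*
 * Illustrative sketch (not compiled): how a driver's ring-init path
 * typically uses netmap_reset(). The foo_/rxr names and fields are
 * hypothetical; netmap_idx_n2k() and PNMB() are assumed to be the
 * usual index-translation and buffer-lookup helpers of this netmap
 * version. Descriptor programming is driver specific and omitted.
 */
#if 0
	/* in foo_setup_receive_ring(), after the sw state is rebuilt */
	struct netmap_adapter *na = NA(sc->ifp);
	struct netmap_slot *slot = netmap_reset(na, NR_RX, rxr->me, 0);
	u_int j;

	for (j = 0; slot != NULL && j < rxr->num_desc; j++) {
		/* translate NIC index j to the netmap index for this ring */
		int si = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
		uint64_t paddr;
		void *addr = PNMB(na, slot + si, &paddr);

		(void)addr;
		/* attach the netmap buffer: load the DMA map and write
		 * paddr into NIC rx descriptor j (driver specific).
		 */
	}
#endif /* 0 */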
2959227614Sluigi
2960227614Sluigi
2961227614Sluigi/*
2962262151Sluigi * Dispatch rx/tx interrupts to the netmap rings.
2963262151Sluigi *
2964262151Sluigi * "work_done" is non-null on the RX path, NULL for the TX path.
2965262151Sluigi * We rely on the OS to make sure that there is only one active
2966262151Sluigi * instance per queue, and that there is appropriate locking.
2967262151Sluigi *
2968262151Sluigi * The 'notify' routine depends on what the ring is attached to.
2969262151Sluigi * - for a netmap file descriptor, do a selwakeup on the individual
2970262151Sluigi *   waitqueue, plus one on the global one if needed
2971270252Sluigi *   (see netmap_notify)
2972270252Sluigi * - for a nic connected to a switch, call the proper forwarding routine
2973270252Sluigi *   (see netmap_bwrap_intr_notify)
2974251139Sluigi */
2975262151Sluigivoid
2976262151Sluiginetmap_common_irq(struct ifnet *ifp, u_int q, u_int *work_done)
2977251139Sluigi{
2978251139Sluigi	struct netmap_adapter *na = NA(ifp);
2979262151Sluigi	struct netmap_kring *kring;
2980251139Sluigi
2981262151Sluigi	q &= NETMAP_RING_MASK;
2982262151Sluigi
2983262151Sluigi	if (netmap_verbose) {
2984262151Sluigi		RD(5, "received %s queue %d", work_done ? "RX" : "TX", q);
2985251139Sluigi	}
2986251139Sluigi
2987262151Sluigi	if (work_done) { /* RX path */
2988262151Sluigi		if (q >= na->num_rx_rings)
2989262151Sluigi			return;	// not a physical queue
2990262151Sluigi		kring = na->rx_rings + q;
2991262151Sluigi		kring->nr_kflags |= NKR_PENDINTR;	// XXX atomic ?
2992262151Sluigi		na->nm_notify(na, q, NR_RX, 0);
2993262151Sluigi		*work_done = 1; /* do not fire napi again */
2994262151Sluigi	} else { /* TX path */
2995262151Sluigi		if (q >= na->num_tx_rings)
2996262151Sluigi			return;	// not a physical queue
2997262151Sluigi		kring = na->tx_rings + q;
2998262151Sluigi		na->nm_notify(na, q, NR_TX, 0);
2999262151Sluigi	}
3000251139Sluigi}
3001251139Sluigi
3002251139Sluigi
3003251139Sluigi/*
3004262151Sluigi * Default functions to handle rx/tx interrupts from a physical device.
3005262151Sluigi * "work_done" is non-null on the RX path, NULL for the TX path.
3006250107Sluigi *
3007262151Sluigi * If the card is not in netmap mode, simply return 0,
3008262151Sluigi * so that the caller proceeds with regular processing.
3009262151Sluigi * Otherwise call netmap_common_irq() and return 1.
3010262151Sluigi *
3011262151Sluigi * If the card is connected to a netmap file descriptor,
3012262151Sluigi * do a selwakeup on the individual queue, plus one on the global one
3013262151Sluigi * if needed (multiqueue card _and_ there are multiqueue listeners),
3014262151Sluigi * and return 1.
3015262151Sluigi *
3016262151Sluigi * Finally, if called on rx from an interface connected to a switch,
3017262151Sluigi * calls the proper forwarding routine, and return 1.
3018231594Sluigi */
3019231881Sluigiint
3020262151Sluiginetmap_rx_irq(struct ifnet *ifp, u_int q, u_int *work_done)
3021231594Sluigi{
3022270252Sluigi	struct netmap_adapter *na = NA(ifp);
3023270252Sluigi
3024270252Sluigi	/*
3025270252Sluigi	 * XXX emulated netmap mode sets NAF_SKIP_INTR so
3026270252Sluigi	 * we still use the regular driver even though the previous
3027270252Sluigi	 * check fails. It is unclear whether we should use
3028270252Sluigi	 * nm_native_on() here.
3029270252Sluigi	 */
3030270252Sluigi	if (!nm_netmap_on(na))
3031231594Sluigi		return 0;
3032250107Sluigi
3033270252Sluigi	if (na->na_flags & NAF_SKIP_INTR) {
3034241719Sluigi		ND("use regular interrupt");
3035241719Sluigi		return 0;
3036241719Sluigi	}
3037241719Sluigi
3038262151Sluigi	netmap_common_irq(ifp, q, work_done);
3039231594Sluigi	return 1;
3040231594Sluigi}
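
/*
 * Illustrative sketch (not compiled): how a driver's per-queue
 * interrupt/NAPI handler typically hands control to netmap.
 * foo_txeof()/foo_rxeof() and the queue structure are hypothetical;
 * netmap_tx_irq() is assumed to be the usual netmap_kern.h shorthand
 * for netmap_rx_irq(ifp, q, NULL).
 */
#if 0
static void
foo_handle_queue(struct foo_queue *que)
{
	struct ifnet *ifp = que->sc->ifp;
	u_int work_done = 0;

	/* if the ring is in netmap mode, netmap does all the work */
	if (netmap_tx_irq(ifp, que->me))
		return;
	if (netmap_rx_irq(ifp, que->me, &work_done))
		return;

	/* otherwise fall back to the regular datapath */
	foo_txeof(que);
	foo_rxeof(que);
}
#endif /* 0 */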
3041231594Sluigi
3042232238Sluigi
3043238837Sluigi/*
3044262151Sluigi * Module loader and unloader
3045238812Sluigi *
3046262151Sluigi * netmap_init() creates the /dev/netmap device and initializes
3047262151Sluigi * all global variables. Returns 0 on success, errno on failure
3048262151Sluigi * (although failure is not really expected).
3049262151Sluigi *
3050262151Sluigi * netmap_fini() destroys everything.
3051238812Sluigi */
3052238812Sluigi
3053262151Sluigistatic struct cdev *netmap_dev; /* /dev/netmap character device. */
3054262151Sluigiextern struct cdevsw netmap_cdevsw;
3055238812Sluigi
3056238812Sluigi
3057262151Sluigivoid
3058262151Sluiginetmap_fini(void)
3059238812Sluigi{
3060262151Sluigi	// XXX destroy_bridges() ?
3061262151Sluigi	if (netmap_dev)
3062262151Sluigi		destroy_dev(netmap_dev);
3063262151Sluigi	netmap_mem_fini();
3064262151Sluigi	NMG_LOCK_DESTROY();
3065262151Sluigi	printf("netmap: unloaded module.\n");
3066238812Sluigi}
3067238812Sluigi
3068238812Sluigi
3069251139Sluigiint
3070227614Sluiginetmap_init(void)
3071227614Sluigi{
3072227614Sluigi	int error;
3073227614Sluigi
3074262151Sluigi	NMG_LOCK_INIT();
3075262151Sluigi
3076262151Sluigi	error = netmap_mem_init();
3077262151Sluigi	if (error != 0)
3078262151Sluigi		goto fail;
3079278779Sluigi	/*
3080278779Sluigi	 * MAKEDEV_ETERNAL_KLD avoids an expensive check on syscalls
3081278779Sluigi	 * when the module is compiled in.
3082278779Sluigi	 * XXX could use make_dev_credv() to get error number
3083278779Sluigi	 */
3084278779Sluigi	netmap_dev = make_dev_credf(MAKEDEV_ETERNAL_KLD,
3085278779Sluigi		&netmap_cdevsw, 0, NULL, UID_ROOT, GID_WHEEL, 0600,
3086227614Sluigi			      "netmap");
3087262151Sluigi	if (!netmap_dev)
3088262151Sluigi		goto fail;
3089238812Sluigi
3090262151Sluigi	netmap_init_bridges();
3091270252Sluigi#ifdef __FreeBSD__
3092270252Sluigi	nm_vi_init_index();
3093270252Sluigi#endif
3094262151Sluigi	printf("netmap: loaded module\n");
3095262151Sluigi	return (0);
3096262151Sluigifail:
3097262151Sluigi	netmap_fini();
3098262151Sluigi	return (EINVAL); /* may be incorrect */
3099227614Sluigi}
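
/*
 * Illustrative sketch (not compiled): roughly how the FreeBSD module
 * glue is expected to drive netmap_init()/netmap_fini() through a
 * standard module event handler. The real handler lives in
 * netmap_freebsd.c; this is a sketch, not a verbatim copy.
 */
#if 0
static int
netmap_loader(__unused struct module *module, int event, __unused void *arg)
{
	int error = 0;

	switch (event) {
	case MOD_LOAD:
		error = netmap_init();
		break;
	case MOD_UNLOAD:
		netmap_fini();
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
	return (error);
}

DEV_MODULE(netmap, netmap_loader, NULL);
#endif /* 0 */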
3100