/*
 * Copyright (C) 2013 Luigi Rizzo. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

26289385Sadrian#ifndef NET_PARAVIRT_H
27289385Sadrian#define NET_PARAVIRT_H
28289385Sadrian
 /*
  * $FreeBSD: releng/10.3/sys/net/paravirt.h 289385 2015-10-15 20:36:04Z adrian $
  *
 Support for virtio-like communication between host (H) and guest (G) NICs.

 THIS IS EXPERIMENTAL CODE AND SUBJECT TO CHANGE.

 The guest allocates the shared Communication Status Block (csb) and
 writes its physical address at CSBAL and CSBAH (data is little endian).
 csb->guest_csb_on enables the mode. If disabled, the device acts as a
 regular one.

 Notifications for tx and rx are exchanged without vm exits
 if possible. In particular (only mentioning csb mode below),
 the following actions are performed. In the description below,
 "double check" means verifying again the condition that caused
 the previous action, and reverting the action if the condition has
 changed. The condition typically depends on a variable set by the
 other party, and the double check is done to avoid races. E.g.

	// start with A=0
    again:
	// do something
	if ( cond(C) ) { // C is written by the other side
	    A = 1;
	    // barrier
	    if ( !cond(C) ) {
		A = 0;
		goto again;
	    }
	}

 TX: start from idle:
    H starts with host_need_txkick=1 when the I/O thread bh is idle. Upon new
    transmissions, G always updates guest_tdt.  If host_need_txkick == 1,
    G also writes to the TDT, which acts as a kick to H (so pending
    writes are always dispatched to H as soon as possible.)

 TX: active state:
    On the kick (TDT write) H sets host_need_txkick = 0 (if not
    done already by G), and starts an I/O thread trying to consume
    packets from TDH to guest_tdt, periodically refreshing host_tdh
    and TDH.  When host_tdh == guest_tdt, H sets host_need_txkick=1,
    and then does the "double check" for race avoidance.

 TX: G runs out of buffers
    XXX there are two mechanisms, one boolean (using guest_need_txkick)
    and one with a threshold (using guest_txkick_at). They are mutually
    exclusive.
    BOOLEAN: when G has no space, it sets guest_need_txkick=1 and does
        the double check. If H finds guest_need_txkick == 1 on a write
        to TDH, it also generates an interrupt.
    THRESHOLD: G sets guest_txkick_at to the TDH value for which it
	wants to receive an interrupt. When H detects that TDH moves
	across guest_txkick_at, it generates an interrupt.
	This second mechanism reduces the number of interrupts and
	TDT writes on the transmit side when the host is too slow.

 RX: start from idle
    G starts with guest_need_rxkick = 1 when the receive ring is empty.
    As packets arrive, H updates host_rdh (and RDH) and also generates an
    interrupt when guest_need_rxkick == 1 (so incoming packets are
    always reported to G as soon as possible, apart from interrupt
    moderation delays). It also tracks guest_rdt for new buffers.

 RX: active state
    As the interrupt arrives, G sets guest_need_rxkick = 0 and starts
    draining packets from the receive ring, while updating guest_rdt.
    When G runs out of packets it sets guest_need_rxkick=1 and does the
    double check.

 RX: H runs out of buffers
    XXX there are two mechanisms, one boolean (using host_need_rxkick)
    and one with a threshold (using host_rxkick_at). They are mutually
    exclusive.
    BOOLEAN: when H has no space, it sets host_need_rxkick=1 and does the
	double check. If G finds host_need_rxkick == 1 on updating guest_rdt,
        it also writes to RDT causing a kick to H.
    THRESHOLD: H sets host_rxkick_at to the RDT value for which it wants
	to receive a kick. When G detects that guest_rdt moves across
	host_rxkick_at, it writes to RDT, thus generating a kick.
	This second mechanism reduces the number of kicks and
        RDT writes on the receive side when the guest is too slow and
	would free only a few buffers at a time.

 */
/*
 * Communication Status Block (CSB) accessed by both guest (G) and host (H).
 *
 * Every member is a 32-bit word.  The seven guest-side fields plus pad[9]
 * occupy the first 16 words (64 bytes); the host-side fields follow.
 * NOTE(review): the padding presumably keeps the two groups on separate
 * 64-byte cache lines -- confirm.  Because both sides read and write this
 * structure, members must not be reordered, resized or removed.
 *
 * XXX revise the layout to minimize cache bounces.
 *
 * Per-field usage is annotated as [GH][RW][+-0]:
 *	G / H		guest / host
 *	R / W		reads / writes
 *	+ / - / 0	frequently / rarely / almost never
 */
struct paravirt_csb {
    /* these are (mostly) written by the guest */
    uint32_t guest_tdt;            /* GW+ HR+ pkt to transmit */
    uint32_t guest_need_txkick;    /* GW- HR+ G ran out of tx bufs, request kick */
    uint32_t guest_need_rxkick;    /* GW- HR+ G ran out of rx pkts, request kick */
    uint32_t guest_csb_on;         /* GW- HR+ enable paravirtual mode */
    uint32_t guest_rdt;            /* GW+ HR+ rx buffers available */
    uint32_t guest_txkick_at;      /* GW- HR+ tx ring pos. where G expects an intr */
    uint32_t guest_use_msix;       /* GW0 HR0 guest uses MSI-X interrupts. */
    uint32_t pad[9];               /* fills the guest group up to 16 words */

    /* these are (mostly) written by the host */
    uint32_t host_tdh;             /* GR0 HW- shadow register, mostly unused */
    uint32_t host_need_txkick;     /* GR+ HW- start the iothread */
    uint32_t host_txcycles_lim;    /* GW- HR- how much to spin before sleep.
                                    * set by the guest */
    uint32_t host_txcycles;        /* GR0 HW- counter, but no need to be exported */
    uint32_t host_rdh;             /* GR0 HW- shadow register, mostly unused */
    uint32_t host_need_rxkick;     /* GR+ HW- flush rx queued packets */
    uint32_t host_isr;             /* GR* HW* shadow copy of ISR */
    uint32_t host_rxkick_at;       /* GR+ HW- rx ring pos where H expects a kick */
    uint32_t vnet_ring_high;       /* Vnet ring physical address high. */
    uint32_t vnet_ring_low;        /* Vnet ring physical address low. */
};

/*
 * NET_PARAVIRT_CSB_SIZE: 4096.
 *	NOTE(review): presumably the size in bytes reserved for the CSB
 *	region -- confirm against the users of this header.
 * NET_PARAVIRT_NONE: a uint32_t with all bits set.
 *	NOTE(review): presumably a "no value" marker for the 32-bit CSB
 *	fields -- confirm against the users of this header.
 */
#define NET_PARAVIRT_CSB_SIZE   4096
#define NET_PARAVIRT_NONE   (~((uint32_t)0))

#ifdef	QEMU_PCI_H

/*
 * API functions only available within QEMU
 * (declared only when QEMU_PCI_H has been defined by the includer).
 */

/*
 * paravirt_configure_csb: prototype only; the definition is not part of
 * this header.
 *
 * NOTE(review): the parameter roles below are inferred from their names
 * and must be confirmed against the QEMU-side implementation:
 *	csb	in/out pointer to the host's CSB pointer
 *	csbbal	low 32 bits of the CSB address (cf. CSBAL)
 *	csbbah	high 32 bits of the CSB address (cf. CSBAH)
 *	tx_bh	bottom half used for transmission
 *	as	address space used to reach the CSB
 */
void paravirt_configure_csb(struct paravirt_csb **csb, uint32_t csbbal,
			uint32_t csbbah, QEMUBH *tx_bh, AddressSpace *as);

#endif /* QEMU_PCI_H */

157289385Sadrian#endif /* NET_PARAVIRT_H */
158