1270063Sluigi/*
2270063Sluigi * Copyright (C) 2013 Luigi Rizzo. All rights reserved.
3270063Sluigi *
4270063Sluigi * Redistribution and use in source and binary forms, with or without
5270063Sluigi * modification, are permitted provided that the following conditions
6270063Sluigi * are met:
7270063Sluigi *   1. Redistributions of source code must retain the above copyright
8270063Sluigi *      notice, this list of conditions and the following disclaimer.
9270063Sluigi *   2. Redistributions in binary form must reproduce the above copyright
10270063Sluigi *      notice, this list of conditions and the following disclaimer in the
11270063Sluigi *    documentation and/or other materials provided with the distribution.
12270063Sluigi *
13270063Sluigi * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14270063Sluigi * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15270063Sluigi * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16270063Sluigi * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17270063Sluigi * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18270063Sluigi * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19270063Sluigi * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20270063Sluigi * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21270063Sluigi * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22270063Sluigi * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23270063Sluigi * SUCH DAMAGE.
24270063Sluigi */
25270063Sluigi
26270063Sluigi#ifndef NET_PARAVIRT_H
27270063Sluigi#define NET_PARAVIRT_H
28270063Sluigi
29270063Sluigi /*
30270063Sluigi  * $FreeBSD: releng/11.0/sys/net/paravirt.h 270063 2014-08-16 15:00:01Z luigi $
31270063Sluigi  *
32270063Sluigi Support for virtio-like communication between host (H) and guest (G) NICs.
33270063Sluigi
34270063Sluigi THIS IS EXPERIMENTAL CODE AND SUBJECT TO CHANGE.
35270063Sluigi
36270063Sluigi The guest allocates the shared Communication Status Block (csb) and
37270063Sluigi writes its physical address at CSBAL and CSBAH (data is little endian).
38270063Sluigi csb->guest_csb_on enables the mode. If disabled, the device acts as a regular one.
39270063Sluigi
40270063Sluigi Notifications for tx and rx are exchanged without vm exits
41270063Sluigi if possible. In particular (only mentioning csb mode below),
42270063Sluigi the following actions are performed. In the description below,
43270063Sluigi "double check" means verifying again the condition that caused
44270063Sluigi the previous action, and reverting the action if the condition has
45270063Sluigi changed. The condition typically depends on a variable set by the
46270063Sluigi other party, and the double check is done to avoid races. E.g.
47270063Sluigi
48270063Sluigi	// start with A=0
49270063Sluigi    again:
50270063Sluigi	// do something
51270063Sluigi	if ( cond(C) ) { // C is written by the other side
52270063Sluigi	    A = 1;
53270063Sluigi	    // barrier
54270063Sluigi	    if ( !cond(C) ) {
55270063Sluigi		A = 0;
56270063Sluigi		goto again;
57270063Sluigi	    }
58270063Sluigi	}
59270063Sluigi
60270063Sluigi TX: start from idle:
61270063Sluigi    H starts with host_need_txkick=1 when the I/O thread bh is idle. Upon new
62270063Sluigi    transmissions, G always updates guest_tdt.  If host_need_txkick == 1,
63270063Sluigi    G also writes to the TDT, which acts as a kick to H (so pending
64270063Sluigi    writes are always dispatched to H as soon as possible.)
65270063Sluigi
66270063Sluigi TX: active state:
67270063Sluigi    On the kick (TDT write) H sets host_need_txkick = 0 (if not
68270063Sluigi    done already by G), and starts an I/O thread trying to consume
69270063Sluigi    packets from TDH to guest_tdt, periodically refreshing host_tdh
70270063Sluigi    and TDH.  When host_tdh == guest_tdt, H sets host_need_txkick=1,
71270063Sluigi    and then does the "double check" for race avoidance.
72270063Sluigi
73270063Sluigi TX: G runs out of buffers
74270063Sluigi    XXX there are two mechanisms, one boolean (using guest_need_txkick)
75270063Sluigi    and one with a threshold (using guest_txkick_at). They are mutually
76270063Sluigi    exclusive.
77270063Sluigi    BOOLEAN: when G has no space, it sets guest_need_txkick=1 and does
78270063Sluigi        the double check. If H finds guest_need_txkick== 1 on a write
79270063Sluigi        to TDH, it also generates an interrupt.
80270063Sluigi    THRESHOLD: G sets guest_txkick_at to the TDH value for which it
81270063Sluigi	wants to receive an interrupt. When H detects that TDH moves
82270063Sluigi	across guest_txkick_at, it generates an interrupt.
83270063Sluigi	This second mechanism reduces the number of interrupts and
84270063Sluigi	TDT writes on the transmit side when the host is too slow.
85270063Sluigi
86270063Sluigi RX: start from idle
87270063Sluigi    G starts with guest_need_rxkick = 1 when the receive ring is empty.
88270063Sluigi    As packets arrive, H updates host_rdh (and RDH) and also generates an
89270063Sluigi    interrupt when guest_need_rxkick == 1 (so incoming packets are
90270063Sluigi    always reported to G as soon as possible, apart from interrupt
91270063Sluigi    moderation delays). It also tracks guest_rdt for new buffers.
92270063Sluigi
93270063Sluigi RX: active state
94270063Sluigi    As the interrupt arrives, G sets guest_need_rxkick = 0 and starts
95270063Sluigi    draining packets from the receive ring, while updating guest_rdt.
96270063Sluigi    When G runs out of packets it sets guest_need_rxkick=1 and does the
97270063Sluigi    double check.
98270063Sluigi
99270063Sluigi RX: H runs out of buffers
100270063Sluigi    XXX there are two mechanisms, one boolean (using host_need_rxkick)
101270063Sluigi    and one with a threshold (using host_rxkick_at). They are mutually
102270063Sluigi    exclusive.
103270063Sluigi    BOOLEAN: when H has no space, it sets host_need_rxkick=1 and does the
104270063Sluigi	double check. If G finds host_need_rxkick==1 on updating guest_rdt,
105270063Sluigi        it also writes to RDT causing a kick to H.
106270063Sluigi    THRESHOLD: H sets host_rxkick_at to the RDT value for which it wants
107270063Sluigi	to receive a kick. When G detects that guest_rdt moves across
108270063Sluigi	host_rxkick_at, it writes to RDT, thus generating a kick.
109270063Sluigi	This second mechanism reduces the number of kicks and
110270063Sluigi        RDT writes on the receive side when the guest is too slow and
111270063Sluigi	would free only a few buffers at a time.
112270063Sluigi
113270063Sluigi */
114270063Sluigistruct paravirt_csb {
115270063Sluigi    /* XXX revise the layout to minimize cache bounces.
116270063Sluigi     * Usage is described as follows:
117270063Sluigi     * 	[GH][RW][+-0]	guest/host reads/writes frequently/rarely/almost never
     *
     * Communication Status Block (csb): the shared block that the guest (G)
     * allocates and the host (H) also accesses, as described in the protocol
     * notes earlier in this file. Every member is a uint32_t.
     *
     * NOTE(review): member order and padding presumably form the layout both
     * sides depend on, so any change would have to be mirrored in the guest
     * driver and in the host device model -- confirm before reordering.
     *
     * Annotation inconsistencies worth resolving:
     *  - host_txcycles_lim is tagged GW- ("set by the guest") although it is
     *    placed in the host-written group.
     *  - host_isr is tagged with '*', which the legend above does not define.
118270063Sluigi     */
119270063Sluigi    /* these are (mostly) written by the guest */
120270063Sluigi    uint32_t guest_tdt;            /* GW+ HR+ pkt to transmit */
121270063Sluigi    uint32_t guest_need_txkick;    /* GW- HR+ G ran out of tx bufs, request kick */
122270063Sluigi    uint32_t guest_need_rxkick;    /* GW- HR+ G ran out of rx pkts, request kick  */
123270063Sluigi    uint32_t guest_csb_on;         /* GW- HR+ enable paravirtual mode */
124270063Sluigi    uint32_t guest_rdt;            /* GW+ HR+ rx buffers available */
125270063Sluigi    uint32_t guest_txkick_at;      /* GW- HR+ tx ring pos. where G expects an intr */
126270063Sluigi    uint32_t guest_use_msix;        /* GW0 HR0 guest uses MSI-X interrupts. */
127270063Sluigi    uint32_t pad[9];               /* 7 words above + 9 pad words = 16 words (64 bytes);
				    * presumably keeps the host group on its own
				    * cache line -- TODO confirm */
128270063Sluigi
129270063Sluigi    /* these are (mostly) written by the host */
130270063Sluigi    uint32_t host_tdh;             /* GR0 HW- shadow register, mostly unused */
131270063Sluigi    uint32_t host_need_txkick;     /* GR+ HW- start the iothread */
132270063Sluigi    uint32_t host_txcycles_lim;    /* GW- HR- how much to spin before  sleep.
133270063Sluigi				    * set by the guest */
134270063Sluigi    uint32_t host_txcycles;        /* GR0 HW- counter, but no need to be exported */
135270063Sluigi    uint32_t host_rdh;             /* GR0 HW- shadow register, mostly unused */
136270063Sluigi    uint32_t host_need_rxkick;     /* GR+ HW- flush rx queued packets */
137270063Sluigi    uint32_t host_isr;             /* GR* HW* shadow copy of ISR */
138270063Sluigi    uint32_t host_rxkick_at;       /* GR+ HW- rx ring pos where H expects a kick */
139270063Sluigi    uint32_t vnet_ring_high;	/* Vnet ring physical address high. */
140270063Sluigi    uint32_t vnet_ring_low;	/* Vnet ring physical address low. */
141270063Sluigi};
142270063Sluigi
/*
 * NOTE(review): presumably the size in bytes of the region that holds the
 * csb; no user of this constant is visible in this header -- confirm.
 */
143270063Sluigi#define NET_PARAVIRT_CSB_SIZE   4096
/*
 * All-ones 32-bit value (every bit of a uint32_t zero is inverted).
 * NOTE(review): looks like a "no value" marker; its users are not visible
 * in this header -- confirm which fields it applies to.
 */
144270063Sluigi#define NET_PARAVIRT_NONE   (~((uint32_t)0))
145270063Sluigi
146270063Sluigi#ifdef	QEMU_PCI_H
147270063Sluigi
148270063Sluigi/*
149270063Sluigi * API functions only available within QEMU
150270063Sluigi */
151270063Sluigi
/*
 * Declared only when QEMU_PCI_H is defined; the definition is not part of
 * this header, so the notes below are assumptions to verify against it.
 *
 * NOTE(review): csbbal/csbbah are presumably the low and high 32-bit halves
 * of the csb physical address that the guest writes to CSBAL/CSBAH -- confirm.
 * NOTE(review): csb is a pointer-to-pointer, so the function presumably
 * stores the resulting csb pointer through it -- confirm.
 * NOTE(review): tx_bh is presumably the bottom half that drives the host tx
 * I/O path, and as the address space used to reach the csb -- confirm.
 */
152270063Sluigivoid paravirt_configure_csb(struct paravirt_csb** csb, uint32_t csbbal,
153270063Sluigi			uint32_t csbbah, QEMUBH* tx_bh, AddressSpace *as);
154270063Sluigi
155270063Sluigi#endif /* QEMU_PCI_H */
156270063Sluigi
157270063Sluigi#endif /* NET_PARAVIRT_H */
158