/*
 * Copyright (C) 2013 Luigi Rizzo. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef NET_PARAVIRT_H
#define NET_PARAVIRT_H

 /*
  * $FreeBSD: releng/10.3/sys/net/paravirt.h 289385 2015-10-15 20:36:04Z adrian $
  *
 Support for virtio-like communication between host (H) and guest (G) NICs.

 THIS IS EXPERIMENTAL CODE AND SUBJECT TO CHANGE.

 The guest allocates the shared Communication Status Block (csb) and
 writes its physical address at CSBAL and CSBAH (data is little endian).
 csb->guest_csb_on enables the mode. If disabled, the device acts as a
 regular one.

 Notifications for tx and rx are exchanged without vm exits
 if possible. In particular (only mentioning csb mode below),
 the following actions are performed. In the description below,
 "double check" means verifying again the condition that caused
 the previous action, and reverting the action if the condition has
 changed. The condition typically depends on a variable set by the
 other party, and the double check is done to avoid races. E.g.

	// start with A=0
    again:
	// do something
	if ( cond(C) ) { // C is written by the other side
	    A = 1;
	    // barrier
	    if ( !cond(C) ) {
		A = 0;
		goto again;
	    }
	}

 TX: start from idle:
    H starts with host_need_txkick=1 when the I/O thread bh is idle. Upon new
    transmissions, G always updates guest_tdt.  If host_need_txkick == 1,
    G also writes to the TDT, which acts as a kick to H (so pending
    writes are always dispatched to H as soon as possible.)

 TX: active state:
    On the kick (TDT write) H sets host_need_txkick == 0 (if not
    done already by G), and starts an I/O thread trying to consume
    packets from TDH to guest_tdt, periodically refreshing host_tdh
    and TDH.  When host_tdh == guest_tdt, H sets host_need_txkick=1,
    and then does the "double check" for race avoidance.

 TX: G runs out of buffers
    XXX there are two mechanisms, one boolean (using guest_need_txkick)
    and one with a threshold (using guest_txkick_at). They are mutually
    exclusive.
    BOOLEAN: when G has no space, it sets guest_need_txkick=1 and does
        the double check. If H finds guest_need_txkick== 1 on a write
        to TDH, it also generates an interrupt.
    THRESHOLD: G sets guest_txkick_at to the TDH value for which it
	wants to receive an interrupt. When H detects that TDH moves
	across guest_txkick_at, it generates an interrupt.
	This second mechanism reduces the number of interrupts and
	TDT writes on the transmit side when the host is too slow.

 RX: start from idle
    G starts with guest_need_rxkick = 1 when the receive ring is empty.
    As packets arrive, H updates host_rdh (and RDH) and also generates an
    interrupt when guest_need_rxkick == 1 (so incoming packets are
    always reported to G as soon as possible, apart from interrupt
    moderation delays). It also tracks guest_rdt for new buffers.

 RX: active state
    As the interrupt arrives, G sets guest_need_rxkick = 0 and starts
    draining packets from the receive ring, while updating guest_rdt.
    When G runs out of packets it sets guest_need_rxkick=1 and does the
    double check.

 RX: H runs out of buffers
    XXX there are two mechanisms, one boolean (using host_need_rxkick)
    and one with a threshold (using host_rxkick_at). They are mutually
    exclusive.
    BOOLEAN: when H has no space, it sets host_need_rxkick=1 and does the
	double check. If G finds host_need_rxkick==1 on updating guest_rdt,
	it also writes to RDT causing a kick to H.
    THRESHOLD: H sets host_rxkick_at to the RDT value for which it wants
	to receive a kick. When G detects that guest_rdt moves across
	host_rxkick_at, it writes to RDT, thus generating a kick.
	This second mechanism reduces the number of kicks and
	RDT writes on the receive side when the guest is too slow and
	would free only a few buffers at a time.

 */

/*
 * Communication Status Block shared between guest and host; see the
 * protocol description above for how each field is used.
 *
 * All members are uint32_t, so the including file must make that type
 * visible before including this header.
 *
 * Layout: the guest-written group is 7 words plus pad[9], i.e. 16 32-bit
 * words (64 bytes), so the host-written group starts at byte offset 64.
 * NOTE(review): the 64-byte split presumably matches a cache line, in
 * keeping with the XXX note below -- confirm before changing pad[].
 */
struct paravirt_csb {
    /* XXX revise the layout to minimize cache bounces.
     * Usage is described as follows:
     * 	[GH][RW][+-0]	guest/host reads/writes frequently/rarely/almost never
     */
    /* these are (mostly) written by the guest */
    uint32_t guest_tdt;            /* GW+ HR+ pkt to transmit */
    uint32_t guest_need_txkick;    /* GW- HR+ G ran out of tx bufs, request kick */
    uint32_t guest_need_rxkick;    /* GW- HR+ G ran out of rx pkts, request kick */
    uint32_t guest_csb_on;         /* GW- HR+ enable paravirtual mode */
    uint32_t guest_rdt;            /* GW+ HR+ rx buffers available */
    uint32_t guest_txkick_at;      /* GW- HR+ tx ring pos. where G expects an intr */
    uint32_t guest_use_msix;       /* GW0 HR0 guest uses MSI-X interrupts. */
    uint32_t pad[9];               /* fills the guest group up to 16 words */

    /* these are (mostly) written by the host */
    uint32_t host_tdh;             /* GR0 HW- shadow register, mostly unused */
    uint32_t host_need_txkick;     /* GR+ HW- start the iothread */
    uint32_t host_txcycles_lim;    /* GW- HR- how much to spin before sleep.
				    * set by the guest */
    uint32_t host_txcycles;        /* GR0 HW- counter, but no need to be exported */
    uint32_t host_rdh;             /* GR0 HW- shadow register, mostly unused */
    uint32_t host_need_rxkick;     /* GR+ HW- flush rx queued packets */
    uint32_t host_isr;             /* GR* HW* shadow copy of ISR */
    uint32_t host_rxkick_at;       /* GR+ HW- rx ring pos where H expects a kick */
    uint32_t vnet_ring_high;	/* Vnet ring physical address high. */
    uint32_t vnet_ring_low;	/* Vnet ring physical address low. */
};

/*
 * NOTE(review): presumably the size in bytes of the memory region reserved
 * for the csb; its users are not visible in this header -- confirm.
 */
#define NET_PARAVIRT_CSB_SIZE   4096

/*
 * All-ones 32-bit value.
 * NOTE(review): presumably a "no value" marker for csb fields; its users
 * are not visible in this header -- confirm.
 */
#define NET_PARAVIRT_NONE   (~((uint32_t)0))

#ifdef	QEMU_PCI_H

/*
 * API functions only available within QEMU
 */

/*
 * Declared only when QEMU_PCI_H is defined; the implementation is not part
 * of this header.
 * NOTE(review): csbbal/csbbah presumably carry the values written to the
 * CSBAL/CSBAH registers described above, and tx_bh the transmit bottom
 * half -- confirm against the QEMU implementation.
 */
void paravirt_configure_csb(struct paravirt_csb **csb, uint32_t csbbal,
			uint32_t csbbah, QEMUBH *tx_bh, AddressSpace *as);

#endif /* QEMU_PCI_H */

#endif /* NET_PARAVIRT_H */