/*
 * Copyright (C) 2013 Luigi Rizzo. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef NET_PARAVIRT_H
#define NET_PARAVIRT_H

/*
 * $FreeBSD: releng/11.0/sys/net/paravirt.h 270063 2014-08-16 15:00:01Z luigi $
 *
 * Support for virtio-like communication between host (H) and guest (G) NICs.
 *
 * THIS IS EXPERIMENTAL CODE AND SUBJECT TO CHANGE.
 *
 * The guest allocates the shared Communication Status Block (csb) and
 * writes its physical address at CSBAL and CSBAH (data is little endian).
 * csb->guest_csb_on enables the mode. If disabled, the device acts as a
 * regular one.
 *
 * Notifications for tx and rx are exchanged without vm exits
 * if possible. In particular (only mentioning csb mode below),
 * the following actions are performed. In the description below,
 * "double check" means verifying again the condition that caused
 * the previous action, and reverting the action if the condition has
 * changed. The condition typically depends on a variable set by the
 * other party, and the double check is done to avoid races. E.g.
 *
 *	// start with A=0
 *    again:
 *	// do something
 *	if ( cond(C) ) { // C is written by the other side
 *	    A = 1;
 *	    // barrier
 *	    if ( !cond(C) ) {
 *		A = 0;
 *		goto again;
 *	    }
 *	}
 *
 * TX: start from idle:
 *    H starts with host_need_txkick=1 when the I/O thread bh is idle. Upon new
 *    transmissions, G always updates guest_tdt.  If host_need_txkick == 1,
 *    G also writes to the TDT, which acts as a kick to H (so pending
 *    writes are always dispatched to H as soon as possible.)
 *
 * TX: active state:
 *    On the kick (TDT write) H sets host_need_txkick=0 (if not
 *    done already by G), and starts an I/O thread trying to consume
 *    packets from TDH to guest_tdt, periodically refreshing host_tdh
 *    and TDH.  When host_tdh == guest_tdt, H sets host_need_txkick=1,
 *    and then does the "double check" for race avoidance.
 *
 * TX: G runs out of buffers
 *    XXX there are two mechanisms, one boolean (using guest_need_txkick)
 *    and one with a threshold (using guest_txkick_at). They are mutually
 *    exclusive.
 *    BOOLEAN: when G has no space, it sets guest_need_txkick=1 and does
 *        the double check. If H finds guest_need_txkick == 1 on a write
 *        to TDH, it also generates an interrupt.
 *    THRESHOLD: G sets guest_txkick_at to the TDH value for which it
 *        wants to receive an interrupt. When H detects that TDH moves
 *        across guest_txkick_at, it generates an interrupt.
 *        This second mechanism reduces the number of interrupts and
 *        TDT writes on the transmit side when the host is too slow.
 *
 * RX: start from idle
 *    G starts with guest_need_rxkick = 1 when the receive ring is empty.
 *    As packets arrive, H updates host_rdh (and RDH) and also generates an
 *    interrupt when guest_need_rxkick == 1 (so incoming packets are
 *    always reported to G as soon as possible, apart from interrupt
 *    moderation delays). It also tracks guest_rdt for new buffers.
 *
 * RX: active state
 *    As the interrupt arrives, G sets guest_need_rxkick = 0 and starts
 *    draining packets from the receive ring, while updating guest_rdt.
 *    When G runs out of packets it sets guest_need_rxkick=1 and does the
 *    double check.
 *
 * RX: H runs out of buffers
 *    XXX there are two mechanisms, one boolean (using host_need_rxkick)
 *    and one with a threshold (using host_rxkick_at). They are mutually
 *    exclusive.
 *    BOOLEAN: when H has no space, it sets host_need_rxkick=1 and does the
 *        double check. If G finds host_need_rxkick == 1 on updating guest_rdt,
 *        it also writes to RDT causing a kick to H.
 *    THRESHOLD: H sets host_rxkick_at to the RDT value for which it wants
 *        to receive a kick. When G detects that guest_rdt moves across
 *        host_rxkick_at, it writes to RDT, thus generating a kick.
 *        This second mechanism reduces the number of kicks and
 *        RDT writes on the receive side when the guest is too slow and
 *        would free only a few buffers at a time.
 */

/*
 * Communication Status Block shared between guest and host.
 *
 * Layout as declared here: seven guest-side 32-bit words followed by
 * pad[9] make the first section 16 words (64 bytes), so the host-side
 * section starts at byte offset 64; the whole structure is 26 words
 * (104 bytes).  NOTE(review): the padding presumably keeps the two
 * sections on separate cache lines (see the XXX below) -- confirm.
 * Both sides access this structure, so field order and sizes must not
 * change without updating the peer.
 */
struct paravirt_csb {
	/* XXX revise the layout to minimize cache bounces.
	 * Usage is described as follows:
	 *	[GH][RW][+-0]	guest/host reads/writes frequently/rarely/almost never
	 */

	/* these are (mostly) written by the guest */
	uint32_t guest_tdt;		/* GW+ HR+ pkt to transmit */
	uint32_t guest_need_txkick;	/* GW- HR+ G ran out of tx bufs, request kick */
	uint32_t guest_need_rxkick;	/* GW- HR+ G ran out of rx pkts, request kick */
	uint32_t guest_csb_on;		/* GW- HR+ enable paravirtual mode */
	uint32_t guest_rdt;		/* GW+ HR+ rx buffers available */
	uint32_t guest_txkick_at;	/* GW- HR+ tx ring pos. where G expects an intr */
	uint32_t guest_use_msix;	/* GW0 HR0 guest uses MSI-X interrupts. */
	uint32_t pad[9];		/* fills the guest section up to 16 words */

	/* these are (mostly) written by the host */
	uint32_t host_tdh;		/* GR0 HW- shadow register, mostly unused */
	uint32_t host_need_txkick;	/* GR+ HW- start the iothread */
	uint32_t host_txcycles_lim;	/* GW- HR- how much to spin before sleep.
					 * set by the guest */
	uint32_t host_txcycles;		/* GR0 HW- counter, but no need to be exported */
	uint32_t host_rdh;		/* GR0 HW- shadow register, mostly unused */
	uint32_t host_need_rxkick;	/* GR+ HW- flush rx queued packets */
	uint32_t host_isr;		/* GR* HW* shadow copy of ISR */
	uint32_t host_rxkick_at;	/* GR+ HW- rx ring pos where H expects a kick */
	uint32_t vnet_ring_high;	/* Vnet ring physical address high. */
	uint32_t vnet_ring_low;		/* Vnet ring physical address low. */
};

/*
 * NOTE(review): presumably the size in bytes of the region reserved for
 * the CSB (larger than the 104-byte structure itself) -- confirm with
 * the users of this macro, which are not visible in this header.
 */
#define NET_PARAVIRT_CSB_SIZE	4096

/*
 * All-ones 32-bit value.  NOTE(review): presumably a "no value" marker
 * for the *_kick_at thresholds -- confirm against the users.
 */
#define NET_PARAVIRT_NONE	(~((uint32_t)0))

#ifdef QEMU_PCI_H

/*
 * API functions only available within QEMU
 */

/*
 * Only the prototype is visible here.  NOTE(review): csbbal/csbbah
 * presumably carry the low/high 32 bits of the CSB physical address
 * written by the guest to CSBAL/CSBAH, and *csb presumably receives the
 * resulting mapping -- confirm against the implementation.
 */
void paravirt_configure_csb(struct paravirt_csb **csb, uint32_t csbbal,
			uint32_t csbbah, QEMUBH *tx_bh, AddressSpace *as);

#endif /* QEMU_PCI_H */

#endif /* NET_PARAVIRT_H */