// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2016 Red Hat, Inc.
 * Author: Michael S. Tsirkin <mst@redhat.com>
 *
 * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
 * signalling, unconditionally.
 */
#define _GNU_SOURCE
#include "main.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>

/* Next - where the next entry will be written.
 * Prev - the value "Next" had when the event last triggered.
 * Event - peer requested an event after writing this entry.
 */
static inline bool need_event(unsigned short event,
			      unsigned short next,
			      unsigned short prev)
{
	return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
}
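
/*
 * Worked example: need_event() returns true iff event lies in the
 * wrap-safe window [prev, next). With prev = 0xfffe, next = 0x0002 and
 * event = 0x0000:
 *	(unsigned short)(next - event - 1) == 0x0001
 *	(unsigned short)(next - prev)      == 0x0004
 * so 0x0001 < 0x0004 and we must notify: entry 0 was written since the
 * last notification. With event == next the left side underflows to
 * 0xffff and the comparison correctly fails.
 */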

/* Design:
 * Guest adds descriptors with unique index values and DESC_HW in flags.
 * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
 * Flags are always set last.
 */
#define DESC_HW 0x1
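
/*
 * A descriptor thus has two stable ownership states:
 *	guest-owned: DESC_HW clear - the guest may rewrite addr/len;
 *	host-owned:  DESC_HW set   - the host may consume it and
 *		     rewrite len.
 * Writing flags last (with release semantics, barriers A/B below)
 * publishes the rest of the descriptor before ownership changes hands.
 */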

struct desc {
	unsigned short flags;
	unsigned short index;
	unsigned len;
	unsigned long long addr;
};
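
/*
 * 2 + 2 + 4 + 8 = 16 bytes with no padding on common ABIs, so four
 * descriptors share a typical 64-byte cache line.
 */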

/* padding needed to avoid false sharing; 0x80 bytes covers two
 * typical 64-byte cache lines
 */
#define HOST_GUEST_PADDING 0x80

/* Mostly read */
struct event {
	unsigned short kick_index;
	unsigned char reserved0[HOST_GUEST_PADDING - 2];
	unsigned short call_index;
	unsigned char reserved1[HOST_GUEST_PADDING - 2];
};

struct data {
	void *buf; /* the host rewrites the descriptor, so keep buf here */
	void *data;
} *data;

struct desc *ring;
struct event *event;

struct guest {
	unsigned avail_idx;
	unsigned last_used_idx;
	unsigned num_free;
	unsigned kicked_avail_idx;
	unsigned char reserved[HOST_GUEST_PADDING - 12];
} guest;

struct host {
	/* we do not need to track the last avail index
	 * unless we have more than one request in flight.
	 */
	unsigned used_idx;
	unsigned called_used_idx;
	unsigned char reserved[HOST_GUEST_PADDING - 4];
} host;

/* implemented by ring */
void alloc_ring(void)
{
	int ret;
	int i;

	ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
	if (ret) {
		/* posix_memalign returns the error, it does not set errno */
		errno = ret;
		perror("Unable to allocate ring buffer");
		exit(3);
	}
	event = calloc(1, sizeof(*event));
	if (!event) {
		perror("Unable to allocate event buffer");
		exit(3);
	}
	guest.avail_idx = 0;
	guest.kicked_avail_idx = -1;
	guest.last_used_idx = 0;
	host.used_idx = 0;
	host.called_used_idx = -1;
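	/*
	 * The -1 values truncate to 0xffff in need_event()'s unsigned
	 * short arguments, so the first kick/call after reset is not
	 * suppressed by a stale "previous" index.
	 */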
	for (i = 0; i < ring_size; ++i) {
		struct desc desc = {
			.index = i,
		};
		ring[i] = desc;
	}
	guest.num_free = ring_size;
	data = calloc(ring_size, sizeof(*data));
	if (!data) {
		perror("Unable to allocate data buffer");
		exit(3);
	}
}

/* guest side */
int add_inbuf(unsigned len, void *buf, void *datap)
{
	unsigned head, index;

	if (!guest.num_free)
		return -1;

	guest.num_free--;
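	/* the mask below assumes ring_size is a power of two */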
	head = (ring_size - 1) & (guest.avail_idx++);

	/* Start with a write. On MESI architectures this avoids fetching
	 * the cache line in the shared state while a consumer is
	 * polling this descriptor.
	 */
	ring[head].addr = (unsigned long)(void *)buf;
	ring[head].len = len;
	/* The read below might be reordered before the writes above.
	 * That is OK because it's just an optimization: if it happens,
	 * we will get the cache line in a shared state, which is
	 * unfortunate, but probably not worth adding an explicit full
	 * barrier to avoid.
	 */
	barrier();
	index = ring[head].index;
	data[index].buf = buf;
	data[index].data = datap;
	/* Barrier A (for pairing) */
	smp_release();
	ring[head].flags = DESC_HW;

	return 0;
}

void *get_buf(unsigned *lenp, void **bufp)
{
	unsigned head = (ring_size - 1) & guest.last_used_idx;
	unsigned index;
	void *datap;

	if (ring[head].flags & DESC_HW)
		return NULL;
	/* Barrier B (for pairing) */
	smp_acquire();
	*lenp = ring[head].len;
	index = ring[head].index & (ring_size - 1);
	datap = data[index].data;
	*bufp = data[index].buf;
	data[index].buf = NULL;
	data[index].data = NULL;
	guest.num_free++;
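	/* reclaim strictly in ring order: the host completes in order
	 * (see use_buf()), so a plain counter suffices
	 */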
	guest.last_used_idx++;
	return datap;
}

bool used_empty(void)
{
	unsigned head = (ring_size - 1) & guest.last_used_idx;

	return (ring[head].flags & DESC_HW);
}

void disable_call(void)
{
	/* Doing nothing to disable calls might cause
	 * extra interrupts, but reduces the number of cache misses.
	 */
}

bool enable_call(void)
{
	event->call_index = guest.last_used_idx;
	/* Flush call index write */
	/* Barrier D (for pairing) */
	smp_mb();
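	/*
	 * If this returns false, an entry was already pending when
	 * call_index was published and its call may have been skipped,
	 * so the caller must drain the ring before sleeping.
	 */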
	return used_empty();
}

void kick_available(void)
{
	bool need;

	/* Flush in previous flags write */
	/* Barrier C (for pairing) */
	smp_mb();
	need = need_event(event->kick_index,
			  guest.avail_idx,
			  guest.kicked_avail_idx);

	guest.kicked_avail_idx = guest.avail_idx;
	if (need)
		kick();
}

/* host side */
void disable_kick(void)
{
	/* Doing nothing to disable kicks might cause
	 * extra interrupts, but reduces the number of cache misses.
	 */
}

bool enable_kick(void)
{
	event->kick_index = host.used_idx;
	/* Barrier C (for pairing) */
	smp_mb();
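	/* as with enable_call(), a false return means work is already
	 * pending and no kick may arrive for it
	 */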
	return avail_empty();
}

bool avail_empty(void)
{
	unsigned head = (ring_size - 1) & host.used_idx;

	return !(ring[head].flags & DESC_HW);
}

bool use_buf(unsigned *lenp, void **bufp)
{
	unsigned head = (ring_size - 1) & host.used_idx;

	if (!(ring[head].flags & DESC_HW))
		return false;

	/* make sure the length read below is not speculated */
	/* Barrier A (for pairing) */
	smp_acquire();

	/* simple in-order completion: we don't need
	 * to touch index at all. This also means we
	 * can just modify the descriptor in-place.
	 */
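	/* decrementing len stands in for writing back the number of
	 * bytes actually used; there is no real payload in this test
	 */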
	ring[head].len--;
	/* Make sure len is valid before flags.
	 * Note: the alternative is to write len and flags in one access -
	 * possible on 64 bit architectures, but wmb is free on Intel anyway,
	 * so I have no way to test whether it's a gain.
	 */
	/* Barrier B (for pairing) */
	smp_release();
	ring[head].flags = 0;
	host.used_idx++;
	return true;
}

void call_used(void)
{
	bool need;

	/* Flush in previous flags write */
	/* Barrier D (for pairing) */
	smp_mb();

	need = need_event(event->call_index,
			  host.used_idx,
			  host.called_used_idx);

	host.called_used_idx = host.used_idx;

	if (need)
		call();
}
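
/*
 * Rough shape of how the ringtest harness is expected to drive these
 * hooks (a sketch only; the real loops live in main.c):
 *
 *	guest:	while (!add_inbuf(len, buf, data))
 *			;
 *		kick_available();
 *		...
 *		while ((data = get_buf(&len, &buf)))
 *			;
 *
 *	host:	while (use_buf(&len, &buf))
 *			;
 *		call_used();
 */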