1// SPDX-License-Identifier: BSD-3-Clause
2/*
3 * Copyright (c) 2009-2012,2016,2023 Microsoft Corp.
4 * Copyright (c) 2012 NetApp Inc.
5 * Copyright (c) 2012 Citrix Inc.
6 * All rights reserved.
7 */
8
9#include <errno.h>
10#include <fcntl.h>
11#include <emmintrin.h>
12#include <linux/limits.h>
13#include <stdbool.h>
14#include <stdint.h>
15#include <stdio.h>
16#include <string.h>
17#include <sys/mman.h>
18#include <sys/uio.h>
19#include <unistd.h>
20#include "vmbus_bufring.h"
21
22/**
23 * Compiler barrier.
24 *
25 * Guarantees that operation reordering does not occur at compile time
26 * for operations directly before and after the barrier.
27 */
28#define	rte_compiler_barrier()		({ asm volatile ("" : : : "memory"); })
29
30#define VMBUS_RQST_ERROR	0xFFFFFFFFFFFFFFFF
31#define ALIGN(val, align)	((typeof(val))((val) & (~((typeof(val))((align) - 1)))))
32
/*
 * mmap twice the requested size from the UIO device fd.
 * NOTE(review): presumably the doubled size covers a TX/RX ring pair —
 * confirm against the UIO device layout.
 * Returns NULL on failure.
 */
void *vmbus_uio_map(int *fd, int size)
{
	void *ring = mmap(NULL, 2 * size, PROT_READ | PROT_WRITE,
			  MAP_SHARED, *fd, 0);

	return (ring == MAP_FAILED) ? NULL : ring;
}
43
/*
 * Advance a ring index by inc, wrapping at sz.
 * Assumes inc < sz so that a single subtraction suffices.
 */
static inline uint32_t vmbus_br_idxinc(uint32_t idx, uint32_t inc, uint32_t sz)
{
	uint32_t next = idx + inc;

	return (next >= sz) ? next - sz : next;
}
53
54void vmbus_br_setup(struct vmbus_br *br, void *buf, unsigned int blen)
55{
56	br->vbr = buf;
57	br->windex = br->vbr->windex;
58	br->dsize = blen - sizeof(struct vmbus_bufring);
59}
60
/*
 * Full memory barrier (x86): a locked read-modify-write on a dummy
 * stack location orders all earlier loads/stores before all later ones.
 * The -128(%rsp) slot lies in the red zone, so no live data is touched.
 */
static inline __always_inline void
rte_smp_mb(void)
{
	asm volatile("lock addl $0, -128(%%rsp); " ::: "memory");
}
66
/*
 * Atomic compare-and-set on a 32-bit word (x86 lock cmpxchg).
 *
 * If *dst == exp, store src into *dst and return 1; otherwise leave
 * *dst unchanged and return 0.  The "a" constraint loads exp into EAX
 * as cmpxchgl requires; sete then captures ZF as the boolean result.
 */
static inline int
rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src)
{
	uint8_t res;

	asm volatile("lock ; "
		     "cmpxchgl %[src], %[dst];"
		     "sete %[res];"
		     : [res] "=a" (res),     /* output */
		     [dst] "=m" (*dst)
		     : [src] "r" (src),      /* input */
		     "a" (exp),
		     "m" (*dst)
		     : "memory");            /* no-clobber list */
	return res;
}
83
84static inline uint32_t
85vmbus_txbr_copyto(const struct vmbus_br *tbr, uint32_t windex,
86		  const void *src0, uint32_t cplen)
87{
88	uint8_t *br_data = tbr->vbr->data;
89	uint32_t br_dsize = tbr->dsize;
90	const uint8_t *src = src0;
91
92	/* XXX use double mapping like Linux kernel? */
93	if (cplen > br_dsize - windex) {
94		uint32_t fraglen = br_dsize - windex;
95
96		/* Wrap-around detected */
97		memcpy(br_data + windex, src, fraglen);
98		memcpy(br_data, src + fraglen, cplen - fraglen);
99	} else {
100		memcpy(br_data + windex, src, cplen);
101	}
102
103	return vmbus_br_idxinc(windex, cplen, br_dsize);
104}
105
106/*
107 * Write scattered channel packet to TX bufring.
108 *
109 * The offset of this channel packet is written as a 64bits value
110 * immediately after this channel packet.
111 *
112 * The write goes through three stages:
113 *  1. Reserve space in ring buffer for the new data.
114 *     Writer atomically moves priv_write_index.
115 *  2. Copy the new data into the ring.
116 *  3. Update the tail of the ring (visible to host) that indicates
117 *     next read location. Writer updates write_index
118 */
static int
vmbus_txbr_write(struct vmbus_br *tbr, const struct iovec iov[], int iovlen)
{
	struct vmbus_bufring *vbr = tbr->vbr;
	uint32_t ring_size = tbr->dsize;
	uint32_t old_windex, next_windex, windex, total;
	uint64_t save_windex;
	int i;

	/* Total bytes to reserve: payload plus the trailing 64-bit offset. */
	total = 0;
	for (i = 0; i < iovlen; i++)
		total += iov[i].iov_len;
	total += sizeof(save_windex);

	/* Reserve space in ring */
	do {
		uint32_t avail;

		/* Get current free location */
		old_windex = tbr->windex;

		/* Prevent compiler reordering this with calculation */
		rte_compiler_barrier();

		avail = vmbus_br_availwrite(tbr, old_windex);

		/*
		 * If not enough space in ring, then tell caller.
		 * Note: strict <= keeps at least one byte unused.
		 */
		if (avail <= total)
			return -EAGAIN;

		next_windex = vmbus_br_idxinc(old_windex, total, ring_size);

		/* Atomic update of next write_index for other threads */
	} while (!rte_atomic32_cmpset(&tbr->windex, old_windex, next_windex));

	/* Space from old..new is now reserved */
	windex = old_windex;
	for (i = 0; i < iovlen; i++)
		windex = vmbus_txbr_copyto(tbr, windex, iov[i].iov_base, iov[i].iov_len);

	/* Set the offset of the current channel packet. */
	save_windex = ((uint64_t)old_windex) << 32;
	windex = vmbus_txbr_copyto(tbr, windex, &save_windex,
				   sizeof(save_windex));

	/* The region reserved should match region used */
	if (windex != next_windex)
		return -EINVAL;

	/* Ensure that data is available before updating host index */
	rte_compiler_barrier();

	/*
	 * Checkin for our reservation. wait for our turn to update host.
	 * Writers publish vbr->windex in reservation order: this cmpset
	 * only succeeds once the previous writer has advanced it to our
	 * old_windex.
	 */
	while (!rte_atomic32_cmpset(&vbr->windex, old_windex, next_windex))
		_mm_pause();

	return 0;
}
177
178int rte_vmbus_chan_send(struct vmbus_br *txbr, uint16_t type, void *data,
179			uint32_t dlen, uint32_t flags)
180{
181	struct vmbus_chanpkt pkt;
182	unsigned int pktlen, pad_pktlen;
183	const uint32_t hlen = sizeof(pkt);
184	uint64_t pad = 0;
185	struct iovec iov[3];
186	int error;
187
188	pktlen = hlen + dlen;
189	pad_pktlen = ALIGN(pktlen, sizeof(uint64_t));
190
191	pkt.hdr.type = type;
192	pkt.hdr.flags = flags;
193	pkt.hdr.hlen = hlen >> VMBUS_CHANPKT_SIZE_SHIFT;
194	pkt.hdr.tlen = pad_pktlen >> VMBUS_CHANPKT_SIZE_SHIFT;
195	pkt.hdr.xactid = VMBUS_RQST_ERROR;
196
197	iov[0].iov_base = &pkt;
198	iov[0].iov_len = hlen;
199	iov[1].iov_base = data;
200	iov[1].iov_len = dlen;
201	iov[2].iov_base = &pad;
202	iov[2].iov_len = pad_pktlen - pktlen;
203
204	error = vmbus_txbr_write(txbr, iov, 3);
205
206	return error;
207}
208
209static inline uint32_t
210vmbus_rxbr_copyfrom(const struct vmbus_br *rbr, uint32_t rindex,
211		    void *dst0, size_t cplen)
212{
213	const uint8_t *br_data = rbr->vbr->data;
214	uint32_t br_dsize = rbr->dsize;
215	uint8_t *dst = dst0;
216
217	if (cplen > br_dsize - rindex) {
218		uint32_t fraglen = br_dsize - rindex;
219
220		/* Wrap-around detected. */
221		memcpy(dst, br_data + rindex, fraglen);
222		memcpy(dst + fraglen, br_data, cplen - fraglen);
223	} else {
224		memcpy(dst, br_data + rindex, cplen);
225	}
226
227	return vmbus_br_idxinc(rindex, cplen, br_dsize);
228}
229
230/* Copy data from receive ring but don't change index */
231static int
232vmbus_rxbr_peek(const struct vmbus_br *rbr, void *data, size_t dlen)
233{
234	uint32_t avail;
235
236	/*
237	 * The requested data and the 64bits channel packet
238	 * offset should be there at least.
239	 */
240	avail = vmbus_br_availread(rbr);
241	if (avail < dlen + sizeof(uint64_t))
242		return -EAGAIN;
243
244	vmbus_rxbr_copyfrom(rbr, rbr->vbr->rindex, data, dlen);
245	return 0;
246}
247
248/*
249 * Copy data from receive ring and change index
250 * NOTE:
251 * We assume (dlen + skip) == sizeof(channel packet).
252 */
253static int
254vmbus_rxbr_read(struct vmbus_br *rbr, void *data, size_t dlen, size_t skip)
255{
256	struct vmbus_bufring *vbr = rbr->vbr;
257	uint32_t br_dsize = rbr->dsize;
258	uint32_t rindex;
259
260	if (vmbus_br_availread(rbr) < dlen + skip + sizeof(uint64_t))
261		return -EAGAIN;
262
263	/* Record where host was when we started read (for debug) */
264	rbr->windex = rbr->vbr->windex;
265
266	/*
267	 * Copy channel packet from RX bufring.
268	 */
269	rindex = vmbus_br_idxinc(rbr->vbr->rindex, skip, br_dsize);
270	rindex = vmbus_rxbr_copyfrom(rbr, rindex, data, dlen);
271
272	/*
273	 * Discard this channel packet's 64bits offset, which is useless to us.
274	 */
275	rindex = vmbus_br_idxinc(rindex, sizeof(uint64_t), br_dsize);
276
277	/* Update the read index _after_ the channel packet is fetched.	 */
278	rte_compiler_barrier();
279
280	vbr->rindex = rindex;
281
282	return 0;
283}
284
285int rte_vmbus_chan_recv_raw(struct vmbus_br *rxbr,
286			    void *data, uint32_t *len)
287{
288	struct vmbus_chanpkt_hdr pkt;
289	uint32_t dlen, bufferlen = *len;
290	int error;
291
292	error = vmbus_rxbr_peek(rxbr, &pkt, sizeof(pkt));
293	if (error)
294		return error;
295
296	if (unlikely(pkt.hlen < VMBUS_CHANPKT_HLEN_MIN))
297		/* XXX this channel is dead actually. */
298		return -EIO;
299
300	if (unlikely(pkt.hlen > pkt.tlen))
301		return -EIO;
302
303	/* Length are in quad words */
304	dlen = pkt.tlen << VMBUS_CHANPKT_SIZE_SHIFT;
305	*len = dlen;
306
307	/* If caller buffer is not large enough */
308	if (unlikely(dlen > bufferlen))
309		return -ENOBUFS;
310
311	/* Read data and skip packet header */
312	error = vmbus_rxbr_read(rxbr, data, dlen, 0);
313	if (error)
314		return error;
315
316	/* Return the number of bytes read */
317	return dlen + sizeof(uint64_t);
318}
319