hv_ring_buffer.c (r296181 -> r298446)
/*-
 * Copyright (c) 2009-2012,2016 Microsoft Corp.
 * Copyright (c) 2012 NetApp Inc.
 * Copyright (c) 2012 Citrix Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>

#include "hv_vmbus_priv.h"

/* Amount of space available to write, given read/write indices and size */
#define HV_BYTES_AVAIL_TO_WRITE(r, w, z)	\
	(((w) >= (r)) ? ((z) - ((w) - (r))) : ((r) - (w)))
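
/*
 * Worked example (hypothetical values): with ring size z = 4096,
 * read index r = 1000 and write index w = 3000, the writer owns
 * z - (w - r) = 4096 - 2000 = 2096 bytes.  If the write index has
 * wrapped past the end, e.g. r = 3000 and w = 1000, the writer owns
 * r - w = 2000 bytes instead.
 */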

static int
hv_rbi_sysctl_stats(SYSCTL_HANDLER_ARGS)
{
	hv_vmbus_ring_buffer_info* rbi;
	uint32_t read_index, write_index, interrupt_mask, sz;
	uint32_t read_avail, write_avail;
	char rbi_stats[256];

	rbi = (hv_vmbus_ring_buffer_info*)arg1;
	read_index = rbi->ring_buffer->read_index;
	write_index = rbi->ring_buffer->write_index;
	interrupt_mask = rbi->ring_buffer->interrupt_mask;
	sz = rbi->ring_data_size;
	write_avail = HV_BYTES_AVAIL_TO_WRITE(read_index,
	    write_index, sz);
	read_avail = sz - write_avail;
	snprintf(rbi_stats, sizeof(rbi_stats),
	    "r_idx:%u "
	    "w_idx:%u "
	    "int_mask:%u "
	    "r_avail:%u "
	    "w_avail:%u",
	    read_index, write_index, interrupt_mask,
	    read_avail, write_avail);

	return (sysctl_handle_string(oidp, rbi_stats,
	    sizeof(rbi_stats), req));
}

void
hv_ring_buffer_stat(
	struct sysctl_ctx_list		*ctx,
	struct sysctl_oid_list		*tree_node,
	hv_vmbus_ring_buffer_info	*rbi,
	const char			*desc)
{
	SYSCTL_ADD_PROC(ctx, tree_node, OID_AUTO,
	    "ring_buffer_stats",
	    CTLTYPE_STRING|CTLFLAG_RD, rbi, 0,
	    hv_rbi_sysctl_stats, "A", desc);
}

/**
 * @brief Get the number of bytes available to read from and to write
 * to the specified ring buffer.
 */
static inline void
get_ring_buffer_avail_bytes(
	hv_vmbus_ring_buffer_info*	rbi,
	uint32_t*			read,
	uint32_t*			write)
{
	uint32_t read_loc, write_loc;

	/*
	 * Capture the read/write indices before they change
	 */
	read_loc = rbi->ring_buffer->read_index;
	write_loc = rbi->ring_buffer->write_index;

	*write = HV_BYTES_AVAIL_TO_WRITE(
	    read_loc, write_loc, rbi->ring_data_size);
	*read = rbi->ring_data_size - *write;
}

/**
 * @brief Get the next write location for the specified ring buffer
 */
static inline uint32_t
get_next_write_location(hv_vmbus_ring_buffer_info* ring_info)
{
	uint32_t next = ring_info->ring_buffer->write_index;
	return (next);
}

/**
 * @brief Set the next write location for the specified ring buffer
 */
static inline void
set_next_write_location(
	hv_vmbus_ring_buffer_info*	ring_info,
	uint32_t			next_write_location)
{
	ring_info->ring_buffer->write_index = next_write_location;
}

/**
 * @brief Get the next read location for the specified ring buffer
 */
static inline uint32_t
get_next_read_location(hv_vmbus_ring_buffer_info* ring_info)
{
	uint32_t next = ring_info->ring_buffer->read_index;
	return (next);
}

/**
 * @brief Get the next read location + offset for the specified ring buffer.
 * This allows the caller to skip over data it has already examined.
 */
static inline uint32_t
get_next_read_location_with_offset(
	hv_vmbus_ring_buffer_info*	ring_info,
	uint32_t			offset)
{
	uint32_t next = ring_info->ring_buffer->read_index;
	next += offset;
	next %= ring_info->ring_data_size;
	return (next);
}
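
/*
 * Worked example (hypothetical values): with ring_data_size = 4096,
 * read_index = 4090 and offset = 16, the sum 4106 wraps to
 * 4106 % 4096 = 10, i.e. the new location is 10 bytes past the start
 * of the data area.
 */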

/**
 * @brief Set the next read location for the specified ring buffer
 */
static inline void
set_next_read_location(
	hv_vmbus_ring_buffer_info*	ring_info,
	uint32_t			next_read_location)
{
	ring_info->ring_buffer->read_index = next_read_location;
}

/**
 * @brief Get the start of the ring buffer
 */
static inline void *
get_ring_buffer(hv_vmbus_ring_buffer_info* ring_info)
{
	return ((void *) ring_info->ring_buffer->buffer);
}

/**
 * @brief Get the size of the ring buffer.
 */
static inline uint32_t
get_ring_buffer_size(hv_vmbus_ring_buffer_info* ring_info)
{
	return (ring_info->ring_data_size);
}

/**
 * Get the write index of the specified ring buffer, packed into the
 * upper 32 bits of a uint64_t; the lower 32 bits, where the read index
 * would otherwise go, are left zero.
 */
static inline uint64_t
get_ring_buffer_indices(hv_vmbus_ring_buffer_info* ring_info)
{
	return ((uint64_t) ring_info->ring_buffer->write_index << 32);
}
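
/*
 * Example (hypothetical value): a write_index of 0x10 yields
 * 0x0000001000000000.
 */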

void
hv_ring_buffer_read_begin(
	hv_vmbus_ring_buffer_info*	ring_info)
{
	ring_info->ring_buffer->interrupt_mask = 1;
	mb();
}

uint32_t
hv_ring_buffer_read_end(
	hv_vmbus_ring_buffer_info*	ring_info)
{
	uint32_t read, write;

	ring_info->ring_buffer->interrupt_mask = 0;
	mb();

	/*
	 * Now check to see if the ring buffer is still empty.
	 * If it is not, we raced and we need to process new
	 * incoming messages.
	 */
	get_ring_buffer_avail_bytes(ring_info, &read, &write);

	return (read);
}
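
/*
 * Illustrative caller pattern (a sketch, not compiled here): mask host
 * signaling, drain the ring, then unmask and re-check for data that
 * raced in.  drain_all_packets() is a hypothetical consumer routine.
 *
 *	hv_ring_buffer_read_begin(rbi);
 *	for (;;) {
 *		drain_all_packets(rbi);
 *		if (hv_ring_buffer_read_end(rbi) == 0)
 *			break;
 *		hv_ring_buffer_read_begin(rbi);
 *	}
 */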

/*
 * When we write to the ring buffer, check if the host needs to
 * be signaled. Here are the details of this protocol:
 *
 *	1. The host guarantees that while it is draining the
 *	   ring buffer, it will set the interrupt_mask to
 *	   indicate it does not need to be interrupted when
 *	   new data is placed.
 *
 *	2. The host guarantees that it will completely drain
 *	   the ring buffer before exiting the read loop. Further,
 *	   once the ring buffer is empty, it will clear the
 *	   interrupt_mask and re-check to see if new data has
 *	   arrived.
 */
static boolean_t
hv_ring_buffer_needsig_on_write(
	uint32_t			old_write_location,
	hv_vmbus_ring_buffer_info*	rbi)
{
	mb();
	if (rbi->ring_buffer->interrupt_mask)
		return (FALSE);

	/* Read memory barrier */
	rmb();
	/*
	 * This is the only case we need to signal: when the
	 * ring transitions from being empty to non-empty.
	 */
	if (old_write_location == rbi->ring_buffer->read_index)
		return (TRUE);

	return (FALSE);
}
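
/*
 * Example scenario (hypothetical indices): the ring is empty with
 * read_index == write_index == 200 and interrupt_mask == 0.  A write
 * starting at old_write_location == 200 is the empty-to-non-empty
 * transition, so the host is signaled; later writes begin past the
 * read index and need no signal.
 */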

static uint32_t copy_to_ring_buffer(
	hv_vmbus_ring_buffer_info*	ring_info,
	uint32_t			start_write_offset,
	char*				src,
	uint32_t			src_len);

static uint32_t copy_from_ring_buffer(
	hv_vmbus_ring_buffer_info*	ring_info,
	char*				dest,
	uint32_t			dest_len,
	uint32_t			start_read_offset);

/**
 * @brief Get the interrupt mask for the specified ring buffer.
 */
uint32_t
hv_vmbus_get_ring_buffer_interrupt_mask(hv_vmbus_ring_buffer_info *rbi)
{
	return (rbi->ring_buffer->interrupt_mask);
}

/**
 * @brief Initialize the ring buffer.
 */
int
hv_vmbus_ring_buffer_init(
	hv_vmbus_ring_buffer_info*	ring_info,
	void*				buffer,
	uint32_t			buffer_len)
{
	memset(ring_info, 0, sizeof(hv_vmbus_ring_buffer_info));

	ring_info->ring_buffer = (hv_vmbus_ring_buffer*) buffer;
	ring_info->ring_buffer->read_index =
	    ring_info->ring_buffer->write_index = 0;

	ring_info->ring_size = buffer_len;
	ring_info->ring_data_size = buffer_len - sizeof(hv_vmbus_ring_buffer);

	mtx_init(&ring_info->ring_lock, "vmbus ring buffer", NULL, MTX_SPIN);

	return (0);
}
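
/*
 * Illustrative use (a sketch under assumed sizes, not compiled here):
 * the backing buffer carries the hv_vmbus_ring_buffer header followed
 * by the data area, so ring_data_size ends up smaller than buffer_len.
 * The contigmalloc() parameters shown are illustrative only.
 *
 *	void *pages = contigmalloc(4 * PAGE_SIZE, M_DEVBUF, M_WAITOK,
 *	    0ul, ~0ul, PAGE_SIZE, 0);
 *	hv_vmbus_ring_buffer_info rbi;
 *
 *	hv_vmbus_ring_buffer_init(&rbi, pages, 4 * PAGE_SIZE);
 */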

/**
 * @brief Cleanup the ring buffer.
 */
void
hv_ring_buffer_cleanup(hv_vmbus_ring_buffer_info* ring_info)
{
	mtx_destroy(&ring_info->ring_lock);
}

/**
 * @brief Write to the ring buffer.
 */
int
hv_ring_buffer_write(
	hv_vmbus_ring_buffer_info*	out_ring_info,
	hv_vmbus_sg_buffer_list		sg_buffers[],
	uint32_t			sg_buffer_count,
	boolean_t			*need_sig)
{
	uint32_t i;
	uint32_t byte_avail_to_write;
	uint32_t byte_avail_to_read;
	uint32_t old_write_location;
	uint32_t total_bytes_to_write = 0;

	volatile uint32_t next_write_location;
	uint64_t prev_indices = 0;

	for (i = 0; i < sg_buffer_count; i++) {
		total_bytes_to_write += sg_buffers[i].length;
	}

	total_bytes_to_write += sizeof(uint64_t);

	mtx_lock_spin(&out_ring_info->ring_lock);

	get_ring_buffer_avail_bytes(out_ring_info, &byte_avail_to_read,
	    &byte_avail_to_write);

	/*
	 * If there is only room for exactly this packet, treat the ring
	 * as full: writing it would leave read_index == write_index,
	 * which reads as an empty ring buffer the next time around.
	 */
	if (byte_avail_to_write <= total_bytes_to_write) {
		mtx_unlock_spin(&out_ring_info->ring_lock);
		return (EAGAIN);
	}

	/*
	 * Write to the ring buffer
	 */
	next_write_location = get_next_write_location(out_ring_info);

	old_write_location = next_write_location;

	for (i = 0; i < sg_buffer_count; i++) {
		next_write_location = copy_to_ring_buffer(out_ring_info,
		    next_write_location, (char *) sg_buffers[i].data,
		    sg_buffers[i].length);
	}

	/*
	 * Set previous packet start
	 */
	prev_indices = get_ring_buffer_indices(out_ring_info);

	next_write_location = copy_to_ring_buffer(
	    out_ring_info, next_write_location,
	    (char *) &prev_indices, sizeof(uint64_t));

	/*
	 * Full memory barrier before updating the write index.
	 */
	mb();

	/*
	 * Now, update the write location
	 */
	set_next_write_location(out_ring_info, next_write_location);

	mtx_unlock_spin(&out_ring_info->ring_lock);

	*need_sig = hv_ring_buffer_needsig_on_write(old_write_location,
	    out_ring_info);

	return (0);
}
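
/*
 * Illustrative use (a sketch, not compiled here): gather a packet
 * descriptor and its payload into an sg list, write them as one packet,
 * and signal the host only when the write made the ring non-empty.
 * "desc", "payload" and signal_host() are hypothetical.
 *
 *	hv_vmbus_sg_buffer_list sg[2];
 *	boolean_t need_sig;
 *
 *	sg[0].data = &desc;   sg[0].length = sizeof(desc);
 *	sg[1].data = payload; sg[1].length = payload_len;
 *	if (hv_ring_buffer_write(outring, sg, 2, &need_sig) == 0 && need_sig)
 *		signal_host(channel);
 */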

/**
 * @brief Read without advancing the read index.
 */
int
hv_ring_buffer_peek(
	hv_vmbus_ring_buffer_info*	in_ring_info,
	void*				buffer,
	uint32_t			buffer_len)
{
	uint32_t bytes_avail_to_write;
	uint32_t bytes_avail_to_read;
	uint32_t next_read_location = 0;

	mtx_lock_spin(&in_ring_info->ring_lock);

	get_ring_buffer_avail_bytes(in_ring_info, &bytes_avail_to_read,
	    &bytes_avail_to_write);

	/*
	 * Make sure there is something to read
	 */
	if (bytes_avail_to_read < buffer_len) {
		mtx_unlock_spin(&in_ring_info->ring_lock);
		return (EAGAIN);
	}

	/*
	 * Get the current read location
	 */
	next_read_location = get_next_read_location(in_ring_info);

	next_read_location = copy_from_ring_buffer(
	    in_ring_info, (char *)buffer, buffer_len, next_read_location);

	mtx_unlock_spin(&in_ring_info->ring_lock);

	return (0);
}

/**
 * @brief Read and advance the read index.
 */
int
hv_ring_buffer_read(
	hv_vmbus_ring_buffer_info*	in_ring_info,
	void*				buffer,
	uint32_t			buffer_len,
	uint32_t			offset)
{
	uint32_t bytes_avail_to_write;
	uint32_t bytes_avail_to_read;
	uint32_t next_read_location = 0;
	uint64_t prev_indices = 0;

	if (buffer_len == 0)
		return (EINVAL);

	mtx_lock_spin(&in_ring_info->ring_lock);

	get_ring_buffer_avail_bytes(
	    in_ring_info, &bytes_avail_to_read,
	    &bytes_avail_to_write);

	/*
	 * Make sure there is something to read
	 */
	if (bytes_avail_to_read < buffer_len) {
		mtx_unlock_spin(&in_ring_info->ring_lock);
		return (EAGAIN);
	}

	next_read_location = get_next_read_location_with_offset(
	    in_ring_info,
	    offset);

	next_read_location = copy_from_ring_buffer(
	    in_ring_info,
	    (char *) buffer,
	    buffer_len,
	    next_read_location);

	next_read_location = copy_from_ring_buffer(
	    in_ring_info,
	    (char *) &prev_indices,
	    sizeof(uint64_t),
	    next_read_location);

	/*
	 * Make sure all reads are done before we update the read index since
	 * the writer may start writing to the read area once the read index
	 * is updated.
	 */
	wmb();

	/*
	 * Update the read index
	 */
	set_next_read_location(in_ring_info, next_read_location);

	mtx_unlock_spin(&in_ring_info->ring_lock);

	return (0);
}
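
/*
 * Illustrative consumer pattern (a sketch, not compiled here): peek at
 * a fixed-size packet header without consuming it, then read the whole
 * packet once its length is known.  "hdr", "pkt_buf" and the header
 * layout are hypothetical.
 *
 *	struct pkt_header hdr;
 *
 *	if (hv_ring_buffer_peek(inring, &hdr, sizeof(hdr)) == 0)
 *		hv_ring_buffer_read(inring, pkt_buf, hdr.total_len, 0);
 */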

/**
 * @brief Helper routine to copy from source to ring buffer.
 *
 * Assumes there is enough room. Handles wrap-around in the dest case only!
 */
static uint32_t
copy_to_ring_buffer(
	hv_vmbus_ring_buffer_info*	ring_info,
	uint32_t			start_write_offset,
	char*				src,
	uint32_t			src_len)
{
	char *ring_buffer = get_ring_buffer(ring_info);
	uint32_t ring_buffer_size = get_ring_buffer_size(ring_info);
	uint32_t frag_len;

	if (src_len > ring_buffer_size - start_write_offset) {
		/* wrap-around detected! */
		frag_len = ring_buffer_size - start_write_offset;
		memcpy(ring_buffer + start_write_offset, src, frag_len);
		memcpy(ring_buffer, src + frag_len, src_len - frag_len);
	} else {
		memcpy(ring_buffer + start_write_offset, src, src_len);
	}

	start_write_offset += src_len;
	start_write_offset %= ring_buffer_size;

	return (start_write_offset);
}
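
/*
 * Worked example (hypothetical values): with ring_buffer_size = 4096,
 * start_write_offset = 4000 and src_len = 200, frag_len = 96 bytes go
 * at offsets 4000..4095 and the remaining 104 bytes restart at offset
 * 0; the returned offset is (4000 + 200) % 4096 = 104.
 */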

/**
 * @brief Helper routine to copy from the ring buffer to a destination.
 *
 * Assumes there is enough data. Handles wrap-around in the src case only!
 */
static uint32_t
copy_from_ring_buffer(
	hv_vmbus_ring_buffer_info*	ring_info,
	char*				dest,
	uint32_t			dest_len,
	uint32_t			start_read_offset)
{
	uint32_t frag_len;
	char *ring_buffer = get_ring_buffer(ring_info);
	uint32_t ring_buffer_size = get_ring_buffer_size(ring_info);

	if (dest_len > ring_buffer_size - start_read_offset) {
		/* wrap-around detected at the src */
		frag_len = ring_buffer_size - start_read_offset;
		memcpy(dest, ring_buffer + start_read_offset, frag_len);
		memcpy(dest + frag_len, ring_buffer, dest_len - frag_len);
	} else {
		memcpy(dest, ring_buffer + start_read_offset, dest_len);
	}

	start_read_offset += dest_len;
	start_read_offset %= ring_buffer_size;

	return (start_read_offset);
}