/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2016 Red Hat, Inc.
 * Author: Michael S. Tsirkin <mst@redhat.com>
 *
 * Common macros and functions for ring benchmarking.
 */
#ifndef MAIN_H
#define MAIN_H

#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>	/* for _Exit() in the fallback wait_cycles() */

extern int param;

extern bool do_exit;

#if defined(__x86_64__) || defined(__i386__)
#include <x86intrin.h>	/* __rdtsc() */

/* Spin until the TSC has advanced by roughly the requested number of cycles. */
static inline void wait_cycles(unsigned long long cycles)
{
	unsigned long long t;

	t = __rdtsc();
	while (__rdtsc() - t < cycles) {}
}

/* rough per-transition cost estimates; tweak for your CPU */
#define VMEXIT_CYCLES 500
#define VMENTRY_CYCLES 500

#elif defined(__s390x__)
static inline void wait_cycles(unsigned long long cycles)
{
	/* brctg decrements the count register, so it must be a read-write operand */
	asm volatile("0: brctg %0,0b" : "+d" (cycles));
}

/* tweak me */
#define VMEXIT_CYCLES 200
#define VMENTRY_CYCLES 200

#else
/* Unsupported architecture: no calibrated delay, bail out of the benchmark. */
static inline void wait_cycles(unsigned long long cycles)
{
	_Exit(5);
}
#define VMEXIT_CYCLES 0
#define VMENTRY_CYCLES 0
#endif

/* Model the cost of a guest->host transition when exits are enabled. */
static inline void vmexit(void)
{
	if (!do_exit)
		return;

	wait_cycles(VMEXIT_CYCLES);
}

/* Model the cost of a host->guest transition when exits are enabled. */
static inline void vmentry(void)
{
	if (!do_exit)
		return;

	wait_cycles(VMENTRY_CYCLES);
}
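
/*
 * Usage sketch (illustrative, not mandated by this header): a benchmark
 * brackets the point where a real guest would trap to the hypervisor,
 * e.g. around the host notification in its kick path:
 *
 *	vmexit();
 *	kick();		// notify the host side
 *	vmentry();
 */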

/* implemented by ring */
void alloc_ring(void);
/* guest side */
int add_inbuf(unsigned, void *, void *);
void *get_buf(unsigned *, void **);
void disable_call(void);
bool used_empty(void);
bool enable_call(void);
void kick_available(void);
/* host side */
void disable_kick(void);
bool avail_empty(void);
bool enable_kick(void);
bool use_buf(unsigned *, void **);
void call_used(void);
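
/*
 * Guest-side flow, roughly (a simplified sketch of how these hooks fit
 * together, not the benchmark's exact loop; "buf_token", "data_token" and
 * process() are stand-ins, and add_inbuf() is assumed to return 0 on
 * success):
 *
 *	unsigned len;
 *	void *buf;
 *
 *	while (add_inbuf(0, buf_token, data_token) == 0)
 *		kick_available();
 *	while (get_buf(&len, &buf))
 *		process(buf);
 *
 * With do_sleep, the guest additionally uses disable_call()/enable_call()
 * and blocks in wait_for_call() while used_empty() stays true.
 */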

/* implemented by main */
extern bool do_sleep;
void kick(void);		/* guest -> host notification */
void wait_for_kick(void);	/* host blocks until the guest kicks */
void call(void);		/* host -> guest notification */
void wait_for_call(void);	/* guest blocks until the host calls */

extern unsigned ring_size;
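
/*
 * Host-side flow, roughly (a simplified sketch, not the exact benchmark
 * loop; it assumes enable_kick() returns true when the avail ring is still
 * empty after notifications are re-enabled, i.e. when it is safe to sleep):
 *
 *	unsigned len;
 *	void *buf;
 *
 *	for (;;) {
 *		if (do_sleep)
 *			disable_kick();
 *		while (use_buf(&len, &buf))
 *			call_used();
 *		if (do_sleep && avail_empty() && enable_kick())
 *			wait_for_kick();
 *	}
 */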

/* Compiler barrier - similar to what Linux uses */
#define barrier() asm volatile("" ::: "memory")

/* Is there a portable way to do this? */
#if defined(__x86_64__) || defined(__i386__)
#define cpu_relax() asm ("rep; nop" ::: "memory")
#elif defined(__s390x__)
#define cpu_relax() barrier()
#elif defined(__aarch64__)
#define cpu_relax() asm ("yield" ::: "memory")
#else
#define cpu_relax() assert(0)
#endif

extern bool do_relax;

static inline void busy_wait(void)
{
	if (do_relax)
		cpu_relax();
	else
		/* prevent compiler from removing busy loops */
		barrier();
}

#if defined(__x86_64__)
/* The locked access is below %rsp, outside the 128-byte red zone. */
#define smp_mb()     asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc")
#elif defined(__i386__)
#define smp_mb()     asm volatile("lock; addl $0,0(%%esp)" ::: "memory", "cc")
#elif defined(__aarch64__)
#define smp_mb()     asm volatile("dmb ish" ::: "memory")
#else
/*
 * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
 * with other __ATOMIC_SEQ_CST calls.
 */
#define smp_mb() __sync_synchronize()
#endif
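
/*
 * Unlike the release/acquire fences below, smp_mb() also orders a store
 * against a later load.  The sleep/notify handshake needs exactly that on
 * both sides; a sketch (stand-in variables and helpers, not the ring
 * implementations' actual code):
 *
 *	host, about to sleep:
 *		WRITE_ONCE(host_event_idx, seen_idx);
 *		smp_mb();
 *		if (avail_empty())
 *			wait_for_kick();
 *
 *	guest, after posting a buffer:
 *		WRITE_ONCE(avail_idx, next_idx);
 *		smp_mb();
 *		if (host_wants_kick())	// reads host_event_idx
 *			kick();
 */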

/*
 * This abuses the atomic builtins for thread fences, and
 * adds a compiler barrier.
 */
#define smp_release() do { \
    barrier(); \
    __atomic_thread_fence(__ATOMIC_RELEASE); \
} while (0)

#define smp_acquire() do { \
    __atomic_thread_fence(__ATOMIC_ACQUIRE); \
    barrier(); \
} while (0)

#if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)
#define smp_wmb() barrier()
#elif defined(__aarch64__)
#define smp_wmb() asm volatile("dmb ishst" ::: "memory")
#else
#define smp_wmb() smp_release()
#endif
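
/*
 * Typical release/acquire pairing (a sketch; "ring", "idx" and "i" are
 * stand-ins, not defined by this header): the producer makes its data
 * visible before publishing the new index, the consumer observes the
 * index before touching the data.
 *
 *	producer:
 *		ring[i] = data;
 *		smp_release();
 *		WRITE_ONCE(idx, i + 1);
 *
 *	consumer:
 *		while (READ_ONCE(idx) == i)
 *			busy_wait();
 *		smp_acquire();
 *		data = ring[i];
 */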

#ifndef __always_inline
#define __always_inline inline __attribute__((always_inline))
#endif

/*
 * Copy *p into *res with a single volatile access for sizes 1, 2, 4 and 8;
 * larger objects fall back to memcpy between compiler barriers.
 */
static __always_inline
void __read_once_size(const volatile void *p, void *res, int size)
{
	switch (size) {
	case 1: *(unsigned char *)res = *(volatile unsigned char *)p; break;
	case 2: *(unsigned short *)res = *(volatile unsigned short *)p; break;
	case 4: *(unsigned int *)res = *(volatile unsigned int *)p; break;
	case 8: *(unsigned long long *)res = *(volatile unsigned long long *)p; break;
	default:
		barrier();
		__builtin_memcpy((void *)res, (const void *)p, size);
		barrier();
	}
}

/* Mirror of __read_once_size(): store *res into *p with a single volatile access. */
static __always_inline void __write_once_size(volatile void *p, void *res, int size)
{
	switch (size) {
	case 1: *(volatile unsigned char *)p = *(unsigned char *)res; break;
	case 2: *(volatile unsigned short *)p = *(unsigned short *)res; break;
	case 4: *(volatile unsigned int *)p = *(unsigned int *)res; break;
	case 8: *(volatile unsigned long long *)p = *(unsigned long long *)res; break;
	default:
		barrier();
		__builtin_memcpy((void *)p, (const void *)res, size);
		barrier();
	}
}

#ifdef __alpha__
#define READ_ONCE(x) \
({									\
	union { typeof(x) __val; char __c[1]; } __u;			\
	__read_once_size(&(x), __u.__c, sizeof(x));			\
	smp_mb(); /* Enforce dependency ordering from x */		\
	__u.__val;							\
})
#else
#define READ_ONCE(x)							\
({									\
	union { typeof(x) __val; char __c[1]; } __u;			\
	__read_once_size(&(x), __u.__c, sizeof(x));			\
	__u.__val;							\
})
#endif

#define WRITE_ONCE(x, val) \
({							\
	union { typeof(x) __val; char __c[1]; } __u =	\
		{ .__val = (typeof(x)) (val) }; \
	__write_once_size(&(x), __u.__c, sizeof(x));	\
	__u.__val;					\
})
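
/*
 * READ_ONCE()/WRITE_ONCE() force a single, untorn access and keep the
 * compiler from caching the value across loop iterations.  Typical polling
 * use (a sketch; "stop" is a stand-in flag, not defined by this header):
 *
 *	while (!READ_ONCE(stop))
 *		busy_wait();
 *
 *	// elsewhere, to terminate the loop:
 *	WRITE_ONCE(stop, true);
 */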

#endif