/*-
 * Copyright (c) 2012 Konstantin Belousov <kib@FreeBSD.org>
 * Copyright (c) 2016, 2017, 2019 The FreeBSD Foundation
 * All rights reserved.
 *
 * Portions of this software were developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include "namespace.h"
#include <sys/capsicum.h>
#include <sys/elf.h>
#include <sys/fcntl.h>
#include <sys/mman.h>
#include <sys/time.h>
#include <sys/vdso.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include "un-namespace.h"
#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/specialreg.h>
#include <dev/acpica/acpi_hpet.h>
#ifdef WANT_HYPERV
#include <dev/hyperv/hyperv.h>
#endif
#include <x86/ifunc.h>
#include "libc_private.h"

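/*
 * Read the TSC and return bits th_x86_shift..th_x86_shift+31 of the
 * counter: SHRD shifts the 64-bit %edx:%eax result right by %cl bits
 * and leaves the low 32 bits of the shifted value in %eax.
 */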
static inline u_int
rdtsc_low(const struct vdso_timehands *th)
{
	u_int rv;

	__asm __volatile("rdtsc; shrd %%cl, %%edx, %0"
	    : "=a" (rv) : "c" (th->th_x86_shift) : "edx");
	return (rv);
}

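/*
 * Same as rdtsc_low(), but use RDTSCP, which waits for preceding
 * instructions to execute, so no separate fence is needed.  RDTSCP
 * clobbers %ecx with TSC_AUX, so the shift count is passed in %edi
 * and copied into %ecx before the SHRD.
 */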
static inline u_int
rdtscp_low(const struct vdso_timehands *th)
{
	u_int rv;

	__asm __volatile("rdtscp; movl %%edi,%%ecx; shrd %%cl, %%edx, %0"
	    : "=a" (rv) : "D" (th->th_x86_shift) : "ecx", "edx");
	return (rv);
}

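/*
 * Plain RDTSC is not ordered with respect to preceding instructions,
 * so the wrappers below execute a fence first: LFENCE for Intel CPUs,
 * MFENCE for AMD CPUs, matching the selector table further down.  The
 * "none" variants are for CPUs without SSE2, which lack both fences.
 */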
static u_int
rdtsc_low_mb_lfence(const struct vdso_timehands *th)
{
	lfence();
	return (rdtsc_low(th));
}

static u_int
rdtsc_low_mb_mfence(const struct vdso_timehands *th)
{
	mfence();
	return (rdtsc_low(th));
}

static u_int
rdtsc_low_mb_none(const struct vdso_timehands *th)
{
	return (rdtsc_low(th));
}

static u_int
rdtsc32_mb_lfence(void)
{
	lfence();
	return (rdtsc32());
}

static u_int
rdtsc32_mb_mfence(void)
{
	mfence();
	return (rdtsc32());
}

static u_int
rdtsc32_mb_none(void)
{
	return (rdtsc32());
}

static u_int
rdtscp32_(void)
{
	return (rdtscp32());
}

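/*
 * TSC read variants, indexed by the value returned from
 * tsc_selector_idx() below.
 */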
struct tsc_selector_tag {
	u_int (*ts_rdtsc32)(void);
	u_int (*ts_rdtsc_low)(const struct vdso_timehands *);
};

static const struct tsc_selector_tag tsc_selector[] = {
	[0] = {				/* Intel, LFENCE */
		.ts_rdtsc32 =	rdtsc32_mb_lfence,
		.ts_rdtsc_low =	rdtsc_low_mb_lfence,
	},
	[1] = {				/* AMD, MFENCE */
		.ts_rdtsc32 =	rdtsc32_mb_mfence,
		.ts_rdtsc_low =	rdtsc_low_mb_mfence,
	},
	[2] = {				/* No SSE2 */
		.ts_rdtsc32 =	rdtsc32_mb_none,
		.ts_rdtsc_low =	rdtsc_low_mb_none,
	},
	[3] = {				/* RDTSCP */
		.ts_rdtsc32 =	rdtscp32_,
		.ts_rdtsc_low =	rdtscp_low,
	},
};

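/*
 * Pick the TSC read variant: 3 if RDTSCP is available, 2 if SSE2 (and
 * therefore LFENCE/MFENCE) is missing, 1 for AMD/Hygon CPUs and 0
 * otherwise.  The vendor is identified by the CPUID leaf 0 vendor
 * string, assembled into v[] in %ebx, %edx, %ecx order.
 */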
static int
tsc_selector_idx(u_int cpu_feature)
{
	u_int amd_feature, cpu_exthigh, cpu_id, p[4], v[3];
	static const char amd_id[] = "AuthenticAMD";
	static const char hygon_id[] = "HygonGenuine";
	bool amd_cpu;

	if (cpu_feature == 0)
		return (2);	/* should not happen due to RDTSC */

	do_cpuid(0, p);
	v[0] = p[1];
	v[1] = p[3];
	v[2] = p[2];
	amd_cpu = memcmp(v, amd_id, sizeof(amd_id) - 1) == 0 ||
	    memcmp(v, hygon_id, sizeof(hygon_id) - 1) == 0;

	do_cpuid(1, p);
	cpu_id = p[0];

	if (cpu_feature != 0) {
		do_cpuid(0x80000000, p);
		cpu_exthigh = p[0];
	} else {
		cpu_exthigh = 0;
	}
	if (cpu_exthigh >= 0x80000001) {
		do_cpuid(0x80000001, p);
		amd_feature = p[3];
	} else {
		amd_feature = 0;
	}

	if ((amd_feature & AMDID_RDTSCP) != 0)
		return (3);
	if ((cpu_feature & CPUID_SSE2) == 0)
		return (2);
	return (amd_cpu ? 1 : 0);
}

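/*
 * Ifunc resolvers, run once by the runtime linker; cpu_feature is the
 * CPUID feature word supplied to the resolver by DEFINE_UIFUNC().
 */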
DEFINE_UIFUNC(static, u_int, __vdso_gettc_rdtsc_low,
    (const struct vdso_timehands *th), static)
{
	return (tsc_selector[tsc_selector_idx(cpu_feature)].ts_rdtsc_low);
}

DEFINE_UIFUNC(static, u_int, __vdso_gettc_rdtsc32, (void), static)
{
	return (tsc_selector[tsc_selector_idx(cpu_feature)].ts_rdtsc32);
}

#define	HPET_DEV_MAP_MAX	10
static volatile char *hpet_dev_map[HPET_DEV_MAP_MAX];

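/*
 * Map the HPET register page for unit "u" read-only from /dev/hpet<u>
 * and publish it in hpet_dev_map[u].  On failure the slot is set to
 * MAP_FAILED so that callers do not retry.
 */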
static void
__vdso_init_hpet(uint32_t u)
{
	static const char devprefix[] = "/dev/hpet";
	char devname[64], *c, *c1, t;
	volatile char *new_map, *old_map;
	unsigned int mode;
	uint32_t u1;
	int fd;

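	/*
	 * Build "/dev/hpet<u>": append the decimal digits of "u" least
	 * significant first, then reverse them in place.
	 */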
	c1 = c = stpcpy(devname, devprefix);
	u1 = u;
	do {
		*c++ = u1 % 10 + '0';
		u1 /= 10;
	} while (u1 != 0);
	*c = '\0';
	for (c--; c1 != c; c1++, c--) {
		t = *c1;
		*c1 = *c;
		*c = t;
	}

	old_map = hpet_dev_map[u];
	if (old_map != NULL)
		return;

	/*
	 * Explicitly check for capability mode to avoid triggering
	 * trap_enocap when the device is opened by absolute path.
	 */
	if ((cap_getmode(&mode) == 0 && mode != 0) ||
	    (fd = _open(devname, O_RDONLY)) == -1) {
		/* Prevent the caller from re-entering. */
		atomic_cmpset_rel_ptr((volatile uintptr_t *)&hpet_dev_map[u],
		    (uintptr_t)old_map, (uintptr_t)MAP_FAILED);
		return;
	}

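	/*
	 * Map one page of HPET registers and try to install the mapping.
	 * If another thread installed a mapping first, discard ours.
	 */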
	new_map = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, fd, 0);
	_close(fd);
	if (atomic_cmpset_rel_ptr((volatile uintptr_t *)&hpet_dev_map[u],
	    (uintptr_t)old_map, (uintptr_t)new_map) == 0 &&
	    new_map != MAP_FAILED)
		munmap((void *)new_map, PAGE_SIZE);
}

#ifdef WANT_HYPERV

#define HYPERV_REFTSC_DEVPATH	"/dev/" HYPERV_REFTSC_DEVNAME

/*
 * NOTE:
 * 'NULL' here means that initialization is still required.  If the
 * variable is 'MAP_FAILED', the Hyper-V reference TSC cannot be used,
 * e.g. in a misconfigured jail.
 */
static struct hyperv_reftsc *hyperv_ref_tsc;

static void
__vdso_init_hyperv_tsc(void)
{
	int fd;
	unsigned int mode;

	if (cap_getmode(&mode) == 0 && mode != 0)
		goto fail;

	fd = _open(HYPERV_REFTSC_DEVPATH, O_RDONLY);
	if (fd < 0)
		goto fail;
	hyperv_ref_tsc = mmap(NULL, sizeof(*hyperv_ref_tsc), PROT_READ,
	    MAP_SHARED, fd, 0);
	_close(fd);

	return;
fail:
	/* Prevent the caller from re-entering. */
	hyperv_ref_tsc = MAP_FAILED;
}

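/*
 * Compute the timecounter value from the Hyper-V reference TSC page:
 * ((tsc * tsc_scale) >> 64) + tsc_ofs, retrying if the sequence
 * number changes during the computation.  A sequence of zero means
 * the reference TSC is not usable.
 */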
static int
__vdso_hyperv_tsc(struct hyperv_reftsc *tsc_ref, u_int *tc)
{
	uint64_t disc, ret, tsc, scale;
	uint32_t seq;
	int64_t ofs;

	while ((seq = atomic_load_acq_int(&tsc_ref->tsc_seq)) != 0) {
		scale = tsc_ref->tsc_scale;
		ofs = tsc_ref->tsc_ofs;

		mfence();	/* XXXKIB */
		tsc = rdtsc();

		/* ret = ((tsc * scale) >> 64) + ofs */
		__asm__ __volatile__ ("mulq %3" :
		    "=d" (ret), "=a" (disc) :
		    "a" (tsc), "r" (scale));
		ret += ofs;

		atomic_thread_fence_acq();
		if (tsc_ref->tsc_seq == seq) {
			*tc = ret;
			return (0);
		}

		/* Sequence changed; re-sync. */
	}
	return (ENOSYS);
}

#endif	/* WANT_HYPERV */

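/*
 * Return the current timecounter value for the timehands published by
 * the kernel, using the hardware selected in th_algo.  This is the
 * libc gettimeofday()/clock_gettime() fast path; ENOSYS tells the
 * caller to fall back to the system call.
 */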
#pragma weak __vdso_gettc
int
__vdso_gettc(const struct vdso_timehands *th, u_int *tc)
{
	volatile char *map;
	uint32_t idx;

	switch (th->th_algo) {
	case VDSO_TH_ALGO_X86_TSC:
		*tc = th->th_x86_shift > 0 ? __vdso_gettc_rdtsc_low(th) :
		    __vdso_gettc_rdtsc32();
		return (0);
	case VDSO_TH_ALGO_X86_HPET:
		idx = th->th_x86_hpet_idx;
		if (idx >= HPET_DEV_MAP_MAX)
			return (ENOSYS);
		map = (volatile char *)atomic_load_acq_ptr(
		    (volatile uintptr_t *)&hpet_dev_map[idx]);
		if (map == NULL) {
			__vdso_init_hpet(idx);
			map = (volatile char *)atomic_load_acq_ptr(
			    (volatile uintptr_t *)&hpet_dev_map[idx]);
		}
		if (map == MAP_FAILED)
			return (ENOSYS);
		*tc = *(volatile uint32_t *)(map + HPET_MAIN_COUNTER);
		return (0);
#ifdef WANT_HYPERV
	case VDSO_TH_ALGO_X86_HVTSC:
		if (hyperv_ref_tsc == NULL)
			__vdso_init_hyperv_tsc();
		if (hyperv_ref_tsc == MAP_FAILED)
			return (ENOSYS);
		return (__vdso_hyperv_tsc(hyperv_ref_tsc, tc));
#endif
	default:
		return (ENOSYS);
	}
}

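/*
 * Obtain the address of the shared timekeep page from the AT_TIMEKEEP
 * ELF auxiliary vector entry.
 */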
#pragma weak __vdso_gettimekeep
int
__vdso_gettimekeep(struct vdso_timekeep **tk)
{

	return (_elf_aux_info(AT_TIMEKEEP, tk, sizeof(*tk)));
}