1/*-
2 * Copyright (c) 2012 Konstantin Belousov <kib@FreeBSD.org>
3 * Copyright (c) 2016, 2017 The FreeBSD Foundation
4 * All rights reserved.
5 *
6 * Portions of this software were developed by Konstantin Belousov
7 * under sponsorship from the FreeBSD Foundation.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD: stable/11/lib/libc/x86/sys/__vdso_gettc.c 344158 2019-02-15 11:36:16Z kib $");
33
34#include <sys/param.h>
35#include "namespace.h"
36#include <sys/capsicum.h>
37#include <sys/elf.h>
38#include <sys/fcntl.h>
39#include <sys/mman.h>
40#include <sys/time.h>
41#include <sys/vdso.h>
42#include <errno.h>
43#include <string.h>
44#include <unistd.h>
45#include "un-namespace.h"
46#include <machine/atomic.h>
47#include <machine/cpufunc.h>
48#include <machine/specialreg.h>
49#include <dev/acpica/acpi_hpet.h>
50#ifdef WANT_HYPERV
51#include <dev/hyperv/hyperv.h>
52#endif
53#include "libc_private.h"
54
/*
 * Kind of barrier rdtsc_mb() issues before the TSC is read.
 */
enum LMB {
	LMB_UNKNOWN,	/* Not determined yet; rdtsc_mb() runs init_fence(). */
	LMB_NONE,	/* No barrier instruction is issued. */
	LMB_MFENCE,	/* Issue mfence(). */
	LMB_LFENCE	/* Issue lfence(). */
};

/* Selected barrier; filled in by init_fence() on first use. */
static enum LMB lfence_works = LMB_UNKNOWN;
61
/*
 * Execute CPUID for the given leaf and store the resulting register
 * values into p[] in the order %eax, %ebx, %ecx, %edx.
 *
 * On i386 the %ebx register is saved on the stack around the
 * instruction and its CPUID value is copied out through a
 * compiler-chosen scratch register; on amd64 %ebx is used as a
 * regular output operand.
 */
static void
cpuidp(u_int leaf, u_int p[4])
{

#if defined(__i386__)
	__asm __volatile(
	    "	pushl	%%ebx\n"
	    "	cpuid\n"
	    "	movl	%%ebx,%1\n"
	    "	popl	%%ebx"
	    : "=a" (p[0]), "=r" (p[1]), "=c" (p[2]), "=d" (p[3])
	    : "0" (leaf));
#elif defined(__amd64__)
	__asm __volatile(
	    "	cpuid\n"
	    : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
	    : "0" (leaf));
#else
#error "Arch"
#endif
}
86
87static enum LMB
88select_lmb(void)
89{
90	u_int p[4];
91	/* Not a typo, string matches our cpuidp() registers use. */
92	static const char intel_id[] = "GenuntelineI";
93
94	cpuidp(0, p);
95	return (memcmp(p + 1, intel_id, sizeof(intel_id) - 1) == 0 ?
96	    LMB_LFENCE : LMB_MFENCE);
97}
98
99static void
100init_fence(void)
101{
102#if defined(__i386__)
103	u_int cpuid_supported, p[4];
104
105	lfence_works = LMB_NONE;
106	__asm __volatile(
107	    "	pushfl\n"
108	    "	popl	%%eax\n"
109	    "	movl    %%eax,%%ecx\n"
110	    "	xorl    $0x200000,%%eax\n"
111	    "	pushl	%%eax\n"
112	    "	popfl\n"
113	    "	pushfl\n"
114	    "	popl    %%eax\n"
115	    "	xorl    %%eax,%%ecx\n"
116	    "	je	1f\n"
117	    "	movl	$1,%0\n"
118	    "	jmp	2f\n"
119	    "1:	movl	$0,%0\n"
120	    "2:\n"
121	    : "=r" (cpuid_supported) : : "eax", "ecx", "cc");
122	if (cpuid_supported) {
123		cpuidp(0x1, p);
124		if ((p[3] & CPUID_SSE2) != 0)
125			lfence_works = select_lmb();
126	}
127#elif defined(__amd64__)
128	lfence_works = select_lmb();
129#else
130#error "Arch"
131#endif
132}
133
134static void
135rdtsc_mb(void)
136{
137
138again:
139	if (__predict_true(lfence_works == LMB_LFENCE)) {
140		lfence();
141		return;
142	} else if (lfence_works == LMB_MFENCE) {
143		mfence();
144		return;
145	} else if (lfence_works == LMB_NONE) {
146		return;
147	}
148	init_fence();
149	goto again;
150}
151
152static u_int
153__vdso_gettc_rdtsc_low(const struct vdso_timehands *th)
154{
155	u_int rv;
156
157	rdtsc_mb();
158	__asm __volatile("rdtsc; shrd %%cl, %%edx, %0"
159	    : "=a" (rv) : "c" (th->th_x86_shift) : "edx");
160	return (rv);
161}
162
163static u_int
164__vdso_rdtsc32(void)
165{
166
167	rdtsc_mb();
168	return (rdtsc32());
169}
170
171#define	HPET_DEV_MAP_MAX	10
172static volatile char *hpet_dev_map[HPET_DEV_MAP_MAX];
173
174static void
175__vdso_init_hpet(uint32_t u)
176{
177	static const char devprefix[] = "/dev/hpet";
178	char devname[64], *c, *c1, t;
179	volatile char *new_map, *old_map;
180	unsigned int mode;
181	uint32_t u1;
182	int fd;
183
184	c1 = c = stpcpy(devname, devprefix);
185	u1 = u;
186	do {
187		*c++ = u1 % 10 + '0';
188		u1 /= 10;
189	} while (u1 != 0);
190	*c = '\0';
191	for (c--; c1 != c; c1++, c--) {
192		t = *c1;
193		*c1 = *c;
194		*c = t;
195	}
196
197	old_map = hpet_dev_map[u];
198	if (old_map != NULL)
199		return;
200
201	/*
202	 * Explicitely check for the capability mode to avoid
203	 * triggering trap_enocap on the device open by absolute path.
204	 */
205	if ((cap_getmode(&mode) == 0 && mode != 0) ||
206	    (fd = _open(devname, O_RDONLY)) == -1) {
207		/* Prevent the caller from re-entering. */
208		atomic_cmpset_rel_ptr((volatile uintptr_t *)&hpet_dev_map[u],
209		    (uintptr_t)old_map, (uintptr_t)MAP_FAILED);
210		return;
211	}
212
213	new_map = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, fd, 0);
214	_close(fd);
215	if (atomic_cmpset_rel_ptr((volatile uintptr_t *)&hpet_dev_map[u],
216	    (uintptr_t)old_map, (uintptr_t)new_map) == 0 &&
217	    new_map != MAP_FAILED)
218		munmap((void *)new_map, PAGE_SIZE);
219}
220
221#ifdef WANT_HYPERV
222
223#define HYPERV_REFTSC_DEVPATH	"/dev/" HYPERV_REFTSC_DEVNAME
224
225/*
226 * NOTE:
227 * We use 'NULL' for this variable to indicate that initialization
228 * is required.  And if this variable is 'MAP_FAILED', then Hyper-V
229 * reference TSC can not be used, e.g. in misconfigured jail.
230 */
231static struct hyperv_reftsc *hyperv_ref_tsc;
232
233static void
234__vdso_init_hyperv_tsc(void)
235{
236	int fd;
237	unsigned int mode;
238
239	if (cap_getmode(&mode) == 0 && mode != 0)
240		goto fail;
241
242	fd = _open(HYPERV_REFTSC_DEVPATH, O_RDONLY);
243	if (fd < 0)
244		goto fail;
245	hyperv_ref_tsc = mmap(NULL, sizeof(*hyperv_ref_tsc), PROT_READ,
246	    MAP_SHARED, fd, 0);
247	_close(fd);
248
249	return;
250fail:
251	/* Prevent the caller from re-entering. */
252	hyperv_ref_tsc = MAP_FAILED;
253}
254
255static int
256__vdso_hyperv_tsc(struct hyperv_reftsc *tsc_ref, u_int *tc)
257{
258	uint64_t disc, ret, tsc, scale;
259	uint32_t seq;
260	int64_t ofs;
261
262	while ((seq = atomic_load_acq_int(&tsc_ref->tsc_seq)) != 0) {
263		scale = tsc_ref->tsc_scale;
264		ofs = tsc_ref->tsc_ofs;
265
266		rdtsc_mb();
267		tsc = rdtsc();
268
269		/* ret = ((tsc * scale) >> 64) + ofs */
270		__asm__ __volatile__ ("mulq %3" :
271		    "=d" (ret), "=a" (disc) :
272		    "a" (tsc), "r" (scale));
273		ret += ofs;
274
275		atomic_thread_fence_acq();
276		if (tsc_ref->tsc_seq == seq) {
277			*tc = ret;
278			return (0);
279		}
280
281		/* Sequence changed; re-sync. */
282	}
283	return (ENOSYS);
284}
285
286#endif	/* WANT_HYPERV */
287
288#pragma weak __vdso_gettc
289int
290__vdso_gettc(const struct vdso_timehands *th, u_int *tc)
291{
292	volatile char *map;
293	uint32_t idx;
294
295	switch (th->th_algo) {
296	case VDSO_TH_ALGO_X86_TSC:
297		*tc = th->th_x86_shift > 0 ? __vdso_gettc_rdtsc_low(th) :
298		    __vdso_rdtsc32();
299		return (0);
300	case VDSO_TH_ALGO_X86_HPET:
301		idx = th->th_x86_hpet_idx;
302		if (idx >= HPET_DEV_MAP_MAX)
303			return (ENOSYS);
304		map = (volatile char *)atomic_load_acq_ptr(
305		    (volatile uintptr_t *)&hpet_dev_map[idx]);
306		if (map == NULL) {
307			__vdso_init_hpet(idx);
308			map = (volatile char *)atomic_load_acq_ptr(
309			    (volatile uintptr_t *)&hpet_dev_map[idx]);
310		}
311		if (map == MAP_FAILED)
312			return (ENOSYS);
313		*tc = *(volatile uint32_t *)(map + HPET_MAIN_COUNTER);
314		return (0);
315#ifdef WANT_HYPERV
316	case VDSO_TH_ALGO_X86_HVTSC:
317		if (hyperv_ref_tsc == NULL)
318			__vdso_init_hyperv_tsc();
319		if (hyperv_ref_tsc == MAP_FAILED)
320			return (ENOSYS);
321		return (__vdso_hyperv_tsc(hyperv_ref_tsc, tc));
322#endif
323	default:
324		return (ENOSYS);
325	}
326}
327
328#pragma weak __vdso_gettimekeep
329int
330__vdso_gettimekeep(struct vdso_timekeep **tk)
331{
332
333	return (_elf_aux_info(AT_TIMEKEEP, tk, sizeof(*tk)));
334}
335