1237434Skib/*-
2237434Skib * Copyright (c) 2012 Konstantin Belousov <kib@FreeBSD.org>
3311927Skib * Copyright (c) 2016, 2017 The FreeBSD Foundation
4304285Skib * All rights reserved.
5237434Skib *
6304285Skib * Portions of this software were developed by Konstantin Belousov
7304285Skib * under sponsorship from the FreeBSD Foundation.
8304285Skib *
9237434Skib * Redistribution and use in source and binary forms, with or without
10237434Skib * modification, are permitted provided that the following conditions
11237434Skib * are met:
12237434Skib * 1. Redistributions of source code must retain the above copyright
13237434Skib *    notice, this list of conditions and the following disclaimer.
14237434Skib * 2. Redistributions in binary form must reproduce the above copyright
15237434Skib *    notice, this list of conditions and the following disclaimer in the
16237434Skib *    documentation and/or other materials provided with the distribution.
17237434Skib *
18237434Skib * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19237434Skib * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20237434Skib * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21237434Skib * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22237434Skib * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23237434Skib * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24237434Skib * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25237434Skib * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26237434Skib * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27237434Skib * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28237434Skib * SUCH DAMAGE.
29237434Skib */
30237434Skib
31237434Skib#include <sys/cdefs.h>
32237434Skib__FBSDID("$FreeBSD: stable/11/lib/libc/x86/sys/__vdso_gettc.c 344158 2019-02-15 11:36:16Z kib $");
33237434Skib
34304285Skib#include <sys/param.h>
35304285Skib#include "namespace.h"
36321650Skib#include <sys/capsicum.h>
37246117Skib#include <sys/elf.h>
38304285Skib#include <sys/fcntl.h>
39304285Skib#include <sys/mman.h>
40237434Skib#include <sys/time.h>
41237434Skib#include <sys/vdso.h>
42304285Skib#include <errno.h>
43304285Skib#include <string.h>
44304285Skib#include <unistd.h>
45304285Skib#include "un-namespace.h"
46311927Skib#include <machine/atomic.h>
47237434Skib#include <machine/cpufunc.h>
48286284Skib#include <machine/specialreg.h>
49304285Skib#include <dev/acpica/acpi_hpet.h>
50313539Sngie#ifdef WANT_HYPERV
51311376Ssephe#include <dev/hyperv/hyperv.h>
52311376Ssephe#endif
53246117Skib#include "libc_private.h"
54237434Skib
/*
 * Which serializing instruction, if any, must precede rdtsc so that
 * the counter read is not reordered with surrounding loads.  Probed
 * lazily by init_fence() on first use (see rdtsc_mb()).
 */
static enum LMB {
	LMB_UNKNOWN,	/* not yet probed */
	LMB_NONE,	/* no SSE2, no fence instructions (i386 only) */
	LMB_MFENCE,	/* non-Intel vendor: use mfence */
	LMB_LFENCE	/* Intel: lfence suffices */
} lfence_works = LMB_UNKNOWN;
61322345Skib
/*
 * Execute CPUID for the given leaf and store the results in
 * p[0..3] as %eax, %ebx, %ecx, %edx.  On i386, %ebx can be the PIC
 * base register, so it is saved/restored by hand and the value is
 * returned through a scratch register instead of a "=b" constraint.
 */
static void
cpuidp(u_int leaf, u_int p[4])
{

	__asm __volatile(
#if defined(__i386__)
	    "	pushl	%%ebx\n"
#endif
	    "	cpuid\n"
#if defined(__i386__)
	    "	movl	%%ebx,%1\n"
	    "	popl	%%ebx"
#endif
	    : "=a" (p[0]),
#if defined(__i386__)
	    "=r" (p[1]),
#elif defined(__amd64__)
	    "=b" (p[1]),
#else
#error "Arch"
#endif
	    "=c" (p[2]), "=d" (p[3])
	    :  "0" (leaf));
}
86322345Skib
87322345Skibstatic enum LMB
88322345Skibselect_lmb(void)
89322345Skib{
90322345Skib	u_int p[4];
91344158Skib	/* Not a typo, string matches our cpuidp() registers use. */
92322345Skib	static const char intel_id[] = "GenuntelineI";
93322345Skib
94322345Skib	cpuidp(0, p);
95322345Skib	return (memcmp(p + 1, intel_id, sizeof(intel_id) - 1) == 0 ?
96322345Skib	    LMB_LFENCE : LMB_MFENCE);
97322345Skib}
98322345Skib
/*
 * One-time probe that resolves lfence_works from LMB_UNKNOWN to a
 * concrete barrier choice.
 */
static void
init_fence(void)
{
#if defined(__i386__)
	u_int cpuid_supported, p[4];

	/* Default for CPUs without SSE2: no fence instruction exists. */
	lfence_works = LMB_NONE;
	/*
	 * Detect the CPUID instruction by toggling the ID bit
	 * (0x200000) in EFLAGS; the bit is writable iff CPUID is
	 * supported.
	 */
	__asm __volatile(
	    "	pushfl\n"
	    "	popl	%%eax\n"
	    "	movl    %%eax,%%ecx\n"
	    "	xorl    $0x200000,%%eax\n"
	    "	pushl	%%eax\n"
	    "	popfl\n"
	    "	pushfl\n"
	    "	popl    %%eax\n"
	    "	xorl    %%eax,%%ecx\n"
	    "	je	1f\n"
	    "	movl	$1,%0\n"
	    "	jmp	2f\n"
	    "1:	movl	$0,%0\n"
	    "2:\n"
	    : "=r" (cpuid_supported) : : "eax", "ecx", "cc");
	if (cpuid_supported) {
		cpuidp(0x1, p);
		/* SSE2 implies both lfence and mfence are available. */
		if ((p[3] & CPUID_SSE2) != 0)
			lfence_works = select_lmb();
	}
#elif defined(__amd64__)
	/* amd64 always has SSE2; only the vendor choice remains. */
	lfence_works = select_lmb();
#else
#error "Arch"
#endif
}
133286284Skib
134322345Skibstatic void
135322345Skibrdtsc_mb(void)
136322345Skib{
137322345Skib
138322345Skibagain:
139322345Skib	if (__predict_true(lfence_works == LMB_LFENCE)) {
140322345Skib		lfence();
141322345Skib		return;
142322345Skib	} else if (lfence_works == LMB_MFENCE) {
143322345Skib		mfence();
144322345Skib		return;
145322345Skib	} else if (lfence_works == LMB_NONE) {
146322345Skib		return;
147322345Skib	}
148322345Skib	init_fence();
149322345Skib	goto again;
150322345Skib}
151322345Skib
/*
 * Read the TSC and return 32 bits of it starting at bit
 * th->th_x86_shift: shrd shifts the 64-bit %edx:%eax result right
 * by %cl into %eax.
 */
static u_int
__vdso_gettc_rdtsc_low(const struct vdso_timehands *th)
{
	u_int rv;

	rdtsc_mb();	/* order the counter read wrt earlier loads */
	__asm __volatile("rdtsc; shrd %%cl, %%edx, %0"
	    : "=a" (rv) : "c" (th->th_x86_shift) : "edx");
	return (rv);
}
162237434Skib
/* Fenced read of the low 32 bits of the TSC. */
static u_int
__vdso_rdtsc32(void)
{

	rdtsc_mb();
	return (rdtsc32());
}
170286284Skib
#define	HPET_DEV_MAP_MAX	10
/*
 * Cached per-unit register mappings of /dev/hpetN.  NULL means the
 * unit has not been initialized yet; MAP_FAILED means it cannot be
 * used and must not be retried.
 */
static volatile char *hpet_dev_map[HPET_DEV_MAP_MAX];
173304285Skib
174304285Skibstatic void
175304285Skib__vdso_init_hpet(uint32_t u)
176304285Skib{
177304285Skib	static const char devprefix[] = "/dev/hpet";
178304285Skib	char devname[64], *c, *c1, t;
179311927Skib	volatile char *new_map, *old_map;
180321650Skib	unsigned int mode;
181311927Skib	uint32_t u1;
182304285Skib	int fd;
183304285Skib
184304285Skib	c1 = c = stpcpy(devname, devprefix);
185311927Skib	u1 = u;
186304285Skib	do {
187311927Skib		*c++ = u1 % 10 + '0';
188311927Skib		u1 /= 10;
189311927Skib	} while (u1 != 0);
190304285Skib	*c = '\0';
191304285Skib	for (c--; c1 != c; c1++, c--) {
192304285Skib		t = *c1;
193304285Skib		*c1 = *c;
194304285Skib		*c = t;
195304285Skib	}
196311927Skib
197311927Skib	old_map = hpet_dev_map[u];
198311927Skib	if (old_map != NULL)
199311927Skib		return;
200311927Skib
201322042Skib	/*
202322042Skib	 * Explicitely check for the capability mode to avoid
203322042Skib	 * triggering trap_enocap on the device open by absolute path.
204322042Skib	 */
205322042Skib	if ((cap_getmode(&mode) == 0 && mode != 0) ||
206322042Skib	    (fd = _open(devname, O_RDONLY)) == -1) {
207322042Skib		/* Prevent the caller from re-entering. */
208322042Skib		atomic_cmpset_rel_ptr((volatile uintptr_t *)&hpet_dev_map[u],
209322042Skib		    (uintptr_t)old_map, (uintptr_t)MAP_FAILED);
210322042Skib		return;
211322042Skib	}
212321650Skib
213311927Skib	new_map = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, fd, 0);
214304285Skib	_close(fd);
215311927Skib	if (atomic_cmpset_rel_ptr((volatile uintptr_t *)&hpet_dev_map[u],
216311927Skib	    (uintptr_t)old_map, (uintptr_t)new_map) == 0 &&
217311927Skib	    new_map != MAP_FAILED)
218311927Skib		munmap((void *)new_map, PAGE_SIZE);
219304285Skib}
220304285Skib
#ifdef WANT_HYPERV

#define HYPERV_REFTSC_DEVPATH	"/dev/" HYPERV_REFTSC_DEVNAME

/*
 * NOTE:
 * We use 'NULL' for this variable to indicate that initialization
 * is required.  And if this variable is 'MAP_FAILED', then the
 * Hyper-V reference TSC cannot be used, e.g. in a misconfigured
 * jail.
 */
static struct hyperv_reftsc *hyperv_ref_tsc;
232311376Ssephe
/*
 * Map the Hyper-V reference-TSC page read-only into hyperv_ref_tsc.
 * On failure the pointer ends up as MAP_FAILED (either set here or
 * returned by mmap) so the caller does not retry.
 */
static void
__vdso_init_hyperv_tsc(void)
{
	int fd;
	unsigned int mode;

	/* In capability mode the absolute-path open cannot succeed. */
	if (cap_getmode(&mode) == 0 && mode != 0)
		goto fail;

	fd = _open(HYPERV_REFTSC_DEVPATH, O_RDONLY);
	if (fd < 0)
		goto fail;
	/* On error mmap returns MAP_FAILED, which is our sentinel too. */
	hyperv_ref_tsc = mmap(NULL, sizeof(*hyperv_ref_tsc), PROT_READ,
	    MAP_SHARED, fd, 0);
	_close(fd);

	return;
fail:
	/* Prevent the caller from re-entering. */
	hyperv_ref_tsc = MAP_FAILED;
}
254311376Ssephe
/*
 * Compute the timecounter value from the Hyper-V reference-TSC page.
 * The page acts as a seqlock: tsc_seq is loaded with acquire
 * semantics before reading tsc_scale/tsc_ofs and re-checked after;
 * a mismatch means the hypervisor updated the page mid-read and the
 * computation is retried.  A zero sequence marks the page invalid.
 * Returns 0 with the counter stored in *tc, or ENOSYS.
 */
static int
__vdso_hyperv_tsc(struct hyperv_reftsc *tsc_ref, u_int *tc)
{
	uint64_t disc, ret, tsc, scale;
	uint32_t seq;
	int64_t ofs;

	while ((seq = atomic_load_acq_int(&tsc_ref->tsc_seq)) != 0) {
		scale = tsc_ref->tsc_scale;
		ofs = tsc_ref->tsc_ofs;

		rdtsc_mb();
		tsc = rdtsc();

		/* ret = ((tsc * scale) >> 64) + ofs */
		__asm__ __volatile__ ("mulq %3" :
		    "=d" (ret), "=a" (disc) :
		    "a" (tsc), "r" (scale));
		ret += ofs;

		atomic_thread_fence_acq();
		if (tsc_ref->tsc_seq == seq) {
			*tc = ret;
			return (0);
		}

		/* Sequence changed; re-sync. */
	}
	return (ENOSYS);
}
285311376Ssephe
286313539Sngie#endif	/* WANT_HYPERV */
287311376Ssephe
#pragma weak __vdso_gettc
/*
 * Userspace timecounter read for the vdso timekeeping fast path.
 * Dispatches on the algorithm chosen by the kernel in th->th_algo
 * and stores the raw counter value in *tc.  Returns 0 on success,
 * or ENOSYS when the algorithm is unknown or its backing resource
 * is unavailable, which makes the caller fall back to a syscall.
 */
int
__vdso_gettc(const struct vdso_timehands *th, u_int *tc)
{
	volatile char *map;
	uint32_t idx;

	switch (th->th_algo) {
	case VDSO_TH_ALGO_X86_TSC:
		/* A non-zero shift selects the shrd-scaled TSC read. */
		*tc = th->th_x86_shift > 0 ? __vdso_gettc_rdtsc_low(th) :
		    __vdso_rdtsc32();
		return (0);
	case VDSO_TH_ALGO_X86_HPET:
		idx = th->th_x86_hpet_idx;
		if (idx >= HPET_DEV_MAP_MAX)
			return (ENOSYS);
		map = (volatile char *)atomic_load_acq_ptr(
		    (volatile uintptr_t *)&hpet_dev_map[idx]);
		if (map == NULL) {
			/* First use of this unit: try to map it. */
			__vdso_init_hpet(idx);
			map = (volatile char *)atomic_load_acq_ptr(
			    (volatile uintptr_t *)&hpet_dev_map[idx]);
		}
		if (map == MAP_FAILED)
			return (ENOSYS);
		*tc = *(volatile uint32_t *)(map + HPET_MAIN_COUNTER);
		return (0);
#ifdef WANT_HYPERV
	case VDSO_TH_ALGO_X86_HVTSC:
		if (hyperv_ref_tsc == NULL)
			__vdso_init_hyperv_tsc();
		if (hyperv_ref_tsc == MAP_FAILED)
			return (ENOSYS);
		return (__vdso_hyperv_tsc(hyperv_ref_tsc, tc));
#endif
	default:
		return (ENOSYS);
	}
}
327246117Skib
#pragma weak __vdso_gettimekeep
/*
 * Fetch the address of the kernel-exported vdso_timekeep page from
 * the AT_TIMEKEEP ELF auxiliary vector entry.  Returns the
 * _elf_aux_info() error status (0 on success).
 */
int
__vdso_gettimekeep(struct vdso_timekeep **tk)
{

	return (_elf_aux_info(AT_TIMEKEEP, tk, sizeof(*tk)));
}
335