1/*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2014 Bryan Venteicher <bryanv@FreeBSD.org>
5 * Copyright (c) 2021 Mathieu Chouquet-Stringer
6 * Copyright (c) 2021 Juniper Networks, Inc.
7 * Copyright (c) 2021 Klara, Inc.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31/*
32 * Linux KVM paravirtual clock support
33 *
34 * References:
35 *     - [1] https://www.kernel.org/doc/html/latest/virt/kvm/cpuid.html
36 *     - [2] https://www.kernel.org/doc/html/latest/virt/kvm/msr.html
37 */
38
39#include <sys/param.h>
40#include <sys/bus.h>
41#include <sys/domainset.h>
42#include <sys/kernel.h>
43#include <sys/malloc.h>
44#include <sys/module.h>
45#include <sys/smp.h>
46#include <sys/sysctl.h>
47
48#include <vm/vm.h>
49#include <vm/pmap.h>
50#include <vm/vm_extern.h>
51
52#include <machine/pvclock.h>
53#include <x86/kvm.h>
54
55#include "clock_if.h"
56
/* Name used for the device, its driver, and the pvclock registration. */
#define	KVM_CLOCK_DEVNAME		"kvmclock"
/*
 * Timecounter quality for the KVM clock.  The value is deliberately placed:
 *   (1) above the HPET's quality (always 950),
 *   (2) above the TSC's default quality of 800, and
 *   (3) below the quality the TSC gets when it supports the "Invariant TSC"
 *       feature and is believed to be synchronized across all CPUs.
 */
#define	KVM_CLOCK_TC_QUALITY		975
64
65struct kvm_clock_softc {
66	struct pvclock			 pvc;
67	struct pvclock_wall_clock	 wc;
68	struct pvclock_vcpu_time_info	*timeinfos;
69	u_int				 msr_tc;
70	u_int				 msr_wc;
71#ifndef EARLY_AP_STARTUP
72	int				 firstcpu;
73#endif
74};
75
76static struct pvclock_wall_clock *kvm_clock_get_wallclock(void *arg);
77static void	kvm_clock_system_time_enable(struct kvm_clock_softc *sc,
78		    const cpuset_t *cpus);
79static void	kvm_clock_system_time_enable_pcpu(void *arg);
80static void	kvm_clock_setup_sysctl(device_t);
81
82static struct pvclock_wall_clock *
83kvm_clock_get_wallclock(void *arg)
84{
85	struct kvm_clock_softc *sc = arg;
86
87	wrmsr(sc->msr_wc, vtophys(&sc->wc));
88	return (&sc->wc);
89}
90
91static void
92kvm_clock_system_time_enable(struct kvm_clock_softc *sc, const cpuset_t *cpus)
93{
94	smp_rendezvous_cpus(*cpus, NULL, kvm_clock_system_time_enable_pcpu,
95	    NULL, sc);
96}
97
98static void
99kvm_clock_system_time_enable_pcpu(void *arg)
100{
101	struct kvm_clock_softc *sc = arg;
102
103	/*
104	 * See [2]; the lsb of this MSR is the system time enable bit.
105	 */
106	wrmsr(sc->msr_tc, vtophys(&(sc->timeinfos)[curcpu]) | 1);
107}
108
109#ifndef EARLY_AP_STARTUP
110static void
111kvm_clock_init_smp(void *arg __unused)
112{
113	devclass_t kvm_clock_devclass;
114	cpuset_t cpus;
115	struct kvm_clock_softc *sc;
116
117	kvm_clock_devclass = devclass_find(KVM_CLOCK_DEVNAME);
118	sc = devclass_get_softc(kvm_clock_devclass, 0);
119	if (sc == NULL || mp_ncpus == 1)
120		return;
121
122	/*
123	 * Register with the hypervisor on all CPUs except the one that
124	 * registered in kvm_clock_attach().
125	 */
126	cpus = all_cpus;
127	KASSERT(CPU_ISSET(sc->firstcpu, &cpus),
128	    ("%s: invalid first CPU %d", __func__, sc->firstcpu));
129	CPU_CLR(sc->firstcpu, &cpus);
130	kvm_clock_system_time_enable(sc, &cpus);
131}
132SYSINIT(kvm_clock, SI_SUB_SMP, SI_ORDER_ANY, kvm_clock_init_smp, NULL);
133#endif
134
135static void
136kvm_clock_identify(driver_t *driver, device_t parent)
137{
138	u_int regs[4];
139
140	kvm_cpuid_get_features(regs);
141	if ((regs[0] &
142	    (KVM_FEATURE_CLOCKSOURCE2 | KVM_FEATURE_CLOCKSOURCE)) == 0)
143		return;
144	if (device_find_child(parent, KVM_CLOCK_DEVNAME, -1))
145		return;
146	BUS_ADD_CHILD(parent, 0, KVM_CLOCK_DEVNAME, 0);
147}
148
149static int
150kvm_clock_probe(device_t dev)
151{
152	device_set_desc(dev, "KVM paravirtual clock");
153	return (BUS_PROBE_DEFAULT);
154}
155
156static int
157kvm_clock_attach(device_t dev)
158{
159	u_int regs[4];
160	struct kvm_clock_softc *sc = device_get_softc(dev);
161	bool stable_flag_supported;
162
163	/* Process KVM "features" CPUID leaf content: */
164	kvm_cpuid_get_features(regs);
165	if ((regs[0] & KVM_FEATURE_CLOCKSOURCE2) != 0) {
166		sc->msr_tc = KVM_MSR_SYSTEM_TIME_NEW;
167		sc->msr_wc = KVM_MSR_WALL_CLOCK_NEW;
168	} else {
169		KASSERT((regs[0] & KVM_FEATURE_CLOCKSOURCE) != 0,
170		    ("Clocksource feature flags disappeared since "
171		    "kvm_clock_identify: regs[0] %#0x.", regs[0]));
172		sc->msr_tc = KVM_MSR_SYSTEM_TIME;
173		sc->msr_wc = KVM_MSR_WALL_CLOCK;
174	}
175	stable_flag_supported =
176	    (regs[0] & KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) != 0;
177
178	/* Set up 'struct pvclock_vcpu_time_info' page(s): */
179	sc->timeinfos = kmem_malloc(mp_ncpus *
180	    sizeof(struct pvclock_vcpu_time_info), M_WAITOK | M_ZERO);
181#ifdef EARLY_AP_STARTUP
182	kvm_clock_system_time_enable(sc, &all_cpus);
183#else
184	sc->firstcpu = curcpu;
185	kvm_clock_system_time_enable_pcpu(sc);
186#endif
187
188	/*
189	 * Init pvclock; register KVM clock wall clock, register KVM clock
190	 * timecounter, and set up the requisite infrastructure for vDSO access
191	 * to this timecounter.
192	 *     Regarding 'tc_flags': Since the KVM MSR documentation does not
193	 *     specifically discuss suspend/resume scenarios, conservatively
194	 *     leave 'TC_FLAGS_SUSPEND_SAFE' cleared and assume that the system
195	 *     time must be re-inited in such cases.
196	 */
197	sc->pvc.get_wallclock = kvm_clock_get_wallclock;
198	sc->pvc.get_wallclock_arg = sc;
199	sc->pvc.timeinfos = sc->timeinfos;
200	sc->pvc.stable_flag_supported = stable_flag_supported;
201	pvclock_init(&sc->pvc, dev, KVM_CLOCK_DEVNAME, KVM_CLOCK_TC_QUALITY, 0);
202	kvm_clock_setup_sysctl(dev);
203	return (0);
204}
205
206static int
207kvm_clock_detach(device_t dev)
208{
209	struct kvm_clock_softc *sc = device_get_softc(dev);
210
211	return (pvclock_destroy(&sc->pvc));
212}
213
214static int
215kvm_clock_suspend(device_t dev)
216{
217	return (0);
218}
219
220static int
221kvm_clock_resume(device_t dev)
222{
223	/*
224	 * See note in 'kvm_clock_attach()' regarding 'TC_FLAGS_SUSPEND_SAFE';
225	 * conservatively assume that the system time must be re-inited in
226	 * suspend/resume scenarios.
227	 */
228	kvm_clock_system_time_enable(device_get_softc(dev), &all_cpus);
229	pvclock_resume();
230	inittodr(time_second);
231	return (0);
232}
233
234static int
235kvm_clock_gettime(device_t dev, struct timespec *ts)
236{
237	struct kvm_clock_softc *sc = device_get_softc(dev);
238
239	pvclock_gettime(&sc->pvc, ts);
240	return (0);
241}
242
243static int
244kvm_clock_settime(device_t dev, struct timespec *ts)
245{
246	/*
247	 * Even though it is not possible to set the KVM clock's wall clock, to
248	 * avoid the possibility of periodic benign error messages from
249	 * 'settime_task_func()', report success rather than, e.g., 'ENODEV'.
250	 */
251	return (0);
252}
253
254static int
255kvm_clock_tsc_freq_sysctl(SYSCTL_HANDLER_ARGS)
256{
257	struct kvm_clock_softc *sc = oidp->oid_arg1;
258        uint64_t freq = pvclock_tsc_freq(sc->timeinfos);
259
260        return (sysctl_handle_64(oidp, &freq, 0, req));
261}
262
263static void
264kvm_clock_setup_sysctl(device_t dev)
265{
266	struct kvm_clock_softc *sc = device_get_softc(dev);
267        struct sysctl_ctx_list *ctx = device_get_sysctl_ctx(dev);
268        struct sysctl_oid *tree = device_get_sysctl_tree(dev);
269        struct sysctl_oid_list *child = SYSCTL_CHILDREN(tree);
270
271        SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tsc_freq",
272            CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
273            kvm_clock_tsc_freq_sysctl, "QU",
274            "Time Stamp Counter frequency");
275}
276
277static device_method_t kvm_clock_methods[] = {
278	DEVMETHOD(device_identify,	kvm_clock_identify),
279	DEVMETHOD(device_probe,		kvm_clock_probe),
280	DEVMETHOD(device_attach,	kvm_clock_attach),
281	DEVMETHOD(device_detach,	kvm_clock_detach),
282	DEVMETHOD(device_suspend,	kvm_clock_suspend),
283	DEVMETHOD(device_resume,	kvm_clock_resume),
284	/* clock interface */
285	DEVMETHOD(clock_gettime,	kvm_clock_gettime),
286	DEVMETHOD(clock_settime,	kvm_clock_settime),
287
288	DEVMETHOD_END
289};
290
291static driver_t kvm_clock_driver = {
292	KVM_CLOCK_DEVNAME,
293	kvm_clock_methods,
294	sizeof(struct kvm_clock_softc),
295};
296
297DRIVER_MODULE(kvm_clock, nexus, kvm_clock_driver, 0, 0);
298