pvclock.c revision 1.9
1/*	$OpenBSD: pvclock.c,v 1.9 2023/02/04 19:19:37 cheloha Exp $	*/
2
3/*
4 * Copyright (c) 2018 Reyk Floeter <reyk@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19#if !defined(__i386__) && !defined(__amd64__)
20#error pvclock(4) is only supported on i386 and amd64
21#endif
22
23#include <sys/param.h>
24#include <sys/systm.h>
25#include <sys/kernel.h>
26#include <sys/timetc.h>
27#include <sys/timeout.h>
28#include <sys/malloc.h>
29#include <sys/atomic.h>
30
31#include <machine/cpu.h>
32#include <machine/atomic.h>
33#include <uvm/uvm_extern.h>
34
35#include <dev/pv/pvvar.h>
36#include <dev/pv/pvreg.h>
37
/*
 * Last counter value stored by pvclock_get_timecount() on the path taken
 * when PVCLOCK_FLAG_TSC_STABLE is clear; reset to 0 in pvclock_attach().
 */
38uint pvclock_lastcount;
39
/* Per-device state for pvclock(4). */
40struct pvclock_softc {
41	struct device		 sc_dev;
	/* kernel mapping of the time info page allocated in pvclock_attach() */
42	void			*sc_time;
	/* physical address of that page, as written to KVM_MSR_SYSTEM_TIME */
43	paddr_t			 sc_paddr;
	/* timecounter registered with tc_init(); points at pvclock_timecounter */
44	struct timecounter	*sc_tc;
45};
46
/* Device name string (dv_xname) of the softc's embedded struct device. */
47#define DEVNAME(_s)			((_s)->sc_dev.dv_xname)
48
/* Autoconf entry points referenced from pvclock_ca. */
49int	 pvclock_match(struct device *, void *, void *);
50void	 pvclock_attach(struct device *, struct device *, void *);
51int	 pvclock_activate(struct device *, int);
52
/* Timecounter read callback installed in pvclock_timecounter. */
53uint	 pvclock_get_timecount(struct timecounter *);
/*
 * NOTE(review): pvclock_read_time_info() is declared here but no
 * definition is visible in this file -- confirm it is still needed.
 */
54void	 pvclock_read_time_info(struct pvclock_softc *,
55	    struct pvclock_time_info *);
56
/*
 * Helpers bracketing a read of the shared time info: read_begin returns
 * a version snapshot, read_done reports whether it is still current.
 */
57static inline uint32_t
58	 pvclock_read_begin(const struct pvclock_time_info *);
59static inline int
60	 pvclock_read_done(const struct pvclock_time_info *, uint32_t);
61
/*
 * Attachment glue: softc size followed by the match, attach and activate
 * entry points.
 * NOTE(review): the NULL slot is presumably the detach hook -- confirm
 * against the definition of struct cfattach.
 */
62const struct cfattach pvclock_ca = {
63	sizeof(struct pvclock_softc),
64	pvclock_match,
65	pvclock_attach,
66	NULL,
67	pvclock_activate
68};
69
/*
 * Driver record carrying the driver name "pvclock" and device class
 * DV_DULL.
 * NOTE(review): the leading NULL is presumably the device array filled
 * in by autoconf -- confirm against the definition of struct cfdriver.
 */
70struct cfdriver pvclock_cd = {
71	NULL,
72	"pvclock",
73	DV_DULL
74};
75
76struct timecounter pvclock_timecounter = {
77	.tc_get_timecount = pvclock_get_timecount,
78	.tc_counter_mask = ~0u,
79	.tc_frequency = 0,
80	.tc_name = NULL,
81	.tc_quality = -2000,
82	.tc_priv = NULL,
83	.tc_user = 0,
84};
85
86int
87pvclock_match(struct device *parent, void *match, void *aux)
88{
89	struct pv_attach_args	*pva = aux;
90	struct pvbus_hv		*hv;
91
92	/*
93	 * pvclock is provided by different hypervisors, we currently
94	 * only support the "kvmclock".
95	 */
96	hv = &pva->pva_hv[PVBUS_KVM];
97	if (hv->hv_base == 0)
98		hv = &pva->pva_hv[PVBUS_OPENBSD];
99	if (hv->hv_base != 0) {
100		/*
101		 * We only implement support for the 2nd version of pvclock.
102		 * The first version is basically the same but with different
103		 * non-standard MSRs and it is deprecated.
104		 */
105		if ((hv->hv_features & (1 << KVM_FEATURE_CLOCKSOURCE2)) == 0)
106			return (0);
107
108		/*
109		 * Only the "stable" clock with a sync'ed TSC is supported.
110		 * In this case the host guarantees that the TSC is constant
111		 * and invariant, either by the underlying TSC or by passing
112		 * on a synchronized value.
113		 */
114		if ((hv->hv_features &
115		    (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT)) == 0)
116			return (0);
117
118		return (1);
119	}
120
121	return (0);
122}
123
124void
125pvclock_attach(struct device *parent, struct device *self, void *aux)
126{
127	struct pvclock_softc		*sc = (struct pvclock_softc *)self;
128	struct pvclock_time_info	*ti;
129	paddr_t			 	 pa;
130	uint32_t			 version;
131	uint8_t				 flags;
132
133	if ((sc->sc_time = km_alloc(PAGE_SIZE,
134	    &kv_any, &kp_zero, &kd_nowait)) == NULL) {
135		printf(": time page allocation failed\n");
136		return;
137	}
138	if (!pmap_extract(pmap_kernel(), (vaddr_t)sc->sc_time, &pa)) {
139		printf(": time page PA extraction failed\n");
140		km_free(sc->sc_time, PAGE_SIZE, &kv_any, &kp_zero);
141		return;
142	}
143
144	wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE);
145	sc->sc_paddr = pa;
146
147	ti = sc->sc_time;
148	do {
149		version = pvclock_read_begin(ti);
150		flags = ti->ti_flags;
151	} while (!pvclock_read_done(ti, version));
152
153	sc->sc_tc = &pvclock_timecounter;
154	sc->sc_tc->tc_name = DEVNAME(sc);
155	sc->sc_tc->tc_frequency = 1000000000ULL;
156	sc->sc_tc->tc_priv = sc;
157
158	pvclock_lastcount = 0;
159
160	/* Better than HPET but below TSC */
161	sc->sc_tc->tc_quality = 1500;
162
163	if ((flags & PVCLOCK_FLAG_TSC_STABLE) == 0) {
164		/* if tsc is not stable, set a lower priority */
165		/* Better than i8254 but below HPET */
166		sc->sc_tc->tc_quality = 500;
167	}
168
169	tc_init(sc->sc_tc);
170
171	printf("\n");
172}
173
174int
175pvclock_activate(struct device *self, int act)
176{
177	struct pvclock_softc	*sc = (struct pvclock_softc *)self;
178	int			 rv = 0;
179	paddr_t			 pa = sc->sc_paddr;
180
181	switch (act) {
182	case DVACT_POWERDOWN:
183		wrmsr(KVM_MSR_SYSTEM_TIME, pa & ~PVCLOCK_SYSTEM_TIME_ENABLE);
184		break;
185	case DVACT_RESUME:
186		wrmsr(KVM_MSR_SYSTEM_TIME, pa | PVCLOCK_SYSTEM_TIME_ENABLE);
187		break;
188	}
189
190	return (rv);
191}
192
193static inline uint32_t
194pvclock_read_begin(const struct pvclock_time_info *ti)
195{
196	uint32_t version = ti->ti_version & ~0x1;
197	virtio_membar_sync();
198	return (version);
199}
200
201static inline int
202pvclock_read_done(const struct pvclock_time_info *ti,
203    uint32_t version)
204{
205	virtio_membar_sync();
206	return (ti->ti_version == version);
207}
208
209uint
210pvclock_get_timecount(struct timecounter *tc)
211{
212	struct pvclock_softc		*sc = tc->tc_priv;
213	struct pvclock_time_info	*ti;
214	uint64_t			 tsc_timestamp, system_time, delta, ctr;
215	uint32_t			 version, mul_frac;
216	int8_t				 shift;
217	uint8_t				 flags;
218
219	ti = sc->sc_time;
220	do {
221		version = pvclock_read_begin(ti);
222		system_time = ti->ti_system_time;
223		tsc_timestamp = ti->ti_tsc_timestamp;
224		mul_frac = ti->ti_tsc_to_system_mul;
225		shift = ti->ti_tsc_shift;
226		flags = ti->ti_flags;
227	} while (!pvclock_read_done(ti, version));
228
229	/*
230	 * The algorithm is described in
231	 * linux/Documentation/virtual/kvm/msr.txt
232	 */
233	delta = rdtsc() - tsc_timestamp;
234	if (shift < 0)
235		delta >>= -shift;
236	else
237		delta <<= shift;
238	ctr = ((delta * mul_frac) >> 32) + system_time;
239
240	if ((flags & PVCLOCK_FLAG_TSC_STABLE) != 0)
241		return (ctr);
242
243	if (ctr < pvclock_lastcount)
244		return (pvclock_lastcount);
245
246	atomic_swap_uint(&pvclock_lastcount, ctr);
247
248	return (ctr);
249}
250