1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Generic userspace implementations of gettimeofday() and similar.
4 */
5#include <vdso/datapage.h>
6#include <vdso/helpers.h>
7
8#ifndef vdso_calc_delta
9/*
10 * Default implementation which works for all sane clocksources. That
11 * obviously excludes x86/TSC.
12 */
13static __always_inline
14u64 vdso_calc_delta(u64 cycles, u64 last, u64 mask, u32 mult)
15{
16	return ((cycles - last) & mask) * mult;
17}
18#endif
19
20#ifndef vdso_shift_ns
21static __always_inline u64 vdso_shift_ns(u64 ns, u32 shift)
22{
23	return ns >> shift;
24}
25#endif
26
27#ifndef __arch_vdso_hres_capable
/*
 * Default: the high resolution path is always available. do_hres() returns
 * -1 straight away when this reports false, which allows the high
 * resolution parts to be compiled out.
 */
static inline bool
__arch_vdso_hres_capable(void)
{
	return true;
}
32#endif
33
34#ifndef vdso_clocksource_ok
35static inline bool vdso_clocksource_ok(const struct vdso_data *vd)
36{
37	return vd->clock_mode != VDSO_CLOCKMODE_NONE;
38}
39#endif
40
41#ifndef vdso_cycles_ok
42static inline bool vdso_cycles_ok(u64 cycles)
43{
44	return true;
45}
46#endif
47
48#ifdef CONFIG_TIME_NS
49static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
50					  struct __kernel_timespec *ts)
51{
52	const struct vdso_data *vd;
53	const struct timens_offset *offs = &vdns->offset[clk];
54	const struct vdso_timestamp *vdso_ts;
55	u64 cycles, last, ns;
56	u32 seq;
57	s64 sec;
58
59	vd = vdns - (clk == CLOCK_MONOTONIC_RAW ? CS_RAW : CS_HRES_COARSE);
60	vd = __arch_get_timens_vdso_data(vd);
61	if (clk != CLOCK_MONOTONIC_RAW)
62		vd = &vd[CS_HRES_COARSE];
63	else
64		vd = &vd[CS_RAW];
65	vdso_ts = &vd->basetime[clk];
66
67	do {
68		seq = vdso_read_begin(vd);
69
70		if (unlikely(!vdso_clocksource_ok(vd)))
71			return -1;
72
73		cycles = __arch_get_hw_counter(vd->clock_mode, vd);
74		if (unlikely(!vdso_cycles_ok(cycles)))
75			return -1;
76		ns = vdso_ts->nsec;
77		last = vd->cycle_last;
78		ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
79		ns = vdso_shift_ns(ns, vd->shift);
80		sec = vdso_ts->sec;
81	} while (unlikely(vdso_read_retry(vd, seq)));
82
83	/* Add the namespace offset */
84	sec += offs->sec;
85	ns += offs->nsec;
86
87	/*
88	 * Do this outside the loop: a race inside the loop could result
89	 * in __iter_div_u64_rem() being extremely slow.
90	 */
91	ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
92	ts->tv_nsec = ns;
93
94	return 0;
95}
96#else
97static __always_inline
98const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
99{
100	return NULL;
101}
102
103static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
104					  struct __kernel_timespec *ts)
105{
106	return -EINVAL;
107}
108#endif
109
110static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk,
111				   struct __kernel_timespec *ts)
112{
113	const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
114	u64 cycles, last, sec, ns;
115	u32 seq;
116
117	/* Allows to compile the high resolution parts out */
118	if (!__arch_vdso_hres_capable())
119		return -1;
120
121	do {
122		/*
123		 * Open coded to handle VDSO_CLOCKMODE_TIMENS. Time namespace
124		 * enabled tasks have a special VVAR page installed which
125		 * has vd->seq set to 1 and vd->clock_mode set to
126		 * VDSO_CLOCKMODE_TIMENS. For non time namespace affected tasks
127		 * this does not affect performance because if vd->seq is
128		 * odd, i.e. a concurrent update is in progress the extra
129		 * check for vd->clock_mode is just a few extra
130		 * instructions while spin waiting for vd->seq to become
131		 * even again.
132		 */
133		while (unlikely((seq = READ_ONCE(vd->seq)) & 1)) {
134			if (IS_ENABLED(CONFIG_TIME_NS) &&
135			    vd->clock_mode == VDSO_CLOCKMODE_TIMENS)
136				return do_hres_timens(vd, clk, ts);
137			cpu_relax();
138		}
139		smp_rmb();
140
141		if (unlikely(!vdso_clocksource_ok(vd)))
142			return -1;
143
144		cycles = __arch_get_hw_counter(vd->clock_mode, vd);
145		if (unlikely(!vdso_cycles_ok(cycles)))
146			return -1;
147		ns = vdso_ts->nsec;
148		last = vd->cycle_last;
149		ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
150		ns = vdso_shift_ns(ns, vd->shift);
151		sec = vdso_ts->sec;
152	} while (unlikely(vdso_read_retry(vd, seq)));
153
154	/*
155	 * Do this outside the loop: a race inside the loop could result
156	 * in __iter_div_u64_rem() being extremely slow.
157	 */
158	ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
159	ts->tv_nsec = ns;
160
161	return 0;
162}
163
164#ifdef CONFIG_TIME_NS
165static __always_inline int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk,
166					    struct __kernel_timespec *ts)
167{
168	const struct vdso_data *vd = __arch_get_timens_vdso_data(vdns);
169	const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
170	const struct timens_offset *offs = &vdns->offset[clk];
171	u64 nsec;
172	s64 sec;
173	s32 seq;
174
175	do {
176		seq = vdso_read_begin(vd);
177		sec = vdso_ts->sec;
178		nsec = vdso_ts->nsec;
179	} while (unlikely(vdso_read_retry(vd, seq)));
180
181	/* Add the namespace offset */
182	sec += offs->sec;
183	nsec += offs->nsec;
184
185	/*
186	 * Do this outside the loop: a race inside the loop could result
187	 * in __iter_div_u64_rem() being extremely slow.
188	 */
189	ts->tv_sec = sec + __iter_div_u64_rem(nsec, NSEC_PER_SEC, &nsec);
190	ts->tv_nsec = nsec;
191	return 0;
192}
193#else
194static __always_inline int do_coarse_timens(const struct vdso_data *vdns, clockid_t clk,
195					    struct __kernel_timespec *ts)
196{
197	return -1;
198}
199#endif
200
201static __always_inline int do_coarse(const struct vdso_data *vd, clockid_t clk,
202				     struct __kernel_timespec *ts)
203{
204	const struct vdso_timestamp *vdso_ts = &vd->basetime[clk];
205	u32 seq;
206
207	do {
208		/*
209		 * Open coded to handle VDSO_CLOCK_TIMENS. See comment in
210		 * do_hres().
211		 */
212		while ((seq = READ_ONCE(vd->seq)) & 1) {
213			if (IS_ENABLED(CONFIG_TIME_NS) &&
214			    vd->clock_mode == VDSO_CLOCKMODE_TIMENS)
215				return do_coarse_timens(vd, clk, ts);
216			cpu_relax();
217		}
218		smp_rmb();
219
220		ts->tv_sec = vdso_ts->sec;
221		ts->tv_nsec = vdso_ts->nsec;
222	} while (unlikely(vdso_read_retry(vd, seq)));
223
224	return 0;
225}
226
227static __always_inline int
228__cvdso_clock_gettime_common(const struct vdso_data *vd, clockid_t clock,
229			     struct __kernel_timespec *ts)
230{
231	u32 msk;
232
233	/* Check for negative values or invalid clocks */
234	if (unlikely((u32) clock >= MAX_CLOCKS))
235		return -1;
236
237	/*
238	 * Convert the clockid to a bitmask and use it to check which
239	 * clocks are handled in the VDSO directly.
240	 */
241	msk = 1U << clock;
242	if (likely(msk & VDSO_HRES))
243		vd = &vd[CS_HRES_COARSE];
244	else if (msk & VDSO_COARSE)
245		return do_coarse(&vd[CS_HRES_COARSE], clock, ts);
246	else if (msk & VDSO_RAW)
247		vd = &vd[CS_RAW];
248	else
249		return -1;
250
251	return do_hres(vd, clock, ts);
252}
253
254static __maybe_unused int
255__cvdso_clock_gettime_data(const struct vdso_data *vd, clockid_t clock,
256			   struct __kernel_timespec *ts)
257{
258	int ret = __cvdso_clock_gettime_common(vd, clock, ts);
259
260	if (unlikely(ret))
261		return clock_gettime_fallback(clock, ts);
262	return 0;
263}
264
265static __maybe_unused int
266__cvdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts)
267{
268	return __cvdso_clock_gettime_data(__arch_get_vdso_data(), clock, ts);
269}
270
271#ifdef BUILD_VDSO32
272static __maybe_unused int
273__cvdso_clock_gettime32_data(const struct vdso_data *vd, clockid_t clock,
274			     struct old_timespec32 *res)
275{
276	struct __kernel_timespec ts;
277	int ret;
278
279	ret = __cvdso_clock_gettime_common(vd, clock, &ts);
280
281	if (unlikely(ret))
282		return clock_gettime32_fallback(clock, res);
283
284	/* For ret == 0 */
285	res->tv_sec = ts.tv_sec;
286	res->tv_nsec = ts.tv_nsec;
287
288	return ret;
289}
290
291static __maybe_unused int
292__cvdso_clock_gettime32(clockid_t clock, struct old_timespec32 *res)
293{
294	return __cvdso_clock_gettime32_data(__arch_get_vdso_data(), clock, res);
295}
296#endif /* BUILD_VDSO32 */
297
298static __maybe_unused int
299__cvdso_gettimeofday_data(const struct vdso_data *vd,
300			  struct __kernel_old_timeval *tv, struct timezone *tz)
301{
302
303	if (likely(tv != NULL)) {
304		struct __kernel_timespec ts;
305
306		if (do_hres(&vd[CS_HRES_COARSE], CLOCK_REALTIME, &ts))
307			return gettimeofday_fallback(tv, tz);
308
309		tv->tv_sec = ts.tv_sec;
310		tv->tv_usec = (u32)ts.tv_nsec / NSEC_PER_USEC;
311	}
312
313	if (unlikely(tz != NULL)) {
314		if (IS_ENABLED(CONFIG_TIME_NS) &&
315		    vd->clock_mode == VDSO_CLOCKMODE_TIMENS)
316			vd = __arch_get_timens_vdso_data(vd);
317
318		tz->tz_minuteswest = vd[CS_HRES_COARSE].tz_minuteswest;
319		tz->tz_dsttime = vd[CS_HRES_COARSE].tz_dsttime;
320	}
321
322	return 0;
323}
324
325static __maybe_unused int
326__cvdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz)
327{
328	return __cvdso_gettimeofday_data(__arch_get_vdso_data(), tv, tz);
329}
330
331#ifdef VDSO_HAS_TIME
332static __maybe_unused __kernel_old_time_t
333__cvdso_time_data(const struct vdso_data *vd, __kernel_old_time_t *time)
334{
335	__kernel_old_time_t t;
336
337	if (IS_ENABLED(CONFIG_TIME_NS) &&
338	    vd->clock_mode == VDSO_CLOCKMODE_TIMENS)
339		vd = __arch_get_timens_vdso_data(vd);
340
341	t = READ_ONCE(vd[CS_HRES_COARSE].basetime[CLOCK_REALTIME].sec);
342
343	if (time)
344		*time = t;
345
346	return t;
347}
348
349static __maybe_unused __kernel_old_time_t __cvdso_time(__kernel_old_time_t *time)
350{
351	return __cvdso_time_data(__arch_get_vdso_data(), time);
352}
353#endif /* VDSO_HAS_TIME */
354
355#ifdef VDSO_HAS_CLOCK_GETRES
356static __maybe_unused
357int __cvdso_clock_getres_common(const struct vdso_data *vd, clockid_t clock,
358				struct __kernel_timespec *res)
359{
360	u32 msk;
361	u64 ns;
362
363	/* Check for negative values or invalid clocks */
364	if (unlikely((u32) clock >= MAX_CLOCKS))
365		return -1;
366
367	if (IS_ENABLED(CONFIG_TIME_NS) &&
368	    vd->clock_mode == VDSO_CLOCKMODE_TIMENS)
369		vd = __arch_get_timens_vdso_data(vd);
370
371	/*
372	 * Convert the clockid to a bitmask and use it to check which
373	 * clocks are handled in the VDSO directly.
374	 */
375	msk = 1U << clock;
376	if (msk & (VDSO_HRES | VDSO_RAW)) {
377		/*
378		 * Preserves the behaviour of posix_get_hrtimer_res().
379		 */
380		ns = READ_ONCE(vd[CS_HRES_COARSE].hrtimer_res);
381	} else if (msk & VDSO_COARSE) {
382		/*
383		 * Preserves the behaviour of posix_get_coarse_res().
384		 */
385		ns = LOW_RES_NSEC;
386	} else {
387		return -1;
388	}
389
390	if (likely(res)) {
391		res->tv_sec = 0;
392		res->tv_nsec = ns;
393	}
394	return 0;
395}
396
397static __maybe_unused
398int __cvdso_clock_getres_data(const struct vdso_data *vd, clockid_t clock,
399			      struct __kernel_timespec *res)
400{
401	int ret = __cvdso_clock_getres_common(vd, clock, res);
402
403	if (unlikely(ret))
404		return clock_getres_fallback(clock, res);
405	return 0;
406}
407
408static __maybe_unused
409int __cvdso_clock_getres(clockid_t clock, struct __kernel_timespec *res)
410{
411	return __cvdso_clock_getres_data(__arch_get_vdso_data(), clock, res);
412}
413
414#ifdef BUILD_VDSO32
415static __maybe_unused int
416__cvdso_clock_getres_time32_data(const struct vdso_data *vd, clockid_t clock,
417				 struct old_timespec32 *res)
418{
419	struct __kernel_timespec ts;
420	int ret;
421
422	ret = __cvdso_clock_getres_common(vd, clock, &ts);
423
424	if (unlikely(ret))
425		return clock_getres32_fallback(clock, res);
426
427	if (likely(res)) {
428		res->tv_sec = ts.tv_sec;
429		res->tv_nsec = ts.tv_nsec;
430	}
431	return ret;
432}
433
434static __maybe_unused int
435__cvdso_clock_getres_time32(clockid_t clock, struct old_timespec32 *res)
436{
437	return __cvdso_clock_getres_time32_data(__arch_get_vdso_data(),
438						clock, res);
439}
440#endif /* BUILD_VDSO32 */
441#endif /* VDSO_HAS_CLOCK_GETRES */
442