// SPDX-License-Identifier: GPL-2.0-only
/*
 * vMTRR implementation
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 * Copyright (C) 2015 Intel Corporation.
 *
 * Authors:
 *   Yaniv Kamay  <yaniv@qumranet.com>
 *   Avi Kivity   <avi@qumranet.com>
 *   Marcelo Tosatti <mtosatti@redhat.com>
 *   Paolo Bonzini <pbonzini@redhat.com>
 *   Xiao Guangrong <guangrong.xiao@linux.intel.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kvm_host.h>
#include <asm/mtrr.h>

#include "cpuid.h"
#include "mmu.h"

#define IA32_MTRR_DEF_TYPE_E		(1ULL << 11)
#define IA32_MTRR_DEF_TYPE_FE		(1ULL << 10)
#define IA32_MTRR_DEF_TYPE_TYPE_MASK	(0xff)
static bool is_mtrr_base_msr(unsigned int msr)
{
	/* MTRR base MSRs use even numbers, masks use odd numbers. */
	return !(msr & 0x1);
}

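/*
 * A sketch of the variable MTRR MSR layout this relies on (assuming the
 * usual 0x200-based numbering from <asm/mtrr.h>): MTRRphysBase_MSR(n) is
 * 0x200 + 2n and MTRRphysMask_MSR(n) is 0x200 + 2n + 1, so e.g. MSRs
 * 0x200/0x201 both map to var_ranges[0] and 0x202/0x203 to var_ranges[1].
 */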
static struct kvm_mtrr_range *var_mtrr_msr_to_range(struct kvm_vcpu *vcpu,
						    unsigned int msr)
{
	int index = (msr - MTRRphysBase_MSR(0)) / 2;

	return &vcpu->arch.mtrr_state.var_ranges[index];
}

static bool msr_mtrr_valid(unsigned int msr)
{
	switch (msr) {
	case MTRRphysBase_MSR(0) ... MTRRphysMask_MSR(KVM_NR_VAR_MTRR - 1):
	case MSR_MTRRfix64K_00000:
	case MSR_MTRRfix16K_80000:
	case MSR_MTRRfix16K_A0000:
	case MSR_MTRRfix4K_C0000:
	case MSR_MTRRfix4K_C8000:
	case MSR_MTRRfix4K_D0000:
	case MSR_MTRRfix4K_D8000:
	case MSR_MTRRfix4K_E0000:
	case MSR_MTRRfix4K_E8000:
	case MSR_MTRRfix4K_F0000:
	case MSR_MTRRfix4K_F8000:
	case MSR_MTRRdefType:
		return true;
	}
	return false;
}

static bool valid_mtrr_type(unsigned int t)
{
	return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */
}

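/*
 * Reject writes that would set reserved bits.  For MSR_MTRRdefType only
 * the type field (bits 7:0) and the FE/E enables (bits 10 and 11) are
 * writable; for a variable base MSR bits 11:8 are reserved, and for a
 * variable mask MSR bits 10:0 are reserved (bit 11 is the Valid flag).
 * Physical-address bits above the guest's MAXPHYADDR are reserved in both.
 */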
static bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	int i;
	u64 mask;

	if (!msr_mtrr_valid(msr))
		return false;

	if (msr == MSR_MTRRdefType) {
		if (data & ~0xcff)
			return false;
		return valid_mtrr_type(data & 0xff);
	} else if (msr >= MSR_MTRRfix64K_00000 && msr <= MSR_MTRRfix4K_F8000) {
		for (i = 0; i < 8; i++)
			if (!valid_mtrr_type((data >> (i * 8)) & 0xff))
				return false;
		return true;
	}

	/* variable MTRRs */
	WARN_ON(!(msr >= MTRRphysBase_MSR(0) &&
		  msr <= MTRRphysMask_MSR(KVM_NR_VAR_MTRR - 1)));

	mask = kvm_vcpu_reserved_gpa_bits_raw(vcpu);
	if ((msr & 1) == 0) {
		/* MTRR base */
		if (!valid_mtrr_type(data & 0xff))
			return false;
		mask |= 0xf00;
	} else {
		/* MTRR mask */
		mask |= 0x7ff;
	}

	return (data & mask) == 0;
}

static bool mtrr_is_enabled(struct kvm_mtrr *mtrr_state)
{
	return !!(mtrr_state->deftype & IA32_MTRR_DEF_TYPE_E);
}

static bool fixed_mtrr_is_enabled(struct kvm_mtrr *mtrr_state)
{
	return !!(mtrr_state->deftype & IA32_MTRR_DEF_TYPE_FE);
}

static u8 mtrr_default_type(struct kvm_mtrr *mtrr_state)
{
	return mtrr_state->deftype & IA32_MTRR_DEF_TYPE_TYPE_MASK;
}

static u8 mtrr_disabled_type(struct kvm_vcpu *vcpu)
{
	/*
	 * Intel SDM 11.11.2.2: all MTRRs are disabled when the
	 * IA32_MTRR_DEF_TYPE.E bit is cleared, and the UC memory type
	 * is applied to all of physical memory.
	 *
	 * However, virtual machines can be run with CPUID such that
	 * there are no MTRRs.  In that case, the firmware will never
	 * enable MTRRs and it is obviously undesirable to run the
	 * guest entirely with UC memory, so WB is used instead.
	 */
	if (guest_cpuid_has(vcpu, X86_FEATURE_MTRR))
		return MTRR_TYPE_UNCACHABLE;
	else
		return MTRR_TYPE_WRBACK;
}

/*
 * Three terms are used in the code below:
 * - segment: the address range covered by one group of fixed MTRRs;
 * - unit: one MSR entry within a segment;
 * - range: an address range covered by a single memory cache type.
 */
struct fixed_mtrr_segment {
	u64 start;
	u64 end;

	int range_shift;

	/* the start position in kvm_mtrr.fixed_ranges[]. */
	int range_start;
};

static struct fixed_mtrr_segment fixed_seg_table[] = {
	/* MSR_MTRRfix64K_00000, 1 unit. 64K fixed mtrr. */
	{
		.start = 0x0,
		.end = 0x80000,
		.range_shift = 16, /* 64K */
		.range_start = 0,
	},

	/*
	 * MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000, 2 units,
	 * 16K fixed mtrr.
	 */
	{
		.start = 0x80000,
		.end = 0xc0000,
		.range_shift = 14, /* 16K */
		.range_start = 8,
	},

	/*
	 * MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000, 8 units,
	 * 4K fixed mtrr.
	 */
	{
		.start = 0xc0000,
		.end = 0x100000,
		.range_shift = 12, /* 4K */
		.range_start = 24,
	}
};
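
/*
 * Worked example for the table above: MSR_MTRRfix16K_A0000 is unit 1 of
 * segment 1.  Its unit size is 8 << 14 = 0x20000, so it covers GPAs
 * [0x80000 + 1 * 0x20000, 0xc0000) = [0xa0000, 0xc0000) and occupies
 * fixed_ranges[16..23] (range_start 8 + 8 * unit 1).
 */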

/*
 * Each unit is covered by one MSR; one MSR entry contains 8 ranges, so
 * the unit size is always 8 * 2^range_shift.
 */
static u64 fixed_mtrr_seg_unit_size(int seg)
{
	return 8 << fixed_seg_table[seg].range_shift;
}

static bool fixed_msr_to_seg_unit(u32 msr, int *seg, int *unit)
{
	switch (msr) {
	case MSR_MTRRfix64K_00000:
		*seg = 0;
		*unit = 0;
		break;
	case MSR_MTRRfix16K_80000 ... MSR_MTRRfix16K_A0000:
		*seg = 1;
		*unit = array_index_nospec(
			msr - MSR_MTRRfix16K_80000,
			MSR_MTRRfix16K_A0000 - MSR_MTRRfix16K_80000 + 1);
		break;
	case MSR_MTRRfix4K_C0000 ... MSR_MTRRfix4K_F8000:
		*seg = 2;
		*unit = array_index_nospec(
			msr - MSR_MTRRfix4K_C0000,
			MSR_MTRRfix4K_F8000 - MSR_MTRRfix4K_C0000 + 1);
		break;
	default:
		return false;
	}

	return true;
}

static void fixed_mtrr_seg_unit_range(int seg, int unit, u64 *start, u64 *end)
{
	struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg];
	u64 unit_size = fixed_mtrr_seg_unit_size(seg);

	*start = mtrr_seg->start + unit * unit_size;
	*end = *start + unit_size;
	WARN_ON(*end > mtrr_seg->end);
}

static int fixed_mtrr_seg_unit_range_index(int seg, int unit)
{
	struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg];

	WARN_ON(mtrr_seg->start + unit * fixed_mtrr_seg_unit_size(seg)
		> mtrr_seg->end);

	/* each unit has 8 ranges. */
	return mtrr_seg->range_start + 8 * unit;
}

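/*
 * Index of the last range in a segment.  For example, segment 2 covers
 * (0x100000 - 0xc0000) >> 12 = 64 ranges, so its last index is
 * 24 + 64 - 1 = 87 (fixed_ranges[] has 8 + 16 + 64 = 88 entries in total).
 */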
static int fixed_mtrr_seg_end_range_index(int seg)
{
	struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg];
	int n;

	n = (mtrr_seg->end - mtrr_seg->start) >> mtrr_seg->range_shift;
	return mtrr_seg->range_start + n - 1;
}

static bool fixed_msr_to_range(u32 msr, u64 *start, u64 *end)
{
	int seg, unit;

	if (!fixed_msr_to_seg_unit(msr, &seg, &unit))
		return false;

	fixed_mtrr_seg_unit_range(seg, unit, start, end);
	return true;
}

static int fixed_msr_to_range_index(u32 msr)
{
	int seg, unit;

	if (!fixed_msr_to_seg_unit(msr, &seg, &unit))
		return -1;

	return fixed_mtrr_seg_unit_range_index(seg, unit);
}

static int fixed_mtrr_addr_to_seg(u64 addr)
{
	struct fixed_mtrr_segment *mtrr_seg;
	int seg, seg_num = ARRAY_SIZE(fixed_seg_table);

	for (seg = 0; seg < seg_num; seg++) {
		mtrr_seg = &fixed_seg_table[seg];
		if (mtrr_seg->start <= addr && addr < mtrr_seg->end)
			return seg;
	}

	return -1;
}

static int fixed_mtrr_addr_seg_to_range_index(u64 addr, int seg)
{
	struct fixed_mtrr_segment *mtrr_seg;
	int index;

	mtrr_seg = &fixed_seg_table[seg];
	index = mtrr_seg->range_start;
	index += (addr - mtrr_seg->start) >> mtrr_seg->range_shift;
	return index;
}

static u64 fixed_mtrr_range_end_addr(int seg, int index)
{
	struct fixed_mtrr_segment *mtrr_seg = &fixed_seg_table[seg];
	int pos = index - mtrr_seg->range_start;

	return mtrr_seg->start + ((pos + 1) << mtrr_seg->range_shift);
}

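/*
 * Illustrative example (assuming a guest with 36-bit physical addresses):
 * a guest write of base = 0x80000000 | MTRR_TYPE_WRBACK and
 * mask = 0xf80000000 | (1 << 11) describes the 2 GiB range
 * [0x80000000, 0x100000000).  Because set_var_mtrr_msr() folds all
 * reserved GPA bits (63:36 here) into the stored mask, ~mask below only
 * covers bits 30:0 and (*start | ~mask) + 1 cannot overflow.
 */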
static void var_mtrr_range(struct kvm_mtrr_range *range, u64 *start, u64 *end)
{
	u64 mask;

	*start = range->base & PAGE_MASK;

	mask = range->mask & PAGE_MASK;

	/*
	 * This cannot overflow because writing to the reserved bits of
	 * variable MTRRs causes a #GP.
	 */
	*end = (*start | ~mask) + 1;
}

static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
{
	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
	gfn_t start, end;

	if (!kvm_mmu_honors_guest_mtrrs(vcpu->kvm))
		return;

	if (!mtrr_is_enabled(mtrr_state) && msr != MSR_MTRRdefType)
		return;

	/* fixed MTRRs. */
	if (fixed_msr_to_range(msr, &start, &end)) {
		if (!fixed_mtrr_is_enabled(mtrr_state))
			return;
	} else if (msr == MSR_MTRRdefType) {
		start = 0x0;
		end = ~0ULL;
	} else {
		/* variable range MTRRs. */
		var_mtrr_range(var_mtrr_msr_to_range(vcpu, msr), &start, &end);
	}

	kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(start), gpa_to_gfn(end));
}

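/*
 * Bit 11 of a variable mask MSR is the Valid (V) flag; only valid
 * ranges are kept on the mtrr_state->head list.
 */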
static bool var_mtrr_range_is_valid(struct kvm_mtrr_range *range)
{
	return (range->mask & (1 << 11)) != 0;
}

static void set_var_mtrr_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
	struct kvm_mtrr_range *tmp, *cur;

	cur = var_mtrr_msr_to_range(vcpu, msr);

	/* remove the entry if it's in the list. */
	if (var_mtrr_range_is_valid(cur))
		list_del(&cur->node);

	/*
	 * Set all illegal GPA bits in the mask, since those bits must
	 * implicitly be 0.  The bits are then cleared when reading them.
	 */
	if (is_mtrr_base_msr(msr))
		cur->base = data;
	else
		cur->mask = data | kvm_vcpu_reserved_gpa_bits_raw(vcpu);

	/* add it to the list if it's enabled. */
	if (var_mtrr_range_is_valid(cur)) {
		list_for_each_entry(tmp, &mtrr_state->head, node)
			if (cur->base >= tmp->base)
				break;
		list_add_tail(&cur->node, &tmp->node);
	}
}

int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
	int index;

	if (!kvm_mtrr_valid(vcpu, msr, data))
		return 1;

	index = fixed_msr_to_range_index(msr);
	if (index >= 0)
		*(u64 *)&vcpu->arch.mtrr_state.fixed_ranges[index] = data;
	else if (msr == MSR_MTRRdefType)
		vcpu->arch.mtrr_state.deftype = data;
	else
		set_var_mtrr_msr(vcpu, msr, data);

	update_mtrr(vcpu, msr);
	return 0;
}

int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
{
	int index;

	/* MSR_MTRRcap is a read-only MSR. */
	if (msr == MSR_MTRRcap) {
		/*
		 * SMRR = 0
		 * WC = 1
		 * FIX = 1
		 * VCNT = KVM_NR_VAR_MTRR
		 */
		*pdata = 0x500 | KVM_NR_VAR_MTRR;
		return 0;
	}

	if (!msr_mtrr_valid(msr))
		return 1;

	index = fixed_msr_to_range_index(msr);
	if (index >= 0) {
		*pdata = *(u64 *)&vcpu->arch.mtrr_state.fixed_ranges[index];
	} else if (msr == MSR_MTRRdefType) {
		*pdata = vcpu->arch.mtrr_state.deftype;
	} else {
		/* Variable MTRRs */
		if (is_mtrr_base_msr(msr))
			*pdata = var_mtrr_msr_to_range(vcpu, msr)->base;
		else
			*pdata = var_mtrr_msr_to_range(vcpu, msr)->mask;

		*pdata &= ~kvm_vcpu_reserved_gpa_bits_raw(vcpu);
	}

	return 0;
}

void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu)
{
	INIT_LIST_HEAD(&vcpu->arch.mtrr_state.head);
}

struct mtrr_iter {
	/* input fields. */
	struct kvm_mtrr *mtrr_state;
	u64 start;
	u64 end;

	/* output fields. */
	int mem_type;
	/* mtrr is completely disabled? */
	bool mtrr_disabled;
	/* [start, end) is not fully covered in MTRRs? */
	bool partial_map;

	/* private fields. */
	union {
		/* used for fixed MTRRs. */
		struct {
			int index;
			int seg;
		};

		/* used for var MTRRs. */
		struct {
			struct kvm_mtrr_range *range;
			/* the maximum address covered by var MTRRs so far. */
			u64 start_max;
		};
	};

	bool fixed;
};

static bool mtrr_lookup_fixed_start(struct mtrr_iter *iter)
{
	int seg, index;

	if (!fixed_mtrr_is_enabled(iter->mtrr_state))
		return false;

	seg = fixed_mtrr_addr_to_seg(iter->start);
	if (seg < 0)
		return false;

	iter->fixed = true;
	index = fixed_mtrr_addr_seg_to_range_index(iter->start, seg);
	iter->index = index;
	iter->seg = seg;
	return true;
}

static bool match_var_range(struct mtrr_iter *iter,
			    struct kvm_mtrr_range *range)
{
	u64 start, end;

	var_mtrr_range(range, &start, &end);
	if (!(start >= iter->end || end <= iter->start)) {
		iter->range = range;

		/*
		 * This function is called while walking kvm_mtrr.head;
		 * the matched range has the minimum base address that
		 * overlaps [iter->start_max, iter->end).
		 */
		iter->partial_map |= iter->start_max < start;

		/* update the maximum address that has been covered. */
		iter->start_max = max(iter->start_max, end);
		return true;
	}

	return false;
}

static void __mtrr_lookup_var_next(struct mtrr_iter *iter)
{
	struct kvm_mtrr *mtrr_state = iter->mtrr_state;

	list_for_each_entry_continue(iter->range, &mtrr_state->head, node)
		if (match_var_range(iter, iter->range))
			return;

	iter->range = NULL;
	iter->partial_map |= iter->start_max < iter->end;
}

static void mtrr_lookup_var_start(struct mtrr_iter *iter)
{
	struct kvm_mtrr *mtrr_state = iter->mtrr_state;

	iter->fixed = false;
	iter->start_max = iter->start;
	iter->range = NULL;
	iter->range = list_prepare_entry(iter->range, &mtrr_state->head, node);

	__mtrr_lookup_var_next(iter);
}

static void mtrr_lookup_fixed_next(struct mtrr_iter *iter)
{
	/* terminate the lookup. */
	if (fixed_mtrr_range_end_addr(iter->seg, iter->index) >= iter->end) {
		iter->fixed = false;
		iter->range = NULL;
		return;
	}

	iter->index++;

	/* all fixed MTRRs have been looked up. */
	if (iter->index >= ARRAY_SIZE(iter->mtrr_state->fixed_ranges))
		return mtrr_lookup_var_start(iter);

	/* switch to the next segment. */
	if (iter->index > fixed_mtrr_seg_end_range_index(iter->seg))
		iter->seg++;
}

static void mtrr_lookup_var_next(struct mtrr_iter *iter)
{
	__mtrr_lookup_var_next(iter);
}

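/*
 * Lookup flow (a summary of the helpers above): if MTRRs are globally
 * disabled, only flag mtrr_disabled.  Otherwise start with the fixed
 * ranges when they are enabled and the start address falls below 1 MiB,
 * then continue with the variable ranges on mtrr_state->head.
 */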
static void mtrr_lookup_start(struct mtrr_iter *iter)
{
	if (!mtrr_is_enabled(iter->mtrr_state)) {
		iter->mtrr_disabled = true;
		return;
	}

	if (!mtrr_lookup_fixed_start(iter))
		mtrr_lookup_var_start(iter);
}

static void mtrr_lookup_init(struct mtrr_iter *iter,
			     struct kvm_mtrr *mtrr_state, u64 start, u64 end)
{
	iter->mtrr_state = mtrr_state;
	iter->start = start;
	iter->end = end;
	iter->mtrr_disabled = false;
	iter->partial_map = false;
	iter->fixed = false;
	iter->range = NULL;

	mtrr_lookup_start(iter);
}

static bool mtrr_lookup_okay(struct mtrr_iter *iter)
{
	if (iter->fixed) {
		iter->mem_type = iter->mtrr_state->fixed_ranges[iter->index];
		return true;
	}

	if (iter->range) {
		iter->mem_type = iter->range->base & 0xff;
		return true;
	}

	return false;
}

static void mtrr_lookup_next(struct mtrr_iter *iter)
{
	if (iter->fixed)
		mtrr_lookup_fixed_next(iter);
	else
		mtrr_lookup_var_next(iter);
}

#define mtrr_for_each_mem_type(_iter_, _mtrr_, _gpa_start_, _gpa_end_) \
	for (mtrr_lookup_init(_iter_, _mtrr_, _gpa_start_, _gpa_end_); \
	     mtrr_lookup_okay(_iter_); mtrr_lookup_next(_iter_))
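
/*
 * Usage sketch: callers walk every memory type that overlaps a GPA range,
 * as kvm_mtrr_get_guest_memory_type() does below, e.g.
 *
 *	mtrr_for_each_mem_type(&iter, mtrr_state, start, end) {
 *		... consume iter.mem_type for the current sub-range ...
 *	}
 *
 * and then check iter.mtrr_disabled and iter.partial_map.
 */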

u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn)
{
	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
	struct mtrr_iter iter;
	u64 start, end;
	int type = -1;
	const int wt_wb_mask = (1 << MTRR_TYPE_WRBACK)
			       | (1 << MTRR_TYPE_WRTHROUGH);

	start = gfn_to_gpa(gfn);
	end = start + PAGE_SIZE;

	mtrr_for_each_mem_type(&iter, mtrr_state, start, end) {
		int curr_type = iter.mem_type;

		/*
		 * Please refer to Intel SDM Volume 3: 11.11.4.1 MTRR
		 * Precedences.
		 */

		if (type == -1) {
			type = curr_type;
			continue;
		}

		/*
		 * If two or more variable memory ranges match and the
		 * memory types are identical, then that memory type is
		 * used.
		 */
		if (type == curr_type)
			continue;

		/*
		 * If two or more variable memory ranges match and one of
		 * the memory types is UC, the UC memory type is used.
		 */
		if (curr_type == MTRR_TYPE_UNCACHABLE)
			return MTRR_TYPE_UNCACHABLE;

		/*
		 * If two or more variable memory ranges match and the
		 * memory types are WT and WB, the WT memory type is used.
		 */
		if (((1 << type) & wt_wb_mask) &&
		      ((1 << curr_type) & wt_wb_mask)) {
			type = MTRR_TYPE_WRTHROUGH;
			continue;
		}

		/*
		 * For overlaps not defined by the above rules, processor
		 * behavior is undefined.
		 */

		/* We use WB for this undefined behavior. :( */
		return MTRR_TYPE_WRBACK;
	}

	if (iter.mtrr_disabled)
		return mtrr_disabled_type(vcpu);

	/* not contained in any MTRRs. */
	if (type == -1)
		return mtrr_default_type(mtrr_state);

	/*
	 * Only one page is checked, so it is impossible for it to be
	 * partially covered by MTRRs.
	 */
	WARN_ON(iter.partial_map);

	return type;
}
EXPORT_SYMBOL_GPL(kvm_mtrr_get_guest_memory_type);

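/*
 * Check that [gfn, gfn + page_num) is covered by a single MTRR memory
 * type, e.g. so that the MMU can map the whole range with one large page
 * when guest MTRRs are honored.
 */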
bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
					  int page_num)
{
	struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
	struct mtrr_iter iter;
	u64 start, end;
	int type = -1;

	start = gfn_to_gpa(gfn);
	end = gfn_to_gpa(gfn + page_num);
	mtrr_for_each_mem_type(&iter, mtrr_state, start, end) {
		if (type == -1) {
			type = iter.mem_type;
			continue;
		}

		if (type != iter.mem_type)
			return false;
	}

	if (iter.mtrr_disabled)
		return true;

	if (!iter.partial_map)
		return true;

	if (type == -1)
		return true;

	return type == mtrr_default_type(mtrr_state);
}