1/*  Generic MTRR (Memory Type Range Register) driver.
2
3    Copyright (C) 1997-2000  Richard Gooch
4    Copyright (c) 2002	     Patrick Mochel
5
6    This library is free software; you can redistribute it and/or
7    modify it under the terms of the GNU Library General Public
8    License as published by the Free Software Foundation; either
9    version 2 of the License, or (at your option) any later version.
10
11    This library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Library General Public License for more details.
15
16    You should have received a copy of the GNU Library General Public
17    License along with this library; if not, write to the Free
18    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19
20    Richard Gooch may be reached by email at  rgooch@atnf.csiro.au
21    The postal address is:
22      Richard Gooch, c/o ATNF, P. O. Box 76, Epping, N.S.W., 2121, Australia.
23
24    Source: "Pentium Pro Family Developer's Manual, Volume 3:
25    Operating System Writer's Guide" (Intel document number 242692),
26    section 11.11.7
27
28    This was cleaned and made readable by Patrick Mochel <mochel@osdl.org>
29    on 6-7 March 2002.
30    Source: Intel Architecture Software Developers Manual, Volume 3:
31    System Programming Guide; Section 9.11. (1997 edition - PPro).
32*/
33
34#include <linux/types.h> /* FIXME: kvm_para.h needs this */
35
36#include <linux/stop_machine.h>
37#include <linux/kvm_para.h>
38#include <linux/uaccess.h>
39#include <linux/export.h>
40#include <linux/mutex.h>
41#include <linux/init.h>
42#include <linux/sort.h>
43#include <linux/cpu.h>
44#include <linux/pci.h>
45#include <linux/smp.h>
46#include <linux/syscore_ops.h>
47#include <linux/rcupdate.h>
48
49#include <asm/cacheinfo.h>
50#include <asm/cpufeature.h>
51#include <asm/e820/api.h>
52#include <asm/mtrr.h>
53#include <asm/msr.h>
54#include <asm/memtype.h>
55
56#include "mtrr.h"
57
58/* arch_phys_wc_add returns an MTRR register index plus this offset. */
59#define MTRR_TO_PHYS_WC_OFFSET 1000
60
61u32 num_var_ranges;
62
63unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
64DEFINE_MUTEX(mtrr_mutex);
65
66const struct mtrr_ops *mtrr_if;
67
68/*  Returns non-zero if we have the write-combining memory type  */
69static int have_wrcomb(void)
70{
71	struct pci_dev *dev;
72
73	dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL);
74	if (dev != NULL) {
75		/*
76		 * ServerWorks LE chipsets < rev 6 have problems with
77		 * write-combining. Don't allow it and leave room for other
78		 * chipsets to be tagged
79		 */
80		if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
81		    dev->device == PCI_DEVICE_ID_SERVERWORKS_LE &&
82		    dev->revision <= 5) {
83			pr_info("Serverworks LE rev < 6 detected. Write-combining disabled.\n");
84			pci_dev_put(dev);
85			return 0;
86		}
87		/*
88		 * Intel 450NX errata # 23. Non ascending cacheline evictions to
89		 * write combining memory may resulting in data corruption
90		 */
91		if (dev->vendor == PCI_VENDOR_ID_INTEL &&
92		    dev->device == PCI_DEVICE_ID_INTEL_82451NX) {
93			pr_info("Intel 450NX MMC detected. Write-combining disabled.\n");
94			pci_dev_put(dev);
95			return 0;
96		}
97		pci_dev_put(dev);
98	}
99	return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0;
100}
101
102static void __init init_table(void)
103{
104	int i, max;
105
106	max = num_var_ranges;
107	for (i = 0; i < max; i++)
108		mtrr_usage_table[i] = 1;
109}
110
111struct set_mtrr_data {
112	unsigned long	smp_base;
113	unsigned long	smp_size;
114	unsigned int	smp_reg;
115	mtrr_type	smp_type;
116};
117
118/**
119 * mtrr_rendezvous_handler - Work done in the synchronization handler. Executed
120 * by all the CPUs.
121 * @info: pointer to mtrr configuration data
122 *
123 * Returns nothing.
124 */
125static int mtrr_rendezvous_handler(void *info)
126{
127	struct set_mtrr_data *data = info;
128
129	mtrr_if->set(data->smp_reg, data->smp_base,
130		     data->smp_size, data->smp_type);
131	return 0;
132}
133
134static inline int types_compatible(mtrr_type type1, mtrr_type type2)
135{
136	return type1 == MTRR_TYPE_UNCACHABLE ||
137	       type2 == MTRR_TYPE_UNCACHABLE ||
138	       (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) ||
139	       (type1 == MTRR_TYPE_WRBACK && type2 == MTRR_TYPE_WRTHROUGH);
140}
141
142/**
143 * set_mtrr - update mtrrs on all processors
144 * @reg:	mtrr in question
145 * @base:	mtrr base
146 * @size:	mtrr size
147 * @type:	mtrr type
148 *
149 * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
150 *
151 * 1. Queue work to do the following on all processors:
152 * 2. Disable Interrupts
153 * 3. Wait for all procs to do so
154 * 4. Enter no-fill cache mode
155 * 5. Flush caches
156 * 6. Clear PGE bit
157 * 7. Flush all TLBs
158 * 8. Disable all range registers
159 * 9. Update the MTRRs
160 * 10. Enable all range registers
161 * 11. Flush all TLBs and caches again
162 * 12. Enter normal cache mode and reenable caching
163 * 13. Set PGE
164 * 14. Wait for buddies to catch up
165 * 15. Enable interrupts.
166 *
167 * What does that mean for us? Well, stop_machine() will ensure that
168 * the rendezvous handler is started on each CPU. And in lockstep they
169 * do the state transition of disabling interrupts, updating MTRR's
170 * (the CPU vendors may each do it differently, so we call mtrr_if->set()
171 * callback and let them take care of it.) and enabling interrupts.
172 *
173 * Note that the mechanism is the same for UP systems, too; all the SMP stuff
174 * becomes nops.
175 */
176static void set_mtrr(unsigned int reg, unsigned long base, unsigned long size,
177		     mtrr_type type)
178{
179	struct set_mtrr_data data = { .smp_reg = reg,
180				      .smp_base = base,
181				      .smp_size = size,
182				      .smp_type = type
183				    };
184
185	stop_machine_cpuslocked(mtrr_rendezvous_handler, &data, cpu_online_mask);
186
187	generic_rebuild_map();
188}
189
190/**
191 * mtrr_add_page - Add a memory type region
192 * @base: Physical base address of region in pages (in units of 4 kB!)
193 * @size: Physical size of region in pages (4 kB)
194 * @type: Type of MTRR desired
195 * @increment: If this is true do usage counting on the region
196 *
197 * Memory type region registers control the caching on newer Intel and
198 * non Intel processors. This function allows drivers to request an
199 * MTRR is added. The details and hardware specifics of each processor's
200 * implementation are hidden from the caller, but nevertheless the
201 * caller should expect to need to provide a power of two size on an
202 * equivalent power of two boundary.
203 *
204 * If the region cannot be added either because all regions are in use
205 * or the CPU cannot support it a negative value is returned. On success
206 * the register number for this entry is returned, but should be treated
207 * as a cookie only.
208 *
209 * On a multiprocessor machine the changes are made to all processors.
210 * This is required on x86 by the Intel processors.
211 *
212 * The available types are
213 *
214 * %MTRR_TYPE_UNCACHABLE - No caching
215 *
216 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
217 *
218 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
219 *
220 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
221 *
222 * BUGS: Needs a quiet flag for the cases where drivers do not mind
223 * failures and do not wish system log messages to be sent.
224 */
225int mtrr_add_page(unsigned long base, unsigned long size,
226		  unsigned int type, bool increment)
227{
228	unsigned long lbase, lsize;
229	int i, replace, error;
230	mtrr_type ltype;
231
232	if (!mtrr_enabled())
233		return -ENXIO;
234
235	error = mtrr_if->validate_add_page(base, size, type);
236	if (error)
237		return error;
238
239	if (type >= MTRR_NUM_TYPES) {
240		pr_warn("type: %u invalid\n", type);
241		return -EINVAL;
242	}
243
244	/* If the type is WC, check that this processor supports it */
245	if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
246		pr_warn("your processor doesn't support write-combining\n");
247		return -ENOSYS;
248	}
249
250	if (!size) {
251		pr_warn("zero sized request\n");
252		return -EINVAL;
253	}
254
255	if ((base | (base + size - 1)) >>
256	    (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
257		pr_warn("base or size exceeds the MTRR width\n");
258		return -EINVAL;
259	}
260
261	error = -EINVAL;
262	replace = -1;
263
264	/* No CPU hotplug when we change MTRR entries */
265	cpus_read_lock();
266
267	/* Search for existing MTRR  */
268	mutex_lock(&mtrr_mutex);
269	for (i = 0; i < num_var_ranges; ++i) {
270		mtrr_if->get(i, &lbase, &lsize, &ltype);
271		if (!lsize || base > lbase + lsize - 1 ||
272		    base + size - 1 < lbase)
273			continue;
274		/*
275		 * At this point we know there is some kind of
276		 * overlap/enclosure
277		 */
278		if (base < lbase || base + size - 1 > lbase + lsize - 1) {
279			if (base <= lbase &&
280			    base + size - 1 >= lbase + lsize - 1) {
281				/*  New region encloses an existing region  */
282				if (type == ltype) {
283					replace = replace == -1 ? i : -2;
284					continue;
285				} else if (types_compatible(type, ltype))
286					continue;
287			}
288			pr_warn("0x%lx000,0x%lx000 overlaps existing 0x%lx000,0x%lx000\n", base, size, lbase,
289				lsize);
290			goto out;
291		}
292		/* New region is enclosed by an existing region */
293		if (ltype != type) {
294			if (types_compatible(type, ltype))
295				continue;
296			pr_warn("type mismatch for %lx000,%lx000 old: %s new: %s\n",
297				base, size, mtrr_attrib_to_str(ltype),
298				mtrr_attrib_to_str(type));
299			goto out;
300		}
301		if (increment)
302			++mtrr_usage_table[i];
303		error = i;
304		goto out;
305	}
306	/* Search for an empty MTRR */
307	i = mtrr_if->get_free_region(base, size, replace);
308	if (i >= 0) {
309		set_mtrr(i, base, size, type);
310		if (likely(replace < 0)) {
311			mtrr_usage_table[i] = 1;
312		} else {
313			mtrr_usage_table[i] = mtrr_usage_table[replace];
314			if (increment)
315				mtrr_usage_table[i]++;
316			if (unlikely(replace != i)) {
317				set_mtrr(replace, 0, 0, 0);
318				mtrr_usage_table[replace] = 0;
319			}
320		}
321	} else {
322		pr_info("no more MTRRs available\n");
323	}
324	error = i;
325 out:
326	mutex_unlock(&mtrr_mutex);
327	cpus_read_unlock();
328	return error;
329}
330
331static int mtrr_check(unsigned long base, unsigned long size)
332{
333	if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
334		pr_warn("size and base must be multiples of 4 kiB\n");
335		Dprintk("size: 0x%lx  base: 0x%lx\n", size, base);
336		dump_stack();
337		return -1;
338	}
339	return 0;
340}
341
342/**
343 * mtrr_add - Add a memory type region
344 * @base: Physical base address of region
345 * @size: Physical size of region
346 * @type: Type of MTRR desired
347 * @increment: If this is true do usage counting on the region
348 *
349 * Memory type region registers control the caching on newer Intel and
350 * non Intel processors. This function allows drivers to request an
351 * MTRR is added. The details and hardware specifics of each processor's
352 * implementation are hidden from the caller, but nevertheless the
353 * caller should expect to need to provide a power of two size on an
354 * equivalent power of two boundary.
355 *
356 * If the region cannot be added either because all regions are in use
357 * or the CPU cannot support it a negative value is returned. On success
358 * the register number for this entry is returned, but should be treated
359 * as a cookie only.
360 *
361 * On a multiprocessor machine the changes are made to all processors.
362 * This is required on x86 by the Intel processors.
363 *
364 * The available types are
365 *
366 * %MTRR_TYPE_UNCACHABLE - No caching
367 *
368 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
369 *
370 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
371 *
372 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
373 *
374 * BUGS: Needs a quiet flag for the cases where drivers do not mind
375 * failures and do not wish system log messages to be sent.
376 */
377int mtrr_add(unsigned long base, unsigned long size, unsigned int type,
378	     bool increment)
379{
380	if (!mtrr_enabled())
381		return -ENODEV;
382	if (mtrr_check(base, size))
383		return -EINVAL;
384	return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
385			     increment);
386}
387
388/**
389 * mtrr_del_page - delete a memory type region
390 * @reg: Register returned by mtrr_add
391 * @base: Physical base address
392 * @size: Size of region
393 *
394 * If register is supplied then base and size are ignored. This is
395 * how drivers should call it.
396 *
397 * Releases an MTRR region. If the usage count drops to zero the
398 * register is freed and the region returns to default state.
399 * On success the register is returned, on failure a negative error
400 * code.
401 */
402int mtrr_del_page(int reg, unsigned long base, unsigned long size)
403{
404	int i, max;
405	mtrr_type ltype;
406	unsigned long lbase, lsize;
407	int error = -EINVAL;
408
409	if (!mtrr_enabled())
410		return -ENODEV;
411
412	max = num_var_ranges;
413	/* No CPU hotplug when we change MTRR entries */
414	cpus_read_lock();
415	mutex_lock(&mtrr_mutex);
416	if (reg < 0) {
417		/*  Search for existing MTRR  */
418		for (i = 0; i < max; ++i) {
419			mtrr_if->get(i, &lbase, &lsize, &ltype);
420			if (lbase == base && lsize == size) {
421				reg = i;
422				break;
423			}
424		}
425		if (reg < 0) {
426			Dprintk("no MTRR for %lx000,%lx000 found\n", base, size);
427			goto out;
428		}
429	}
430	if (reg >= max) {
431		pr_warn("register: %d too big\n", reg);
432		goto out;
433	}
434	mtrr_if->get(reg, &lbase, &lsize, &ltype);
435	if (lsize < 1) {
436		pr_warn("MTRR %d not used\n", reg);
437		goto out;
438	}
439	if (mtrr_usage_table[reg] < 1) {
440		pr_warn("reg: %d has count=0\n", reg);
441		goto out;
442	}
443	if (--mtrr_usage_table[reg] < 1)
444		set_mtrr(reg, 0, 0, 0);
445	error = reg;
446 out:
447	mutex_unlock(&mtrr_mutex);
448	cpus_read_unlock();
449	return error;
450}
451
452/**
453 * mtrr_del - delete a memory type region
454 * @reg: Register returned by mtrr_add
455 * @base: Physical base address
456 * @size: Size of region
457 *
458 * If register is supplied then base and size are ignored. This is
459 * how drivers should call it.
460 *
461 * Releases an MTRR region. If the usage count drops to zero the
462 * register is freed and the region returns to default state.
463 * On success the register is returned, on failure a negative error
464 * code.
465 */
466int mtrr_del(int reg, unsigned long base, unsigned long size)
467{
468	if (!mtrr_enabled())
469		return -ENODEV;
470	if (mtrr_check(base, size))
471		return -EINVAL;
472	return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
473}
474
475/**
476 * arch_phys_wc_add - add a WC MTRR and handle errors if PAT is unavailable
477 * @base: Physical base address
478 * @size: Size of region
479 *
480 * If PAT is available, this does nothing.  If PAT is unavailable, it
481 * attempts to add a WC MTRR covering size bytes starting at base and
482 * logs an error if this fails.
483 *
484 * The called should provide a power of two size on an equivalent
485 * power of two boundary.
486 *
487 * Drivers must store the return value to pass to mtrr_del_wc_if_needed,
488 * but drivers should not try to interpret that return value.
489 */
490int arch_phys_wc_add(unsigned long base, unsigned long size)
491{
492	int ret;
493
494	if (pat_enabled() || !mtrr_enabled())
495		return 0;  /* Success!  (We don't need to do anything.) */
496
497	ret = mtrr_add(base, size, MTRR_TYPE_WRCOMB, true);
498	if (ret < 0) {
499		pr_warn("Failed to add WC MTRR for [%p-%p]; performance may suffer.",
500			(void *)base, (void *)(base + size - 1));
501		return ret;
502	}
503	return ret + MTRR_TO_PHYS_WC_OFFSET;
504}
505EXPORT_SYMBOL(arch_phys_wc_add);
506
507/*
508 * arch_phys_wc_del - undoes arch_phys_wc_add
509 * @handle: Return value from arch_phys_wc_add
510 *
511 * This cleans up after mtrr_add_wc_if_needed.
512 *
513 * The API guarantees that mtrr_del_wc_if_needed(error code) and
514 * mtrr_del_wc_if_needed(0) do nothing.
515 */
516void arch_phys_wc_del(int handle)
517{
518	if (handle >= 1) {
519		WARN_ON(handle < MTRR_TO_PHYS_WC_OFFSET);
520		mtrr_del(handle - MTRR_TO_PHYS_WC_OFFSET, 0, 0);
521	}
522}
523EXPORT_SYMBOL(arch_phys_wc_del);
524
525/*
526 * arch_phys_wc_index - translates arch_phys_wc_add's return value
527 * @handle: Return value from arch_phys_wc_add
528 *
529 * This will turn the return value from arch_phys_wc_add into an mtrr
530 * index suitable for debugging.
531 *
532 * Note: There is no legitimate use for this function, except possibly
533 * in printk line.  Alas there is an illegitimate use in some ancient
534 * drm ioctls.
535 */
536int arch_phys_wc_index(int handle)
537{
538	if (handle < MTRR_TO_PHYS_WC_OFFSET)
539		return -1;
540	else
541		return handle - MTRR_TO_PHYS_WC_OFFSET;
542}
543EXPORT_SYMBOL_GPL(arch_phys_wc_index);
544
545int __initdata changed_by_mtrr_cleanup;
546
547/**
548 * mtrr_bp_init - initialize MTRRs on the boot CPU
549 *
550 * This needs to be called early; before any of the other CPUs are
551 * initialized (i.e. before smp_init()).
552 */
553void __init mtrr_bp_init(void)
554{
555	bool generic_mtrrs = cpu_feature_enabled(X86_FEATURE_MTRR);
556	const char *why = "(not available)";
557	unsigned long config, dummy;
558
559	phys_hi_rsvd = GENMASK(31, boot_cpu_data.x86_phys_bits - 32);
560
561	if (!generic_mtrrs && mtrr_state.enabled) {
562		/*
563		 * Software overwrite of MTRR state, only for generic case.
564		 * Note that X86_FEATURE_MTRR has been reset in this case.
565		 */
566		init_table();
567		mtrr_build_map();
568		pr_info("MTRRs set to read-only\n");
569
570		return;
571	}
572
573	if (generic_mtrrs)
574		mtrr_if = &generic_mtrr_ops;
575	else
576		mtrr_set_if();
577
578	if (mtrr_enabled()) {
579		/* Get the number of variable MTRR ranges. */
580		if (mtrr_if == &generic_mtrr_ops)
581			rdmsr(MSR_MTRRcap, config, dummy);
582		else
583			config = mtrr_if->var_regs;
584		num_var_ranges = config & MTRR_CAP_VCNT;
585
586		init_table();
587		if (mtrr_if == &generic_mtrr_ops) {
588			/* BIOS may override */
589			if (get_mtrr_state()) {
590				memory_caching_control |= CACHE_MTRR;
591				changed_by_mtrr_cleanup = mtrr_cleanup();
592				mtrr_build_map();
593			} else {
594				mtrr_if = NULL;
595				why = "by BIOS";
596			}
597		}
598	}
599
600	if (!mtrr_enabled())
601		pr_info("MTRRs disabled %s\n", why);
602}
603
604/**
605 * mtrr_save_state - Save current fixed-range MTRR state of the first
606 *	cpu in cpu_online_mask.
607 */
608void mtrr_save_state(void)
609{
610	int first_cpu;
611
612	if (!mtrr_enabled())
613		return;
614
615	first_cpu = cpumask_first(cpu_online_mask);
616	smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
617}
618
619static int __init mtrr_init_finalize(void)
620{
621	/*
622	 * Map might exist if mtrr_overwrite_state() has been called or if
623	 * mtrr_enabled() returns true.
624	 */
625	mtrr_copy_map();
626
627	if (!mtrr_enabled())
628		return 0;
629
630	if (memory_caching_control & CACHE_MTRR) {
631		if (!changed_by_mtrr_cleanup)
632			mtrr_state_warn();
633		return 0;
634	}
635
636	mtrr_register_syscore();
637
638	return 0;
639}
640subsys_initcall(mtrr_init_finalize);
641