/*  x86-64 MTRR (Memory Type Range Register) driver.
	Based largely upon arch/i386/kernel/mtrr.c

	Copyright (C) 1997-2000  Richard Gooch
	Copyright (C) 2002 Dave Jones.

	This library is free software; you can redistribute it and/or
	modify it under the terms of the GNU Library General Public
	License as published by the Free Software Foundation; either
	version 2 of the License, or (at your option) any later version.

	This library is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
	Library General Public License for more details.

	You should have received a copy of the GNU Library General Public
	License along with this library; if not, write to the Free
	Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

	(For earlier history, see arch/i386/kernel/mtrr.c)
	v2.00	September 2001	Dave Jones <davej@suse.de>
	  Initial rewrite for x86-64.
	  Removal of non-Intel style MTRR code.
	v2.01  June 2002  Dave Jones <davej@suse.de>
	  Removal of redundant abstraction layer.
	  64-bit fixes.
	v2.02  July 2002  Dave Jones <davej@suse.de>
	  Fix gentry inconsistencies between kernel/userspace.
	  More casts to clean up warnings.
*/

#include <linux/types.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/tty.h>
#include <linux/timer.h>
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/wait.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/ioport.h>
#include <linux/delay.h>
#include <linux/fs.h>
#include <linux/ctype.h>
#include <linux/proc_fs.h>
#include <linux/devfs_fs_kernel.h>
#include <linux/mm.h>
#include <linux/module.h>
#define MTRR_NEED_STRINGS
#include <asm/mtrr.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/agp_backend.h>

#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/segment.h>
#include <asm/bitops.h>
#include <asm/atomic.h>
#include <asm/msr.h>

#include <asm/hardirq.h>
#include <linux/irq.h>

#define MTRR_VERSION "2.02 (20020716)"

#undef Dprintk

#define Dprintk(...)

#define TRUE  1
#define FALSE 0

#define MSR_MTRRphysBase(reg) (0x200 + 2 * (reg))
#define MSR_MTRRphysMask(reg) (0x200 + 2 * (reg) + 1)

#define NUM_FIXED_RANGES 88

#define MTRR_CHANGE_MASK_FIXED 0x01
#define MTRR_CHANGE_MASK_VARIABLE 0x02
#define MTRR_CHANGE_MASK_DEFTYPE 0x04

typedef u8 mtrr_type;

#define LINE_SIZE 80

#ifdef CONFIG_SMP
#define set_mtrr(reg,base,size,type) set_mtrr_smp (reg, base, size, type)
#else
#define set_mtrr(reg,base,size,type) set_mtrr_up (reg, base, size, type, TRUE)
#endif

#if defined(CONFIG_PROC_FS) || defined(CONFIG_DEVFS_FS)
#define USERSPACE_INTERFACE
#endif

#ifdef USERSPACE_INTERFACE
static char *ascii_buffer;
static unsigned int ascii_buf_bytes;
static void compute_ascii (void);
#else
#define compute_ascii() while (0)
#endif

static unsigned int *usage_table;
static DECLARE_MUTEX (mtrr_lock);

struct set_mtrr_context {
	u32 deftype_lo;
	u32 deftype_hi;
	unsigned long flags;
	u64 cr4val;
};


/*  Put the processor into a state where MTRRs can be safely set  */
static void set_mtrr_prepare (struct set_mtrr_context *ctxt)
{
	u64 cr0;

	/* Disable interrupts locally */
	__save_flags(ctxt->flags);
	__cli();

	/* Save value of CR4 and clear Page Global Enable (bit 7) */
	if (test_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability)) {
		ctxt->cr4val = read_cr4();
		write_cr4(ctxt->cr4val & ~(1UL << 7));
	}

	/* Disable and flush caches. Note that wbinvd flushes the TLBs as
	   a side-effect */
	cr0 = read_cr0() | 0x40000000;
	wbinvd();
	write_cr0(cr0);
	wbinvd();

	/* Disable MTRRs, and set the default type to uncached */
	rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi);
	wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL, ctxt->deftype_hi);
}


/* Restore the processor after a set_mtrr_prepare */
static void set_mtrr_done (struct set_mtrr_context *ctxt)
{
	/* Flush caches and TLBs */
	wbinvd();

	/* Restore MTRRdefType */
	wrmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi);

	/* Enable caches */
	write_cr0(read_cr0() & 0xbfffffff);

	/* Restore value of CR4 */
	if (test_bit(X86_FEATURE_PGE, boot_cpu_data.x86_capability))
		write_cr4 (ctxt->cr4val);

	/* Re-enable interrupts locally (if enabled previously) */
	__restore_flags(ctxt->flags);
}


/*  This function returns the number of variable MTRRs  */
static unsigned int get_num_var_ranges (void)
{
	u32 config, dummy;

	rdmsr (MSR_MTRRcap, config, dummy);
	return (config & 0xff);
}


/*  Returns non-zero if we have the write-combining memory type  */
static int have_wrcomb (void)
{
	u32 config, dummy;

	rdmsr (MSR_MTRRcap, config, dummy);
	return (config & (1 << 10));
}


static u64 size_or_mask, size_and_mask;

static void get_mtrr (unsigned int reg, u64 *base, u32 *size, mtrr_type * type)
{
	u32 mask_lo, mask_hi, base_lo, base_hi;
	u64 newsize;

	rdmsr (MSR_MTRRphysMask(reg), mask_lo, mask_hi);
	if ((mask_lo & 0x800) == 0) {
		/*  Invalid (i.e. free) range  */
		*base = 0;
		*size = 0;
		*type = 0;
		return;
	}

	rdmsr (MSR_MTRRphysBase(reg), base_lo, base_hi);

	/* Work out the size of the region from the shifted address mask. */
	newsize = (u64) mask_hi << 32 | (mask_lo & ~0x800);
	newsize = ~newsize+1;
	*size = (u32) newsize >> PAGE_SHIFT;
	*base = (u64) base_hi << (32 - PAGE_SHIFT) | base_lo >> PAGE_SHIFT;
	*type = base_lo & 0xff;
}
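
/*
 * Worked example for the conversion above (illustrative values, assuming a
 * CPU with a 40-bit physical address width): a 256 MB region is programmed
 * with PhysMask bits 39:28 set plus the valid bit, i.e. mask_hi:mask_lo =
 * 0x000000ff:0xf0000800.  Dropping the valid bit gives 0xfff0000000;
 * ~0xfff0000000 + 1 = 0xffffff0010000000.  The cast to u32 throws away the
 * garbage bits that the negation introduced above bit 31, leaving
 * 0x10000000 bytes, and the shift by PAGE_SHIFT yields 0x10000 pages.
 */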



/*
 * Set variable MTRR register on the local CPU.
 *  <reg> The register to set.
 *  <base> The base address of the region.
 *  <size> The size of the region. If this is 0 the region is disabled.
 *  <type> The type of the region.
 *  <do_safe> If TRUE, do the change safely. If FALSE, safety measures should
 *  be done externally.
 */
static void set_mtrr_up (unsigned int reg, u64 base,
		   u32 size, mtrr_type type, int do_safe)
{
	struct set_mtrr_context ctxt;
	u64 base64;
	u64 size64;

	if (do_safe)
		set_mtrr_prepare (&ctxt);

	if (size == 0) {
		/* The invalid bit is kept in the mask, so we simply clear the
		   relevant mask register to disable a range. */
		wrmsr (MSR_MTRRphysMask(reg), 0, 0);
	} else {
		base64 = (base << PAGE_SHIFT) & size_and_mask;
		wrmsr (MSR_MTRRphysBase(reg), base64 | type, base64 >> 32);

		/* Widen before shifting so regions of 4 GB or more don't
		   overflow 32 bits */
		size64 = ~(((u64) size << PAGE_SHIFT) - 1);
		size64 = size64 & size_and_mask;
		wrmsr (MSR_MTRRphysMask(reg), (u32) (size64 | 0x800), (u32) (size64 >> 32));
	}
	if (do_safe)
		set_mtrr_done (&ctxt);
}
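
/*
 * Illustrative example of the encoding performed by set_mtrr_up() (values
 * made up, 40-bit physical address width assumed): a write-combining region
 * of 0x10000 pages (256 MB) at page 0xe0000 (physical 0xe0000000) results in
 *
 *	MTRRphysBase = 0x00000000e0000001	(base | MTRR_TYPE_WRCOMB)
 *	MTRRphysMask = 0x000000fff0000800	(mask | valid bit 11)
 */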


#ifdef CONFIG_SMP

struct mtrr_var_range {
	u32 base_lo;
	u32 base_hi;
	u32 mask_lo;
	u32 mask_hi;
};

/*  Get the MSR pair relating to a var range  */
static void __init get_mtrr_var_range (unsigned int index,
		struct mtrr_var_range *vr)
{
	rdmsr (MSR_MTRRphysBase(index), vr->base_lo, vr->base_hi);
	rdmsr (MSR_MTRRphysMask(index), vr->mask_lo, vr->mask_hi);
}


/*  Set the MSR pair relating to a var range. Returns TRUE if
    changes are made  */
static int __init set_mtrr_var_range_testing (unsigned int index,
		struct mtrr_var_range *vr)
{
	u32 lo, hi;
	int changed = FALSE;

	rdmsr (MSR_MTRRphysBase(index), lo, hi);
	if ((vr->base_lo & 0xfffff0ff) != (lo & 0xfffff0ff) ||
		(vr->base_hi & 0x000fffff) != (hi & 0x000fffff)) {
		wrmsr (MSR_MTRRphysBase(index), vr->base_lo, vr->base_hi);
		changed = TRUE;
	}

	rdmsr (MSR_MTRRphysMask(index), lo, hi);
	if ((vr->mask_lo & 0xfffff800) != (lo & 0xfffff800) ||
		(vr->mask_hi & 0x000fffff) != (hi & 0x000fffff)) {
		wrmsr (MSR_MTRRphysMask(index), vr->mask_lo, vr->mask_hi);
		changed = TRUE;
	}
	return changed;
}


static void __init get_fixed_ranges (mtrr_type * frs)
{
	u32 *p = (u32 *) frs;
	int i;

	rdmsr (MSR_MTRRfix64K_00000, p[0], p[1]);

	for (i = 0; i < 2; i++)
		rdmsr (MSR_MTRRfix16K_80000 + i, p[2 + i * 2], p[3 + i * 2]);
	for (i = 0; i < 8; i++)
		rdmsr (MSR_MTRRfix4K_C0000 + i, p[6 + i * 2], p[7 + i * 2]);
}


static int __init set_fixed_ranges_testing (mtrr_type * frs)
{
	u32 *p = (u32 *) frs;
	int changed = FALSE;
	int i;
	u32 lo, hi;

	Dprintk (KERN_INFO "mtrr: rdmsr 64K_00000\n");
	rdmsr (MSR_MTRRfix64K_00000, lo, hi);
	if (p[0] != lo || p[1] != hi) {
		Dprintk (KERN_INFO "mtrr: Writing %x:%x to 64K MSR. lohi were %x:%x\n", p[0], p[1], lo, hi);
		wrmsr (MSR_MTRRfix64K_00000, p[0], p[1]);
		changed = TRUE;
	}

	Dprintk (KERN_INFO "mtrr: rdmsr 16K_80000\n");
	for (i = 0; i < 2; i++) {
		rdmsr (MSR_MTRRfix16K_80000 + i, lo, hi);
		if (p[2 + i * 2] != lo || p[3 + i * 2] != hi) {
			Dprintk (KERN_INFO "mtrr: Writing %x:%x to 16K MSR%d. lohi were %x:%x\n", p[2 + i * 2], p[3 + i * 2], i, lo, hi );
			wrmsr (MSR_MTRRfix16K_80000 + i, p[2 + i * 2], p[3 + i * 2]);
			changed = TRUE;
		}
	}

	Dprintk (KERN_INFO "mtrr: rdmsr 4K_C0000\n");
	for (i = 0; i < 8; i++) {
		rdmsr (MSR_MTRRfix4K_C0000 + i, lo, hi);
		Dprintk (KERN_INFO "mtrr: MTRRfix4K_C0000+%d = %x:%x\n", i, lo, hi);
		if (p[6 + i * 2] != lo || p[7 + i * 2] != hi) {
			Dprintk (KERN_INFO "mtrr: Writing %x:%x to 4K MSR%d. lohi were %x:%x\n", p[6 + i * 2], p[7 + i * 2], i, lo, hi);
			wrmsr (MSR_MTRRfix4K_C0000 + i, p[6 + i * 2], p[7 + i * 2]);
			changed = TRUE;
		}
	}
	return changed;
}


struct mtrr_state {
	unsigned int num_var_ranges;
	struct mtrr_var_range *var_ranges;
	mtrr_type fixed_ranges[NUM_FIXED_RANGES];
	mtrr_type def_type;
	unsigned char enabled;
};


/*  Grab all of the MTRR state for this CPU into *state  */
static void __init get_mtrr_state (struct mtrr_state *state)
{
	unsigned int nvrs, i;
	struct mtrr_var_range *vrs;
	u32 lo, dummy;

	nvrs = state->num_var_ranges = get_num_var_ranges();
	vrs = state->var_ranges
	    = kmalloc (nvrs * sizeof (struct mtrr_var_range), GFP_KERNEL);
	if (vrs == NULL)
		nvrs = state->num_var_ranges = 0;

	for (i = 0; i < nvrs; i++)
		get_mtrr_var_range (i, &vrs[i]);
	get_fixed_ranges (state->fixed_ranges);

	rdmsr (MSR_MTRRdefType, lo, dummy);
	state->def_type = (lo & 0xff);
	state->enabled = (lo & 0xc00) >> 10;
}


/*  Free resources associated with a struct mtrr_state  */
static void __init finalize_mtrr_state (struct mtrr_state *state)
{
	if (state->var_ranges)
		kfree (state->var_ranges);
}


/*
 * Set the MTRR state for this CPU.
 *  <state> The MTRR state information to read.
 *  <ctxt> Some relevant CPU context.
 *  [NOTE] The CPU must already be in a safe state for MTRR changes.
 *  [RETURNS] 0 if no changes made, else a mask indicating what was changed.
 */
static u64 __init set_mtrr_state (struct mtrr_state *state,
		struct set_mtrr_context *ctxt)
{
	unsigned int i;
	u64 change_mask = 0;

	for (i = 0; i < state->num_var_ranges; i++)
		if (set_mtrr_var_range_testing (i, &state->var_ranges[i]))
			change_mask |= MTRR_CHANGE_MASK_VARIABLE;

	if (set_fixed_ranges_testing (state->fixed_ranges))
		change_mask |= MTRR_CHANGE_MASK_FIXED;
	/* set_mtrr_done() restores the old value of MTRRdefType,
	   so to set it we fiddle with the saved value  */
	if ((ctxt->deftype_lo & 0xff) != state->def_type
	    || ((ctxt->deftype_lo & 0xc00) >> 10) != state->enabled) {
		ctxt->deftype_lo |= (state->def_type | state->enabled << 10);
		change_mask |= MTRR_CHANGE_MASK_DEFTYPE;
	}

	return change_mask;
}


static atomic_t undone_count;
static volatile int wait_barrier_execute = FALSE;
static volatile int wait_barrier_cache_enable = FALSE;

struct set_mtrr_data {
	u64 smp_base;
	u32 smp_size;
	unsigned int smp_reg;
	mtrr_type smp_type;
};
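
/*
 * Rendezvous between the CPU calling set_mtrr_smp() ("master") and the other
 * CPUs running ipi_handler(), summarised for clarity (descriptive only):
 *
 *   1. master: set both barriers, undone_count = smp_num_cpus - 1, send the
 *	IPI, then set_mtrr_prepare().
 *      others: set_mtrr_prepare() (caches off), decrement undone_count,
 *	spin on wait_barrier_execute.
 *   2. master: wait for undone_count to hit 0, re-arm undone_count, drop
 *	wait_barrier_execute, write the MTRR itself.
 *      others: write the MTRR, decrement undone_count, spin on
 *	wait_barrier_cache_enable.
 *   3. master: wait for undone_count to hit 0 again, drop
 *	wait_barrier_cache_enable, set_mtrr_done().
 *      others: set_mtrr_done() (caches back on) and return from the IPI.
 */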

/*
 * Synchronisation handler. Executed by "other" CPUs.
 */
static void ipi_handler (void *info)
{
	struct set_mtrr_data *data = info;
	struct set_mtrr_context ctxt;

	set_mtrr_prepare (&ctxt);
	/* Notify master that I've flushed and disabled my cache  */
	atomic_dec (&undone_count);
	while (wait_barrier_execute)
		barrier ();

	/* The master has cleared me to execute  */
	set_mtrr_up (data->smp_reg, data->smp_base, data->smp_size,
			data->smp_type, FALSE);

	/* Notify master CPU that I've executed the function  */
	atomic_dec (&undone_count);

	/* Wait for master to clear me to enable cache and return  */
	while (wait_barrier_cache_enable)
		barrier ();
	set_mtrr_done (&ctxt);
}


static void set_mtrr_smp (unsigned int reg, u64 base, u32 size, mtrr_type type)
{
	struct set_mtrr_data data;
	struct set_mtrr_context ctxt;

	data.smp_reg = reg;
	data.smp_base = base;
	data.smp_size = size;
	data.smp_type = type;
	wait_barrier_execute = TRUE;
	wait_barrier_cache_enable = TRUE;
	atomic_set (&undone_count, smp_num_cpus - 1);

	/*  Start the ball rolling on other CPUs  */
	if (smp_call_function (ipi_handler, &data, 1, 0) != 0)
		panic ("mtrr: timed out waiting for other CPUs\n");

	/* Flush and disable the local CPU's cache */
	set_mtrr_prepare (&ctxt);

	/*  Wait for all other CPUs to flush and disable their caches  */
	while (atomic_read (&undone_count) > 0)
		barrier ();

	/* Set up for completion wait and then release other CPUs to change MTRRs */
	atomic_set (&undone_count, smp_num_cpus - 1);
	wait_barrier_execute = FALSE;
	set_mtrr_up (reg, base, size, type, FALSE);

	/*  Now wait for other CPUs to complete the function  */
	while (atomic_read (&undone_count) > 0)
		barrier ();

	/*  Now all CPUs should have finished the function. Release the barrier to
	   allow them to re-enable their caches and return from their interrupt,
	   then enable the local cache and return  */
	wait_barrier_cache_enable = FALSE;
	set_mtrr_done (&ctxt);
}


/*  Some BIOSes are broken and don't set all MTRRs the same!  */
static void __init mtrr_state_warn (u32 mask)
{
	if (!mask)
		return;
	if (mask & MTRR_CHANGE_MASK_FIXED)
		printk (KERN_INFO "mtrr: your CPUs had inconsistent fixed MTRR settings\n");
	if (mask & MTRR_CHANGE_MASK_VARIABLE)
		printk (KERN_INFO "mtrr: your CPUs had inconsistent variable MTRR settings\n");
	if (mask & MTRR_CHANGE_MASK_DEFTYPE)
		printk (KERN_INFO "mtrr: your CPUs had inconsistent MTRRdefType settings\n");
	printk (KERN_INFO "mtrr: probably your BIOS does not set up all CPUs\n");
}

#endif	/*  CONFIG_SMP  */


static inline char * attrib_to_str (int x)
{
	return (x <= 6) ? mtrr_strings[x] : "?";
}


static void __init init_table (void)
{
	int i, max;

	max = get_num_var_ranges ();
	if ((usage_table = kmalloc (max * sizeof *usage_table, GFP_KERNEL))==NULL) {
		printk ("mtrr: could not allocate\n");
		return;
	}

	for (i = 0; i < max; i++)
		usage_table[i] = 1;

#ifdef USERSPACE_INTERFACE
	if ((ascii_buffer = kmalloc (max * LINE_SIZE, GFP_KERNEL)) == NULL) {
		printk ("mtrr: could not allocate\n");
		return;
	}
	ascii_buf_bytes = 0;
	compute_ascii ();
#endif
}


/*
 * Get a free MTRR.
 * returns the index of the first free register on success, else -ENOSPC if none are free.
*/
static int get_free_region(void)
{
	int i, max;
	mtrr_type ltype;
	u64 lbase;
	u32 lsize;

	max = get_num_var_ranges ();
	for (i = 0; i < max; ++i) {
		get_mtrr (i, &lbase, &lsize, &ltype);
		if (lsize == 0)
			return i;
	}
	return -ENOSPC;
}


/**
 *	mtrr_add_page - Add a memory type region
 *	@base: Physical base address of region in pages (4 KB)
 *	@size: Physical size of region in pages (4 KB)
 *	@type: Type of MTRR desired
 *	@increment: If this is true do usage counting on the region
 *	Returns the MTRR register on success, else a negative number
 *	indicating the error code.
 *
 *	Memory type region registers control the caching on newer
 *	processors. This function allows drivers to request that an MTRR be
 *	added. The caller must provide a power of two size aligned on an
 *	equivalent power of two boundary.
 *
 *	If the region cannot be added either because all regions are in use
 *	or the CPU cannot support it a negative value is returned. On success
 *	the register number for this entry is returned, but should be treated
 *	as a cookie only.
 *
 *	On a multiprocessor machine the changes are made to all processors.
 *
 *	The available types are
 *
 *	%MTRR_TYPE_UNCACHABLE	-	No caching
 *	%MTRR_TYPE_WRBACK	-	Write data back in bursts whenever possible
 *	%MTRR_TYPE_WRCOMB	-	Write data back soon but allow bursts
 *	%MTRR_TYPE_WRTHROUGH	-	Cache reads but not writes
 *
 *	BUGS: Needs a quiet flag for the cases where drivers do not mind
 *	failures and do not wish system log messages to be sent.
 */

int mtrr_add_page (u64 base, u32 size, unsigned int type, char increment)
{
	int i, max;
	mtrr_type ltype;
	u64 lbase, last;
	u32 lsize;

	if (base + size < 0x100) {
		printk (KERN_WARNING
			"mtrr: cannot set region below 1 MiB (0x%Lx000,0x%x000)\n",
			base, size);
		return -EINVAL;
	}


	/*  Check upper bits of base and last are equal and lower bits are 0
	   for base and 1 for last  */
	last = base + size - 1;
	for (lbase = base; !(lbase & 1) && (last & 1);
	     lbase = lbase >> 1, last = last >> 1) ;

	if (lbase != last) {
		printk (KERN_WARNING
			"mtrr: base(0x%Lx000) is not aligned on a size(0x%x000) boundary\n",
			base, size);
		return -EINVAL;
	}

	if (type >= MTRR_NUM_TYPES) {
		printk ("mtrr: type: %u illegal\n", type);
		return -EINVAL;
	}

	/*  If the type is WC, check that this processor supports it  */
	if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
		printk (KERN_WARNING
			"mtrr: your processor doesn't support write-combining\n");
		return -ENOSYS;
	}

	if (base & (size_or_mask>>PAGE_SHIFT)) {
		printk (KERN_WARNING "mtrr: base(%Lx) exceeds the MTRR width(%Lx)\n",
				base, (size_or_mask>>PAGE_SHIFT));
		return -EINVAL;
	}

	if (size & (size_or_mask>>PAGE_SHIFT)) {
		printk (KERN_WARNING "mtrr: size exceeds the MTRR width\n");
		return -EINVAL;
	}

	increment = increment ? 1 : 0;
	max = get_num_var_ranges ();
	/*  Search for existing MTRR  */
	down (&mtrr_lock);
	for (i = 0; i < max; ++i) {
		get_mtrr (i, &lbase, &lsize, &ltype);
		if (base >= lbase + lsize)
			continue;
		if ((base < lbase) && (base + size <= lbase))
			continue;

		/*  At this point we know there is some kind of overlap/enclosure  */
		if ((base < lbase) || (base + size > lbase + lsize)) {
			up (&mtrr_lock);
			printk (KERN_WARNING
				"mtrr: 0x%Lx000,0x%x000 overlaps existing"
				" 0x%Lx000,0x%x000\n", base, size, lbase, lsize);
			return -EINVAL;
		}
		/*  New region is enclosed by an existing region  */
		if (ltype != type) {
			if (type == MTRR_TYPE_UNCACHABLE)
				continue;
			up (&mtrr_lock);
			printk
			    ("mtrr: type mismatch for %Lx000,%x000 old: %s new: %s\n",
			     base, size,
				 attrib_to_str (ltype),
			     attrib_to_str (type));
			return -EINVAL;
		}
		if (increment)
			++usage_table[i];
		compute_ascii ();
		up (&mtrr_lock);
		return i;
	}
	/*  Search for an empty MTRR  */
	i = get_free_region();
	if (i < 0) {
		up (&mtrr_lock);
		printk ("mtrr: no more MTRRs available\n");
		return i;
	}
	set_mtrr (i, base, size, type);
	usage_table[i] = 1;
	compute_ascii ();
	up (&mtrr_lock);
	return i;
}


/**
 *	mtrr_add - Add a memory type region
 *	@base: Physical base address of region
 *	@size: Physical size of region
 *	@type: Type of MTRR desired
 *	@increment: If this is true do usage counting on the region
 *	Return the MTRR register on success, else a negative number
 *	indicating the error code.
 *
 *	Memory type region registers control the caching on newer processors.
 *	This function allows drivers to request that an MTRR be added.
 *	The caller must provide a power of two size aligned on an
 *	equivalent power of two boundary.
 *
 *	If the region cannot be added either because all regions are in use
 *	or the CPU cannot support it a negative value is returned. On success
 *	the register number for this entry is returned, but should be treated
 *	as a cookie only.
 *
 *	On a multiprocessor machine the changes are made to all processors.
 *	This is required on x86 by the Intel processors.
 *
 *	The available types are
 *
 *	%MTRR_TYPE_UNCACHABLE	-	No caching
 *	%MTRR_TYPE_WRBACK	-	Write data back in bursts whenever possible
 *	%MTRR_TYPE_WRCOMB	-	Write data back soon but allow bursts
 *	%MTRR_TYPE_WRTHROUGH	-	Cache reads but not writes
 *
 *	BUGS: Needs a quiet flag for the cases where drivers do not mind
 *	failures and do not wish system log messages to be sent.
 */

int mtrr_add (u64 base, u32 size, unsigned int type, char increment)
{
	if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
		printk ("mtrr: size and base must be multiples of 4 kiB\n");
		printk ("mtrr: size: 0x%x  base: 0x%Lx\n", size, base);
		return -EINVAL;
	}
	return mtrr_add_page (base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
			      increment);
}
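
/*
 * Hypothetical usage sketch (not part of this driver): a device driver that
 * wants write-combining for a 4 MB frame buffer at fb_phys might do
 *
 *	int reg = mtrr_add (fb_phys, 0x400000, MTRR_TYPE_WRCOMB, 1);
 *	...
 *	if (reg >= 0)
 *		mtrr_del (reg, fb_phys, 0x400000);
 *
 * fb_phys and the 4 MB size are invented for the example; both must be page
 * aligned, and the size must be a power of two with the base aligned to it,
 * as checked in mtrr_add_page() above.  A negative return is not fatal: the
 * device still works, just without write-combining.
 */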


/**
 *	mtrr_del_page - delete a memory type region
 *	@reg: Register returned by mtrr_add
 *	@base: Physical base address
 *	@size: Size of region
 *
 *	If register is supplied then base and size are ignored. This is
 *	how drivers should call it.
 *
 *	Releases an MTRR region. If the usage count drops to zero the
 *	register is freed and the region returns to default state.
 *	On success the register is returned, on failure a negative error
 *	code.
 */

int mtrr_del_page (int reg, u64 base, u32 size)
{
	int i, max;
	mtrr_type ltype;
	u64 lbase;
	u32 lsize;

	max = get_num_var_ranges ();
	down (&mtrr_lock);
	if (reg < 0) {
		/*  Search for existing MTRR  */
		for (i = 0; i < max; ++i) {
			get_mtrr (i, &lbase, &lsize, &ltype);
			if (lbase == base && lsize == size) {
				reg = i;
				break;
			}
		}
		if (reg < 0) {
			up (&mtrr_lock);
			printk ("mtrr: no MTRR for %Lx000,%x000 found\n", base, size);
			return -EINVAL;
		}
	}

	if (reg >= max) {
		up (&mtrr_lock);
		printk ("mtrr: register: %d too big\n", reg);
		return -EINVAL;
	}
	get_mtrr (reg, &lbase, &lsize, &ltype);

	if (lsize < 1) {
		up (&mtrr_lock);
		printk ("mtrr: MTRR %d not used\n", reg);
		return -EINVAL;
	}

	if (usage_table[reg] < 1) {
		up (&mtrr_lock);
		printk ("mtrr: reg: %d has count=0\n", reg);
		return -EINVAL;
	}

	if (--usage_table[reg] < 1)
		set_mtrr (reg, 0, 0, 0);
	compute_ascii ();
	up (&mtrr_lock);
	return reg;
}


/**
 *	mtrr_del - delete a memory type region
 *	@reg: Register returned by mtrr_add
 *	@base: Physical base address
 *	@size: Size of region
 *
 *	If register is supplied then base and size are ignored. This is
 *	how drivers should call it.
 *
 *	Releases an MTRR region. If the usage count drops to zero the
 *	register is freed and the region returns to default state.
 *	On success the register is returned, on failure a negative error
 *	code.
 */

int mtrr_del (int reg, u64 base, u32 size)
{
	if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
		printk ("mtrr: size and base must be multiples of 4 kiB\n");
		printk ("mtrr: size: 0x%x  base: 0x%Lx\n", size, base);
		return -EINVAL;
	}
	return mtrr_del_page (reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
}


#ifdef USERSPACE_INTERFACE

static int mtrr_file_add (u64 base, u32 size, unsigned int type,
		struct file *file, int page)
{
	int reg, max;
	unsigned int *fcount = file->private_data;

	max = get_num_var_ranges ();
	if (fcount == NULL) {
		if ((fcount =
		     kmalloc (max * sizeof *fcount, GFP_KERNEL)) == NULL) {
			printk ("mtrr: could not allocate\n");
			return -ENOMEM;
		}
		memset (fcount, 0, max * sizeof *fcount);
		file->private_data = fcount;
	}

	if (!page) {
		if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
			printk
			    (KERN_INFO "mtrr: size and base must be multiples of 4 kiB\n");
			printk (KERN_INFO "mtrr: size: 0x%x  base: 0x%Lx\n", size, base);
			return -EINVAL;
		}
		base >>= PAGE_SHIFT;
		size >>= PAGE_SHIFT;
	}

	reg = mtrr_add_page (base, size, type, 1);

	if (reg >= 0)
		++fcount[reg];
	return reg;
}


static int mtrr_file_del (u64 base, u32 size,
		struct file *file, int page)
{
	int reg;
	unsigned int *fcount = file->private_data;

	if (!page) {
		if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
			printk
			    (KERN_INFO "mtrr: size and base must be multiples of 4 kiB\n");
			printk (KERN_INFO "mtrr: size: 0x%x  base: 0x%Lx\n", size, base);
			return -EINVAL;
		}
		base >>= PAGE_SHIFT;
		size >>= PAGE_SHIFT;
	}
	reg = mtrr_del_page (-1, base, size);
	if (reg < 0)
		return reg;
	if (fcount == NULL)
		return reg;
	if (fcount[reg] < 1)
		return -EINVAL;
	--fcount[reg];
	return reg;
}


static ssize_t mtrr_read (struct file *file, char *buf, size_t len,
		loff_t * ppos)
{
	if (*ppos >= ascii_buf_bytes)
		return 0;

	if (*ppos + len > ascii_buf_bytes)
		len = ascii_buf_bytes - *ppos;

	if (copy_to_user (buf, ascii_buffer + *ppos, len))
		return -EFAULT;

	*ppos += len;
	return len;
}


static ssize_t mtrr_write (struct file *file, const char *buf,
		size_t len, loff_t * ppos)
/*  Format of control line:
    "base=%Lx size=%Lx type=%s"     OR:
    "disable=%d"
*/
{
	int i, err, reg;
	u64 base;
	u32 size;
	char *ptr;
	char line[LINE_SIZE];

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/*  Can't seek (pwrite) on this device  */
	if (ppos != &file->f_pos)
		return -ESPIPE;
	memset (line, 0, LINE_SIZE);

	if (len > LINE_SIZE)
		len = LINE_SIZE;

	if (copy_from_user (line, buf, len - 1))
		return -EFAULT;
	ptr = line + strlen (line) - 1;

	if (*ptr == '\n')
		*ptr = '\0';

	if (!strncmp (line, "disable=", 8)) {
		reg = simple_strtoul (line + 8, &ptr, 0);
		err = mtrr_del_page (reg, 0, 0);
		if (err < 0)
			return err;
		return len;
	}

	if (strncmp (line, "base=", 5)) {
		printk (KERN_INFO "mtrr: no \"base=\" in line: \"%s\"\n", line);
		return -EINVAL;
	}

	base = simple_strtoull (line + 5, &ptr, 0);

	for (; isspace (*ptr); ++ptr) ;

	if (strncmp (ptr, "size=", 5)) {
		printk (KERN_INFO "mtrr: no \"size=\" in line: \"%s\"\n", line);
		return -EINVAL;
	}

	size = simple_strtoull (ptr + 5, &ptr, 0);

	if ((base & 0xfff) || (size & 0xfff)) {
		printk (KERN_INFO "mtrr: size and base must be multiples of 4 kiB\n");
		printk (KERN_INFO "mtrr: size: 0x%x  base: 0x%Lx\n", size, base);
		return -EINVAL;
	}

	for (; isspace (*ptr); ++ptr) ;

	if (strncmp (ptr, "type=", 5)) {
		printk (KERN_INFO "mtrr: no \"type=\" in line: \"%s\"\n", line);
		return -EINVAL;
	}
	ptr += 5;

	for (; isspace (*ptr); ++ptr) ;

	for (i = 0; i < MTRR_NUM_TYPES; ++i) {
		if (strcmp (ptr, mtrr_strings[i]))
			continue;
		base >>= PAGE_SHIFT;
		size >>= PAGE_SHIFT;
		err = mtrr_add_page ((u64) base, size, i, 1);
		if (err < 0)
			return err;
		return len;
	}
	printk (KERN_INFO "mtrr: illegal type: \"%s\"\n", ptr);
	return -EINVAL;
}
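
/*
 * Example of driving mtrr_write() from userspace (the addresses are made up,
 * but the keywords match what is parsed above):
 *
 *	echo "base=0xe8000000 size=0x400000 type=write-combining" > /proc/mtrr
 *	echo "disable=2" > /proc/mtrr
 *
 * The type string must match one of the entries in mtrr_strings[].
 */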


static int mtrr_ioctl (struct inode *inode, struct file *file,
		unsigned int cmd, unsigned long arg)
{
	int err;
	mtrr_type type;
	struct mtrr_sentry sentry;
	struct mtrr_gentry gentry;

	switch (cmd) {
	default:
		return -ENOIOCTLCMD;

	case MTRRIOC_ADD_ENTRY:
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
		if (copy_from_user (&sentry, (void *) arg, sizeof sentry))
			return -EFAULT;
		err = mtrr_file_add (sentry.base, sentry.size, sentry.type,
				   file, 0);
		if (err < 0)
			return err;
		break;

	case MTRRIOC_SET_ENTRY:
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
		if (copy_from_user (&sentry, (void *) arg, sizeof sentry))
			return -EFAULT;
		err = mtrr_add (sentry.base, sentry.size, sentry.type, 0);
		if (err < 0)
			return err;
		break;

	case MTRRIOC_DEL_ENTRY:
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
		if (copy_from_user (&sentry, (void *) arg, sizeof sentry))
			return -EFAULT;
		err = mtrr_file_del (sentry.base, sentry.size, file, 0);
		if (err < 0)
			return err;
		break;

	case MTRRIOC_KILL_ENTRY:
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
		if (copy_from_user (&sentry, (void *) arg, sizeof sentry))
			return -EFAULT;
		err = mtrr_del (-1, sentry.base, sentry.size);
		if (err < 0)
			return err;
		break;

	case MTRRIOC_GET_ENTRY:
		if (copy_from_user (&gentry, (void *) arg, sizeof gentry))
			return -EFAULT;
		if (gentry.regnum >= get_num_var_ranges ())
			return -EINVAL;
		get_mtrr (gentry.regnum, (u64*) &gentry.base, &gentry.size, &type);

		/* Hide entries that go above 4GB */
		if (gentry.base + gentry.size > 0x100000
		    || gentry.size == 0x100000)
			gentry.base = gentry.size = gentry.type = 0;
		else {
			gentry.base <<= PAGE_SHIFT;
			gentry.size <<= PAGE_SHIFT;
			gentry.type = type;
		}

		if (copy_to_user ((void *) arg, &gentry, sizeof gentry))
			return -EFAULT;
		break;

	case MTRRIOC_ADD_PAGE_ENTRY:
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
		if (copy_from_user (&sentry, (void *) arg, sizeof sentry))
			return -EFAULT;
		err = mtrr_file_add (sentry.base, sentry.size, sentry.type, file, 1);
		if (err < 0)
			return err;
		break;

	case MTRRIOC_SET_PAGE_ENTRY:
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
		if (copy_from_user (&sentry, (void *) arg, sizeof sentry))
			return -EFAULT;
		err = mtrr_add_page (sentry.base, sentry.size, sentry.type, 0);
		if (err < 0)
			return err;
		break;

	case MTRRIOC_DEL_PAGE_ENTRY:
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
		if (copy_from_user (&sentry, (void *) arg, sizeof sentry))
			return -EFAULT;
		err = mtrr_file_del (sentry.base, sentry.size, file, 1);
		if (err < 0)
			return err;
		break;

	case MTRRIOC_KILL_PAGE_ENTRY:
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
		if (copy_from_user (&sentry, (void *) arg, sizeof sentry))
			return -EFAULT;
		err = mtrr_del_page (-1, sentry.base, sentry.size);
		if (err < 0)
			return err;
		break;

	case MTRRIOC_GET_PAGE_ENTRY:
		if (copy_from_user (&gentry, (void *) arg, sizeof gentry))
			return -EFAULT;
		if (gentry.regnum >= get_num_var_ranges ())
			return -EINVAL;
		get_mtrr (gentry.regnum, (u64*) &gentry.base, &gentry.size, &type);
		gentry.type = type;

		if (copy_to_user ((void *) arg, &gentry, sizeof gentry))
			return -EFAULT;
		break;
	}
	return 0;
}


static int mtrr_close (struct inode *ino, struct file *file)
{
	int i, max;
	unsigned int *fcount = file->private_data;

	if (fcount == NULL)
		return 0;

	lock_kernel ();
	max = get_num_var_ranges ();
	for (i = 0; i < max; ++i) {
		while (fcount[i] > 0) {
			if (mtrr_del (i, 0, 0) < 0)
				printk ("mtrr: reg %d not used\n", i);
			--fcount[i];
		}
	}
	unlock_kernel ();
	kfree (fcount);
	file->private_data = NULL;
	return 0;
}


static struct file_operations mtrr_fops = {
	owner:	THIS_MODULE,
	read:	mtrr_read,
	write:	mtrr_write,
	ioctl:	mtrr_ioctl,
	release:mtrr_close,
};

#ifdef CONFIG_PROC_FS
static struct proc_dir_entry *proc_root_mtrr;
#endif

static devfs_handle_t devfs_handle;

static void compute_ascii (void)
{
	char factor;
	int i, max;
	mtrr_type type;
	u64 base;
	u32 size;

	ascii_buf_bytes = 0;
	max = get_num_var_ranges ();
	for (i = 0; i < max; i++) {
		get_mtrr (i, &base, &size, &type);
		if (size == 0)
			usage_table[i] = 0;
		else {
			if (size < (0x100000 >> PAGE_SHIFT)) {
				/* less than 1MB */
				factor = 'K';
				size <<= PAGE_SHIFT - 10;
			} else {
				factor = 'M';
				size >>= 20 - PAGE_SHIFT;
			}
			sprintf (ascii_buffer + ascii_buf_bytes,
				"reg%02i: base=0x%05Lx000 (%4iMB), size=%4i%cB: %s, count=%d\n",
				i, base, (u32) base >> (20 - PAGE_SHIFT), size, factor,
				attrib_to_str (type), usage_table[i]);
			ascii_buf_bytes += strlen (ascii_buffer + ascii_buf_bytes);
		}
	}
	devfs_set_file_size (devfs_handle, ascii_buf_bytes);
#ifdef CONFIG_PROC_FS
	if (proc_root_mtrr)
		proc_root_mtrr->size = ascii_buf_bytes;
#endif
}

#endif	/*  USERSPACE_INTERFACE  */

EXPORT_SYMBOL (mtrr_add);
EXPORT_SYMBOL (mtrr_del);


static void __init mtrr_setup (void)
{
	printk ("mtrr: v%s\n", MTRR_VERSION);

	if (test_bit (X86_FEATURE_MTRR, boot_cpu_data.x86_capability)) {
		/* Query the width (in bits) of the physical
		   addressable memory on the Hammer family. */
		if ((cpuid_eax (0x80000000) >= 0x80000008)) {
			u32 phys_addr;
			phys_addr = cpuid_eax (0x80000008) & 0xff;
			size_or_mask = ~((1L << phys_addr) - 1);
			/*
			 * top bits MBZ as it's beyond the addressable range.
			 * bottom bits MBZ as we don't care about lower 12 bits of addr.
			 */
			size_and_mask = (~size_or_mask) & 0x000ffffffffff000L;
		}
	}
}
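
/*
 * Illustrative arithmetic for the masks computed above: a CPU reporting a
 * 40-bit physical address width in CPUID 0x80000008 gives
 *
 *	size_or_mask  = ~((1L << 40) - 1)                  = 0xffffff0000000000
 *	size_and_mask = ~size_or_mask & 0x000ffffffffff000 = 0x000000fffffff000
 *
 * i.e. size_or_mask flags address bits the MTRRs cannot express, while
 * size_and_mask keeps only the meaningful bits of a physical base or mask.
 */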

#ifdef CONFIG_SMP

static volatile u32 smp_changes_mask __initdata = 0;
static struct mtrr_state smp_mtrr_state __initdata = { 0, 0 };

void __init mtrr_init_boot_cpu (void)
{
	mtrr_setup();
	get_mtrr_state (&smp_mtrr_state);
}


void __init mtrr_init_secondary_cpu (void)
{
	u64 mask;
	int count;
	struct set_mtrr_context ctxt;

	/* Note that this is not ideal, since the cache is only flushed/disabled
	   for this CPU while the MTRRs are changed, but changing this requires
	   more invasive changes to the way the kernel boots  */
	set_mtrr_prepare (&ctxt);
	mask = set_mtrr_state (&smp_mtrr_state, &ctxt);
	set_mtrr_done (&ctxt);

	/*  Use the atomic bitops to update the global mask  */
	for (count = 0; count < sizeof mask * 8; ++count) {
		if (mask & 0x01)
			set_bit (count, &smp_changes_mask);
		mask >>= 1;
	}
}

#endif	/*  CONFIG_SMP  */


int __init mtrr_init (void)
{
#ifdef CONFIG_SMP
	/* mtrr_setup() should already have been called from mtrr_init_boot_cpu() */

	finalize_mtrr_state (&smp_mtrr_state);
	mtrr_state_warn (smp_changes_mask);
#else
	mtrr_setup();
#endif

#ifdef CONFIG_PROC_FS
	proc_root_mtrr = create_proc_entry ("mtrr", S_IWUSR | S_IRUGO, &proc_root);
	if (proc_root_mtrr) {
		proc_root_mtrr->owner = THIS_MODULE;
		proc_root_mtrr->proc_fops = &mtrr_fops;
	}
#endif
#ifdef CONFIG_DEVFS_FS
	devfs_handle = devfs_register (NULL, "cpu/mtrr", DEVFS_FL_DEFAULT, 0, 0,
				S_IFREG | S_IRUGO | S_IWUSR,
				&mtrr_fops, NULL);
#endif
	init_table ();
	return 0;
}
