cpufunc.h revision 50477
1/*-
2 * Copyright (c) 1993 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * $FreeBSD: head/sys/i386/include/cpufunc.h 50477 1999-08-28 01:08:13Z peter $
34 */
35
36/*
37 * Functions to provide access to special i386 instructions.
38 */
39
40#ifndef _MACHINE_CPUFUNC_H_
41#define	_MACHINE_CPUFUNC_H_
42
/*
 * Load an 8-, 16- or 32-bit value from address `addr' through a
 * volatile-qualified pointer.
 */
#define	readb(addr)	(*(volatile u_int8_t *)(addr))
#define	readw(addr)	(*(volatile u_int16_t *)(addr))
#define	readl(addr)	(*(volatile u_int32_t *)(addr))
46
/*
 * Store `val' as an 8-, 16- or 32-bit value at address `addr' through a
 * volatile-qualified pointer.  Each macro expands to an assignment
 * expression.
 */
#define	writeb(addr, val)	(*(volatile u_int8_t *)(addr) = (val))
#define	writew(addr, val)	(*(volatile u_int16_t *)(addr) = (val))
#define	writel(addr, val)	(*(volatile u_int32_t *)(addr) = (val))
50
51#ifdef	__GNUC__
52
53#ifdef SMP
54#include <machine/lock.h>		/* XXX */
55#endif
56
#ifdef SWTCH_OPTIM_STATS
/*
 * Counter incremented by the inline TLB flush routines in this header
 * when SWTCH_OPTIM_STATS is defined.
 */
extern int	tlb_flush_count;	/* XXX */
#endif
60
/*
 * Execute the `int $3' breakpoint instruction.
 */
static __inline void
breakpoint(void)
{
	__asm __volatile("int $3");
}
66
/*
 * Disable interrupts with `cli'.  The "memory" clobber makes the asm a
 * compiler barrier, so memory accesses are not moved across it.  On SMP
 * kernels MPINTR_LOCK() is invoked once interrupts are off.
 */
static __inline void
disable_intr(void)
{
	__asm __volatile("cli" : : : "memory");
#ifdef SMP
	MPINTR_LOCK();
#endif
}
75
/*
 * Re-enable interrupts with `sti'.  On SMP kernels MPINTR_UNLOCK() is
 * invoked first, while interrupts are still off.
 *
 * The "memory" clobber mirrors disable_intr(): it makes the asm a
 * compiler barrier, so loads and stores that belong to the
 * interrupts-disabled region cannot be moved by the compiler to after
 * the point where interrupts are turned back on.
 */
static __inline void
enable_intr(void)
{
#ifdef SMP
	MPINTR_UNLOCK();
#endif
	__asm __volatile("sti" : : : "memory");
}
84
85
#define	HAVE_INLINE__BSFL

/*
 * Bit scan forward: return the 0-based index of the least significant
 * set bit of `mask'.  A zero `mask' is not handled here; callers such as
 * ffs() test for it before calling.
 *
 * bsfl turns out to be not all that slow on 486's.  It can be beaten by
 * a binary search that reduces to 4 bits followed by a table lookup, but
 * only if that code is inlined and in the cache, and the code is large
 * enough that inlining it probably busts the cache.
 */
static __inline int
__bsfl(int mask)
{
	int	bit;

	__asm __volatile("bsfl %0,%0" : "=r" (bit) : "0" (mask));
	return (bit);
}
102
#define	HAVE_INLINE_FFS

/*
 * Find first set: return the 1-based position of the least significant
 * set bit of `mask', or 0 when no bit is set.
 *
 * If this were not declared inline (or the builtin turned off), gcc-2
 * would substitute its builtin ffs.  The builtin is faster but broken in
 * gcc-2.4.5, and slower but working in gcc-2.5 and 2.6.
 */
static __inline int
ffs(int mask)
{
	if (mask == 0)
		return (0);
	return (__bsfl(mask) + 1);
}
115
#define	HAVE_INLINE__BSRL

/*
 * Bit scan reverse: return the 0-based index of the most significant
 * set bit of `mask'.  A zero `mask' is not handled here; callers such as
 * fls() test for it before calling.
 */
static __inline int
__bsrl(int mask)
{
	int	bit;

	__asm __volatile("bsrl %0,%0" : "=r" (bit) : "0" (mask));
	return (bit);
}
125
#define	HAVE_INLINE_FLS

/*
 * Find last set: return the 1-based position of the most significant
 * set bit of `mask', or 0 when no bit is set.
 */
static __inline int
fls(int mask)
{
	if (mask == 0)
		return (0);
	return (__bsrl(mask) + 1);
}
133
134#if __GNUC__ < 2
135
/* Before gcc 2, byte port I/O always goes through the %dx variants. */
#define	inb(p)			inbv(p)
#define	outb(p, val)		outbv(p, val)
138
#else /* __GNUC__ >= 2 */
140
141/*
142 * The following complications are to get around gcc not having a
143 * constraint letter for the range 0..255.  We still put "d" in the
144 * constraint because "i" isn't a valid constraint when the port
145 * isn't constant.  This only matters for -O0 because otherwise
146 * the non-working version gets optimized away.
147 *
148 * Use an expression-statement instead of a conditional expression
149 * because gcc-2.6.0 would promote the operands of the conditional
150 * and produce poor code for "if ((inb(var) & const1) == const2)".
151 *
152 * The unnecessary test `(port) < 0x10000' is to generate a warning if
153 * the `port' has type u_short or smaller.  Such types are pessimal.
154 * This actually only works for signed types.  The range check is
155 * careful to avoid generating warnings.
156 */
/*
 * Read a byte from an I/O port.  A compile-time constant port below
 * 0x100 goes through inbc(); every other port goes through inbv().
 */
#define	inb(port) __extension__ ({					\
	u_char	_inb_val;						\
									\
	if (__builtin_constant_p(port) &&				\
	    ((port) & 0xffff) < 0x100 && (port) < 0x10000) {		\
		_inb_val = inbc(port);					\
	} else {							\
		_inb_val = inbv(port);					\
	}								\
	_inb_val; })
165
/*
 * Write a byte to an I/O port.  A compile-time constant port below
 * 0x100 goes through outbc(); every other port goes through outbv().
 */
#define	outb(port, data) (						\
	(__builtin_constant_p(port) && ((port) & 0xffff) < 0x100 &&	\
	    (port) < 0x10000)						\
	    ? outbc(port, data)						\
	    : outbv(port, data))
170
171static __inline u_char
172inbc(u_int port)
173{
174	u_char	data;
175
176	__asm __volatile("inb %1,%0" : "=a" (data) : "id" ((u_short)(port)));
177	return (data);
178}
179
180static __inline void
181outbc(u_int port, u_char data)
182{
183	__asm __volatile("outb %0,%1" : : "a" (data), "id" ((u_short)(port)));
184}
185
#endif /* __GNUC__ >= 2 */
187
188static __inline u_char
189inbv(u_int port)
190{
191	u_char	data;
192	/*
193	 * We use %%dx and not %1 here because i/o is done at %dx and not at
194	 * %edx, while gcc generates inferior code (movw instead of movl)
195	 * if we tell it to load (u_short) port.
196	 */
197	__asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
198	return (data);
199}
200
201static __inline u_int
202inl(u_int port)
203{
204	u_int	data;
205
206	__asm __volatile("inl %%dx,%0" : "=a" (data) : "d" (port));
207	return (data);
208}
209
210static __inline void
211insb(u_int port, void *addr, size_t cnt)
212{
213	__asm __volatile("cld; rep; insb"
214			 : "=D" (addr), "=c" (cnt)
215			 :  "0" (addr),  "1" (cnt), "d" (port)
216			 : "memory");
217}
218
219static __inline void
220insw(u_int port, void *addr, size_t cnt)
221{
222	__asm __volatile("cld; rep; insw"
223			 : "=D" (addr), "=c" (cnt)
224			 :  "0" (addr),  "1" (cnt), "d" (port)
225			 : "memory");
226}
227
228static __inline void
229insl(u_int port, void *addr, size_t cnt)
230{
231	__asm __volatile("cld; rep; insl"
232			 : "=D" (addr), "=c" (cnt)
233			 :  "0" (addr),  "1" (cnt), "d" (port)
234			 : "memory");
235}
236
/*
 * Execute the `invd' instruction.
 */
static __inline void
invd(void)
{
	__asm __volatile("invd");
}
242
243#if defined(SMP) && defined(KERNEL)
244
245/*
246 * When using APIC IPI's, invlpg() is not simply the invlpg instruction
247 * (this is a bug) and the inlining cost is prohibitive since the call
248 * executes into the IPI transmission system.
249 */
250void	invlpg		__P((u_int addr));
251void	invltlb		__P((void));
252
/*
 * Execute `invlpg' on the memory operand at `addr'.  The "memory"
 * clobber makes the asm a compiler barrier.
 */
static __inline void
cpu_invlpg(void *addr)
{
	__asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
}
258
259static __inline void
260cpu_invltlb(void)
261{
262	u_int	temp;
263	/*
264	 * This should be implemented as load_cr3(rcr3()) when load_cr3()
265	 * is inlined.
266	 */
267	__asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp)
268			 : : "memory");
269#if defined(SWTCH_OPTIM_STATS)
270	++tlb_flush_count;
271#endif
272}
273
274#else /* !(SMP && KERNEL) */
275
276static __inline void
277invlpg(u_int addr)
278{
279	__asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
280}
281
282static __inline void
283invltlb(void)
284{
285	u_int	temp;
286	/*
287	 * This should be implemented as load_cr3(rcr3()) when load_cr3()
288	 * is inlined.
289	 */
290	__asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp)
291			 : : "memory");
292#ifdef SWTCH_OPTIM_STATS
293	++tlb_flush_count;
294#endif
295}
296
297#endif /* SMP && KERNEL */
298
299static __inline u_short
300inw(u_int port)
301{
302	u_short	data;
303
304	__asm __volatile("inw %%dx,%0" : "=a" (data) : "d" (port));
305	return (data);
306}
307
308static __inline u_int
309loadandclear(volatile u_int *addr)
310{
311	u_int	result;
312
313	__asm __volatile("xorl %0,%0; xchgl %1,%0"
314			 : "=&r" (result) : "m" (*addr));
315	return (result);
316}
317
318static __inline void
319outbv(u_int port, u_char data)
320{
321	u_char	al;
322	/*
323	 * Use an unnecessary assignment to help gcc's register allocator.
324	 * This make a large difference for gcc-1.40 and a tiny difference
325	 * for gcc-2.6.0.  For gcc-1.40, al had to be ``asm("ax")'' for
326	 * best results.  gcc-2.6.0 can't handle this.
327	 */
328	al = data;
329	__asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
330}
331
332static __inline void
333outl(u_int port, u_int data)
334{
335	/*
336	 * outl() and outw() aren't used much so we haven't looked at
337	 * possible micro-optimizations such as the unnecessary
338	 * assignment for them.
339	 */
340	__asm __volatile("outl %0,%%dx" : : "a" (data), "d" (port));
341}
342
343static __inline void
344outsb(u_int port, const void *addr, size_t cnt)
345{
346	__asm __volatile("cld; rep; outsb"
347			 : "=S" (addr), "=c" (cnt)
348			 :  "0" (addr),  "1" (cnt), "d" (port));
349}
350
351static __inline void
352outsw(u_int port, const void *addr, size_t cnt)
353{
354	__asm __volatile("cld; rep; outsw"
355			 : "=S" (addr), "=c" (cnt)
356			 :  "0" (addr),  "1" (cnt), "d" (port));
357}
358
359static __inline void
360outsl(u_int port, const void *addr, size_t cnt)
361{
362	__asm __volatile("cld; rep; outsl"
363			 : "=S" (addr), "=c" (cnt)
364			 :  "0" (addr),  "1" (cnt), "d" (port));
365}
366
367static __inline void
368outw(u_int port, u_short data)
369{
370	__asm __volatile("outw %0,%%dx" : : "a" (data), "d" (port));
371}
372
373static __inline u_int
374rcr2(void)
375{
376	u_int	data;
377
378	__asm __volatile("movl %%cr2,%0" : "=r" (data));
379	return (data);
380}
381
382static __inline u_int
383read_eflags(void)
384{
385	u_int	ef;
386
387	__asm __volatile("pushfl; popl %0" : "=r" (ef));
388	return (ef);
389}
390
391static __inline u_int64_t
392rdmsr(u_int msr)
393{
394	u_int64_t rv;
395
396	__asm __volatile(".byte 0x0f, 0x32" : "=A" (rv) : "c" (msr));
397	return (rv);
398}
399
400static __inline u_int64_t
401rdpmc(u_int pmc)
402{
403	u_int64_t rv;
404
405	__asm __volatile(".byte 0x0f, 0x33" : "=A" (rv) : "c" (pmc));
406	return (rv);
407}
408
409static __inline u_int64_t
410rdtsc(void)
411{
412	u_int64_t rv;
413
414	__asm __volatile(".byte 0x0f, 0x31" : "=A" (rv));
415	return (rv);
416}
417
/*
 * Execute the `wbinvd' instruction.
 */
static __inline void
wbinvd(void)
{
	__asm __volatile("wbinvd");
}
423
424static __inline void
425write_eflags(u_int ef)
426{
427	__asm __volatile("pushl %0; popfl" : : "r" (ef));
428}
429
430static __inline void
431wrmsr(u_int msr, u_int64_t newval)
432{
433	__asm __volatile(".byte 0x0f, 0x30" : : "A" (newval), "c" (msr));
434}
435
436static __inline u_int
437rfs(void)
438{
439	u_int sel;
440	__asm __volatile("movl %%fs,%0" : "=r" (sel));
441	return (sel);
442}
443
444static __inline u_int
445rgs(void)
446{
447	u_int sel;
448	__asm __volatile("movl %%gs,%0" : "=r" (sel));
449	return (sel);
450}
451
452static __inline void
453load_fs(u_int sel)
454{
455	__asm __volatile("movl %0,%%fs" : : "r" (sel));
456}
457
458static __inline void
459load_gs(u_int sel)
460{
461	__asm __volatile("movl %0,%%gs" : : "r" (sel));
462}
463
464#else /* !__GNUC__ */
465
466int	breakpoint	__P((void));
467void	disable_intr	__P((void));
468void	enable_intr	__P((void));
469u_char	inb		__P((u_int port));
470u_int	inl		__P((u_int port));
471void	insb		__P((u_int port, void *addr, size_t cnt));
472void	insl		__P((u_int port, void *addr, size_t cnt));
473void	insw		__P((u_int port, void *addr, size_t cnt));
474void	invd		__P((void));
475void	invlpg		__P((u_int addr));
476void	invltlb		__P((void));
477u_short	inw		__P((u_int port));
478u_int	loadandclear	__P((u_int *addr));
479void	outb		__P((u_int port, u_char data));
480void	outl		__P((u_int port, u_int data));
481void	outsb		__P((u_int port, void *addr, size_t cnt));
482void	outsl		__P((u_int port, void *addr, size_t cnt));
483void	outsw		__P((u_int port, void *addr, size_t cnt));
484void	outw		__P((u_int port, u_short data));
485u_int	rcr2		__P((void));
486u_int64_t rdmsr		__P((u_int msr));
487u_int64_t rdpmc		__P((u_int pmc));
488u_int64_t rdtsc		__P((void));
489u_int	read_eflags	__P((void));
490void	wbinvd		__P((void));
491void	write_eflags	__P((u_int ef));
492void	wrmsr		__P((u_int msr, u_int64_t newval));
493u_int	rfs		__P((void));
494u_int	rgs		__P((void));
495void	load_fs		__P((u_int sel));
496void	load_gs		__P((u_int sel));
497
498#endif	/* __GNUC__ */
499
500void	load_cr0	__P((u_int cr0));
501void	load_cr3	__P((u_int cr3));
502void	load_cr4	__P((u_int cr4));
503void	ltr		__P((u_short sel));
504u_int	rcr0		__P((void));
505u_int	rcr3		__P((void));
506u_int	rcr4		__P((void));
507
508#endif /* !_MACHINE_CPUFUNC_H_ */
509