cpufunc.h revision 42427
1/*-
2 * Copyright (c) 1993 The Regents of the University of California.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 *	$Id: cpufunc.h,v 1.84 1999/01/08 19:51:02 bde Exp $
34 */
35
36/*
37 * Functions to provide access to special i386 instructions.
38 */
39
40#ifndef _MACHINE_CPUFUNC_H_
41#define	_MACHINE_CPUFUNC_H_
42
/*
 * Volatile memory accessors.  Each macro casts `va' to a pointer to a
 * volatile object of the stated width and dereferences it, so the access
 * is not cached or elided by the compiler.
 *
 * readb/readw/readl evaluate to the 8-, 16- or 32-bit value stored at `va'.
 */
#define readb(va)	(*(volatile u_int8_t *) (va))
#define readw(va)	(*(volatile u_int16_t *) (va))
#define readl(va)	(*(volatile u_int32_t *) (va))

/* writeb/writew/writel store `d' at `va' as an 8-, 16- or 32-bit value. */
#define writeb(va, d)	(*(volatile u_int8_t *) (va) = (d))
#define writew(va, d)	(*(volatile u_int16_t *) (va) = (d))
#define writel(va, d)	(*(volatile u_int32_t *) (va) = (d))
50
51#ifdef	__GNUC__
52
53#ifdef SMP
54#include <machine/lock.h>		/* XXX */
55#endif
56
57#ifdef SWTCH_OPTIM_STATS
58extern	int	tlb_flush_count;	/* XXX */
59#endif
60
/*
 * Execute the `int $3' instruction, i.e. raise the x86 breakpoint trap at
 * the point of the call.
 */
static __inline void
breakpoint(void)
{
	__asm __volatile("int $3");
}
66
/*
 * Disable maskable interrupts on the executing CPU with `cli'.  The
 * "memory" clobber tells the compiler not to keep memory values cached in
 * registers across the instruction.  When SMP is defined, MPINTR_LOCK() is
 * invoked afterwards; that macro is not defined in this header (presumably
 * it comes from <machine/lock.h> -- confirm).
 */
static __inline void
disable_intr(void)
{
	__asm __volatile("cli" : : : "memory");
#ifdef SMP
	MPINTR_LOCK();
#endif
}
75
/*
 * Re-enable maskable interrupts on the executing CPU with `sti'.  When SMP
 * is defined, MPINTR_UNLOCK() is invoked first, i.e. in the reverse order
 * of disable_intr(); that macro is not defined in this header.
 */
static __inline void
enable_intr(void)
{
#ifdef SMP
	MPINTR_UNLOCK();
#endif
	__asm __volatile("sti");
}
84
#define	HAVE_INLINE_FFS

/*
 * Find first set: return the 1-based position of the least significant
 * set bit in `mask', or 0 when `mask' is zero.
 *
 * bsfl turns out to be not all that slow on 486's.  It can be beaten by a
 * binary search down to 4 bits followed by a table lookup, but only when
 * that code is inlined and already in the cache, and it is large enough
 * that inlining it probably busts the cache.
 *
 * gcc-2's builtin ffs would be used if this were not declared inline (or
 * if the builtin were not turned off).  The builtin is faster but broken
 * in gcc-2.4.5, and slower but working in gcc-2.5 and 2.6.
 */
static __inline int
ffs(int mask)
{
	int	bitpos;

	/* Zero input skips the scan and is returned unchanged (0). */
	__asm __volatile("testl %0,%0; "
			 "je 1f; "
			 "bsfl %0,%0; "
			 "incl %0; "
			 "1:"
			 : "=r" (bitpos)
			 : "0" (mask));
	return (bitpos);
}
105
#define	HAVE_INLINE_FLS

/*
 * Find last set: return the 1-based position of the most significant set
 * bit in `mask', or 0 when `mask' is zero.
 */
static __inline int
fls(int mask)
{
	int	bitpos;

	/* Zero input skips the scan and is returned unchanged (0). */
	__asm __volatile("testl %0,%0; "
			 "je 1f; "
			 "bsrl %0,%0; "
			 "incl %0; "
			 "1:"
			 : "=r" (bitpos)
			 : "0" (mask));
	return (bitpos);
}
116
117#if __GNUC__ < 2
118
/*
 * Compilers older than gcc 2: map inb()/outb() straight onto the
 * variable-port routines inbv()/outbv().
 */
#define	inb(port)		inbv(port)
#define	outb(port, data)	outbv(port, data)
121
#else /* __GNUC__ >= 2 */
123
124/*
125 * The following complications are to get around gcc not having a
126 * constraint letter for the range 0..255.  We still put "d" in the
127 * constraint because "i" isn't a valid constraint when the port
128 * isn't constant.  This only matters for -O0 because otherwise
129 * the non-working version gets optimized away.
130 *
131 * Use an expression-statement instead of a conditional expression
132 * because gcc-2.6.0 would promote the operands of the conditional
133 * and produce poor code for "if ((inb(var) & const1) == const2)".
134 *
135 * The unnecessary test `(port) < 0x10000' is to generate a warning if
136 * the `port' has type u_short or smaller.  Such types are pessimal.
137 * This actually only works for signed types.  The range check is
138 * careful to avoid generating warnings.
139 */
/*
 * inb(port): read one byte from i/o port `port'.  When `port' is a
 * compile-time constant whose value fits in 0..255 the read goes through
 * inbc(), otherwise through inbv().  Evaluates to the u_char read.
 */
#define	inb(port) __extension__ ({					\
	u_char	_data;							\
	if (__builtin_constant_p(port) && ((port) & 0xffff) < 0x100	\
	    && (port) < 0x10000)					\
		_data = inbc(port);					\
	else								\
		_data = inbv(port);					\
	_data; })

/*
 * outb(port, data): write the byte `data' to i/o port `port', choosing
 * between outbc() and outbv() with the same test that inb() uses.
 */
#define	outb(port, data) (						\
	__builtin_constant_p(port) && ((port) & 0xffff) < 0x100		\
	&& (port) < 0x10000						\
	? outbc(port, data) : outbv(port, data))
153
154static __inline u_char
155inbc(u_int port)
156{
157	u_char	data;
158
159	__asm __volatile("inb %1,%0" : "=a" (data) : "id" ((u_short)(port)));
160	return (data);
161}
162
/*
 * Byte write to an i/o port, used by outb() for constant ports.  `data' is
 * passed in %al ("a"); the "id" constraint lets gcc use either an immediate
 * port operand or %dx.
 */
static __inline void
outbc(u_int port, u_char data)
{
	__asm __volatile("outb %0,%1" : : "a" (data), "id" ((u_short)(port)));
}
168
#endif /* __GNUC__ >= 2 */
170
171static __inline u_char
172inbv(u_int port)
173{
174	u_char	data;
175	/*
176	 * We use %%dx and not %1 here because i/o is done at %dx and not at
177	 * %edx, while gcc generates inferior code (movw instead of movl)
178	 * if we tell it to load (u_short) port.
179	 */
180	__asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port));
181	return (data);
182}
183
184static __inline u_int
185inl(u_int port)
186{
187	u_int	data;
188
189	__asm __volatile("inl %%dx,%0" : "=a" (data) : "d" (port));
190	return (data);
191}
192
/*
 * Read `cnt' bytes from i/o port `port' into the buffer at `addr' using
 * `rep insb'.  `cld' clears the direction flag so the destination address
 * advances upward.  `addr' (%edi) and `cnt' (%ecx) are both consumed and
 * updated by the instruction, hence they appear as outputs tied to inputs;
 * the "memory" clobber records that the buffer is written.
 */
static __inline void
insb(u_int port, void *addr, size_t cnt)
{
	__asm __volatile("cld; rep; insb"
			 : "=D" (addr), "=c" (cnt)
			 :  "0" (addr),  "1" (cnt), "d" (port)
			 : "memory");
}
201
/*
 * Read `cnt' 16-bit words from i/o port `port' into the buffer at `addr'
 * using `rep insw'.  `cld' clears the direction flag so the destination
 * address advances upward.  `addr' (%edi) and `cnt' (%ecx) are both
 * consumed and updated by the instruction, hence they appear as outputs
 * tied to inputs; the "memory" clobber records that the buffer is written.
 */
static __inline void
insw(u_int port, void *addr, size_t cnt)
{
	__asm __volatile("cld; rep; insw"
			 : "=D" (addr), "=c" (cnt)
			 :  "0" (addr),  "1" (cnt), "d" (port)
			 : "memory");
}
210
/*
 * Read `cnt' 32-bit words from i/o port `port' into the buffer at `addr'
 * using `rep insl'.  `cld' clears the direction flag so the destination
 * address advances upward.  `addr' (%edi) and `cnt' (%ecx) are both
 * consumed and updated by the instruction, hence they appear as outputs
 * tied to inputs; the "memory" clobber records that the buffer is written.
 */
static __inline void
insl(u_int port, void *addr, size_t cnt)
{
	__asm __volatile("cld; rep; insl"
			 : "=D" (addr), "=c" (cnt)
			 :  "0" (addr),  "1" (cnt), "d" (port)
			 : "memory");
}
219
/*
 * Execute `invd', which invalidates the CPU's internal caches without
 * writing modified lines back to memory (contrast wbinvd).
 */
static __inline void
invd(void)
{
	__asm __volatile("invd");
}
225
226#if defined(SMP) && defined(KERNEL)
227
228/*
229 * When using APIC IPI's, invlpg() is not simply the invlpg instruction
230 * (this is a bug) and the inlining cost is prohibitive since the call
231 * executes into the IPI transmission system.
232 */
void	invlpg		__P((u_int addr));	/* not inlined for SMP kernels */
void	invltlb		__P((void));		/* not inlined for SMP kernels */
235
/*
 * Execute the bare `invlpg' instruction for the page containing `addr',
 * invalidating that TLB entry on the executing CPU only.  The "memory"
 * clobber keeps the compiler from caching memory values across it.
 */
static __inline void
cpu_invlpg(void *addr)
{
	__asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
}
241
242static __inline void
243cpu_invltlb(void)
244{
245	u_int	temp;
246	/*
247	 * This should be implemented as load_cr3(rcr3()) when load_cr3()
248	 * is inlined.
249	 */
250	__asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp)
251			 : : "memory");
252#if defined(SWTCH_OPTIM_STATS)
253	++tlb_flush_count;
254#endif
255}
256
257#else /* !(SMP && KERNEL) */
258
/*
 * Invalidate the TLB entry for the page containing address `addr' with the
 * `invlpg' instruction.  `addr' is an integer and is cast to a pointer only
 * to form the memory operand.  The "memory" clobber keeps the compiler from
 * caching memory values across the instruction.
 */
static __inline void
invlpg(u_int addr)
{
	__asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
}
264
265static __inline void
266invltlb(void)
267{
268	u_int	temp;
269	/*
270	 * This should be implemented as load_cr3(rcr3()) when load_cr3()
271	 * is inlined.
272	 */
273	__asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp)
274			 : : "memory");
275#ifdef SWTCH_OPTIM_STATS
276	++tlb_flush_count;
277#endif
278}
279
280#endif /* SMP && KERNEL */
281
282static __inline u_short
283inw(u_int port)
284{
285	u_short	data;
286
287	__asm __volatile("inw %%dx,%0" : "=a" (data) : "d" (port));
288	return (data);
289}
290
/*
 * Atomically fetch the word at `addr' and replace it with zero.
 *
 * The result register is zeroed and then exchanged with memory; xchgl with
 * a memory operand is implicitly locked, so no explicit lock prefix is
 * needed.  Returns the value that was stored at `addr' before the call.
 *
 * *addr is listed as an output as well as an input because the exchange
 * writes it.  Declaring it input-only would allow the compiler to assume
 * the memory is unchanged by the asm and reuse a stale copy.
 */
static __inline u_int
loadandclear(u_int *addr)
{
	u_int	result;

	__asm __volatile("xorl %0,%0; xchgl %1,%0"
			 : "=&r" (result), "=m" (*addr)
			 : "m" (*addr));
	return (result);
}
300
301static __inline void
302outbv(u_int port, u_char data)
303{
304	u_char	al;
305	/*
306	 * Use an unnecessary assignment to help gcc's register allocator.
307	 * This make a large difference for gcc-1.40 and a tiny difference
308	 * for gcc-2.6.0.  For gcc-1.40, al had to be ``asm("ax")'' for
309	 * best results.  gcc-2.6.0 can't handle this.
310	 */
311	al = data;
312	__asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port));
313}
314
/*
 * 32-bit write of `data' (%eax) to i/o port `port' (%dx).
 */
static __inline void
outl(u_int port, u_int data)
{
	/*
	 * outl() and outw() aren't used much so we haven't looked at
	 * possible micro-optimizations such as the unnecessary
	 * assignment for them.
	 */
	__asm __volatile("outl %0,%%dx" : : "a" (data), "d" (port));
}
325
326static __inline void
327outsb(u_int port, const void *addr, size_t cnt)
328{
329	__asm __volatile("cld; rep; outsb"
330			 : "=S" (addr), "=c" (cnt)
331			 :  "0" (addr),  "1" (cnt), "d" (port));
332}
333
334static __inline void
335outsw(u_int port, const void *addr, size_t cnt)
336{
337	__asm __volatile("cld; rep; outsw"
338			 : "=S" (addr), "=c" (cnt)
339			 :  "0" (addr),  "1" (cnt), "d" (port));
340}
341
342static __inline void
343outsl(u_int port, const void *addr, size_t cnt)
344{
345	__asm __volatile("cld; rep; outsl"
346			 : "=S" (addr), "=c" (cnt)
347			 :  "0" (addr),  "1" (cnt), "d" (port));
348}
349
/*
 * 16-bit write of `data' (%ax) to i/o port `port' (%dx).
 */
static __inline void
outw(u_int port, u_short data)
{
	__asm __volatile("outw %0,%%dx" : : "a" (data), "d" (port));
}
355
356static __inline u_int
357rcr2(void)
358{
359	u_int	data;
360
361	__asm __volatile("movl %%cr2,%0" : "=r" (data));
362	return (data);
363}
364
365static __inline u_int
366read_eflags(void)
367{
368	u_int	ef;
369
370	__asm __volatile("pushfl; popl %0" : "=r" (ef));
371	return (ef);
372}
373
374static __inline u_int64_t
375rdmsr(u_int msr)
376{
377	u_int64_t rv;
378
379	__asm __volatile(".byte 0x0f, 0x32" : "=A" (rv) : "c" (msr));
380	return (rv);
381}
382
383static __inline u_int64_t
384rdpmc(u_int pmc)
385{
386	u_int64_t rv;
387
388	__asm __volatile(".byte 0x0f, 0x33" : "=A" (rv) : "c" (pmc));
389	return (rv);
390}
391
/*
 * Return the 64-bit time-stamp counter from %edx:%eax.  The instruction is
 * emitted as raw opcode bytes (0x0f 0x31, rdtsc) rather than by mnemonic.
 */
static __inline u_int64_t
rdtsc(void)
{
	u_int64_t ticks;

	__asm __volatile(".byte 0x0f, 0x31"
			 : "=A" (ticks));
	return (ticks);
}
400
/*
 * OR `bits' into the word at `addr' with a single `orl' to memory.  When
 * SMP is defined the instruction carries a `lock' prefix so the
 * read-modify-write is atomic with respect to other processors.
 *
 * *addr is listed both as an output and as an input because `orl' reads
 * the old value before writing the new one.  Declaring it write-only would
 * allow the compiler to treat an earlier store to *addr as dead.
 */
static __inline void
setbits(volatile u_int *addr, u_int bits)
{
	__asm __volatile(
#ifdef SMP
			 "lock; "
#endif
			 "orl %1,%0"
			 : "=m" (*addr)
			 : "ir" (bits), "m" (*addr));
}
410
/*
 * Execute `wbinvd', which writes modified cache lines back to memory and
 * then invalidates the CPU's caches.
 */
static __inline void
wbinvd(void)
{
	__asm __volatile("wbinvd");
}
416
/*
 * Load EFLAGS from `ef' by pushing the value onto the stack and popping it
 * into the flags register.
 */
static __inline void
write_eflags(u_int ef)
{
	__asm __volatile("pushl %0; popfl" : : "r" (ef));
}
422
/*
 * Write the 64-bit value `newval' (passed in %edx:%eax) to the
 * model-specific register selected by `msr' (passed in %ecx).  The
 * instruction is emitted as raw opcode bytes (0x0f 0x30, wrmsr) rather
 * than by mnemonic.
 */
static __inline void
wrmsr(u_int msr, u_int64_t newval)
{
	__asm __volatile(".byte 0x0f, 0x30" : : "A" (newval), "c" (msr));
}
428
429#else /* !__GNUC__ */
430
431int	breakpoint	__P((void));
432void	disable_intr	__P((void));
433void	enable_intr	__P((void));
434u_char	inb		__P((u_int port));
435u_int	inl		__P((u_int port));
436void	insb		__P((u_int port, void *addr, size_t cnt));
437void	insl		__P((u_int port, void *addr, size_t cnt));
438void	insw		__P((u_int port, void *addr, size_t cnt));
439void	invd		__P((void));
440void	invlpg		__P((u_int addr));
441void	invltlb		__P((void));
442u_short	inw		__P((u_int port));
443u_int	loadandclear	__P((u_int *addr));
444void	outb		__P((u_int port, u_char data));
445void	outl		__P((u_int port, u_int data));
446void	outsb		__P((u_int port, void *addr, size_t cnt));
447void	outsl		__P((u_int port, void *addr, size_t cnt));
448void	outsw		__P((u_int port, void *addr, size_t cnt));
449void	outw		__P((u_int port, u_short data));
450u_int	rcr2		__P((void));
451u_int64_t rdmsr		__P((u_int msr));
452u_int64_t rdpmc		__P((u_int pmc));
453u_int64_t rdtsc		__P((void));
454u_int	read_eflags	__P((void));
455void	setbits		__P((volatile u_int *addr, u_int bits));
456void	wbinvd		__P((void));
457void	write_eflags	__P((u_int ef));
458void	wrmsr		__P((u_int msr, u_int64_t newval));
459
460#endif	/* __GNUC__ */
461
/*
 * Routines declared for every compiler; none of them is defined in this
 * header.  NOTE(review): going by the names, load_cr0/3/4() write and
 * rcr0/3/4() read the corresponding control registers, and ltr() loads the
 * task register from selector `sel' -- confirm against the implementation.
 */
void	load_cr0	__P((u_int cr0));
void	load_cr3	__P((u_int cr3));
void	load_cr4	__P((u_int cr4));
void	ltr		__P((u_short sel));
u_int	rcr0		__P((void));
u_int	rcr3		__P((void));
u_int	rcr4		__P((void));
469
470#endif /* !_MACHINE_CPUFUNC_H_ */
471