amd64.il revision 9171:ee979187414d
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/
27/ In-line functions for amd64 kernels.
28/
29
/
/ threadp: return current thread pointer
/
/ Loads the quadword found at %gs:0x18 into %rax.
/
/ NOTE: the "0x18" should be replaced by the computed value of the
/	offset of "cpu_thread" from the beginning of the struct cpu.
/	Including "assym.h" does not work, however, since that stuff
/	is PSM-specific and is only visible to the 'unix' build anyway.
/	The same holds for the current cpu pointer template (curcpup),
/	where "0x10" should be replaced by the computed value of the
/	offset of "cpu_self".
/	Ugh -- until then both offsets are hard-coded here.
/
	.inline	threadp,0
	movq	%gs:0x18, %rax
	.end
44
/
/ curcpup: return current cpu pointer
/
/ Loads the quadword found at %gs:0x10 into %rax.  The "0x10" is a
/ hard-coded stand-in for the offset of "cpu_self" within struct cpu.
/
	.inline	curcpup,0
	movq	%gs:0x10, %rax
	.end
51
/
/ caller: return caller
/
/ Loads the quadword at 8(%rbp) into %rax.  With a conventional
/ frame-pointer layout that slot holds the return address of the
/ function the template is expanded in (assumes %rbp is in use as a
/ frame pointer there -- confirm for the build flags in effect).
/
	.inline	caller,0
	movq	8(%rbp), %rax
	.end
58
/
/ ipltospl: convert ipl to spl
/
/ This is the identity function for i86: the first argument (%rdi) is
/ copied unchanged into the return register (%rax).
/
	.inline	ipltospl,0
	movq	%rdi, %rax
	.end
65
/
/ lowbit: find the low order bit in a word
/
/ In:	%rdi = word to scan
/ Out:	%rax = 1-based index of the least significant set bit,
/	       or 0 when the word is 0
/
/ BSF sets ZF when its source operand is zero, and Intel documents the
/ destination register as undefined in that case.  Rather than relying
/ on a preloaded -1 surviving the BSF, the zero result is selected
/ explicitly from ZF:
/	bsfq	- bit index into %rax, ZF = 1 iff %rdi == 0
/	leaq	- index + 1; lea does not modify the flags
/	cmoveq	- if ZF, return %rdi itself, which is known to be 0
/
	.inline	lowbit,4
	bsfq	%rdi, %rax
	leaq	1(%rax), %rax
	cmoveq	%rdi, %rax
	.end
74
/
/ Networking byte order functions (too bad, Intel has the wrong byte order)
/

/
/ htonll: byte-reverse the 64-bit argument in %rdi, result in %rax.
/
	.inline	htonll,4
	movq	%rdi, %rax
	bswapq	%rax
	.end
83
/
/ ntohll: byte-reverse the 64-bit argument in %rdi, result in %rax.
/ Same instruction sequence as htonll.
/
	.inline	ntohll,4
	movq	%rdi, %rax
	bswapq	%rax
	.end
88
/
/ htonl: byte-reverse the 32-bit argument in %edi, result in %eax.
/
	.inline	htonl,4
	movl	%edi, %eax
	bswap	%eax
	.end
93
/
/ ntohl: byte-reverse the 32-bit argument in %edi, result in %eax.
/ Same instruction sequence as htonl.
/
	.inline	ntohl,4
	movl	%edi, %eax
	bswap	%eax
	.end
98
/
/ htons: swap the two low-order bytes of the argument in %edi.
/
/ The full 32-bit value is byte-reversed, which moves the swapped
/ 16-bit quantity into the upper half; the logical shift right by 16
/ brings it back down and leaves the upper 16 bits of %eax zero.
/
	.inline	htons,4
	movl	%edi, %eax
	bswap	%eax
	shrl	$16, %eax
	.end
104
/
/ ntohs: swap the two low-order bytes of the argument in %edi.
/
/ Same instruction sequence as htons: byte-reverse all 32 bits, then
/ shift the swapped halfword down from the upper half, leaving the
/ upper 16 bits of %eax zero.
/
	.inline	ntohs,4
	movl	%edi, %eax
	bswap	%eax
	shrl	$16, %eax
	.end
110
/*
 * multiply two long numbers and yield a u_longlong_t result
 * Provided to manipulate hrtime_t values.
 */
	/* XX64 These don't work correctly with SOS9 build 13.0 yet
	.inline mul32, 8
	xorl	%edx, %edx
	movl	%edi, %eax
	mull	%esi
	shlq	$32, %rdx
	orq	%rdx, %rax
	ret
	.end
	*/
/*
 * Unlock hres_lock and increment the count value. (See clock.h)
 *
 * Performed as a single locked 32-bit increment of the hres_lock
 * symbol.
 */
	.inline	unlock_hres_lock, 0
	lock
	incl	hres_lock
	.end
132
/*
 * atomic_orb: locked OR of a byte into memory.
 *
 *	%rdi = address of the target byte
 *	%esi = value whose low byte is OR-ed in
 *
 * The value is staged through %eax so that its low byte is available
 * as %al.  No result is produced.
 */
	.inline	atomic_orb,8
	movl	%esi, %eax
	lock
	orb	%al,(%rdi)
	.end
138
/*
 * atomic_andb: locked AND of a byte into memory.
 *
 *	%rdi = address of the target byte
 *	%esi = value whose low byte is AND-ed in
 *
 * The value is staged through %eax so that its low byte is available
 * as %al.  No result is produced.
 */
	.inline	atomic_andb,8
	movl	%esi, %eax
	lock
	andb	%al,(%rdi)
	.end
144
/*
 * atomic inc/dec operations.
 *	void atomic_inc16(uint16_t *addr) { ++*addr; }
 *	void atomic_dec16(uint16_t *addr) { --*addr; }
 */

/*
 * atomic_inc16: locked 16-bit increment of the word at (%rdi).
 */
	.inline	atomic_inc16,4
	lock
	incw	(%rdi)
	.end
154
/*
 * atomic_dec16: locked 16-bit decrement of the word at (%rdi).
 *	void atomic_dec16(uint16_t *addr) { --*addr; }
 */
	.inline	atomic_dec16,4
	lock
	decw	(%rdi)
	.end
159
/*
 * atomic bit clear
 *
 *	%rdi = address of the 32-bit word
 *	%esi = bit number to clear
 *
 * The locked btrl copies the selected bit into CF and clears it in
 * memory.  The previous bit value is returned in %eax as 0 or 1.
 *
 * setc writes only %al, so the result is zero-extended into %eax
 * afterwards; otherwise bits 8..31 of the return register would hold
 * whatever was there before.  (Assumes the C prototype returns a type
 * wider than one byte -- confirm against the declaration.)
 */
	.inline	atomic_btr32,8
	lock
	btrl	%esi, (%rdi)
	setc	%al
	movzbl	%al, %eax
	.end
168
/*
 * Call the pause instruction.  To the Pentium 4 Xeon processor, it acts as
 * a hint that the code sequence is a busy spin-wait loop.  Without a pause
 * instruction in these loops, the P4 Xeon processor may suffer a severe
 * penalty when exiting the loop because the processor detects a possible
 * memory order violation.  Inserting the pause instruction significantly
 * reduces the likelihood of a memory order violation, improving
 * performance.  The pause instruction is a NOP on all other IA-32
 * processors.
 */
	.inline	ht_pause, 0
	pause
	.end
181
/*
 * inlines for update_sregs().
 */

/*
 * __set_ds: load %ds with the selector in the low 16 bits of the
 * first argument (%di).
 */
	.inline	__set_ds, 0
	movw	%di, %ds
	.end
188
/*
 * __set_es: load %es with the selector in the low 16 bits of the
 * first argument (%di).
 */
	.inline	__set_es, 0
	movw	%di, %es
	.end
192
/*
 * __set_fs: load %fs with the selector in the low 16 bits of the
 * first argument (%di).
 */
	.inline	__set_fs, 0
	movw	%di, %fs
	.end
196
/*
 * __set_gs: load %gs with the selector in the low 16 bits of the
 * first argument (%di).
 */
	.inline	__set_gs, 0
	movw	%di, %gs
	.end
200
/*
 * __swapgs: issue swapgs, preceded by a memory fence.
 *
 * OPTERON_ERRATUM_88 requires mfence
 */
	.inline	__swapgs, 0
	mfence
	swapgs
	.end
208
/*
 * prefetch 64 bytes
 *
 * Each template below issues two prefetch hints, one for the address
 * in %rdi and one for %rdi + 32.
 */

/*
 * prefetch_read_many: prefetcht0 hints at (%rdi) and 32(%rdi).
 */
	.inline	prefetch_read_many,8
	prefetcht0	(%rdi)
	prefetcht0	32(%rdi)
	.end
217
/*
 * prefetch_read_once: prefetchnta (non-temporal) hints at (%rdi) and
 * 32(%rdi).
 */
	.inline	prefetch_read_once,8
	prefetchnta	(%rdi)
	prefetchnta	32(%rdi)
	.end
222
/*
 * prefetch_write_many: prefetcht0 hints at (%rdi) and 32(%rdi).
 * Uses the same prefetcht0 hint as prefetch_read_many.
 */
	.inline	prefetch_write_many,8
	prefetcht0	(%rdi)
	prefetcht0	32(%rdi)
	.end
227
/*
 * prefetch_write_once: prefetcht0 hints at (%rdi) and 32(%rdi).
 * Uses the same prefetcht0 hint as prefetch_read_many.
 */
	.inline	prefetch_write_once,8
	prefetcht0	(%rdi)
	prefetcht0	32(%rdi)
	.end
232