1144888Swpaul/*-
2144888Swpaul * Copyright (c) 2005
3144888Swpaul *      Bill Paul <wpaul@windriver.com>.  All rights reserved.
4144888Swpaul *
5144888Swpaul * Redistribution and use in source and binary forms, with or without
6144888Swpaul * modification, are permitted provided that the following conditions
7144888Swpaul * are met:
8144888Swpaul * 1. Redistributions of source code must retain the above copyright
9144888Swpaul *    notice, this list of conditions and the following disclaimer.
10144888Swpaul * 2. Redistributions in binary form must reproduce the above copyright
11144888Swpaul *    notice, this list of conditions and the following disclaimer in the
12144888Swpaul *    documentation and/or other materials provided with the distribution.
13144888Swpaul * 3. All advertising materials mentioning features or use of this software
14144888Swpaul *    must display the following acknowledgement:
15144888Swpaul *      This product includes software developed by Bill Paul.
16144888Swpaul * 4. Neither the name of the author nor the names of any co-contributors
17144888Swpaul *    may be used to endorse or promote products derived from this software
18144888Swpaul *    without specific prior written permission.
19144888Swpaul *
20144888Swpaul * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
21144888Swpaul * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22144888Swpaul * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23144888Swpaul * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
24144888Swpaul * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25144888Swpaul * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26144888Swpaul * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27144888Swpaul * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28144888Swpaul * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29144888Swpaul * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
30144888Swpaul * THE POSSIBILITY OF SUCH DAMAGE.
31144888Swpaul *
32144888Swpaul * $FreeBSD$
33144888Swpaul */
34144888Swpaul
35159548Sjhb/* The 'ret' macro doesn't work in this file if GPROF is enabled. */
36159548Sjhb#ifdef GPROF
37159548Sjhb#undef GPROF
38159548Sjhb#endif
39159548Sjhb
40144888Swpaul#include <machine/asmacros.h>
41144888Swpaul
42144888Swpaul/*
43144888Swpaul * This file contains assembly language wrappers for the different
44144888Swpaul * calling conventions supported by Windows on the i386 architecture.
45144888Swpaul * In FreeBSD, the whole OS typically uses the same C calling convention
46144888Swpaul * everywhere, namely _cdecl. Windows, on the other hand, uses several
47144888Swpaul * different C calling conventions depending on the circumstances:
48144888Swpaul *
49144888Swpaul * _stdcall: Used for most ordinary Windows APIs. With _stdcall,
50144888Swpaul * arguments are passed on the stack, and the callee unwinds the stack
51144888Swpaul * before returning control to the caller. Not suitable for variadic
52144888Swpaul * functions.
53144888Swpaul *
54144888Swpaul * _fastcall: Used for some APIs that may be invoked frequently and
55144888Swpaul * where speed is a critical factor (e.g. KeAcquireSpinLock() and
56144888Swpaul * KeReleaseSpinLock()) Similar to _stdcall, except the first 2 32-bit
57144888Swpaul * or smaller arguments are passed in the %ecx and %edx registers
58144888Swpaul * instead of on the stack. Not suitable for variadic functions.
59144888Swpaul *
60144888Swpaul * _cdecl: Used for standard C library routines and for variadic
61144888Swpaul * functions.
62144888Swpaul *
63144888Swpaul * _regparm(3): Used for certain assembly routines. All arguments
64144888Swpaul * passed in %eax, %ecx and %edx.
65144888Swpaul *
66144888Swpaul * Furthermore, there is an additional wrinkle that's not obvious
67144888Swpaul * with all code: Microsoft supports the use of exceptions in C
68144888Swpaul * (__try/__except) both in user _and_ kernel mode. Sadly, Windows
69144888Swpaul * structured exception handling uses machine-specific features
70144888Swpaul * that conflict rather badly with FreeBSD. (See utility routines
71144888Swpaul * at the end of this module for more details.)
72144888Swpaul *
73144888Swpaul * We want to support these calling conventions in as portable a manner
74144888Swpaul * as possible. The trick is doing it not only with different versions
75144888Swpaul * of GNU C, but with compilers other than GNU C (e.g. the Solaris
76144888Swpaul * SunOne C compiler). The only sure fire method is with assembly
77144888Swpaul * language trampoline code which both fixes up the argument passing,
78144888Swpaul * stack unwinding and exception/thread context all at once.
79144888Swpaul *
80144888Swpaul * You'll notice that we call the thunk/unthunk routines in the
81144888Swpaul * *_wrap() functions in an awkward way. Rather than branching
82144888Swpaul * directly to the address, we load the address into a register
83144888Swpaul * first as a literal value, then we branch to it. This is done
84144888Swpaul * to ensure that the assembler doesn't translate the branch into
85144888Swpaul * a relative branch. We use the *_wrap() routines here as templates
86144888Swpaul * and create the actual trampolines at run time, at which point
87144888Swpaul * we only know the absolute addresses of the thunk and unthunk
88144888Swpaul * routines. So we need to make sure the templates have enough
89144888Swpaul * room in them for the full address.
90151207Swpaul *
91151207Swpaul * Also note that when we call a thunk/unthunk routine after
92151207Swpaul * invoking a wrapped function, we have to make sure to preserve
93151207Swpaul * the value returned from that function. Most functions return
94151207Swpaul * a 32-bit value in %eax, however some routines return 64-bit
95151207Swpaul * values, which span both %eax and %edx. Consequently, we have
96151207Swpaul * to preserve both registers.
97144888Swpaul */
98144888Swpaul
99144888Swpaul/*
100144888Swpaul * Handle _stdcall going from Windows to UNIX.
101144888Swpaul * This is frustrating, because to do it right you have to
102144888Swpaul * know how many arguments the called function takes, and there's
103144888Swpaul * no way to figure this out on the fly: you just have to be told
104144888Swpaul * ahead of time. We assume there will be 16 arguments. I don't
105144888Swpaul * think there are any Windows APIs that require this many.
106144888Swpaul */
107144888Swpaul
/*
 * x86_stdcall_wrap is a code template for a Windows -> UNIX _stdcall
 * trampoline. The exported labels mark spots inside the template:
 *   x86_stdcall_wrap_call  the "movl $0,%eax" whose zero immediate stands
 *                          in for the address of the wrapped routine
 *   x86_stdcall_wrap_arg   the "ret $0xFF" whose immediate is a placeholder
 *                          (presumably the number of argument bytes to pop;
 *                          TODO confirm against the code that patches it)
 *   x86_stdcall_wrap_end   the first byte after the template
 *
 * Stack after the prologue: 0..63(%esp) = copy of 16 argument dwords,
 * 64 = saved %edi, 68 = saved %esi, 72 = return address, 76 = first
 * argument supplied by the Windows caller.
 */
108144888Swpaul	.globl x86_stdcall_wrap_call
109144888Swpaul	.globl x86_stdcall_wrap_arg
110144888Swpaul	.globl x86_stdcall_wrap_end
111144888Swpaul
112144888SwpaulENTRY(x86_stdcall_wrap)
113144888Swpaul	push	%esi		# save %esi, used below as the copy source
114144888Swpaul	push	%edi		# save %edi, used below as the copy destination
115144888Swpaul	sub	$64,%esp	# reserve room for 16 dword arguments
116144888Swpaul	mov	%esp,%esi
117144888Swpaul	add	$64+8+4,%esi	# skip arg area, two saved regs and return address
118144888Swpaul	mov	%esp,%edi	# destination is the area just reserved
119144888Swpaul	mov	$16,%ecx	# handle up to 16 args
120144888Swpaul	rep
121144888Swpaul	movsl
122144888Swpaul
123144888Swpaul	movl	$ctxsw_wtou, %eax	# absolute address, so the call is not relative
124144888Swpaul	call	*%eax           # unthunk
125144888Swpaul
126144888Swpaulx86_stdcall_wrap_call:
127144888Swpaul        movl    $0,%eax		# placeholder immediate for the wrapped routine
128144888Swpaul	call	*%eax		# jump to routine
129151207Swpaul	push	%eax		# preserve return val
130151207Swpaul	push	%edx		# high half of a 64-bit return value
131144888Swpaul
132144888Swpaul	movl	$ctxsw_utow, %eax	# absolute address, so the call is not relative
133144888Swpaul	call	*%eax		# thunk
134144888Swpaul
135151207Swpaul	pop	%edx
136151207Swpaul	pop	%eax		# restore return val
137151207Swpaul
138144888Swpaul	add	$64,%esp	# clean the stack
139144888Swpaul	pop	%edi
140144888Swpaul	pop	%esi
141144888Swpaulx86_stdcall_wrap_arg:
142144888Swpaul	ret	$0xFF		# placeholder immediate, see x86_stdcall_wrap_arg above
143144888Swpaulx86_stdcall_wrap_end:
144144888Swpaul
145144888Swpaul
146144888Swpaul/*
147144888Swpaul * Handle _stdcall going from UNIX to Windows. This routine
148144888Swpaul * expects to be passed the function to be called, number of
149144888Swpaul * args and the arguments for the Windows function on the stack.
150144888Swpaul */
151144888Swpaul
/*
 * x86_stdcall_call(func, argcnt, args...)
 *
 * Copies argcnt dwords of arguments to a fresh area at the top of the
 * stack, switches to Windows context, calls func, switches back and
 * returns the result in %edx:%eax. %edi doubles as the frame anchor:
 * after the copy it points at the saved %edi slot, so func is found at
 * 12(%edi) and the stack can be restored no matter how much the callee
 * popped.
 */
ENTRY(x86_stdcall_call)
	pushl	%esi			# callee-saved, used as copy source
	pushl	%edi			# callee-saved, used as copy destination

	/* 0=saved %edi, 4=saved %esi, 8=return, 12=func, 16=argcnt, 20=args */
	movl	16(%esp),%ecx		# number of dwords to copy
	leal	20(%esp),%esi		# first argument to be forwarded
	leal	(,%ecx,4),%eax		# byte size of the argument list
	subl	%eax,%esp		# make room for the copy
	movl	%esp,%edi		# copy destination
	rep
	movsl				# leaves %edi at the saved %edi slot

	call	ctxsw_utow		# thunk

	call	*12(%edi)		# branch to stdcall routine
	pushl	%eax			# preserve return val
	pushl	%edx

	call	ctxsw_wtou		# unthunk

	popl	%edx
	popl	%eax			# restore return val
	movl	%edi,%esp		# restore stack
	popl	%edi
	popl	%esi
	ret
180144888Swpaul
181144888Swpaul/*
182144888Swpaul * Fastcall support. Similar to _stdcall, except the first
183144888Swpaul * two arguments are passed in %ecx and %edx. It happens we
184144888Swpaul * only support a small number of _fastcall APIs, none of them
185144888Swpaul * take more than three arguments. So to keep the code size
186144888Swpaul * and complexity down, we only handle 3 arguments here.
187144888Swpaul */
188144888Swpaul
189144888Swpaul/* Call _fastcall function going from Windows to UNIX. */
190144888Swpaul
/*
 * x86_fastcall_wrap is a code template for a Windows -> UNIX _fastcall
 * trampoline. The exported labels mark spots inside the template:
 *   x86_fastcall_wrap_call  the "mov $0,%eax" whose zero immediate stands
 *                           in for the address of the wrapped routine
 *   x86_fastcall_wrap_arg   the "ret $0xFF" whose immediate is a placeholder
 *                           (presumably the number of stack argument bytes
 *                           to pop; TODO confirm against the patching code)
 *   x86_fastcall_wrap_end   the first byte after the template
 *
 * The three pushes turn the register/stack arguments into a plain
 * stack argument list: %ecx first, %edx second, the stack dword third.
 */
191144888Swpaul	.globl x86_fastcall_wrap_call
192144888Swpaul	.globl x86_fastcall_wrap_arg
193144888Swpaul	.globl x86_fastcall_wrap_end
194144888Swpaul
195144888SwpaulENTRY(x86_fastcall_wrap)
196144888Swpaul	mov	4(%esp),%eax	# dword just above the return address (third arg)
197144888Swpaul	push	%eax		# becomes the third stack argument
198144888Swpaul	push	%edx		# becomes the second stack argument
199144888Swpaul	push	%ecx		# becomes the first stack argument
200144888Swpaul
201144888Swpaul	movl	$ctxsw_wtou, %eax	# absolute address, so the call is not relative
202144888Swpaul	call	*%eax		# unthunk
203144888Swpaul
204144888Swpaulx86_fastcall_wrap_call:
205144888Swpaul	mov	$0,%eax		# placeholder immediate for the wrapped routine
206144888Swpaul	call	*%eax		# branch to fastcall routine
207144913Swpaul	push	%eax		# preserve return val
208151207Swpaul	push	%edx		# high half of a 64-bit return value
209144888Swpaul
210144888Swpaul	movl	$ctxsw_utow, %eax	# absolute address, so the call is not relative
211144888Swpaul	call	*%eax		# thunk
212144888Swpaul
213151207Swpaul	pop	%edx
214144913Swpaul	pop	%eax		# restore return val
215144888Swpaul	add	$12,%esp	# clean the stack
216144888Swpaulx86_fastcall_wrap_arg:
217144888Swpaul	ret	$0xFF		# placeholder immediate, see x86_fastcall_wrap_arg above
218144888Swpaulx86_fastcall_wrap_end:
219144888Swpaul
220144888Swpaul/*
221144888Swpaul * Call _fastcall function going from UNIX to Windows.
222144888Swpaul * This routine isn't normally used since NDIS miniport drivers
223144888Swpaul * only have _stdcall entry points, but it's provided anyway
224144888Swpaul * to round out the API, and for testing purposes.
225144888Swpaul */
226144888Swpaul
/*
 * x86_fastcall_call(func, arg1, arg2, arg3)
 *
 * Calls a Windows _fastcall routine from UNIX code: arg1 goes in %ecx,
 * arg2 in %edx and arg3 on the stack. The result comes back in
 * %edx:%eax.
 *
 * A _fastcall callee removes its own stack arguments, so after the call
 * %esp differs depending on whether the routine really took a third
 * argument. Unconditionally adding 4 to %esp afterwards would step over
 * our return address in the three-argument case. Instead the frame is
 * anchored in %edi (callee-saved in both conventions) and %esp is
 * restored from it, which is correct for either callee.
 */
ENTRY(x86_fastcall_call)
	push	%edi		# callee-saved, holds the frame anchor
	mov	%esp,%edi	# 0=saved %edi, 4=return, 8=func, 12/16/20=args
	push	20(%edi)	# third argument is passed on the stack

	call	ctxsw_utow	# thunk

	mov	12(%edi),%ecx	# first argument
	mov	16(%edi),%edx	# second argument
	call	*8(%edi)	# branch to fastcall routine
	push	%eax		# preserve return val
	push	%edx

	call	ctxsw_wtou	# unthunk

	pop	%edx
	pop	%eax		# restore return val
	mov	%edi,%esp	# clean the stack whether or not the callee did
	pop	%edi
	ret
245144888Swpaul
246144888Swpaul/*
247144888Swpaul * Call regparm(3) function going from Windows to UNIX. Arguments
248144888Swpaul * are passed in %eax, %edx and %ecx. Note that while additional
249144888Swpaul * arguments are passed on the stack, we never bother with them,
250144888Swpaul * since the only regparm(3) routines we need to wrap never take
251144888Swpaul * more than 3 arguments.
252144888Swpaul */
253144888Swpaul
/*
 * x86_regparm_wrap is a code template for a Windows -> UNIX regparm(3)
 * trampoline. The exported labels mark spots inside the template:
 *   x86_regparm_wrap_call  the "movl $0,%eax" whose zero immediate stands
 *                          in for the address of the wrapped routine
 *   x86_regparm_wrap_end   the first byte after the template
 * There is no _arg label here: the template ends in a plain "ret".
 *
 * The three pushes turn the register arguments into a stack argument
 * list: %eax first, %edx second, %ecx third.
 */
254144888Swpaul	.globl x86_regparm_wrap_call
255144888Swpaul	.globl x86_regparm_wrap_end
256144888Swpaul
257144888SwpaulENTRY(x86_regparm_wrap)
258144888Swpaul	push	%ecx		# becomes the third stack argument
259144888Swpaul	push	%edx		# becomes the second stack argument
260144888Swpaul	push	%eax		# becomes the first stack argument
261144888Swpaul
262144888Swpaul	movl	$ctxsw_wtou, %eax	# absolute address, so the call is not relative
263144888Swpaul	call	*%eax		# unthunk
264144888Swpaul
265144888Swpaulx86_regparm_wrap_call:
266144888Swpaul	movl	$0,%eax		# placeholder immediate for the wrapped routine
267144888Swpaul	call	*%eax		# jump to routine
268144913Swpaul	push	%eax		# preserve return val
269144913Swpaul	push	%edx		# preserve return val
270144888Swpaul
271144888Swpaul	movl	$ctxsw_utow, %eax	# absolute address, so the call is not relative
272144888Swpaul	call	*%eax		# thunk
273144888Swpaul
274144913Swpaul	pop	%edx		# restore return val
275144913Swpaul	pop	%eax		# restore return val
276144888Swpaul	add	$12,%esp	# restore stack
277144888Swpaul	ret
278144888Swpaulx86_regparm_wrap_end:
279144888Swpaul
280144888Swpaul/*
281144888Swpaul * Call regparm(3) function going from UNIX to Windows.
282144888Swpaul * This routine isn't normally used since NDIS miniport drivers
283144888Swpaul * only have _stdcall entry points, but it's provided anyway
284144888Swpaul * to round out the API, and for testing purposes.
285144888Swpaul */
286144888Swpaul
/*
 * x86_regparm_call(func, arg1, arg2, arg3)
 *
 * Switches to Windows context, loads the three arguments into %eax,
 * %edx and %ecx, calls func, switches back and returns the result in
 * %edx:%eax. Nothing is pushed before the call, so the offsets below
 * are relative to the entry stack: 4=func, 8/12/16=arguments.
 */
ENTRY(x86_regparm_call)
	call	ctxsw_utow		# thunk

	movl	16(%esp),%ecx		# third argument
	movl	12(%esp),%edx		# second argument
	movl	8(%esp),%eax		# first argument
	call	*4(%esp)		# branch to regparm routine

	pushl	%eax			# preserve return val (low half)
	pushl	%edx			# preserve return val (high half)

	call	ctxsw_wtou		# unthunk

	popl	%edx			# restore return val
	popl	%eax
	ret
302144888Swpaul
303144888Swpaul/*
304144888Swpaul * Ugly hack alert:
305144888Swpaul *
306144888Swpaul * On Win32/i386, using __try/__except results in code that tries to
307144888Swpaul * manipulate what's supposed to be the Windows Thread Environment
308144888Swpaul * Block (TEB), which one accesses via the %fs register. In particular,
309144888Swpaul * %fs:0 (the first DWORD in the TEB) points to the exception
310144888Swpaul * registration list. Unfortunately, FreeBSD uses %fs for the
311144888Swpaul * per-cpu data structure (pcpu), and we can't allow Windows code
312144888Swpaul * to muck with that. I don't even know what Solaris uses %fs for
313144888Swpaul * (or if it even uses it at all).
314144888Swpaul *
315144888Swpaul * Even worse, in 32-bit protected mode, %fs is a selector that
316144888Swpaul * refers to an entry in either the GDT or the LDT. Ideally, we would
317144888Swpaul * like to be able to temporarily point it at another descriptor
318144888Swpaul * while Windows code executes, but to do that we need a separate
319144888Swpaul * descriptor entry of our own to play with.
320144888Swpaul *
321144888Swpaul * Therefore, we go to some trouble to learn the existing layout of
322144888Swpaul * the GDT and update it to include an extra entry that we can use.
323144888Swpaul * We need the following utility routines to help us do that. On
324144888Swpaul * FreeBSD, index #7 in the GDT happens to be unused, so we turn
325144888Swpaul * this into our own data segment descriptor. It would be better
326144888Swpaul * if we could use a private LDT entry, but there's no easy way to
327144888Swpaul * do that in SMP mode because of the way FreeBSD handles user LDTs.
328144888Swpaul *
329144888Swpaul * Once we have a custom descriptor, we have to thunk/unthunk whenever
330144888Swpaul * we cross between FreeBSD code and Windows code. The thunking is
331144888Swpaul * based on the premise that when executing instructions in the
332144888Swpaul * Windows binary itself, we won't go to sleep. This is because in
333144888Swpaul * order to yield the CPU, the code has to call back out to a FreeBSD
334144888Swpaul * routine first, and when that happens we can unthunk in order to
335144888Swpaul * restore FreeBSD context. What we're desperately trying to avoid is
336144888Swpaul * being involuntarily pre-empted with the %fs register still pointing
337144888Swpaul * to our fake TIB: if FreeBSD code runs with %fs pointing at our
338144888Swpaul * Windows TIB instead of pcpu, we'll panic the kernel. Fortunately,
339144888Swpaul * the only way involuntary preemption can occur is if an interrupt
340144888Swpaul * fires, and the trap handler saves/restores %fs for us.
341144888Swpaul *
342144888Swpaul * The thunking routines themselves, ctxsw_utow() (Context SWitch UNIX
343144888Swpaul * to Windows) and ctxsw_wtou() (Context SWitch Windows to UNIX), are
344144888Swpaul * external to this module. This is done simply because it's easier
345144888Swpaul * to manipulate data structures in C rather than assembly.
346144888Swpaul */
347144888Swpaul
/*
 * x86_getldt(gdt_out, ldt_out)
 *
 * Stores the GDT register at the address given by the first argument
 * and the LDT selector at the address given by the second argument.
 * Always returns 0.
 */
ENTRY(x86_getldt)
	movl	4(%esp),%ecx		# where to store the GDT register
	movl	8(%esp),%edx		# where to store the LDT selector
	sgdtl	(%ecx)
	sldt	(%edx)
	xorl	%eax,%eax		# return 0
	ret
355144888Swpaul
/*
 * x86_setldt(gdt_in, ldt_sel)
 *
 * Loads the GDT register from the descriptor at the address given by
 * the first argument, then loads the LDT register with the low 16 bits
 * of the second argument. Always returns 0.
 */
ENTRY(x86_setldt)
	movl	4(%esp),%eax		# address of the new GDT descriptor
	lgdt	(%eax)
	jmp	1f			# short jump kept right after the GDT load
	nop
1:
	movl	8(%esp),%ecx		# new LDT selector in the low word
	lldt	%cx
	xorl	%eax,%eax		# return 0
	ret
366144888Swpaul
/*
 * x86_getfs()
 *
 * Returns the current %fs selector in %eax. The 16-bit move writes only
 * %ax, so %eax is zeroed first; otherwise bits 16-31 of the return
 * register would keep whatever value was there before the call.
 */
ENTRY(x86_getfs)
	xorl	%eax,%eax	# clear bits 16-31 of the return register
	mov	%fs,%ax		# selector goes in the low word
	ret
370144888Swpaul
/*
 * x86_setfs(sel)
 *
 * Loads %fs with the selector passed as the first stack argument.
 * No other register is modified.
 */
371144888SwpaulENTRY(x86_setfs)
372187948Sobrien	mov	4(%esp),%fs	# first argument, just above the return address
373144888Swpaul	ret
374144888Swpaul
/*
 * x86_gettid()
 *
 * Returns the dword stored at offset 12 of the segment that %fs
 * currently selects. Going by the name this is presumably a thread
 * identifier kept in the fake TIB; confirm against the C code that
 * lays out that structure.
 */
ENTRY(x86_gettid)
	movl	%fs:12,%eax		# dword at %fs offset 12
	ret
378151977Swpaul
/*
 * x86_critical_enter()
 *
 * Clears the interrupt flag on the current CPU. The previous flag
 * state is not saved.
 */
ENTRY(x86_critical_enter)
	cli				# interrupts off
	ret
382151977Swpaul
/*
 * x86_critical_exit()
 *
 * Sets the interrupt flag on the current CPU unconditionally; it does
 * not restore an earlier flag state.
 */
ENTRY(x86_critical_exit)
	sti				# interrupts on
	ret
386