1/*- 2 * Copyright (c) 2005 3 * Bill Paul <wpaul@windriver.com>. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by Bill Paul. 16 * 4. Neither the name of the author nor the names of any co-contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 30 * THE POSSIBILITY OF SUCH DAMAGE. 31 * 32 * $FreeBSD$ 33 */ 34 35/* The 'ret' macro doesn't work in this file if GPROF is enabled. 
*/
#ifdef GPROF
#undef GPROF
#endif

#include <machine/asmacros.h>

/*
 * This file contains assembly language wrappers for the different
 * calling conventions supported by Windows on the i386 architecture.
 * In FreeBSD, the whole OS typically uses the same C calling convention
 * everywhere, namely _cdecl. Windows, on the other hand, uses several
 * different C calling conventions depending on the circumstances:
 *
 * _stdcall: Used for most ordinary Windows APIs. With _stdcall,
 * arguments are passed on the stack, and the callee unwinds the stack
 * before returning control to the caller. Not suitable for variadic
 * functions.
 *
 * _fastcall: Used for some APIs that may be invoked frequently and
 * where speed is a critical factor (e.g. KeAcquireSpinLock() and
 * KeReleaseSpinLock()). Similar to _stdcall, except the first 2 32-bit
 * or smaller arguments are passed in the %ecx and %edx registers
 * instead of on the stack. Not suitable for variadic functions.
 *
 * _cdecl: Used for standard C library routines and for variadic
 * functions.
 *
 * _regparm(3): Used for certain assembly routines. All arguments
 * passed in %eax, %edx and %ecx (in that order).
 *
 * Furthermore, there is an additional wrinkle that's not obvious
 * with all code: Microsoft supports the use of exceptions in C
 * (__try/__except) both in user _and_ kernel mode. Sadly, Windows
 * structured exception handling uses machine-specific features
 * that conflict rather badly with FreeBSD. (See utility routines
 * at the end of this module for more details.)
 *
 * We want to support these calling conventions in as portable a manner
 * as possible. The trick is doing it not only with different versions
 * of GNU C, but with compilers other than GNU C (e.g. the Solaris
 * SunOne C compiler).
 * The only sure-fire method is with assembly
 * language trampoline code which fixes up the argument passing,
 * stack unwinding and exception/thread context all at once.
 *
 * You'll notice that we call the thunk/unthunk routines in the
 * *_wrap() functions in an awkward way. Rather than branching
 * directly to the address, we load the address into a register
 * first as a literal value, then we branch to it. This is done
 * to ensure that the assembler doesn't translate the branch into
 * a relative branch. We use the *_wrap() routines here as templates
 * and create the actual trampolines at run time, at which point
 * we only know the absolute addresses of the thunk and unthunk
 * routines. So we need to make sure the templates have enough
 * room in them for the full address.
 *
 * Also note that when we call a thunk/unthunk routine after
 * invoking a wrapped function, we have to make sure to preserve
 * the value returned from that function. Most functions return
 * a 32-bit value in %eax, however some routines return 64-bit
 * values, which span both %eax and %edx. Consequently, we have
 * to preserve both registers.
 */

/*
 * Handle _stdcall going from Windows to UNIX.
 * This is frustrating, because to do it right you have to
 * know how many arguments the called function takes, and there's
 * no way to figure this out on the fly: you just have to be told
 * ahead of time. We assume there will be at most 16 arguments. I don't
 * think there are any Windows APIs that require this many.
*/

	.globl x86_stdcall_wrap_call
	.globl x86_stdcall_wrap_arg
	.globl x86_stdcall_wrap_end

/*
 * x86_stdcall_wrap: template for a Windows-to-UNIX _stdcall trampoline.
 *
 * On entry the stack holds the return address followed by the stack
 * arguments supplied by the caller. The code saves %esi and %edi,
 * reserves 64 bytes, and always copies 16 dwords of caller arguments
 * into that area (whatever the real argument count is) so that the
 * target sees them as ordinary stack arguments. It then unthunks,
 * calls the target, thunks again while keeping %eax/%edx intact,
 * unwinds its own frame and returns with "ret $imm16".
 *
 * Labels inside the template:
 *   x86_stdcall_wrap_call - the "movl $0,%eax" that loads the target
 *                           address; the 0 is a placeholder.
 *   x86_stdcall_wrap_arg  - the "ret $0xFF"; the 0xFF is a placeholder
 *                           for the number of bytes to pop.
 *   x86_stdcall_wrap_end  - end of the template.
 *
 * NOTE(review): the code that copies and patches this template is not
 * in this file. Presumably it rewrites the operand bytes that follow
 * the _call and _arg labels, so the encodings of those two
 * instructions must not change (e.g. do not turn the movl into an
 * xor) - confirm against the trampoline builder.
 *
 * NOTE(review): "rep movsl" copies upward only when the direction flag
 * is clear; nothing here clears it, so this assumes callers enter
 * with DF=0 - TODO confirm.
 */
ENTRY(x86_stdcall_wrap)
	push	%esi			# save %esi
	push	%edi			# and %edi
	sub	$64,%esp		# room for 16 dword arguments
	mov	%esp,%esi
	add	$64+8+4,%esi		# skip arg area, 2 saved regs, ret addr
	mov	%esp,%edi		# copy destination is the new arg area
	mov	$16,%ecx		# handle up to 16 args
	rep
	movsl

	movl	$ctxsw_wtou, %eax	# absolute address, see file header
	call	*%eax			# unthunk

x86_stdcall_wrap_call:
	movl	$0,%eax			# placeholder for the target address
	call	*%eax			# jump to routine
	push	%eax			# preserve return val
	push	%edx

	movl	$ctxsw_utow, %eax	# absolute address, see file header
	call	*%eax			# thunk

	pop	%edx
	pop	%eax			# restore return val

	add	$64,%esp		# clean the stack
	pop	%edi
	pop	%esi
x86_stdcall_wrap_arg:
	ret	$0xFF			# placeholder byte count to pop
x86_stdcall_wrap_end:


/*
 * Handle _stdcall going from UNIX to Windows. This routine
 * expects to be passed the function to be called, number of
 * args and the arguments for the Windows function on the stack.
 *
 * After the two pushes the frame is: 0=%edi, 4=%esi, 8=return address,
 * 12=function, 16=argument count, 20=first argument. The arguments are
 * copied to a new area just below that frame. Once "rep movsl" has
 * finished, %edi has advanced past the copied arguments and therefore
 * equals the value %esp had right after the two pushes. That is why
 * the function pointer is fetched from 12(%edi), and why
 * "mov %edi,%esp" restores the stack regardless of how many bytes the
 * callee popped. This relies on %edi being preserved by ctxsw_utow
 * and by the called routine.
 *
 * The %eax/%edx pair produced by the called routine is returned
 * unchanged.
 *
 * NOTE(review): as with x86_stdcall_wrap, "rep movsl" assumes the
 * direction flag is clear on entry - TODO confirm.
 */

ENTRY(x86_stdcall_call)
	push	%esi			# must preserve %esi
	push	%edi			# and %edi

	mov	16(%esp),%eax		# get arg cnt
	mov	%eax,%ecx		# save as copy count
	mov	%esp,%esi		# Set source address register to point to
	add	$20,%esi		# first argument to be forwarded.
	shl	$2,%eax			# turn arg cnt into offset
	sub	%eax,%esp		# shift stack to new location
	mov	%esp,%edi		# store dest copy addr
	rep				# do the copy
	movsl

	call	ctxsw_utow		# thunk

	call	*12(%edi)		# branch to stdcall routine
	push	%eax			# preserve return val
	push	%edx

	call	ctxsw_wtou		# unthunk

	pop	%edx
	pop	%eax			# restore return val
	mov	%edi,%esp		# restore stack
	pop	%edi			# restore %edi
	pop	%esi			# and %esi
	ret

/*
 * Fastcall support. Similar to _stdcall, except the first
 * two arguments are passed in %ecx and %edx. It happens we
 * only support a small number of _fastcall APIs, none of them
 * take more than three arguments.
So to keep the code size
 * and complexity down, we only handle 3 arguments here.
 */

/* Call _fastcall function going from Windows to UNIX. */

	.globl x86_fastcall_wrap_call
	.globl x86_fastcall_wrap_arg
	.globl x86_fastcall_wrap_end

/*
 * x86_fastcall_wrap: template for a Windows-to-UNIX _fastcall
 * trampoline.
 *
 * On entry %ecx and %edx hold the first two arguments and 4(%esp)
 * holds the third. The three values are pushed so that the target
 * sees them as ordinary stack arguments (first argument lowest), then
 * the code unthunks, calls the target, thunks again while keeping
 * %eax/%edx intact, drops the 12 bytes it pushed and returns with
 * "ret $imm16".
 *
 * x86_fastcall_wrap_call marks the "mov $0,%eax" whose 0 is a
 * placeholder for the target address, x86_fastcall_wrap_arg marks the
 * "ret $0xFF" whose 0xFF is a placeholder for the number of bytes to
 * pop, and x86_fastcall_wrap_end marks the end of the template.
 *
 * NOTE(review): the code that copies and patches this template is not
 * in this file. Presumably it rewrites the operand bytes that follow
 * the _call and _arg labels, so the encodings of those two
 * instructions must not change - confirm against the trampoline
 * builder.
 */
ENTRY(x86_fastcall_wrap)
	mov	4(%esp),%eax		# third arg, the only one on the stack
	push	%eax			# push arg 3,
	push	%edx			# arg 2
	push	%ecx			# and arg 1

	movl	$ctxsw_wtou, %eax	# absolute address, see file header
	call	*%eax			# unthunk

x86_fastcall_wrap_call:
	mov	$0,%eax			# placeholder for the target address
	call	*%eax			# branch to fastcall routine
	push	%eax			# preserve return val
	push	%edx

	movl	$ctxsw_utow, %eax	# absolute address, see file header
	call	*%eax			# thunk

	pop	%edx
	pop	%eax			# restore return val
	add	$12,%esp		# clean the stack
x86_fastcall_wrap_arg:
	ret	$0xFF			# placeholder byte count to pop
x86_fastcall_wrap_end:

/*
 * Call _fastcall function going from UNIX to Windows.
 * This routine isn't normally used since NDIS miniport drivers
 * only have _stdcall entry points, but it's provided anyway
 * to round out the API, and for testing purposes.
 *
 * Stack arguments on entry, above the return address: the function to
 * call, then up to three arguments for it. The first two are loaded
 * into %ecx and %edx and the third is pushed.
 *
 * A _fastcall callee that takes a stack argument pops it itself, while
 * one that takes two or fewer arguments leaves our pushed word in
 * place. To be correct in both cases we remember %esp in %esi before
 * pushing and restore it from there after the call, instead of
 * adjusting %esp by a fixed amount. This relies on %esi being
 * preserved by ctxsw_utow and by the called routine, in the same way
 * x86_stdcall_call relies on %edi.
 *
 * The %eax/%edx pair produced by the called routine is returned
 * unchanged.
 */

ENTRY(x86_fastcall_call)
	push	%esi			# must preserve %esi
	mov	%esp,%esi		# 8(%esi)=func, 12/16/20(%esi)=args 1-3
	push	20(%esi)		# third arg goes on the stack

	call	ctxsw_utow		# thunk

	mov	12(%esi),%ecx		# first arg
	mov	16(%esi),%edx		# second arg
	call	*8(%esi)		# branch to fastcall routine
	mov	%esi,%esp		# drop the stack arg if the callee did not
	push	%eax			# preserve return val
	push	%edx

	call	ctxsw_wtou		# unthunk

	pop	%edx
	pop	%eax			# restore return val
	pop	%esi			# restore %esi
	ret

/*
 * Call regparm(3) function going from Windows to UNIX. Arguments
 * are passed in %eax, %edx and %ecx. Note that while additional
 * arguments are passed on the stack, we never bother with them,
 * since the only regparm(3) routines we need to wrap never take
 * more than 3 arguments.
*/

	.globl x86_regparm_wrap_call
	.globl x86_regparm_wrap_end

/*
 * x86_regparm_wrap: template for a Windows-to-UNIX regparm(3)
 * trampoline.
 *
 * The three register arguments are pushed so that the target sees
 * them as ordinary stack arguments (%eax lowest, then %edx, then
 * %ecx). The code then unthunks, calls the target, thunks again while
 * keeping %eax/%edx intact, drops the 12 bytes it pushed and returns.
 *
 * x86_regparm_wrap_call marks the "movl $0,%eax" whose 0 is a
 * placeholder for the target address and x86_regparm_wrap_end marks
 * the end of the template. Unlike the _stdcall and _fastcall
 * templates this one ends in a plain "ret", so there is no _arg label.
 *
 * NOTE(review): the code that copies and patches this template is not
 * in this file. Presumably it rewrites the operand bytes that follow
 * the _call label, so the encoding of that instruction must not
 * change - confirm against the trampoline builder.
 */
ENTRY(x86_regparm_wrap)
	push	%ecx			# third arg
	push	%edx			# second arg
	push	%eax			# first arg

	movl	$ctxsw_wtou, %eax	# absolute address, see file header
	call	*%eax			# unthunk

x86_regparm_wrap_call:
	movl	$0,%eax			# placeholder for the target address
	call	*%eax			# jump to routine
	push	%eax			# preserve return val
	push	%edx			# preserve return val

	movl	$ctxsw_utow, %eax	# absolute address, see file header
	call	*%eax			# thunk

	pop	%edx			# restore return val
	pop	%eax			# restore return val
	add	$12,%esp		# restore stack
	ret
x86_regparm_wrap_end:

/*
 * Call regparm(3) function going from UNIX to Windows.
 * This routine isn't normally used since NDIS miniport drivers
 * only have _stdcall entry points, but it's provided anyway
 * to round out the API, and for testing purposes.
 *
 * Nothing is pushed before the call, so all offsets are relative to
 * the entry %esp: 4(%esp) is the function to call and 8, 12 and
 * 16(%esp) are the three arguments, loaded into %eax, %edx and %ecx
 * respectively. The %eax/%edx pair produced by the called routine is
 * returned unchanged.
 */

ENTRY(x86_regparm_call)
	call	ctxsw_utow		# thunk

	mov	8(%esp),%eax		# first arg
	mov	12(%esp),%edx		# second arg
	mov	16(%esp),%ecx		# third arg
	call	*4(%esp)		# branch to regparm routine
	push	%eax			# preserve return val
	push	%edx			# preserve return val

	call	ctxsw_wtou		# unthunk

	pop	%edx			# restore return val
	pop	%eax			# restore return val
	ret

/*
 * Ugly hack alert:
 *
 * On Win32/i386, using __try/__except results in code that tries to
 * manipulate what's supposed to be the Windows Thread Environment
 * Block (TEB), which one accesses via the %fs register. In particular,
 * %fs:0 (the first DWORD in the TEB) points to the exception
 * registration list. Unfortunately, FreeBSD uses %fs for the
 * per-cpu data structure (pcpu), and we can't allow Windows code
 * to muck with that. I don't even know what Solaris uses %fs for
 * (or if it even uses it at all).
 *
 * Even worse, in 32-bit protected mode, %fs is a selector that
 * refers to an entry in either the GDT or the LDT.
Ideally, we would 317 * like to be able to temporarily point it at another descriptor 318 * while Windows code executes, but to do that we need a separate 319 * descriptor entry of our own to play with. 320 * 321 * Therefore, we go to some trouble to learn the existing layout of 322 * the GDT and update it to include an extra entry that we can use. 323 * We need the following utility routines to help us do that. On 324 * FreeBSD, index #7 in the GDT happens to be unused, so we turn 325 * this into our own data segment descriptor. It would be better 326 * if we could use a private LDT entry, but there's no easy way to 327 * do that in SMP mode because of the way FreeBSD handles user LDTs. 328 * 329 * Once we have a custom descriptor, we have to thunk/unthunk whenever 330 * we cross between FreeBSD code and Windows code. The thunking is 331 * based on the premise that when executing instructions in the 332 * Windows binary itself, we won't go to sleep. This is because in 333 * order to yield the CPU, the code has to call back out to a FreeBSD 334 * routine first, and when that happens we can unthunk in order to 335 * restore FreeBSD context. What we're desperately trying to avoid is 336 * being involuntarily pre-empted with the %fs register still pointing 337 * to our fake TIB: if FreeBSD code runs with %fs pointing at our 338 * Windows TIB instead of pcpu, we'll panic the kernel. Fortunately, 339 * the only way involuntary preemption can occur is if an interrupt 340 * fires, and the trap handler saves/restores %fs for us. 341 * 342 * The thunking routines themselves, ctxsw_utow() (Context SWitch UNIX 343 * to Windows) and ctxsw_wtou() (Context SWitch Windows to UNIX), are 344 * external to this module. This is done simply because it's easier 345 * to manipulate data structures in C rather than assembly. 
*/

/*
 * x86_getldt: read the current descriptor-table registers.
 * First stack argument: address where "sgdtl" stores the GDT register
 * (limit and base). Second stack argument: address where "sldt"
 * stores the LDT selector. Always returns 0 in %eax.
 */
ENTRY(x86_getldt)
	movl	4(%esp),%eax		# where to store the GDT register
	sgdtl	(%eax)
	movl	8(%esp),%eax		# where to store the LDT selector
	sldt	(%eax)
	xor	%eax,%eax		# return 0
	ret

/*
 * x86_setldt: load the descriptor-table registers.
 * First stack argument: address of the GDT descriptor handed to
 * "lgdt". Second stack argument: the LDT selector itself (not a
 * pointer); its low 16 bits are handed to "lldt". Always returns 0
 * in %eax.
 *
 * NOTE(review): the "jmp 1f / nop" after lgdt looks like the usual
 * short jump used to resynchronize the instruction stream after
 * changing the GDT - confirm before removing or reordering it.
 */
ENTRY(x86_setldt)
	movl	4(%esp),%eax		# address of the new GDT descriptor
	lgdt	(%eax)
	jmp	1f
	nop
1:
	movl	8(%esp),%eax		# new LDT selector (by value)
	lldt	%ax
	xor	%eax,%eax		# return 0
	ret

/*
 * x86_getfs: return the current %fs selector in %ax. Only the low
 * 16 bits of %eax are written.
 */
ENTRY(x86_getfs)
	mov	%fs,%ax
	ret

/*
 * x86_setfs: load %fs with the selector passed as the first stack
 * argument.
 */
ENTRY(x86_setfs)
	mov	4(%esp),%fs
	ret

/*
 * x86_gettid: return the 32-bit value stored at offset 12 of the
 * segment that %fs currently selects.
 * NOTE(review): presumably this is a field of the fake Windows TIB
 * described above, so it is only meaningful while %fs points there -
 * confirm against the C-side structure definition.
 */
ENTRY(x86_gettid)
	mov	%fs:12,%eax
	ret

/*
 * x86_critical_enter: disable interrupts with "cli".
 */
ENTRY(x86_critical_enter)
	cli
	ret

/*
 * x86_critical_exit: enable interrupts with "sti".
 * NOTE(review): this enables interrupts unconditionally rather than
 * restoring the state that was in effect before x86_critical_enter,
 * so the pair does not nest - confirm callers do not depend on that.
 */
ENTRY(x86_critical_exit)
	sti
	ret