/* subr_hal.c revision 189719 */
1/*- 2 * Copyright (c) 2003 3 * Bill Paul <wpaul@windriver.com>. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by Bill Paul. 16 * 4. Neither the name of the author nor the names of any co-contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 30 * THE POSSIBILITY OF SUCH DAMAGE. 
31 */ 32 33#include <sys/cdefs.h> 34__FBSDID("$FreeBSD: head/sys/compat/ndis/subr_hal.c 189719 2009-03-12 02:51:55Z weongyo $"); 35 36#include <sys/param.h> 37#include <sys/types.h> 38#include <sys/errno.h> 39 40#include <sys/callout.h> 41#include <sys/kernel.h> 42#include <sys/lock.h> 43#include <sys/mutex.h> 44#include <sys/proc.h> 45#include <sys/sched.h> 46#include <sys/module.h> 47 48#include <sys/systm.h> 49#include <machine/bus.h> 50 51#include <sys/bus.h> 52#include <sys/rman.h> 53 54#include <compat/ndis/pe_var.h> 55#include <compat/ndis/resource_var.h> 56#include <compat/ndis/cfg_var.h> 57#include <compat/ndis/ntoskrnl_var.h> 58#include <compat/ndis/hal_var.h> 59 60static void KeStallExecutionProcessor(uint32_t); 61static void WRITE_PORT_BUFFER_ULONG(uint32_t *, 62 uint32_t *, uint32_t); 63static void WRITE_PORT_BUFFER_USHORT(uint16_t *, 64 uint16_t *, uint32_t); 65static void WRITE_PORT_BUFFER_UCHAR(uint8_t *, 66 uint8_t *, uint32_t); 67static void WRITE_PORT_ULONG(uint32_t *, uint32_t); 68static void WRITE_PORT_USHORT(uint16_t *, uint16_t); 69static void WRITE_PORT_UCHAR(uint8_t *, uint8_t); 70static uint32_t READ_PORT_ULONG(uint32_t *); 71static uint16_t READ_PORT_USHORT(uint16_t *); 72static uint8_t READ_PORT_UCHAR(uint8_t *); 73static void READ_PORT_BUFFER_ULONG(uint32_t *, 74 uint32_t *, uint32_t); 75static void READ_PORT_BUFFER_USHORT(uint16_t *, 76 uint16_t *, uint32_t); 77static void READ_PORT_BUFFER_UCHAR(uint8_t *, 78 uint8_t *, uint32_t); 79static uint64_t KeQueryPerformanceCounter(uint64_t *); 80static void _KeLowerIrql(uint8_t); 81static uint8_t KeRaiseIrqlToDpcLevel(void); 82static void dummy (void); 83 84#define NDIS_MAXCPUS 64 85static struct mtx disp_lock[NDIS_MAXCPUS]; 86 87int 88hal_libinit() 89{ 90 image_patch_table *patch; 91 int i; 92 93 for (i = 0; i < NDIS_MAXCPUS; i++) 94 mtx_init(&disp_lock[i], "HAL preemption lock", 95 "HAL lock", MTX_RECURSE|MTX_DEF); 96 97 patch = hal_functbl; 98 while (patch->ipt_func != NULL) { 99 
windrv_wrap((funcptr)patch->ipt_func, 100 (funcptr *)&patch->ipt_wrap, 101 patch->ipt_argcnt, patch->ipt_ftype); 102 patch++; 103 } 104 105 106 return(0); 107} 108 109int 110hal_libfini() 111{ 112 image_patch_table *patch; 113 int i; 114 115 for (i = 0; i < NDIS_MAXCPUS; i++) 116 mtx_destroy(&disp_lock[i]); 117 118 patch = hal_functbl; 119 while (patch->ipt_func != NULL) { 120 windrv_unwrap(patch->ipt_wrap); 121 patch++; 122 } 123 124 return(0); 125} 126 127static void 128KeStallExecutionProcessor(usecs) 129 uint32_t usecs; 130{ 131 DELAY(usecs); 132 return; 133} 134 135static void 136WRITE_PORT_ULONG(port, val) 137 uint32_t *port; 138 uint32_t val; 139{ 140 bus_space_write_4(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port, val); 141 return; 142} 143 144static void 145WRITE_PORT_USHORT(uint16_t *port, uint16_t val) 146{ 147 bus_space_write_2(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port, val); 148 return; 149} 150 151static void 152WRITE_PORT_UCHAR(uint8_t *port, uint8_t val) 153{ 154 bus_space_write_1(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port, val); 155 return; 156} 157 158static void 159WRITE_PORT_BUFFER_ULONG(port, val, cnt) 160 uint32_t *port; 161 uint32_t *val; 162 uint32_t cnt; 163{ 164 bus_space_write_multi_4(NDIS_BUS_SPACE_IO, 0x0, 165 (bus_size_t)port, val, cnt); 166 return; 167} 168 169static void 170WRITE_PORT_BUFFER_USHORT(port, val, cnt) 171 uint16_t *port; 172 uint16_t *val; 173 uint32_t cnt; 174{ 175 bus_space_write_multi_2(NDIS_BUS_SPACE_IO, 0x0, 176 (bus_size_t)port, val, cnt); 177 return; 178} 179 180static void 181WRITE_PORT_BUFFER_UCHAR(port, val, cnt) 182 uint8_t *port; 183 uint8_t *val; 184 uint32_t cnt; 185{ 186 bus_space_write_multi_1(NDIS_BUS_SPACE_IO, 0x0, 187 (bus_size_t)port, val, cnt); 188 return; 189} 190 191static uint16_t 192READ_PORT_USHORT(port) 193 uint16_t *port; 194{ 195 return(bus_space_read_2(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port)); 196} 197 198static uint32_t 199READ_PORT_ULONG(port) 200 uint32_t *port; 201{ 202 
return(bus_space_read_4(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port)); 203} 204 205static uint8_t 206READ_PORT_UCHAR(port) 207 uint8_t *port; 208{ 209 return(bus_space_read_1(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port)); 210} 211 212static void 213READ_PORT_BUFFER_ULONG(port, val, cnt) 214 uint32_t *port; 215 uint32_t *val; 216 uint32_t cnt; 217{ 218 bus_space_read_multi_4(NDIS_BUS_SPACE_IO, 0x0, 219 (bus_size_t)port, val, cnt); 220 return; 221} 222 223static void 224READ_PORT_BUFFER_USHORT(port, val, cnt) 225 uint16_t *port; 226 uint16_t *val; 227 uint32_t cnt; 228{ 229 bus_space_read_multi_2(NDIS_BUS_SPACE_IO, 0x0, 230 (bus_size_t)port, val, cnt); 231 return; 232} 233 234static void 235READ_PORT_BUFFER_UCHAR(port, val, cnt) 236 uint8_t *port; 237 uint8_t *val; 238 uint32_t cnt; 239{ 240 bus_space_read_multi_1(NDIS_BUS_SPACE_IO, 0x0, 241 (bus_size_t)port, val, cnt); 242 return; 243} 244 245/* 246 * The spinlock implementation in Windows differs from that of FreeBSD. 247 * The basic operation of spinlocks involves two steps: 1) spin in a 248 * tight loop while trying to acquire a lock, 2) after obtaining the 249 * lock, disable preemption. (Note that on uniprocessor systems, you're 250 * allowed to skip the first step and just lock out pre-emption, since 251 * it's not possible for you to be in contention with another running 252 * thread.) Later, you release the lock then re-enable preemption. 253 * The difference between Windows and FreeBSD lies in how preemption 254 * is disabled. In FreeBSD, it's done using critical_enter(), which on 255 * the x86 arch translates to a cli instruction. This masks off all 256 * interrupts, and effectively stops the scheduler from ever running 257 * so _nothing_ can execute except the current thread. In Windows, 258 * preemption is disabled by raising the processor IRQL to DISPATCH_LEVEL. 259 * This stops other threads from running, but does _not_ block device 260 * interrupts. 
This means ISRs can still run, and they can make other 261 * threads runable, but those other threads won't be able to execute 262 * until the current thread lowers the IRQL to something less than 263 * DISPATCH_LEVEL. 264 * 265 * There's another commonly used IRQL in Windows, which is APC_LEVEL. 266 * An APC is an Asynchronous Procedure Call, which differs from a DPC 267 * (Defered Procedure Call) in that a DPC is queued up to run in 268 * another thread, while an APC runs in the thread that scheduled 269 * it (similar to a signal handler in a UNIX process). We don't 270 * actually support the notion of APCs in FreeBSD, so for now, the 271 * only IRQLs we're interested in are DISPATCH_LEVEL and PASSIVE_LEVEL. 272 * 273 * To simulate DISPATCH_LEVEL, we raise the current thread's priority 274 * to PI_REALTIME, which is the highest we can give it. This should, 275 * if I understand things correctly, prevent anything except for an 276 * interrupt thread from preempting us. PASSIVE_LEVEL is basically 277 * everything else. 278 * 279 * Be aware that, at least on the x86 arch, the Windows spinlock 280 * functions are divided up in peculiar ways. The actual spinlock 281 * functions are KfAcquireSpinLock() and KfReleaseSpinLock(), and 282 * they live in HAL.dll. Meanwhile, KeInitializeSpinLock(), 283 * KefAcquireSpinLockAtDpcLevel() and KefReleaseSpinLockFromDpcLevel() 284 * live in ntoskrnl.exe. Most Windows source code will call 285 * KeAcquireSpinLock() and KeReleaseSpinLock(), but these are just 286 * macros that call KfAcquireSpinLock() and KfReleaseSpinLock(). 287 * KefAcquireSpinLockAtDpcLevel() and KefReleaseSpinLockFromDpcLevel() 288 * perform the lock aquisition/release functions without doing the 289 * IRQL manipulation, and are used when one is already running at 290 * DISPATCH_LEVEL. Make sense? Good. 291 * 292 * According to the Microsoft documentation, any thread that calls 293 * KeAcquireSpinLock() must be running at IRQL <= DISPATCH_LEVEL. 
If 294 * we detect someone trying to acquire a spinlock from DEVICE_LEVEL 295 * or HIGH_LEVEL, we panic. 296 * 297 * Alternate sleep-lock-based spinlock implementation 298 * -------------------------------------------------- 299 * 300 * The earlier spinlock implementation was arguably a bit of a hack 301 * and presented several problems. It was basically designed to provide 302 * the functionality of spinlocks without incurring the wrath of 303 * WITNESS. We could get away with using both our spinlock implementation 304 * and FreeBSD sleep locks at the same time, but if WITNESS knew what 305 * we were really up to, it would have spanked us rather severely. 306 * 307 * There's another method we can use based entirely on sleep locks. 308 * First, it's important to realize that everything we're locking 309 * resides inside Project Evil itself: any critical data being locked 310 * by drivers belongs to the drivers, and should not be referenced 311 * by any other OS code outside of the NDISulator. The priority-based 312 * locking scheme has system-wide effects, just like real spinlocks 313 * (blocking preemption affects the whole CPU), but since we keep all 314 * our critical data private, we can use a simpler mechanism that 315 * affects only code/threads directly related to Project Evil. 316 * 317 * The idea is to create a sleep lock mutex for each CPU in the system. 318 * When a CPU running in the NDISulator wants to acquire a spinlock, it 319 * does the following: 320 * - Pin ourselves to the current CPU 321 * - Acquire the mutex for the current CPU 322 * - Spin on the spinlock variable using atomic test and set, just like 323 * a real spinlock. 324 * - Once we have the lock, we execute our critical code 325 * 326 * To give up the lock, we do: 327 * - Clear the spinlock variable with an atomic op 328 * - Release the per-CPU mutex 329 * - Unpin ourselves from the current CPU. 
330 * 331 * On a uniprocessor system, this means all threads that access protected 332 * data are serialized through the per-CPU mutex. After one thread 333 * acquires the 'spinlock,' any other thread that uses a spinlock on the 334 * current CPU will block on the per-CPU mutex, which has the same general 335 * effect of blocking pre-emption, but _only_ for those threads that are 336 * running NDISulator code. 337 * 338 * On a multiprocessor system, threads on different CPUs all block on 339 * their respective per-CPU mutex, and the atomic test/set operation 340 * on the spinlock variable provides inter-CPU synchronization, though 341 * only for threads running NDISulator code. 342 * 343 * This method solves an important problem. In Windows, you're allowed 344 * to do an ExAllocatePoolWithTag() with a spinlock held, provided you 345 * allocate from NonPagedPool. This implies an atomic heap allocation 346 * that will not cause the current thread to sleep. (You can't sleep 347 * while holding real spinlock: clowns will eat you.) But in FreeBSD, 348 * malloc(9) _always_ triggers the acquisition of a sleep lock, even 349 * when you use M_NOWAIT. This is not a problem for FreeBSD native 350 * code: you're allowed to sleep in things like interrupt threads. But 351 * it is a problem with the old priority-based spinlock implementation: 352 * even though we get away with it most of the time, we really can't 353 * do a malloc(9) after doing a KeAcquireSpinLock() or KeRaiseIrql(). 354 * With the new implementation, it's not a problem: you're allowed to 355 * acquire more than one sleep lock (as long as you avoid lock order 356 * reversals). 357 * 358 * The one drawback to this approach is that now we have a lot of 359 * contention on one per-CPU mutex within the NDISulator code. Whether 360 * or not this is preferable to the expected Windows spinlock behavior 361 * of blocking pre-emption is debatable. 
362 */ 363 364uint8_t 365KfAcquireSpinLock(lock) 366 kspin_lock *lock; 367{ 368 uint8_t oldirql; 369 370 KeRaiseIrql(DISPATCH_LEVEL, &oldirql); 371 KeAcquireSpinLockAtDpcLevel(lock); 372 373 return(oldirql); 374} 375 376void 377KfReleaseSpinLock(kspin_lock *lock, uint8_t newirql) 378{ 379 KeReleaseSpinLockFromDpcLevel(lock); 380 KeLowerIrql(newirql); 381 382 return; 383} 384 385uint8_t 386KeGetCurrentIrql() 387{ 388 if (mtx_owned(&disp_lock[curthread->td_oncpu])) 389 return(DISPATCH_LEVEL); 390 return(PASSIVE_LEVEL); 391} 392 393static uint64_t 394KeQueryPerformanceCounter(freq) 395 uint64_t *freq; 396{ 397 if (freq != NULL) 398 *freq = hz; 399 400 return((uint64_t)ticks); 401} 402 403uint8_t 404KfRaiseIrql(uint8_t irql) 405{ 406 uint8_t oldirql; 407 408 oldirql = KeGetCurrentIrql(); 409 410 /* I am so going to hell for this. */ 411 if (oldirql > irql) 412 panic("IRQL_NOT_LESS_THAN"); 413 414 if (oldirql != DISPATCH_LEVEL) { 415 sched_pin(); 416 mtx_lock(&disp_lock[curthread->td_oncpu]); 417 } 418/*printf("RAISE IRQL: %d %d\n", irql, oldirql);*/ 419 420 return(oldirql); 421} 422 423void 424KfLowerIrql(uint8_t oldirql) 425{ 426 if (oldirql == DISPATCH_LEVEL) 427 return; 428 429 if (KeGetCurrentIrql() != DISPATCH_LEVEL) 430 panic("IRQL_NOT_GREATER_THAN"); 431 432 mtx_unlock(&disp_lock[curthread->td_oncpu]); 433 sched_unpin(); 434 435 return; 436} 437 438static uint8_t 439KeRaiseIrqlToDpcLevel(void) 440{ 441 uint8_t irql; 442 443 KeRaiseIrql(DISPATCH_LEVEL, &irql); 444 return(irql); 445} 446 447static void 448_KeLowerIrql(uint8_t oldirql) 449{ 450 KeLowerIrql(oldirql); 451 return; 452} 453 454static void dummy() 455{ 456 printf ("hal dummy called...\n"); 457 return; 458} 459 460image_patch_table hal_functbl[] = { 461 IMPORT_SFUNC(KeStallExecutionProcessor, 1), 462 IMPORT_SFUNC(WRITE_PORT_ULONG, 2), 463 IMPORT_SFUNC(WRITE_PORT_USHORT, 2), 464 IMPORT_SFUNC(WRITE_PORT_UCHAR, 2), 465 IMPORT_SFUNC(WRITE_PORT_BUFFER_ULONG, 3), 466 IMPORT_SFUNC(WRITE_PORT_BUFFER_USHORT, 3), 
467 IMPORT_SFUNC(WRITE_PORT_BUFFER_UCHAR, 3), 468 IMPORT_SFUNC(READ_PORT_ULONG, 1), 469 IMPORT_SFUNC(READ_PORT_USHORT, 1), 470 IMPORT_SFUNC(READ_PORT_UCHAR, 1), 471 IMPORT_SFUNC(READ_PORT_BUFFER_ULONG, 3), 472 IMPORT_SFUNC(READ_PORT_BUFFER_USHORT, 3), 473 IMPORT_SFUNC(READ_PORT_BUFFER_UCHAR, 3), 474 IMPORT_FFUNC(KfAcquireSpinLock, 1), 475 IMPORT_FFUNC(KfReleaseSpinLock, 1), 476 IMPORT_SFUNC(KeGetCurrentIrql, 0), 477 IMPORT_SFUNC(KeQueryPerformanceCounter, 1), 478 IMPORT_FFUNC(KfLowerIrql, 1), 479 IMPORT_FFUNC(KfRaiseIrql, 1), 480 IMPORT_SFUNC(KeRaiseIrqlToDpcLevel, 0), 481#undef KeLowerIrql 482 IMPORT_SFUNC_MAP(KeLowerIrql, _KeLowerIrql, 1), 483 484 /* 485 * This last entry is a catch-all for any function we haven't 486 * implemented yet. The PE import list patching routine will 487 * use it for any function that doesn't have an explicit match 488 * in this table. 489 */ 490 491 { NULL, (FUNC)dummy, NULL, 0, WINDRV_WRAP_STDCALL }, 492 493 /* End of list. */ 494 495 { NULL, NULL, NULL } 496}; 497