1/*- 2 * SPDX-License-Identifier: BSD-4-Clause 3 * 4 * Copyright (c) 2003 5 * Bill Paul <wpaul@windriver.com>. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. All advertising materials mentioning features or use of this software 16 * must display the following acknowledgement: 17 * This product includes software developed by Bill Paul. 18 * 4. Neither the name of the author nor the names of any co-contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD 26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 32 * THE POSSIBILITY OF SUCH DAMAGE. 
33 */ 34 35#include <sys/cdefs.h> 36__FBSDID("$FreeBSD$"); 37 38#include <sys/param.h> 39#include <sys/types.h> 40#include <sys/errno.h> 41 42#include <sys/callout.h> 43#include <sys/kernel.h> 44#include <sys/lock.h> 45#include <sys/mutex.h> 46#include <sys/proc.h> 47#include <sys/sched.h> 48#include <sys/module.h> 49 50#include <sys/systm.h> 51#include <machine/bus.h> 52 53#include <sys/bus.h> 54#include <sys/rman.h> 55 56#include <compat/ndis/pe_var.h> 57#include <compat/ndis/resource_var.h> 58#include <compat/ndis/cfg_var.h> 59#include <compat/ndis/ntoskrnl_var.h> 60#include <compat/ndis/hal_var.h> 61 62static void KeStallExecutionProcessor(uint32_t); 63static void WRITE_PORT_BUFFER_ULONG(uint32_t *, 64 uint32_t *, uint32_t); 65static void WRITE_PORT_BUFFER_USHORT(uint16_t *, 66 uint16_t *, uint32_t); 67static void WRITE_PORT_BUFFER_UCHAR(uint8_t *, 68 uint8_t *, uint32_t); 69static void WRITE_PORT_ULONG(uint32_t *, uint32_t); 70static void WRITE_PORT_USHORT(uint16_t *, uint16_t); 71static void WRITE_PORT_UCHAR(uint8_t *, uint8_t); 72static uint32_t READ_PORT_ULONG(uint32_t *); 73static uint16_t READ_PORT_USHORT(uint16_t *); 74static uint8_t READ_PORT_UCHAR(uint8_t *); 75static void READ_PORT_BUFFER_ULONG(uint32_t *, 76 uint32_t *, uint32_t); 77static void READ_PORT_BUFFER_USHORT(uint16_t *, 78 uint16_t *, uint32_t); 79static void READ_PORT_BUFFER_UCHAR(uint8_t *, 80 uint8_t *, uint32_t); 81static uint64_t KeQueryPerformanceCounter(uint64_t *); 82static void _KeLowerIrql(uint8_t); 83static uint8_t KeRaiseIrqlToDpcLevel(void); 84static void dummy (void); 85 86#define NDIS_MAXCPUS 64 87static struct mtx disp_lock[NDIS_MAXCPUS]; 88 89int 90hal_libinit() 91{ 92 image_patch_table *patch; 93 int i; 94 95 for (i = 0; i < NDIS_MAXCPUS; i++) 96 mtx_init(&disp_lock[i], "HAL preemption lock", 97 "HAL lock", MTX_RECURSE|MTX_DEF); 98 99 patch = hal_functbl; 100 while (patch->ipt_func != NULL) { 101 windrv_wrap((funcptr)patch->ipt_func, 102 (funcptr *)&patch->ipt_wrap, 103 
patch->ipt_argcnt, patch->ipt_ftype); 104 patch++; 105 } 106 107 return (0); 108} 109 110int 111hal_libfini() 112{ 113 image_patch_table *patch; 114 int i; 115 116 for (i = 0; i < NDIS_MAXCPUS; i++) 117 mtx_destroy(&disp_lock[i]); 118 119 patch = hal_functbl; 120 while (patch->ipt_func != NULL) { 121 windrv_unwrap(patch->ipt_wrap); 122 patch++; 123 } 124 125 return (0); 126} 127 128static void 129KeStallExecutionProcessor(usecs) 130 uint32_t usecs; 131{ 132 DELAY(usecs); 133} 134 135static void 136WRITE_PORT_ULONG(port, val) 137 uint32_t *port; 138 uint32_t val; 139{ 140 bus_space_write_4(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port, val); 141} 142 143static void 144WRITE_PORT_USHORT(uint16_t *port, uint16_t val) 145{ 146 bus_space_write_2(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port, val); 147} 148 149static void 150WRITE_PORT_UCHAR(uint8_t *port, uint8_t val) 151{ 152 bus_space_write_1(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port, val); 153} 154 155static void 156WRITE_PORT_BUFFER_ULONG(port, val, cnt) 157 uint32_t *port; 158 uint32_t *val; 159 uint32_t cnt; 160{ 161 bus_space_write_multi_4(NDIS_BUS_SPACE_IO, 0x0, 162 (bus_size_t)port, val, cnt); 163} 164 165static void 166WRITE_PORT_BUFFER_USHORT(port, val, cnt) 167 uint16_t *port; 168 uint16_t *val; 169 uint32_t cnt; 170{ 171 bus_space_write_multi_2(NDIS_BUS_SPACE_IO, 0x0, 172 (bus_size_t)port, val, cnt); 173} 174 175static void 176WRITE_PORT_BUFFER_UCHAR(port, val, cnt) 177 uint8_t *port; 178 uint8_t *val; 179 uint32_t cnt; 180{ 181 bus_space_write_multi_1(NDIS_BUS_SPACE_IO, 0x0, 182 (bus_size_t)port, val, cnt); 183} 184 185static uint16_t 186READ_PORT_USHORT(port) 187 uint16_t *port; 188{ 189 return (bus_space_read_2(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port)); 190} 191 192static uint32_t 193READ_PORT_ULONG(port) 194 uint32_t *port; 195{ 196 return (bus_space_read_4(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port)); 197} 198 199static uint8_t 200READ_PORT_UCHAR(port) 201 uint8_t *port; 202{ 203 return 
(bus_space_read_1(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port)); 204} 205 206static void 207READ_PORT_BUFFER_ULONG(port, val, cnt) 208 uint32_t *port; 209 uint32_t *val; 210 uint32_t cnt; 211{ 212 bus_space_read_multi_4(NDIS_BUS_SPACE_IO, 0x0, 213 (bus_size_t)port, val, cnt); 214} 215 216static void 217READ_PORT_BUFFER_USHORT(port, val, cnt) 218 uint16_t *port; 219 uint16_t *val; 220 uint32_t cnt; 221{ 222 bus_space_read_multi_2(NDIS_BUS_SPACE_IO, 0x0, 223 (bus_size_t)port, val, cnt); 224} 225 226static void 227READ_PORT_BUFFER_UCHAR(port, val, cnt) 228 uint8_t *port; 229 uint8_t *val; 230 uint32_t cnt; 231{ 232 bus_space_read_multi_1(NDIS_BUS_SPACE_IO, 0x0, 233 (bus_size_t)port, val, cnt); 234} 235 236/* 237 * The spinlock implementation in Windows differs from that of FreeBSD. 238 * The basic operation of spinlocks involves two steps: 1) spin in a 239 * tight loop while trying to acquire a lock, 2) after obtaining the 240 * lock, disable preemption. (Note that on uniprocessor systems, you're 241 * allowed to skip the first step and just lock out pre-emption, since 242 * it's not possible for you to be in contention with another running 243 * thread.) Later, you release the lock then re-enable preemption. 244 * The difference between Windows and FreeBSD lies in how preemption 245 * is disabled. In FreeBSD, it's done using critical_enter(), which on 246 * the x86 arch translates to a cli instruction. This masks off all 247 * interrupts, and effectively stops the scheduler from ever running 248 * so _nothing_ can execute except the current thread. In Windows, 249 * preemption is disabled by raising the processor IRQL to DISPATCH_LEVEL. 250 * This stops other threads from running, but does _not_ block device 251 * interrupts. This means ISRs can still run, and they can make other 252 * threads runable, but those other threads won't be able to execute 253 * until the current thread lowers the IRQL to something less than 254 * DISPATCH_LEVEL. 
 *
 * There's another commonly used IRQL in Windows, which is APC_LEVEL.
 * An APC is an Asynchronous Procedure Call, which differs from a DPC
 * (Deferred Procedure Call) in that a DPC is queued up to run in
 * another thread, while an APC runs in the thread that scheduled
 * it (similar to a signal handler in a UNIX process). We don't
 * actually support the notion of APCs in FreeBSD, so for now, the
 * only IRQLs we're interested in are DISPATCH_LEVEL and PASSIVE_LEVEL.
 *
 * To simulate DISPATCH_LEVEL, we raise the current thread's priority
 * to PI_REALTIME, which is the highest we can give it. This should,
 * if I understand things correctly, prevent anything except for an
 * interrupt thread from preempting us. PASSIVE_LEVEL is basically
 * everything else.
 *
 * Be aware that, at least on the x86 arch, the Windows spinlock
 * functions are divided up in peculiar ways. The actual spinlock
 * functions are KfAcquireSpinLock() and KfReleaseSpinLock(), and
 * they live in HAL.dll. Meanwhile, KeInitializeSpinLock(),
 * KefAcquireSpinLockAtDpcLevel() and KefReleaseSpinLockFromDpcLevel()
 * live in ntoskrnl.exe. Most Windows source code will call
 * KeAcquireSpinLock() and KeReleaseSpinLock(), but these are just
 * macros that call KfAcquireSpinLock() and KfReleaseSpinLock().
 * KefAcquireSpinLockAtDpcLevel() and KefReleaseSpinLockFromDpcLevel()
 * perform the lock acquisition/release functions without doing the
 * IRQL manipulation, and are used when one is already running at
 * DISPATCH_LEVEL. Make sense? Good.
 *
 * According to the Microsoft documentation, any thread that calls
 * KeAcquireSpinLock() must be running at IRQL <= DISPATCH_LEVEL. If
 * we detect someone trying to acquire a spinlock from DEVICE_LEVEL
 * or HIGH_LEVEL, we panic.
 *
 * Alternate sleep-lock-based spinlock implementation
 * --------------------------------------------------
 *
 * The earlier spinlock implementation was arguably a bit of a hack
 * and presented several problems. It was basically designed to provide
 * the functionality of spinlocks without incurring the wrath of
 * WITNESS. We could get away with using both our spinlock implementation
 * and FreeBSD sleep locks at the same time, but if WITNESS knew what
 * we were really up to, it would have spanked us rather severely.
 *
 * There's another method we can use based entirely on sleep locks.
 * First, it's important to realize that everything we're locking
 * resides inside Project Evil itself: any critical data being locked
 * by drivers belongs to the drivers, and should not be referenced
 * by any other OS code outside of the NDISulator. The priority-based
 * locking scheme has system-wide effects, just like real spinlocks
 * (blocking preemption affects the whole CPU), but since we keep all
 * our critical data private, we can use a simpler mechanism that
 * affects only code/threads directly related to Project Evil.
 *
 * The idea is to create a sleep lock mutex for each CPU in the system.
 * When a CPU running in the NDISulator wants to acquire a spinlock, it
 * does the following:
 * - Pin ourselves to the current CPU
 * - Acquire the mutex for the current CPU
 * - Spin on the spinlock variable using atomic test and set, just like
 *   a real spinlock.
 * - Once we have the lock, we execute our critical code
 *
 * To give up the lock, we do:
 * - Clear the spinlock variable with an atomic op
 * - Release the per-CPU mutex
 * - Unpin ourselves from the current CPU.
 *
 * On a uniprocessor system, this means all threads that access protected
 * data are serialized through the per-CPU mutex.
 * After one thread acquires the 'spinlock,' any other thread that uses a
 * spinlock on the current CPU will block on the per-CPU mutex, which has
 * the same general effect of blocking pre-emption, but _only_ for those
 * threads that are running NDISulator code.
 *
 * On a multiprocessor system, threads on different CPUs all block on
 * their respective per-CPU mutex, and the atomic test/set operation
 * on the spinlock variable provides inter-CPU synchronization, though
 * only for threads running NDISulator code.
 *
 * This method solves an important problem. In Windows, you're allowed
 * to do an ExAllocatePoolWithTag() with a spinlock held, provided you
 * allocate from NonPagedPool. This implies an atomic heap allocation
 * that will not cause the current thread to sleep. (You can't sleep
 * while holding a real spinlock: clowns will eat you.) But in FreeBSD,
 * malloc(9) _always_ triggers the acquisition of a sleep lock, even
 * when you use M_NOWAIT. This is not a problem for FreeBSD native
 * code: you're allowed to sleep in things like interrupt threads. But
 * it is a problem with the old priority-based spinlock implementation:
 * even though we get away with it most of the time, we really can't
 * do a malloc(9) after doing a KeAcquireSpinLock() or KeRaiseIrql().
 * With the new implementation, it's not a problem: you're allowed to
 * acquire more than one sleep lock (as long as you avoid lock order
 * reversals).
 *
 * The one drawback to this approach is that now we have a lot of
 * contention on one per-CPU mutex within the NDISulator code. Whether
 * or not this is preferable to the expected Windows spinlock behavior
 * of blocking pre-emption is debatable.
353 */ 354 355uint8_t 356KfAcquireSpinLock(lock) 357 kspin_lock *lock; 358{ 359 uint8_t oldirql; 360 361 KeRaiseIrql(DISPATCH_LEVEL, &oldirql); 362 KeAcquireSpinLockAtDpcLevel(lock); 363 364 return (oldirql); 365} 366 367void 368KfReleaseSpinLock(kspin_lock *lock, uint8_t newirql) 369{ 370 KeReleaseSpinLockFromDpcLevel(lock); 371 KeLowerIrql(newirql); 372} 373 374uint8_t 375KeGetCurrentIrql() 376{ 377 if (mtx_owned(&disp_lock[curthread->td_oncpu])) 378 return (DISPATCH_LEVEL); 379 return (PASSIVE_LEVEL); 380} 381 382static uint64_t 383KeQueryPerformanceCounter(freq) 384 uint64_t *freq; 385{ 386 if (freq != NULL) 387 *freq = hz; 388 389 return ((uint64_t)ticks); 390} 391 392uint8_t 393KfRaiseIrql(uint8_t irql) 394{ 395 uint8_t oldirql; 396 397 sched_pin(); 398 oldirql = KeGetCurrentIrql(); 399 400 /* I am so going to hell for this. */ 401 if (oldirql > irql) 402 panic("IRQL_NOT_LESS_THAN_OR_EQUAL"); 403 404 if (oldirql != DISPATCH_LEVEL) 405 mtx_lock(&disp_lock[curthread->td_oncpu]); 406 else 407 sched_unpin(); 408 409/*printf("RAISE IRQL: %d %d\n", irql, oldirql);*/ 410 411 return (oldirql); 412} 413 414void 415KfLowerIrql(uint8_t oldirql) 416{ 417 if (oldirql == DISPATCH_LEVEL) 418 return; 419 420 if (KeGetCurrentIrql() != DISPATCH_LEVEL) 421 panic("IRQL_NOT_GREATER_THAN"); 422 423 mtx_unlock(&disp_lock[curthread->td_oncpu]); 424 sched_unpin(); 425} 426 427static uint8_t 428KeRaiseIrqlToDpcLevel(void) 429{ 430 uint8_t irql; 431 432 KeRaiseIrql(DISPATCH_LEVEL, &irql); 433 return (irql); 434} 435 436static void 437_KeLowerIrql(uint8_t oldirql) 438{ 439 KeLowerIrql(oldirql); 440} 441 442static void dummy() 443{ 444 printf("hal dummy called...\n"); 445} 446 447image_patch_table hal_functbl[] = { 448 IMPORT_SFUNC(KeStallExecutionProcessor, 1), 449 IMPORT_SFUNC(WRITE_PORT_ULONG, 2), 450 IMPORT_SFUNC(WRITE_PORT_USHORT, 2), 451 IMPORT_SFUNC(WRITE_PORT_UCHAR, 2), 452 IMPORT_SFUNC(WRITE_PORT_BUFFER_ULONG, 3), 453 IMPORT_SFUNC(WRITE_PORT_BUFFER_USHORT, 3), 454 
IMPORT_SFUNC(WRITE_PORT_BUFFER_UCHAR, 3), 455 IMPORT_SFUNC(READ_PORT_ULONG, 1), 456 IMPORT_SFUNC(READ_PORT_USHORT, 1), 457 IMPORT_SFUNC(READ_PORT_UCHAR, 1), 458 IMPORT_SFUNC(READ_PORT_BUFFER_ULONG, 3), 459 IMPORT_SFUNC(READ_PORT_BUFFER_USHORT, 3), 460 IMPORT_SFUNC(READ_PORT_BUFFER_UCHAR, 3), 461 IMPORT_FFUNC(KfAcquireSpinLock, 1), 462 IMPORT_FFUNC(KfReleaseSpinLock, 1), 463 IMPORT_SFUNC(KeGetCurrentIrql, 0), 464 IMPORT_SFUNC(KeQueryPerformanceCounter, 1), 465 IMPORT_FFUNC(KfLowerIrql, 1), 466 IMPORT_FFUNC(KfRaiseIrql, 1), 467 IMPORT_SFUNC(KeRaiseIrqlToDpcLevel, 0), 468#undef KeLowerIrql 469 IMPORT_SFUNC_MAP(KeLowerIrql, _KeLowerIrql, 1), 470 471 /* 472 * This last entry is a catch-all for any function we haven't 473 * implemented yet. The PE import list patching routine will 474 * use it for any function that doesn't have an explicit match 475 * in this table. 476 */ 477 478 { NULL, (FUNC)dummy, NULL, 0, WINDRV_WRAP_STDCALL }, 479 480 /* End of list. */ 481 { NULL, NULL, NULL } 482}; 483