subr_hal.c revision 189719
1/*-
2 * Copyright (c) 2003
3 *	Bill Paul <wpaul@windriver.com>.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by Bill Paul.
16 * 4. Neither the name of the author nor the names of any co-contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
30 * THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include <sys/cdefs.h>
34__FBSDID("$FreeBSD: head/sys/compat/ndis/subr_hal.c 189719 2009-03-12 02:51:55Z weongyo $");
35
36#include <sys/param.h>
37#include <sys/types.h>
38#include <sys/errno.h>
39
40#include <sys/callout.h>
41#include <sys/kernel.h>
42#include <sys/lock.h>
43#include <sys/mutex.h>
44#include <sys/proc.h>
45#include <sys/sched.h>
46#include <sys/module.h>
47
48#include <sys/systm.h>
49#include <machine/bus.h>
50
51#include <sys/bus.h>
52#include <sys/rman.h>
53
54#include <compat/ndis/pe_var.h>
55#include <compat/ndis/resource_var.h>
56#include <compat/ndis/cfg_var.h>
57#include <compat/ndis/ntoskrnl_var.h>
58#include <compat/ndis/hal_var.h>
59
60static void KeStallExecutionProcessor(uint32_t);
61static void WRITE_PORT_BUFFER_ULONG(uint32_t *,
62	uint32_t *, uint32_t);
63static void WRITE_PORT_BUFFER_USHORT(uint16_t *,
64	uint16_t *, uint32_t);
65static void WRITE_PORT_BUFFER_UCHAR(uint8_t *,
66	uint8_t *, uint32_t);
67static void WRITE_PORT_ULONG(uint32_t *, uint32_t);
68static void WRITE_PORT_USHORT(uint16_t *, uint16_t);
69static void WRITE_PORT_UCHAR(uint8_t *, uint8_t);
70static uint32_t READ_PORT_ULONG(uint32_t *);
71static uint16_t READ_PORT_USHORT(uint16_t *);
72static uint8_t READ_PORT_UCHAR(uint8_t *);
73static void READ_PORT_BUFFER_ULONG(uint32_t *,
74	uint32_t *, uint32_t);
75static void READ_PORT_BUFFER_USHORT(uint16_t *,
76	uint16_t *, uint32_t);
77static void READ_PORT_BUFFER_UCHAR(uint8_t *,
78	uint8_t *, uint32_t);
79static uint64_t KeQueryPerformanceCounter(uint64_t *);
80static void _KeLowerIrql(uint8_t);
81static uint8_t KeRaiseIrqlToDpcLevel(void);
82static void dummy (void);
83
84#define NDIS_MAXCPUS 64
85static struct mtx disp_lock[NDIS_MAXCPUS];
86
87int
88hal_libinit()
89{
90	image_patch_table	*patch;
91	int			i;
92
93	for (i = 0; i < NDIS_MAXCPUS; i++)
94		mtx_init(&disp_lock[i], "HAL preemption lock",
95		    "HAL lock", MTX_RECURSE|MTX_DEF);
96
97	patch = hal_functbl;
98	while (patch->ipt_func != NULL) {
99		windrv_wrap((funcptr)patch->ipt_func,
100		    (funcptr *)&patch->ipt_wrap,
101		    patch->ipt_argcnt, patch->ipt_ftype);
102		patch++;
103	}
104
105
106	return(0);
107}
108
109int
110hal_libfini()
111{
112	image_patch_table	*patch;
113	int			i;
114
115	for (i = 0; i < NDIS_MAXCPUS; i++)
116		mtx_destroy(&disp_lock[i]);
117
118	patch = hal_functbl;
119	while (patch->ipt_func != NULL) {
120		windrv_unwrap(patch->ipt_wrap);
121		patch++;
122	}
123
124	return(0);
125}
126
/*
 * Stall the caller: the requested duration is handed unchanged to
 * DELAY().
 */
static void
KeStallExecutionProcessor(uint32_t usecs)
{
	DELAY(usecs);
}
134
135static void
136WRITE_PORT_ULONG(port, val)
137	uint32_t		*port;
138	uint32_t		val;
139{
140	bus_space_write_4(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port, val);
141	return;
142}
143
144static void
145WRITE_PORT_USHORT(uint16_t *port, uint16_t val)
146{
147	bus_space_write_2(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port, val);
148	return;
149}
150
151static void
152WRITE_PORT_UCHAR(uint8_t *port, uint8_t val)
153{
154	bus_space_write_1(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port, val);
155	return;
156}
157
158static void
159WRITE_PORT_BUFFER_ULONG(port, val, cnt)
160	uint32_t		*port;
161	uint32_t		*val;
162	uint32_t		cnt;
163{
164	bus_space_write_multi_4(NDIS_BUS_SPACE_IO, 0x0,
165	    (bus_size_t)port, val, cnt);
166	return;
167}
168
169static void
170WRITE_PORT_BUFFER_USHORT(port, val, cnt)
171	uint16_t		*port;
172	uint16_t		*val;
173	uint32_t		cnt;
174{
175	bus_space_write_multi_2(NDIS_BUS_SPACE_IO, 0x0,
176	    (bus_size_t)port, val, cnt);
177	return;
178}
179
180static void
181WRITE_PORT_BUFFER_UCHAR(port, val, cnt)
182	uint8_t			*port;
183	uint8_t			*val;
184	uint32_t		cnt;
185{
186	bus_space_write_multi_1(NDIS_BUS_SPACE_IO, 0x0,
187	    (bus_size_t)port, val, cnt);
188	return;
189}
190
191static uint16_t
192READ_PORT_USHORT(port)
193	uint16_t		*port;
194{
195	return(bus_space_read_2(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port));
196}
197
198static uint32_t
199READ_PORT_ULONG(port)
200	uint32_t		*port;
201{
202	return(bus_space_read_4(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port));
203}
204
205static uint8_t
206READ_PORT_UCHAR(port)
207	uint8_t			*port;
208{
209	return(bus_space_read_1(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port));
210}
211
212static void
213READ_PORT_BUFFER_ULONG(port, val, cnt)
214	uint32_t		*port;
215	uint32_t		*val;
216	uint32_t		cnt;
217{
218	bus_space_read_multi_4(NDIS_BUS_SPACE_IO, 0x0,
219	    (bus_size_t)port, val, cnt);
220	return;
221}
222
223static void
224READ_PORT_BUFFER_USHORT(port, val, cnt)
225	uint16_t		*port;
226	uint16_t		*val;
227	uint32_t		cnt;
228{
229	bus_space_read_multi_2(NDIS_BUS_SPACE_IO, 0x0,
230	    (bus_size_t)port, val, cnt);
231	return;
232}
233
234static void
235READ_PORT_BUFFER_UCHAR(port, val, cnt)
236	uint8_t			*port;
237	uint8_t			*val;
238	uint32_t		cnt;
239{
240	bus_space_read_multi_1(NDIS_BUS_SPACE_IO, 0x0,
241	    (bus_size_t)port, val, cnt);
242	return;
243}
244
245/*
246 * The spinlock implementation in Windows differs from that of FreeBSD.
247 * The basic operation of spinlocks involves two steps: 1) spin in a
248 * tight loop while trying to acquire a lock, 2) after obtaining the
249 * lock, disable preemption. (Note that on uniprocessor systems, you're
250 * allowed to skip the first step and just lock out pre-emption, since
251 * it's not possible for you to be in contention with another running
252 * thread.) Later, you release the lock then re-enable preemption.
253 * The difference between Windows and FreeBSD lies in how preemption
254 * is disabled. In FreeBSD, it's done using critical_enter(), which on
255 * the x86 arch translates to a cli instruction. This masks off all
256 * interrupts, and effectively stops the scheduler from ever running
257 * so _nothing_ can execute except the current thread. In Windows,
258 * preemption is disabled by raising the processor IRQL to DISPATCH_LEVEL.
 * This stops other threads from running, but does _not_ block device
 * interrupts. This means ISRs can still run, and they can make other
 * threads runnable, but those other threads won't be able to execute
 * until the current thread lowers the IRQL to something less than
 * DISPATCH_LEVEL.
264 *
 * There's another commonly used IRQL in Windows, which is APC_LEVEL.
 * An APC is an Asynchronous Procedure Call, which differs from a DPC
 * (Deferred Procedure Call) in that a DPC is queued up to run in
 * another thread, while an APC runs in the thread that scheduled
 * it (similar to a signal handler in a UNIX process). We don't
 * actually support the notion of APCs in FreeBSD, so for now, the
 * only IRQLs we're interested in are DISPATCH_LEVEL and PASSIVE_LEVEL.
272 *
273 * To simulate DISPATCH_LEVEL, we raise the current thread's priority
274 * to PI_REALTIME, which is the highest we can give it. This should,
275 * if I understand things correctly, prevent anything except for an
276 * interrupt thread from preempting us. PASSIVE_LEVEL is basically
277 * everything else.
278 *
279 * Be aware that, at least on the x86 arch, the Windows spinlock
280 * functions are divided up in peculiar ways. The actual spinlock
281 * functions are KfAcquireSpinLock() and KfReleaseSpinLock(), and
282 * they live in HAL.dll. Meanwhile, KeInitializeSpinLock(),
283 * KefAcquireSpinLockAtDpcLevel() and KefReleaseSpinLockFromDpcLevel()
284 * live in ntoskrnl.exe. Most Windows source code will call
285 * KeAcquireSpinLock() and KeReleaseSpinLock(), but these are just
286 * macros that call KfAcquireSpinLock() and KfReleaseSpinLock().
 * KefAcquireSpinLockAtDpcLevel() and KefReleaseSpinLockFromDpcLevel()
 * perform the lock acquisition/release functions without doing the
 * IRQL manipulation, and are used when one is already running at
 * DISPATCH_LEVEL. Make sense? Good.
291 *
292 * According to the Microsoft documentation, any thread that calls
293 * KeAcquireSpinLock() must be running at IRQL <= DISPATCH_LEVEL. If
294 * we detect someone trying to acquire a spinlock from DEVICE_LEVEL
295 * or HIGH_LEVEL, we panic.
296 *
297 * Alternate sleep-lock-based spinlock implementation
298 * --------------------------------------------------
299 *
300 * The earlier spinlock implementation was arguably a bit of a hack
301 * and presented several problems. It was basically designed to provide
302 * the functionality of spinlocks without incurring the wrath of
303 * WITNESS. We could get away with using both our spinlock implementation
304 * and FreeBSD sleep locks at the same time, but if WITNESS knew what
305 * we were really up to, it would have spanked us rather severely.
306 *
307 * There's another method we can use based entirely on sleep locks.
308 * First, it's important to realize that everything we're locking
309 * resides inside Project Evil itself: any critical data being locked
310 * by drivers belongs to the drivers, and should not be referenced
311 * by any other OS code outside of the NDISulator. The priority-based
312 * locking scheme has system-wide effects, just like real spinlocks
313 * (blocking preemption affects the whole CPU), but since we keep all
314 * our critical data private, we can use a simpler mechanism that
315 * affects only code/threads directly related to Project Evil.
316 *
317 * The idea is to create a sleep lock mutex for each CPU in the system.
318 * When a CPU running in the NDISulator wants to acquire a spinlock, it
319 * does the following:
320 * - Pin ourselves to the current CPU
321 * - Acquire the mutex for the current CPU
322 * - Spin on the spinlock variable using atomic test and set, just like
323 *   a real spinlock.
324 * - Once we have the lock, we execute our critical code
325 *
326 * To give up the lock, we do:
327 * - Clear the spinlock variable with an atomic op
328 * - Release the per-CPU mutex
329 * - Unpin ourselves from the current CPU.
330 *
331 * On a uniprocessor system, this means all threads that access protected
332 * data are serialized through the per-CPU mutex. After one thread
333 * acquires the 'spinlock,' any other thread that uses a spinlock on the
334 * current CPU will block on the per-CPU mutex, which has the same general
335 * effect of blocking pre-emption, but _only_ for those threads that are
336 * running NDISulator code.
337 *
338 * On a multiprocessor system, threads on different CPUs all block on
339 * their respective per-CPU mutex, and the atomic test/set operation
340 * on the spinlock variable provides inter-CPU synchronization, though
341 * only for threads running NDISulator code.
342 *
343 * This method solves an important problem. In Windows, you're allowed
344 * to do an ExAllocatePoolWithTag() with a spinlock held, provided you
345 * allocate from NonPagedPool. This implies an atomic heap allocation
 * that will not cause the current thread to sleep. (You can't sleep
 * while holding a real spinlock: clowns will eat you.) But in FreeBSD,
348 * malloc(9) _always_ triggers the acquisition of a sleep lock, even
349 * when you use M_NOWAIT. This is not a problem for FreeBSD native
350 * code: you're allowed to sleep in things like interrupt threads. But
351 * it is a problem with the old priority-based spinlock implementation:
352 * even though we get away with it most of the time, we really can't
353 * do a malloc(9) after doing a KeAcquireSpinLock() or KeRaiseIrql().
354 * With the new implementation, it's not a problem: you're allowed to
355 * acquire more than one sleep lock (as long as you avoid lock order
356 * reversals).
357 *
358 * The one drawback to this approach is that now we have a lot of
359 * contention on one per-CPU mutex within the NDISulator code. Whether
360 * or not this is preferable to the expected Windows spinlock behavior
361 * of blocking pre-emption is debatable.
362 */
363
364uint8_t
365KfAcquireSpinLock(lock)
366	kspin_lock		*lock;
367{
368	uint8_t			oldirql;
369
370	KeRaiseIrql(DISPATCH_LEVEL, &oldirql);
371	KeAcquireSpinLockAtDpcLevel(lock);
372
373	return(oldirql);
374}
375
376void
377KfReleaseSpinLock(kspin_lock *lock, uint8_t newirql)
378{
379	KeReleaseSpinLockFromDpcLevel(lock);
380	KeLowerIrql(newirql);
381
382	return;
383}
384
385uint8_t
386KeGetCurrentIrql()
387{
388	if (mtx_owned(&disp_lock[curthread->td_oncpu]))
389		return(DISPATCH_LEVEL);
390	return(PASSIVE_LEVEL);
391}
392
393static uint64_t
394KeQueryPerformanceCounter(freq)
395	uint64_t		*freq;
396{
397	if (freq != NULL)
398		*freq = hz;
399
400	return((uint64_t)ticks);
401}
402
403uint8_t
404KfRaiseIrql(uint8_t irql)
405{
406	uint8_t			oldirql;
407
408	oldirql = KeGetCurrentIrql();
409
410	/* I am so going to hell for this. */
411	if (oldirql > irql)
412		panic("IRQL_NOT_LESS_THAN");
413
414	if (oldirql != DISPATCH_LEVEL) {
415		sched_pin();
416		mtx_lock(&disp_lock[curthread->td_oncpu]);
417	}
418/*printf("RAISE IRQL: %d %d\n", irql, oldirql);*/
419
420	return(oldirql);
421}
422
423void
424KfLowerIrql(uint8_t oldirql)
425{
426	if (oldirql == DISPATCH_LEVEL)
427		return;
428
429	if (KeGetCurrentIrql() != DISPATCH_LEVEL)
430		panic("IRQL_NOT_GREATER_THAN");
431
432	mtx_unlock(&disp_lock[curthread->td_oncpu]);
433	sched_unpin();
434
435	return;
436}
437
438static uint8_t
439KeRaiseIrqlToDpcLevel(void)
440{
441	uint8_t			irql;
442
443	KeRaiseIrql(DISPATCH_LEVEL, &irql);
444	return(irql);
445}
446
/*
 * Function form of KeLowerIrql(): simply forwards 'oldirql' to it.
 * This is the routine hal_functbl[] exports under the KeLowerIrql name.
 */
static void
_KeLowerIrql(uint8_t oldirql)
{
	KeLowerIrql(oldirql);
}
453
/*
 * Placeholder used as the catch-all entry in hal_functbl[]; it only
 * logs that it was invoked.
 */
static void
dummy(void)
{
	printf ("hal dummy called...\n");
}
459
460image_patch_table hal_functbl[] = {
461	IMPORT_SFUNC(KeStallExecutionProcessor, 1),
462	IMPORT_SFUNC(WRITE_PORT_ULONG, 2),
463	IMPORT_SFUNC(WRITE_PORT_USHORT, 2),
464	IMPORT_SFUNC(WRITE_PORT_UCHAR, 2),
465	IMPORT_SFUNC(WRITE_PORT_BUFFER_ULONG, 3),
466	IMPORT_SFUNC(WRITE_PORT_BUFFER_USHORT, 3),
467	IMPORT_SFUNC(WRITE_PORT_BUFFER_UCHAR, 3),
468	IMPORT_SFUNC(READ_PORT_ULONG, 1),
469	IMPORT_SFUNC(READ_PORT_USHORT, 1),
470	IMPORT_SFUNC(READ_PORT_UCHAR, 1),
471	IMPORT_SFUNC(READ_PORT_BUFFER_ULONG, 3),
472	IMPORT_SFUNC(READ_PORT_BUFFER_USHORT, 3),
473	IMPORT_SFUNC(READ_PORT_BUFFER_UCHAR, 3),
474	IMPORT_FFUNC(KfAcquireSpinLock, 1),
475	IMPORT_FFUNC(KfReleaseSpinLock, 1),
476	IMPORT_SFUNC(KeGetCurrentIrql, 0),
477	IMPORT_SFUNC(KeQueryPerformanceCounter, 1),
478	IMPORT_FFUNC(KfLowerIrql, 1),
479	IMPORT_FFUNC(KfRaiseIrql, 1),
480	IMPORT_SFUNC(KeRaiseIrqlToDpcLevel, 0),
481#undef KeLowerIrql
482	IMPORT_SFUNC_MAP(KeLowerIrql, _KeLowerIrql, 1),
483
484	/*
485	 * This last entry is a catch-all for any function we haven't
486	 * implemented yet. The PE import list patching routine will
487	 * use it for any function that doesn't have an explicit match
488	 * in this table.
489	 */
490
491	{ NULL, (FUNC)dummy, NULL, 0, WINDRV_WRAP_STDCALL },
492
493	/* End of list. */
494
495	{ NULL, NULL, NULL }
496};
497