1/*-
2 * SPDX-License-Identifier: BSD-4-Clause
3 *
4 * Copyright (c) 2003
5 *	Bill Paul <wpaul@windriver.com>.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software
16 *    must display the following acknowledgement:
17 *	This product includes software developed by Bill Paul.
18 * 4. Neither the name of the author nor the names of any co-contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
32 * THE POSSIBILITY OF SUCH DAMAGE.
33 */
34
35#include <sys/cdefs.h>
36__FBSDID("$FreeBSD$");
37
38#include <sys/param.h>
39#include <sys/types.h>
40#include <sys/errno.h>
41
42#include <sys/callout.h>
43#include <sys/kernel.h>
44#include <sys/lock.h>
45#include <sys/mutex.h>
46#include <sys/proc.h>
47#include <sys/sched.h>
48#include <sys/module.h>
49
50#include <sys/systm.h>
51#include <machine/bus.h>
52
53#include <sys/bus.h>
54#include <sys/rman.h>
55
56#include <compat/ndis/pe_var.h>
57#include <compat/ndis/resource_var.h>
58#include <compat/ndis/cfg_var.h>
59#include <compat/ndis/ntoskrnl_var.h>
60#include <compat/ndis/hal_var.h>
61
/*
 * Forward declarations for the file-local routines that are exported
 * to Windows drivers through hal_functbl[].
 */

/* Timing. */
static void KeStallExecutionProcessor(uint32_t usecs);
static uint64_t KeQueryPerformanceCounter(uint64_t *freq);

/* Port writes. */
static void WRITE_PORT_BUFFER_ULONG(uint32_t *port, uint32_t *val,
	uint32_t cnt);
static void WRITE_PORT_BUFFER_USHORT(uint16_t *port, uint16_t *val,
	uint32_t cnt);
static void WRITE_PORT_BUFFER_UCHAR(uint8_t *port, uint8_t *val,
	uint32_t cnt);
static void WRITE_PORT_ULONG(uint32_t *port, uint32_t val);
static void WRITE_PORT_USHORT(uint16_t *port, uint16_t val);
static void WRITE_PORT_UCHAR(uint8_t *port, uint8_t val);

/* Port reads. */
static uint32_t READ_PORT_ULONG(uint32_t *port);
static uint16_t READ_PORT_USHORT(uint16_t *port);
static uint8_t READ_PORT_UCHAR(uint8_t *port);
static void READ_PORT_BUFFER_ULONG(uint32_t *port, uint32_t *val,
	uint32_t cnt);
static void READ_PORT_BUFFER_USHORT(uint16_t *port, uint16_t *val,
	uint32_t cnt);
static void READ_PORT_BUFFER_UCHAR(uint8_t *port, uint8_t *val,
	uint32_t cnt);

/* IRQL helpers and the catch-all stub. */
static void _KeLowerIrql(uint8_t oldirql);
static uint8_t KeRaiseIrqlToDpcLevel(void);
static void dummy(void);
85
86#define NDIS_MAXCPUS 64
87static struct mtx disp_lock[NDIS_MAXCPUS];
88
89int
90hal_libinit()
91{
92	image_patch_table	*patch;
93	int			i;
94
95	for (i = 0; i < NDIS_MAXCPUS; i++)
96		mtx_init(&disp_lock[i], "HAL preemption lock",
97		    "HAL lock", MTX_RECURSE|MTX_DEF);
98
99	patch = hal_functbl;
100	while (patch->ipt_func != NULL) {
101		windrv_wrap((funcptr)patch->ipt_func,
102		    (funcptr *)&patch->ipt_wrap,
103		    patch->ipt_argcnt, patch->ipt_ftype);
104		patch++;
105	}
106
107	return (0);
108}
109
110int
111hal_libfini()
112{
113	image_patch_table	*patch;
114	int			i;
115
116	for (i = 0; i < NDIS_MAXCPUS; i++)
117		mtx_destroy(&disp_lock[i]);
118
119	patch = hal_functbl;
120	while (patch->ipt_func != NULL) {
121		windrv_unwrap(patch->ipt_wrap);
122		patch++;
123	}
124
125	return (0);
126}
127
/*
 * KeStallExecutionProcessor: pass the requested count straight to
 * DELAY().
 *
 * usecs - delay length, forwarded unchanged (microseconds, going by
 *         the parameter name).
 */
static void
KeStallExecutionProcessor(uint32_t usecs)
{
	DELAY(usecs);
}
134
135static void
136WRITE_PORT_ULONG(port, val)
137	uint32_t		*port;
138	uint32_t		val;
139{
140	bus_space_write_4(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port, val);
141}
142
143static void
144WRITE_PORT_USHORT(uint16_t *port, uint16_t val)
145{
146	bus_space_write_2(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port, val);
147}
148
149static void
150WRITE_PORT_UCHAR(uint8_t *port, uint8_t val)
151{
152	bus_space_write_1(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port, val);
153}
154
155static void
156WRITE_PORT_BUFFER_ULONG(port, val, cnt)
157	uint32_t		*port;
158	uint32_t		*val;
159	uint32_t		cnt;
160{
161	bus_space_write_multi_4(NDIS_BUS_SPACE_IO, 0x0,
162	    (bus_size_t)port, val, cnt);
163}
164
165static void
166WRITE_PORT_BUFFER_USHORT(port, val, cnt)
167	uint16_t		*port;
168	uint16_t		*val;
169	uint32_t		cnt;
170{
171	bus_space_write_multi_2(NDIS_BUS_SPACE_IO, 0x0,
172	    (bus_size_t)port, val, cnt);
173}
174
175static void
176WRITE_PORT_BUFFER_UCHAR(port, val, cnt)
177	uint8_t			*port;
178	uint8_t			*val;
179	uint32_t		cnt;
180{
181	bus_space_write_multi_1(NDIS_BUS_SPACE_IO, 0x0,
182	    (bus_size_t)port, val, cnt);
183}
184
185static uint16_t
186READ_PORT_USHORT(port)
187	uint16_t		*port;
188{
189	return (bus_space_read_2(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port));
190}
191
192static uint32_t
193READ_PORT_ULONG(port)
194	uint32_t		*port;
195{
196	return (bus_space_read_4(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port));
197}
198
199static uint8_t
200READ_PORT_UCHAR(port)
201	uint8_t			*port;
202{
203	return (bus_space_read_1(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port));
204}
205
206static void
207READ_PORT_BUFFER_ULONG(port, val, cnt)
208	uint32_t		*port;
209	uint32_t		*val;
210	uint32_t		cnt;
211{
212	bus_space_read_multi_4(NDIS_BUS_SPACE_IO, 0x0,
213	    (bus_size_t)port, val, cnt);
214}
215
216static void
217READ_PORT_BUFFER_USHORT(port, val, cnt)
218	uint16_t		*port;
219	uint16_t		*val;
220	uint32_t		cnt;
221{
222	bus_space_read_multi_2(NDIS_BUS_SPACE_IO, 0x0,
223	    (bus_size_t)port, val, cnt);
224}
225
226static void
227READ_PORT_BUFFER_UCHAR(port, val, cnt)
228	uint8_t			*port;
229	uint8_t			*val;
230	uint32_t		cnt;
231{
232	bus_space_read_multi_1(NDIS_BUS_SPACE_IO, 0x0,
233	    (bus_size_t)port, val, cnt);
234}
235
236/*
237 * The spinlock implementation in Windows differs from that of FreeBSD.
238 * The basic operation of spinlocks involves two steps: 1) spin in a
239 * tight loop while trying to acquire a lock, 2) after obtaining the
240 * lock, disable preemption. (Note that on uniprocessor systems, you're
241 * allowed to skip the first step and just lock out pre-emption, since
242 * it's not possible for you to be in contention with another running
243 * thread.) Later, you release the lock then re-enable preemption.
244 * The difference between Windows and FreeBSD lies in how preemption
245 * is disabled. In FreeBSD, it's done using critical_enter(), which on
246 * the x86 arch translates to a cli instruction. This masks off all
247 * interrupts, and effectively stops the scheduler from ever running
248 * so _nothing_ can execute except the current thread. In Windows,
249 * preemption is disabled by raising the processor IRQL to DISPATCH_LEVEL.
250 * This stops other threads from running, but does _not_ block device
251 * interrupts. This means ISRs can still run, and they can make other
 * threads runnable, but those other threads won't be able to execute
253 * until the current thread lowers the IRQL to something less than
254 * DISPATCH_LEVEL.
255 *
256 * There's another commonly used IRQL in Windows, which is APC_LEVEL.
257 * An APC is an Asynchronous Procedure Call, which differs from a DPC
 * (Deferred Procedure Call) in that a DPC is queued up to run in
259 * another thread, while an APC runs in the thread that scheduled
260 * it (similar to a signal handler in a UNIX process). We don't
261 * actually support the notion of APCs in FreeBSD, so for now, the
262 * only IRQLs we're interested in are DISPATCH_LEVEL and PASSIVE_LEVEL.
263 *
264 * To simulate DISPATCH_LEVEL, we raise the current thread's priority
265 * to PI_REALTIME, which is the highest we can give it. This should,
266 * if I understand things correctly, prevent anything except for an
267 * interrupt thread from preempting us. PASSIVE_LEVEL is basically
268 * everything else.
269 *
270 * Be aware that, at least on the x86 arch, the Windows spinlock
271 * functions are divided up in peculiar ways. The actual spinlock
272 * functions are KfAcquireSpinLock() and KfReleaseSpinLock(), and
273 * they live in HAL.dll. Meanwhile, KeInitializeSpinLock(),
274 * KefAcquireSpinLockAtDpcLevel() and KefReleaseSpinLockFromDpcLevel()
275 * live in ntoskrnl.exe. Most Windows source code will call
276 * KeAcquireSpinLock() and KeReleaseSpinLock(), but these are just
277 * macros that call KfAcquireSpinLock() and KfReleaseSpinLock().
278 * KefAcquireSpinLockAtDpcLevel() and KefReleaseSpinLockFromDpcLevel()
279 * perform the lock acquisition/release functions without doing the
280 * IRQL manipulation, and are used when one is already running at
281 * DISPATCH_LEVEL. Make sense? Good.
282 *
283 * According to the Microsoft documentation, any thread that calls
284 * KeAcquireSpinLock() must be running at IRQL <= DISPATCH_LEVEL. If
285 * we detect someone trying to acquire a spinlock from DEVICE_LEVEL
286 * or HIGH_LEVEL, we panic.
287 *
288 * Alternate sleep-lock-based spinlock implementation
289 * --------------------------------------------------
290 *
291 * The earlier spinlock implementation was arguably a bit of a hack
292 * and presented several problems. It was basically designed to provide
293 * the functionality of spinlocks without incurring the wrath of
294 * WITNESS. We could get away with using both our spinlock implementation
295 * and FreeBSD sleep locks at the same time, but if WITNESS knew what
296 * we were really up to, it would have spanked us rather severely.
297 *
298 * There's another method we can use based entirely on sleep locks.
299 * First, it's important to realize that everything we're locking
300 * resides inside Project Evil itself: any critical data being locked
301 * by drivers belongs to the drivers, and should not be referenced
302 * by any other OS code outside of the NDISulator. The priority-based
303 * locking scheme has system-wide effects, just like real spinlocks
304 * (blocking preemption affects the whole CPU), but since we keep all
305 * our critical data private, we can use a simpler mechanism that
306 * affects only code/threads directly related to Project Evil.
307 *
308 * The idea is to create a sleep lock mutex for each CPU in the system.
309 * When a CPU running in the NDISulator wants to acquire a spinlock, it
310 * does the following:
311 * - Pin ourselves to the current CPU
312 * - Acquire the mutex for the current CPU
313 * - Spin on the spinlock variable using atomic test and set, just like
314 *   a real spinlock.
315 * - Once we have the lock, we execute our critical code
316 *
317 * To give up the lock, we do:
318 * - Clear the spinlock variable with an atomic op
319 * - Release the per-CPU mutex
320 * - Unpin ourselves from the current CPU.
321 *
322 * On a uniprocessor system, this means all threads that access protected
323 * data are serialized through the per-CPU mutex. After one thread
324 * acquires the 'spinlock,' any other thread that uses a spinlock on the
325 * current CPU will block on the per-CPU mutex, which has the same general
326 * effect of blocking pre-emption, but _only_ for those threads that are
327 * running NDISulator code.
328 *
329 * On a multiprocessor system, threads on different CPUs all block on
330 * their respective per-CPU mutex, and the atomic test/set operation
331 * on the spinlock variable provides inter-CPU synchronization, though
332 * only for threads running NDISulator code.
333 *
334 * This method solves an important problem. In Windows, you're allowed
335 * to do an ExAllocatePoolWithTag() with a spinlock held, provided you
336 * allocate from NonPagedPool. This implies an atomic heap allocation
337 * that will not cause the current thread to sleep. (You can't sleep
 * while holding a real spinlock: clowns will eat you.) But in FreeBSD,
339 * malloc(9) _always_ triggers the acquisition of a sleep lock, even
340 * when you use M_NOWAIT. This is not a problem for FreeBSD native
341 * code: you're allowed to sleep in things like interrupt threads. But
342 * it is a problem with the old priority-based spinlock implementation:
343 * even though we get away with it most of the time, we really can't
344 * do a malloc(9) after doing a KeAcquireSpinLock() or KeRaiseIrql().
345 * With the new implementation, it's not a problem: you're allowed to
346 * acquire more than one sleep lock (as long as you avoid lock order
347 * reversals).
348 *
349 * The one drawback to this approach is that now we have a lot of
350 * contention on one per-CPU mutex within the NDISulator code. Whether
351 * or not this is preferable to the expected Windows spinlock behavior
352 * of blocking pre-emption is debatable.
353 */
354
355uint8_t
356KfAcquireSpinLock(lock)
357	kspin_lock		*lock;
358{
359	uint8_t			oldirql;
360
361	KeRaiseIrql(DISPATCH_LEVEL, &oldirql);
362	KeAcquireSpinLockAtDpcLevel(lock);
363
364	return (oldirql);
365}
366
367void
368KfReleaseSpinLock(kspin_lock *lock, uint8_t newirql)
369{
370	KeReleaseSpinLockFromDpcLevel(lock);
371	KeLowerIrql(newirql);
372}
373
374uint8_t
375KeGetCurrentIrql()
376{
377	if (mtx_owned(&disp_lock[curthread->td_oncpu]))
378		return (DISPATCH_LEVEL);
379	return (PASSIVE_LEVEL);
380}
381
382static uint64_t
383KeQueryPerformanceCounter(freq)
384	uint64_t		*freq;
385{
386	if (freq != NULL)
387		*freq = hz;
388
389	return ((uint64_t)ticks);
390}
391
392uint8_t
393KfRaiseIrql(uint8_t irql)
394{
395	uint8_t			oldirql;
396
397	sched_pin();
398	oldirql = KeGetCurrentIrql();
399
400	/* I am so going to hell for this. */
401	if (oldirql > irql)
402		panic("IRQL_NOT_LESS_THAN_OR_EQUAL");
403
404	if (oldirql != DISPATCH_LEVEL)
405		mtx_lock(&disp_lock[curthread->td_oncpu]);
406	else
407		sched_unpin();
408
409/*printf("RAISE IRQL: %d %d\n", irql, oldirql);*/
410
411	return (oldirql);
412}
413
414void
415KfLowerIrql(uint8_t oldirql)
416{
417	if (oldirql == DISPATCH_LEVEL)
418		return;
419
420	if (KeGetCurrentIrql() != DISPATCH_LEVEL)
421		panic("IRQL_NOT_GREATER_THAN");
422
423	mtx_unlock(&disp_lock[curthread->td_oncpu]);
424	sched_unpin();
425}
426
427static uint8_t
428KeRaiseIrqlToDpcLevel(void)
429{
430	uint8_t			irql;
431
432	KeRaiseIrql(DISPATCH_LEVEL, &irql);
433	return (irql);
434}
435
/*
 * _KeLowerIrql: function-form wrapper around KeLowerIrql().
 *
 * The export table registers this routine under the name KeLowerIrql
 * (after an #undef of that name), presumably because KeLowerIrql is
 * otherwise a macro and has no address of its own.
 *
 * oldirql - level to return to, forwarded unchanged.
 */
static void
_KeLowerIrql(uint8_t oldirql)
{

	KeLowerIrql(oldirql);
}
441
/*
 * dummy: catch-all stub registered as the last real entry of the
 * export table.  It only logs that it was reached.
 */
static void
dummy(void)
{
	printf("hal dummy called...\n");
}
446
447image_patch_table hal_functbl[] = {
448	IMPORT_SFUNC(KeStallExecutionProcessor, 1),
449	IMPORT_SFUNC(WRITE_PORT_ULONG, 2),
450	IMPORT_SFUNC(WRITE_PORT_USHORT, 2),
451	IMPORT_SFUNC(WRITE_PORT_UCHAR, 2),
452	IMPORT_SFUNC(WRITE_PORT_BUFFER_ULONG, 3),
453	IMPORT_SFUNC(WRITE_PORT_BUFFER_USHORT, 3),
454	IMPORT_SFUNC(WRITE_PORT_BUFFER_UCHAR, 3),
455	IMPORT_SFUNC(READ_PORT_ULONG, 1),
456	IMPORT_SFUNC(READ_PORT_USHORT, 1),
457	IMPORT_SFUNC(READ_PORT_UCHAR, 1),
458	IMPORT_SFUNC(READ_PORT_BUFFER_ULONG, 3),
459	IMPORT_SFUNC(READ_PORT_BUFFER_USHORT, 3),
460	IMPORT_SFUNC(READ_PORT_BUFFER_UCHAR, 3),
461	IMPORT_FFUNC(KfAcquireSpinLock, 1),
462	IMPORT_FFUNC(KfReleaseSpinLock, 1),
463	IMPORT_SFUNC(KeGetCurrentIrql, 0),
464	IMPORT_SFUNC(KeQueryPerformanceCounter, 1),
465	IMPORT_FFUNC(KfLowerIrql, 1),
466	IMPORT_FFUNC(KfRaiseIrql, 1),
467	IMPORT_SFUNC(KeRaiseIrqlToDpcLevel, 0),
468#undef KeLowerIrql
469	IMPORT_SFUNC_MAP(KeLowerIrql, _KeLowerIrql, 1),
470
471	/*
472	 * This last entry is a catch-all for any function we haven't
473	 * implemented yet. The PE import list patching routine will
474	 * use it for any function that doesn't have an explicit match
475	 * in this table.
476	 */
477
478	{ NULL, (FUNC)dummy, NULL, 0, WINDRV_WRAP_STDCALL },
479
480	/* End of list. */
481	{ NULL, NULL, NULL }
482};
483