/*
 * Copyright (c) 2012 Apple Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

#if ACPI_SUPPORT

#include <IOKit/IOMapper.h>
#include <IOKit/IOKitKeysPrivate.h>
#include <libkern/tree.h>
#include <libkern/OSDebug.h>
#include <i386/cpuid.h>
#include "dmar.h"

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

extern "C" vm_offset_t ml_io_map(vm_offset_t phys_addr, vm_size_t size);
extern "C" ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va);

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#define ENA_QI			1
#define TABLE_CB		0
#define BSIMPLE			0

#define KP				0
#define	VTASRT			0

#define kLargeThresh	(128)
#define kLargeThresh2	(32)
#define kVPages  		(1<<22)
#define kBPagesLog2 	(18)
#define kBPagesSafe		((1<<kBPagesLog2)-(1<<(kBPagesLog2 - 2)))      /* 3/4 */
#define kBPagesReserve	((1<<kBPagesLog2)-(1<<(kBPagesLog2 - 3)))      /* 7/8 */
#define kRPages  		(1<<20)

#define kQIPageCount    (2)

#define kTlbDrainReads  (0ULL)
#define kTlbDrainWrites (0ULL)
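
// Sizing notes, derived from the constants above: with 4K pages, kVPages
// (1<<22 pages) gives a 16GB I/O virtual space. Addresses below kRPages
// (1<<20 pages, the low 4GB) are served by the buddy ("b") allocator, whose
// active pool is 2^kBPagesLog2 pages; kBPagesSafe and kBPagesReserve are its
// 3/4 and 7/8 occupancy watermarks, used below to throttle large allocations.
// Addresses at or above kRPages are served by the red-black range allocator.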

#define VTLOG(fmt, args...)                   \
    do {                                                    						\
        if ((gIOPCIFlags & kIOPCIConfiguratorVTLog) && !ml_at_interrupt_context())  \
            IOLog(fmt, ## args);                           							\
        if (gIOPCIFlags & kIOPCIConfiguratorVTLog)        							\
            kprintf(fmt, ## args);                          						\
    } while(0)


#if VTASRT

#define vtassert(ex)  \
	((ex) ? (void)0 : Assert(__FILE__, __LINE__, # ex))

#define vtd_space_nfault(x,y,z) _vtd_space_nfault(x,y,z)

#define STAT_ADD(space, name, value) do { space->stats.name += value; } while (false)

#else	/* VTASRT */

#define vtassert(ex)
#define vtd_space_nfault(x,y,z)

#define STAT_ADD(space, name, value) do { space->stats.name += value; } while (false)
//#define STAT_ADD(space, name, value)

#endif	/* !VTASRT */


#if TABLE_CB
#define table_flush(addr, size, linesize) clflush((uintptr_t)(addr), (size), linesize)
#else
#define table_flush(addr, size, linesize) __mfence()
#endif


#if BSIMPLE
#define BLOCK(l)	IOSimpleLockLock(l)
#define BUNLOCK(l)	IOSimpleLockUnlock(l)
#else
#define BLOCK(l)	IOLockLock(l)
#define BUNLOCK(l)	IOLockUnlock(l)
#endif

#define arrayCount(x)	(sizeof(x) / sizeof(x[0]))

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

enum
{
	kEntryPresent = 0x00000001ULL
};

struct root_entry_t
{
    uint64_t context_entry_ptr;
    uint64_t resv;
};

struct context_entry_t
{
    uint64_t address_space_root;
    uint64_t context_entry;
};

struct qi_descriptor_t
{
    uint64_t command;
    uint64_t address;
};
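
// Per the VT-d spec, queued-invalidation (QI) descriptors are 128 bits: a
// command qword (descriptor type in the low bits plus type-specific fields)
// and an address/operand qword. This driver uses only the IOTLB-invalidate
// (type 2) and invalidation-wait (type 5) descriptors; see iovmFree() below.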

// address_space_root
enum
{
//	kEntryPresent 			= 0x00000001ULL,
	kFaultProcessingDisable = 0x00000002ULL,
	kTranslationType0 		= 0x00000000ULL,
	kTranslationType1 		= 0x00000004ULL,
	kTranslationType2 		= 0x00000008ULL,
	kTranslationType3 		= 0x0000000CULL,
	kEvictionHint 		    = 0x00000010ULL,
	kAddressLocalityHint    = 0x00000020ULL,
};

// context_entry
enum
{
	kAddressWidth30			= 0x00000000ULL,
	kAddressWidth39			= 0x00000001ULL,
	kAddressWidth48			= 0x00000002ULL,
	kAddressWidth57			= 0x00000003ULL,
	kAddressWidth64			= 0x00000004ULL,

	kContextAvail1		    = 0x00000008ULL,	// 4b
	kDomainIdentifier1		= 0x00000100ULL,	// 16b
};

enum
{
	kNotTheDomain = 1ULL << 32,
	kTheDomain    = 2ULL
};

typedef uint64_t page_entry_t;

// page_entry_t
enum
{
	kReadAccess 			= 0x00000001ULL,
	kWriteAccess			= 0x00000002ULL,
	kPageAccess				= kReadAccess|kWriteAccess,
	kPageAvail1			    = 0x00000004ULL,	// 5b
	kSuperPage			    = 0x00000080ULL,
	kPageAvail2			    = 0x00000100ULL,	// 3b
	kSnoopBehavior		    = 0x00000800ULL,
	kTransientMapping		= 0x4000000000000000ULL,
	kPageAvail3				= 0x8000000000000000ULL, // 1b

	kPageAddrMask			= 0x3ffffffffffff000ULL
};

struct vtd_registers_t
{
/*00*/ 	uint32_t version;
/*04*/	uint32_t res1;
/*08*/	uint64_t capability;
/*10*/	uint64_t extended_capability;
/*18*/	uint32_t global_command;
/*1c*/	uint32_t global_status;
/*20*/	uint64_t root_entry_table;
/*28*/	uint64_t context_command;
/*30*/	uint32_t res2;
/*34*/	uint32_t fault_status;
/*38*/	uint32_t fault_event_control;
/*3c*/	uint32_t fault_event_data;
/*40*/	uint32_t fault_event_address;
/*44*/	uint32_t fault_event_upper_address;
/*48*/	uint64_t res3[2];
/*58*/	uint64_t advanced_fault;
/*60*/	uint32_t res4;
/*64*/	uint32_t protected_memory_enable;
/*68*/	uint32_t protected_low_memory_base;
/*6c*/	uint32_t protected_low_memory_limit;
/*70*/	uint64_t protected_high_memory_base;
/*78*/	uint64_t protected_high_memory_limit;
/*80*/	uint64_t invalidation_queue_head;
/*88*/	uint64_t invalidation_queue_tail;
/*90*/	uint64_t invalidation_queue_address;
/*98*/	uint32_t res5;
/*9c*/	uint32_t invalidation_completion_status;
/*a0*/	uint32_t invalidation_completion_event_control;
/*a4*/	uint32_t invalidation_completion_event_data;
/*a8*/	uint32_t invalidation_completion_event_address;
/*ac*/	uint32_t invalidation_completion_event_upper_address;
/*b0*/	uint64_t res6;
/*b8*/	uint64_t interrupt_remapping_table;
/*c0*/
};

struct vtd_iotlb_registers_t
{
/*00*/	uint64_t address;
/*08*/	uint64_t command;
};
struct vtd_fault_registers_t
{
/*00*/	uint64_t fault_low;
/*08*/	uint64_t fault_high;
};

typedef char vtd_registers_t_check[(sizeof(vtd_registers_t) == 0xc0) ? 1 : -1];
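
// Compile-time layout check: the typedef'd array has size -1 (a build error)
// unless vtd_registers_t is exactly 0xc0 bytes, matching the offsets above.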

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

struct vtd_unit_t
{
    ACPI_DMAR_HARDWARE_UNIT * dmar;
    volatile vtd_registers_t * regs;
    volatile vtd_iotlb_registers_t * iotlb;
    volatile vtd_fault_registers_t * faults;

    IOMemoryMap *     qi_map;
    qi_descriptor_t * qi_table;

	uint64_t root;
	uint64_t msi_address;
    uint64_t qi_address;
    uint64_t qi_stamp_address;

	uint32_t qi_tail;
	uint32_t qi_mask;
    volatile
    uint32_t qi_stamp;

	uint32_t msi_data;
    uint32_t num_fault;
    uint32_t rounding;

    uint8_t  global:1;
    uint8_t  caching:1;
    uint8_t  selective:1;
};

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

static inline void __mfence(void)
{
    __asm__ volatile("mfence");
}

static inline void __clflush(void *ptr)
{
	__asm__ volatile("clflush (%0)" : : "r" (ptr));
}

static inline void clflush(uintptr_t addr, unsigned int count, uintptr_t linesize)
{
	uintptr_t  bound = (addr + count + linesize -1) & ~(linesize - 1);
	__mfence();
	while (addr < bound)
	{
		__clflush((void *) (uintptr_t) addr);
		addr += linesize;
	}
	__mfence();
}
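
// unit_init() maps a DMAR hardware unit's register block and decodes the
// capability fields this driver needs. Per the VT-d spec: the IOTLB register
// offset lives in ECAP bits 17:8 and the fault-recording register offset in
// CAP bits 33:24 (both in 16-byte units); CAP bits 47:40 give the number of
// fault records minus one, bit 39 page-selective invalidation support, bits
// 53:48 the maximum invalidation address mask (rounding), bit 7 caching mode.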

static
vtd_unit_t * unit_init(ACPI_DMAR_HARDWARE_UNIT * dmar)
{
	vtd_unit_t * unit;

	unit = IONew(vtd_unit_t, 1);
	if (!unit) return (NULL);
	bzero(unit, sizeof(vtd_unit_t));

	unit->dmar = dmar;

	VTLOG("unit %p Address %llx, Flags %x\n",
			dmar, dmar->Address, dmar->Flags);

	unit->regs = (typeof(unit->regs)) ml_io_map(dmar->Address, 0x1000);

	uint32_t
	offset = (unit->regs->extended_capability >> (8 - 4)) & (((1 << 10) - 1) << 4);
	unit->iotlb = (typeof(unit->iotlb)) (((uintptr_t)unit->regs) + offset);

	offset = (unit->regs->capability >> (24 - 4)) & (((1 << 10) - 1) << 4);
	unit->faults = (typeof(unit->faults)) (((uintptr_t)unit->regs) + offset);
	unit->num_fault = (1 + ((unit->regs->capability >> 40) & ((1 << 8) - 1)));

	unit->selective = (1 & (unit->regs->capability >> 39));
	unit->rounding = (0x3f & (unit->regs->capability >> 48));
	unit->caching = (1 & (unit->regs->capability >> 7));
	unit->global = (ACPI_DMAR_INCLUDE_ALL & dmar->Flags);

	VTLOG("cap 0x%llx extcap 0x%llx glob %d sel %d cache mode %d iotlb %p nfault[%d] %p\n",
			unit->regs->capability, unit->regs->extended_capability,
			unit->global, unit->selective, unit->caching,
			unit->iotlb, unit->num_fault, unit->faults);

	// caching is only allowed for VMs
	if (unit->caching
	// disable IG unit
	|| ((!unit->global) && (!(kIOPCIConfiguratorIGIsMapped & gIOPCIFlags))))
	{
		IODelete(unit, vtd_unit_t, 1);
		unit = NULL;
	}

	return (unit);
}

static void
unit_faults(vtd_unit_t * unit, bool log)
{
	uint32_t idx;
	for (idx = 0; idx < unit->num_fault; idx++)
	{
		uint64_t h, l;
		uint32_t faults_pending;

		faults_pending = unit->regs->fault_status;
		h = unit->faults[idx].fault_high;
		l = unit->faults[idx].fault_low;
		unit->faults[idx].fault_high = h;
		unit->regs->fault_status = faults_pending;
		__mfence();
		if (log && ((1ULL << 63) & h))
		{
			char msg[256];
			snprintf(msg, sizeof(msg), "vtd[%d] fault: device %d:%d:%d reason 0x%x %c:0x%llx", idx,
				(int)(255 & (h >> 8)), (int)(31 & (h >> 3)), (int)(7 & (h >> 0)),
				(int)(255 & (h >> (96 - 64))), (h & (1ULL << (126 - 64))) ? 'R' : 'W', l);
			IOLog("%s\n", msg);
			kprintf("%s\n", msg);
			if (kIOPCIConfiguratorPanicOnFault & gIOPCIFlags) panic("%s", msg);
		}
	}
}
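
// unit_enable() programs the root table and brings the unit up. The magic
// bits below follow the VT-d global command register layout: bit 30 latches
// the root table pointer, bit 26 enables queued invalidation, bit 31 enables
// translation. The context_command and iotlb command writes issue global
// invalidations; bit 63 starts the operation and hardware clears it when done.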

static void
unit_enable(vtd_unit_t * unit)
{
    uint32_t command;

	VTLOG("unit %p global status 0x%x\n", unit, unit->regs->global_status);

	unit->regs->root_entry_table = unit->root;
	__mfence();

	unit->regs->global_command = (1UL<<30);
	__mfence();
	while (!((1UL<<30) & unit->regs->global_status)) {}
//	VTLOG("did set root\n");

	unit->regs->context_command = (1ULL<<63) | (1ULL<<61);
	__mfence();
	while ((1ULL<<63) & unit->regs->context_command) {}
//	VTLOG("did context inval\n");

	// global & rw drain
	unit->iotlb->command = (1ULL<<63) | (1ULL<<60) | (1ULL<<49) | (1ULL<<48);
	__mfence();
	while ((1ULL<<63) & unit->iotlb->command) {}
//	VTLOG("did iotlb inval\n");

	unit->qi_tail = 0;
	unit->regs->invalidation_queue_head = 0;
	unit->regs->invalidation_queue_tail = 0;
    unit->regs->invalidation_queue_address = unit->qi_address;

	command = 0;

#if ENA_QI
	command |= (1UL<<26);
	unit->regs->global_command = command;
	__mfence();
	while (!((1UL<<26) & unit->regs->global_status)) {}
	VTLOG("did ena qi p 0x%qx v %p\n", unit->qi_address, unit->qi_table);
#endif

	command |= (1UL<<31);
	unit->regs->global_command = command;
	__mfence();
	while (!((1UL<<31) & unit->regs->global_status)) {}
	VTLOG("did ena\n");

	if (unit->msi_address)
	{
		unit->regs->invalidation_completion_event_data          = unit->msi_data;
		unit->regs->invalidation_completion_event_address       = unit->msi_address;
		unit->regs->invalidation_completion_event_upper_address = (unit->msi_address >> 32);

		unit->regs->fault_event_data          = unit->msi_data + 1;
		unit->regs->fault_event_address       = unit->msi_address;
		unit->regs->fault_event_upper_address = (unit->msi_address >> 32);

		__mfence();
		unit_faults(unit, false);

		unit->regs->fault_event_control = 0;					// ints ena
		unit->regs->invalidation_completion_event_control = 0;	// ints ena
		unit->regs->invalidation_completion_status = 1;
	}
}

static void
unit_quiesce(vtd_unit_t * unit)
{
	VTLOG("unit %p quiesce\n", unit);
	// completion stamps will continue after wake
}

static void
unit_invalidate(vtd_unit_t * unit,
							uint64_t did, ppnum_t addr, ppnum_t mask, bool leaf)
{
	if (unit->selective)
	{
		 unit->iotlb->address = ptoa_64(addr) | (leaf << 6) | mask;
		 __mfence();
		 unit->iotlb->command = (1ULL<<63) | (3ULL<<60) | (kTlbDrainReads<<49) | (kTlbDrainWrites<<48) | (did << 32);
	}
	else unit->iotlb->command = (1ULL<<63) | (1ULL<<60) | (kTlbDrainReads<<49) | (kTlbDrainWrites<<48);
	__mfence();
}

static void
unit_invalidate_done(vtd_unit_t * unit)
{
	while ((1ULL<<63) & unit->iotlb->command) {}
}

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

typedef uint32_t vtd_vaddr_t;

union vtd_table_entry
{
	struct
	{
		uint     read:1 	__attribute__ ((packed));
		uint     write:1 	__attribute__ ((packed));
		uint     resv:10 	__attribute__ ((packed));
		uint64_t addr:51 	__attribute__ ((packed));
		uint     used:1 	__attribute__ ((packed));
	} used;
	struct
	{
		uint access:2 		__attribute__ ((packed));
		uint next:28 		__attribute__ ((packed));
		uint prev:28 		__attribute__ ((packed));
		uint size:5 		__attribute__ ((packed));
		uint free:1 		__attribute__ ((packed));
	} free;
	uint64_t bits;
};
typedef union vtd_table_entry vtd_table_entry_t;
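
// A table entry is dual-use: when mapped ("used") it is a hardware PTE
// holding the access bits and physical page; when free it carries the buddy
// allocator's free-list links (next/prev/size) — see balloc.c. Free entries
// keep the low access bits clear, so hardware treats them as not-present.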

typedef uint32_t vtd_rbaddr_t;

struct vtd_rblock
{
	RB_ENTRY(vtd_rblock) address_link;
	RB_ENTRY(vtd_rblock) size_link;

	vtd_rbaddr_t start;
	vtd_rbaddr_t end;
};

RB_HEAD(vtd_rbaddr_list, vtd_rblock);
RB_HEAD(vtd_rbsize_list, vtd_rblock);

struct vtd_space_stats
{
    ppnum_t vsize;
    ppnum_t tables;
    ppnum_t bused;
    ppnum_t rused;
    ppnum_t largest_paging;
    ppnum_t largest_32b;
    ppnum_t inserts;
    ppnum_t max_inval[2];
    ppnum_t breakups;
    ppnum_t merges;
    ppnum_t allocs[64];
	ppnum_t bcounts[20];
};
typedef struct vtd_space_stats vtd_space_stats_t;

struct vtd_free_queued_t
{
    ppnum_t  addr;
    ppnum_t  size;
    uint32_t stamp;
};
enum
{
	kFreeQCount = 2,
	kFreeQElems = 256
};

struct vtd_space
{
#if BSIMPLE
	IOSimpleLock *      block;
#else
	IOLock *            block;
#endif
	IOLock *            rlock;
	ppnum_t				vsize;
	ppnum_t				rsize;
	size_t      	    table_bitmap_size;
	uint8_t *   	    table_bitmap;
	IOMemoryMap *       table_map;
	vtd_table_entry_t *	tables[6];
	uint32_t            cachelinesize;
	ppnum_t             root_page;
	uint8_t				max_level;
    uint8_t             waiting_space;
	uint8_t     	    bheads_count;
	vtd_table_entry_t * bheads;

	vtd_space_stats_t   stats;

    vtd_free_queued_t   free_queue[kFreeQCount][kFreeQElems];
    volatile uint32_t	free_head[kFreeQCount];
    volatile uint32_t   free_tail[kFreeQCount];
    uint32_t			free_mask;
    uint32_t            stamp;

	struct vtd_rbaddr_list rbaddr_list;
	struct vtd_rbsize_list rbsize_list;
};
typedef struct vtd_space vtd_space_t;
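
// Deferred-free bookkeeping: iovmFree() queues (addr, size, stamp) triples in
// free_queue and posts a QI wait descriptor that makes hardware write the
// stamp back to unit->qi_stamp on completion. checkFree() later returns
// entries to the allocators once every unit's stamp has passed the entry's,
// guaranteeing no stale IOTLB entry can still reference the freed range.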

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

static vtd_vaddr_t
vtd_log2up(vtd_vaddr_t size)
{
	if (1 == size) size = 0;
	else size = 32 - __builtin_clz((unsigned int)size - 1);
	return (size);
}

static vtd_vaddr_t
vtd_log2down(vtd_vaddr_t size)
{
	size = 31 - __builtin_clz((unsigned int)size);
	return (size);
}
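
// Examples: vtd_log2up(1) == 0 and vtd_log2up(5) == 3 (round up);
// vtd_log2down(5) == 2 (round down).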

static void
_vtd_space_nfault(vtd_space_t * bf, vtd_vaddr_t start, vtd_vaddr_t size)
{
	vtd_vaddr_t index;
	vtd_vaddr_t byte;
	uint8_t bit;

	vtassert((start + size) <= bf->vsize);

	size += (start & 511);
	size = (size + 511) & ~511;

	while (true)
	{
		index = (start >> 9);
		byte = (index >> 3);
		bit = (1 << (7 & index));
		vtassert(bf->table_bitmap[byte] & bit);
		if (size <= 512) break;
		size -= 512;
		start += 512;
	}
}

static bool
vtd_space_present(vtd_space_t * bf, vtd_vaddr_t start)
{
	vtd_vaddr_t byte;
	uint8_t bit;

	vtassert(start < bf->vsize);

	start >>= 9;
	byte = (start >> 3);
	bit = (1 << (7 & start));
	return (bf->table_bitmap[byte] & bit);
}
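
// The table bitmap tracks leaf-table residency at a granularity of 512
// entries (one 4K table page) per bit. vtd_space_fault() below wires missing
// leaf pages on demand and links them into the level-1 table;
// _vtd_space_nfault() merely asserts that a range is already resident.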

static void
vtd_space_fault(vtd_space_t * bf, vtd_vaddr_t start, vtd_vaddr_t size)
{
	vtd_vaddr_t index;
	vtd_vaddr_t byte;
	uint8_t     bits, bit;
	IOReturn    kr;

	vtassert((start + size) <= bf->vsize);

	size += (start & 511);
	size = (size + 511) & ~511;

	while (true)
	{
		index = (start >> 9);
		byte = (index >> 3);
		index &= 7;
		bits = bf->table_bitmap[byte];
#if 1
		if (0xff == bits)
		{
			index = (8 - index) * 512;
			if (size <= index) break;
			size -= index;
			start += index;
			continue;
		}
#endif
		bit = (1 << index);
		if (!(bits & bit))
		{
			bf->table_bitmap[byte] = bits | bit;
			index = start & ~511;

//			VTLOG("table fault addr 0x%x, table %p\n", start, &bf->tables[0][start]);
			kr = bf->table_map->wireRange(kIODirectionOutIn, index << 3, page_size);
			vtassert(kr == KERN_SUCCESS);
			STAT_ADD(bf, tables, 1);

			bf->tables[0][index].bits = 0;
			ppnum_t lvl0page = pmap_find_phys(kernel_pmap, (uintptr_t) &bf->tables[0][index]);
			if (!lvl0page) panic("!lvl0page");
			bf->tables[1][index >> 9].bits = ptoa_64(lvl0page) | kPageAccess;
			table_flush(&bf->tables[1][index >> 9], sizeof(vtd_table_entry_t), bf->cachelinesize);
		}
		if (size <= 512) break;
		size -= 512;
		start += 512;
	}
}

static void
vtd_space_set(vtd_space_t * bf, vtd_vaddr_t start, vtd_vaddr_t size,
			  uint32_t mapOptions, upl_page_info_t * pageList)
{
	ppnum_t idx;
	uint8_t access = kReadAccess | 0*kWriteAccess;

	if (kIODMAMapPhysicallyContiguous & mapOptions) VTLOG("map phys %x, %x\n", pageList[0].phys_addr, size);

	if (mapOptions & kIODMAMapWriteAccess) access |= kWriteAccess;

	vtassert((start + size) <= bf->vsize);
	vtd_space_nfault(bf, start, size);

	if (kIODMAMapPhysicallyContiguous & mapOptions)
	{
		for (idx = 0; idx < size; idx++)
		{
			bf->tables[0][start + idx].bits = (access | ptoa_64(pageList[0].phys_addr + idx));
		}
#if TABLE_CB
		table_flush(&bf->tables[0][start], size * sizeof(vtd_table_entry_t), bf->cachelinesize);
#endif
	}
	else
	{
#if TABLE_CB
    	ppnum_t j;
		for (idx = 0; size >= 8; size -= 8, idx += 8)
		{
			j = 0;
			bf->tables[0][start + idx + j].bits = (access | ptoa_64(pageList[idx + j].phys_addr)); j++;
			bf->tables[0][start + idx + j].bits = (access | ptoa_64(pageList[idx + j].phys_addr)); j++;
			bf->tables[0][start + idx + j].bits = (access | ptoa_64(pageList[idx + j].phys_addr)); j++;
			bf->tables[0][start + idx + j].bits = (access | ptoa_64(pageList[idx + j].phys_addr)); j++;
			bf->tables[0][start + idx + j].bits = (access | ptoa_64(pageList[idx + j].phys_addr)); j++;
			bf->tables[0][start + idx + j].bits = (access | ptoa_64(pageList[idx + j].phys_addr)); j++;
			bf->tables[0][start + idx + j].bits = (access | ptoa_64(pageList[idx + j].phys_addr)); j++;
			bf->tables[0][start + idx + j].bits = (access | ptoa_64(pageList[idx + j].phys_addr));
			__mfence();
			__clflush((void *) &bf->tables[0][start + idx].bits);
		}
		if (size)
		{
			for (j = 0; j < size; j++)
			{
				bf->tables[0][start + idx + j].bits = (access | ptoa_64(pageList[idx + j].phys_addr));
			}
			__mfence();
			__clflush((void *) &bf->tables[0][start + idx].bits);
		}
#else
		for (idx = 0; idx < size; idx++)
		{
			bf->tables[0][start + idx].bits = (access | ptoa_64(pageList[idx].phys_addr));
		}
#endif
	}
	__mfence();
}
#include "balloc.c"
#include "rballoc.c"

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

class AppleVTD : public IOMapper
{
    OSDeclareDefaultStructors(AppleVTD);

public:
	IOSimpleLock  		   * fHWLock;
	const OSData  	       * fDMARData;
	IOWorkLoop             * fWorkLoop;
	IOInterruptEventSource * fIntES;
	IOInterruptEventSource * fFaultES;
    IOTimerEventSource     * fTimerES;

	enum { kMaxUnits = 8 };
	vtd_unit_t * units[kMaxUnits];

	uint32_t fTreeBits;
	uint32_t fMaxRoundSize;

	uint32_t fCacheLineSize;

	IOMemoryMap * fTableMap;
	IOMemoryMap * fContextTableMap;

	ppnum_t  fRootEntryPage;

	vtd_space_t * fSpace;

	static void install(IOWorkLoop * wl, uint32_t flags,
						IOService * provider, const OSData * data);
	bool init(IOWorkLoop * wl, const OSData * data);

    virtual void free();
    virtual bool initHardware(IOService *provider);

	vtd_space_t * space_create(uint32_t cachelinesize, uint32_t treebits, ppnum_t vsize,
							   uint32_t buddybits, ppnum_t rsize);
	vtd_vaddr_t space_alloc(vtd_space_t * bf, vtd_vaddr_t size,
							uint32_t mapOptions, const IODMAMapSpecification * mapSpecification,
							upl_page_info_t * pageList);
	void space_free(vtd_space_t * bf, vtd_vaddr_t addr, vtd_vaddr_t size);
	void space_alloc_fixed(vtd_space_t * bf, vtd_vaddr_t addr, vtd_vaddr_t size);

    IOReturn handleInterrupt(IOInterruptEventSource * source, int count);
    IOReturn handleFault(IOInterruptEventSource * source, int count);
	IOReturn timer(OSObject * owner, IOTimerEventSource * sender);
	virtual IOReturn callPlatformFunction(const OSSymbol * functionName,
										  bool waitForFunction,
										  void * param1, void * param2,
										  void * param3, void * param4);

	void iovmInvalidateSync(ppnum_t addr, IOItemCount pages);
    void checkFree(uint32_t queue);

    virtual ppnum_t iovmAlloc(IOItemCount pages);
    virtual void iovmFree(ppnum_t addr, IOItemCount pages);

    virtual void iovmInsert(ppnum_t addr, IOItemCount offset, ppnum_t page);
    virtual void iovmInsert(ppnum_t addr, IOItemCount offset,
                            ppnum_t *pageList, IOItemCount pageCount);
    virtual void iovmInsert(ppnum_t addr, IOItemCount offset,
                            upl_page_info_t *pageList, IOItemCount pageCount);

    virtual ppnum_t iovmMapMemory(
    			  OSObject                    * memory,   // dma command or iomd
				  ppnum_t                       offsetPage,
				  ppnum_t                       pageCount,
				  uint32_t                      options,
				  upl_page_info_t             * pageList,
				  const IODMAMapSpecification * mapSpecification);

    virtual addr64_t mapAddr(IOPhysicalAddress addr);
};


OSDefineMetaClassAndStructors(AppleVTD, IOMapper);
#define super IOMapper

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

void
AppleVTD::install(IOWorkLoop * wl, uint32_t flags,
					IOService * provider, const OSData * data)
{
	AppleVTD * mapper = 0;
	bool ok = false;

	if (!IOService::getPlatform()->getProperty(kIOPlatformMapperPresentKey)) return;

	VTLOG("DMAR %p\n", data);
	if (data)
	{
		mapper = new AppleVTD;
		if (mapper)
		{
			if (mapper->init(wl, data) && mapper->attach(provider))
			{
				ok = mapper->start(provider);
				if (!ok) mapper->detach(provider);
			}
			mapper->release();
		}
	}
	if (!ok)
	{
		IOService::getPlatform()->removeProperty(kIOPlatformMapperPresentKey);
		IOMapper::setMapperRequired(false);
	}
}

bool
AppleVTD::init(IOWorkLoop * wl, const OSData * data)
{
	uint32_t unitIdx;

	if (!super::init()) return (false);

	data->retain();
	fDMARData = data;
	wl->retain();
	fWorkLoop = wl;
	fCacheLineSize = cpuid_info()->cache_linesize;

	ACPI_TABLE_DMAR *           dmar = (typeof(dmar))      data->getBytesNoCopy();
	ACPI_DMAR_HEADER *          dmarEnd = (typeof(dmarEnd))(((uintptr_t) dmar) + data->getLength());
	ACPI_DMAR_HEADER *          hdr = (typeof(hdr))      (dmar + 1);
	ACPI_DMAR_HARDWARE_UNIT *   unit;

	VTLOG("DMAR Width %x, Flags %x\n", dmar->Width, dmar->Flags);

	for (unitIdx = 0; hdr < dmarEnd;
			hdr = (typeof(hdr))(((uintptr_t) hdr) + hdr->Length))
	{
		switch (hdr->Type)
		{
			case ACPI_DMAR_TYPE_HARDWARE_UNIT:
				unit = (typeof(unit)) hdr;
				if ((units[unitIdx] = unit_init(unit))) unitIdx++;
				break;
		}
	}

	return (unitIdx != 0);
}

void AppleVTD::free()
{
	super::free();
}

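// space_create() builds the translation tree for one address space: all
// levels live in a single pageable buffer, with level 0 (the leaf tables)
// wired on demand by vtd_space_fault() and levels 1..max_level wired and
// linked here at creation time. treebits is the total translated VA width;
// each level resolves 9 bits.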
vtd_space_t *
AppleVTD::space_create(uint32_t cachelinesize,
						uint32_t treebits, ppnum_t vsize, uint32_t buddybits, ppnum_t rsize)
{
	IOBufferMemoryDescriptor * md;
	IOReturn 	   kr = kIOReturnSuccess;
	vtd_space_t *  bf;
	uint32_t       count;
	mach_vm_size_t alloc;
	uint32_t       level;
	uint32_t       bit;

	vtassert(vsize >= (1U << buddybits));
	vtassert(vsize > rsize);
	vtassert(buddybits > (9 + 3));
	vtassert(treebits > 12);

	bf = IONew(vtd_space_t, 1);
	if (!bf) return (NULL);
	bzero(bf, sizeof(vtd_space_t));

	bf->rlock = IOLockAlloc();
#if BSIMPLE
	bf->block = fHWLock;
#else
	bf->block = IOLockAlloc();
#endif
	bf->cachelinesize = cachelinesize;

	treebits -= 12;
	vsize = (vsize + 511) & ~511;
	bf->vsize = vsize;
	bf->table_bitmap_size = ((vsize / 512) + 7) / 8;
	bf->table_bitmap = IONew(uint8_t, bf->table_bitmap_size);
	if (!bf->table_bitmap) return (NULL);
	bzero(bf->table_bitmap, bf->table_bitmap_size);

	alloc = 0;
	level = 0;
	bit   = 0;
	while (bit < treebits)
	{
		count = (vsize >> bit);
		if (!count) count = 1;
		alloc += round_page_64(count * sizeof(vtd_table_entry_t));
		bit += 9;
		level++;
	}
	bf->max_level = level - 1;

	VTLOG("level %d, bmd...0x%llx\n", bf->max_level, alloc);
	md = IOBufferMemoryDescriptor::inTaskWithOptions(TASK_NULL,
						kIOMemoryPageable |
#if !TABLE_CB
						kIOMapWriteCombineCache |
#endif
						kIOMemoryMapperNone,
						alloc, page_size);
	VTLOG("bmd %p\n", md);
	vtassert(md);
	if (!md) return (NULL);

//	kr = bmd->prepare(kIODirectionOutIn);
//	vtassert(KERN_SUCCESS == kr);

	bf->table_map = md->map();
	vtassert(bf->table_map);
	md->release();

	if (!bf->table_map) return (NULL);

	vtd_table_entry_t * table;
	table = (typeof(table)) bf->table_map->getVirtualAddress();

	vtd_table_entry_t * prior = NULL;
	vtd_table_entry_t * next = table;
	mach_vm_size_t      offset;
	uint32_t idx;

	level = 0;
	bit   = 0;
	while (bit < treebits)
	{
		count = (vsize >> bit);
		if (!count) count = 1;

		vtassert(level < arrayCount(bf->tables));
		vtassert(level <= bf->max_level);
		bf->tables[level] = next;
		if (level == 1)
		{
			// wire levels >0
			offset = ((next - table) * sizeof(vtd_table_entry_t));
			VTLOG("wire [%llx, %llx]\n", offset, alloc);
			kr = bf->table_map->wireRange(kIODirectionOutIn, offset, alloc - offset);
			vtassert(KERN_SUCCESS == kr);
			STAT_ADD(bf, tables, atop_64(alloc - offset));
			if (KERN_SUCCESS != kr)
			{
				bf->table_map->release();
				return (NULL);
			}
		}
		else if (level >= 2)
		{
			for (idx = 0; idx < count; idx++)
			{
				ppnum_t lvl2page = pmap_find_phys(kernel_pmap, (uintptr_t) &prior[idx << 9]);
				if (!lvl2page) panic("!lvl2page");
				VTLOG("lvl2 %p[%x] = %p\n", next, idx, &prior[idx << 9]);
				next[idx].bits = (kPageAccess | ptoa_64(lvl2page));
			}
		}
		prior = next;
		next = next + ((count + 511) & ~511);
		bit += 9;
		level++;
	}
	table_flush(&bf->tables[1][0], alloc - offset, bf->cachelinesize);

#if !TABLE_CB
	IOSetProcessorCacheMode(kernel_task, (IOVirtualAddress) &bf->tables[0][0], page_size, kIOCopybackCache);
#endif

	VTLOG("tables %p, %p, %p, %p, %p, %p\n", bf->tables[0], bf->tables[1], bf->tables[2],
						   						bf->tables[3], bf->tables[4], bf->tables[5]);

	bf->root_page = pmap_find_phys(kernel_pmap, (uintptr_t) bf->tables[bf->max_level]);
	if (!bf->root_page) panic("!root_page");
	VTLOG("tree root 0x%llx\n", ptoa_64(bf->root_page));

	vtd_ballocator_init(bf, buddybits);
	bf->rsize = rsize;
	vtd_rballocator_init(bf, rsize, vsize - rsize);

	VTLOG("bsize 0x%x, bsafe 0x%x, breserve 0x%x, rsize 0x%x\n",
	        (1<<kBPagesLog2), kBPagesSafe, kBPagesReserve, bf->rsize);

	STAT_ADD(bf, vsize, vsize);
	OSData *
	data = OSData::withBytesNoCopy(&bf->stats, sizeof(bf->stats));
	if (data)
	{
		setProperty("stats", data);
		data->release();
	}

	bf->stamp = 0x100;
	bf->free_mask  = (kFreeQElems - 1);

	return (bf);
}

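// space_alloc() chooses between the two allocators: small or paging-path
// requests come from the buddy allocator (balloc.c) under the block lock,
// while large requests from devices able to address the whole space come from
// the red-black range allocator (rballoc.c) above rsize. When space is
// exhausted the thread sleeps until space_free() signals waiting_space.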
vtd_baddr_t
AppleVTD::space_alloc(vtd_space_t * bf, vtd_baddr_t size,
					  uint32_t mapOptions, const IODMAMapSpecification * mapSpecification,
					  upl_page_info_t * pageList)
{
	vtd_vaddr_t addr;
    vtd_vaddr_t align = 1;
    vtd_baddr_t largethresh;
    bool        uselarge;
	uint32_t    list;

	if ((kIODMAMapPagingPath & mapOptions) && (size > bf->stats.largest_paging))
		bf->stats.largest_paging = size;

	list = vtd_log2up(size);

	if (mapSpecification)
	{
		if (mapSpecification->numAddressBits
			&& (mapSpecification->numAddressBits <= 32)
			&& (size > bf->stats.largest_32b))		bf->stats.largest_32b = size;

		if (mapSpecification->alignment > page_size) align = atop_64(mapSpecification->alignment);
	}

	if (bf->stats.bused >= kBPagesReserve)
	{
		largethresh = 1;
	}
	else if (bf->stats.bused >= kBPagesSafe)
	{
		largethresh = kLargeThresh2;
	}
	else
	{
		largethresh = kLargeThresh;
	}

	if (!(kIODMAMapPagingPath & mapOptions)
		&& (size >= largethresh)
		&& mapSpecification
		&& mapSpecification->numAddressBits
		&& ((1ULL << (mapSpecification->numAddressBits - 12)) >= bf->vsize))
	{
		uselarge = true;
	}
	else
	{
		uselarge = false;
		if (align > size) size = align;
	}

#if 0
	IOSimpleLockLock(fHWLock);
	checkFree(uselarge);
	IOSimpleLockUnlock(fHWLock);
#endif

	do
	{
		if (uselarge)
		{
			IOLockLock(bf->rlock);
			addr = vtd_rballoc(bf, size, align, fMaxRoundSize, mapOptions, pageList);
			STAT_ADD(bf, allocs[list], 1);
			if (addr)
			{
				STAT_ADD(bf, rused, size);
				vtd_space_fault(bf, addr, size);
			}
			IOLockUnlock(bf->rlock);
			if (addr && pageList) vtd_space_set(bf, addr, size, mapOptions, pageList);
		}
		else
		{
			BLOCK(bf->block);
			addr = vtd_balloc(bf, size, mapOptions, pageList);
			STAT_ADD(bf, allocs[list], 1);
			if (addr) STAT_ADD(bf, bused, (1 << list));
			BUNLOCK(bf->block);
		}
		if (addr) break;
		if (!uselarge && (size >= (1 << (kBPagesLog2 - 2)))) break;

		IOLockLock(bf->rlock);
		bf->waiting_space = true;
		IOLockSleep(bf->rlock, &bf->waiting_space, THREAD_UNINT);
		IOLockUnlock(bf->rlock);
//		IOLog("AppleVTD: waiting space (%d)\n", size);
		VTLOG("AppleVTD: waiting space (%d, bused %d, rused %d)\n",
				size, bf->stats.bused, bf->stats.rused);
	}
	while (true);

	return (addr);
}

void
AppleVTD::space_free(vtd_space_t * bf, vtd_baddr_t addr, vtd_baddr_t size)
{
	uint32_t list;

	vtassert(addr);
	vtassert((addr + size) <= bf->vsize);

	if (addr >= bf->rsize)
	{
		IOLockLock(bf->rlock);
		vtd_rbfree(bf, addr, size, fMaxRoundSize);
		STAT_ADD(bf, rused, -size);
		IOLockUnlock(bf->rlock);
	}
	else
	{
		list = vtd_log2up(size);
		BLOCK(bf->block);
		vtd_bfree(bf, addr, size);
		STAT_ADD(bf, bused, -(1 << list));
		BUNLOCK(bf->block);
	}

	if (bf->waiting_space)
	{
		IOLockLock(bf->rlock);
		bf->waiting_space = false;
		IOLockWakeup(bf->rlock, &bf->waiting_space, false);
		IOLockUnlock(bf->rlock);
	}
}

void
AppleVTD::space_alloc_fixed(vtd_space_t * bf, vtd_baddr_t addr, vtd_baddr_t size)
{
	vtd_balloc_fixed(bf, addr, size);
	vtd_rballoc_fixed(bf, addr, size);
	vtd_space_fault(bf, addr, size);
}

static page_entry_t
vtd_tree_read(page_entry_t root, uint32_t width, addr64_t addr)
{
	page_entry_t entry = root;
	page_entry_t table;
	uint32_t index;
	uint32_t level = 0;

	while (width > 12)
	{
		width -= 9;
		index = (addr >> (width - 3)) & (511 << 3);

		table = entry & kPageAddrMask;
		entry = ml_phys_read_double_64(table + index);

		if (!(kPageAccess & entry))
			break;
		level++;
	}

	return (entry);
}

bool
AppleVTD::initHardware(IOService *provider)
{
	uint32_t idx;
	vtd_unit_t * unit;

    fIsSystem = true;

	uint64_t context_width;
	fTreeBits = 0;
	unit = units[0];
	// prefer smallest tree?
	for (context_width = kAddressWidth30;
			(context_width <= kAddressWidth64);
			context_width++)
	{
		if ((0x100 << context_width) & unit->regs->capability)
		{
			fTreeBits = (30 + 9 * context_width);  // (57+9) for 64
			break;
		}
	}

	for (idx = 0; (unit = units[idx]); idx++)
	{
		if (!((0x100 << context_width) & unit->regs->capability))
			panic("!tree bits %d on unit %d", fTreeBits, idx);
		if (unit->selective && ((unit->rounding > fMaxRoundSize)))
			fMaxRoundSize = unit->rounding;
	}

	VTLOG("context_width %lld, treebits %d, round %d\n",
			context_width, fTreeBits, fMaxRoundSize);

    // need better legacy checks
	if (!fMaxRoundSize)                              return (false);
	if ((48 == fTreeBits) && (9 == fMaxRoundSize))   return (false);
	//

	fHWLock = IOSimpleLockAlloc();

	fSpace = space_create(fCacheLineSize, fTreeBits, kVPages, kBPagesLog2, kRPages);
	if (!fSpace) return (false);

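	// Reserve the local-APIC MSI window (0xfee00000-0xfef00000) so it is never
	// handed out as I/O virtual address space, and identity-map its first
	// page; device writes into this range are interrupt messages, not
	// ordinary DMA.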
	space_alloc_fixed(fSpace, atop_64(0xfee00000), atop_64(0xfef00000-0xfee00000));
	vtd_space_fault(fSpace, atop_64(0xfee00000), 1);
	fSpace->tables[0][atop_64(0xfee00000)].bits = 0xfee00000 | kPageAccess;

	ACPI_TABLE_DMAR *           dmar = (typeof(dmar))      fDMARData->getBytesNoCopy();
	ACPI_DMAR_HEADER *          dmarEnd = (typeof(dmarEnd))(((uintptr_t) dmar) + fDMARData->getLength());
	ACPI_DMAR_HEADER *          hdr = (typeof(hdr))      (dmar + 1);
	ACPI_DMAR_RESERVED_MEMORY * mem;

	for (; hdr < dmarEnd;
			hdr = (typeof(hdr))(((uintptr_t) hdr) + hdr->Length))
	{
		uint64_t addr;
		uint32_t count;
		switch (hdr->Type)
		{
			case ACPI_DMAR_TYPE_RESERVED_MEMORY:
				mem = (typeof(mem)) hdr;
				VTLOG("ACPI_DMAR_TYPE_RESERVED_MEMORY 0x%llx, 0x%llx\n",
					mem->BaseAddress, mem->EndAddress);

				addr = mem->BaseAddress;
				count = atop_32(mem->EndAddress - addr);

				space_alloc_fixed(fSpace, atop_64(addr), count);
				for (; count; addr += page_size, count--)
				{
					fSpace->tables[0][atop_64(addr)].bits = (addr | kPageAccess);
				}
				break;
		}
	}

	IOReturn kr;
	IOBufferMemoryDescriptor *
	md = IOBufferMemoryDescriptor::inTaskWithOptions(TASK_NULL,
						kIOMemoryPageable |
						kIOMapWriteCombineCache |
						kIOMemoryMapperNone,
						2 * page_size, page_size);
	vtassert(md);
	if (!md) return (false);

	kr = md->prepare(kIODirectionOutIn);
	vtassert(KERN_SUCCESS == kr);

	fContextTableMap = md->map();
	vtassert(fContextTableMap);
	md->release();

    // context entries

	context_entry_t * context_entry_table = (typeof(context_entry_table)) fContextTableMap->getVirtualAddress();
	for (idx = 0; idx < 256; idx++)
	{
		context_entry_table[idx].address_space_root = 	ptoa_64(fSpace->root_page)
														| kEntryPresent
														| kTranslationType0;
		context_entry_table[idx].context_entry = context_width
												| kTheDomain*kDomainIdentifier1;
//		if (idx == ((2<<3)|0)) context_entry_table[idx].address_space_root |= kTranslationType2;  // passthru
//		if (idx == ((27<<3)|0)) context_entry_table[idx].address_space_root = 0;
		if (!(kIOPCIConfiguratorIGIsMapped & gIOPCIFlags))
		{
			if (idx == ((2<<3)|0)) context_entry_table[idx].address_space_root &= ~kEntryPresent;
		}
	}
	ppnum_t context_page = pmap_find_phys(kernel_pmap, (uintptr_t) &context_entry_table[0]);
	if (!context_page) panic("!context_page");

	// root

	root_entry_t * root_entry_table = (typeof(root_entry_table)) (fContextTableMap->getAddress() + page_size);
	for (idx = 0; idx < 256; idx++)
	{
		root_entry_table[idx].context_entry_ptr = ptoa_64(context_page)
													| kEntryPresent;
		root_entry_table[idx].resv = 0;
	}

	fRootEntryPage = pmap_find_phys(kernel_pmap, (uintptr_t) &root_entry_table[0]);
	if (!fRootEntryPage) panic("!fRootEntryPage");
	for (idx = 0; (unit = units[idx]); idx++)
	{
		unit->root = ptoa_64(fRootEntryPage);
	}

	// QI

	for (idx = 0; (unit = units[idx]); idx++)
	{
		md = IOBufferMemoryDescriptor::inTaskWithOptions(kernel_task,
							kIOMemoryHostPhysicallyContiguous |
							kIOMapWriteCombineCache |
							kIOMemoryMapperNone,
							kQIPageCount * page_size, page_size);
		vtassert(md);
		if (!md) return (false);

		kr = md->prepare(kIODirectionOutIn);
		vtassert(KERN_SUCCESS == kr);

		unit->qi_map = md->map();
		vtassert(unit->qi_map);
		unit->qi_mask    = (kQIPageCount * 256) - 1;
		unit->qi_table   = (typeof(unit->qi_table)) (unit->qi_map->getAddress());
		unit->qi_address = vtd_log2down(kQIPageCount)
					     | md->getPhysicalSegment(0, NULL, kIOMemoryMapperNone);

		ppnum_t stamp_page = pmap_find_phys(kernel_pmap, (uintptr_t) &unit->qi_stamp);
		vtassert(stamp_page);
		unit->qi_stamp_address = ptoa_64(stamp_page) | (page_mask & ((uintptr_t) &unit->qi_stamp));

		md->release();
    }
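
	// Per the VT-d spec, the invalidation queue address register encodes the
	// queue size in its low bits as log2 of the page count, hence qi_address
	// above ORs vtd_log2down(kQIPageCount) into the queue's physical base;
	// qi_mask covers 256 16-byte descriptors per page. qi_stamp_address is the
	// physical address hardware writes wait-descriptor status to.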

	//

	IOReturn  ret;
	uint64_t  msiAddress;
	uint32_t  msiData;
	ret = gIOPCIMessagedInterruptController->allocateDeviceInterrupts(
													this, 2, 0, &msiAddress, &msiData);
	if (kIOReturnSuccess == ret)
	{
        fIntES = IOInterruptEventSource::interruptEventSource(
                      this,
                      OSMemberFunctionCast(IOInterruptEventSource::Action,
                                            this, &AppleVTD::handleInterrupt),
                      this, 0);
		if (fIntES) fWorkLoop->addEventSource(fIntES);
        fFaultES = IOInterruptEventSource::interruptEventSource(
                      this,
                      OSMemberFunctionCast(IOInterruptEventSource::Action,
                                            this, &AppleVTD::handleFault),
                      this, 1);
		if (fFaultES) fWorkLoop->addEventSource(fFaultES);
	}


	fTimerES = IOTimerEventSource::timerEventSource(this,
	                      OSMemberFunctionCast(IOTimerEventSource::Action,
												this, &AppleVTD::timer));
	if (fTimerES) fWorkLoop->addEventSource(fTimerES);

	if (!fIntES || !fFaultES) msiData = msiAddress = 0;

	__mfence();
	for (idx = 0; (unit = units[idx]); idx++)
	{
		unit->msi_data    = msiData & 0xff;
		unit->msi_address = msiAddress;
		unit_enable(unit);
	}
	if (fIntES)   fIntES->enable();
	if (fFaultES) fFaultES->enable();

//	fTimerES->setTimeoutMS(10);

	setProperty(kIOPlatformQuiesceActionKey, INT32_MAX - 1000, 64);
	setProperty(kIOPlatformActiveActionKey, INT32_MAX - 1000, 64);

	registerService();

	return (true);
}

IOReturn
AppleVTD::handleInterrupt(IOInterruptEventSource * source, int count)
{
	uint32_t idx;
	vtd_unit_t * unit;

	IOSimpleLockLock(fHWLock);
	for (idx = 0; idx < kFreeQCount; idx++) checkFree(idx);
	for (idx = 0; (unit = units[idx]); idx++)
	{
		unit->regs->invalidation_completion_status = 1;
	}
	IOSimpleLockUnlock(fHWLock);

	return (kIOReturnSuccess);
}

IOReturn
AppleVTD::handleFault(IOInterruptEventSource * source, int count)
{
	uint32_t idx;
	vtd_unit_t * unit;

	for (idx = 0; (unit = units[idx]); idx++) unit_faults(unit, true || (idx != 0));

	return (kIOReturnSuccess);
}

IOReturn
AppleVTD::timer(OSObject * owner, IOTimerEventSource * sender)
{
	uint32_t idx;

	IOSimpleLockLock(fHWLock);
	for (idx = 0; idx < kFreeQCount; idx++) checkFree(idx);
	IOSimpleLockUnlock(fHWLock);

	fTimerES->setTimeoutMS(10);

	return (kIOReturnSuccess);
}

IOReturn
AppleVTD::callPlatformFunction(const OSSymbol * functionName,
							   bool waitForFunction,
							   void * param1, void * param2,
							   void * param3, void * param4)
{
    if (functionName)
    {
		uint32_t idx;
		vtd_unit_t * unit;
    	if (functionName->isEqualTo(gIOPlatformActiveActionKey))
		{
			for (idx = 0; (unit = units[idx]); idx++)
			{
				unit_enable(unit);
			}
			return (kIOReturnSuccess);
		}
		else if (functionName->isEqualTo(gIOPlatformQuiesceActionKey))
		{
			for (idx = 0; (unit = units[idx]); idx++)
			{
				unit_quiesce(unit);
			}
			return (kIOReturnSuccess);
		}
	}
    return (super::callPlatformFunction(functionName, waitForFunction,
                                        param1, param2, param3, param4));
}

ppnum_t
AppleVTD::iovmMapMemory(
			  OSObject                    * memory,   // dma command or iomd
			  ppnum_t                       offsetPage,
			  ppnum_t                       pageCount,
			  uint32_t                      mapOptions,
			  upl_page_info_t             * pageList,
			  const IODMAMapSpecification * mapSpecification)
{
	vtd_vaddr_t base;

	base = space_alloc(fSpace, pageCount, mapOptions, mapSpecification, pageList);
	vtassert((base + pageCount) <= fSpace->vsize);

//	space_free(fSpace, base, pageCount);
//	base = space_alloc(fSpace, pageCount, mapOptions, mapSpecification, pageList);

#if KP
	VTLOG("iovmMapMemory: (0x%x)=0x%x\n", (int)pageCount, (int)base);
#endif

    return (base);
}

ppnum_t
AppleVTD::iovmAlloc(IOItemCount pages)
{
	ppnum_t result;

	result = space_alloc(fSpace, pages, 0, NULL, NULL);
#if KP
	VTLOG("iovmAlloc: 0x%x=0x%x\n", (int)pages, (int)result );
#endif
    return (result);
}

void
AppleVTD::iovmInvalidateSync(ppnum_t addr, IOItemCount pages)
{
	vtd_unit_t * unit;
	unsigned int leaf;
	unsigned int idx;
	uint32_t     wait;
	ppnum_t      unitAddr[kMaxUnits];
	IOItemCount  unitPages[kMaxUnits];
	bool		 more;

	for (idx = 0; (unit = units[idx]); idx++)
	{
		unitAddr[idx] = addr;
		unitPages[idx] = pages;
	}
	leaf = true;

	do
	{
		more = false;
		wait = 0;
		for (idx = 0; (unit = units[idx]); idx++)
		{
			if (unitPages[idx])
			{
				wait |= (1 << idx);
				unit_invalidate(unit, kTheDomain, unitAddr[idx], unit->rounding, leaf);
				if (!unit->selective
					|| (unitPages[idx] <= (1U << unit->rounding)))
				{
					unitPages[idx] = 0;
				}
				else
				{
					more = true;
					unitPages[idx] -= (1U << unit->rounding);
					unitAddr[idx]  += (1U << unit->rounding);
				}
			}
		}
		for (idx = 0; (unit = units[idx]); idx++)
		{
			if (wait & (1U << idx)) unit_invalidate_done(unit);
		}
	}
	while (more);
}
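
// iovmFree() clears the leaf PTEs immediately but defers returning the range
// to the allocators: with queued invalidation enabled it posts selective
// IOTLB invalidation descriptors followed by a wait descriptor whose status
// write updates unit->qi_stamp, and checkFree() recycles the range once every
// unit's stamp has passed the queued entry's stamp.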

void
AppleVTD::iovmFree(ppnum_t addr, IOItemCount pages)
{
	vtd_unit_t * unit;
	unsigned int leaf, isLarge;
	unsigned int unitIdx;
    uint32_t     did = kTheDomain;
	ppnum_t      unitAddr;
	IOItemCount  unitPages;
	uint32_t     idx;
	uint32_t     next;
	uint32_t     count;
	uint64_t     stamp;

#if KP
	VTLOG("iovmFree: 0x%x,0x%x\n", (int)pages, addr);
#endif

	vtassert((addr + pages) <= fSpace->vsize);
	vtd_space_nfault(fSpace, addr, pages);
	bzero(&fSpace->tables[0][addr], pages * sizeof(vtd_table_entry_t));
	table_flush(&fSpace->tables[0][addr], pages * sizeof(vtd_table_entry_t), fCacheLineSize);

#if !ENA_QI
	IOSimpleLockLock(fHWLock);
    iovmInvalidateSync(addr, pages);
	IOSimpleLockUnlock(fHWLock);
	space_free(fSpace, addr, pages);
	return;

#else	/* !ENA_QI */

	leaf = true;
	isLarge = (addr >= fSpace->rsize);

	IOSimpleLockLock(fHWLock);

#if 0
	int32_t      freeCount;
	freeCount = fSpace->free_tail[isLarge] - fSpace->free_head[isLarge];
	if (freeCount < 0) freeCount += kFreeQElems;
	if (freeCount >= 8)
#endif
	{
		checkFree(isLarge);
	}

	stamp = ++fSpace->stamp;

	idx = fSpace->free_tail[isLarge];
	next = (idx + 1) & fSpace->free_mask;
	if (next == fSpace->free_head[isLarge]) panic("qfull");
	fSpace->free_queue[isLarge][idx].addr = addr;
	fSpace->free_queue[isLarge][idx].size = pages;
	fSpace->free_queue[isLarge][idx].stamp = stamp;
	fSpace->free_tail[isLarge] = next;

	for (unitIdx = 0; (unit = units[unitIdx]); unitIdx++)
	{
		unitAddr = addr;
		unitPages = pages;
		idx = unit->qi_tail;
		count = 0;
		while (unitPages)
		{
			next = (idx + 1) & unit->qi_mask;
			while ((next << 4) == unit->regs->invalidation_queue_head) {}

			if (unit->selective)
			{
				uint32_t mask = unit->rounding;
				if (unitPages < (1U << unit->rounding)) mask = vtd_log2up(unitPages);
				unit->qi_table[idx].command = (did<<16) | (kTlbDrainReads<<7) | (kTlbDrainWrites<<6) | (3<<4) | (2);
				unit->qi_table[idx].address = ptoa_64(unitAddr) | (leaf << 6) | mask;
			}
			else
			{
				unit->qi_table[idx].command = (kTlbDrainReads<<7) | (kTlbDrainWrites<<6) | (1<<4) | (2);
			}

			if (!unit->selective
				|| (unitPages <= (1U << unit->rounding)))
			{
				unitPages = 0;
			}
			else
			{
				unitPages -= (1U << unit->rounding);
				unitAddr  += (1U << unit->rounding);
				count++;
				if (!(count & (unit->qi_mask >> 5)))
				{
					__mfence();
					unit->regs->invalidation_queue_tail = (next << 4);
				}
			}
			idx = next;
		}
//		if (freeCount >= 64)
//		if (0 == (stamp & 3))
		{
			next = (idx + 1) & unit->qi_mask;
			while ((next << 4) == unit->regs->invalidation_queue_head) {}
			uint64_t command = (stamp<<32) | (1<<5) | (5);
//     		command |= (1<<4); // make an int
			unit->qi_table[idx].command = command;
			unit->qi_table[idx].address = unit->qi_stamp_address;
		}
		__mfence();
		unit->regs->invalidation_queue_tail = (next << 4);
//		__mfence();
		unit->qi_tail = next;
	}

	IOSimpleLockUnlock(fHWLock);

#endif /* ENA_QI */
}

#define stampPassed(a,b)	(((int32_t)((a)-(b))) >= 0)
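
// Signed-difference compare so the test stays correct across 32-bit stamp
// wraparound: e.g. stampPassed(0x00000002, 0xfffffffe) is true because
// (int32_t)(0x00000002 - 0xfffffffe) == 4 >= 0.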

void
AppleVTD::checkFree(uint32_t isLarge)
{
	vtd_unit_t * unit;
	uint32_t     unitIdx;
	uint32_t     idx;
	uint32_t     next;
	ppnum_t      addr, size, count;
    bool         ok;

	count = 0;
	idx = fSpace->free_head[isLarge];
	do
	{
		if (idx == fSpace->free_tail[isLarge]) break;
		for (unitIdx = 0, ok = true; ok && (unit = units[unitIdx]); unitIdx++)
		{
			ok &= stampPassed(unit->qi_stamp, fSpace->free_queue[isLarge][idx].stamp);
		}

		if (ok)
		{
			next = (idx + 1) & fSpace->free_mask;
			addr = fSpace->free_queue[isLarge][idx].addr;
			size = fSpace->free_queue[isLarge][idx].size;
#if BSIMPLE
			if (!isLarge)
		    {
				vtd_bfree(fSpace, addr, size);
				STAT_ADD(fSpace, bused, -size);
				idx = next;
		    }
		    else
#endif /* BSIMPLE */
			{
				fSpace->free_head[isLarge] = next;
				IOSimpleLockUnlock(fHWLock);
				space_free(fSpace, addr, size);
				IOSimpleLockLock(fHWLock);
				idx = fSpace->free_head[isLarge];
		    }
			count++;
		}
	}
	while (ok);

#if BSIMPLE
	fSpace->free_head[isLarge] = idx;
#endif
	if (count > fSpace->stats.max_inval[isLarge]) fSpace->stats.max_inval[isLarge] = count;
}

addr64_t
AppleVTD::mapAddr(IOPhysicalAddress addr)
{
	ppnum_t      page = atop_64(addr);
	page_entry_t entry;

	if (page >= fSpace->vsize) return (addr);

	if (!vtd_space_present(fSpace, page)) return (addr);

	entry = fSpace->tables[0][page].bits;

#if KP
	VTLOG("mapAddr: 0x%x=0x%llx\n", (int)addr, entry);
#endif

	if (kPageAccess & entry)
		return (trunc_page_64(entry) | (addr & page_mask));
	else
		return (addr);
}

void
AppleVTD::iovmInsert(ppnum_t addr, IOItemCount offset, ppnum_t page)
{
	addr += offset;
	vtassert(addr < fSpace->vsize);
	vtd_space_nfault(fSpace, addr, 1);
	fSpace->tables[0][addr].bits = ptoa_64(page) | kPageAccess;
	table_flush(&fSpace->tables[0][addr], sizeof(vtd_table_entry_t), fCacheLineSize);
	STAT_ADD(fSpace, inserts, 1);
}


void
AppleVTD::iovmInsert(ppnum_t addr, IOItemCount offset,
						ppnum_t *pageList, IOItemCount pageCount)
{
	ppnum_t idx;

	addr += offset;
	vtassert((addr + pageCount) <= fSpace->vsize);
	vtd_space_nfault(fSpace, addr, pageCount);
    for (idx = 0; idx < pageCount; idx++)
    {
		fSpace->tables[0][addr + idx].bits = ptoa_64(pageList[idx]) | kPageAccess;
	}
	table_flush(&fSpace->tables[0][addr], pageCount * sizeof(vtd_table_entry_t), fCacheLineSize);
	STAT_ADD(fSpace, inserts, pageCount);
}

void
AppleVTD::iovmInsert(ppnum_t addr, IOItemCount offset,
					 upl_page_info_t *pageList, IOItemCount pageCount)
{
	ppnum_t idx;

	addr += offset;

	vtassert((addr + pageCount) <= fSpace->vsize);
	vtd_space_nfault(fSpace, addr, pageCount);
    for (idx = 0; idx < pageCount; idx++)
    {
		fSpace->tables[0][addr + idx].bits = ptoa_64(pageList[idx].phys_addr) | kPageAccess;
	}
	table_flush(&fSpace->tables[0][addr], pageCount * sizeof(vtd_table_entry_t), fCacheLineSize);
	STAT_ADD(fSpace, inserts, pageCount);
}

#endif /* ACPI_SUPPORT */
