/*
 * Copyright (c) 2012 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

#if ACPI_SUPPORT

#include <IOKit/IOMapper.h>
#include <IOKit/IOKitKeysPrivate.h>
#include <libkern/tree.h>
#include <libkern/OSDebug.h>
#include <i386/cpuid.h>
#include "dmar.h"

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

extern "C" vm_offset_t ml_io_map(vm_offset_t phys_addr, vm_size_t size);
extern "C" ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va);

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#define ENA_QI			1
#define TABLE_CB		0
#define BSIMPLE			0

#define KP				0
#define	VTASRT			0

#define kLargeThresh	(128)
#define kLargeThresh2	(32)
#define kVPages  		(1<<22)
#define kBPagesLog2 	(18)
#define kBPagesSafe		((1<<kBPagesLog2)-(1<<(kBPagesLog2 - 2)))      /* 3/4 */
#define kBPagesReserve	((1<<kBPagesLog2)-(1<<(kBPagesLog2 - 3)))      /* 7/8 */
#define kRPages  		(1<<20)

#define kQIPageCount    (2)

#define kTlbDrainReads  (0ULL)
#define kTlbDrainWrites (0ULL)
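// kTlbDrainReads/kTlbDrainWrites feed the DR/DW (drain) bits of the IOTLB
// invalidations issued below; both are left at zero here. kQIPageCount sizes
// the queued-invalidation ring: two pages of 16-byte descriptors is 512
// entries, which is where the (kQIPageCount * 256) - 1 queue mask comes from.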

#define VTLOG(fmt, args...)                   \
    do {                                                    						\
        if ((gIOPCIFlags & kIOPCIConfiguratorVTLog) && !ml_at_interrupt_context())  \
            IOLog(fmt, ## args);                           							\
        if (gIOPCIFlags & kIOPCIConfiguratorVTLog)        							\
            kprintf(fmt, ## args);                          						\
    } while(0)


#if VTASRT

#define vtassert(ex)  \
	((ex) ? (void)0 : Assert(__FILE__, __LINE__, # ex))

#define vtd_space_nfault(x,y,z) _vtd_space_nfault(x,y,z)

#define STAT_ADD(space, name, value) do { space->stats.name += value; } while (false)

#else	/* VTASRT */

#define vtassert(ex)
#define vtd_space_nfault(x,y,z)

#define STAT_ADD(space, name, value) do { space->stats.name += value; } while (false)
//#define STAT_ADD(space, name, value)

#endif	/* !VTASRT */


#if TABLE_CB
#define table_flush(addr, size, linesize) clflush((uintptr_t)(addr), (size), linesize);
#else
#define table_flush(addr, size, linesize) __mfence();
#endif


#if BSIMPLE
#define BLOCK(l)	IOSimpleLockLock(l)
#define BUNLOCK(l)	IOSimpleLockUnlock(l)
#else
#define BLOCK(l)	IOLockLock(l)
#define BUNLOCK(l)	IOLockUnlock(l)
#endif

#define arrayCount(x)	(sizeof(x) / sizeof(x[0]))

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

enum
{
	kEntryPresent = 0x00000001ULL
};

struct root_entry_t
{
    uint64_t context_entry_ptr;
    uint64_t resv;
};

struct context_entry_t
{
    uint64_t address_space_root;
    uint64_t context_entry;
};

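// Per the VT-d spec, queued-invalidation descriptors are 128 bits: the low
// qword (command) holds the descriptor type and control fields, the high
// qword (address) is used by types that carry one, e.g. page-selective
// IOTLB invalidations and invalidation-wait status writes.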
struct qi_descriptor_t
{
    uint64_t command;
    uint64_t address;
};

// address_space_root
enum
{
//	kEntryPresent 			= 0x00000001ULL,
	kFaultProcessingDisable = 0x00000002ULL,
	kTranslationType0 		= 0x00000000ULL,
	kTranslationType1 		= 0x00000004ULL,
	kTranslationType2 		= 0x00000008ULL,
	kTranslationType3 		= 0x0000000CULL,
	kEvictionHint 		    = 0x00000010ULL,
	kAddressLocalityHint    = 0x00000020ULL,
};

// context_entry
enum
{
	kAddressWidth30			= 0x00000000ULL,
	kAddressWidth39			= 0x00000001ULL,
	kAddressWidth48			= 0x00000002ULL,
	kAddressWidth57			= 0x00000003ULL,
	kAddressWidth64			= 0x00000004ULL,

	kContextAvail1		    = 0x00000008ULL,	// 4b
	kDomainIdentifier1		= 0x00000100ULL,	// 16b
};

enum
{
	kNotTheDomain = 1ULL << 32,
	kTheDomain    = 2ULL
};

typedef uint64_t page_entry_t;

// page_entry_t
enum
{
	kReadAccess 			= 0x00000001ULL,
	kWriteAccess			= 0x00000002ULL,
	kPageAccess				= kReadAccess|kWriteAccess,
	kPageAvail1			    = 0x00000004ULL,	// 5b
	kSuperPage			    = 0x00000080ULL,
	kPageAvail2			    = 0x00000100ULL,	// 3b
	kSnoopBehavior		    = 0x00000800ULL,
	kTransientMapping		= 0x4000000000000000ULL,
	kPageAvail3				= 0x8000000000000000ULL, // 1b

	kPageAddrMask			= 0x3ffffffffffff000ULL
};
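// These follow the VT-d second-level page-table entry layout: bits 0/1 grant
// read/write access, bit 7 marks a superpage, bit 11 selects snoop behavior,
// and bit 62 is the transient-mapping hint; the "avail" fields are bits the
// spec leaves ignored by hardware for software use.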

struct vtd_registers_t
{
/*00*/ 	uint32_t version;
/*04*/	uint32_t res1;
/*08*/	uint64_t capability;
/*10*/	uint64_t extended_capability;
/*18*/	uint32_t global_command;
/*1c*/	uint32_t global_status;
/*20*/	uint64_t root_entry_table;
/*28*/	uint64_t context_command;
/*30*/	uint32_t res2;
/*34*/	uint32_t fault_status;
/*38*/	uint32_t fault_event_control;
/*3c*/	uint32_t fault_event_data;
/*40*/	uint32_t fault_event_address;
/*44*/	uint32_t fault_event_upper_address;
/*48*/	uint64_t res3[2];
/*58*/	uint64_t advanced_fault;
/*60*/	uint32_t res4;
/*64*/	uint32_t protected_memory_enable;
/*68*/	uint32_t protected_low_memory_base;
/*6c*/	uint32_t protected_low_memory_limit;
/*70*/	uint64_t protected_high_memory_base;
/*78*/	uint64_t protected_high_memory_limit;
/*80*/	uint64_t invalidation_queue_head;
/*88*/	uint64_t invalidation_queue_tail;
/*90*/	uint64_t invalidation_queue_address;
/*98*/	uint32_t res5;
/*9c*/	uint32_t invalidation_completion_status;
/*a0*/	uint32_t invalidation_completion_event_control;
/*a4*/	uint32_t invalidation_completion_event_data;
/*a8*/	uint32_t invalidation_completion_event_address;
/*ac*/	uint32_t invalidation_completion_event_upper_address;
/*b0*/	uint64_t res6;
/*b8*/	uint64_t interrupt_remapping_table;
/*c0*/
};

struct vtd_iotlb_registers_t
{
/*00*/	uint64_t address;
/*08*/	uint64_t command;
};
struct vtd_fault_registers_t
{
/*00*/	uint64_t fault_low;
/*08*/	uint64_t fault_high;
};

typedef char vtd_registers_t_check[(sizeof(vtd_registers_t) == 0xc0) ? 1 : -1];
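// Compile-time layout check: if vtd_registers_t ever drifts from the 0xc0
// bytes of architectural registers above, the array size goes negative and
// the build fails (the pre-static_assert idiom).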

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

struct vtd_unit_t
{
    ACPI_DMAR_HARDWARE_UNIT * dmar;
    volatile vtd_registers_t * regs;
    volatile vtd_iotlb_registers_t * iotlb;
    volatile vtd_fault_registers_t * faults;

    IOMemoryMap *     qi_map;
    qi_descriptor_t * qi_table;

	uint64_t root;
	uint64_t msi_address;
    uint64_t qi_address;
    uint64_t qi_stamp_address;

	uint32_t qi_tail;
	uint32_t qi_mask;
    volatile
    uint32_t qi_stamp;

	uint32_t msi_data;
    uint32_t num_fault;
    uint32_t rounding;

    uint8_t  global:1;
    uint8_t  caching:1;
    uint8_t  selective:1;
};

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

static inline void __mfence(void)
{
    __asm__ volatile("mfence");
}

static inline void __clflush(void *ptr)
{
	__asm__ volatile("clflush (%0)" : : "r" (ptr));
}

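// clflush(): write back and invalidate every cache line overlapping
// [addr, addr + count), fenced on both sides; with a 64-byte line size,
// for example, clflush(p, 256, 64) touches four or five lines depending on
// alignment.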
static inline void clflush(uintptr_t addr, unsigned int count, uintptr_t linesize)
{
	uintptr_t  bound = (addr + count + linesize - 1) & ~(linesize - 1);
	__mfence();
	while (addr < bound)
	{
		__clflush((void *) (uintptr_t) addr);
		addr += linesize;
	}
	__mfence();
}

static
vtd_unit_t * unit_init(ACPI_DMAR_HARDWARE_UNIT * dmar)
{
	vtd_unit_t * unit;

	unit = IONew(vtd_unit_t, 1);
	if (!unit) return (NULL);
	bzero(unit, sizeof(vtd_unit_t));

	unit->dmar = dmar;

	VTLOG("unit %p Address %llx, Flags %x\n",
			dmar, dmar->Address, dmar->Flags);

	unit->regs = (typeof(unit->regs)) ml_io_map(dmar->Address, 0x1000);

	uint32_t
	offset = (unit->regs->extended_capability >> (8 - 4)) & (((1 << 10) - 1) << 4);
	unit->iotlb = (typeof(unit->iotlb)) (((uintptr_t)unit->regs) + offset);

	offset = (unit->regs->capability >> (24 - 4)) & (((1 << 10) - 1) << 4);
	unit->faults = (typeof(unit->faults)) (((uintptr_t)unit->regs) + offset);
	unit->num_fault = (1 + ((unit->regs->capability >> 40) & ((1 << 8) - 1)));

	unit->selective = (1 & (unit->regs->capability >> 39));
	unit->rounding = (0x3f & (unit->regs->capability >> 48));
	unit->caching = (1 & (unit->regs->capability >> 7));
	unit->global = (ACPI_DMAR_INCLUDE_ALL & dmar->Flags);

	VTLOG("cap 0x%llx extcap 0x%llx glob %d sel %d cache %d iotlb %p nfault[%d] %p\n",
			unit->regs->capability, unit->regs->extended_capability,
			unit->global, unit->selective, unit->caching,
			unit->iotlb, unit->num_fault, unit->faults);

	// caching is only allowed for VMs
	if (unit->caching
	// disable IG unit
	|| ((!unit->global) && (!(kIOPCIConfiguratorIGIsMapped & gIOPCIFlags))))
	{
		IODelete(unit, vtd_unit_t, 1);
		unit = NULL;
	}

	return (unit);
}

static void
unit_faults(vtd_unit_t * unit, bool log)
{
	uint32_t idx;
	for (idx = 0; idx < unit->num_fault; idx++)
	{
		uint64_t h, l;
		uint32_t faults_pending;

		faults_pending = unit->regs->fault_status;
		h = unit->faults[idx].fault_high;
		l = unit->faults[idx].fault_low;
		unit->faults[idx].fault_high = h;
		unit->regs->fault_status = faults_pending;
		__mfence();
		if (log && ((1ULL << 63) & h))
		{
			char msg[256];
			snprintf(msg, sizeof(msg), "vtd[%d] fault: device %d:%d:%d reason 0x%x %c:0x%llx", idx,
				(int)(255 & (h >> 8)), (int)(31 & (h >> 3)), (int)(7 & (h >> 0)),
				(int)(255 & (h >> (96 - 64))), (h & (1ULL << (126 - 64))) ? 'R' : 'W', l);
			IOLog("%s\n", msg);
			kprintf("%s\n", msg);
			if (kIOPCIConfiguratorPanicOnFault & gIOPCIFlags) panic("%s", msg);
		}
	}
}

static void
unit_enable(vtd_unit_t * unit)
{
    uint32_t command;

	VTLOG("unit %p global status 0x%x\n", unit, unit->regs->global_status);

	unit->regs->root_entry_table = unit->root;
	__mfence();

	unit->regs->global_command = (1UL<<30);
	__mfence();
	while (!((1UL<<30) & unit->regs->global_status)) {}
//	VTLOG("did set root\n");

	unit->regs->context_command = (1ULL<<63) | (1ULL<<61);
	__mfence();
	while ((1ULL<<63) & unit->regs->context_command) {}
//	VTLOG("did context inval\n");

	// global & rw drain
	unit->iotlb->command = (1ULL<<63) | (1ULL<<60) | (1ULL<<49) | (1ULL<<48);
	__mfence();
	while ((1ULL<<63) & unit->iotlb->command) {}
//	VTLOG("did iotlb inval\n");

	unit->qi_tail = 0;
	unit->regs->invalidation_queue_head = 0;
	unit->regs->invalidation_queue_tail = 0;
    unit->regs->invalidation_queue_address = unit->qi_address;

	command = 0;

#if ENA_QI
	command |= (1UL<<26);
	unit->regs->global_command = command;
	__mfence();
	while (!((1UL<<26) & unit->regs->global_status)) {}
	VTLOG("did ena qi p 0x%qx v %p\n", unit->qi_address, unit->qi_table);
#endif

	command |= (1UL<<31);
	unit->regs->global_command = command;
	__mfence();
	while (!((1UL<<31) & unit->regs->global_status)) {}
	VTLOG("did ena\n");

	if (unit->msi_address)
	{
		unit->regs->invalidation_completion_event_data          = unit->msi_data;
		unit->regs->invalidation_completion_event_address       = unit->msi_address;
		unit->regs->invalidation_completion_event_upper_address = (unit->msi_address >> 32);

		unit->regs->fault_event_data          = unit->msi_data + 1;
		unit->regs->fault_event_address       = unit->msi_address;
		unit->regs->fault_event_upper_address = (unit->msi_address >> 32);

		__mfence();
		unit_faults(unit, false);

		unit->regs->fault_event_control = 0;					// ints ena
		unit->regs->invalidation_completion_event_control = 0;	// ints ena
		unit->regs->invalidation_completion_status = 1;
	}
}

static void
unit_quiesce(vtd_unit_t * unit)
{
	VTLOG("unit %p quiesce\n", unit);
	// completion stamps will continue after wake
}

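// Register-based IOTLB invalidation: command bit 63 triggers it, bits 61:60
// pick the granularity (1 = global, 3 = page-selective within the domain in
// bits 47:32), and the address register carries the page, the address-mask
// order and the leaf hint; unit_invalidate_done() polls bit 63 for
// completion.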
static void
unit_invalidate(vtd_unit_t * unit,
							uint64_t did, ppnum_t addr, ppnum_t mask, bool leaf)
{
	if (unit->selective)
	{
		 unit->iotlb->address = ptoa_64(addr) | (leaf << 6) | mask;
		 __mfence();
		 unit->iotlb->command = (1ULL<<63) | (3ULL<<60) | (kTlbDrainReads<<49) | (kTlbDrainWrites<<48) | (did << 32);
	}
	else unit->iotlb->command = (1ULL<<63) | (1ULL<<60) | (kTlbDrainReads<<49) | (kTlbDrainWrites<<48);
	__mfence();
}

static void
unit_invalidate_done(vtd_unit_t * unit)
{
	while ((1ULL<<63) & unit->iotlb->command) {}
}

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

typedef uint32_t vtd_vaddr_t;

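// One 64-bit table slot viewed two ways: a mapped entry is the hardware PTE
// (read/write bits plus page address), while a free entry is recycled as a
// free-list node (next/prev indices and a size class) for the buddy
// allocator in balloc.c; a free entry keeps its low access bits zero, so
// hardware still sees it as not-present.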
union vtd_table_entry
{
	struct
	{
		uint     read:1 	__attribute__ ((packed));
		uint     write:1 	__attribute__ ((packed));
		uint     resv:10 	__attribute__ ((packed));
		uint64_t addr:51 	__attribute__ ((packed));
		uint     used:1 	__attribute__ ((packed));
	} used;
	struct
	{
		uint access:2 		__attribute__ ((packed));
		uint next:28 		__attribute__ ((packed));
		uint prev:28 		__attribute__ ((packed));
		uint size:5 		__attribute__ ((packed));
		uint free:1 		__attribute__ ((packed));
	} free;
	uint64_t bits;
};
typedef union vtd_table_entry vtd_table_entry_t;

typedef uint32_t vtd_rbaddr_t;

struct vtd_rblock
{
	RB_ENTRY(vtd_rblock) address_link;
	RB_ENTRY(vtd_rblock) size_link;

	vtd_rbaddr_t start;
	vtd_rbaddr_t end;
};

RB_HEAD(vtd_rbaddr_list, vtd_rblock);
RB_HEAD(vtd_rbsize_list, vtd_rblock);

struct vtd_space_stats
{
    ppnum_t vsize;
    ppnum_t tables;
    ppnum_t bused;
    ppnum_t rused;
    ppnum_t largest_paging;
    ppnum_t largest_32b;
    ppnum_t inserts;
    ppnum_t max_inval[2];
    ppnum_t breakups;
    ppnum_t merges;
    ppnum_t allocs[64];
	ppnum_t bcounts[20];
};
typedef struct vtd_space_stats vtd_space_stats_t;

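// Deferred-free bookkeeping: two queues, index 0 for the buddy (small) space
// and index 1 for the red-black (large) space. Each entry records the
// invalidation stamp taken when it was queued; checkFree() only hands the
// range back to its allocator once every unit's completion stamp has passed
// it.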
struct vtd_free_queued_t
{
    ppnum_t  addr;
    ppnum_t  size;
    uint32_t stamp;
};
enum
{
	kFreeQCount = 2,
	kFreeQElems = 256
};

struct vtd_space
{
#if BSIMPLE
	IOSimpleLock *      block;
#else
	IOLock *            block;
#endif
	IOLock *            rlock;
	ppnum_t				vsize;
	ppnum_t				rsize;
	size_t      	    table_bitmap_size;
	uint8_t *   	    table_bitmap;
	IOMemoryMap *       table_map;
	vtd_table_entry_t *	tables[6];
	uint32_t            cachelinesize;
	ppnum_t             root_page;
	uint8_t				max_level;
    uint8_t             waiting_space;
	uint8_t     	    bheads_count;
	vtd_table_entry_t * bheads;

	vtd_space_stats_t   stats;

    vtd_free_queued_t   free_queue[kFreeQCount][kFreeQElems];
    volatile uint32_t	free_head[kFreeQCount];
    volatile uint32_t   free_tail[kFreeQCount];
    uint32_t			free_mask;
    uint32_t            stamp;

	struct vtd_rbaddr_list rbaddr_list;
	struct vtd_rbsize_list rbsize_list;
};
typedef struct vtd_space vtd_space_t;

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

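// Ceiling log2, with vtd_log2up(1) == 0; e.g. vtd_log2up(2) == 1,
// vtd_log2up(5) == 3, vtd_log2up(8) == 3. vtd_log2down() is the floor
// variant.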
static vtd_vaddr_t
vtd_log2up(vtd_vaddr_t size)
{
	if (1 == size) size = 0;
	else size = 32 - __builtin_clz((unsigned int)size - 1);
	return (size);
}

static vtd_vaddr_t
vtd_log2down(vtd_vaddr_t size)
{
	size = 31 - __builtin_clz((unsigned int)size);
	return (size);
}

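// The table bitmap tracks residency at one bit per 512 level-0 entries,
// i.e. per wired 4K page of page table (512 entries * 8 bytes);
// vtd_space_fault() wires those pages on demand and links them into level 1,
// while _vtd_space_nfault() only asserts that they are already present.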
static void
_vtd_space_nfault(vtd_space_t * bf, vtd_vaddr_t start, vtd_vaddr_t size)
{
	vtd_vaddr_t index;
	vtd_vaddr_t byte;
	uint8_t bit;

	vtassert((start + size) < bf->vsize);

	size += (start & 511);
	size = (size + 511) & ~511;

	while (true)
	{
		index = (start >> 9);
		byte = (index >> 3);
		bit = (1 << (7 & index));
		vtassert(bf->table_bitmap[byte] & bit);
		if (size < 512) break;
		size -= 512;
		start += 512;
	}
}

static bool
vtd_space_present(vtd_space_t * bf, vtd_vaddr_t start)
{
	vtd_vaddr_t byte;
	uint8_t bit;

	vtassert(start < bf->vsize);

	start >>= 9;
	byte = (start >> 3);
	bit = (1 << (7 & start));
	return (bf->table_bitmap[byte] & bit);
}

static void
vtd_space_fault(vtd_space_t * bf, vtd_vaddr_t start, vtd_vaddr_t size)
{
	vtd_vaddr_t index;
	vtd_vaddr_t byte;
	uint8_t     bits, bit;
	IOReturn    kr;

	vtassert((start + size) < bf->vsize);

	size += (start & 511);
	size = (size + 511) & ~511;

	while (true)
	{
		index = (start >> 9);
		byte = (index >> 3);
		index &= 7;
		bits = bf->table_bitmap[byte];
#if 1
		if (0xff == bits)
		{
			index = (8 - index) * 512;
			if (size <= index) break;
			size -= index;
			start += index;
			continue;
		}
#endif
		bit = (1 << index);
		if (!(bits & bit))
		{
			bf->table_bitmap[byte] = bits | bit;
			index = start & ~511;

//			VTLOG("table fault addr 0x%x, table %p\n", start, &bf->tables[0][start]);
			kr = bf->table_map->wireRange(kIODirectionOutIn, index << 3, page_size);
			vtassert(kr == KERN_SUCCESS);
			STAT_ADD(bf, tables, 1);

			bf->tables[0][index].bits = 0;
			ppnum_t lvl0page = pmap_find_phys(kernel_pmap, (uintptr_t) &bf->tables[0][index]);
			if (!lvl0page) panic("!lvl0page");
			bf->tables[1][index >> 9].bits = ptoa_64(lvl0page) | kPageAccess;
			table_flush(&bf->tables[1][index >> 9], sizeof(vtd_table_entry_t), bf->cachelinesize);
		}
		if (size <= 512) break;
		size -= 512;
		start += 512;
	}
}

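// Fill the level-0 entries for [start, start+size) from pageList. In the
// TABLE_CB build the unrolled loop flushes eight 8-byte entries, one 64-byte
// cache line, per clflush; the write-combined mapping used otherwise only
// needs the trailing mfence.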
static void
vtd_space_set(vtd_space_t * bf, vtd_vaddr_t start, vtd_vaddr_t size,
			  uint32_t mapOptions, upl_page_info_t * pageList)
{
	ppnum_t idx;
	uint8_t access = kReadAccess | 0*kWriteAccess;

	if (kIODMAMapPhysicallyContiguous & mapOptions) VTLOG("map phys %x, %x\n", pageList[0].phys_addr, size);

	if (mapOptions & kIODMAMapWriteAccess) access |= kWriteAccess;

	vtassert((start + size) <= bf->vsize);
	vtd_space_nfault(bf, start, size);

	if (kIODMAMapPhysicallyContiguous & mapOptions)
	{
		for (idx = 0; idx < size; idx++)
		{
			bf->tables[0][start + idx].bits = (access | ptoa_64(pageList[0].phys_addr + idx));
		}
#if TABLE_CB
		table_flush(&bf->tables[0][start], size * sizeof(vtd_table_entry_t), bf->cachelinesize);
#endif
	}
	else
	{
#if TABLE_CB
    	ppnum_t j;
		for (idx = 0; size >= 8; size -= 8, idx += 8)
		{
			j = 0;
			bf->tables[0][start + idx + j].bits = (access | ptoa_64(pageList[idx + j].phys_addr)); j++;
			bf->tables[0][start + idx + j].bits = (access | ptoa_64(pageList[idx + j].phys_addr)); j++;
			bf->tables[0][start + idx + j].bits = (access | ptoa_64(pageList[idx + j].phys_addr)); j++;
			bf->tables[0][start + idx + j].bits = (access | ptoa_64(pageList[idx + j].phys_addr)); j++;
			bf->tables[0][start + idx + j].bits = (access | ptoa_64(pageList[idx + j].phys_addr)); j++;
			bf->tables[0][start + idx + j].bits = (access | ptoa_64(pageList[idx + j].phys_addr)); j++;
			bf->tables[0][start + idx + j].bits = (access | ptoa_64(pageList[idx + j].phys_addr)); j++;
			bf->tables[0][start + idx + j].bits = (access | ptoa_64(pageList[idx + j].phys_addr));
			__mfence();
			__clflush((void *) &bf->tables[0][start + idx].bits);
		}
		if (size)
		{
			for (j = 0; j < size; j++)
			{
				bf->tables[0][start + idx + j].bits = (access | ptoa_64(pageList[idx + j].phys_addr));
			}
			__mfence();
			__clflush((void *) &bf->tables[0][start + idx].bits);
		}
#else
		for (idx = 0; idx < size; idx++)
		{
			bf->tables[0][start + idx].bits = (access | ptoa_64(pageList[idx].phys_addr));
		}
#endif
	}
	__mfence();
}

#include "balloc.c"
#include "rballoc.c"

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

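// AppleVTD plugs in as the system IOMapper: iovmMapMemory() and the
// iovmInsert() variants populate translation entries for DMA, iovmFree()
// queues the matching IOTLB invalidations, and mapAddr() reverses a mapped
// address through the level-0 table.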
class AppleVTD : public IOMapper
{
    OSDeclareDefaultStructors(AppleVTD);

public:
	IOSimpleLock  		   * fHWLock;
	const OSData  	       * fDMARData;
	IOWorkLoop             * fWorkLoop;
	IOInterruptEventSource * fIntES;
	IOInterruptEventSource * fFaultES;
    IOTimerEventSource     * fTimerES;

	enum { kMaxUnits = 8 };
	vtd_unit_t * units[kMaxUnits];

	uint32_t fTreeBits;
	uint32_t fMaxRoundSize;

	uint32_t fCacheLineSize;

	IOMemoryMap * fTableMap;
	IOMemoryMap * fContextTableMap;

	ppnum_t  fRootEntryPage;

	vtd_space_t * fSpace;

	static void install(IOWorkLoop * wl, uint32_t flags,
						IOService * provider, const OSData * data);
	bool init(IOWorkLoop * wl, const OSData * data);

    virtual void free();
    virtual bool initHardware(IOService *provider);

	vtd_space_t * space_create(uint32_t cachelinesize, uint32_t treebits, ppnum_t vsize,
							   uint32_t buddybits, ppnum_t rsize);
	vtd_vaddr_t space_alloc(vtd_space_t * bf, vtd_vaddr_t size,
							uint32_t mapOptions, const IODMAMapSpecification * mapSpecification,
							upl_page_info_t * pageList);
	void space_free(vtd_space_t * bf, vtd_vaddr_t addr, vtd_vaddr_t size);
	void space_alloc_fixed(vtd_space_t * bf, vtd_vaddr_t addr, vtd_vaddr_t size);

    IOReturn handleInterrupt(IOInterruptEventSource * source, int count);
    IOReturn handleFault(IOInterruptEventSource * source, int count);
	IOReturn timer(OSObject * owner, IOTimerEventSource * sender);
	virtual IOReturn callPlatformFunction(const OSSymbol * functionName,
										  bool waitForFunction,
										  void * param1, void * param2,
										  void * param3, void * param4);

	void iovmInvalidateSync(ppnum_t addr, IOItemCount pages);
    void checkFree(uint32_t queue);

    virtual ppnum_t iovmAlloc(IOItemCount pages);
    virtual void iovmFree(ppnum_t addr, IOItemCount pages);

    virtual void iovmInsert(ppnum_t addr, IOItemCount offset, ppnum_t page);
    virtual void iovmInsert(ppnum_t addr, IOItemCount offset,
                            ppnum_t *pageList, IOItemCount pageCount);
    virtual void iovmInsert(ppnum_t addr, IOItemCount offset,
                            upl_page_info_t *pageList, IOItemCount pageCount);

    virtual ppnum_t iovmMapMemory(
    			  OSObject                    * memory,   // dma command or iomd
				  ppnum_t                       offsetPage,
				  ppnum_t                       pageCount,
				  uint32_t                      options,
				  upl_page_info_t             * pageList,
				  const IODMAMapSpecification * mapSpecification);

    virtual addr64_t mapAddr(IOPhysicalAddress addr);
};


OSDefineMetaClassAndStructors(AppleVTD, IOMapper);
#define super IOMapper

/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

void
AppleVTD::install(IOWorkLoop * wl, uint32_t flags,
					IOService * provider, const OSData * data)
{
	AppleVTD * mapper = 0;
	bool ok = false;

	if (!IOService::getPlatform()->getProperty(kIOPlatformMapperPresentKey)) return;

	VTLOG("DMAR %p\n", data);
	if (data)
	{
		mapper = new AppleVTD;
		if (mapper)
		{
			if (mapper->init(wl, data) && mapper->attach(provider))
			{
				ok = mapper->start(provider);
				if (!ok) mapper->detach(provider);
			}
			mapper->release();
		}
	}
	if (!ok)
	{
		IOService::getPlatform()->removeProperty(kIOPlatformMapperPresentKey);
		IOMapper::setMapperRequired(false);
	}
}

bool
AppleVTD::init(IOWorkLoop * wl, const OSData * data)
{
	uint32_t unitIdx;

	if (!super::init()) return (false);

	data->retain();
	fDMARData = data;
	wl->retain();
	fWorkLoop = wl;
	fCacheLineSize = cpuid_info()->cache_linesize;

	ACPI_TABLE_DMAR *           dmar = (typeof(dmar))      data->getBytesNoCopy();
	ACPI_DMAR_HEADER *          dmarEnd = (typeof(dmarEnd))(((uintptr_t) dmar) + data->getLength());
	ACPI_DMAR_HEADER *          hdr = (typeof(hdr))      (dmar + 1);
	ACPI_DMAR_HARDWARE_UNIT *   unit;

	VTLOG("DMAR Width %x, Flags %x\n", dmar->Width, dmar->Flags);

	for (unitIdx = 0; hdr < dmarEnd;
			hdr = (typeof(hdr))(((uintptr_t) hdr) + hdr->Length))
	{
		switch (hdr->Type)
		{
			case ACPI_DMAR_TYPE_HARDWARE_UNIT:
				unit = (typeof(unit)) hdr;
				if ((units[unitIdx] = unit_init(unit))) unitIdx++;
				break;
		}
	}

	return (unitIdx != 0);
}

void AppleVTD::free()
{
	super::free();
}

vtd_space_t *
AppleVTD::space_create(uint32_t cachelinesize,
						uint32_t treebits, ppnum_t vsize, uint32_t buddybits, ppnum_t rsize)
{
	IOBufferMemoryDescriptor * md;
	IOReturn 	   kr = kIOReturnSuccess;
	vtd_space_t *  bf;
	uint32_t       count;
	mach_vm_size_t alloc;
	uint32_t       level;
	uint32_t       bit;

	vtassert(vsize >= (1U << buddybits));
	vtassert(vsize > rsize);
	vtassert(buddybits > (9 + 3));
	vtassert(treebits > 12);

	bf = IONew(vtd_space_t, 1);
	if (!bf) return (NULL);
	bzero(bf, sizeof(vtd_space_t));

	bf->rlock = IOLockAlloc();
#if BSIMPLE
	bf->block = fHWLock;
#else
	bf->block = IOLockAlloc();
#endif
	bf->cachelinesize = cachelinesize;

	treebits -= 12;
	vsize = (vsize + 511) & ~511;
	bf->vsize = vsize;
	bf->table_bitmap_size = ((vsize / 512) + 7) / 8;
	bf->table_bitmap = IONew(uint8_t, bf->table_bitmap_size);
	if (!bf->table_bitmap) return (NULL);
	bzero(bf->table_bitmap, bf->table_bitmap_size);

	alloc = 0;
	level = 0;
	bit   = 0;
	while (bit < treebits)
	{
		count = (vsize >> bit);
		if (!count) count = 1;
		alloc += round_page_64(count * sizeof(vtd_table_entry_t));
		bit += 9;
		level++;
	}
	bf->max_level = level - 1;

	VTLOG("level %d, bmd...0x%llx\n", bf->max_level, alloc);
	md = IOBufferMemoryDescriptor::inTaskWithOptions(TASK_NULL,
						kIOMemoryPageable |
#if !TABLE_CB
						kIOMapWriteCombineCache |
#endif
						kIOMemoryMapperNone,
						alloc, page_size);
	VTLOG("bmd %p\n", md);
	vtassert(md);
	if (!md) return (NULL);

//	kr = bmd->prepare(kIODirectionOutIn);
//	vtassert(KERN_SUCCESS == kr);

	bf->table_map = md->map();
	vtassert(bf->table_map);
	md->release();

	vtassert(bf->table_map);
	if (!bf->table_map) return (NULL);

	vtd_table_entry_t * table;
	table = (typeof(table)) bf->table_map->getVirtualAddress();

	vtd_table_entry_t * prior = NULL;
	vtd_table_entry_t * next = table;
	mach_vm_size_t      offset;
	uint32_t idx;

	level = 0;
	bit   = 0;
	while (bit < treebits)
	{
		count = (vsize >> bit);
		if (!count) count = 1;

		vtassert(level < arrayCount(bf->tables));
		vtassert(level <= bf->max_level);
		bf->tables[level] = next;
		if (level == 1)
		{
			// wire levels >0
			offset = ((next - table) * sizeof(vtd_table_entry_t));
			VTLOG("wire [%llx, %llx]\n", offset, alloc);
			kr = bf->table_map->wireRange(kIODirectionOutIn, offset, alloc - offset);
			vtassert(KERN_SUCCESS == kr);
			STAT_ADD(bf, tables, atop_64(alloc - offset));
			if (KERN_SUCCESS != kr)
			{
				bf->table_map->release();
				return (NULL);
			}
		}
		else if (level >= 2)
		{
			for (idx = 0; idx < count; idx++)
			{
				ppnum_t lvl2page = pmap_find_phys(kernel_pmap, (uintptr_t) &prior[idx << 9]);
				if (!lvl2page) panic("!lvl2page");
				VTLOG("lvl2 %p[%x] = %p\n", next, idx, &prior[idx << 9]);
				next[idx].bits = (kPageAccess | ptoa_64(lvl2page));
			}
		}
		prior = next;
		next = next + ((count + 511) & ~511);
		bit += 9;
		level++;
	}
	table_flush(&bf->tables[1][0], alloc - offset, bf->cachelinesize);

#if !TABLE_CB
	IOSetProcessorCacheMode(kernel_task, (IOVirtualAddress) &bf->tables[0][0], page_size, kIOCopybackCache);
#endif

	VTLOG("tables %p, %p, %p, %p, %p, %p\n", bf->tables[0], bf->tables[1], bf->tables[2],
						   						bf->tables[3], bf->tables[4], bf->tables[5]);

	bf->root_page = pmap_find_phys(kernel_pmap, (uintptr_t) bf->tables[bf->max_level]);
	if (!bf->root_page) panic("!root_page");
	VTLOG("tree root 0x%llx\n", ptoa_64(bf->root_page));

	vtd_ballocator_init(bf, buddybits);
	bf->rsize = rsize;
	vtd_rballocator_init(bf, rsize, vsize - rsize);

	VTLOG("bsize 0x%x, bsafe 0x%x, breserve 0x%x, rsize 0x%x\n",
	        (1<<kBPagesLog2), kBPagesSafe, kBPagesReserve, bf->rsize);

	STAT_ADD(bf, vsize, vsize);
	OSData *
	data = OSData::withBytesNoCopy(&bf->stats, sizeof(bf->stats));
	if (data)
	{
		setProperty("stats", data);
		data->release();
	}

	bf->stamp = 0x100;
	bf->free_mask  = (kFreeQElems - 1);

	return (bf);
}

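// Allocate IO space: the buddy allocator (balloc.c) serves small requests
// from [0, rsize), the red-black range allocator (rballoc.c) serves large
// ones from [rsize, vsize). When neither can satisfy the request the thread
// sleeps until space_free() signals waiting_space.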
vtd_baddr_t
AppleVTD::space_alloc(vtd_space_t * bf, vtd_baddr_t size,
					  uint32_t mapOptions, const IODMAMapSpecification * mapSpecification,
					  upl_page_info_t * pageList)
{
	vtd_vaddr_t addr;
    vtd_vaddr_t align = 1;
    vtd_baddr_t largethresh;
    bool        uselarge;
	uint32_t    list;

	if ((kIODMAMapPagingPath & mapOptions) && (size > bf->stats.largest_paging))
		bf->stats.largest_paging = size;

	list = vtd_log2up(size);

	if (mapSpecification)
	{
		if (mapSpecification->numAddressBits
			&& (mapSpecification->numAddressBits <= 32)
			&& (size > bf->stats.largest_32b))		bf->stats.largest_32b = size;

		if (mapSpecification->alignment > page_size) align = atop_64(mapSpecification->alignment);
	}

	if (bf->stats.bused >= kBPagesReserve)
	{
		largethresh = 1;
	}
	else if (bf->stats.bused >= kBPagesSafe)
	{
		largethresh = kLargeThresh2;
	}
	else
	{
		largethresh = kLargeThresh;
	}

	if (!(kIODMAMapPagingPath & mapOptions)
		&& (size >= largethresh)
		&& mapSpecification
		&& mapSpecification->numAddressBits
		&& ((1ULL << (mapSpecification->numAddressBits - 12)) >= bf->vsize))
	{
		uselarge = true;
	}
	else
	{
		uselarge = false;
		if (align > size) size = align;
	}

#if 0
	IOSimpleLockLock(fHWLock);
	checkFree(uselarge);
	IOSimpleLockUnlock(fHWLock);
#endif

	do
	{
		if (uselarge)
		{
			IOLockLock(bf->rlock);
			addr = vtd_rballoc(bf, size, align, fMaxRoundSize, mapOptions, pageList);
			STAT_ADD(bf, allocs[list], 1);
			if (addr)
			{
				STAT_ADD(bf, rused, size);
				vtd_space_fault(bf, addr, size);
			}
			IOLockUnlock(bf->rlock);
			if (addr && pageList) vtd_space_set(bf, addr, size, mapOptions, pageList);
		}
		else
		{
			BLOCK(bf->block);
			addr = vtd_balloc(bf, size, mapOptions, pageList);
			STAT_ADD(bf, allocs[list], 1);
			if (addr) STAT_ADD(bf, bused, (1 << list));
			BUNLOCK(bf->block);
		}
		if (addr) break;
		IOLockLock(bf->rlock);
		bf->waiting_space = true;
		IOLockSleep(bf->rlock, &bf->waiting_space, THREAD_UNINT);
		IOLockUnlock(bf->rlock);
//		IOLog("AppleVTD: waiting space (%d)\n", size);
		VTLOG("AppleVTD: waiting space (%d, bused %d, rused %d)\n",
				size, bf->stats.bused, bf->stats.rused);
	}
	while (true);

	return (addr);
}

void
AppleVTD::space_free(vtd_space_t * bf, vtd_baddr_t addr, vtd_baddr_t size)
{
	uint32_t list;

	vtassert(addr);
	vtassert((addr + size) <= bf->vsize);

	if (addr >= bf->rsize)
	{
		IOLockLock(bf->rlock);
		vtd_rbfree(bf, addr, size, fMaxRoundSize);
		STAT_ADD(bf, rused, -size);
		IOLockUnlock(bf->rlock);
	}
	else
	{
		list = vtd_log2up(size);
		BLOCK(bf->block);
		vtd_bfree(bf, addr, size);
		STAT_ADD(bf, bused, -(1 << list));
		BUNLOCK(bf->block);
	}

	if (bf->waiting_space)
	{
		IOLockLock(bf->rlock);
		bf->waiting_space = false;
		IOLockWakeup(bf->rlock, &bf->waiting_space, false);
		IOLockUnlock(bf->rlock);
	}
}

void
AppleVTD::space_alloc_fixed(vtd_space_t * bf, vtd_baddr_t addr, vtd_baddr_t size)
{
	vtd_balloc_fixed(bf, addr, size);
	vtd_rballoc_fixed(bf, addr, size);
	vtd_space_fault(bf, addr, size);
}

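// Software page walk mirroring the hardware's: consume nine index bits per
// level from width down to the 4K page, reading each level's entry with
// ml_phys_read_double_64() and stopping early at a non-present entry.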
static page_entry_t
vtd_tree_read(page_entry_t root, uint32_t width, addr64_t addr)
{
	page_entry_t entry = root;
	page_entry_t table;
	uint32_t index;
	uint32_t level = 0;

	while (width > 12)
	{
		width -= 9;
		index = (addr >> (width - 3)) & (511 << 3);

		table = entry & kPageAddrMask;
		entry = ml_phys_read_double_64(table + index);

		if (!(kPageAccess & entry))
			break;
		level++;
	}

	return (entry);
}

bool
AppleVTD::initHardware(IOService *provider)
{
	uint32_t idx;
	vtd_unit_t * unit;

    fIsSystem = true;

	uint64_t context_width;
	fTreeBits = 0;
	unit = units[0];
	// prefer smallest tree?
	for (context_width = kAddressWidth30;
			(context_width <= kAddressWidth64);
			context_width++)
	{
		if ((0x100 << context_width) & unit->regs->capability)
		{
			fTreeBits = (30 + 9 * context_width);  // (57+9) for 64
			break;
		}
	}

	for (idx = 0; (unit = units[idx]); idx++)
	{
		if (!((0x100 << context_width) & unit->regs->capability))
			panic("!tree bits %d on unit %d", fTreeBits, idx);
		if (unit->selective && ((unit->rounding > fMaxRoundSize)))
			fMaxRoundSize = unit->rounding;
	}

	VTLOG("context_width %lld, treebits %d, round %d\n",
			context_width, fTreeBits, fMaxRoundSize);

    // need better legacy checks
	if (!fMaxRoundSize)                              return (false);
	if ((48 == fTreeBits) && (9 == fMaxRoundSize))   return (false);
	//

	fHWLock = IOSimpleLockAlloc();

	fSpace = space_create(fCacheLineSize, fTreeBits, kVPages, kBPagesLog2, kRPages);
	if (!fSpace) return (false);

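	// Identity-map the 0xfee00000 message-signaled-interrupt (local APIC)
	// page so DMA-translated MSI writes from devices still reach the
	// interrupt controller; the surrounding range up to 0xfef00000 is
	// reserved out of both allocators.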
	space_alloc_fixed(fSpace, atop_64(0xfee00000), atop_64(0xfef00000-0xfee00000));
	vtd_space_fault(fSpace, atop_64(0xfee00000), 1);
	fSpace->tables[0][atop_64(0xfee00000)].bits = 0xfee00000 | kPageAccess;

	ACPI_TABLE_DMAR *           dmar = (typeof(dmar))      fDMARData->getBytesNoCopy();
	ACPI_DMAR_HEADER *          dmarEnd = (typeof(dmarEnd))(((uintptr_t) dmar) + fDMARData->getLength());
	ACPI_DMAR_HEADER *          hdr = (typeof(hdr))      (dmar + 1);
	ACPI_DMAR_RESERVED_MEMORY * mem;

	for (; hdr < dmarEnd;
			hdr = (typeof(hdr))(((uintptr_t) hdr) + hdr->Length))
	{
		uint64_t addr;
		uint32_t count;
		switch (hdr->Type)
		{
			case ACPI_DMAR_TYPE_RESERVED_MEMORY:
				mem = (typeof(mem)) hdr;
				VTLOG("ACPI_DMAR_TYPE_RESERVED_MEMORY 0x%llx, 0x%llx\n",
					mem->BaseAddress, mem->EndAddress);

				addr = mem->BaseAddress;
				count = atop_32(mem->EndAddress - addr);

				space_alloc_fixed(fSpace, atop_64(addr), count);
				for (; count; addr += page_size, count--)
				{
					fSpace->tables[0][atop_64(addr)].bits = (addr | kPageAccess);
				}
				break;
		}
	}

	IOReturn kr;
	IOBufferMemoryDescriptor *
	md = IOBufferMemoryDescriptor::inTaskWithOptions(TASK_NULL,
						kIOMemoryPageable |
						kIOMapWriteCombineCache |
						kIOMemoryMapperNone,
						2 * page_size, page_size);
	vtassert(md);
	if (!md) return (false);

	kr = md->prepare(kIODirectionOutIn);
	vtassert(KERN_SUCCESS == kr);

	fContextTableMap = md->map();
	vtassert(fContextTableMap);
	md->release();

    // context entries

	context_entry_t * context_entry_table = (typeof(context_entry_table)) fContextTableMap->getVirtualAddress();
	for (idx = 0; idx < 256; idx++)
	{
		context_entry_table[idx].address_space_root = 	ptoa_64(fSpace->root_page)
														| kEntryPresent
														| kTranslationType0;
		context_entry_table[idx].context_entry = context_width
												| kTheDomain*kDomainIdentifier1;
//		if (idx == ((2<<3)|0)) context_entry_table[idx].address_space_root |= kTranslationType2;  // passthru
//		if (idx == ((27<<3)|0)) context_entry_table[idx].address_space_root = 0;
		if (!(kIOPCIConfiguratorIGIsMapped & gIOPCIFlags))
		{
			if (idx == ((2<<3)|0)) context_entry_table[idx].address_space_root &= ~kEntryPresent;
		}
	}
	ppnum_t context_page = pmap_find_phys(kernel_pmap, (uintptr_t) &context_entry_table[0]);
	if (!context_page) panic("!context_page");

	// root

	root_entry_t * root_entry_table = (typeof(root_entry_table)) (fContextTableMap->getAddress() + page_size);
	for (idx = 0; idx < 256; idx++)
	{
		root_entry_table[idx].context_entry_ptr = ptoa_64(context_page)
													| kEntryPresent;
		root_entry_table[idx].resv = 0;
	}

	fRootEntryPage = pmap_find_phys(kernel_pmap, (uintptr_t) &root_entry_table[0]);
	if (!fRootEntryPage) panic("!fRootEntryPage");
	for (idx = 0; (unit = units[idx]); idx++)
	{
		unit->root = ptoa_64(fRootEntryPage);
	}

	// QI

	for (idx = 0; (unit = units[idx]); idx++)
	{
		md = IOBufferMemoryDescriptor::inTaskWithOptions(kernel_task,
							kIOMemoryHostPhysicallyContiguous |
							kIOMapWriteCombineCache |
							kIOMemoryMapperNone,
							kQIPageCount * page_size, page_size);
		vtassert(md);
		if (!md) return (false);

		kr = md->prepare(kIODirectionOutIn);
		vtassert(KERN_SUCCESS == kr);

		unit->qi_map = md->map();
		vtassert(unit->qi_map);
		unit->qi_mask    = (kQIPageCount * 256) - 1;
		unit->qi_table   = (typeof(unit->qi_table)) (unit->qi_map->getAddress());
		unit->qi_address = vtd_log2down(kQIPageCount)
					     | md->getPhysicalSegment(0, NULL, kIOMemoryMapperNone);

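		// Invalidation-wait descriptors write their 32-bit stamp to this
		// physical address (unit->qi_stamp), so completions can be checked
		// with a plain memory read instead of a register access.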
		ppnum_t stamp_page = pmap_find_phys(kernel_pmap, (uintptr_t) &unit->qi_stamp);
		vtassert(stamp_page);
		unit->qi_stamp_address = ptoa_64(stamp_page) | (page_mask & ((uintptr_t) &unit->qi_stamp));

		md->release();
    }

	//

	IOReturn  ret;
	uint64_t  msiAddress;
	uint32_t  msiData;
	ret = gIOPCIMessagedInterruptController->allocateDeviceInterrupts(
													this, 2, 0, &msiAddress, &msiData);
	if (kIOReturnSuccess == ret)
	{
        fIntES = IOInterruptEventSource::interruptEventSource(
                      this,
                      OSMemberFunctionCast(IOInterruptEventSource::Action,
                                            this, &AppleVTD::handleInterrupt),
                      this, 0);
		if (fIntES) fWorkLoop->addEventSource(fIntES);
        fFaultES = IOInterruptEventSource::interruptEventSource(
                      this,
                      OSMemberFunctionCast(IOInterruptEventSource::Action,
                                            this, &AppleVTD::handleFault),
                      this, 1);
		if (fFaultES) fWorkLoop->addEventSource(fFaultES);
	}


	fTimerES = IOTimerEventSource::timerEventSource(this,
	                      OSMemberFunctionCast(IOTimerEventSource::Action,
												this, &AppleVTD::timer));
	if (fTimerES) fWorkLoop->addEventSource(fTimerES);

	if (!fIntES || !fFaultES) msiData = msiAddress = 0;

	__mfence();
	for (idx = 0; (unit = units[idx]); idx++)
	{
		unit->msi_data    = msiData & 0xff;
		unit->msi_address = msiAddress;
		unit_enable(unit);
	}
	if (fIntES)   fIntES->enable();
	if (fFaultES) fFaultES->enable();

//	fTimerES->setTimeoutMS(10);

	setProperty(kIOPlatformQuiesceActionKey, INT32_MAX - 1000, 64);
	setProperty(kIOPlatformActiveActionKey, INT32_MAX - 1000, 64);

	registerService();

	return (true);
}

IOReturn
AppleVTD::handleInterrupt(IOInterruptEventSource * source, int count)
{
	uint32_t idx;
	vtd_unit_t * unit;

	IOSimpleLockLock(fHWLock);
	for (idx = 0; idx < kFreeQCount; idx++) checkFree(idx);
	for (idx = 0; (unit = units[idx]); idx++)
	{
		unit->regs->invalidation_completion_status = 1;
	}
	IOSimpleLockUnlock(fHWLock);

	return (kIOReturnSuccess);
}

IOReturn
AppleVTD::handleFault(IOInterruptEventSource * source, int count)
{
	uint32_t idx;
	vtd_unit_t * unit;

	for (idx = 0; (unit = units[idx]); idx++) unit_faults(unit, true || (idx != 0));

	return (kIOReturnSuccess);
}

IOReturn
AppleVTD::timer(OSObject * owner, IOTimerEventSource * sender)
{
	uint32_t idx;

	IOSimpleLockLock(fHWLock);
	for (idx = 0; idx < kFreeQCount; idx++) checkFree(idx);
	IOSimpleLockUnlock(fHWLock);

	fTimerES->setTimeoutMS(10);

	return (kIOReturnSuccess);
}

IOReturn
AppleVTD::callPlatformFunction(const OSSymbol * functionName,
							   bool waitForFunction,
							   void * param1, void * param2,
							   void * param3, void * param4)
{
    if (functionName)
    {
		uint32_t idx;
		vtd_unit_t * unit;
    	if (functionName->isEqualTo(gIOPlatformActiveActionKey))
		{
			for (idx = 0; (unit = units[idx]); idx++)
			{
				unit_enable(unit);
			}
			return (kIOReturnSuccess);
		}
		else if (functionName->isEqualTo(gIOPlatformQuiesceActionKey))
		{
			for (idx = 0; (unit = units[idx]); idx++)
			{
				unit_quiesce(unit);
			}
			return (kIOReturnSuccess);
		}
	}
    return (super::callPlatformFunction(functionName, waitForFunction,
                                        param1, param2, param3, param4));
}

ppnum_t
AppleVTD::iovmMapMemory(
			  OSObject                    * memory,   // dma command or iomd
			  ppnum_t                       offsetPage,
			  ppnum_t                       pageCount,
			  uint32_t                      mapOptions,
			  upl_page_info_t             * pageList,
			  const IODMAMapSpecification * mapSpecification)
{
	vtd_vaddr_t base;

	base = space_alloc(fSpace, pageCount, mapOptions, mapSpecification, pageList);
	vtassert((base + pageCount) <= fSpace->vsize);

//	space_free(fSpace, base, pageCount);
//	base = space_alloc(fSpace, pageCount, mapOptions, mapSpecification, pageList);

#if KP
	VTLOG("iovmMapMemory: (0x%x)=0x%x\n", (int)pageCount, (int)base);
#endif

    return (base);
}

ppnum_t
AppleVTD::iovmAlloc(IOItemCount pages)
{
	ppnum_t result;

	result = space_alloc(fSpace, pages, 0, NULL, NULL);
#if KP
	VTLOG("iovmAlloc: 0x%x=0x%x\n", (int)pages, (int)result );
#endif
    return (result);
}

void
AppleVTD::iovmInvalidateSync(ppnum_t addr, IOItemCount pages)
{
	vtd_unit_t * unit;
	unsigned int leaf;
	unsigned int idx;
	uint32_t     wait;
	ppnum_t      unitAddr[kMaxUnits];
	IOItemCount  unitPages[kMaxUnits];
	bool		 more;

	for (idx = 0; (unit = units[idx]); idx++)
	{
		unitAddr[idx] = addr;
		unitPages[idx] = pages;
	}
	leaf = true;

	do
	{
		more = false;
		wait = 0;
		for (idx = 0; (unit = units[idx]); idx++)
		{
			if (unitPages[idx])
			{
				wait |= (1 << idx);
				unit_invalidate(unit, kTheDomain, unitAddr[idx], unit->rounding, leaf);
				if (!unit->selective
					|| (unitPages[idx] <= (1U << unit->rounding)))
				{
					unitPages[idx] = 0;
				}
				else
				{
					more = true;
					unitPages[idx] -= (1U << unit->rounding);
					unitAddr[idx]  += (1U << unit->rounding);
				}
			}
		}
		for (idx = 0; (unit = units[idx]); idx++)
		{
			if (wait & (1U << idx)) unit_invalidate_done(unit);
		}
	}
	while (more);
}

void
AppleVTD::iovmFree(ppnum_t addr, IOItemCount pages)
{
	vtd_unit_t * unit;
	unsigned int leaf, isLarge;
	unsigned int unitIdx;
    uint32_t     did = kTheDomain;
	ppnum_t      unitAddr;
	IOItemCount  unitPages;
	uint32_t     idx;
	uint32_t     next;
	uint32_t     count;
	uint64_t     stamp;

#if KP
	VTLOG("iovmFree: 0x%x,0x%x\n", (int)pages, addr);
#endif

	vtassert((addr + pages) <= fSpace->vsize);
	vtd_space_nfault(fSpace, addr, pages);
	bzero(&fSpace->tables[0][addr], pages * sizeof(vtd_table_entry_t));
	table_flush(&fSpace->tables[0][addr], pages * sizeof(vtd_table_entry_t), fCacheLineSize);

#if !ENA_QI
	IOSimpleLockLock(fHWLock);
    iovmInvalidateSync(addr, pages);
	IOSimpleLockUnlock(fHWLock);
	space_free(fSpace, addr, pages);
	return;

#else	/* !ENA_QI */

	leaf = true;
	isLarge = (addr >= fSpace->rsize);

	IOSimpleLockLock(fHWLock);

#if 0
	int32_t      freeCount;
	freeCount = fSpace->free_tail[isLarge] - fSpace->free_head[isLarge];
	if (freeCount < 0) freeCount = kFreeQElems - freeCount;
	if (freeCount >= 8)
#endif
	{
		checkFree(isLarge);
	}

	stamp = ++fSpace->stamp;

	idx = fSpace->free_tail[isLarge];
	next = (idx + 1) & fSpace->free_mask;
	if (next == fSpace->free_head[isLarge]) panic("qfull");
	fSpace->free_queue[isLarge][idx].addr = addr;
	fSpace->free_queue[isLarge][idx].size = pages;
	fSpace->free_queue[isLarge][idx].stamp = stamp;
	fSpace->free_tail[isLarge] = next;

	for (unitIdx = 0; (unit = units[unitIdx]); unitIdx++)
	{
		unitAddr = addr;
		unitPages = pages;
		idx = unit->qi_tail;
		count = 0;
		while (unitPages)
		{
			next = (idx + 1) & unit->qi_mask;
			while ((next << 4) == unit->regs->invalidation_queue_head) {}

			if (unit->selective)
			{
				uint32_t mask = unit->rounding;
				if (unitPages < (1U << unit->rounding)) mask = vtd_log2up(unitPages);
				unit->qi_table[idx].command = (did<<16) | (kTlbDrainReads<<7) | (kTlbDrainWrites<<6) | (3<<4) | (2);
				unit->qi_table[idx].address = ptoa_64(unitAddr) | (leaf << 6) | mask;
			}
			else
			{
				unit->qi_table[idx].command = (kTlbDrainReads<<7) | (kTlbDrainWrites<<6) | (1<<4) | (2);
			}

			if (!unit->selective
				|| (unitPages <= (1U << unit->rounding)))
			{
				unitPages = 0;
			}
			else
			{
				unitPages -= (1U << unit->rounding);
				unitAddr  += (1U << unit->rounding);
				count++;
				if (!(count & (unit->qi_mask >> 5)))
				{
					__mfence();
					unit->regs->invalidation_queue_tail = (next << 4);
				}
			}
			idx = next;
		}
//		if (freeCount >= 64)
//		if (0 == (stamp & 3))
		{
			next = (idx + 1) & unit->qi_mask;
			while ((next << 4) == unit->regs->invalidation_queue_head) {}
			uint64_t command = (stamp<<32) | (1<<5) | (5);
//     		command |= (1<<4); // make an int
			unit->qi_table[idx].command = command;
			unit->qi_table[idx].address = unit->qi_stamp_address;
		}
		__mfence();
		unit->regs->invalidation_queue_tail = (next << 4);
//		__mfence();
		unit->qi_tail = next;
	}

	IOSimpleLockUnlock(fHWLock);

#endif /* ENA_QI */
}

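// Serial-number comparison: stamps live in a 32-bit circular space, so the
// signed subtraction stays correct across wraparound; e.g.
// stampPassed(0x00000002, 0xfffffffe) is true.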
#define stampPassed(a,b)	(((int32_t)((a)-(b))) >= 0)

void
AppleVTD::checkFree(uint32_t isLarge)
{
	vtd_unit_t * unit;
	uint32_t     unitIdx;
	uint32_t     idx;
	uint32_t     next;
	ppnum_t      addr, size, count;
    bool         ok;

	count = 0;
	idx = fSpace->free_head[isLarge];
	do
	{
		if (idx == fSpace->free_tail[isLarge]) break;
		for (unitIdx = 0, ok = true; ok && (unit = units[unitIdx]); unitIdx++)
		{
			ok &= stampPassed(unit->qi_stamp, fSpace->free_queue[isLarge][idx].stamp);
		}

		if (ok)
		{
			next = (idx + 1) & fSpace->free_mask;
			addr = fSpace->free_queue[isLarge][idx].addr;
			size = fSpace->free_queue[isLarge][idx].size;
#if BSIMPLE
			if (!isLarge)
		    {
				vtd_bfree(fSpace, addr, size);
				STAT_ADD(fSpace, bused, -size);
				idx = next;
		    }
		    else
#endif /* BSIMPLE */
			{
				fSpace->free_head[isLarge] = next;
				IOSimpleLockUnlock(fHWLock);
				space_free(fSpace, addr, size);
				IOSimpleLockLock(fHWLock);
				idx = fSpace->free_head[isLarge];
		    }
			count++;
		}
	}
	while (ok);

#if BSIMPLE
	fSpace->free_head[isLarge] = idx;
#endif
	if (count > fSpace->stats.max_inval[isLarge]) fSpace->stats.max_inval[isLarge] = count;
}

addr64_t
AppleVTD::mapAddr(IOPhysicalAddress addr)
{
	ppnum_t      page = atop_64(addr);
	page_entry_t entry;

	if (page >= fSpace->vsize) return (addr);

	if (!vtd_space_present(fSpace, page)) return (addr);

	entry = fSpace->tables[0][page].bits;

#if KP
	VTLOG("mapAddr: 0x%x=0x%llx\n", (int)addr, entry);
#endif

	if (kPageAccess & entry)
		return (trunc_page_64(entry) | (addr & page_mask));

	return (addr);
}

void
AppleVTD::iovmInsert(ppnum_t addr, IOItemCount offset, ppnum_t page)
{
	addr += offset;
	vtassert(addr < fSpace->vsize);
	vtd_space_nfault(fSpace, addr, 1);
	fSpace->tables[0][addr].bits = ptoa_64(page) | kPageAccess;
	table_flush(&fSpace->tables[0][addr], sizeof(vtd_table_entry_t), fCacheLineSize);
	STAT_ADD(fSpace, inserts, 1);
}


void
AppleVTD::iovmInsert(ppnum_t addr, IOItemCount offset,
						ppnum_t *pageList, IOItemCount pageCount)
{
	ppnum_t idx;

	addr += offset;
	vtassert((addr + pageCount) <= fSpace->vsize);
	vtd_space_nfault(fSpace, addr, pageCount);
    for (idx = 0; idx < pageCount; idx++)
    {
		fSpace->tables[0][addr + idx].bits = ptoa_64(pageList[idx]) | kPageAccess;
	}
	table_flush(&fSpace->tables[0][addr], pageCount * sizeof(vtd_table_entry_t), fCacheLineSize);
	STAT_ADD(fSpace, inserts, pageCount);
}

void
AppleVTD::iovmInsert(ppnum_t addr, IOItemCount offset,
					 upl_page_info_t *pageList, IOItemCount pageCount)
{
	ppnum_t idx;

	addr += offset;

	vtassert((addr + pageCount) <= fSpace->vsize);
	vtd_space_nfault(fSpace, addr, pageCount);
    for (idx = 0; idx < pageCount; idx++)
    {
		fSpace->tables[0][addr + idx].bits = ptoa_64(pageList[idx].phys_addr) | kPageAccess;
	}
	table_flush(&fSpace->tables[0][addr], pageCount * sizeof(vtd_table_entry_t), fCacheLineSize);
	STAT_ADD(fSpace, inserts, pageCount);
}

#endif /* ACPI_SUPPORT */
