machdep.c revision 238257
1/*-
2 * Copyright (c) 2003,2004 Marcel Moolenaar
3 * Copyright (c) 2000,2001 Doug Rabson
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: head/sys/ia64/ia64/machdep.c 238257 2012-07-08 18:00:22Z marcel $");
30
31#include "opt_compat.h"
32#include "opt_ddb.h"
33#include "opt_kstack_pages.h"
34#include "opt_sched.h"
35
36#include <sys/param.h>
37#include <sys/proc.h>
38#include <sys/systm.h>
39#include <sys/bio.h>
40#include <sys/buf.h>
41#include <sys/bus.h>
42#include <sys/cons.h>
43#include <sys/cpu.h>
44#include <sys/eventhandler.h>
45#include <sys/exec.h>
46#include <sys/imgact.h>
47#include <sys/kdb.h>
48#include <sys/kernel.h>
49#include <sys/linker.h>
50#include <sys/lock.h>
51#include <sys/malloc.h>
52#include <sys/mbuf.h>
53#include <sys/msgbuf.h>
54#include <sys/pcpu.h>
55#include <sys/ptrace.h>
56#include <sys/random.h>
57#include <sys/reboot.h>
58#include <sys/sched.h>
59#include <sys/signalvar.h>
60#include <sys/syscall.h>
61#include <sys/syscallsubr.h>
62#include <sys/sysctl.h>
63#include <sys/sysproto.h>
64#include <sys/ucontext.h>
65#include <sys/uio.h>
66#include <sys/uuid.h>
67#include <sys/vmmeter.h>
68#include <sys/vnode.h>
69
70#include <ddb/ddb.h>
71
72#include <net/netisr.h>
73
74#include <vm/vm.h>
75#include <vm/vm_extern.h>
76#include <vm/vm_kern.h>
77#include <vm/vm_page.h>
78#include <vm/vm_map.h>
79#include <vm/vm_object.h>
80#include <vm/vm_pager.h>
81
82#include <machine/bootinfo.h>
83#include <machine/cpu.h>
84#include <machine/efi.h>
85#include <machine/elf.h>
86#include <machine/fpu.h>
87#include <machine/intr.h>
88#include <machine/mca.h>
89#include <machine/md_var.h>
90#include <machine/pal.h>
91#include <machine/pcb.h>
92#include <machine/reg.h>
93#include <machine/sal.h>
94#include <machine/sigframe.h>
95#ifdef SMP
96#include <machine/smp.h>
97#endif
98#include <machine/unwind.h>
99#include <machine/vmparam.h>
100
/* Parent sysctl nodes: hw.freq.* and machdep.cpu.* */
101static SYSCTL_NODE(_hw, OID_AUTO, freq, CTLFLAG_RD, 0, "");
102static SYSCTL_NODE(_machdep, OID_AUTO, cpu, CTLFLAG_RD, 0, "");
103
/*
 * Clock frequencies exported read-only under hw.freq. They are filled in
 * by calculate_frequencies() from freq_ratio(), which yields values in MHz,
 * and stay 0 when the firmware calls fail.
 */
104static u_int bus_freq;
105SYSCTL_UINT(_hw_freq, OID_AUTO, bus, CTLFLAG_RD, &bus_freq, 0,
106    "Bus clock frequency");
107
108static u_int cpu_freq;
109SYSCTL_UINT(_hw_freq, OID_AUTO, cpu, CTLFLAG_RD, &cpu_freq, 0,
110    "CPU clock frequency");
111
112static u_int itc_freq;
113SYSCTL_UINT(_hw_freq, OID_AUTO, itc, CTLFLAG_RD, &itc_freq, 0,
114    "ITC frequency");
115
116int cold = 1;
117
/* Boot information block; dereferenced throughout ia64_init(). */
118struct bootinfo *bootinfo;
119
/* Per-CPU data of the bootstrap processor; installed by ia64_init(). */
120struct pcpu pcpu0;
121
122extern u_int64_t kernel_text[], _end[];
123
/* Gateway page and the signal trampolines located within it (see sendsig()). */
124extern u_int64_t ia64_gateway_page[];
125extern u_int64_t break_sigtramp[];
126extern u_int64_t epc_sigtramp[];
127
/* FPSWA interface; NULL when bootinfo carries no FPSWA address. */
128struct fpswa_iface *fpswa_iface;
129
/*
 * PAL code location/size and the I/O port window; all three are derived
 * from the EFI memory map in ia64_init().
 */
130vm_size_t ia64_pal_size;
131vm_paddr_t ia64_pal_base;
132vm_offset_t ia64_port_base;
133
134u_int64_t ia64_lapic_addr = PAL_PIB_DEFAULT_ADDR;
135
/* Mapping of ia64_lapic_addr; established in cpu_startup(). */
136struct ia64_pib *ia64_pib;
137
/* Set to 1 by identifycpu() for family 0x20 processors. */
138static int ia64_sync_icache_needed;
139
140char machine[] = MACHINE;
141SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "");
142
/* Model and family names; filled in by identifycpu(). */
143static char cpu_model[64];
144SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD, cpu_model, 0,
145    "The CPU model name");
146
147static char cpu_family[64];
148SYSCTL_STRING(_hw, OID_AUTO, family, CTLFLAG_RD, cpu_family, 0,
149    "The CPU family name");
150
151#ifdef DDB
152extern vm_offset_t ksym_start, ksym_end;
153#endif
154
/* Kernel message buffer; allocated and initialized in ia64_init(). */
155struct msgbuf *msgbufp = NULL;
156
157/* Other subsystems (e.g., ACPI) can hook this later. */
158void (*cpu_idle_hook)(void) = NULL;
159
/* Passed to vm_ksubmap_init() in cpu_startup(). */
160struct kva_md_info kmi;
161
/* Frequency unit multipliers, in Hz. */
162#define	Mhz	1000000L
163#define	Ghz	(1000L*Mhz)
164
165static void
166identifycpu(void)
167{
168	char vendor[17];
169	char *family_name, *model_name;
170	u_int64_t features, tmp;
171	int number, revision, model, family, archrev;
172
173	/*
174	 * Assumes little-endian.
175	 */
176	*(u_int64_t *) &vendor[0] = ia64_get_cpuid(0);
177	*(u_int64_t *) &vendor[8] = ia64_get_cpuid(1);
178	vendor[16] = '\0';
179
180	tmp = ia64_get_cpuid(3);
181	number = (tmp >> 0) & 0xff;
182	revision = (tmp >> 8) & 0xff;
183	model = (tmp >> 16) & 0xff;
184	family = (tmp >> 24) & 0xff;
185	archrev = (tmp >> 32) & 0xff;
186
187	family_name = model_name = "unknown";
188	switch (family) {
189	case 0x07:
190		family_name = "Itanium";
191		model_name = "Merced";
192		break;
193	case 0x1f:
194		family_name = "Itanium 2";
195		switch (model) {
196		case 0x00:
197			model_name = "McKinley";
198			break;
199		case 0x01:
200			/*
201			 * Deerfield is a low-voltage variant based on the
202			 * Madison core. We need circumstantial evidence
203			 * (i.e. the clock frequency) to identify those.
204			 * Allow for roughly 1% error margin.
205			 */
206			if (cpu_freq > 990 && cpu_freq < 1010)
207				model_name = "Deerfield";
208			else
209				model_name = "Madison";
210			break;
211		case 0x02:
212			model_name = "Madison II";
213			break;
214		}
215		break;
216	case 0x20:
217		ia64_sync_icache_needed = 1;
218
219		family_name = "Itanium 2";
220		switch (model) {
221		case 0x00:
222			model_name = "Montecito";
223			break;
224		case 0x01:
225			model_name = "Montvale";
226			break;
227		}
228		break;
229	}
230	snprintf(cpu_family, sizeof(cpu_family), "%s", family_name);
231	snprintf(cpu_model, sizeof(cpu_model), "%s", model_name);
232
233	features = ia64_get_cpuid(4);
234
235	printf("CPU: %s (", model_name);
236	if (cpu_freq)
237		printf("%u MHz ", cpu_freq);
238	printf("%s)\n", family_name);
239	printf("  Origin = \"%s\"  Revision = %d\n", vendor, revision);
240	printf("  Features = 0x%b\n", (u_int32_t) features,
241	    "\020"
242	    "\001LB"	/* long branch (brl) instruction. */
243	    "\002SD"	/* Spontaneous deferral. */
244	    "\003AO"	/* 16-byte atomic operations (ld, st, cmpxchg). */ );
245}
246
247static void
248cpu_startup(void *dummy)
249{
250	char nodename[16];
251	struct pcpu *pc;
252	struct pcpu_stats *pcs;
253
254	/*
255	 * Good {morning,afternoon,evening,night}.
256	 */
257	identifycpu();
258
259#ifdef PERFMON
260	perfmon_init();
261#endif
262	printf("real memory  = %ld (%ld MB)\n", ptoa(realmem),
263	    ptoa(realmem) / 1048576);
264
265	vm_ksubmap_init(&kmi);
266
267	printf("avail memory = %ld (%ld MB)\n", ptoa(cnt.v_free_count),
268	    ptoa(cnt.v_free_count) / 1048576);
269
270	if (fpswa_iface == NULL)
271		printf("Warning: no FPSWA package supplied\n");
272	else
273		printf("FPSWA Revision = 0x%lx, Entry = %p\n",
274		    (long)fpswa_iface->if_rev, (void *)fpswa_iface->if_fpswa);
275
276	/*
277	 * Set up buffers, so they can be used to read disk labels.
278	 */
279	bufinit();
280	vm_pager_bufferinit();
281
282	/*
283	 * Traverse the MADT to discover IOSAPIC and Local SAPIC
284	 * information.
285	 */
286	ia64_probe_sapics();
287	ia64_pib = pmap_mapdev(ia64_lapic_addr, sizeof(*ia64_pib));
288
289	ia64_mca_init();
290
291	/*
292	 * Create sysctl tree for per-CPU information.
293	 */
294	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
295		snprintf(nodename, sizeof(nodename), "%u", pc->pc_cpuid);
296		sysctl_ctx_init(&pc->pc_md.sysctl_ctx);
297		pc->pc_md.sysctl_tree = SYSCTL_ADD_NODE(&pc->pc_md.sysctl_ctx,
298		    SYSCTL_STATIC_CHILDREN(_machdep_cpu), OID_AUTO, nodename,
299		    CTLFLAG_RD, NULL, "");
300		if (pc->pc_md.sysctl_tree == NULL)
301			continue;
302
303		pcs = &pc->pc_md.stats;
304
305		SYSCTL_ADD_ULONG(&pc->pc_md.sysctl_ctx,
306		    SYSCTL_CHILDREN(pc->pc_md.sysctl_tree), OID_AUTO,
307		    "nasts", CTLFLAG_RD, &pcs->pcs_nasts,
308		    "Number of IPI_AST interrupts");
309
310		SYSCTL_ADD_ULONG(&pc->pc_md.sysctl_ctx,
311		    SYSCTL_CHILDREN(pc->pc_md.sysctl_tree), OID_AUTO,
312		    "nclks", CTLFLAG_RD, &pcs->pcs_nclks,
313		    "Number of clock interrupts");
314
315		SYSCTL_ADD_ULONG(&pc->pc_md.sysctl_ctx,
316		    SYSCTL_CHILDREN(pc->pc_md.sysctl_tree), OID_AUTO,
317		    "nextints", CTLFLAG_RD, &pcs->pcs_nextints,
318		    "Number of ExtINT interrupts");
319
320		SYSCTL_ADD_ULONG(&pc->pc_md.sysctl_ctx,
321		    SYSCTL_CHILDREN(pc->pc_md.sysctl_tree), OID_AUTO,
322		    "nhardclocks", CTLFLAG_RD, &pcs->pcs_nhardclocks,
323		    "Number of IPI_HARDCLOCK interrupts");
324
325		SYSCTL_ADD_ULONG(&pc->pc_md.sysctl_ctx,
326		    SYSCTL_CHILDREN(pc->pc_md.sysctl_tree), OID_AUTO,
327		    "nhighfps", CTLFLAG_RD, &pcs->pcs_nhighfps,
328		    "Number of IPI_HIGH_FP interrupts");
329
330		SYSCTL_ADD_ULONG(&pc->pc_md.sysctl_ctx,
331		    SYSCTL_CHILDREN(pc->pc_md.sysctl_tree), OID_AUTO,
332		    "nhwints", CTLFLAG_RD, &pcs->pcs_nhwints,
333		    "Number of hardware (device) interrupts");
334
335		SYSCTL_ADD_ULONG(&pc->pc_md.sysctl_ctx,
336		    SYSCTL_CHILDREN(pc->pc_md.sysctl_tree), OID_AUTO,
337		    "npreempts", CTLFLAG_RD, &pcs->pcs_npreempts,
338		    "Number of IPI_PREEMPT interrupts");
339
340		SYSCTL_ADD_ULONG(&pc->pc_md.sysctl_ctx,
341		    SYSCTL_CHILDREN(pc->pc_md.sysctl_tree), OID_AUTO,
342		    "nrdvs", CTLFLAG_RD, &pcs->pcs_nrdvs,
343		    "Number of IPI_RENDEZVOUS interrupts");
344
345		SYSCTL_ADD_ULONG(&pc->pc_md.sysctl_ctx,
346		    SYSCTL_CHILDREN(pc->pc_md.sysctl_tree), OID_AUTO,
347		    "nstops", CTLFLAG_RD, &pcs->pcs_nstops,
348		    "Number of IPI_STOP interrupts");
349
350		SYSCTL_ADD_ULONG(&pc->pc_md.sysctl_ctx,
351		    SYSCTL_CHILDREN(pc->pc_md.sysctl_tree), OID_AUTO,
352		    "nstrays", CTLFLAG_RD, &pcs->pcs_nstrays,
353		    "Number of stray interrupts");
354	}
355}
356SYSINIT(cpu_startup, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL);
357
358void
359cpu_flush_dcache(void *ptr, size_t len)
360{
361	vm_offset_t lim, va;
362
363	va = (uintptr_t)ptr & ~31;
364	lim = (uintptr_t)ptr + len;
365	while (va < lim) {
366		ia64_fc(va);
367		va += 32;
368	}
369
370	ia64_srlz_d();
371}
372
373/* Get current clock frequency for the given cpu id. */
374int
375cpu_est_clockrate(int cpu_id, uint64_t *rate)
376{
377
378	if (pcpu_find(cpu_id) == NULL || rate == NULL)
379		return (EINVAL);
380	*rate = (u_long)cpu_freq * 1000000ul;
381	return (0);
382}
383
/*
 * Halt the machine by asking the EFI firmware to reset the system.
 *
 * Defined with a (void) parameter list so the definition is a proper
 * prototype rather than an old-style, unchecked declaration.
 */
void
cpu_halt(void)
{

	efi_reset_system();
}
390
391void
392cpu_idle(int busy)
393{
394	register_t ie;
395
396	if (!busy) {
397		critical_enter();
398		cpu_idleclock();
399	}
400
401	ie = intr_disable();
402	KASSERT(ie != 0, ("%s called with interrupts disabled\n", __func__));
403
404	if (sched_runnable())
405		ia64_enable_intr();
406	else if (cpu_idle_hook != NULL) {
407		(*cpu_idle_hook)();
408		/* The hook must enable interrupts! */
409	} else {
410		ia64_call_pal_static(PAL_HALT_LIGHT, 0, 0, 0);
411		ia64_enable_intr();
412	}
413
414	if (!busy) {
415		cpu_activeclock();
416		critical_exit();
417	}
418}
419
/*
 * Wake an idle CPU. Nothing is done here; the function always returns 0
 * and ignores its argument.
 */
int
cpu_idle_wakeup(int cpu)
{
	const int woken = 0;

	return (woken);
}
426
/*
 * Reset the machine through the EFI firmware.
 *
 * Defined with a (void) parameter list so the definition is a proper
 * prototype rather than an old-style, unchecked declaration.
 */
void
cpu_reset(void)
{

	efi_reset_system();
}
433
/*
 * Switch the CPU from thread `old' to thread `new'.
 *
 * The outgoing context is saved into old's PCB with savectx(). When
 * savectx() returns zero the switch proceeds: the address space is changed
 * with pmap_switch(), old->td_lock is handed over to `mtx', curthread is
 * updated and restorectx() resumes `new' (it must not return). A non-zero
 * return from savectx() skips all of that and falls out of the function;
 * presumably that is the path taken when `old' is resumed later through
 * restorectx() -- confirm against the savectx()/restorectx() implementation.
 */
434void
435cpu_switch(struct thread *old, struct thread *new, struct mtx *mtx)
436{
437	struct pcb *oldpcb, *newpcb;
438
439	oldpcb = old->td_pcb;
440#ifdef COMPAT_FREEBSD32
441	ia32_savectx(oldpcb);
442#endif
	/* Set IA64_PSR_DFH in old's saved PSR if it is the CPU's fpcurthread. */
443	if (PCPU_GET(fpcurthread) == old)
444		old->td_frame->tf_special.psr |= IA64_PSR_DFH;
445	if (!savectx(oldpcb)) {
446		newpcb = new->td_pcb;
		/* pmap_switch() returns the previous pmap; remember it for old. */
447		oldpcb->pcb_current_pmap =
448		    pmap_switch(newpcb->pcb_current_pmap);
449
		/* Publish the new lock for old with release semantics. */
450		atomic_store_rel_ptr(&old->td_lock, mtx);
451
452#if defined(SCHED_ULE) && defined(SMP)
		/* Spin for as long as new's lock is still blocked_lock. */
453		while (atomic_load_acq_ptr(&new->td_lock) == &blocked_lock)
454			cpu_spinwait();
455#endif
456
457		PCPU_SET(curthread, new);
458
459#ifdef COMPAT_FREEBSD32
460		ia32_restorectx(newpcb);
461#endif
462
		/* Clear IA64_PSR_DFH for new if it is the CPU's fpcurthread. */
463		if (PCPU_GET(fpcurthread) == new)
464			new->td_frame->tf_special.psr &= ~IA64_PSR_DFH;
465		restorectx(newpcb);
466		/* We should not get here. */
467		panic("cpu_switch: restorectx() returned");
468		/* NOTREACHED */
469	}
470}
471
/*
 * Switch to thread `new' without saving any context for `old' (which is
 * unused). The address space is switched, curthread is updated and
 * restorectx() resumes `new'; a return from restorectx() is a panic.
 */
472void
473cpu_throw(struct thread *old __unused, struct thread *new)
474{
475	struct pcb *newpcb;
476
477	newpcb = new->td_pcb;
	/* The previous pmap returned by pmap_switch() is deliberately dropped. */
478	(void)pmap_switch(newpcb->pcb_current_pmap);
479
480#if defined(SCHED_ULE) && defined(SMP)
	/* Spin for as long as new's lock is still blocked_lock. */
481	while (atomic_load_acq_ptr(&new->td_lock) == &blocked_lock)
482		cpu_spinwait();
483#endif
484
485	PCPU_SET(curthread, new);
486
487#ifdef COMPAT_FREEBSD32
488	ia32_restorectx(newpcb);
489#endif
490
491	restorectx(newpcb);
492	/* We should not get here. */
493	panic("cpu_throw: restorectx() returned");
494	/* NOTREACHED */
495}
496
497void
498cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
499{
500
501	/*
502	 * Set pc_acpi_id to "uninitialized".
503	 * See sys/dev/acpica/acpi_cpu.c
504	 */
505	pcpu->pc_acpi_id = 0xffffffff;
506}
507
508void
509cpu_pcpu_setup(struct pcpu *pc, u_int acpi_id, u_int sapic_id)
510{
511
512	pc->pc_acpi_id = acpi_id;
513	pc->pc_md.lid = IA64_LID_SET_SAPIC_ID(sapic_id);
514}
515
516void
517spinlock_enter(void)
518{
519	struct thread *td;
520	int intr;
521
522	td = curthread;
523	if (td->td_md.md_spinlock_count == 0) {
524		intr = intr_disable();
525		td->td_md.md_spinlock_count = 1;
526		td->td_md.md_saved_intr = intr;
527	} else
528		td->td_md.md_spinlock_count++;
529	critical_enter();
530}
531
532void
533spinlock_exit(void)
534{
535	struct thread *td;
536	int intr;
537
538	td = curthread;
539	critical_exit();
540	intr = td->td_md.md_saved_intr;
541	td->td_md.md_spinlock_count--;
542	if (td->td_md.md_spinlock_count == 0)
543		intr_restore(intr);
544}
545
/*
 * Insert a translation for the VHPT at address `vhpt' into data translation
 * register 3. The page size used for both the purge and the insert is
 * pmap_vhpt_log2size.
 */
546void
547map_vhpt(uintptr_t vhpt)
548{
549	pt_entry_t pte;
550	uint64_t psr;
551
	/* Kernel-only, read/write, write-back mapping. */
552	pte = PTE_PRESENT | PTE_MA_WB | PTE_ACCESSED | PTE_DIRTY |
553	    PTE_PL_KERN | PTE_AR_RW;
554	pte |= vhpt & PTE_PPN_MASK;
555
	/* Purge any existing data translation covering the range. */
556	__asm __volatile("ptr.d %0,%1" :: "r"(vhpt),
557	    "r"(pmap_vhpt_log2size << 2));
558
	/*
	 * Save the PSR, then clear psr.ic and psr.i for the duration of the
	 * insert; the saved PSR is written back afterwards.
	 */
559	__asm __volatile("mov   %0=psr" : "=r"(psr));
560	__asm __volatile("rsm   psr.ic|psr.i");
561	ia64_srlz_i();
562	ia64_set_ifa(vhpt);
563	ia64_set_itir(pmap_vhpt_log2size << 2);
564	ia64_srlz_d();
565	__asm __volatile("itr.d dtr[%0]=%1" :: "r"(3), "r"(pte));
566	__asm __volatile("mov   psr.l=%0" :: "r" (psr));
567	ia64_srlz_i();
568}
569
/*
 * Insert translations for the PAL code into data translation register 4 and
 * instruction translation register 1. The mapping is placed at the region 7
 * address of ia64_pal_base. Nothing is done when ia64_pal_size is zero.
 */
570void
571map_pal_code(void)
572{
573	pt_entry_t pte;
574	vm_offset_t va;
575	vm_size_t sz;
576	uint64_t psr;
577	u_int shft;
578
579	if (ia64_pal_size == 0)
580		return;
581
582	va = IA64_PHYS_TO_RR7(ia64_pal_base);
583
	/* shft = floor(log2(ia64_pal_size)); used as the page size below. */
584	sz = ia64_pal_size;
585	shft = 0;
586	while (sz > 1) {
587		shft++;
588		sz >>= 1;
589	}
590
	/* Kernel-only, read/write/execute, write-back mapping. */
591	pte = PTE_PRESENT | PTE_MA_WB | PTE_ACCESSED | PTE_DIRTY |
592	    PTE_PL_KERN | PTE_AR_RWX;
593	pte |= ia64_pal_base & PTE_PPN_MASK;
594
	/* Purge existing data and instruction translations for the range. */
595	__asm __volatile("ptr.d %0,%1; ptr.i %0,%1" :: "r"(va), "r"(shft<<2));
596
	/*
	 * Save the PSR, then clear psr.ic and psr.i for the duration of the
	 * inserts; the saved PSR is written back afterwards.
	 */
597	__asm __volatile("mov	%0=psr" : "=r"(psr));
598	__asm __volatile("rsm	psr.ic|psr.i");
599	ia64_srlz_i();
600	ia64_set_ifa(va);
601	ia64_set_itir(shft << 2);
602	ia64_srlz_d();
603	__asm __volatile("itr.d	dtr[%0]=%1" :: "r"(4), "r"(pte));
604	ia64_srlz_d();
605	__asm __volatile("itr.i	itr[%0]=%1" :: "r"(1), "r"(pte));
606	__asm __volatile("mov	psr.l=%0" :: "r" (psr));
607	ia64_srlz_i();
608}
609
/*
 * Map the gateway page (ia64_gateway_page) at VM_MAXUSER_ADDRESS using data
 * translation register 5 and instruction translation register 2, with a
 * page size of PAGE_SHIFT, and store that address in ar.k5.
 */
610void
611map_gateway_page(void)
612{
613	pt_entry_t pte;
614	uint64_t psr;
615
616	pte = PTE_PRESENT | PTE_MA_WB | PTE_ACCESSED | PTE_DIRTY |
617	    PTE_PL_KERN | PTE_AR_X_RX;
	/* ia64_tpa() translates the kernel address of the page. */
618	pte |= ia64_tpa((uint64_t)ia64_gateway_page) & PTE_PPN_MASK;
619
	/* Purge existing data and instruction translations for the page. */
620	__asm __volatile("ptr.d %0,%1; ptr.i %0,%1" ::
621	    "r"(VM_MAXUSER_ADDRESS), "r"(PAGE_SHIFT << 2));
622
	/*
	 * Save the PSR, then clear psr.ic and psr.i for the duration of the
	 * inserts; the saved PSR is written back afterwards.
	 */
623	__asm __volatile("mov	%0=psr" : "=r"(psr));
624	__asm __volatile("rsm	psr.ic|psr.i");
625	ia64_srlz_i();
626	ia64_set_ifa(VM_MAXUSER_ADDRESS);
627	ia64_set_itir(PAGE_SHIFT << 2);
628	ia64_srlz_d();
629	__asm __volatile("itr.d	dtr[%0]=%1" :: "r"(5), "r"(pte));
630	ia64_srlz_d();
631	__asm __volatile("itr.i	itr[%0]=%1" :: "r"(2), "r"(pte));
632	__asm __volatile("mov	psr.l=%0" :: "r" (psr));
633	ia64_srlz_i();
634
635	/* Expose the mapping to userland in ar.k5 */
636	ia64_set_k5(VM_MAXUSER_ADDRESS);
637}
638
/*
 * Scale a base frequency by a packed ratio and convert the result to MHz.
 *
 * base:   base frequency in Hz.
 * ratio:  numerator in the upper 32 bits, denominator in the lower 32 bits.
 *
 * Returns base * numerator / denominator, rounded to the nearest MHz. A
 * zero denominator yields 0 ("frequency unknown") instead of dividing by
 * zero; callers already treat a zero frequency as not available.
 */
static u_int
freq_ratio(u_long base, u_long ratio)
{
	u_long den, f, num;

	num = ratio >> 32;
	den = ratio & 0xfffffffful;
	if (den == 0)
		return (0);

	f = (base * num) / den;
	/* Round to the nearest MHz. */
	return ((f + 500000) / 1000000);
}
647
648static void
649calculate_frequencies(void)
650{
651	struct ia64_sal_result sal;
652	struct ia64_pal_result pal;
653	register_t ie;
654
655	ie = intr_disable();
656	sal = ia64_sal_entry(SAL_FREQ_BASE, 0, 0, 0, 0, 0, 0, 0);
657	pal = ia64_call_pal_static(PAL_FREQ_RATIOS, 0, 0, 0);
658	intr_restore(ie);
659
660	if (sal.sal_status == 0 && pal.pal_status == 0) {
661		if (bootverbose) {
662			printf("Platform clock frequency %ld Hz\n",
663			       sal.sal_result[0]);
664			printf("Processor ratio %ld/%ld, Bus ratio %ld/%ld, "
665			       "ITC ratio %ld/%ld\n",
666			       pal.pal_result[0] >> 32,
667			       pal.pal_result[0] & ((1L << 32) - 1),
668			       pal.pal_result[1] >> 32,
669			       pal.pal_result[1] & ((1L << 32) - 1),
670			       pal.pal_result[2] >> 32,
671			       pal.pal_result[2] & ((1L << 32) - 1));
672		}
673		cpu_freq = freq_ratio(sal.sal_result[0], pal.pal_result[0]);
674		bus_freq = freq_ratio(sal.sal_result[0], pal.pal_result[1]);
675		itc_freq = freq_ratio(sal.sal_result[0], pal.pal_result[2]);
676	}
677}
678
/*
 * Early machine-dependent bootstrap.
 *
 * In order: sets up the FPSR and region registers 6 and 7, builds the
 * physical memory map from the EFI memory descriptors (minus the PBVM and
 * its page table), picks up loader metadata, environment and boot flags,
 * wires up firmware access (PAL, EFI, SAL), computes clock frequencies,
 * initializes the bootstrap CPU's pcpu data and the console, then sets up
 * the message buffer, thread0/proc0, the pmap and the debugger.
 *
 * Returns the backing store pointer and stack pointer taken from thread0's
 * PCB.
 */
679struct ia64_init_return
680ia64_init(void)
681{
682	struct ia64_init_return ret;
683	struct efi_md *md;
684	pt_entry_t *pbvm_pgtbl_ent, *pbvm_pgtbl_lim;
685	char *p;
686	vm_size_t mdlen;
687	int metadata_missing;
688
689	/*
690	 * NO OUTPUT ALLOWED UNTIL FURTHER NOTICE.
691	 */
692
693	ia64_set_fpsr(IA64_FPSR_DEFAULT);
694
695	/*
696	 * Region 6 is direct mapped UC and region 7 is direct mapped
697	 * WC. The details of this is controlled by the Alt {I,D}TLB
698	 * handlers. Here we just make sure that they have the largest
699	 * possible page size to minimise TLB usage.
700	 */
701	ia64_set_rr(IA64_RR_BASE(6), (6 << 8) | (PAGE_SHIFT << 2));
702	ia64_set_rr(IA64_RR_BASE(7), (7 << 8) | (PAGE_SHIFT << 2));
703	ia64_srlz_d();
704
705	/* Initialize/setup physical memory datastructures */
706	ia64_physmem_init();
707
708	/*
709	 * Process the memory map. This gives us the PAL locations,
710	 * the I/O port base address, the available memory regions
711	 * for initializing the physical memory map.
712	 */
713	for (md = efi_md_first(); md != NULL; md = efi_md_next(md)) {
714		mdlen = md->md_pages * EFI_PAGE_SIZE;
715		switch (md->md_type) {
716		case EFI_MD_TYPE_IOPORT:
717			ia64_port_base = (uintptr_t)pmap_mapdev(md->md_phys,
718			    mdlen);
719			break;
720		case EFI_MD_TYPE_PALCODE:
			/* Remembered for map_pal_code(); also tracked below. */
721			ia64_pal_base = md->md_phys;
722			ia64_pal_size = mdlen;
723			/*FALLTHROUGH*/
724		case EFI_MD_TYPE_BAD:
725		case EFI_MD_TYPE_FIRMWARE:
726		case EFI_MD_TYPE_RECLAIM:
727		case EFI_MD_TYPE_RT_CODE:
728		case EFI_MD_TYPE_RT_DATA:
729			/* Don't use these memory regions. */
730			ia64_physmem_track(md->md_phys, mdlen);
731			break;
732		case EFI_MD_TYPE_BS_CODE:
733		case EFI_MD_TYPE_BS_DATA:
734		case EFI_MD_TYPE_CODE:
735		case EFI_MD_TYPE_DATA:
736		case EFI_MD_TYPE_FREE:
737			/* These are ok to use. */
738			ia64_physmem_add(md->md_phys, mdlen);
739			break;
		/* Descriptor types not listed above are ignored. */
740		}
741	}
742
743	/*
744	 * Remove the PBVM and its page table from phys_avail. The loader
745	 * passes the physical address of the page table to us. The virtual
746	 * address of the page table is fixed.
747	 * Track and the PBVM limit for later use.
748	 */
749	ia64_physmem_delete(bootinfo->bi_pbvm_pgtbl, bootinfo->bi_pbvm_pgtblsz);
750	pbvm_pgtbl_ent = (void *)IA64_PBVM_PGTBL;
751	pbvm_pgtbl_lim = (void *)(IA64_PBVM_PGTBL + bootinfo->bi_pbvm_pgtblsz);
	/* Walk the page table; the first non-present entry ends the scan. */
752	while (pbvm_pgtbl_ent < pbvm_pgtbl_lim) {
753		if ((*pbvm_pgtbl_ent & PTE_PRESENT) == 0)
754			break;
755		ia64_physmem_delete(*pbvm_pgtbl_ent & PTE_PPN_MASK,
756		    IA64_PBVM_PAGE_SIZE);
757		pbvm_pgtbl_ent++;
758	}
759
760	/* Finalize physical memory datastructures */
761	ia64_physmem_fini();
762
	/*
	 * Missing metadata is only recorded here; the warning is printed
	 * once the console is up.
	 */
763	metadata_missing = 0;
764	if (bootinfo->bi_modulep)
765		preload_metadata = (caddr_t)bootinfo->bi_modulep;
766	else
767		metadata_missing = 1;
768
	/* Use the loader's environment when allowed, else the static one. */
769	if (envmode == 0 && bootinfo->bi_envp)
770		kern_envp = (caddr_t)bootinfo->bi_envp;
771	else
772		kern_envp = static_env;
773
774	/*
775	 * Look at arguments passed to us and compute boothowto.
776	 */
777	boothowto = bootinfo->bi_boothowto;
778
779	if (boothowto & RB_VERBOSE)
780		bootverbose = 1;
781
782	/*
783	 * Wire things up so we can call the firmware.
784	 */
785	map_pal_code();
786	efi_boot_minimal(bootinfo->bi_systab);
787	ia64_xiv_init();
788	ia64_sal_init();
789	calculate_frequencies();
790
	/* itc_freq is kept in MHz; the ticker frequency is passed in Hz. */
791	set_cputicker(ia64_get_itc, (u_long)itc_freq * 1000000, 0);
792
793	/*
794	 * Setup the PCPU data for the bootstrap processor. It is needed
795	 * by printf(). Also, since printf() has critical sections, we
796	 * need to initialize at least pc_curthread.
797	 */
798	pcpup = &pcpu0;
799	ia64_set_k4((u_int64_t)pcpup);
800	pcpu_init(pcpup, 0, sizeof(pcpu0));
801	dpcpu_init(ia64_physmem_alloc(DPCPU_SIZE, PAGE_SIZE), 0);
802	cpu_pcpu_setup(pcpup, ~0U, ia64_get_lid());
803	PCPU_SET(curthread, &thread0);
804
805	/*
806	 * Initialize the console before we print anything out.
807	 */
808	cninit();
809
810	/* OUTPUT NOW ALLOWED */
811
812	if (metadata_missing)
813		printf("WARNING: loader(8) metadata is missing!\n");
814
815	/* Get FPSWA interface */
816	fpswa_iface = (bootinfo->bi_fpswa == 0) ? NULL :
817	    (struct fpswa_iface *)IA64_PHYS_TO_RR7(bootinfo->bi_fpswa);
818
819	/* Init basic tunables, including hz */
820	init_param1();
821
822	p = getenv("kernelname");
823	if (p != NULL) {
824		strlcpy(kernelname, p, sizeof(kernelname));
825		freeenv(p);
826	}
827
828	init_param2(physmem);
829
830	/*
831	 * Initialize error message buffer (at end of core).
832	 */
833	msgbufp = ia64_physmem_alloc(msgbufsize, PAGE_SIZE);
834	msgbufinit(msgbufp, msgbufsize);
835
836	proc_linkup0(&proc0, &thread0);
837	/*
838	 * Init mapping for kernel stack for proc 0
839	 */
840	p = ia64_physmem_alloc(KSTACK_PAGES * PAGE_SIZE, PAGE_SIZE);
841	thread0.td_kstack = (uintptr_t)p;
842	thread0.td_kstack_pages = KSTACK_PAGES;
843
844	mutex_init();
845
846	/*
847	 * Initialize the rest of proc 0's PCB.
848	 *
849	 * Set the kernel sp, reserving space for an (empty) trapframe,
850	 * and make proc0's trapframe pointer point to it for sanity.
851	 * Initialise proc0's backing store to start after u area.
852	 */
853	cpu_thread_alloc(&thread0);
854	thread0.td_frame->tf_flags = FRAME_SYSCALL;
855	thread0.td_pcb->pcb_special.sp =
856	    (u_int64_t)thread0.td_frame - 16;
857	thread0.td_pcb->pcb_special.bspstore = thread0.td_kstack;
858
859	/*
860	 * Initialize the virtual memory system.
861	 */
862	pmap_bootstrap();
863
864	/*
865	 * Initialize debuggers, and break into them if appropriate.
866	 */
867#ifdef DDB
868	ksym_start = bootinfo->bi_symtab;
869	ksym_end = bootinfo->bi_esymtab;
870#endif
871
872	kdb_init();
873
874#ifdef KDB
875	if (boothowto & RB_KDB)
876		kdb_enter(KDB_WHY_BOOTFLAGS,
877		    "Boot flags requested debugger\n");
878#endif
879
880	ia64_set_tpr(0);
881	ia64_srlz_d();
882
	/* Hand thread0's initial register stack and stack pointers back. */
883	ret.bspstore = thread0.td_pcb->pcb_special.bspstore;
884	ret.sp = thread0.td_pcb->pcb_special.sp;
885	return (ret);
886}
887
888uint64_t
889ia64_get_hcdp(void)
890{
891
892	return (bootinfo->bi_hcdp);
893}
894
/*
 * Zero `len' bytes starting at `buf'.
 *
 * Bytes are cleared one at a time until the pointer is word aligned, then
 * in groups of eight words, then single words, and finally the remaining
 * tail is cleared byte by byte.
 */
void
bzero(void *buf, size_t len)
{
	const size_t word = sizeof(unsigned long);
	const size_t chunk = 8 * sizeof(unsigned long);
	unsigned long *w;
	char *cur;

	cur = buf;

	/* Unaligned head. */
	for (; len != 0 && ((uintptr_t)cur & (word - 1)) != 0; len--)
		*cur++ = 0;

	/* Eight words per iteration. */
	for (; len >= chunk; len -= chunk, cur += chunk) {
		w = (unsigned long *)cur;
		w[0] = 0;
		w[1] = 0;
		w[2] = 0;
		w[3] = 0;
		w[4] = 0;
		w[5] = 0;
		w[6] = 0;
		w[7] = 0;
	}

	/* One word per iteration. */
	for (; len >= word; len -= word, cur += word)
		*(unsigned long *)cur = 0;

	/* Tail bytes. */
	for (; len != 0; len--)
		*cur++ = 0;
}
926
927u_int
928ia64_itc_freq(void)
929{
930
931	return (itc_freq);
932}
933
934void
935DELAY(int n)
936{
937	u_int64_t start, end, now;
938
939	sched_pin();
940
941	start = ia64_get_itc();
942	end = start + itc_freq * n;
943	/* printf("DELAY from 0x%lx to 0x%lx\n", start, end); */
944	do {
945		now = ia64_get_itc();
946	} while (now < end || (now > start && end < start));
947
948	sched_unpin();
949}
950
951/*
952 * Send an interrupt (signal) to a process.
 *
 * A struct sigframe is assembled in kernel memory and copied out either to
 * the top of the alternate signal stack or below the current user stack
 * pointer. The trapframe is then rewritten so that the thread resumes in
 * one of the signal trampolines of the gateway page.
 *
 * Called with the process lock and the sigacts mutex held (both asserted);
 * both are dropped around get_mcontext()/copyout() and re-acquired before
 * returning. If the copyout fails the process is terminated via
 * sigexit(td, SIGILL).
953 */
954void
955sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
956{
957	struct proc *p;
958	struct thread *td;
959	struct trapframe *tf;
960	struct sigacts *psp;
961	struct sigframe sf, *sfp;
962	u_int64_t sbs, sp;
963	int oonstack;
964	int sig;
965	u_long code;
966
967	td = curthread;
968	p = td->td_proc;
969	PROC_LOCK_ASSERT(p, MA_OWNED);
970	sig = ksi->ksi_signo;
971	code = ksi->ksi_code;
972	psp = p->p_sigacts;
973	mtx_assert(&psp->ps_mtx, MA_OWNED);
974	tf = td->td_frame;
975	sp = tf->tf_special.sp;
976	oonstack = sigonstack(sp);
	/* sbs stays 0 unless the handler is moved to the alternate stack. */
977	sbs = 0;
978
979	/* save user context */
980	bzero(&sf, sizeof(struct sigframe));
981	sf.sf_uc.uc_sigmask = *mask;
982	sf.sf_uc.uc_stack = td->td_sigstk;
983	sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
984	    ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE;
985
986	/*
987	 * Allocate and validate space for the signal handler
988	 * context. Note that if the stack is in P0 space, the
989	 * call to grow() is a nop, and the useracc() check
990	 * will fail if the process has not already allocated
991	 * the space with a `brk'.
992	 */
993	if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack &&
994	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
		/*
		 * The 16-byte aligned bottom of the alternate stack becomes
		 * the new backing store; the frame goes at its top.
		 */
995		sbs = (u_int64_t)td->td_sigstk.ss_sp;
996		sbs = (sbs + 15) & ~15;
997		sfp = (struct sigframe *)(sbs + td->td_sigstk.ss_size);
998#if defined(COMPAT_43)
999		td->td_sigstk.ss_flags |= SS_ONSTACK;
1000#endif
1001	} else
1002		sfp = (struct sigframe *)sp;
	/* Make room for one frame below that point, 16-byte aligned. */
1003	sfp = (struct sigframe *)((u_int64_t)(sfp - 1) & ~15);
1004
1005	/* Fill in the siginfo structure for POSIX handlers. */
1006	if (SIGISMEMBER(psp->ps_siginfo, sig)) {
1007		sf.sf_si = ksi->ksi_info;
1008		sf.sf_si.si_signo = sig;
1009		/*
1010		 * XXX this shouldn't be here after code in trap.c
1011		 * is fixed
1012		 */
1013		sf.sf_si.si_addr = (void*)tf->tf_special.ifa;
		/* The handler receives the user address of the siginfo. */
1014		code = (u_int64_t)&sfp->sf_si;
1015	}
1016
1017	mtx_unlock(&psp->ps_mtx);
1018	PROC_UNLOCK(p);
1019
	/* NOTE(review): the return value of get_mcontext() is not checked. */
1020	get_mcontext(td, &sf.sf_uc.uc_mcontext, 0);
1021
1022	/* Copy the frame out to userland. */
1023	if (copyout(&sf, sfp, sizeof(sf)) != 0) {
1024		/*
1025		 * Process has trashed its stack; give it an illegal
1026		 * instruction to halt it in its tracks.
1027		 */
1028		PROC_LOCK(p);
1029		sigexit(td, SIGILL);
1030		return;
1031	}
1032
	/*
	 * Pick the trampoline by how the kernel was entered; its user address
	 * is its offset within the gateway page added to ar.k5.
	 */
1033	if ((tf->tf_flags & FRAME_SYSCALL) == 0) {
1034		tf->tf_special.psr &= ~IA64_PSR_RI;
1035		tf->tf_special.iip = ia64_get_k5() +
1036		    ((uint64_t)break_sigtramp - (uint64_t)ia64_gateway_page);
1037	} else
1038		tf->tf_special.iip = ia64_get_k5() +
1039		    ((uint64_t)epc_sigtramp - (uint64_t)ia64_gateway_page);
1040
1041	/*
1042	 * Setup the trapframe to return to the signal trampoline. We pass
1043	 * information to the trampoline in the following registers:
1044	 *
1045	 *	gp	new backing store or NULL
1046	 *	r8	signal number
1047	 *	r9	signal code or siginfo pointer
1048	 *	r10	signal handler (function descriptor)
1049	 */
1050	tf->tf_special.sp = (u_int64_t)sfp - 16;
1051	tf->tf_special.gp = sbs;
1052	tf->tf_special.bspstore = sf.sf_uc.uc_mcontext.mc_special.bspstore;
1053	tf->tf_special.ndirty = 0;
1054	tf->tf_special.rnat = sf.sf_uc.uc_mcontext.mc_special.rnat;
1055	tf->tf_scratch.gr8 = sig;
1056	tf->tf_scratch.gr9 = code;
1057	tf->tf_scratch.gr10 = (u_int64_t)catcher;
1058
	/* Re-acquire the locks the caller holds. */
1059	PROC_LOCK(p);
1060	mtx_lock(&psp->ps_mtx);
1061}
1062
1063/*
1064 * System call to cleanup state after a signal
1065 * has been taken.  Reset signal mask and
1066 * stack state from context left by sendsig (above).
1067 * Return to previous pc and psl as specified by
1068 * context left by sendsig. Check carefully to
1069 * make sure that the user has not modified the
1070 * state to gain improper privileges.
1071 *
1072 * MPSAFE
1073 */
1074int
1075sys_sigreturn(struct thread *td,
1076	struct sigreturn_args /* {
1077		ucontext_t *sigcntxp;
1078	} */ *uap)
1079{
1080	ucontext_t uc;
1081	struct trapframe *tf;
1082	struct pcb *pcb;
1083
1084	tf = td->td_frame;
1085	pcb = td->td_pcb;
1086
1087	/*
1088	 * Fetch the entire context structure at once for speed.
1089	 * We don't use a normal argument to simplify RSE handling.
1090	 */
1091	if (copyin(uap->sigcntxp, (caddr_t)&uc, sizeof(uc)))
1092		return (EFAULT);
1093
1094	set_mcontext(td, &uc.uc_mcontext);
1095
1096#if defined(COMPAT_43)
1097	if (sigonstack(tf->tf_special.sp))
1098		td->td_sigstk.ss_flags |= SS_ONSTACK;
1099	else
1100		td->td_sigstk.ss_flags &= ~SS_ONSTACK;
1101#endif
1102	kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0);
1103
1104	return (EJUSTRETURN);
1105}
1106
#ifdef COMPAT_FREEBSD4
/*
 * FreeBSD 4 compatibility entry point: forwards to sys_sigreturn() with the
 * argument structure reinterpreted as struct sigreturn_args.
 */
int
freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap)
{
	struct sigreturn_args *args;

	args = (struct sigreturn_args *)uap;
	return (sys_sigreturn(td, args));
}
#endif
1115
1116/*
1117 * Construct a PCB from a trapframe. This is called from kdb_trap() where
1118 * we want to start a backtrace from the function that caused us to enter
1119 * the debugger. We have the context in the trapframe, but base the trace
1120 * on the PCB. The PCB doesn't have to be perfect, as long as it contains
1121 * enough for a backtrace.
1122 */
1123void
1124makectx(struct trapframe *tf, struct pcb *pcb)
1125{
1126
1127	pcb->pcb_special = tf->tf_special;
1128	pcb->pcb_special.__spare = ~0UL;	/* XXX see unwind.c */
1129	save_callee_saved(&pcb->pcb_preserved);
1130	save_callee_saved_fp(&pcb->pcb_preserved_fp);
1131}
1132
/*
 * Write the `r->ndirty' bytes of dirty registers held on the thread's
 * kernel stack out to the user backing store at r->bspstore.
 *
 * For the current thread the data is written with copyout() and r->rnat is
 * updated; for any other thread it is written with proc_rwmem(). In both
 * cases r->bspstore is advanced by ndirty and r->ndirty is cleared, even
 * when the write failed. Returns 0 or the error from the write (ENOSPC for
 * a short proc_rwmem() write).
 */
1133int
1134ia64_flush_dirty(struct thread *td, struct _special *r)
1135{
1136	struct iovec iov;
1137	struct uio uio;
1138	uint64_t bspst, kstk, rnat;
1139	int error, locked;
1140
1141	if (r->ndirty == 0)
1142		return (0);
1143
	/*
	 * Source address on the kernel stack: its base plus the low 9 bits
	 * of the user backing store pointer.
	 */
1144	kstk = td->td_kstack + (r->bspstore & 0x1ffUL);
1145	if (td == curthread) {
		/* ar.rsc is set to 0 while bspstore/rnat are read, then to 3. */
1146		__asm __volatile("mov	ar.rsc=0;;");
1147		__asm __volatile("mov	%0=ar.bspstore" : "=r"(bspst));
1148		/* Make sure we have all the user registers written out. */
1149		if (bspst - kstk < r->ndirty) {
1150			__asm __volatile("flushrs;;");
1151			__asm __volatile("mov	%0=ar.bspstore" : "=r"(bspst));
1152		}
1153		__asm __volatile("mov	%0=ar.rnat;;" : "=r"(rnat));
1154		__asm __volatile("mov	ar.rsc=3");
1155		error = copyout((void*)kstk, (void*)r->bspstore, r->ndirty);
1156		kstk += r->ndirty;
		/*
		 * Take rnat from the slot at (kstk | 0x1f8) on the kernel stack
		 * when the condition below holds, else from ar.rnat.
		 */
1157		r->rnat = (bspst > kstk && (bspst & 0x1ffL) < (kstk & 0x1ffL))
1158		    ? *(uint64_t*)(kstk | 0x1f8L) : rnat;
1159	} else {
		/* Hold the process unless its lock is already held. */
1160		locked = PROC_LOCKED(td->td_proc);
1161		if (!locked)
1162			PHOLD(td->td_proc);
1163		iov.iov_base = (void*)(uintptr_t)kstk;
1164		iov.iov_len = r->ndirty;
1165		uio.uio_iov = &iov;
1166		uio.uio_iovcnt = 1;
1167		uio.uio_offset = r->bspstore;
1168		uio.uio_resid = r->ndirty;
1169		uio.uio_segflg = UIO_SYSSPACE;
1170		uio.uio_rw = UIO_WRITE;
1171		uio.uio_td = td;
1172		error = proc_rwmem(td->td_proc, &uio);
1173		/*
1174		 * XXX proc_rwmem() doesn't currently return ENOSPC,
1175		 * so I think it can bogusly return 0. Neither do
1176		 * we allow short writes.
1177		 */
1178		if (uio.uio_resid != 0 && error == 0)
1179			error = ENOSPC;
1180		if (!locked)
1181			PRELE(td->td_proc);
1182	}
1183
1184	r->bspstore += r->ndirty;
1185	r->ndirty = 0;
1186	return (error);
1187}
1188
1189int
1190get_mcontext(struct thread *td, mcontext_t *mc, int flags)
1191{
1192	struct trapframe *tf;
1193	int error;
1194
1195	tf = td->td_frame;
1196	bzero(mc, sizeof(*mc));
1197	mc->mc_special = tf->tf_special;
1198	error = ia64_flush_dirty(td, &mc->mc_special);
1199	if (tf->tf_flags & FRAME_SYSCALL) {
1200		mc->mc_flags |= _MC_FLAGS_SYSCALL_CONTEXT;
1201		mc->mc_scratch = tf->tf_scratch;
1202		if (flags & GET_MC_CLEAR_RET) {
1203			mc->mc_scratch.gr8 = 0;
1204			mc->mc_scratch.gr9 = 0;
1205			mc->mc_scratch.gr10 = 0;
1206			mc->mc_scratch.gr11 = 0;
1207		}
1208	} else {
1209		mc->mc_flags |= _MC_FLAGS_ASYNC_CONTEXT;
1210		mc->mc_scratch = tf->tf_scratch;
1211		mc->mc_scratch_fp = tf->tf_scratch_fp;
1212		/*
1213		 * XXX If the thread never used the high FP registers, we
1214		 * probably shouldn't waste time saving them.
1215		 */
1216		ia64_highfp_save(td);
1217		mc->mc_flags |= _MC_FLAGS_HIGHFP_VALID;
1218		mc->mc_high_fp = td->td_pcb->pcb_high_fp;
1219	}
1220	save_callee_saved(&mc->mc_preserved);
1221	save_callee_saved_fp(&mc->mc_preserved_fp);
1222	return (error);
1223}
1224
1225int
1226set_mcontext(struct thread *td, const mcontext_t *mc)
1227{
1228	struct _special s;
1229	struct trapframe *tf;
1230	uint64_t psrmask;
1231
1232	tf = td->td_frame;
1233
1234	KASSERT((tf->tf_special.ndirty & ~PAGE_MASK) == 0,
1235	    ("Whoa there! We have more than 8KB of dirty registers!"));
1236
1237	s = mc->mc_special;
1238	/*
1239	 * Only copy the user mask and the restart instruction bit from
1240	 * the new context.
1241	 */
1242	psrmask = IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL |
1243	    IA64_PSR_MFH | IA64_PSR_RI;
1244	s.psr = (tf->tf_special.psr & ~psrmask) | (s.psr & psrmask);
1245	/* We don't have any dirty registers of the new context. */
1246	s.ndirty = 0;
1247	if (mc->mc_flags & _MC_FLAGS_ASYNC_CONTEXT) {
1248		/*
1249		 * We can get an async context passed to us while we
1250		 * entered the kernel through a syscall: sigreturn(2)
1251		 * takes contexts that could previously be the result of
1252		 * a trap or interrupt.
1253		 * Hence, we cannot assert that the trapframe is not
1254		 * a syscall frame, but we can assert that it's at
1255		 * least an expected syscall.
1256		 */
1257		if (tf->tf_flags & FRAME_SYSCALL) {
1258			KASSERT(tf->tf_scratch.gr15 == SYS_sigreturn, ("foo"));
1259			tf->tf_flags &= ~FRAME_SYSCALL;
1260		}
1261		tf->tf_scratch = mc->mc_scratch;
1262		tf->tf_scratch_fp = mc->mc_scratch_fp;
1263		if (mc->mc_flags & _MC_FLAGS_HIGHFP_VALID)
1264			td->td_pcb->pcb_high_fp = mc->mc_high_fp;
1265	} else {
1266		KASSERT((tf->tf_flags & FRAME_SYSCALL) != 0, ("foo"));
1267		if ((mc->mc_flags & _MC_FLAGS_SYSCALL_CONTEXT) == 0) {
1268			s.cfm = tf->tf_special.cfm;
1269			s.iip = tf->tf_special.iip;
1270			tf->tf_scratch.gr15 = 0;	/* Clear syscall nr. */
1271		} else
1272			tf->tf_scratch = mc->mc_scratch;
1273	}
1274	tf->tf_special = s;
1275	restore_callee_saved(&mc->mc_preserved);
1276	restore_callee_saved_fp(&mc->mc_preserved_fp);
1277
1278	return (0);
1279}
1280
1281/*
1282 * Clear registers on exec.
1283 */
1284void
1285exec_setregs(struct thread *td, struct image_params *imgp, u_long stack)
1286{
1287	struct trapframe *tf;
1288	uint64_t *ksttop, *kst;
1289
1290	tf = td->td_frame;
1291	ksttop = (uint64_t*)(td->td_kstack + tf->tf_special.ndirty +
1292	    (tf->tf_special.bspstore & 0x1ffUL));
1293
1294	/*
1295	 * We can ignore up to 8KB of dirty registers by masking off the
1296	 * lower 13 bits in exception_restore() or epc_syscall(). This
1297	 * should be enough for a couple of years, but if there are more
1298	 * than 8KB of dirty registers, we lose track of the bottom of
1299	 * the kernel stack. The solution is to copy the active part of
1300	 * the kernel stack down 1 page (or 2, but not more than that)
1301	 * so that we always have less than 8KB of dirty registers.
1302	 */
1303	KASSERT((tf->tf_special.ndirty & ~PAGE_MASK) == 0,
1304	    ("Whoa there! We have more than 8KB of dirty registers!"));
1305
1306	bzero(&tf->tf_special, sizeof(tf->tf_special));
1307	if ((tf->tf_flags & FRAME_SYSCALL) == 0) {	/* break syscalls. */
1308		bzero(&tf->tf_scratch, sizeof(tf->tf_scratch));
1309		bzero(&tf->tf_scratch_fp, sizeof(tf->tf_scratch_fp));
1310		tf->tf_special.cfm = (1UL<<63) | (3UL<<7) | 3UL;
1311		tf->tf_special.bspstore = IA64_BACKINGSTORE;
1312		/*
1313		 * Copy the arguments onto the kernel register stack so that
1314		 * they get loaded by the loadrs instruction. Skip over the
1315		 * NaT collection points.
1316		 */
1317		kst = ksttop - 1;
1318		if (((uintptr_t)kst & 0x1ff) == 0x1f8)
1319			*kst-- = 0;
1320		*kst-- = 0;
1321		if (((uintptr_t)kst & 0x1ff) == 0x1f8)
1322			*kst-- = 0;
1323		*kst-- = imgp->ps_strings;
1324		if (((uintptr_t)kst & 0x1ff) == 0x1f8)
1325			*kst-- = 0;
1326		*kst = stack;
1327		tf->tf_special.ndirty = (ksttop - kst) << 3;
1328	} else {				/* epc syscalls (default). */
1329		tf->tf_special.cfm = (3UL<<62) | (3UL<<7) | 3UL;
1330		tf->tf_special.bspstore = IA64_BACKINGSTORE + 24;
1331		/*
1332		 * Write values for out0, out1 and out2 to the user's backing
1333		 * store and arrange for them to be restored into the user's
1334		 * initial register frame.
1335		 * Assumes that (bspstore & 0x1f8) < 0x1e0.
1336		 */
1337		suword((caddr_t)tf->tf_special.bspstore - 24, stack);
1338		suword((caddr_t)tf->tf_special.bspstore - 16, imgp->ps_strings);
1339		suword((caddr_t)tf->tf_special.bspstore -  8, 0);
1340	}
1341
1342	tf->tf_special.iip = imgp->entry_addr;
1343	tf->tf_special.sp = (stack & ~15) - 16;
1344	tf->tf_special.rsc = 0xf;
1345	tf->tf_special.fpsr = IA64_FPSR_DEFAULT;
1346	tf->tf_special.psr = IA64_PSR_IC | IA64_PSR_I | IA64_PSR_IT |
1347	    IA64_PSR_DT | IA64_PSR_RT | IA64_PSR_DFH | IA64_PSR_BN |
1348	    IA64_PSR_CPL_USER;
1349}
1350
1351int
1352ptrace_set_pc(struct thread *td, unsigned long addr)
1353{
1354	uint64_t slot;
1355
1356	switch (addr & 0xFUL) {
1357	case 0:
1358		slot = IA64_PSR_RI_0;
1359		break;
1360	case 1:
1361		/* XXX we need to deal with MLX bundles here */
1362		slot = IA64_PSR_RI_1;
1363		break;
1364	case 2:
1365		slot = IA64_PSR_RI_2;
1366		break;
1367	default:
1368		return (EINVAL);
1369	}
1370
1371	td->td_frame->tf_special.iip = addr & ~0x0FULL;
1372	td->td_frame->tf_special.psr =
1373	    (td->td_frame->tf_special.psr & ~IA64_PSR_RI) | slot;
1374	return (0);
1375}
1376
1377int
1378ptrace_single_step(struct thread *td)
1379{
1380	struct trapframe *tf;
1381
1382	/*
1383	 * There's no way to set single stepping when we're leaving the
1384	 * kernel through the EPC syscall path. The way we solve this is
1385	 * by enabling the lower-privilege trap so that we re-enter the
1386	 * kernel as soon as the privilege level changes. See trap.c for
1387	 * how we proceed from there.
1388	 */
1389	tf = td->td_frame;
1390	if (tf->tf_flags & FRAME_SYSCALL)
1391		tf->tf_special.psr |= IA64_PSR_LP;
1392	else
1393		tf->tf_special.psr |= IA64_PSR_SS;
1394	return (0);
1395}
1396
1397int
1398ptrace_clear_single_step(struct thread *td)
1399{
1400	struct trapframe *tf;
1401
1402	/*
1403	 * Clear any and all status bits we may use to implement single
1404	 * stepping.
1405	 */
1406	tf = td->td_frame;
1407	tf->tf_special.psr &= ~IA64_PSR_SS;
1408	tf->tf_special.psr &= ~IA64_PSR_LP;
1409	tf->tf_special.psr &= ~IA64_PSR_TB;
1410	return (0);
1411}
1412
1413int
1414fill_regs(struct thread *td, struct reg *regs)
1415{
1416	struct trapframe *tf;
1417
1418	tf = td->td_frame;
1419	regs->r_special = tf->tf_special;
1420	regs->r_scratch = tf->tf_scratch;
1421	save_callee_saved(&regs->r_preserved);
1422	return (0);
1423}
1424
1425int
1426set_regs(struct thread *td, struct reg *regs)
1427{
1428	struct trapframe *tf;
1429	int error;
1430
1431	tf = td->td_frame;
1432	error = ia64_flush_dirty(td, &tf->tf_special);
1433	if (!error) {
1434		tf->tf_special = regs->r_special;
1435		tf->tf_special.bspstore += tf->tf_special.ndirty;
1436		tf->tf_special.ndirty = 0;
1437		tf->tf_scratch = regs->r_scratch;
1438		restore_callee_saved(&regs->r_preserved);
1439	}
1440	return (error);
1441}
1442
1443int
1444fill_dbregs(struct thread *td, struct dbreg *dbregs)
1445{
1446
1447	return (ENOSYS);
1448}
1449
1450int
1451set_dbregs(struct thread *td, struct dbreg *dbregs)
1452{
1453
1454	return (ENOSYS);
1455}
1456
1457int
1458fill_fpregs(struct thread *td, struct fpreg *fpregs)
1459{
1460	struct trapframe *frame = td->td_frame;
1461	struct pcb *pcb = td->td_pcb;
1462
1463	/* Save the high FP registers. */
1464	ia64_highfp_save(td);
1465
1466	fpregs->fpr_scratch = frame->tf_scratch_fp;
1467	save_callee_saved_fp(&fpregs->fpr_preserved);
1468	fpregs->fpr_high = pcb->pcb_high_fp;
1469	return (0);
1470}
1471
1472int
1473set_fpregs(struct thread *td, struct fpreg *fpregs)
1474{
1475	struct trapframe *frame = td->td_frame;
1476	struct pcb *pcb = td->td_pcb;
1477
1478	/* Throw away the high FP registers (should be redundant). */
1479	ia64_highfp_drop(td);
1480
1481	frame->tf_scratch_fp = fpregs->fpr_scratch;
1482	restore_callee_saved_fp(&fpregs->fpr_preserved);
1483	pcb->pcb_high_fp = fpregs->fpr_high;
1484	return (0);
1485}
1486
1487void
1488ia64_sync_icache(vm_offset_t va, vm_offset_t sz)
1489{
1490	vm_offset_t lim;
1491
1492	if (!ia64_sync_icache_needed)
1493		return;
1494
1495	lim = va + sz;
1496	while (va < lim) {
1497		ia64_fc_i(va);
1498		va += 32;	/* XXX */
1499	}
1500
1501	ia64_sync_i();
1502	ia64_srlz_i();
1503}
1504