fakebop.c revision 7656:2621e50fdf4a
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*
28 * This file contains the functionality that mimics the boot operations
29 * on SPARC systems or the old boot.bin/multiboot programs on x86 systems.
30 * The x86 kernel now does everything on its own.
31 */
32
33#include <sys/types.h>
34#include <sys/bootconf.h>
35#include <sys/bootsvcs.h>
36#include <sys/bootinfo.h>
37#include <sys/multiboot.h>
38#include <sys/bootvfs.h>
39#include <sys/bootprops.h>
40#include <sys/varargs.h>
41#include <sys/param.h>
42#include <sys/machparam.h>
43#include <sys/archsystm.h>
44#include <sys/boot_console.h>
45#include <sys/cmn_err.h>
46#include <sys/systm.h>
47#include <sys/promif.h>
48#include <sys/archsystm.h>
49#include <sys/x86_archext.h>
50#include <sys/kobj.h>
51#include <sys/privregs.h>
52#include <sys/sysmacros.h>
53#include <sys/ctype.h>
54#include <sys/fastboot.h>
55#ifdef __xpv
56#include <sys/hypervisor.h>
57#include <net/if.h>
58#endif
59#include <vm/kboot_mmu.h>
60#include <vm/hat_pte.h>
61#include <sys/dmar_acpi.h>
62#include "acpi_fw.h"
63
64static int have_console = 0;	/* set once primitive console is initialized */
65static char *boot_args = "";
66
67/*
68 * Debugging macros
69 */
70static uint_t kbm_debug = 0;
/*
 * Each macro expands to exactly one statement (do { } while (0)) so it can
 * be followed by a semicolon and used as the body of an unbraced if/else.
 *
 * DBG_MSG(s)	 print the string s when kbm_debug is set
 * DBG(x)	 print "<expression text> is <hex value>" when kbm_debug is set
 * PUT_STRING(s) push every character of s to the boot console
 */
#define	DBG_MSG(s)	do {					\
	if (kbm_debug)						\
		bop_printf(NULL, "%s", s);			\
} while (0)

#define	DBG(x)		do {					\
	if (kbm_debug)						\
		bop_printf(NULL, "%s is %" PRIx64 "\n", #x,	\
		    (uint64_t)(x));				\
} while (0)

#define	PUT_STRING(s)	do {				\
	char *cp;				\
	for (cp = (s); *cp; ++cp)		\
		bcons_putchar(*cp);		\
} while (0)
81
82struct xboot_info *xbootp;	/* boot info from "glue" code in low memory */
83bootops_t bootop;	/* simple bootops we'll pass on to kernel */
84struct bsys_mem bm;
85
86static uintptr_t next_virt;	/* next available virtual address */
87static paddr_t next_phys;	/* next available physical address from dboot */
88static paddr_t high_phys = -(paddr_t)1;	/* last used physical address */
89
90/*
91 * buffer for vsnprintf for console I/O
92 */
93#define	BUFFERSIZE	256
94static char buffer[BUFFERSIZE];
95/*
96 * stuff to store/report/manipulate boot property settings.
97 */
98typedef struct bootprop {
99	struct bootprop *bp_next;
100	char *bp_name;
101	uint_t bp_vlen;
102	char *bp_value;
103} bootprop_t;
104
105static bootprop_t *bprops = NULL;
106static char *curr_page = NULL;		/* ptr to avail bprop memory */
107static int curr_space = 0;		/* amount of memory at curr_page */
108
109#ifdef __xpv
110start_info_t *xen_info;
111shared_info_t *HYPERVISOR_shared_info;
112#endif
113
114/*
115 * some allocator statistics
116 */
117static ulong_t total_bop_alloc_scratch = 0;
118static ulong_t total_bop_alloc_kernel = 0;
119
120static void build_firmware_properties(void);
121
122static int early_allocation = 1;
123
124#ifdef	__xpv
125int fastreboot_capable = 0;
126int force_fastreboot = 0;
127int post_fastreboot = 0;
128#else
129int fastreboot_capable = 1;
130int force_fastreboot = 0;
131int post_fastreboot = 0;
132#endif
133
134/*
135 * Information saved from current boot for fast reboot.
136 * If the information size exceeds what we have allocated, fast reboot
137 * will not be supported.
138 */
139multiboot_info_t saved_mbi;
140mb_memory_map_t saved_mmap[FASTBOOT_SAVED_MMAP_COUNT];
141struct sol_netinfo saved_drives[FASTBOOT_SAVED_DRIVES_COUNT];
142char saved_cmdline[FASTBOOT_SAVED_CMDLINE_LEN];
143int saved_cmdline_len = 0;
144
145/*
146 * Pointers to where System Resource Affinity Table (SRAT) and
147 * System Locality Information Table (SLIT) are mapped into virtual memory
148 */
149struct srat	*srat_ptr = NULL;
150struct slit	*slit_ptr = NULL;
151
152
153/*
154 * Allocate aligned physical memory at boot time. This allocator allocates
155 * from the highest possible addresses. This avoids exhausting memory that
156 * would be useful for DMA buffers.
157 */
158paddr_t
159do_bop_phys_alloc(uint64_t size, uint64_t align)
160{
161	paddr_t	pa = 0;
162	paddr_t	start;
163	paddr_t	end;
164	struct memlist	*ml = (struct memlist *)xbootp->bi_phys_install;
165
166	/*
167	 * Be careful if high memory usage is limited in startup.c
168	 * Since there are holes in the low part of the physical address
169	 * space we can treat physmem as a pfn (not just a pgcnt) and
170	 * get a conservative upper limit.
171	 */
172	if (physmem != 0 && high_phys > pfn_to_pa(physmem))
173		high_phys = pfn_to_pa(physmem);
174
175	/*
176	 * find the lowest or highest available memory in physinstalled
177	 * On 32 bit avoid physmem above 4Gig if PAE isn't enabled
178	 */
179#if defined(__i386)
180	if (xbootp->bi_use_pae == 0 && high_phys > FOUR_GIG)
181		high_phys = FOUR_GIG;
182#endif
183
184	/*
185	 * find the highest available memory in physinstalled
186	 */
187	size = P2ROUNDUP(size, align);
188	for (; ml; ml = ml->next) {
189		start = P2ROUNDUP(ml->address, align);
190		end = P2ALIGN(ml->address + ml->size, align);
191		if (start < next_phys)
192			start = P2ROUNDUP(next_phys, align);
193		if (end > high_phys)
194			end = P2ALIGN(high_phys, align);
195
196		if (end <= start)
197			continue;
198		if (end - start < size)
199			continue;
200
201		/*
202		 * Early allocations need to use low memory, since
203		 * physmem might be further limited by bootenv.rc
204		 */
205		if (early_allocation) {
206			if (pa == 0 || start < pa)
207				pa = start;
208		} else {
209			if (end - size > pa)
210				pa = end - size;
211		}
212	}
213	if (pa != 0) {
214		if (early_allocation)
215			next_phys = pa + size;
216		else
217			high_phys = pa;
218		return (pa);
219	}
220	bop_panic("do_bop_phys_alloc(0x%" PRIx64 ", 0x%" PRIx64
221	    ") Out of memory\n", size, align);
222	/*NOTREACHED*/
223}
224
225static uintptr_t
226alloc_vaddr(size_t size, paddr_t align)
227{
228	uintptr_t rv;
229
230	next_virt = P2ROUNDUP(next_virt, (uintptr_t)align);
231	rv = (uintptr_t)next_virt;
232	next_virt += size;
233	return (rv);
234}
235
236/*
237 * Allocate virtual memory. The size is always rounded up to a multiple
238 * of base pagesize.
239 */
240
241/*ARGSUSED*/
242static caddr_t
243do_bsys_alloc(bootops_t *bop, caddr_t virthint, size_t size, int align)
244{
245	paddr_t a = align;	/* same type as pa for masking */
246	uint_t pgsize;
247	paddr_t pa;
248	uintptr_t va;
249	ssize_t s;		/* the aligned size */
250	uint_t level;
251	uint_t is_kernel = (virthint != 0);
252
253	if (a < MMU_PAGESIZE)
254		a = MMU_PAGESIZE;
255	else if (!ISP2(a))
256		prom_panic("do_bsys_alloc() incorrect alignment");
257	size = P2ROUNDUP(size, MMU_PAGESIZE);
258
259	/*
260	 * Use the next aligned virtual address if we weren't given one.
261	 */
262	if (virthint == NULL) {
263		virthint = (caddr_t)alloc_vaddr(size, a);
264		total_bop_alloc_scratch += size;
265	} else {
266		total_bop_alloc_kernel += size;
267	}
268
269	/*
270	 * allocate the physical memory
271	 */
272	pa = do_bop_phys_alloc(size, a);
273
274	/*
275	 * Add the mappings to the page tables, try large pages first.
276	 */
277	va = (uintptr_t)virthint;
278	s = size;
279	level = 1;
280	pgsize = xbootp->bi_use_pae ? TWO_MEG : FOUR_MEG;
281	if (xbootp->bi_use_largepage && a == pgsize) {
282		while (IS_P2ALIGNED(pa, pgsize) && IS_P2ALIGNED(va, pgsize) &&
283		    s >= pgsize) {
284			kbm_map(va, pa, level, is_kernel);
285			va += pgsize;
286			pa += pgsize;
287			s -= pgsize;
288		}
289	}
290
291	/*
292	 * Map remaining pages use small mappings
293	 */
294	level = 0;
295	pgsize = MMU_PAGESIZE;
296	while (s > 0) {
297		kbm_map(va, pa, level, is_kernel);
298		va += pgsize;
299		pa += pgsize;
300		s -= pgsize;
301	}
302	return (virthint);
303}
304
/*
 * Free virtual memory - we'll just ignore these.
 *
 * Nothing is released; the request is only reported through bop_printf(),
 * showing the address and size that were passed in.
 */
/*ARGSUSED*/
static void
do_bsys_free(bootops_t *bop, caddr_t virt, size_t size)
{
	bop_printf(NULL, "do_bsys_free(virt=0x%p, size=0x%lx) ignored\n",
	    (void *)virt, size);
}
315
316/*
317 * Old interface
318 */
319/*ARGSUSED*/
320static caddr_t
321do_bsys_ealloc(
322	bootops_t *bop,
323	caddr_t virthint,
324	size_t size,
325	int align,
326	int flags)
327{
328	prom_panic("unsupported call to BOP_EALLOC()\n");
329	return (0);
330}
331
332
333static void
334bsetprop(char *name, int nlen, void *value, int vlen)
335{
336	uint_t size;
337	uint_t need_size;
338	bootprop_t *b;
339
340	/*
341	 * align the size to 16 byte boundary
342	 */
343	size = sizeof (bootprop_t) + nlen + 1 + vlen;
344	size = (size + 0xf) & ~0xf;
345	if (size > curr_space) {
346		need_size = (size + (MMU_PAGEOFFSET)) & MMU_PAGEMASK;
347		curr_page = do_bsys_alloc(NULL, 0, need_size, MMU_PAGESIZE);
348		curr_space = need_size;
349	}
350
351	/*
352	 * use a bootprop_t at curr_page and link into list
353	 */
354	b = (bootprop_t *)curr_page;
355	curr_page += sizeof (bootprop_t);
356	curr_space -=  sizeof (bootprop_t);
357	b->bp_next = bprops;
358	bprops = b;
359
360	/*
361	 * follow by name and ending zero byte
362	 */
363	b->bp_name = curr_page;
364	bcopy(name, curr_page, nlen);
365	curr_page += nlen;
366	*curr_page++ = 0;
367	curr_space -= nlen + 1;
368
369	/*
370	 * copy in value, but no ending zero byte
371	 */
372	b->bp_value = curr_page;
373	b->bp_vlen = vlen;
374	if (vlen > 0) {
375		bcopy(value, curr_page, vlen);
376		curr_page += vlen;
377		curr_space -= vlen;
378	}
379
380	/*
381	 * align new values of curr_page, curr_space
382	 */
383	while (curr_space & 0xf) {
384		++curr_page;
385		--curr_space;
386	}
387}
388
/*
 * Set a string valued property; the stored value includes the string's
 * terminating zero byte.
 */
static void
bsetprops(char *name, char *value)
{
	int name_len = strlen(name);
	int value_len = strlen(value) + 1;

	bsetprop(name, name_len, value, value_len);
}
394
/*
 * Set a property whose value is the raw bytes of a 64 bit integer.
 */
static void
bsetprop64(char *name, uint64_t value)
{
	int name_len = strlen(name);

	bsetprop(name, name_len, (void *)&value, sizeof (uint64_t));
}
400
/*
 * Set a string property holding the decimal representation of an int.
 */
static void
bsetpropsi(char *name, int value)
{
	char decimal[32];

	(void) snprintf(decimal, sizeof (decimal), "%d", value);
	bsetprops(name, decimal);
}
409
410/*
411 * to find the size of the buffer to allocate
412 */
413/*ARGSUSED*/
414int
415do_bsys_getproplen(bootops_t *bop, const char *name)
416{
417	bootprop_t *b;
418
419	for (b = bprops; b; b = b->bp_next) {
420		if (strcmp(name, b->bp_name) != 0)
421			continue;
422		return (b->bp_vlen);
423	}
424	return (-1);
425}
426
427/*
428 * get the value associated with this name
429 */
430/*ARGSUSED*/
431int
432do_bsys_getprop(bootops_t *bop, const char *name, void *value)
433{
434	bootprop_t *b;
435
436	for (b = bprops; b; b = b->bp_next) {
437		if (strcmp(name, b->bp_name) != 0)
438			continue;
439		bcopy(b->bp_value, value, b->bp_vlen);
440		return (0);
441	}
442	return (-1);
443}
444
445/*
446 * get the name of the next property in succession from the standalone
447 */
448/*ARGSUSED*/
449static char *
450do_bsys_nextprop(bootops_t *bop, char *name)
451{
452	bootprop_t *b;
453
454	/*
455	 * A null name is a special signal for the 1st boot property
456	 */
457	if (name == NULL || strlen(name) == 0) {
458		if (bprops == NULL)
459			return (NULL);
460		return (bprops->bp_name);
461	}
462
463	for (b = bprops; b; b = b->bp_next) {
464		if (name != b->bp_name)
465			continue;
466		b = b->bp_next;
467		if (b == NULL)
468			return (NULL);
469		return (b->bp_name);
470	}
471	return (NULL);
472}
473
/*
 * Parse numeric value from a string. Understands decimal, hex, octal, - and ~
 *
 * An optional leading '-' or '~' negates or complements the parsed number.
 * A leading "0x"/"0X" selects hex, any other leading '0' selects octal,
 * everything else is decimal.  The whole remaining string must consist of
 * digits valid for the radix.
 *
 * On success 0 is returned and the result is stored in *retval.  On a
 * malformed string -1 is returned and *retval is left as 0.
 */
static int
parse_value(char *p, uint64_t *retval)
{
	int adjust = 0;
	uint64_t tmp = 0;
	int digit;
	int radix = 10;

	*retval = 0;
	if (*p == '-' || *p == '~')
		adjust = *p++;

	if (*p == '0') {
		++p;
		if (*p == 'x' || *p == 'X') {
			radix = 16;
			++p;
		} else {
			/*
			 * Only the leading '0' is consumed here; the next
			 * character is the first octal digit and must be
			 * parsed.  A bare "0" simply leaves nothing to parse.
			 */
			radix = 8;
		}
	}
	for (; *p != '\0'; ++p) {
		if ('0' <= *p && *p <= '9')
			digit = *p - '0';
		else if ('a' <= *p && *p <= 'f')
			digit = 10 + *p - 'a';
		else if ('A' <= *p && *p <= 'F')
			digit = 10 + *p - 'A';
		else
			return (-1);
		if (digit >= radix)
			return (-1);
		tmp = tmp * radix + digit;
	}

	/*
	 * The prefix is applied last so that it also covers "-0" and "~0".
	 */
	if (adjust == '-')
		tmp = -tmp;
	else if (adjust == '~')
		tmp = ~tmp;
	*retval = tmp;
	return (0);
}
522
523/*
524 * 2nd part of building the table of boot properties. This includes:
525 * - values from /boot/solaris/bootenv.rc (ie. eeprom(1m) values)
526 *
527 * lines look like one of:
528 * ^$
529 * ^# comment till end of line
530 * setprop name 'value'
531 * setprop name value
532 * setprop name "value"
533 *
534 * we do single character I/O since this is really just looking at memory
535 */
536void
537boot_prop_finish(void)
538{
539	int fd;
540	char *line;
541	int c;
542	int bytes_read;
543	char *name;
544	int n_len;
545	char *value;
546	int v_len;
547	char *inputdev;	/* these override the command line if serial ports */
548	char *outputdev;
549	char *consoledev;
550	uint64_t lvalue;
551	int use_xencons = 0;
552
553#ifdef __xpv
554	if (!DOMAIN_IS_INITDOMAIN(xen_info))
555		use_xencons = 1;
556#endif /* __xpv */
557
558	DBG_MSG("Opening /boot/solaris/bootenv.rc\n");
559	fd = BRD_OPEN(bfs_ops, "/boot/solaris/bootenv.rc", 0);
560	DBG(fd);
561
562	line = do_bsys_alloc(NULL, NULL, MMU_PAGESIZE, MMU_PAGESIZE);
563	while (fd >= 0) {
564
565		/*
566		 * get a line
567		 */
568		for (c = 0; ; ++c) {
569			bytes_read = BRD_READ(bfs_ops, fd, line + c, 1);
570			if (bytes_read == 0) {
571				if (c == 0)
572					goto done;
573				break;
574			}
575			if (line[c] == '\n')
576				break;
577		}
578		line[c] = 0;
579
580		/*
581		 * ignore comment lines
582		 */
583		c = 0;
584		while (ISSPACE(line[c]))
585			++c;
586		if (line[c] == '#' || line[c] == 0)
587			continue;
588
589		/*
590		 * must have "setprop " or "setprop\t"
591		 */
592		if (strncmp(line + c, "setprop ", 8) != 0 &&
593		    strncmp(line + c, "setprop\t", 8) != 0)
594			continue;
595		c += 8;
596		while (ISSPACE(line[c]))
597			++c;
598		if (line[c] == 0)
599			continue;
600
601		/*
602		 * gather up the property name
603		 */
604		name = line + c;
605		n_len = 0;
606		while (line[c] && !ISSPACE(line[c]))
607			++n_len, ++c;
608
609		/*
610		 * gather up the value, if any
611		 */
612		value = "";
613		v_len = 0;
614		while (ISSPACE(line[c]))
615			++c;
616		if (line[c] != 0) {
617			value = line + c;
618			while (line[c] && !ISSPACE(line[c]))
619				++v_len, ++c;
620		}
621
622		if (v_len >= 2 && value[0] == value[v_len - 1] &&
623		    (value[0] == '\'' || value[0] == '"')) {
624			++value;
625			v_len -= 2;
626		}
627		name[n_len] = 0;
628		if (v_len > 0)
629			value[v_len] = 0;
630		else
631			continue;
632
633		/*
634		 * ignore "boot-file" property, it's now meaningless
635		 */
636		if (strcmp(name, "boot-file") == 0)
637			continue;
638		if (strcmp(name, "boot-args") == 0 &&
639		    strlen(boot_args) > 0)
640			continue;
641
642		/*
643		 * If a property was explicitly set on the command line
644		 * it will override a setting in bootenv.rc
645		 */
646		if (do_bsys_getproplen(NULL, name) > 0)
647			continue;
648
649		bsetprop(name, n_len, value, v_len + 1);
650	}
651done:
652	if (fd >= 0)
653		BRD_CLOSE(bfs_ops, fd);
654
655	/*
656	 * Check if we have to limit the boot time allocator
657	 */
658	if (do_bsys_getproplen(NULL, "physmem") != -1 &&
659	    do_bsys_getprop(NULL, "physmem", line) >= 0 &&
660	    parse_value(line, &lvalue) != -1) {
661		if (0 < lvalue && (lvalue < physmem || physmem == 0)) {
662			physmem = (pgcnt_t)lvalue;
663			DBG(physmem);
664		}
665	}
666	early_allocation = 0;
667
668	/*
669	 * check to see if we have to override the default value of the console
670	 */
671	if (!use_xencons) {
672		inputdev = line;
673		v_len = do_bsys_getproplen(NULL, "input-device");
674		if (v_len > 0)
675			(void) do_bsys_getprop(NULL, "input-device", inputdev);
676		else
677			v_len = 0;
678		inputdev[v_len] = 0;
679
680		outputdev = inputdev + v_len + 1;
681		v_len = do_bsys_getproplen(NULL, "output-device");
682		if (v_len > 0)
683			(void) do_bsys_getprop(NULL, "output-device",
684			    outputdev);
685		else
686			v_len = 0;
687		outputdev[v_len] = 0;
688
689		consoledev = outputdev + v_len + 1;
690		v_len = do_bsys_getproplen(NULL, "console");
691		if (v_len > 0)
692			(void) do_bsys_getprop(NULL, "console", consoledev);
693		else
694			v_len = 0;
695		consoledev[v_len] = 0;
696		bcons_init2(inputdev, outputdev, consoledev);
697	} else {
698		/*
699		 * Ensure console property exists
700		 * If not create it as "hypervisor"
701		 */
702		v_len = do_bsys_getproplen(NULL, "console");
703		if (v_len < 0)
704			bsetprops("console", "hypervisor");
705		inputdev = outputdev = consoledev = "hypervisor";
706		bcons_init2(inputdev, outputdev, consoledev);
707	}
708
709	if (strstr((char *)xbootp->bi_cmdline, "prom_debug") || kbm_debug) {
710		value = line;
711		bop_printf(NULL, "\nBoot properties:\n");
712		name = "";
713		while ((name = do_bsys_nextprop(NULL, name)) != NULL) {
714			bop_printf(NULL, "\t0x%p %s = ", (void *)name, name);
715			(void) do_bsys_getprop(NULL, name, value);
716			v_len = do_bsys_getproplen(NULL, name);
717			bop_printf(NULL, "len=%d ", v_len);
718			value[v_len] = 0;
719			bop_printf(NULL, "%s\n", value);
720		}
721	}
722}
723
724/*
725 * print formatted output
726 */
727/*PRINTFLIKE2*/
728/*ARGSUSED*/
729void
730bop_printf(bootops_t *bop, const char *fmt, ...)
731{
732	va_list	ap;
733
734	if (have_console == 0)
735		return;
736
737	va_start(ap, fmt);
738	(void) vsnprintf(buffer, BUFFERSIZE, fmt, ap);
739	va_end(ap);
740	PUT_STRING(buffer);
741}
742
743/*
744 * Another panic() variant; this one can be used even earlier during boot than
745 * prom_panic().
746 */
747/*PRINTFLIKE1*/
748void
749bop_panic(const char *fmt, ...)
750{
751	va_list ap;
752
753	va_start(ap, fmt);
754	bop_printf(NULL, fmt, ap);
755	va_end(ap);
756
757	bop_printf(NULL, "\nPress any key to reboot.\n");
758	(void) bcons_getchar();
759	bop_printf(NULL, "Resetting...\n");
760	pc_reset();
761}
762
/*
 * Do a real mode interrupt BIOS call
 *
 * bios_regs_t is the set of 16 bit registers exchanged with the BIOS call
 * code; bios_func_t is the entry point of that code, taking the interrupt
 * number and the register set and returning a value that is stored in the
 * caller's eflags.
 */
typedef struct bios_regs {
	unsigned short ax, bx, cx, dx, si, di, bp, es, ds;
} bios_regs_t;
typedef int (*bios_func_t)(int, bios_regs_t *);

/*
 * Perform BIOS interrupt "intnum" with the registers taken from *rp, and
 * store the resulting registers and flags back into *rp.  Only the 16 bit
 * halves of the general registers are passed in and out.
 *
 * Not available under __xpv, where any call panics.
 */
/*ARGSUSED*/
static void
do_bsys_doint(bootops_t *bop, int intnum, struct bop_regs *rp)
{
#if defined(__xpv)
	prom_panic("unsupported call to BOP_DOINT()\n");
#else	/* __xpv */
	static int firsttime = 1;
	/* the BIOS call code is run from address 0x5000 */
	bios_func_t bios_func = (bios_func_t)(void *)(uintptr_t)0x5000;
	bios_regs_t br;

	/*
	 * The first time we do this, we have to copy the pre-packaged
	 * low memory bios call code image into place.
	 */
	if (firsttime) {
		extern char bios_image[];
		extern uint32_t bios_size;

		bcopy(bios_image, (void *)bios_func, bios_size);
		firsttime = 0;
	}

	/* load the 16 bit register image from the caller's registers */
	br.ax = rp->eax.word.ax;
	br.bx = rp->ebx.word.bx;
	br.cx = rp->ecx.word.cx;
	br.dx = rp->edx.word.dx;
	br.bp = rp->ebp.word.bp;
	br.si = rp->esi.word.si;
	br.di = rp->edi.word.di;
	br.ds = rp->ds;
	br.es = rp->es;

	DBG_MSG("Doing BIOS call...");
	DBG(br.ax);
	DBG(br.bx);
	DBG(br.dx);
	/* the return value of the call becomes the caller's eflags */
	rp->eflags = bios_func(intnum, &br);
	DBG_MSG("done\n");

	/* hand the registers as left by the call back to the caller */
	rp->eax.word.ax = br.ax;
	rp->ebx.word.bx = br.bx;
	rp->ecx.word.cx = br.cx;
	rp->edx.word.dx = br.dx;
	rp->ebp.word.bp = br.bp;
	rp->esi.word.si = br.si;
	rp->edi.word.di = br.di;
	rp->ds = br.ds;
	rp->es = br.es;
#endif /* __xpv */
}
822
823static struct boot_syscalls bop_sysp = {
824	bcons_getchar,
825	bcons_putchar,
826	bcons_ischar,
827};
828
829static char *whoami;
830
831#define	BUFLEN	64
832
833#if defined(__xpv)
834
835static char namebuf[32];
836
/*
 * Split the ':' separated string s into fields and set the i-th non-empty
 * field as the value of property prop_map[i].  Fields whose prop_map entry
 * is NULL, and empty fields, are skipped.  At most n_prop fields are
 * looked at and parsing stops at the end of the string.
 *
 * The string is modified in place: each separator that ends a stored field
 * is overwritten with a zero byte.
 */
static void
xen_parse_props(char *s, char *prop_map[], int n_prop)
{
	char **prop_name = prop_map;
	char *cp = s, *scp;
	int at_end;

	do {
		scp = cp;
		while ((*cp != '\0') && (*cp != ':'))
			cp++;

		/*
		 * Remember whether this field was ended by the string
		 * terminator before the separator is possibly overwritten.
		 */
		at_end = (*cp == '\0');

		if ((scp != cp) && (*prop_name != NULL)) {
			*cp = '\0';
			bsetprops(*prop_name, scp);
		}

		/*
		 * Never step over the terminator; there is nothing more to
		 * parse even if fewer than n_prop fields were present.
		 */
		if (at_end)
			break;

		cp++;
		prop_name++;
		n_prop--;
	} while (n_prop > 0);
}
858
859#define	VBDPATHLEN	64
860
861/*
862 * parse the 'xpv-root' property to create properties used by
863 * ufs_mountroot.
864 */
865static void
866xen_vbdroot_props(char *s)
867{
868	char vbdpath[VBDPATHLEN] = "/xpvd/xdf@";
869	const char lnamefix[] = "/dev/dsk/c0d";
870	char *pnp;
871	char *prop_p;
872	char mi;
873	short minor;
874	long addr = 0;
875
876	pnp = vbdpath + strlen(vbdpath);
877	prop_p = s + strlen(lnamefix);
878	while ((*prop_p != '\0') && (*prop_p != 's') && (*prop_p != 'p'))
879		addr = addr * 10 + *prop_p++ - '0';
880	(void) snprintf(pnp, VBDPATHLEN, "%lx", addr);
881	pnp = vbdpath + strlen(vbdpath);
882	if (*prop_p == 's')
883		mi = 'a';
884	else if (*prop_p == 'p')
885		mi = 'q';
886	else
887		ASSERT(0); /* shouldn't be here */
888	prop_p++;
889	ASSERT(*prop_p != '\0');
890	if (ISDIGIT(*prop_p)) {
891		minor = *prop_p - '0';
892		prop_p++;
893		if (ISDIGIT(*prop_p)) {
894			minor = minor * 10 + *prop_p - '0';
895		}
896	} else {
897		/* malformed root path, use 0 as default */
898		minor = 0;
899	}
900	ASSERT(minor < 16); /* at most 16 partitions */
901	mi += minor;
902	*pnp++ = ':';
903	*pnp++ = mi;
904	*pnp++ = '\0';
905	bsetprops("fstype", "ufs");
906	bsetprops("bootpath", vbdpath);
907
908	DBG_MSG("VBD bootpath set to ");
909	DBG_MSG(vbdpath);
910	DBG_MSG("\n");
911}
912
913/*
914 * parse the xpv-nfsroot property to create properties used by
915 * nfs_mountroot.
916 */
917static void
918xen_nfsroot_props(char *s)
919{
920	char *prop_map[] = {
921		BP_SERVER_IP,	/* server IP address */
922		BP_SERVER_NAME,	/* server hostname */
923		BP_SERVER_PATH,	/* root path */
924	};
925	int n_prop = sizeof (prop_map) / sizeof (prop_map[0]);
926
927	bsetprop("fstype", 6, "nfs", 4);
928
929	xen_parse_props(s, prop_map, n_prop);
930
931	/*
932	 * If a server name wasn't specified, use a default.
933	 */
934	if (do_bsys_getproplen(NULL, BP_SERVER_NAME) == -1)
935		bsetprops(BP_SERVER_NAME, "unknown");
936}
937
938/*
939 * Extract our IP address, etc. from the "xpv-ip" property.
940 */
941static void
942xen_ip_props(char *s)
943{
944	char *prop_map[] = {
945		BP_HOST_IP,		/* IP address */
946		NULL,			/* NFS server IP address (ignored in */
947					/* favour of xpv-nfsroot) */
948		BP_ROUTER_IP,		/* IP gateway */
949		BP_SUBNET_MASK,		/* IP subnet mask */
950		"xpv-hostname",		/* hostname (ignored) */
951		BP_NETWORK_INTERFACE,	/* interface name */
952		"xpv-hcp",		/* host configuration protocol */
953	};
954	int n_prop = sizeof (prop_map) / sizeof (prop_map[0]);
955	char ifname[IFNAMSIZ];
956
957	xen_parse_props(s, prop_map, n_prop);
958
959	/*
960	 * A Linux dom0 administrator expects all interfaces to be
961	 * called "ethX", which is not the case here.
962	 *
963	 * If the interface name specified is "eth0", presume that
964	 * this is really intended to be "xnf0" (the first domU ->
965	 * dom0 interface for this domain).
966	 */
967	if ((do_bsys_getprop(NULL, BP_NETWORK_INTERFACE, ifname) == 0) &&
968	    (strcmp("eth0", ifname) == 0)) {
969		bsetprops(BP_NETWORK_INTERFACE, "xnf0");
970		bop_printf(NULL,
971		    "network interface name 'eth0' replaced with 'xnf0'\n");
972	}
973}
974
975#else	/* __xpv */
976
977static void
978setup_rarp_props(struct sol_netinfo *sip)
979{
980	char buf[BUFLEN];	/* to hold ip/mac addrs */
981	uint8_t *val;
982
983	val = (uint8_t *)&sip->sn_ciaddr;
984	(void) snprintf(buf, BUFLEN, "%d.%d.%d.%d",
985	    val[0], val[1], val[2], val[3]);
986	bsetprops(BP_HOST_IP, buf);
987
988	val = (uint8_t *)&sip->sn_siaddr;
989	(void) snprintf(buf, BUFLEN, "%d.%d.%d.%d",
990	    val[0], val[1], val[2], val[3]);
991	bsetprops(BP_SERVER_IP, buf);
992
993	if (sip->sn_giaddr != 0) {
994		val = (uint8_t *)&sip->sn_giaddr;
995		(void) snprintf(buf, BUFLEN, "%d.%d.%d.%d",
996		    val[0], val[1], val[2], val[3]);
997		bsetprops(BP_ROUTER_IP, buf);
998	}
999
1000	if (sip->sn_netmask != 0) {
1001		val = (uint8_t *)&sip->sn_netmask;
1002		(void) snprintf(buf, BUFLEN, "%d.%d.%d.%d",
1003		    val[0], val[1], val[2], val[3]);
1004		bsetprops(BP_SUBNET_MASK, buf);
1005	}
1006
1007	if (sip->sn_mactype != 4 || sip->sn_maclen != 6) {
1008		bop_printf(NULL, "unsupported mac type %d, mac len %d\n",
1009		    sip->sn_mactype, sip->sn_maclen);
1010	} else {
1011		val = sip->sn_macaddr;
1012		(void) snprintf(buf, BUFLEN, "%x:%x:%x:%x:%x:%x",
1013		    val[0], val[1], val[2], val[3], val[4], val[5]);
1014		bsetprops(BP_BOOT_MAC, buf);
1015	}
1016}
1017
1018#endif	/* __xpv */
1019
/*
 * 1st pass at building the table of boot properties. This includes:
 * - values set on the command line: -B a=x,b=y,c=z ....
 * - known values we just compute (ie. from xbootp)
 *
 * Values from /boot/solaris/bootenv.rc (ie. eeprom(1m) values) are not
 * read here; they are added in the 2nd pass by boot_prop_finish().
 *
 * the grub command line looks like:
 * kernel boot-file [-B prop=value[,prop=value]...] [boot-args]
 *
 * whoami is the same as boot-file
 */
1031static void
1032build_boot_properties(void)
1033{
1034	char *name;
1035	int name_len;
1036	char *value;
1037	int value_len;
1038	struct boot_modules *bm;
1039	char *propbuf;
1040	int quoted = 0;
1041	int boot_arg_len;
1042#ifndef __xpv
1043	static int stdout_val = 0;
1044	uchar_t boot_device;
1045	char str[3];
1046	multiboot_info_t *mbi;
1047	int netboot;
1048	struct sol_netinfo *sip;
1049#endif
1050
1051	/*
1052	 * These have to be done first, so that kobj_mount_root() works
1053	 */
1054	DBG_MSG("Building boot properties\n");
1055	propbuf = do_bsys_alloc(NULL, NULL, MMU_PAGESIZE, 0);
1056	DBG((uintptr_t)propbuf);
1057	if (xbootp->bi_module_cnt > 0) {
1058		bm = xbootp->bi_modules;
1059		bsetprop64("ramdisk_start", (uint64_t)(uintptr_t)bm->bm_addr);
1060		bsetprop64("ramdisk_end", (uint64_t)(uintptr_t)bm->bm_addr +
1061		    bm->bm_size);
1062	}
1063
1064	DBG_MSG("Parsing command line for boot properties\n");
1065	value = xbootp->bi_cmdline;
1066
1067	/*
1068	 * allocate memory to collect boot_args into
1069	 */
1070	boot_arg_len = strlen(xbootp->bi_cmdline) + 1;
1071	boot_args = do_bsys_alloc(NULL, NULL, boot_arg_len, MMU_PAGESIZE);
1072	boot_args[0] = 0;
1073	boot_arg_len = 0;
1074
1075#ifndef	__xpv
1076	saved_cmdline_len =  strlen(xbootp->bi_cmdline) + 1;
1077	if (saved_cmdline_len > FASTBOOT_SAVED_CMDLINE_LEN) {
1078		DBG(saved_cmdline_len);
1079		DBG_MSG("Command line too long: clearing fastreboot_capable\n");
1080		fastreboot_capable = 0;
1081	} else {
1082		bcopy((void *)(xbootp->bi_cmdline), (void *)saved_cmdline,
1083		    saved_cmdline_len);
1084		saved_cmdline[saved_cmdline_len - 1] = '\0';
1085	}
1086#endif
1087
1088
1089#ifdef __xpv
1090	/*
1091	 * Xen puts a lot of device information in front of the kernel name
1092	 * let's grab them and make them boot properties.  The first
1093	 * string w/o an "=" in it will be the boot-file property.
1094	 */
1095	(void) strcpy(namebuf, "xpv-");
1096	for (;;) {
1097		/*
1098		 * get to next property
1099		 */
1100		while (ISSPACE(*value))
1101			++value;
1102		name = value;
1103		/*
1104		 * look for an "="
1105		 */
1106		while (*value && !ISSPACE(*value) && *value != '=') {
1107			value++;
1108		}
1109		if (*value != '=') { /* no "=" in the property */
1110			value = name;
1111			break;
1112		}
1113		name_len = value - name;
1114		value_len = 0;
1115		/*
1116		 * skip over the "="
1117		 */
1118		value++;
1119		while (value[value_len] && !ISSPACE(value[value_len])) {
1120			++value_len;
1121		}
1122		/*
1123		 * build property name with "xpv-" prefix
1124		 */
1125		if (name_len + 4 > 32) { /* skip if name too long */
1126			value += value_len;
1127			continue;
1128		}
1129		bcopy(name, &namebuf[4], name_len);
1130		name_len += 4;
1131		namebuf[name_len] = 0;
1132		bcopy(value, propbuf, value_len);
1133		propbuf[value_len] = 0;
1134		bsetprops(namebuf, propbuf);
1135
1136		/*
1137		 * xpv-root is set to the logical disk name of the xen
1138		 * VBD when booting from a disk-based filesystem.
1139		 */
1140		if (strcmp(namebuf, "xpv-root") == 0)
1141			xen_vbdroot_props(propbuf);
1142		/*
1143		 * While we're here, if we have a "xpv-nfsroot" property
1144		 * then we need to set "fstype" to "nfs" so we mount
1145		 * our root from the nfs server.  Also parse the xpv-nfsroot
1146		 * property to create the properties that nfs_mountroot will
1147		 * need to find the root and mount it.
1148		 */
1149		if (strcmp(namebuf, "xpv-nfsroot") == 0)
1150			xen_nfsroot_props(propbuf);
1151
1152		if (strcmp(namebuf, "xpv-ip") == 0)
1153			xen_ip_props(propbuf);
1154		value += value_len;
1155	}
1156#endif
1157
1158	while (ISSPACE(*value))
1159		++value;
1160	/*
1161	 * value now points at the boot-file
1162	 */
1163	value_len = 0;
1164	while (value[value_len] && !ISSPACE(value[value_len]))
1165		++value_len;
1166	if (value_len > 0) {
1167		whoami = propbuf;
1168		bcopy(value, whoami, value_len);
1169		whoami[value_len] = 0;
1170		bsetprops("boot-file", whoami);
1171		/*
1172		 * strip leading path stuff from whoami, so running from
1173		 * PXE/miniroot makes sense.
1174		 */
1175		if (strstr(whoami, "/platform/") != NULL)
1176			whoami = strstr(whoami, "/platform/");
1177		bsetprops("whoami", whoami);
1178	}
1179
1180	/*
1181	 * Values forcibly set boot properties on the command line via -B.
1182	 * Allow use of quotes in values. Other stuff goes on kernel
1183	 * command line.
1184	 */
1185	name = value + value_len;
1186	while (*name != 0) {
1187		/*
1188		 * anything not " -B" is copied to the command line
1189		 */
1190		if (!ISSPACE(name[0]) || name[1] != '-' || name[2] != 'B') {
1191			boot_args[boot_arg_len++] = *name;
1192			boot_args[boot_arg_len] = 0;
1193			++name;
1194			continue;
1195		}
1196
1197		/*
1198		 * skip the " -B" and following white space
1199		 */
1200		name += 3;
1201		while (ISSPACE(*name))
1202			++name;
1203		while (*name && !ISSPACE(*name)) {
1204			value = strstr(name, "=");
1205			if (value == NULL)
1206				break;
1207			name_len = value - name;
1208			++value;
1209			value_len = 0;
1210			quoted = 0;
1211			for (; ; ++value_len) {
1212				if (!value[value_len])
1213					break;
1214
1215				/*
1216				 * is this value quoted?
1217				 */
1218				if (value_len == 0 &&
1219				    (value[0] == '\'' || value[0] == '"')) {
1220					quoted = value[0];
1221					++value_len;
1222				}
1223
1224				/*
1225				 * In the quote accept any character,
1226				 * but look for ending quote.
1227				 */
1228				if (quoted) {
1229					if (value[value_len] == quoted)
1230						quoted = 0;
1231					continue;
1232				}
1233
1234				/*
1235				 * a comma or white space ends the value
1236				 */
1237				if (value[value_len] == ',' ||
1238				    ISSPACE(value[value_len]))
1239					break;
1240			}
1241
1242			if (value_len == 0) {
1243				bsetprop(name, name_len, "true", 5);
1244			} else {
1245				char *v = value;
1246				int l = value_len;
1247				if (v[0] == v[l - 1] &&
1248				    (v[0] == '\'' || v[0] == '"')) {
1249					++v;
1250					l -= 2;
1251				}
1252				bcopy(v, propbuf, l);
1253				propbuf[l] = '\0';
1254				bsetprop(name, name_len, propbuf,
1255				    l + 1);
1256			}
1257			name = value + value_len;
1258			while (*name == ',')
1259				++name;
1260		}
1261	}
1262
1263	/*
1264	 * set boot-args property
1265	 * 1275 name is bootargs, so set
1266	 * that too
1267	 */
1268	bsetprops("boot-args", boot_args);
1269	bsetprops("bootargs", boot_args);
1270
1271#ifndef __xpv
1272	/*
1273	 * set the BIOS boot device from GRUB
1274	 */
1275	netboot = 0;
1276	mbi = xbootp->bi_mb_info;
1277	bcopy(mbi, &saved_mbi, sizeof (multiboot_info_t));
1278	if (mbi->mmap_length > sizeof (saved_mmap)) {
1279		DBG_MSG("mbi->mmap_length too big: clearing "
1280		    "fastreboot_capable\n");
1281		fastreboot_capable = 0;
1282	} else {
1283		bcopy((void *)(uintptr_t)mbi->mmap_addr, (void *)saved_mmap,
1284		    mbi->mmap_length);
1285	}
1286
1287	if (mbi->drives_length > sizeof (saved_drives)) {
1288		DBG(mbi->drives_length);
1289		DBG_MSG("mbi->drives_length too big: clearing "
1290		    "fastreboot_capable\n");
1291		fastreboot_capable = 0;
1292	} else {
1293		bcopy((void *)(uintptr_t)mbi->drives_addr, (void *)saved_drives,
1294		    mbi->drives_length);
1295	}
1296
1297	if (mbi != NULL && mbi->flags & 0x2) {
1298		boot_device = mbi->boot_device >> 24;
1299		if (boot_device == 0x20)
1300			netboot++;
1301		str[0] = (boot_device >> 4) + '0';
1302		str[1] = (boot_device & 0xf) + '0';
1303		str[2] = 0;
1304		bsetprops("bios-boot-device", str);
1305	} else {
1306		netboot = 1;
1307	}
1308
1309	/*
1310	 * In the netboot case, drives_info is overloaded with the dhcp ack.
1311	 * This is not multiboot compliant and requires special pxegrub!
1312	 */
1313	if (netboot && mbi->drives_length != 0) {
1314		sip = (struct sol_netinfo *)(uintptr_t)mbi->drives_addr;
1315		if (sip->sn_infotype == SN_TYPE_BOOTP)
1316			bsetprop("bootp-response", sizeof ("bootp-response"),
1317			    (void *)(uintptr_t)mbi->drives_addr,
1318			    mbi->drives_length);
1319		else if (sip->sn_infotype == SN_TYPE_RARP)
1320			setup_rarp_props(sip);
1321	}
1322	bsetprop("stdout", strlen("stdout"),
1323	    &stdout_val, sizeof (stdout_val));
1324#endif /* __xpv */
1325
1326	/*
1327	 * more conjured up values for made up things....
1328	 */
1329#if defined(__xpv)
1330	bsetprops("mfg-name", "i86xpv");
1331	bsetprops("impl-arch-name", "i86xpv");
1332#else
1333	bsetprops("mfg-name", "i86pc");
1334	bsetprops("impl-arch-name", "i86pc");
1335#endif
1336
1337	/*
1338	 * Build firmware-provided system properties
1339	 */
1340	build_firmware_properties();
1341
1342	/*
1343	 * XXPV
1344	 *
1345	 * Find out what these are:
1346	 * - cpuid_feature_ecx_include
1347	 * - cpuid_feature_ecx_exclude
1348	 * - cpuid_feature_edx_include
1349	 * - cpuid_feature_edx_exclude
1350	 *
1351	 * Find out what these are in multiboot:
1352	 * - netdev-path
1353	 * - fstype
1354	 */
1355}
1356
1357#ifdef __xpv
1358/*
1359 * Under the Hypervisor, memory usable for DMA may be scarce. One
1360 * very likely large pool of DMA friendly memory is occupied by
1361 * the boot_archive, as it was loaded by grub into low MFNs.
1362 *
1363 * Here we free up that memory by copying the boot archive to what are
1364 * likely higher MFN pages and then swapping the mfn/pfn mappings.
1365 */
1366#define	PFN_2GIG	0x80000
1367static void
1368relocate_boot_archive(void)
1369{
1370	mfn_t max_mfn = HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);
1371	struct boot_modules *bm = xbootp->bi_modules;
1372	uintptr_t va;
1373	pfn_t va_pfn;
1374	mfn_t va_mfn;
1375	caddr_t copy;
1376	pfn_t copy_pfn;
1377	mfn_t copy_mfn;
1378	size_t	len;
1379	int slop;
1380	int total = 0;
1381	int relocated = 0;
1382	int mmu_update_return;
1383	mmu_update_t t[2];
1384	x86pte_t pte;
1385
1386	/*
1387	 * If all MFN's are below 2Gig, don't bother doing this.
1388	 */
1389	if (max_mfn < PFN_2GIG)
1390		return;
1391	if (xbootp->bi_module_cnt < 1) {
1392		DBG_MSG("no boot_archive!");
1393		return;
1394	}
1395
1396	DBG_MSG("moving boot_archive to high MFN memory\n");
1397	va = (uintptr_t)bm->bm_addr;
1398	len = bm->bm_size;
1399	slop = va & MMU_PAGEOFFSET;
1400	if (slop) {
1401		va += MMU_PAGESIZE - slop;
1402		len -= MMU_PAGESIZE - slop;
1403	}
1404	len = P2ALIGN(len, MMU_PAGESIZE);
1405
1406	/*
1407	 * Go through all boot_archive pages, swapping any low MFN pages
1408	 * with memory at next_phys.
1409	 */
1410	while (len != 0) {
1411		++total;
1412		va_pfn = mmu_btop(va - ONE_GIG);
1413		va_mfn = mfn_list[va_pfn];
1414		if (mfn_list[va_pfn] < PFN_2GIG) {
1415			copy = kbm_remap_window(next_phys, 1);
1416			bcopy((void *)va, copy, MMU_PAGESIZE);
1417			copy_pfn = mmu_btop(next_phys);
1418			copy_mfn = mfn_list[copy_pfn];
1419
1420			pte = mfn_to_ma(copy_mfn) | PT_NOCONSIST | PT_VALID;
1421			if (HYPERVISOR_update_va_mapping(va, pte,
1422			    UVMF_INVLPG | UVMF_LOCAL))
1423				bop_panic("relocate_boot_archive():  "
1424				    "HYPERVISOR_update_va_mapping() failed");
1425
1426			mfn_list[va_pfn] = copy_mfn;
1427			mfn_list[copy_pfn] = va_mfn;
1428
1429			t[0].ptr = mfn_to_ma(copy_mfn) | MMU_MACHPHYS_UPDATE;
1430			t[0].val = va_pfn;
1431			t[1].ptr = mfn_to_ma(va_mfn) | MMU_MACHPHYS_UPDATE;
1432			t[1].val = copy_pfn;
1433			if (HYPERVISOR_mmu_update(t, 2, &mmu_update_return,
1434			    DOMID_SELF) != 0 || mmu_update_return != 2)
1435				bop_panic("relocate_boot_archive():  "
1436				    "HYPERVISOR_mmu_update() failed");
1437
1438			next_phys += MMU_PAGESIZE;
1439			++relocated;
1440		}
1441		len -= MMU_PAGESIZE;
1442		va += MMU_PAGESIZE;
1443	}
1444	DBG_MSG("Relocated pages:\n");
1445	DBG(relocated);
1446	DBG_MSG("Out of total pages:\n");
1447	DBG(total);
1448}
1449#endif /* __xpv */
1450
1451#if !defined(__xpv)
1452/*
1453 * Install a temporary IDT that lets us catch errors in the boot time code.
1454 * We shouldn't get any faults at all while this is installed, so we'll
1455 * just generate a traceback and exit.
1456 */
1457#ifdef __amd64
1458static const int bcode_sel = B64CODE_SEL;
1459#else
1460static const int bcode_sel = B32CODE_SEL;
1461#endif
1462
1463/*
1464 * simple description of a stack frame (args are 32 bit only currently)
1465 */
1466typedef struct bop_frame {
1467	struct bop_frame *old_frame;
1468	pc_t retaddr;
1469	long arg[1];
1470} bop_frame_t;
1471
1472void
1473bop_traceback(bop_frame_t *frame)
1474{
1475	pc_t pc;
1476	int cnt;
1477	int a;
1478	char *ksym;
1479	ulong_t off;
1480
1481	bop_printf(NULL, "Stack traceback:\n");
1482	for (cnt = 0; cnt < 30; ++cnt) {	/* up to 30 frames */
1483		pc = frame->retaddr;
1484		if (pc == 0)
1485			break;
1486		ksym = kobj_getsymname(pc, &off);
1487		if (ksym)
1488			bop_printf(NULL, "  %s+%lx", ksym, off);
1489		else
1490			bop_printf(NULL, "  0x%lx", pc);
1491
1492		frame = frame->old_frame;
1493		if (frame == 0) {
1494			bop_printf(NULL, "\n");
1495			break;
1496		}
1497		for (a = 0; a < 6; ++a) {	/* try for 6 args */
1498#if defined(__i386)
1499			if ((void *)&frame->arg[a] == (void *)frame->old_frame)
1500				break;
1501			if (a == 0)
1502				bop_printf(NULL, "(");
1503			else
1504				bop_printf(NULL, ",");
1505			bop_printf(NULL, "0x%lx", frame->arg[a]);
1506#endif
1507		}
1508		bop_printf(NULL, ")\n");
1509	}
1510}
1511
1512struct trapframe {
1513	ulong_t frame_ptr;	/* %[er]bp pushed by our code */
1514	ulong_t error_code;	/* optional */
1515	ulong_t inst_ptr;
1516	ulong_t code_seg;
1517	ulong_t flags_reg;
1518#ifdef __amd64
1519	ulong_t stk_ptr;
1520	ulong_t stk_seg;
1521#endif
1522};
1523
1524void
1525bop_trap(struct trapframe *tf)
1526{
1527	bop_frame_t fakeframe;
1528	static int depth = 0;
1529
1530	/*
1531	 * Check for an infinite loop of traps.
1532	 */
1533	if (++depth > 2)
1534		bop_panic("Nested trap");
1535
1536	/*
1537	 * adjust the tf for optional error_code by detecting the code selector
1538	 */
1539	if (tf->code_seg != bcode_sel)
1540		tf = (struct trapframe *)((uintptr_t)tf - sizeof (ulong_t));
1541
1542	bop_printf(NULL, "Unexpected trap\n");
1543	bop_printf(NULL, "instruction pointer  0x%lx\n", tf->inst_ptr);
1544	bop_printf(NULL, "error code, optional 0x%lx\n",
1545	    tf->error_code & 0xffffffff);
1546	bop_printf(NULL, "code segment         0x%lx\n", tf->code_seg & 0xffff);
1547	bop_printf(NULL, "flags register       0x%lx\n", tf->flags_reg);
1548#ifdef __amd64
1549	bop_printf(NULL, "return %%rsp         0x%lx\n", tf->stk_ptr);
1550	bop_printf(NULL, "return %%ss          0x%lx\n", tf->stk_seg & 0xffff);
1551#endif
1552	fakeframe.old_frame = (bop_frame_t *)tf->frame_ptr;
1553	fakeframe.retaddr = (pc_t)tf->inst_ptr;
1554	bop_printf(NULL, "Attempting stack backtrace:\n");
1555	bop_traceback(&fakeframe);
1556	bop_panic("unexpected trap in early boot");
1557}
1558
1559extern void bop_trap_handler(void);
1560
1561static gate_desc_t *bop_idt;
1562
1563static desctbr_t bop_idt_info;
1564
1565static void
1566bop_idt_init(void)
1567{
1568	int t;
1569
1570	bop_idt = (gate_desc_t *)
1571	    do_bsys_alloc(NULL, NULL, MMU_PAGESIZE, MMU_PAGESIZE);
1572	bzero(bop_idt, MMU_PAGESIZE);
1573	for (t = 0; t < NIDT; ++t) {
1574		set_gatesegd(&bop_idt[t], &bop_trap_handler, bcode_sel,
1575		    SDT_SYSIGT, TRP_KPL);
1576	}
1577	bop_idt_info.dtr_limit = (NIDT * sizeof (gate_desc_t)) - 1;
1578	bop_idt_info.dtr_base = (uintptr_t)bop_idt;
1579	wr_idtr(&bop_idt_info);
1580}
1581#endif	/* !defined(__xpv) */
1582
1583/*
1584 * This is where we enter the kernel. It dummies up the boot_ops and
1585 * boot_syscalls vectors and jumps off to _kobj_boot()
1586 */
1587void
1588_start(struct xboot_info *xbp)
1589{
1590	bootops_t *bops = &bootop;
1591	extern void _kobj_boot();
1592
1593	/*
1594	 * 1st off - initialize the console for any error messages
1595	 */
1596	xbootp = xbp;
1597#ifdef __xpv
1598	HYPERVISOR_shared_info = (void *)xbootp->bi_shared_info;
1599	xen_info = xbootp->bi_xen_start_info;
1600#endif
1601	bcons_init((void *)xbootp->bi_cmdline);
1602	have_console = 1;
1603
1604	if (*((uint32_t *)(FASTBOOT_SWTCH_PA + FASTBOOT_STACK_OFFSET)) ==
1605	    FASTBOOT_MAGIC) {
1606		post_fastreboot = 1;
1607		*((uint32_t *)(FASTBOOT_SWTCH_PA + FASTBOOT_STACK_OFFSET)) = 0;
1608	}
1609
1610	/*
1611	 * enable debugging
1612	 */
1613	if (strstr((char *)xbootp->bi_cmdline, "kbm_debug"))
1614		kbm_debug = 1;
1615
1616	DBG_MSG("\n\n*** Entered Solaris in _start() cmdline is: ");
1617	DBG_MSG((char *)xbootp->bi_cmdline);
1618	DBG_MSG("\n\n\n");
1619
1620	/*
1621	 * physavail is no longer used by startup
1622	 */
1623	bm.physinstalled = xbp->bi_phys_install;
1624	bm.pcimem = xbp->bi_pcimem;
1625	bm.physavail = NULL;
1626
1627	/*
1628	 * initialize the boot time allocator
1629	 */
1630	next_phys = xbootp->bi_next_paddr;
1631	DBG(next_phys);
1632	next_virt = (uintptr_t)xbootp->bi_next_vaddr;
1633	DBG(next_virt);
1634	DBG_MSG("Initializing boot time memory management...");
1635#ifdef __xpv
1636	{
1637		xen_platform_parameters_t p;
1638
1639		/* This call shouldn't fail, dboot already did it once. */
1640		(void) HYPERVISOR_xen_version(XENVER_platform_parameters, &p);
1641		mfn_to_pfn_mapping = (pfn_t *)(xen_virt_start = p.virt_start);
1642		DBG(xen_virt_start);
1643	}
1644#endif
1645	kbm_init(xbootp);
1646	DBG_MSG("done\n");
1647
1648	/*
1649	 * Fill in the bootops vector
1650	 */
1651	bops->bsys_version = BO_VERSION;
1652	bops->boot_mem = &bm;
1653	bops->bsys_alloc = do_bsys_alloc;
1654	bops->bsys_free = do_bsys_free;
1655	bops->bsys_getproplen = do_bsys_getproplen;
1656	bops->bsys_getprop = do_bsys_getprop;
1657	bops->bsys_nextprop = do_bsys_nextprop;
1658	bops->bsys_printf = bop_printf;
1659	bops->bsys_doint = do_bsys_doint;
1660
1661	/*
1662	 * BOP_EALLOC() is no longer needed
1663	 */
1664	bops->bsys_ealloc = do_bsys_ealloc;
1665
1666#ifdef __xpv
1667	/*
1668	 * On domain 0 we need to free up some physical memory that is
1669	 * usable for DMA. Since GRUB loaded the boot_archive, it is
1670	 * sitting in low MFN memory. We'll relocated the boot archive
1671	 * pages to high PFN memory.
1672	 */
1673	if (DOMAIN_IS_INITDOMAIN(xen_info))
1674		relocate_boot_archive();
1675#endif
1676
1677#ifndef __xpv
1678	/*
1679	 * Install an IDT to catch early pagefaults (shouldn't have any).
1680	 * Also needed for kmdb.
1681	 */
1682	bop_idt_init();
1683#endif
1684
1685	/*
1686	 * Start building the boot properties from the command line
1687	 */
1688	DBG_MSG("Initializing boot properties:\n");
1689	build_boot_properties();
1690
1691	if (strstr((char *)xbootp->bi_cmdline, "prom_debug") || kbm_debug) {
1692		char *name;
1693		char *value;
1694		char *cp;
1695		int len;
1696
1697		value = do_bsys_alloc(NULL, NULL, MMU_PAGESIZE, MMU_PAGESIZE);
1698		bop_printf(NULL, "\nBoot properties:\n");
1699		name = "";
1700		while ((name = do_bsys_nextprop(NULL, name)) != NULL) {
1701			bop_printf(NULL, "\t0x%p %s = ", (void *)name, name);
1702			(void) do_bsys_getprop(NULL, name, value);
1703			len = do_bsys_getproplen(NULL, name);
1704			bop_printf(NULL, "len=%d ", len);
1705			value[len] = 0;
1706			for (cp = value; *cp; ++cp) {
1707				if (' ' <= *cp && *cp <= '~')
1708					bop_printf(NULL, "%c", *cp);
1709				else
1710					bop_printf(NULL, "-0x%x-", *cp);
1711			}
1712			bop_printf(NULL, "\n");
1713		}
1714	}
1715
1716	/*
1717	 * jump into krtld...
1718	 */
1719	_kobj_boot(&bop_sysp, NULL, bops, NULL);
1720}
1721
1722
1723/*ARGSUSED*/
1724static caddr_t
1725no_more_alloc(bootops_t *bop, caddr_t virthint, size_t size, int align)
1726{
1727	panic("Attempt to bsys_alloc() too late\n");
1728	return (NULL);
1729}
1730
1731/*ARGSUSED*/
1732static void
1733no_more_free(bootops_t *bop, caddr_t virt, size_t size)
1734{
1735	panic("Attempt to bsys_free() too late\n");
1736}
1737
1738void
1739bop_no_more_mem(void)
1740{
1741	DBG(total_bop_alloc_scratch);
1742	DBG(total_bop_alloc_kernel);
1743	bootops->bsys_alloc = no_more_alloc;
1744	bootops->bsys_free = no_more_free;
1745}
1746
1747
1748#ifndef __xpv
1749/*
1750 * Set ACPI firmware properties
1751 */
1752
1753static caddr_t
1754vmap_phys(size_t length, paddr_t pa)
1755{
1756	paddr_t	start, end;
1757	caddr_t	va;
1758	size_t	len, page;
1759
1760	start = P2ALIGN(pa, MMU_PAGESIZE);
1761	end = P2ROUNDUP(pa + length, MMU_PAGESIZE);
1762	len = end - start;
1763	va = (caddr_t)alloc_vaddr(len, MMU_PAGESIZE);
1764	for (page = 0; page < len; page += MMU_PAGESIZE)
1765		kbm_map((uintptr_t)va + page, start + page, 0, 0);
1766	return (va + (pa & MMU_PAGEOFFSET));
1767}
1768
/*
 * Return the sum, modulo 256, of the len bytes starting at tp.
 * An empty range sums to 0.
 */
static uint8_t
checksum_table(uint8_t *tp, size_t len)
{
	uint8_t total = 0;
	size_t i;

	for (i = 0; i < len; i++)
		total += tp[i];

	return (total);
}
1779
1780static int
1781valid_rsdp(struct rsdp *rp)
1782{
1783
1784	/* validate the V1.x checksum */
1785	if (checksum_table((uint8_t *)&rp->v1, sizeof (struct rsdp_v1)) != 0)
1786		return (0);
1787
1788	/* If pre-ACPI 2.0, this is a valid RSDP */
1789	if (rp->v1.revision < 2)
1790		return (1);
1791
1792	/* validate the V2.x checksum */
1793	if (checksum_table((uint8_t *)rp, sizeof (struct rsdp)) != 0)
1794		return (0);
1795
1796	return (1);
1797}
1798
1799/*
1800 * Scan memory range for an RSDP;
1801 * see ACPI 3.0 Spec, 5.2.5.1
1802 */
1803static struct rsdp *
1804scan_rsdp(paddr_t start, paddr_t end)
1805{
1806	size_t len  = end - start + 1;
1807	caddr_t ptr;
1808
1809	ptr = vmap_phys(len, start);
1810	while (len > 0) {
1811		if (strncmp(ptr, ACPI_RSDP_SIG, ACPI_RSDP_SIG_LEN) == 0)
1812			if (valid_rsdp((struct rsdp *)ptr))
1813				return ((struct rsdp *)ptr);
1814		ptr += 16;
1815		len -= 16;
1816	}
1817
1818	return (NULL);
1819}
1820
1821/*
1822 * Refer to ACPI 3.0 Spec, section 5.2.5.1 to understand this function
1823 */
1824static struct rsdp *
1825find_rsdp() {
1826	struct rsdp *rsdp;
1827	uint16_t *ebda_seg;
1828	paddr_t  ebda_addr;
1829
1830	/*
1831	 * Get the EBDA segment and scan the first 1K
1832	 */
1833	ebda_seg = (uint16_t *)vmap_phys(sizeof (uint16_t), ACPI_EBDA_SEG_ADDR);
1834	ebda_addr = *ebda_seg << 4;
1835	rsdp = scan_rsdp(ebda_addr, ebda_addr + ACPI_EBDA_LEN - 1);
1836	if (rsdp == NULL)
1837		/* if EBDA doesn't contain RSDP, look in BIOS memory */
1838		rsdp = scan_rsdp(0xe0000, 0xfffff);
1839	return (rsdp);
1840}
1841
1842static struct table_header *
1843map_fw_table(paddr_t table_addr)
1844{
1845	struct table_header *tp;
1846	size_t len = MAX(sizeof (struct table_header), MMU_PAGESIZE);
1847
1848	/*
1849	 * Map at least a page; if the table is larger than this, remap it
1850	 */
1851	tp = (struct table_header *)vmap_phys(len, table_addr);
1852	if (tp->len > len)
1853		tp = (struct table_header *)vmap_phys(tp->len, table_addr);
1854	return (tp);
1855}
1856
1857static struct table_header *
1858find_fw_table(char *signature)
1859{
1860	static int revision = 0;
1861	static struct xsdt *xsdt;
1862	static int len;
1863	paddr_t xsdt_addr;
1864	struct rsdp *rsdp;
1865	struct table_header *tp;
1866	paddr_t table_addr;
1867	int	n;
1868
1869	if (strlen(signature) != ACPI_TABLE_SIG_LEN)
1870		return (NULL);
1871
1872	/*
1873	 * Reading the ACPI 3.0 Spec, section 5.2.5.3 will help
1874	 * understand this code.  If we haven't already found the RSDT/XSDT,
1875	 * revision will be 0. Find the RSDP and check the revision
1876	 * to find out whether to use the RSDT or XSDT.  If revision is
1877	 * 0 or 1, use the RSDT and set internal revision to 1; if it is 2,
1878	 * use the XSDT.  If the XSDT address is 0, though, fall back to
1879	 * revision 1 and use the RSDT.
1880	 */
1881	if (revision == 0) {
1882		if ((rsdp = (struct rsdp *)find_rsdp()) != NULL) {
1883			revision = rsdp->v1.revision;
1884			switch (revision) {
1885			case 2:
1886				/*
1887				 * Use the XSDT unless BIOS is buggy and
1888				 * claims to be rev 2 but has a null XSDT
1889				 * address
1890				 */
1891				xsdt_addr = rsdp->xsdt;
1892				if (xsdt_addr != 0)
1893					break;
1894				/* FALLTHROUGH */
1895			case 0:
1896				/* treat RSDP rev 0 as revision 1 internally */
1897				revision = 1;
1898				/* FALLTHROUGH */
1899			case 1:
1900				/* use the RSDT for rev 0/1 */
1901				xsdt_addr = rsdp->v1.rsdt;
1902				break;
1903			default:
1904				/* unknown revision */
1905				revision = 0;
1906				break;
1907			}
1908		}
1909		if (revision == 0)
1910			return (NULL);
1911
1912		/* cache the XSDT info */
1913		xsdt = (struct xsdt *)map_fw_table(xsdt_addr);
1914		len = (xsdt->hdr.len - sizeof (xsdt->hdr)) /
1915		    ((revision == 1) ? sizeof (uint32_t) : sizeof (uint64_t));
1916	}
1917
1918	/*
1919	 * Scan the table headers looking for a signature match
1920	 */
1921	for (n = 0; n < len; n++) {
1922		table_addr = (revision == 1) ? xsdt->p.r[n] : xsdt->p.x[n];
1923		if (table_addr == 0)
1924			continue;
1925		tp = map_fw_table(table_addr);
1926		if (strncmp(tp->sig, signature, ACPI_TABLE_SIG_LEN) == 0) {
1927			return (tp);
1928		}
1929	}
1930	return (NULL);
1931}
1932
1933static void
1934process_madt(struct madt *tp)
1935{
1936	struct madt_processor *cpu, *end;
1937	uint32_t cpu_count = 0;
1938	uint8_t cpu_apicid_array[UINT8_MAX + 1];
1939
1940	if (tp != NULL) {
1941		/*
1942		 * Determine number of CPUs and keep track of "final" APIC ID
1943		 * for each CPU by walking through ACPI MADT processor list
1944		 */
1945		end = (struct madt_processor *)(tp->hdr.len + (uintptr_t)tp);
1946		cpu = tp->list;
1947		while (cpu < end) {
1948			if (cpu->type == MADT_PROCESSOR) {
1949				if (cpu->flags & 1) {
1950					if (cpu_count < UINT8_MAX)
1951						cpu_apicid_array[cpu_count] =
1952						    cpu->apic_id;
1953					cpu_count++;
1954				}
1955			}
1956
1957			cpu = (struct madt_processor *)
1958			    (cpu->len + (uintptr_t)cpu);
1959		}
1960
1961		/*
1962		 * Make boot property for array of "final" APIC IDs for each
1963		 * CPU
1964		 */
1965		bsetprop(BP_CPU_APICID_ARRAY, strlen(BP_CPU_APICID_ARRAY),
1966		    cpu_apicid_array, cpu_count * sizeof (uint8_t));
1967	}
1968
1969	/*
1970	 * User-set boot-ncpus overrides firmware count
1971	 */
1972	if (do_bsys_getproplen(NULL, "boot-ncpus") >= 0)
1973		return;
1974
1975	/*
1976	 * Set boot property for boot-ncpus to number of CPUs given in MADT
1977	 * if user hasn't set the property already
1978	 */
1979	if (tp != NULL)
1980		bsetpropsi("boot-ncpus", cpu_count);
1981}
1982
1983static void
1984process_srat(struct srat *tp)
1985{
1986	struct srat_item *item, *end;
1987	int i;
1988	int proc_num, mem_num;
1989#pragma pack(1)
1990	struct {
1991		uint32_t domain;
1992		uint32_t apic_id;
1993		uint32_t sapic_id;
1994	} processor;
1995	struct {
1996		uint32_t domain;
1997		uint32_t x2apic_id;
1998	} x2apic;
1999	struct {
2000		uint32_t domain;
2001		uint64_t addr;
2002		uint64_t length;
2003		uint32_t flags;
2004	} memory;
2005#pragma pack()
2006	char prop_name[30];
2007
2008	if (tp == NULL)
2009		return;
2010
2011	proc_num = mem_num = 0;
2012	end = (struct srat_item *)(tp->hdr.len + (uintptr_t)tp);
2013	item = tp->list;
2014	while (item < end) {
2015		switch (item->type) {
2016		case SRAT_PROCESSOR:
2017			if (!(item->i.p.flags & SRAT_ENABLED))
2018				break;
2019			processor.domain = item->i.p.domain1;
2020			for (i = 0; i < 3; i++)
2021				processor.domain +=
2022				    item->i.p.domain2[i] << ((i + 1) * 8);
2023			processor.apic_id = item->i.p.apic_id;
2024			processor.sapic_id = item->i.p.local_sapic_eid;
2025			(void) snprintf(prop_name, 30, "acpi-srat-processor-%d",
2026			    proc_num);
2027			bsetprop(prop_name, strlen(prop_name), &processor,
2028			    sizeof (processor));
2029			proc_num++;
2030			break;
2031		case SRAT_MEMORY:
2032			if (!(item->i.m.flags & SRAT_ENABLED))
2033				break;
2034			memory.domain = item->i.m.domain;
2035			memory.addr = item->i.m.base_addr;
2036			memory.length = item->i.m.len;
2037			memory.flags = item->i.m.flags;
2038			(void) snprintf(prop_name, 30, "acpi-srat-memory-%d",
2039			    mem_num);
2040			bsetprop(prop_name, strlen(prop_name), &memory,
2041			    sizeof (memory));
2042			mem_num++;
2043			break;
2044		case SRAT_X2APIC:
2045			if (!(item->i.xp.flags & SRAT_ENABLED))
2046				break;
2047			x2apic.domain = item->i.xp.domain;
2048			x2apic.x2apic_id = item->i.xp.x2apic_id;
2049			(void) snprintf(prop_name, 30, "acpi-srat-processor-%d",
2050			    proc_num);
2051			bsetprop(prop_name, strlen(prop_name), &x2apic,
2052			    sizeof (x2apic));
2053			proc_num++;
2054			break;
2055		}
2056
2057		item = (struct srat_item *)
2058		    (item->len + (caddr_t)item);
2059	}
2060}
2061
2062static void
2063process_slit(struct slit *tp)
2064{
2065
2066	/*
2067	 * Check the number of localities; if it's too huge, we just
2068	 * return and locality enumeration code will handle this later,
2069	 * if possible.
2070	 *
2071	 * Note that the size of the table is the square of the
2072	 * number of localities; if the number of localities exceeds
2073	 * UINT16_MAX, the table size may overflow an int when being
2074	 * passed to bsetprop() below.
2075	 */
2076	if (tp->number >= SLIT_LOCALITIES_MAX)
2077		return;
2078
2079	bsetprop(SLIT_NUM_PROPNAME, strlen(SLIT_NUM_PROPNAME), &tp->number,
2080	    sizeof (tp->number));
2081	bsetprop(SLIT_PROPNAME, strlen(SLIT_PROPNAME), &tp->entry,
2082	    tp->number * tp->number);
2083}
2084
2085static void
2086process_dmar(struct dmar *tp)
2087{
2088	bsetprop(DMAR_TABLE_PROPNAME, strlen(DMAR_TABLE_PROPNAME),
2089	    tp, tp->hdr.len);
2090}
2091
2092#else /* __xpv */
2093static void
2094enumerate_xen_cpus()
2095{
2096	processorid_t	id, max_id;
2097
2098	/*
2099	 * User-set boot-ncpus overrides enumeration
2100	 */
2101	if (do_bsys_getproplen(NULL, "boot-ncpus") >= 0)
2102		return;
2103
2104	/*
2105	 * Probe every possible virtual CPU id and remember the
2106	 * highest id present; the count of CPUs is one greater
2107	 * than this.  This tacitly assumes at least cpu 0 is present.
2108	 */
2109	max_id = 0;
2110	for (id = 0; id < MAX_VIRT_CPUS; id++)
2111		if (HYPERVISOR_vcpu_op(VCPUOP_is_up, id, NULL) == 0)
2112			max_id = id;
2113
2114	bsetpropsi("boot-ncpus", max_id+1);
2115
2116}
2117#endif /* __xpv */
2118
2119static void
2120build_firmware_properties(void)
2121{
2122#ifndef __xpv
2123	struct table_header *tp;
2124
2125	if ((tp = find_fw_table("APIC")) != NULL)
2126		process_madt((struct madt *)tp);
2127
2128	if ((srat_ptr = (struct srat *)find_fw_table("SRAT")) != NULL)
2129		process_srat(srat_ptr);
2130
2131	if (slit_ptr = (struct slit *)find_fw_table("SLIT"))
2132		process_slit(slit_ptr);
2133
2134	if (tp = find_fw_table("DMAR"))
2135		process_dmar((struct dmar *)tp);
2136#else /* __xpv */
2137	enumerate_xen_cpus();
2138#endif /* __xpv */
2139}
2140
2141/*
2142 * fake up a boot property for USB serial console early boot output
2143 */
2144void *
2145usbser_init(size_t size)
2146{
2147	static char *p = NULL;
2148
2149	p = do_bsys_alloc(NULL, NULL, size, MMU_PAGESIZE);
2150	*p = 0;
2151	bsetprop("usb-serial-buf", strlen("usb-serial-buf") + 1,
2152	    &p, sizeof (p));
2153	return (p);
2154}
2155
2156/*ARGSUSED*/
2157int
2158boot_compinfo(int fd, struct compinfo *cbp)
2159{
2160	cbp->iscmp = 0;
2161	cbp->blksize = MAXBSIZE;
2162	return (0);
2163}
2164