fakebop.c revision 6336:4eaf084434c9
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29/*
30 * This file contains the functionality that mimics the boot operations
31 * on SPARC systems or the old boot.bin/multiboot programs on x86 systems.
32 * The x86 kernel now does everything on its own.
33 */
34
35#include <sys/types.h>
36#include <sys/bootconf.h>
37#include <sys/bootsvcs.h>
38#include <sys/bootinfo.h>
39#include <sys/multiboot.h>
40#include <sys/bootvfs.h>
41#include <sys/bootprops.h>
42#include <sys/varargs.h>
43#include <sys/param.h>
44#include <sys/machparam.h>
45#include <sys/archsystm.h>
46#include <sys/boot_console.h>
47#include <sys/cmn_err.h>
48#include <sys/systm.h>
49#include <sys/promif.h>
50#include <sys/archsystm.h>
51#include <sys/x86_archext.h>
52#include <sys/kobj.h>
53#include <sys/privregs.h>
54#include <sys/sysmacros.h>
55#include <sys/ctype.h>
56#ifdef __xpv
57#include <sys/hypervisor.h>
58#include <net/if.h>
59#endif
60#include <vm/kboot_mmu.h>
61#include <vm/hat_pte.h>
62#include "acpi_fw.h"
63
/*
 * Console state. bop_printf() discards its output while have_console is 0.
 */
static int have_console = 0;	/* set once primitive console is initialized */

/*
 * Non -B portion of the boot command line; starts out as the empty string
 * and is replaced by build_boot_properties().
 */
static char *boot_args = "";

/*
 * Debugging macros
 *
 * Both macros only print when kbm_debug is non-zero. DBG_MSG() prints a
 * string, DBG() prints the expression text followed by its value in hex.
 * They expand to a brace block, not a do/while(0) statement.
 */
static uint_t kbm_debug = 0;
#define	DBG_MSG(s)	{ if (kbm_debug) bop_printf(NULL, "%s", s); }
#define	DBG(x)		{ if (kbm_debug)			\
	bop_printf(NULL, "%s is %" PRIx64 "\n", #x, (uint64_t)(x));	\
	}

/*
 * Write a NUL terminated string to the boot console one character at a
 * time via bcons_putchar(). Declares a local named "cp" inside its block.
 */
#define	PUT_STRING(s) {				\
	char *cp;				\
	for (cp = (s); *cp; ++cp)		\
		bcons_putchar(*cp);		\
	}

struct xboot_info *xbootp;	/* boot info from "glue" code in low memory */
bootops_t bootop;	/* simple bootops we'll pass on to kernel */
struct bsys_mem bm;

/*
 * Allocator cursors: alloc_vaddr() advances next_virt, and
 * do_bop_phys_alloc() hands out physical memory between next_phys and
 * high_phys. high_phys starts as the largest paddr_t value (no limit yet).
 */
static uintptr_t next_virt;	/* next available virtual address */
static paddr_t next_phys;	/* next available physical address from dboot */
static paddr_t high_phys = -(paddr_t)1;	/* last used physical address */

/*
 * buffer for vsnprintf for console I/O
 *
 * This is a single static buffer shared by every bop_printf() call;
 * formatted output longer than BUFFERSIZE - 1 characters is truncated.
 */
#define	BUFFERSIZE	256
static char buffer[BUFFERSIZE];
/*
 * stuff to store/report/manipulate boot property settings.
 *
 * Properties live on a singly linked list headed by bprops. bsetprop()
 * always inserts at the head, so lookups that walk from bprops see the
 * most recently set property of a given name first.
 */
typedef struct bootprop {
	struct bootprop *bp_next;	/* next (older) property, or NULL */
	char *bp_name;			/* NUL terminated property name */
	uint_t bp_vlen;			/* length of the value in bytes */
	char *bp_value;			/* value bytes; not NUL terminated */
					/* unless the setter included one */
} bootprop_t;

static bootprop_t *bprops = NULL;
static char *curr_page = NULL;		/* ptr to avail bprop memory */
static int curr_space = 0;		/* amount of memory at curr_page */
108
#ifdef __xpv
/*
 * Hypervisor supplied structures, only present in the __xpv build.
 */
start_info_t *xen_info;
shared_info_t *HYPERVISOR_shared_info;
#endif

/*
 * some allocator statistics
 *
 * do_bsys_alloc() adds the page-rounded request size to the "scratch"
 * counter when it picks the virtual address itself and to the "kernel"
 * counter when the caller supplied a virtual address.
 */
static ulong_t total_bop_alloc_scratch = 0;
static ulong_t total_bop_alloc_kernel = 0;

static void build_firmware_properties(void);

/*
 * While non-zero, do_bop_phys_alloc() allocates from the lowest usable
 * physical address; boot_prop_finish() clears it once the "physmem"
 * property has been examined, after which allocation is from the top.
 */
static int early_allocation = 1;
123
124/*
125 * Allocate aligned physical memory at boot time. This allocator allocates
126 * from the highest possible addresses. This avoids exhausting memory that
127 * would be useful for DMA buffers.
128 */
129paddr_t
130do_bop_phys_alloc(uint64_t size, uint64_t align)
131{
132	paddr_t	pa = 0;
133	paddr_t	start;
134	paddr_t	end;
135	struct memlist	*ml = (struct memlist *)xbootp->bi_phys_install;
136
137	/*
138	 * Be careful if high memory usage is limited in startup.c
139	 * Since there are holes in the low part of the physical address
140	 * space we can treat physmem as a pfn (not just a pgcnt) and
141	 * get a conservative upper limit.
142	 */
143	if (physmem != 0 && high_phys > pfn_to_pa(physmem))
144		high_phys = pfn_to_pa(physmem);
145
146	/*
147	 * find the lowest or highest available memory in physinstalled
148	 * On 32 bit avoid physmem above 4Gig if PAE isn't enabled
149	 */
150#if defined(__i386)
151	if (xbootp->bi_use_pae == 0 && high_phys > FOUR_GIG)
152		high_phys = FOUR_GIG;
153#endif
154
155	/*
156	 * find the highest available memory in physinstalled
157	 */
158	size = P2ROUNDUP(size, align);
159	for (; ml; ml = ml->next) {
160		start = P2ROUNDUP(ml->address, align);
161		end = P2ALIGN(ml->address + ml->size, align);
162		if (start < next_phys)
163			start = P2ROUNDUP(next_phys, align);
164		if (end > high_phys)
165			end = P2ALIGN(high_phys, align);
166
167		if (end <= start)
168			continue;
169		if (end - start < size)
170			continue;
171
172		/*
173		 * Early allocations need to use low memory, since
174		 * physmem might be further limited by bootenv.rc
175		 */
176		if (early_allocation) {
177			if (pa == 0 || start < pa)
178				pa = start;
179		} else {
180			if (end - size > pa)
181				pa = end - size;
182		}
183	}
184	if (pa != 0) {
185		if (early_allocation)
186			next_phys = pa + size;
187		else
188			high_phys = pa;
189		return (pa);
190	}
191	bop_panic("do_bop_phys_alloc(0x%" PRIx64 ", 0x%" PRIx64
192	    ") Out of memory\n", size, align);
193	/*NOTREACHED*/
194}
195
196static uintptr_t
197alloc_vaddr(size_t size, paddr_t align)
198{
199	uintptr_t rv;
200
201	next_virt = P2ROUNDUP(next_virt, (uintptr_t)align);
202	rv = (uintptr_t)next_virt;
203	next_virt += size;
204	return (rv);
205}
206
207/*
208 * Allocate virtual memory. The size is always rounded up to a multiple
209 * of base pagesize.
210 */
211
212/*ARGSUSED*/
213static caddr_t
214do_bsys_alloc(bootops_t *bop, caddr_t virthint, size_t size, int align)
215{
216	paddr_t a = align;	/* same type as pa for masking */
217	uint_t pgsize;
218	paddr_t pa;
219	uintptr_t va;
220	ssize_t s;		/* the aligned size */
221	uint_t level;
222	uint_t is_kernel = (virthint != 0);
223
224	if (a < MMU_PAGESIZE)
225		a = MMU_PAGESIZE;
226	else if (!ISP2(a))
227		prom_panic("do_bsys_alloc() incorrect alignment");
228	size = P2ROUNDUP(size, MMU_PAGESIZE);
229
230	/*
231	 * Use the next aligned virtual address if we weren't given one.
232	 */
233	if (virthint == NULL) {
234		virthint = (caddr_t)alloc_vaddr(size, a);
235		total_bop_alloc_scratch += size;
236	} else {
237		total_bop_alloc_kernel += size;
238	}
239
240	/*
241	 * allocate the physical memory
242	 */
243	pa = do_bop_phys_alloc(size, a);
244
245	/*
246	 * Add the mappings to the page tables, try large pages first.
247	 */
248	va = (uintptr_t)virthint;
249	s = size;
250	level = 1;
251	pgsize = xbootp->bi_use_pae ? TWO_MEG : FOUR_MEG;
252	if (xbootp->bi_use_largepage && a == pgsize) {
253		while (IS_P2ALIGNED(pa, pgsize) && IS_P2ALIGNED(va, pgsize) &&
254		    s >= pgsize) {
255			kbm_map(va, pa, level, is_kernel);
256			va += pgsize;
257			pa += pgsize;
258			s -= pgsize;
259		}
260	}
261
262	/*
263	 * Map remaining pages use small mappings
264	 */
265	level = 0;
266	pgsize = MMU_PAGESIZE;
267	while (s > 0) {
268		kbm_map(va, pa, level, is_kernel);
269		va += pgsize;
270		pa += pgsize;
271		s -= pgsize;
272	}
273	return (virthint);
274}
275
276/*
277 * Free virtual memory - we'll just ignore these.
278 */
279/*ARGSUSED*/
280static void
281do_bsys_free(bootops_t *bop, caddr_t virt, size_t size)
282{
283	bop_printf(NULL, "do_bsys_free(virt=0x%p, size=0x%lx) ignored\n",
284	    (void *)virt, size);
285}
286
287/*
288 * Old interface
289 */
290/*ARGSUSED*/
291static caddr_t
292do_bsys_ealloc(
293	bootops_t *bop,
294	caddr_t virthint,
295	size_t size,
296	int align,
297	int flags)
298{
299	prom_panic("unsupported call to BOP_EALLOC()\n");
300	return (0);
301}
302
303
304static void
305bsetprop(char *name, int nlen, void *value, int vlen)
306{
307	uint_t size;
308	uint_t need_size;
309	bootprop_t *b;
310
311	/*
312	 * align the size to 16 byte boundary
313	 */
314	size = sizeof (bootprop_t) + nlen + 1 + vlen;
315	size = (size + 0xf) & ~0xf;
316	if (size > curr_space) {
317		need_size = (size + (MMU_PAGEOFFSET)) & MMU_PAGEMASK;
318		curr_page = do_bsys_alloc(NULL, 0, need_size, MMU_PAGESIZE);
319		curr_space = need_size;
320	}
321
322	/*
323	 * use a bootprop_t at curr_page and link into list
324	 */
325	b = (bootprop_t *)curr_page;
326	curr_page += sizeof (bootprop_t);
327	curr_space -=  sizeof (bootprop_t);
328	b->bp_next = bprops;
329	bprops = b;
330
331	/*
332	 * follow by name and ending zero byte
333	 */
334	b->bp_name = curr_page;
335	bcopy(name, curr_page, nlen);
336	curr_page += nlen;
337	*curr_page++ = 0;
338	curr_space -= nlen + 1;
339
340	/*
341	 * copy in value, but no ending zero byte
342	 */
343	b->bp_value = curr_page;
344	b->bp_vlen = vlen;
345	if (vlen > 0) {
346		bcopy(value, curr_page, vlen);
347		curr_page += vlen;
348		curr_space -= vlen;
349	}
350
351	/*
352	 * align new values of curr_page, curr_space
353	 */
354	while (curr_space & 0xf) {
355		++curr_page;
356		--curr_space;
357	}
358}
359
/*
 * Set a string valued boot property; the stored value includes the
 * terminating zero byte.
 */
static void
bsetprops(char *name, char *value)
{
	int name_len = strlen(name);
	int value_len = strlen(value) + 1;

	bsetprop(name, name_len, value, value_len);
}
365
/*
 * Set a boot property whose value is the raw 8 bytes of a uint64_t.
 */
static void
bsetprop64(char *name, uint64_t value)
{
	void *raw = (void *)&value;

	bsetprop(name, strlen(name), raw, sizeof (value));
}
371
/*
 * Set a boot property to the decimal string form of an int.
 */
static void
bsetpropsi(char *name, int value)
{
	char digits[32];

	(void) snprintf(digits, sizeof (digits), "%d", value);
	bsetprops(name, digits);
}
380
381/*
382 * to find the size of the buffer to allocate
383 */
384/*ARGSUSED*/
385int
386do_bsys_getproplen(bootops_t *bop, const char *name)
387{
388	bootprop_t *b;
389
390	for (b = bprops; b; b = b->bp_next) {
391		if (strcmp(name, b->bp_name) != 0)
392			continue;
393		return (b->bp_vlen);
394	}
395	return (-1);
396}
397
398/*
399 * get the value associated with this name
400 */
401/*ARGSUSED*/
402int
403do_bsys_getprop(bootops_t *bop, const char *name, void *value)
404{
405	bootprop_t *b;
406
407	for (b = bprops; b; b = b->bp_next) {
408		if (strcmp(name, b->bp_name) != 0)
409			continue;
410		bcopy(b->bp_value, value, b->bp_vlen);
411		return (0);
412	}
413	return (-1);
414}
415
416/*
417 * get the name of the next property in succession from the standalone
418 */
419/*ARGSUSED*/
420static char *
421do_bsys_nextprop(bootops_t *bop, char *name)
422{
423	bootprop_t *b;
424
425	/*
426	 * A null name is a special signal for the 1st boot property
427	 */
428	if (name == NULL || strlen(name) == 0) {
429		if (bprops == NULL)
430			return (NULL);
431		return (bprops->bp_name);
432	}
433
434	for (b = bprops; b; b = b->bp_next) {
435		if (name != b->bp_name)
436			continue;
437		b = b->bp_next;
438		if (b == NULL)
439			return (NULL);
440		return (b->bp_name);
441	}
442	return (NULL);
443}
444
/*
 * Parse numeric value from a string. Understands decimal, hex, octal, - and ~
 *
 * An optional leading '-' or '~' negates or complements the result. A
 * leading "0x"/"0X" selects hex, any other leading '0' selects octal,
 * everything else is decimal. The whole string must consist of digits
 * valid for the radix.
 *
 * Returns 0 and stores the result in *retval on success; returns -1 (with
 * *retval set to 0) if an invalid character is found. Overflow is not
 * detected.
 */
static int
parse_value(char *p, uint64_t *retval)
{
	int adjust = 0;
	uint64_t tmp = 0;
	int digit;
	int radix = 10;

	*retval = 0;
	if (*p == '-' || *p == '~')
		adjust = *p++;

	if (*p == '0') {
		++p;
		if (*p == 'x' || *p == 'X') {
			radix = 16;
			++p;
		} else {
			/*
			 * Only the leading '0' is consumed here; the digits
			 * that follow it are part of the octal value.
			 */
			radix = 8;
		}
	}
	while (*p) {
		if ('0' <= *p && *p <= '9')
			digit = *p - '0';
		else if ('a' <= *p && *p <= 'f')
			digit = 10 + *p - 'a';
		else if ('A' <= *p && *p <= 'F')
			digit = 10 + *p - 'A';
		else
			return (-1);
		if (digit >= radix)
			return (-1);
		tmp = tmp * radix + digit;
		++p;
	}

	/*
	 * The prefix is applied even to a bare "0", so "~0" is all ones.
	 */
	if (adjust == '-')
		tmp = -tmp;
	else if (adjust == '~')
		tmp = ~tmp;
	*retval = tmp;
	return (0);
}
493
494/*
495 * 2nd part of building the table of boot properties. This includes:
496 * - values from /boot/solaris/bootenv.rc (ie. eeprom(1m) values)
497 *
498 * lines look like one of:
499 * ^$
500 * ^# comment till end of line
501 * setprop name 'value'
502 * setprop name value
503 * setprop name "value"
504 *
505 * we do single character I/O since this is really just looking at memory
506 */
507void
508boot_prop_finish(void)
509{
510	int fd;
511	char *line;
512	int c;
513	int bytes_read;
514	char *name;
515	int n_len;
516	char *value;
517	int v_len;
518	char *inputdev;	/* these override the command line if serial ports */
519	char *outputdev;
520	char *consoledev;
521	uint64_t lvalue;
522	int use_xencons = 0;
523
524#ifdef __xpv
525	if (!DOMAIN_IS_INITDOMAIN(xen_info))
526		use_xencons = 1;
527#endif /* __xpv */
528
529	DBG_MSG("Opening /boot/solaris/bootenv.rc\n");
530	fd = BRD_OPEN(bfs_ops, "/boot/solaris/bootenv.rc", 0);
531	DBG(fd);
532
533	line = do_bsys_alloc(NULL, NULL, MMU_PAGESIZE, MMU_PAGESIZE);
534	while (fd >= 0) {
535
536		/*
537		 * get a line
538		 */
539		for (c = 0; ; ++c) {
540			bytes_read = BRD_READ(bfs_ops, fd, line + c, 1);
541			if (bytes_read == 0) {
542				if (c == 0)
543					goto done;
544				break;
545			}
546			if (line[c] == '\n')
547				break;
548		}
549		line[c] = 0;
550
551		/*
552		 * ignore comment lines
553		 */
554		c = 0;
555		while (ISSPACE(line[c]))
556			++c;
557		if (line[c] == '#' || line[c] == 0)
558			continue;
559
560		/*
561		 * must have "setprop " or "setprop\t"
562		 */
563		if (strncmp(line + c, "setprop ", 8) != 0 &&
564		    strncmp(line + c, "setprop\t", 8) != 0)
565			continue;
566		c += 8;
567		while (ISSPACE(line[c]))
568			++c;
569		if (line[c] == 0)
570			continue;
571
572		/*
573		 * gather up the property name
574		 */
575		name = line + c;
576		n_len = 0;
577		while (line[c] && !ISSPACE(line[c]))
578			++n_len, ++c;
579
580		/*
581		 * gather up the value, if any
582		 */
583		value = "";
584		v_len = 0;
585		while (ISSPACE(line[c]))
586			++c;
587		if (line[c] != 0) {
588			value = line + c;
589			while (line[c] && !ISSPACE(line[c]))
590				++v_len, ++c;
591		}
592
593		if (v_len >= 2 && value[0] == value[v_len - 1] &&
594		    (value[0] == '\'' || value[0] == '"')) {
595			++value;
596			v_len -= 2;
597		}
598		name[n_len] = 0;
599		if (v_len > 0)
600			value[v_len] = 0;
601		else
602			continue;
603
604		/*
605		 * ignore "boot-file" property, it's now meaningless
606		 */
607		if (strcmp(name, "boot-file") == 0)
608			continue;
609		if (strcmp(name, "boot-args") == 0 &&
610		    strlen(boot_args) > 0)
611			continue;
612
613		/*
614		 * If a property was explicitly set on the command line
615		 * it will override a setting in bootenv.rc
616		 */
617		if (do_bsys_getproplen(NULL, name) > 0)
618			continue;
619
620		bsetprop(name, n_len, value, v_len + 1);
621	}
622done:
623	if (fd >= 0)
624		BRD_CLOSE(bfs_ops, fd);
625
626	/*
627	 * Check if we have to limit the boot time allocator
628	 */
629	if (do_bsys_getproplen(NULL, "physmem") != -1 &&
630	    do_bsys_getprop(NULL, "physmem", line) >= 0 &&
631	    parse_value(line, &lvalue) != -1) {
632		if (0 < lvalue && (lvalue < physmem || physmem == 0)) {
633			physmem = (pgcnt_t)lvalue;
634			DBG(physmem);
635		}
636	}
637	early_allocation = 0;
638
639	/*
640	 * check to see if we have to override the default value of the console
641	 */
642	if (!use_xencons) {
643		inputdev = line;
644		v_len = do_bsys_getproplen(NULL, "input-device");
645		if (v_len > 0)
646			(void) do_bsys_getprop(NULL, "input-device", inputdev);
647		else
648			v_len = 0;
649		inputdev[v_len] = 0;
650
651		outputdev = inputdev + v_len + 1;
652		v_len = do_bsys_getproplen(NULL, "output-device");
653		if (v_len > 0)
654			(void) do_bsys_getprop(NULL, "output-device",
655			    outputdev);
656		else
657			v_len = 0;
658		outputdev[v_len] = 0;
659
660		consoledev = outputdev + v_len + 1;
661		v_len = do_bsys_getproplen(NULL, "console");
662		if (v_len > 0)
663			(void) do_bsys_getprop(NULL, "console", consoledev);
664		else
665			v_len = 0;
666		consoledev[v_len] = 0;
667		bcons_init2(inputdev, outputdev, consoledev);
668	} else {
669		/*
670		 * Ensure console property exists
671		 * If not create it as "hypervisor"
672		 */
673		v_len = do_bsys_getproplen(NULL, "console");
674		if (v_len < 0)
675			bsetprops("console", "hypervisor");
676		inputdev = outputdev = consoledev = "hypervisor";
677		bcons_init2(inputdev, outputdev, consoledev);
678	}
679
680	if (strstr((char *)xbootp->bi_cmdline, "prom_debug") || kbm_debug) {
681		value = line;
682		bop_printf(NULL, "\nBoot properties:\n");
683		name = "";
684		while ((name = do_bsys_nextprop(NULL, name)) != NULL) {
685			bop_printf(NULL, "\t0x%p %s = ", (void *)name, name);
686			(void) do_bsys_getprop(NULL, name, value);
687			v_len = do_bsys_getproplen(NULL, name);
688			bop_printf(NULL, "len=%d ", v_len);
689			value[v_len] = 0;
690			bop_printf(NULL, "%s\n", value);
691		}
692	}
693}
694
695/*
696 * print formatted output
697 */
698/*PRINTFLIKE2*/
699/*ARGSUSED*/
700void
701bop_printf(bootops_t *bop, const char *fmt, ...)
702{
703	va_list	ap;
704
705	if (have_console == 0)
706		return;
707
708	va_start(ap, fmt);
709	(void) vsnprintf(buffer, BUFFERSIZE, fmt, ap);
710	va_end(ap);
711	PUT_STRING(buffer);
712}
713
714/*
715 * Another panic() variant; this one can be used even earlier during boot than
716 * prom_panic().
717 */
718/*PRINTFLIKE1*/
719void
720bop_panic(const char *fmt, ...)
721{
722	va_list ap;
723
724	va_start(ap, fmt);
725	bop_printf(NULL, fmt, ap);
726	va_end(ap);
727
728	bop_printf(NULL, "\nPress any key to reboot.\n");
729	(void) bcons_getchar();
730	bop_printf(NULL, "Resetting...\n");
731	pc_reset();
732}
733
/*
 * Do a real mode interrupt BIOS call
 *
 * bios_regs_t is the 16 bit register set exchanged with the low memory
 * BIOS call code; do_bsys_doint() fills it in from a struct bop_regs and
 * copies it back afterwards. bios_func_t is the entry point of that
 * code: it takes the interrupt number and the register block, and its
 * return value is stored in the caller's eflags by do_bsys_doint().
 */
typedef struct bios_regs {
	unsigned short ax, bx, cx, dx, si, di, bp, es, ds;
} bios_regs_t;
typedef int (*bios_func_t)(int, bios_regs_t *);
741
742/*ARGSUSED*/
743static void
744do_bsys_doint(bootops_t *bop, int intnum, struct bop_regs *rp)
745{
746#if defined(__xpv)
747	prom_panic("unsupported call to BOP_DOINT()\n");
748#else	/* __xpv */
749	static int firsttime = 1;
750	bios_func_t bios_func = (bios_func_t)(void *)(uintptr_t)0x5000;
751	bios_regs_t br;
752
753	/*
754	 * The first time we do this, we have to copy the pre-packaged
755	 * low memory bios call code image into place.
756	 */
757	if (firsttime) {
758		extern char bios_image[];
759		extern uint32_t bios_size;
760
761		bcopy(bios_image, (void *)bios_func, bios_size);
762		firsttime = 0;
763	}
764
765	br.ax = rp->eax.word.ax;
766	br.bx = rp->ebx.word.bx;
767	br.cx = rp->ecx.word.cx;
768	br.dx = rp->edx.word.dx;
769	br.bp = rp->ebp.word.bp;
770	br.si = rp->esi.word.si;
771	br.di = rp->edi.word.di;
772	br.ds = rp->ds;
773	br.es = rp->es;
774
775	DBG_MSG("Doing BIOS call...");
776	rp->eflags = bios_func(intnum, &br);
777	DBG_MSG("done\n");
778
779	rp->eax.word.ax = br.ax;
780	rp->ebx.word.bx = br.bx;
781	rp->ecx.word.cx = br.cx;
782	rp->edx.word.dx = br.dx;
783	rp->ebp.word.bp = br.bp;
784	rp->esi.word.si = br.si;
785	rp->edi.word.di = br.di;
786	rp->ds = br.ds;
787	rp->es = br.es;
788#endif /* __xpv */
789}
790
/*
 * Console entry points (get a character, put a character, test for a
 * pending character) packaged as a struct boot_syscalls.
 */
static struct boot_syscalls bop_sysp = {
	bcons_getchar,
	bcons_putchar,
	bcons_ischar,
};

/*
 * Name of the booted kernel file; set by build_boot_properties() from
 * the boot-file on the command line.
 */
static char *whoami;

/* size of the scratch buffer used to format ip/mac address strings */
#define	BUFLEN	64
800
801#if defined(__xpv)
802
/*
 * Scratch buffer in which build_boot_properties() assembles "xpv-"
 * prefixed property names.
 */
static char namebuf[32];
804
/*
 * Split the ':' separated string "s" into fields and create one string
 * property per field. prop_map[i] names the property for field i; a NULL
 * entry means the field is ignored, as are empty fields. At most n_prop
 * fields are consumed and scanning stops at the end of the string, so a
 * string with fewer fields than n_prop is never read past its terminator.
 *
 * "s" is modified: each ':' that ends a stored field is overwritten with
 * a zero byte.
 */
static void
xen_parse_props(char *s, char *prop_map[], int n_prop)
{
	char **prop_name = prop_map;
	char *cp = s, *scp;
	int at_end;

	while (n_prop > 0) {
		scp = cp;
		while ((*cp != '\0') && (*cp != ':'))
			cp++;

		/* remember this before the separator is overwritten */
		at_end = (*cp == '\0');

		if ((scp != cp) && (*prop_name != NULL)) {
			*cp = '\0';
			bsetprops(*prop_name, scp);
		}

		if (at_end)
			break;

		cp++;
		prop_name++;
		n_prop--;
	}
}
826
#define	VBDPATHLEN	64

/*
 * parse the 'xpv-root' property to create properties used by
 * ufs_mountroot.
 *
 * "s" is expected to look like /dev/dsk/c0d<N>s<M> or /dev/dsk/c0d<N>p<M>.
 * Only the length of the "/dev/dsk/c0d" prefix is used, its contents are
 * not compared. <N> (decimal) becomes the xdf unit address in hex and
 * s<M>/p<M> select the minor name letter counting from 'a' (slices) or
 * 'q' (partitions), giving a bootpath of the form "/xpvd/xdf@<hex>:<c>".
 * On success the "fstype" (ufs) and "bootpath" properties are set.
 *
 * A string that is too short, or that has neither 's' nor 'p' after the
 * disk number, is rejected and no properties are set (DEBUG kernels
 * still trip the ASSERT). A missing or out of range slice number falls
 * back to 0.
 */
static void
xen_vbdroot_props(char *s)
{
	char vbdpath[VBDPATHLEN] = "/xpvd/xdf@";
	const char lnamefix[] = "/dev/dsk/c0d";
	char *pnp;
	char *prop_p;
	char mi;
	short minor;
	long addr = 0;

	/* there must be something after the logical name prefix */
	if (strlen(s) <= strlen(lnamefix)) {
		DBG_MSG("malformed xpv-root, VBD bootpath not set\n");
		return;
	}

	prop_p = s + strlen(lnamefix);
	while (ISDIGIT(*prop_p))
		addr = addr * 10 + *prop_p++ - '0';

	/* append the unit address, bounded by the space that is left */
	pnp = vbdpath + strlen(vbdpath);
	(void) snprintf(pnp, sizeof (vbdpath) - (pnp - vbdpath), "%lx",
	    (unsigned long)addr);
	pnp = vbdpath + strlen(vbdpath);

	if (*prop_p == 's') {
		mi = 'a';
	} else if (*prop_p == 'p') {
		mi = 'q';
	} else {
		ASSERT(0); /* shouldn't be here */
		DBG_MSG("malformed xpv-root, VBD bootpath not set\n");
		return;
	}
	prop_p++;
	ASSERT(*prop_p != '\0');

	/* malformed root path, use 0 as default */
	minor = 0;
	if (ISDIGIT(*prop_p)) {
		minor = *prop_p - '0';
		prop_p++;
		if (ISDIGIT(*prop_p)) {
			minor = minor * 10 + *prop_p - '0';
		}
	}
	ASSERT(minor < 16); /* at most 16 partitions */
	if (minor >= 16)
		minor = 0;
	mi += minor;
	*pnp++ = ':';
	*pnp++ = mi;
	*pnp++ = '\0';
	bsetprops("fstype", "ufs");
	bsetprops("bootpath", vbdpath);

	DBG_MSG("VBD bootpath set to ");
	DBG_MSG(vbdpath);
	DBG_MSG("\n");
}
880
881/*
882 * parse the xpv-nfsroot property to create properties used by
883 * nfs_mountroot.
884 */
885static void
886xen_nfsroot_props(char *s)
887{
888	char *prop_map[] = {
889		BP_SERVER_IP,	/* server IP address */
890		BP_SERVER_NAME,	/* server hostname */
891		BP_SERVER_PATH,	/* root path */
892	};
893	int n_prop = sizeof (prop_map) / sizeof (prop_map[0]);
894
895	bsetprop("fstype", 6, "nfsdyn", 7);
896
897	xen_parse_props(s, prop_map, n_prop);
898
899	/*
900	 * If a server name wasn't specified, use a default.
901	 */
902	if (do_bsys_getproplen(NULL, BP_SERVER_NAME) == -1)
903		bsetprops(BP_SERVER_NAME, "unknown");
904}
905
906/*
907 * Extract our IP address, etc. from the "xpv-ip" property.
908 */
909static void
910xen_ip_props(char *s)
911{
912	char *prop_map[] = {
913		BP_HOST_IP,		/* IP address */
914		NULL,			/* NFS server IP address (ignored in */
915					/* favour of xpv-nfsroot) */
916		BP_ROUTER_IP,		/* IP gateway */
917		BP_SUBNET_MASK,		/* IP subnet mask */
918		"xpv-hostname",		/* hostname (ignored) */
919		BP_NETWORK_INTERFACE,	/* interface name */
920		"xpv-hcp",		/* host configuration protocol */
921	};
922	int n_prop = sizeof (prop_map) / sizeof (prop_map[0]);
923	char ifname[IFNAMSIZ];
924
925	xen_parse_props(s, prop_map, n_prop);
926
927	/*
928	 * A Linux dom0 administrator expects all interfaces to be
929	 * called "ethX", which is not the case here.
930	 *
931	 * If the interface name specified is "eth0", presume that
932	 * this is really intended to be "xnf0" (the first domU ->
933	 * dom0 interface for this domain).
934	 */
935	if ((do_bsys_getprop(NULL, BP_NETWORK_INTERFACE, ifname) == 0) &&
936	    (strcmp("eth0", ifname) == 0)) {
937		bsetprops(BP_NETWORK_INTERFACE, "xnf0");
938		bop_printf(NULL,
939		    "network interface name 'eth0' replaced with 'xnf0'\n");
940	}
941}
942
943#else	/* __xpv */
944
945static void
946setup_rarp_props(struct sol_netinfo *sip)
947{
948	char buf[BUFLEN];	/* to hold ip/mac addrs */
949	uint8_t *val;
950
951	val = (uint8_t *)&sip->sn_ciaddr;
952	(void) snprintf(buf, BUFLEN, "%d.%d.%d.%d",
953	    val[0], val[1], val[2], val[3]);
954	bsetprops(BP_HOST_IP, buf);
955
956	val = (uint8_t *)&sip->sn_siaddr;
957	(void) snprintf(buf, BUFLEN, "%d.%d.%d.%d",
958	    val[0], val[1], val[2], val[3]);
959	bsetprops(BP_SERVER_IP, buf);
960
961	if (sip->sn_giaddr != 0) {
962		val = (uint8_t *)&sip->sn_giaddr;
963		(void) snprintf(buf, BUFLEN, "%d.%d.%d.%d",
964		    val[0], val[1], val[2], val[3]);
965		bsetprops(BP_ROUTER_IP, buf);
966	}
967
968	if (sip->sn_netmask != 0) {
969		val = (uint8_t *)&sip->sn_netmask;
970		(void) snprintf(buf, BUFLEN, "%d.%d.%d.%d",
971		    val[0], val[1], val[2], val[3]);
972		bsetprops(BP_SUBNET_MASK, buf);
973	}
974
975	if (sip->sn_mactype != 4 || sip->sn_maclen != 6) {
976		bop_printf(NULL, "unsupported mac type %d, mac len %d\n",
977		    sip->sn_mactype, sip->sn_maclen);
978	} else {
979		val = sip->sn_macaddr;
980		(void) snprintf(buf, BUFLEN, "%x:%x:%x:%x:%x:%x",
981		    val[0], val[1], val[2], val[3], val[4], val[5]);
982		bsetprops(BP_BOOT_MAC, buf);
983	}
984}
985
986#endif	/* __xpv */
987
988/*
989 * 1st pass at building the table of boot properties. This includes:
990 * - values set on the command line: -B a=x,b=y,c=z ....
991 * - known values we just compute (ie. from xbootp)
992 * - values from /boot/solaris/bootenv.rc (ie. eeprom(1m) values)
993 *
994 * the grub command line looked like:
995 * kernel boot-file [-B prop=value[,prop=value]...] [boot-args]
996 *
997 * whoami is the same as boot-file
998 */
999static void
1000build_boot_properties(void)
1001{
1002	char *name;
1003	int name_len;
1004	char *value;
1005	int value_len;
1006	struct boot_modules *bm;
1007	char *propbuf;
1008	int quoted = 0;
1009	int boot_arg_len;
1010#ifndef __xpv
1011	static int stdout_val = 0;
1012	uchar_t boot_device;
1013	char str[3];
1014	multiboot_info_t *mbi;
1015	int netboot;
1016	struct sol_netinfo *sip;
1017#endif
1018
1019	/*
1020	 * These have to be done first, so that kobj_mount_root() works
1021	 */
1022	DBG_MSG("Building boot properties\n");
1023	propbuf = do_bsys_alloc(NULL, NULL, MMU_PAGESIZE, 0);
1024	DBG((uintptr_t)propbuf);
1025	if (xbootp->bi_module_cnt > 0) {
1026		bm = xbootp->bi_modules;
1027		bsetprop64("ramdisk_start", (uint64_t)(uintptr_t)bm->bm_addr);
1028		bsetprop64("ramdisk_end", (uint64_t)(uintptr_t)bm->bm_addr +
1029		    bm->bm_size);
1030	}
1031
1032	DBG_MSG("Parsing command line for boot properties\n");
1033	value = xbootp->bi_cmdline;
1034
1035	/*
1036	 * allocate memory to collect boot_args into
1037	 */
1038	boot_arg_len = strlen(xbootp->bi_cmdline) + 1;
1039	boot_args = do_bsys_alloc(NULL, NULL, boot_arg_len, MMU_PAGESIZE);
1040	boot_args[0] = 0;
1041	boot_arg_len = 0;
1042
1043#ifdef __xpv
1044	/*
1045	 * Xen puts a lot of device information in front of the kernel name
1046	 * let's grab them and make them boot properties.  The first
1047	 * string w/o an "=" in it will be the boot-file property.
1048	 */
1049	(void) strcpy(namebuf, "xpv-");
1050	for (;;) {
1051		/*
1052		 * get to next property
1053		 */
1054		while (ISSPACE(*value))
1055			++value;
1056		name = value;
1057		/*
1058		 * look for an "="
1059		 */
1060		while (*value && !ISSPACE(*value) && *value != '=') {
1061			value++;
1062		}
1063		if (*value != '=') { /* no "=" in the property */
1064			value = name;
1065			break;
1066		}
1067		name_len = value - name;
1068		value_len = 0;
1069		/*
1070		 * skip over the "="
1071		 */
1072		value++;
1073		while (value[value_len] && !ISSPACE(value[value_len])) {
1074			++value_len;
1075		}
1076		/*
1077		 * build property name with "xpv-" prefix
1078		 */
1079		if (name_len + 4 > 32) { /* skip if name too long */
1080			value += value_len;
1081			continue;
1082		}
1083		bcopy(name, &namebuf[4], name_len);
1084		name_len += 4;
1085		namebuf[name_len] = 0;
1086		bcopy(value, propbuf, value_len);
1087		propbuf[value_len] = 0;
1088		bsetprops(namebuf, propbuf);
1089
1090		/*
1091		 * xpv-root is set to the logical disk name of the xen
1092		 * VBD when booting from a disk-based filesystem.
1093		 */
1094		if (strcmp(namebuf, "xpv-root") == 0)
1095			xen_vbdroot_props(propbuf);
1096		/*
1097		 * While we're here, if we have a "xpv-nfsroot" property
1098		 * then we need to set "fstype" to "nfsdyn" so we mount
1099		 * our root from the nfs server.  Also parse the xpv-nfsroot
1100		 * property to create the properties that nfs_mountroot will
1101		 * need to find the root and mount it.
1102		 */
1103		if (strcmp(namebuf, "xpv-nfsroot") == 0)
1104			xen_nfsroot_props(propbuf);
1105
1106		if (strcmp(namebuf, "xpv-ip") == 0)
1107			xen_ip_props(propbuf);
1108		value += value_len;
1109	}
1110#endif
1111
1112	while (ISSPACE(*value))
1113		++value;
1114	/*
1115	 * value now points at the boot-file
1116	 */
1117	value_len = 0;
1118	while (value[value_len] && !ISSPACE(value[value_len]))
1119		++value_len;
1120	if (value_len > 0) {
1121		whoami = propbuf;
1122		bcopy(value, whoami, value_len);
1123		whoami[value_len] = 0;
1124		bsetprops("boot-file", whoami);
1125		/*
1126		 * strip leading path stuff from whoami, so running from
1127		 * PXE/miniroot makes sense.
1128		 */
1129		if (strstr(whoami, "/platform/") != NULL)
1130			whoami = strstr(whoami, "/platform/");
1131		bsetprops("whoami", whoami);
1132	}
1133
1134	/*
1135	 * Values forcibly set boot properties on the command line via -B.
1136	 * Allow use of quotes in values. Other stuff goes on kernel
1137	 * command line.
1138	 */
1139	name = value + value_len;
1140	while (*name != 0) {
1141		/*
1142		 * anything not " -B" is copied to the command line
1143		 */
1144		if (!ISSPACE(name[0]) || name[1] != '-' || name[2] != 'B') {
1145			boot_args[boot_arg_len++] = *name;
1146			boot_args[boot_arg_len] = 0;
1147			++name;
1148			continue;
1149		}
1150
1151		/*
1152		 * skip the " -B" and following white space
1153		 */
1154		name += 3;
1155		while (ISSPACE(*name))
1156			++name;
1157		while (*name && !ISSPACE(*name)) {
1158			value = strstr(name, "=");
1159			if (value == NULL)
1160				break;
1161			name_len = value - name;
1162			++value;
1163			value_len = 0;
1164			quoted = 0;
1165			for (; ; ++value_len) {
1166				if (!value[value_len])
1167					break;
1168
1169				/*
1170				 * is this value quoted?
1171				 */
1172				if (value_len == 0 &&
1173				    (value[0] == '\'' || value[0] == '"')) {
1174					quoted = value[0];
1175					++value_len;
1176				}
1177
1178				/*
1179				 * In the quote accept any character,
1180				 * but look for ending quote.
1181				 */
1182				if (quoted) {
1183					if (value[value_len] == quoted)
1184						quoted = 0;
1185					continue;
1186				}
1187
1188				/*
1189				 * a comma or white space ends the value
1190				 */
1191				if (value[value_len] == ',' ||
1192				    ISSPACE(value[value_len]))
1193					break;
1194			}
1195
1196			if (value_len == 0) {
1197				bsetprop(name, name_len, "true", 5);
1198			} else {
1199				char *v = value;
1200				int l = value_len;
1201				if (v[0] == v[l - 1] &&
1202				    (v[0] == '\'' || v[0] == '"')) {
1203					++v;
1204					l -= 2;
1205				}
1206				bcopy(v, propbuf, l);
1207				propbuf[l] = '\0';
1208				bsetprop(name, name_len, propbuf,
1209				    l + 1);
1210			}
1211			name = value + value_len;
1212			while (*name == ',')
1213				++name;
1214		}
1215	}
1216
1217	/*
1218	 * set boot-args property
1219	 * 1275 name is bootargs, so set
1220	 * that too
1221	 */
1222	bsetprops("boot-args", boot_args);
1223	bsetprops("bootargs", boot_args);
1224
1225#ifndef __xpv
1226	/*
1227	 * set the BIOS boot device from GRUB
1228	 */
1229	netboot = 0;
1230	mbi = xbootp->bi_mb_info;
1231	if (mbi != NULL && mbi->flags & 0x2) {
1232		boot_device = mbi->boot_device >> 24;
1233		if (boot_device == 0x20)
1234			netboot++;
1235		str[0] = (boot_device >> 4) + '0';
1236		str[1] = (boot_device & 0xf) + '0';
1237		str[2] = 0;
1238		bsetprops("bios-boot-device", str);
1239	} else {
1240		netboot = 1;
1241	}
1242
1243	/*
1244	 * In the netboot case, drives_info is overloaded with the dhcp ack.
1245	 * This is not multiboot compliant and requires special pxegrub!
1246	 */
1247	if (netboot && mbi->drives_length != 0) {
1248		sip = (struct sol_netinfo *)(uintptr_t)mbi->drives_addr;
1249		if (sip->sn_infotype == SN_TYPE_BOOTP)
1250			bsetprop("bootp-response", sizeof ("bootp-response"),
1251			    (void *)(uintptr_t)mbi->drives_addr,
1252			    mbi->drives_length);
1253		else if (sip->sn_infotype == SN_TYPE_RARP)
1254			setup_rarp_props(sip);
1255	}
1256	bsetprop("stdout", strlen("stdout"),
1257	    &stdout_val, sizeof (stdout_val));
1258#endif /* __xpv */
1259
1260	/*
1261	 * more conjured up values for made up things....
1262	 */
1263#if defined(__xpv)
1264	bsetprops("mfg-name", "i86xpv");
1265	bsetprops("impl-arch-name", "i86xpv");
1266#else
1267	bsetprops("mfg-name", "i86pc");
1268	bsetprops("impl-arch-name", "i86pc");
1269#endif
1270
1271	/*
1272	 * Build firmware-provided system properties
1273	 */
1274	build_firmware_properties();
1275
1276	/*
1277	 * XXPV
1278	 *
1279	 * Find out what these are:
1280	 * - cpuid_feature_ecx_include
1281	 * - cpuid_feature_ecx_exclude
1282	 * - cpuid_feature_edx_include
1283	 * - cpuid_feature_edx_exclude
1284	 *
1285	 * Find out what these are in multiboot:
1286	 * - netdev-path
1287	 * - fstype
1288	 */
1289}
1290
1291#ifdef __xpv
/*
 * Under the Hypervisor, memory usable for DMA may be scarce. One
 * very likely large pool of DMA friendly memory is occupied by
 * the boot_archive, as it was loaded by grub into low MFNs.
 *
 * Here we free up that memory by copying the boot archive to what are
 * likely higher MFN pages and then swapping the mfn/pfn mappings.
 *
 * Only the first boot module (xbootp->bi_modules) is processed, and only
 * its whole pages: a partial leading page is skipped and the remaining
 * length is rounded down to a multiple of MMU_PAGESIZE.  Every page that
 * gets relocated consumes one page at next_phys.
 */
#define	PFN_2GIG	0x80000	/* frame number of the 2Gig mark, given 4K pages */
static void
relocate_boot_archive(void)
{
	mfn_t max_mfn = HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);
	struct boot_modules *bm = xbootp->bi_modules;
	uintptr_t va;
	pfn_t va_pfn;
	mfn_t va_mfn;
	caddr_t copy;
	pfn_t copy_pfn;
	mfn_t copy_mfn;
	size_t	len;
	int slop;
	int total = 0;
	int relocated = 0;
	int mmu_update_return;
	mmu_update_t t[2];
	x86pte_t pte;

	/*
	 * If all MFN's are below 2Gig, don't bother doing this.
	 */
	if (max_mfn < PFN_2GIG)
		return;
	if (xbootp->bi_module_cnt < 1) {
		DBG_MSG("no boot_archive!");
		return;
	}

	DBG_MSG("moving boot_archive to high MFN memory\n");
	va = (uintptr_t)bm->bm_addr;
	len = bm->bm_size;
	/*
	 * Skip a partial first page so that va is page aligned, then
	 * trim the length down to whole pages.
	 */
	slop = va & MMU_PAGEOFFSET;
	if (slop) {
		va += MMU_PAGESIZE - slop;
		len -= MMU_PAGESIZE - slop;
	}
	len = P2ALIGN(len, MMU_PAGESIZE);

	/*
	 * Go through all boot_archive pages, swapping any low MFN pages
	 * with memory at next_phys.
	 */
	while (len != 0) {
		++total;
		/*
		 * The pfn is computed from the virtual address less ONE_GIG.
		 * NOTE(review): presumably the archive is mapped at
		 * physical + ONE_GIG at this point -- confirm.
		 */
		va_pfn = mmu_btop(va - ONE_GIG);
		va_mfn = mfn_list[va_pfn];
		if (mfn_list[va_pfn] < PFN_2GIG) {
			/* copy the page contents into the page at next_phys */
			copy = kbm_remap_window(next_phys, 1);
			bcopy((void *)va, copy, MMU_PAGESIZE);
			copy_pfn = mmu_btop(next_phys);
			copy_mfn = mfn_list[copy_pfn];

			/* point va at the machine page now holding the copy */
			pte = mfn_to_ma(copy_mfn) | PT_NOCONSIST | PT_VALID;
			if (HYPERVISOR_update_va_mapping(va, pte,
			    UVMF_INVLPG | UVMF_LOCAL))
				bop_panic("relocate_boot_archive():  "
				    "HYPERVISOR_update_va_mapping() failed");

			/* exchange the two pfn -> mfn entries */
			mfn_list[va_pfn] = copy_mfn;
			mfn_list[copy_pfn] = va_mfn;

			/*
			 * Record the matching mfn -> pfn pairing for both
			 * machine pages; both updates must be accepted.
			 */
			t[0].ptr = mfn_to_ma(copy_mfn) | MMU_MACHPHYS_UPDATE;
			t[0].val = va_pfn;
			t[1].ptr = mfn_to_ma(va_mfn) | MMU_MACHPHYS_UPDATE;
			t[1].val = copy_pfn;
			if (HYPERVISOR_mmu_update(t, 2, &mmu_update_return,
			    DOMID_SELF) != 0 || mmu_update_return != 2)
				bop_panic("relocate_boot_archive():  "
				    "HYPERVISOR_mmu_update() failed");

			/* the page at next_phys has been used up */
			next_phys += MMU_PAGESIZE;
			++relocated;
		}
		len -= MMU_PAGESIZE;
		va += MMU_PAGESIZE;
	}
	DBG_MSG("Relocated pages:\n");
	DBG(relocated);
	DBG_MSG("Out of total pages:\n");
	DBG(total);
}
1383#endif /* __xpv */
1384
1385#if !defined(__xpv)
1386/*
1387 * Install a temporary IDT that lets us catch errors in the boot time code.
1388 * We shouldn't get any faults at all while this is installed, so we'll
1389 * just generate a traceback and exit.
1390 */
1391#ifdef __amd64
1392static const int bcode_sel = B64CODE_SEL;
1393#else
1394static const int bcode_sel = B32CODE_SEL;
1395#endif
1396
/*
 * simple description of a stack frame (args are 32 bit only currently)
 *
 * bop_traceback() follows a chain of these through old_frame, reporting
 * retaddr for each frame and, on __i386 only, the words in arg[].
 */
typedef struct bop_frame {
	struct bop_frame *old_frame;	/* next frame in the chain, 0 ends it */
	pc_t retaddr;			/* pc reported for this frame */
	long arg[1];			/* first word following retaddr */
} bop_frame_t;
1405
1406void
1407bop_traceback(bop_frame_t *frame)
1408{
1409	pc_t pc;
1410	int cnt;
1411	int a;
1412	char *ksym;
1413	ulong_t off;
1414
1415	bop_printf(NULL, "Stack traceback:\n");
1416	for (cnt = 0; cnt < 30; ++cnt) {	/* up to 30 frames */
1417		pc = frame->retaddr;
1418		if (pc == 0)
1419			break;
1420		ksym = kobj_getsymname(pc, &off);
1421		if (ksym)
1422			bop_printf(NULL, "  %s+%lx", ksym, off);
1423		else
1424			bop_printf(NULL, "  0x%lx", pc);
1425
1426		frame = frame->old_frame;
1427		if (frame == 0) {
1428			bop_printf(NULL, "\n");
1429			break;
1430		}
1431		for (a = 0; a < 6; ++a) {	/* try for 6 args */
1432#if defined(__i386)
1433			if ((void *)&frame->arg[a] == (void *)frame->old_frame)
1434				break;
1435			if (a == 0)
1436				bop_printf(NULL, "(");
1437			else
1438				bop_printf(NULL, ",");
1439			bop_printf(NULL, "0x%lx", frame->arg[a]);
1440#endif
1441		}
1442		bop_printf(NULL, ")\n");
1443	}
1444}
1445
/*
 * Layout of the words found on the stack when bop_trap() is entered.
 * The error_code word is only sometimes present; bop_trap() detects its
 * absence by checking code_seg against the boot code selector and shifts
 * its view of the frame accordingly.
 */
struct trapframe {
	ulong_t frame_ptr;	/* %[er]bp pushed by our code */
	ulong_t error_code;	/* optional */
	ulong_t inst_ptr;	/* reported as the faulting instruction pointer */
	ulong_t code_seg;	/* low 16 bits are reported */
	ulong_t flags_reg;	/* reported as the flags register */
#ifdef __amd64
	ulong_t stk_ptr;	/* reported as the return %rsp */
	ulong_t stk_seg;	/* reported as the return %ss (low 16 bits) */
#endif
};
1457
1458void
1459bop_trap(struct trapframe *tf)
1460{
1461	bop_frame_t fakeframe;
1462	static int depth = 0;
1463
1464	/*
1465	 * Check for an infinite loop of traps.
1466	 */
1467	if (++depth > 2)
1468		bop_panic("Nested trap");
1469
1470	/*
1471	 * adjust the tf for optional error_code by detecting the code selector
1472	 */
1473	if (tf->code_seg != bcode_sel)
1474		tf = (struct trapframe *)((uintptr_t)tf - sizeof (ulong_t));
1475
1476	bop_printf(NULL, "Unexpected trap\n");
1477	bop_printf(NULL, "instruction pointer  0x%lx\n", tf->inst_ptr);
1478	bop_printf(NULL, "error code, optional 0x%lx\n",
1479	    tf->error_code & 0xffffffff);
1480	bop_printf(NULL, "code segment         0x%lx\n", tf->code_seg & 0xffff);
1481	bop_printf(NULL, "flags register       0x%lx\n", tf->flags_reg);
1482#ifdef __amd64
1483	bop_printf(NULL, "return %%rsp         0x%lx\n", tf->stk_ptr);
1484	bop_printf(NULL, "return %%ss          0x%lx\n", tf->stk_seg & 0xffff);
1485#endif
1486	fakeframe.old_frame = (bop_frame_t *)tf->frame_ptr;
1487	fakeframe.retaddr = (pc_t)tf->inst_ptr;
1488	bop_printf(NULL, "Attempting stack backtrace:\n");
1489	bop_traceback(&fakeframe);
1490	bop_panic("unexpected trap in early boot");
1491}
1492
1493extern void bop_trap_handler(void);
1494
1495static gate_desc_t *bop_idt;
1496
1497static desctbr_t bop_idt_info;
1498
1499static void
1500bop_idt_init(void)
1501{
1502	int t;
1503
1504	bop_idt = (gate_desc_t *)
1505	    do_bsys_alloc(NULL, NULL, MMU_PAGESIZE, MMU_PAGESIZE);
1506	bzero(bop_idt, MMU_PAGESIZE);
1507	for (t = 0; t < NIDT; ++t) {
1508		set_gatesegd(&bop_idt[t], &bop_trap_handler, bcode_sel,
1509		    SDT_SYSIGT, TRP_KPL);
1510	}
1511	bop_idt_info.dtr_limit = (NIDT * sizeof (gate_desc_t)) - 1;
1512	bop_idt_info.dtr_base = (uintptr_t)bop_idt;
1513	wr_idtr(&bop_idt_info);
1514}
1515#endif	/* !defined(__xpv) */
1516
1517/*
1518 * This is where we enter the kernel. It dummies up the boot_ops and
1519 * boot_syscalls vectors and jumps off to _kobj_boot()
1520 */
1521void
1522_start(struct xboot_info *xbp)
1523{
1524	bootops_t *bops = &bootop;
1525	extern void _kobj_boot();
1526
1527	/*
1528	 * 1st off - initialize the console for any error messages
1529	 */
1530	xbootp = xbp;
1531#ifdef __xpv
1532	HYPERVISOR_shared_info = (void *)xbootp->bi_shared_info;
1533	xen_info = xbootp->bi_xen_start_info;
1534#endif
1535	bcons_init((void *)xbootp->bi_cmdline);
1536	have_console = 1;
1537
1538	/*
1539	 * enable debugging
1540	 */
1541	if (strstr((char *)xbootp->bi_cmdline, "kbm_debug"))
1542		kbm_debug = 1;
1543
1544	DBG_MSG("\n\n*** Entered Solaris in _start() cmdline is: ");
1545	DBG_MSG((char *)xbootp->bi_cmdline);
1546	DBG_MSG("\n\n\n");
1547
1548	/*
1549	 * physavail is no longer used by startup
1550	 */
1551	bm.physinstalled = xbp->bi_phys_install;
1552	bm.pcimem = xbp->bi_pcimem;
1553	bm.physavail = NULL;
1554
1555	/*
1556	 * initialize the boot time allocator
1557	 */
1558	next_phys = xbootp->bi_next_paddr;
1559	DBG(next_phys);
1560	next_virt = (uintptr_t)xbootp->bi_next_vaddr;
1561	DBG(next_virt);
1562	DBG_MSG("Initializing boot time memory management...");
1563#ifdef __xpv
1564	{
1565		xen_platform_parameters_t p;
1566
1567		/* This call shouldn't fail, dboot already did it once. */
1568		(void) HYPERVISOR_xen_version(XENVER_platform_parameters, &p);
1569		mfn_to_pfn_mapping = (pfn_t *)(xen_virt_start = p.virt_start);
1570		DBG(xen_virt_start);
1571	}
1572#endif
1573	kbm_init(xbootp);
1574	DBG_MSG("done\n");
1575
1576	/*
1577	 * Fill in the bootops vector
1578	 */
1579	bops->bsys_version = BO_VERSION;
1580	bops->boot_mem = &bm;
1581	bops->bsys_alloc = do_bsys_alloc;
1582	bops->bsys_free = do_bsys_free;
1583	bops->bsys_getproplen = do_bsys_getproplen;
1584	bops->bsys_getprop = do_bsys_getprop;
1585	bops->bsys_nextprop = do_bsys_nextprop;
1586	bops->bsys_printf = bop_printf;
1587	bops->bsys_doint = do_bsys_doint;
1588
1589	/*
1590	 * BOP_EALLOC() is no longer needed
1591	 */
1592	bops->bsys_ealloc = do_bsys_ealloc;
1593
1594#ifdef __xpv
1595	/*
1596	 * On domain 0 we need to free up some physical memory that is
1597	 * usable for DMA. Since GRUB loaded the boot_archive, it is
1598	 * sitting in low MFN memory. We'll relocated the boot archive
1599	 * pages to high PFN memory.
1600	 */
1601	if (DOMAIN_IS_INITDOMAIN(xen_info))
1602		relocate_boot_archive();
1603#endif
1604
1605#ifndef __xpv
1606	/*
1607	 * Install an IDT to catch early pagefaults (shouldn't have any).
1608	 * Also needed for kmdb.
1609	 */
1610	bop_idt_init();
1611#endif
1612
1613	/*
1614	 * Start building the boot properties from the command line
1615	 */
1616	DBG_MSG("Initializing boot properties:\n");
1617	build_boot_properties();
1618
1619	if (strstr((char *)xbootp->bi_cmdline, "prom_debug") || kbm_debug) {
1620		char *name;
1621		char *value;
1622		char *cp;
1623		int len;
1624
1625		value = do_bsys_alloc(NULL, NULL, MMU_PAGESIZE, MMU_PAGESIZE);
1626		bop_printf(NULL, "\nBoot properties:\n");
1627		name = "";
1628		while ((name = do_bsys_nextprop(NULL, name)) != NULL) {
1629			bop_printf(NULL, "\t0x%p %s = ", (void *)name, name);
1630			(void) do_bsys_getprop(NULL, name, value);
1631			len = do_bsys_getproplen(NULL, name);
1632			bop_printf(NULL, "len=%d ", len);
1633			value[len] = 0;
1634			for (cp = value; *cp; ++cp) {
1635				if (' ' <= *cp && *cp <= '~')
1636					bop_printf(NULL, "%c", *cp);
1637				else
1638					bop_printf(NULL, "-0x%x-", *cp);
1639			}
1640			bop_printf(NULL, "\n");
1641		}
1642	}
1643
1644	/*
1645	 * jump into krtld...
1646	 */
1647	_kobj_boot(&bop_sysp, NULL, bops, NULL);
1648}
1649
1650
/*
 * Replacement for bsys_alloc installed by bop_no_more_mem(): any call
 * panics.  All arguments are ignored; the return statement only satisfies
 * the function's declared return type.
 */
/*ARGSUSED*/
static caddr_t
no_more_alloc(bootops_t *bop, caddr_t virthint, size_t size, int align)
{
	panic("Attempt to bsys_alloc() too late\n");
	return (NULL);
}
1658
/*
 * Replacement for bsys_free installed by bop_no_more_mem(): any call
 * panics.  All arguments are ignored.
 */
/*ARGSUSED*/
static void
no_more_free(bootops_t *bop, caddr_t virt, size_t size)
{
	panic("Attempt to bsys_free() too late\n");
}
1665
/*
 * Shut off boot-time memory allocation: after reporting the allocation
 * totals through DBG(), the bsys_alloc and bsys_free entries of bootops are
 * redirected to routines that panic when called.
 */
void
bop_no_more_mem(void)
{
	DBG(total_bop_alloc_scratch);
	DBG(total_bop_alloc_kernel);
	bootops->bsys_alloc = no_more_alloc;
	bootops->bsys_free = no_more_free;
}
1674
1675
1676#ifndef __xpv
1677/*
1678 * Set ACPI firmware properties
1679 */
1680
1681static caddr_t
1682vmap_phys(size_t length, paddr_t pa)
1683{
1684	paddr_t	start, end;
1685	caddr_t	va;
1686	size_t	len, page;
1687
1688	start = P2ALIGN(pa, MMU_PAGESIZE);
1689	end = P2ROUNDUP(pa + length, MMU_PAGESIZE);
1690	len = end - start;
1691	va = (caddr_t)alloc_vaddr(len, MMU_PAGESIZE);
1692	for (page = 0; page < len; page += MMU_PAGESIZE)
1693		kbm_map((uintptr_t)va + page, start + page, 0, 0);
1694	return (va + (pa & MMU_PAGEOFFSET));
1695}
1696
/*
 * Return the 8-bit sum (modulo 256) of the "len" bytes starting at "tp".
 * A length of 0 yields 0.
 */
static uint8_t
checksum_table(uint8_t *tp, size_t len)
{
	uint8_t total = 0;
	size_t i;

	for (i = 0; i < len; i++)
		total += tp[i];

	return (total);
}
1707
1708static int
1709valid_rsdp(struct rsdp *rp)
1710{
1711
1712	/* validate the V1.x checksum */
1713	if (checksum_table((uint8_t *)&rp->v1, sizeof (struct rsdp_v1)) != 0)
1714		return (0);
1715
1716	/* If pre-ACPI 2.0, this is a valid RSDP */
1717	if (rp->v1.revision < 2)
1718		return (1);
1719
1720	/* validate the V2.x checksum */
1721	if (checksum_table((uint8_t *)rp, sizeof (struct rsdp)) != 0)
1722		return (0);
1723
1724	return (1);
1725}
1726
1727/*
1728 * Scan memory range for an RSDP;
1729 * see ACPI 3.0 Spec, 5.2.5.1
1730 */
1731static struct rsdp *
1732scan_rsdp(paddr_t start, paddr_t end)
1733{
1734	size_t len  = end - start + 1;
1735	caddr_t ptr;
1736
1737	ptr = vmap_phys(len, start);
1738	while (len > 0) {
1739		if (strncmp(ptr, ACPI_RSDP_SIG, ACPI_RSDP_SIG_LEN) == 0)
1740			if (valid_rsdp((struct rsdp *)ptr))
1741				return ((struct rsdp *)ptr);
1742		ptr += 16;
1743		len -= 16;
1744	}
1745
1746	return (NULL);
1747}
1748
1749/*
1750 * Refer to ACPI 3.0 Spec, section 5.2.5.1 to understand this function
1751 */
1752static struct rsdp *
1753find_rsdp() {
1754	struct rsdp *rsdp;
1755	uint16_t *ebda_seg;
1756	paddr_t  ebda_addr;
1757
1758	/*
1759	 * Get the EBDA segment and scan the first 1K
1760	 */
1761	ebda_seg = (uint16_t *)vmap_phys(sizeof (uint16_t), ACPI_EBDA_SEG_ADDR);
1762	ebda_addr = *ebda_seg << 4;
1763	rsdp = scan_rsdp(ebda_addr, ebda_addr + ACPI_EBDA_LEN - 1);
1764	if (rsdp == NULL)
1765		/* if EBDA doesn't contain RSDP, look in BIOS memory */
1766		rsdp = scan_rsdp(0xe0000, 0xfffff);
1767	return (rsdp);
1768}
1769
1770static struct table_header *
1771map_fw_table(paddr_t table_addr)
1772{
1773	struct table_header *tp;
1774	size_t len = MAX(sizeof (struct table_header), MMU_PAGESIZE);
1775
1776	/*
1777	 * Map at least a page; if the table is larger than this, remap it
1778	 */
1779	tp = (struct table_header *)vmap_phys(len, table_addr);
1780	if (tp->len > len)
1781		tp = (struct table_header *)vmap_phys(tp->len, table_addr);
1782	return (tp);
1783}
1784
/*
 * Look up a firmware table by signature.
 *
 * "signature" must be exactly ACPI_TABLE_SIG_LEN characters long or NULL is
 * returned.  The first call that finds a usable RSDP maps the root table
 * (RSDT or XSDT) and caches the mapping, its entry count and the revision
 * in static variables; until then every call repeats the RSDP search.
 * Each non-zero entry of the root table is then mapped in turn and the
 * first table whose signature matches is returned.  Returns NULL if the
 * root table cannot be found or no entry matches.
 */
static struct table_header *
find_fw_table(char *signature)
{
	static int revision = 0;	/* 0 until the root table is cached */
	static struct xsdt *xsdt;	/* cached root table (RSDT or XSDT) */
	static int len;			/* number of entries in root table */
	paddr_t xsdt_addr;
	struct rsdp *rsdp;
	struct table_header *tp;
	paddr_t table_addr;
	int	n;

	if (strlen(signature) != ACPI_TABLE_SIG_LEN)
		return (NULL);

	/*
	 * Reading the ACPI 3.0 Spec, section 5.2.5.3 will help
	 * understand this code.  If we haven't already found the RSDT/XSDT,
	 * revision will be 0. Find the RSDP and check the revision
	 * to find out whether to use the RSDT or XSDT.  If revision is
	 * 0 or 1, use the RSDT and set internal revision to 1; if it is 2,
	 * use the XSDT.  If the XSDT address is 0, though, fall back to
	 * revision 1 and use the RSDT.
	 */
	if (revision == 0) {
		if ((rsdp = (struct rsdp *)find_rsdp()) != NULL) {
			revision = rsdp->v1.revision;
			switch (revision) {
			case 2:
				/*
				 * Use the XSDT unless BIOS is buggy and
				 * claims to be rev 2 but has a null XSDT
				 * address
				 */
				xsdt_addr = rsdp->xsdt;
				if (xsdt_addr != 0)
					break;
				/* FALLTHROUGH */
			case 0:
				/* treat RSDP rev 0 as revision 1 internally */
				revision = 1;
				/* FALLTHROUGH */
			case 1:
				/* use the RSDT for rev 0/1 */
				xsdt_addr = rsdp->v1.rsdt;
				break;
			default:
				/* unknown revision */
				revision = 0;
				break;
			}
		}
		/*
		 * No RSDP, or one with a revision we don't understand;
		 * xsdt_addr has not been set in that case.
		 */
		if (revision == 0)
			return (NULL);

		/*
		 * cache the XSDT info; entries are 32 bits wide in the RSDT
		 * (revision 1) and 64 bits wide in the XSDT
		 */
		xsdt = (struct xsdt *)map_fw_table(xsdt_addr);
		len = (xsdt->hdr.len - sizeof (xsdt->hdr)) /
		    ((revision == 1) ? sizeof (uint32_t) : sizeof (uint64_t));
	}

	/*
	 * Scan the table headers looking for a signature match
	 */
	for (n = 0; n < len; n++) {
		table_addr = (revision == 1) ? xsdt->p.r[n] : xsdt->p.x[n];
		if (table_addr == 0)
			continue;
		tp = map_fw_table(table_addr);
		if (strncmp(tp->sig, signature, ACPI_TABLE_SIG_LEN) == 0) {
			return (tp);
		}
	}
	return (NULL);
}
1860
1861static void
1862process_madt(struct madt *tp)
1863{
1864	struct madt_processor *cpu, *end;
1865	uint32_t cpu_count = 0;
1866
1867	/*
1868	 * User-set boot-ncpus overrides firmware count
1869	 */
1870	if (do_bsys_getproplen(NULL, "boot-ncpus") >= 0)
1871		return;
1872
1873	if (tp != NULL) {
1874		end = (struct madt_processor *)(tp->hdr.len + (uintptr_t)tp);
1875		cpu = tp->list;
1876		while (cpu < end) {
1877			if (cpu->type == MADT_PROCESSOR)
1878				if (cpu->flags & 1)
1879					cpu_count++;
1880
1881			cpu = (struct madt_processor *)
1882			    (cpu->len + (uintptr_t)cpu);
1883		}
1884		bsetpropsi("boot-ncpus", cpu_count);
1885	}
1886
1887}
1888
1889static void
1890process_srat(struct srat *tp)
1891{
1892	struct srat_item *item, *end;
1893	int i;
1894	int proc_num, mem_num;
1895#pragma pack(1)
1896	struct {
1897		uint32_t domain;
1898		uint32_t apic_id;
1899		uint32_t sapic_id;
1900	} processor;
1901	struct {
1902		uint32_t domain;
1903		uint64_t addr;
1904		uint64_t length;
1905		uint32_t flags;
1906	} memory;
1907#pragma pack()
1908	char prop_name[30];
1909
1910	if (tp == NULL)
1911		return;
1912
1913	proc_num = mem_num = 0;
1914	end = (struct srat_item *)(tp->hdr.len + (uintptr_t)tp);
1915	item = tp->list;
1916	while (item < end) {
1917		switch (item->type) {
1918		case SRAT_PROCESSOR:
1919			if (!(item->i.p.flags & SRAT_ENABLED))
1920				break;
1921			processor.domain = item->i.p.domain1;
1922			for (i = 0; i < 3; i++)
1923				processor.domain +=
1924				    item->i.p.domain2[i] << ((i + 1) * 8);
1925			processor.apic_id = item->i.p.apic_id;
1926			processor.sapic_id = item->i.p.local_sapic_eid;
1927			(void) snprintf(prop_name, 30, "acpi-srat-processor-%d",
1928			    proc_num);
1929			bsetprop(prop_name, strlen(prop_name), &processor,
1930			    sizeof (processor));
1931			proc_num++;
1932			break;
1933		case SRAT_MEMORY:
1934			if (!(item->i.m.flags & SRAT_ENABLED))
1935				break;
1936			memory.domain = item->i.m.domain;
1937			memory.addr = item->i.m.base_addr;
1938			memory.length = item->i.m.len;
1939			memory.flags = item->i.m.flags;
1940			(void) snprintf(prop_name, 30, "acpi-srat-memory-%d",
1941			    mem_num);
1942			bsetprop(prop_name, strlen(prop_name), &memory,
1943			    sizeof (memory));
1944			mem_num++;
1945			break;
1946		}
1947
1948		item = (struct srat_item *)
1949		    (item->len + (caddr_t)item);
1950	}
1951}
1952
1953static void
1954process_slit(struct slit *tp)
1955{
1956
1957	/*
1958	 * Check the number of localities; if it's too huge, we just
1959	 * return and locality enumeration code will handle this later,
1960	 * if possible.
1961	 *
1962	 * Note that the size of the table is the square of the
1963	 * number of localities; if the number of localities exceeds
1964	 * UINT16_MAX, the table size may overflow an int when being
1965	 * passed to bsetprop() below.
1966	 */
1967	if (tp->number >= SLIT_LOCALITIES_MAX)
1968		return;
1969
1970	bsetprop(SLIT_NUM_PROPNAME, strlen(SLIT_NUM_PROPNAME), &tp->number,
1971	    sizeof (tp->number));
1972	bsetprop(SLIT_PROPNAME, strlen(SLIT_PROPNAME), &tp->entry,
1973	    tp->number * tp->number);
1974}
1975#else /* __xpv */
1976static void
1977enumerate_xen_cpus()
1978{
1979	processorid_t	id, max_id;
1980
1981	/*
1982	 * User-set boot-ncpus overrides enumeration
1983	 */
1984	if (do_bsys_getproplen(NULL, "boot-ncpus") >= 0)
1985		return;
1986
1987	/*
1988	 * Probe every possible virtual CPU id and remember the
1989	 * highest id present; the count of CPUs is one greater
1990	 * than this.  This tacitly assumes at least cpu 0 is present.
1991	 */
1992	max_id = 0;
1993	for (id = 0; id < MAX_VIRT_CPUS; id++)
1994		if (HYPERVISOR_vcpu_op(VCPUOP_is_up, id, NULL) == 0)
1995			max_id = id;
1996
1997	bsetpropsi("boot-ncpus", max_id+1);
1998
1999}
2000#endif /* __xpv */
2001
2002static void
2003build_firmware_properties(void)
2004{
2005#ifndef __xpv
2006	struct table_header *tp;
2007
2008	if ((tp = find_fw_table("APIC")) != NULL)
2009		process_madt((struct madt *)tp);
2010
2011	if ((tp = find_fw_table("SRAT")) != NULL)
2012		process_srat((struct srat *)tp);
2013
2014	if (tp = find_fw_table("SLIT"))
2015		process_slit((struct slit *)tp);
2016#else /* __xpv */
2017	enumerate_xen_cpus();
2018#endif /* __xpv */
2019}
2020
2021/*
2022 * fake up a boot property for USB serial console early boot output
2023 */
2024void *
2025usbser_init(size_t size)
2026{
2027	static char *p = NULL;
2028
2029	p = do_bsys_alloc(NULL, NULL, size, MMU_PAGESIZE);
2030	*p = 0;
2031	bsetprop("usb-serial-buf", strlen("usb-serial-buf") + 1,
2032	    &p, sizeof (p));
2033	return (p);
2034}
2035
/*
 * Fill in *cbp with fixed values: iscmp is cleared (presumably meaning
 * "not compressed") and blksize is set to MAXBSIZE.  fd is ignored.
 * Always returns 0.
 */
/*ARGSUSED*/
int
boot_compinfo(int fd, struct compinfo *cbp)
{
	cbp->iscmp = 0;
	cbp->blksize = MAXBSIZE;
	return (0);
}
2044