1/*-
2 * Copyright (C) 2010-2014 Nathan Whitehorn
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
18 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
20 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
21 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
22 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
23 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include <stand.h>
27#include <sys/param.h>
28#include <sys/boot.h>
29#include <fdt_platform.h>
30
31#include <machine/cpufunc.h>
32#include <bootstrap.h>
33#include "host_syscall.h"
34#include "kboot.h"
35#include "stand.h"
36#include <smbios.h>
37
38struct arch_switch	archsw;
39extern void *_end;
40
41int kboot_getdev(void **vdev, const char *devspec, const char **path);
42ssize_t kboot_copyin(const void *src, vm_offset_t dest, const size_t len);
43ssize_t kboot_copyout(vm_offset_t src, void *dest, const size_t len);
44ssize_t kboot_readin(readin_handle_t fd, vm_offset_t dest, const size_t len);
45int kboot_autoload(void);
46static void kboot_zfs_probe(void);
47
48extern int command_fdt_internal(int argc, char *argv[]);
49
50#define PA_INVAL (vm_offset_t)-1
51static vm_offset_t pa_start = PA_INVAL;
52static vm_offset_t padding;
53static vm_offset_t offset;
54
55static uint64_t commit_limit;
56static uint64_t committed_as;
57static uint64_t mem_avail;
58
59static void
60memory_limits(void)
61{
62	int fd;
63	char buf[128];
64
65	/*
66	 * To properly size the slabs, we need to find how much memory we can
67	 * commit to using. commit_limit is the max, while commited_as is the
68	 * current total. We can use these later to allocate the largetst amount
69	 * of memory possible so we can support larger ram disks than we could
70	 * by using fixed segment sizes. We also grab the memory available so
71	 * we don't use more than 49% of that.
72	 */
73	fd = open("host:/proc/meminfo", O_RDONLY);
74	if (fd != -1) {
75		while (fgetstr(buf, sizeof(buf), fd) > 0) {
76			if (strncmp(buf, "MemAvailable:", 13) == 0) {
77				mem_avail = strtoll(buf + 13, NULL, 0);
78				mem_avail <<= 10; /* Units are kB */
79			} else if (strncmp(buf, "CommitLimit:", 12) == 0) {
80				commit_limit = strtoll(buf + 13, NULL, 0);
81				commit_limit <<= 10; /* Units are kB */
82			} else if (strncmp(buf, "Committed_AS:", 13) == 0) {
83				committed_as = strtoll(buf + 14, NULL, 0);
84				committed_as <<= 10; /* Units are kB */
85			}
86		}
87	} else {
88		/* Otherwise, on FreeBSD host, for testing 32GB host: */
89		mem_avail = 31ul << 30;			/* 31GB free */
90		commit_limit = mem_avail * 9 / 10;	/* 90% comittable */
91		committed_as = 20ul << 20;		/* 20MB used */
92	}
93	printf("Commit limit: %lld Committed bytes %lld Available %lld\n",
94	    (long long)commit_limit, (long long)committed_as,
95	    (long long)mem_avail);
96	close(fd);
97}
98
/*
 * archsw getdev hook: turn a device specification into a devdesc.
 *
 * A devspec containing a ':' is handed to devparse() as is, which also
 * stores the remaining path through 'path'. A NULL devspec, or one without
 * a ':', is treated as a bare path on the device named by the "currdev"
 * environment variable; in that case *path (when path is not NULL) is set
 * to devspec itself once the current device has parsed successfully.
 *
 * Returns whatever devparse() returned.
 *
 * NB: getdev should likely be identical to this in most places, except maybe
 * we should move to storing the length of the platform devdesc.
 */
int
kboot_getdev(void **vdev, const char *devspec, const char **path)
{
	struct devdesc **devp = (struct devdesc **)vdev;
	int error;

	/* A device name is present: parse it off the front of devspec. */
	if (devspec != NULL && strchr(devspec, ':') != NULL)
		return (devparse(devp, devspec, path));

	/* Just a path (or nothing at all): go with the current device. */
	error = devparse(devp, getenv("currdev"), NULL);
	if (error == 0 && path != NULL)
		*path = devspec;
	return (error);
}
125
/*
 * Feed the command line arguments to boot_parse_arg() and return the OR of
 * everything it returned.
 *
 * When run as init, argv[0] is sometimes an EFI-ESP path, sometimes the name
 * of the init program and sometimes a placeholder string, so it is never
 * parsed. Of the remaining arguments, anything that looks like a DOS-like
 * or Unix-like absolute path (leading '\' or '/') is skipped too: when
 * linux-booting via EFI the path used to run bootXXX.efi can show up among
 * the arguments to init. Everything else ('-X', 'foo' or 'foo=bar') is
 * parsed as a command argument. This differs a little from EFI, where
 * argv[0] is often the first real argument.
 */
static int
parse_args(int argc, const char **argv)
{
	int flags = 0;
	int idx;

	for (idx = 1; idx < argc; idx++) {
		const char *arg = argv[idx];

		/* Absolute paths are not loader arguments. */
		if (arg[0] == '\\' || arg[0] == '/')
			continue;
		flags |= boot_parse_arg(arg);
	}

	return (flags);
}
151
152static vm_offset_t rsdp;
153
154static vm_offset_t
155kboot_rsdp_from_efi(void)
156{
157	char buffer[512 + 1];
158	char *walker, *ep;
159
160	if (!file2str("/sys/firmware/efi/systab", buffer, sizeof(buffer)))
161		return (0);	/* Not an EFI system */
162	ep = buffer + strlen(buffer);
163	walker = buffer;
164	while (walker < ep) {
165		if (strncmp("ACPI20=", walker, 7) == 0)
166			return((vm_offset_t)strtoull(walker + 7, NULL, 0));
167		if (strncmp("ACPI=", walker, 5) == 0)
168			return((vm_offset_t)strtoull(walker + 5, NULL, 0));
169		walker += strcspn(walker, "\n") + 1;
170	}
171	return (0);
172}
173
174static void
175find_acpi(void)
176{
177	rsdp = kboot_rsdp_from_efi();
178#if 0	/* maybe for amd64 */
179	if (rsdp == 0)
180		rsdp = find_rsdp_arch();
181#endif
182}
183
184vm_offset_t
185acpi_rsdp(void)
186{
187	return (rsdp);
188}
189
190bool
191has_acpi(void)
192{
193	return rsdp != 0;
194}
195
/*
 * SMBIOS support. We use mmap to map the physical address we are given into
 * a VA in this address space, one 64k page at a time. Once we're done, we
 * clean up any mappings we made.
 */
201
202#define MAX_MAP	10
203#define PAGE	(64<<10)
204
205static struct mapping
206{
207	uintptr_t pa;
208	caddr_t va;
209} map[MAX_MAP];
210static int smbios_fd;
211static int nmap;
212
213caddr_t ptov(uintptr_t pa)
214{
215	caddr_t va;
216	uintptr_t pa2;
217	struct mapping *m = map;
218
219	pa2 = rounddown(pa, PAGE);
220	for (int i = 0; i < nmap; i++, m++) {
221		if (m->pa == pa2) {
222			return (m->va + pa - m->pa);
223		}
224	}
225	if (nmap == MAX_MAP)
226		panic("Too many maps for smbios");
227
228	/*
229	 * host_mmap returns small negative numbers on errors, can't return an
230	 * error here, so we have to panic. The Linux wrapper will set errno
231	 * based on this and then return HOST_MAP_FAILED. Since we're calling
232	 * the raw system call we have to do that ourselves.
233	 */
234	va = host_mmap(0, PAGE, HOST_PROT_READ, HOST_MAP_SHARED, smbios_fd, pa2);
235	if (is_linux_error((long)va))
236		panic("smbios mmap offset %#jx failed", (uintmax_t)pa2);
237	m = &map[nmap++];
238	m->pa = pa2;
239	m->va = va;
240	return (m->va + pa - m->pa);
241}
242
243static void
244smbios_cleanup(void)
245{
246	for (int i = 0; i < nmap; i++) {
247		host_munmap(map[i].va, PAGE);
248	}
249}
250
251static vm_offset_t
252kboot_find_smbios(void)
253{
254	char buffer[512 + 1];
255	char *walker, *ep;
256
257	if (!file2str("/sys/firmware/efi/systab", buffer, sizeof(buffer)))
258		return (0);	/* Not an EFI system */
259	ep = buffer + strlen(buffer);
260	walker = buffer;
261	while (walker <= ep) {
262		if (strncmp("SMBIOS3=", walker, 8) == 0)
263			return((vm_offset_t)strtoull(walker + 8, NULL, 0));
264		if (strncmp("SMBIOS=", walker, 7) == 0)
265			return((vm_offset_t)strtoull(walker + 7, NULL, 0));
266		walker += strcspn(walker, "\n") + 1;
267	}
268	return (0);
269}
270
271static void
272find_smbios(void)
273{
274	char buf[40];
275	uintptr_t pa;
276	caddr_t va;
277
278	pa = kboot_find_smbios();
279	printf("SMBIOS at %#jx\n", (uintmax_t)pa);
280	if (pa == 0)
281		return;
282
283	snprintf(buf, sizeof(buf), "%#jx", (uintmax_t)pa);
284	setenv("hint.smbios.0.mem", buf, 1);
285	smbios_fd = host_open("/dev/mem", O_RDONLY, 0);
286	if (smbios_fd < 0) {
287		printf("Can't open /dev/mem to read smbios\n");
288		return;
289	}
290	va = ptov(pa);
291	printf("Start of smbios at pa %p va %p\n", (void *)pa, va);
292	smbios_detect(va);
293	smbios_cleanup();
294	host_close(smbios_fd);
295}
296
/*
 * Read the whole of file 'fn' into memory and pass its contents, as one
 * NUL-terminated string, to boot_parse_cmdline(). Nothing happens if the
 * file cannot be stat'ed, opened, or read in full, or if memory runs out.
 */
static void
parse_file(const char *fn)
{
	struct stat sb;
	char *text;
	int fd;

	if (stat(fn, &sb) != 0)
		return;
	if ((fd = open(fn, O_RDONLY)) == -1)
		return;

	/* One extra byte for the terminator. */
	text = malloc(sb.st_size + 1);
	if (text != NULL && read(fd, text, sb.st_size) == sb.st_size) {
		text[sb.st_size] = '\0';
		boot_parse_cmdline(text);
	}

	free(text);
	close(fd);
}
320
321
322int
323main(int argc, const char **argv)
324{
325	void *heapbase;
326	const size_t heapsize = 64*1024*1024;
327	const char *bootdev;
328
329	archsw.arch_getdev = kboot_getdev;
330	archsw.arch_copyin = kboot_copyin;
331	archsw.arch_copyout = kboot_copyout;
332	archsw.arch_readin = kboot_readin;
333	archsw.arch_autoload = kboot_autoload;
334	archsw.arch_zfs_probe = kboot_zfs_probe;
335
336	/* Give us a sane world if we're running as init */
337	do_init();
338
339	/*
340	 * Setup the heap, 64MB is minimum for ZFS booting
341	 */
342	heapbase = host_getmem(heapsize);
343	setheap(heapbase, heapbase + heapsize);
344
345	/*
346	 * Set up console so we get error messages.
347	 */
348	cons_probe();
349
350	/*
351	 * Find acpi and smbios, if they exists. This allows command line and
352	 * later scripts to override if necessary.
353	 */
354	find_acpi();
355	find_smbios();
356
357	/* Parse the command line args -- ignoring for now the console selection */
358	parse_args(argc, argv);
359
360	hostfs_root = getenv("hostfs_root");
361	if (hostfs_root == NULL)
362		hostfs_root = "/";
363
364	/* Initialize all the devices */
365	devinit();
366
367	/* Figure out where we're booting from */
368	bootdev = getenv("bootdev");
369	if (bootdev == NULL)
370		bootdev = hostdisk_gen_probe();
371#if defined(LOADER_ZFS_SUPPORT)
372	if (bootdev == NULL || strcmp(bootdev, "zfs:") == 0) {
373		/*
374		 * Pseudo device that says go find the right ZFS pool. This will be
375		 * the first pool that we find that passes the sanity checks (eg looks
376		 * like it might be vbootable) and sets currdev to the right thing based
377		 * on active BEs, etc
378		 */
379		if (hostdisk_zfs_find_default())
380			bootdev = getenv("currdev");
381	}
382#endif
383	if (bootdev != NULL) {
384		/*
385		 * Otherwise, honor what's on the command line. If we've been
386		 * given a specific ZFS partition, then we'll honor it w/o BE
387		 * processing that would otherwise pick a different snapshot to
388		 * boot than the default one in the pool.
389		 */
390		set_currdev(bootdev);
391	} else {
392		panic("Bootdev is still NULL");
393	}
394
395	printf("Boot device: %s with hostfs_root %s\n", bootdev, hostfs_root);
396
397	printf("\n%s", bootprog_info);
398
399	setenv("LINES", "24", 1);
400
401	memory_limits();
402	enumerate_memory_arch();
403
404	interact();			/* doesn't return */
405
406	return (0);
407}
408
/* Terminate the loader by handing 'code' to the host's exit call. */
void
exit(int code)
{

	host_exit(code);
	__unreachable();
}
415
416void
417delay(int usecs)
418{
419	struct host_timeval tvi, tv;
420	uint64_t ti, t;
421	host_gettimeofday(&tvi, NULL);
422	ti = tvi.tv_sec*1000000 + tvi.tv_usec;
423	do {
424		host_gettimeofday(&tv, NULL);
425		t = tv.tv_sec*1000000 + tv.tv_usec;
426	} while (t < ti + usecs);
427}
428
429time_t
430getsecs(void)
431{
432	struct host_timeval tv;
433	host_gettimeofday(&tv, NULL);
434	return (tv.tv_sec);
435}
436
/*
 * time(3) replacement built on getsecs(): returns the current time in
 * seconds and, when 'tloc' is not NULL, also stores it there.
 */
time_t
time(time_t *tloc)
{
	const time_t now = getsecs();

	if (tloc != NULL)
		*tloc = now;
	return (now);
}
448
449struct host_kexec_segment loaded_segments[HOST_KEXEC_SEGMENT_MAX];
450int nkexec_segments = 0;
451
452#define SEGALIGN (1ul<<20)
453
454static ssize_t
455get_phys_buffer(vm_offset_t dest, const size_t len, void **buf)
456{
457	int i = 0;
458	const size_t segsize = 64*1024*1024;
459	size_t sz, amt, l;
460
461	if (nkexec_segments == HOST_KEXEC_SEGMENT_MAX)
462		panic("Tried to load too many kexec segments");
463	for (i = 0; i < nkexec_segments; i++) {
464		if (dest >= (vm_offset_t)loaded_segments[i].mem &&
465		    dest < (vm_offset_t)loaded_segments[i].mem +
466		    loaded_segments[i].bufsz) /* Need to use bufsz since memsz is in use size */
467			goto out;
468	}
469
470	sz = segsize;
471	if (nkexec_segments == 0) {
472		/* how much space does this segment have */
473		sz = space_avail(dest);
474		/* Clip to 45% of available memory (need 2 copies) */
475		sz = MIN(sz, rounddown2(mem_avail * 45 / 100, SEGALIGN));
476		printf("limit to 45%% of mem_avail %zd\n", sz);
477		/* And only use 95% of what we can allocate */
478		sz = MIN(sz,
479		    rounddown2((commit_limit - committed_as) * 95 / 100, SEGALIGN));
480		printf("Allocating %zd MB for first segment\n", sz >> 20);
481	}
482
483	loaded_segments[nkexec_segments].buf = host_getmem(sz);
484	loaded_segments[nkexec_segments].bufsz = sz;
485	loaded_segments[nkexec_segments].mem = (void *)rounddown2(dest,SEGALIGN);
486	loaded_segments[nkexec_segments].memsz = 0;
487
488	i = nkexec_segments;
489	nkexec_segments++;
490
491out:
492	/*
493	 * Keep track of the highest amount used in a segment
494	 */
495	amt = dest - (vm_offset_t)loaded_segments[i].mem;
496	l = min(len,loaded_segments[i].bufsz - amt);
497	*buf = loaded_segments[i].buf + amt;
498	if (amt + l > loaded_segments[i].memsz)
499		loaded_segments[i].memsz = amt + l;
500	return (l);
501}
502
503ssize_t
504kboot_copyin(const void *src, vm_offset_t dest, const size_t len)
505{
506	ssize_t segsize, remainder;
507	void *destbuf;
508
509	if (pa_start == PA_INVAL) {
510		pa_start = kboot_get_phys_load_segment();
511//		padding = 2 << 20; /* XXX amd64: revisit this when we make it work */
512		padding = 0;
513		offset = dest;
514		get_phys_buffer(pa_start, len, &destbuf);
515	}
516
517	remainder = len;
518	do {
519		segsize = get_phys_buffer(dest + pa_start + padding - offset, remainder, &destbuf);
520		bcopy(src, destbuf, segsize);
521		remainder -= segsize;
522		src += segsize;
523		dest += segsize;
524	} while (remainder > 0);
525
526	return (len);
527}
528
529ssize_t
530kboot_copyout(vm_offset_t src, void *dest, const size_t len)
531{
532	ssize_t segsize, remainder;
533	void *srcbuf;
534
535	remainder = len;
536	do {
537		segsize = get_phys_buffer(src + pa_start + padding - offset, remainder, &srcbuf);
538		bcopy(srcbuf, dest, segsize);
539		remainder -= segsize;
540		src += segsize;
541		dest += segsize;
542	} while (remainder > 0);
543
544	return (len);
545}
546
547ssize_t
548kboot_readin(readin_handle_t fd, vm_offset_t dest, const size_t len)
549{
550	void            *buf;
551	size_t          resid, chunk, get;
552	ssize_t         got;
553	vm_offset_t     p;
554
555	p = dest;
556
557	chunk = min(PAGE_SIZE, len);
558	buf = malloc(chunk);
559	if (buf == NULL) {
560		printf("kboot_readin: buf malloc failed\n");
561		return (0);
562	}
563
564	for (resid = len; resid > 0; resid -= got, p += got) {
565		get = min(chunk, resid);
566		got = VECTX_READ(fd, buf, get);
567		if (got <= 0) {
568			if (got < 0)
569				printf("kboot_readin: read failed\n");
570			break;
571		}
572
573		kboot_copyin(buf, p, got);
574	}
575
576	free (buf);
577	return (len - resid);
578}
579
/* archsw autoload hook: does nothing and always returns 0. */
int
kboot_autoload(void)
{
	return (0);
}
586
587void
588kboot_kseg_get(int *nseg, void **ptr)
589{
590	printf("kseg_get: %d segments\n", nkexec_segments);
591	printf("VA               SZ       PA               MEMSZ\n");
592	printf("---------------- -------- ---------------- -----\n");
593	for (int a = 0; a < nkexec_segments; a++) {
594		/*
595		 * Truncate each segment to just what we've used in the segment,
596		 * rounded up to the next page.
597		 */
598		loaded_segments[a].memsz = roundup2(loaded_segments[a].memsz,PAGE_SIZE);
599		loaded_segments[a].bufsz = loaded_segments[a].memsz;
600		printf("%016jx %08jx %016jx %08jx\n",
601			(uintmax_t)loaded_segments[a].buf,
602			(uintmax_t)loaded_segments[a].bufsz,
603			(uintmax_t)loaded_segments[a].mem,
604			(uintmax_t)loaded_segments[a].memsz);
605	}
606
607	*nseg = nkexec_segments;
608	*ptr = &loaded_segments[0];
609}
610
/*
 * archsw zfs_probe hook. With LOADER_ZFS_SUPPORT this defers to
 * hostdisk_zfs_probe(); without it the function is empty.
 */
static void
kboot_zfs_probe(void)
{
#if defined(LOADER_ZFS_SUPPORT)
	/*
	 * Look on every disk and partition we can find for ZFS pools.
	 */
	hostdisk_zfs_probe();
#endif
}
622
623/*
624 * Since proper fdt command handling function is defined in fdt_loader_cmd.c,
625 * and declaring it as extern is in contradiction with COMMAND_SET() macro
626 * (which uses static pointer), we're defining wrapper function, which
627 * calls the proper fdt handling routine.
628 */
629static int
630command_fdt(int argc, char *argv[])
631{
632
633	return (command_fdt_internal(argc, argv));
634}
635
636COMMAND_SET(fdt, "fdt", "flattened device tree handling", command_fdt);
637
638