1/*-
2 * Copyright (C) 2014 Nathan Whitehorn
3 * All rights reserved.
4 * Copyright 2022 Netflix, Inc
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
21 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
23 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
24 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include <sys/types.h>
28#include <sys/disk.h>
29#include <stdarg.h>
30#include <paths.h>
31#include "host_syscall.h"
32#include "kboot.h"
33#include "bootstrap.h"
34#ifdef LOADER_ZFS_SUPPORT
35#include "libzfs.h"
36#include <sys/zfs_bootenv.h>
37#endif
38
39static int hostdisk_init(void);
40static int hostdisk_strategy(void *devdata, int flag, daddr_t dblk,
41    size_t size, char *buf, size_t *rsize);
42static int hostdisk_open(struct open_file *f, ...);
43static int hostdisk_close(struct open_file *f);
44static int hostdisk_ioctl(struct open_file *f, u_long cmd, void *data);
45static int hostdisk_print(int verbose);
46static char *hostdisk_fmtdev(struct devdesc *vdev);
47static bool hostdisk_match(struct devsw *devsw, const char *devspec);
48static int hostdisk_parsedev(struct devdesc **idev, const char *devspec, const char **path);
49
50struct devsw hostdisk = {
51	.dv_name = "/dev",
52	.dv_type = DEVT_HOSTDISK,
53	.dv_init = hostdisk_init,
54	.dv_strategy = hostdisk_strategy,
55	.dv_open = hostdisk_open,
56	.dv_close = hostdisk_close,
57	.dv_ioctl = hostdisk_ioctl,
58	.dv_print = hostdisk_print,
59	.dv_cleanup = nullsys,
60	.dv_fmtdev = hostdisk_fmtdev,
61	.dv_match = hostdisk_match,
62	.dv_parsedev = hostdisk_parsedev,
63};
64
65/*
66 * We need to walk through the /sys/block directories looking for
67 * block devices that we can use.
68 */
/* Directory scanned by hostdisk_find_block_devices(), one entry per device */
#define SYSBLK "/sys/block"

/* Devices (or override files) smaller than this many bytes are ignored */
#define HOSTDISK_MIN_SIZE (16ul << 20)	/* 16MB */
72
73typedef STAILQ_HEAD(, hdinfo) hdinfo_list_t;
74typedef struct hdinfo {
75	STAILQ_ENTRY(hdinfo)	hd_link;	/* link in device list */
76	hdinfo_list_t	hd_children;
77	struct hdinfo	*hd_parent;
78	const char	*hd_dev;
79	uint64_t	hd_size;		/* In bytes */
80	uint64_t	hd_sectors;
81	uint64_t	hd_sectorsize;
82	int		hd_flags;
83#define HDF_HAS_ZPOOL	1			/* We found a zpool here and uuid valid */
84	uint64_t	hd_zfs_uuid;
85} hdinfo_t;
86
/*
 * dev2hd(d):  the hdinfo_t stored in d_opendata of devdesc pointer d.
 * hd_name(hd): hd->hd_dev with its first 5 characters ("/dev/") skipped.
 *
 * The arguments are parenthesized so that any pointer expression
 * (e.g. "p + 1" or "&array[i]") expands correctly.
 */
#define dev2hd(d) ((hdinfo_t *)(d)->d_opendata)
#define hd_name(hd) ((hd)->hd_dev + 5)
89
/* Whole disks found by hostdisk_init(); partitions hang off hd_children. */
static hdinfo_list_t hdinfo = STAILQ_HEAD_INITIALIZER(hdinfo);
91
92typedef bool fef_cb_t(struct host_dirent64 *, void *);
93#define FEF_RECURSIVE 1
94
95static bool
96foreach_file(const char *dir, fef_cb_t cb, void *argp, u_int flags)
97{
98	char dents[2048];
99	int fd, dentsize;
100	struct host_dirent64 *dent;
101
102	fd = host_open(dir, O_RDONLY, 0);
103	if (fd < 0) {
104		printf("Can't open %s\n", dir);/* XXX */
105		return (false);
106	}
107	while (1) {
108		dentsize = host_getdents64(fd, dents, sizeof(dents));
109		if (dentsize <= 0)
110			break;
111		for (dent = (struct host_dirent64 *)dents;
112		     (char *)dent < dents + dentsize;
113		     dent = (struct host_dirent64 *)((void *)dent + dent->d_reclen)) {
114			if (!cb(dent, argp))
115				break;
116		}
117	}
118	host_close(fd);
119	return (true);
120}
121
122static void
123hostdisk_add_part(hdinfo_t *hd, const char *drv, uint64_t secs)
124{
125	hdinfo_t *md;
126	char *dev;
127
128	printf("hd %s adding %s %ju\n", hd->hd_dev, drv, (uintmax_t)secs);
129	if ((md = calloc(1, sizeof(*md))) == NULL)
130		return;
131	if (asprintf(&dev, "/dev/%s", drv) == -1) {
132		printf("hostdisk: no memory\n");
133		free(md);
134		return;
135	}
136	md->hd_dev = dev;
137	md->hd_sectors = secs;
138	md->hd_sectorsize = hd->hd_sectorsize;
139	md->hd_size = md->hd_sectors * md->hd_sectorsize;
140	md->hd_parent = hd;
141	STAILQ_INSERT_TAIL(&hd->hd_children, md, hd_link);
142}
143
144static bool
145hostdisk_one_part(struct host_dirent64 *dent, void *argp)
146{
147	hdinfo_t *hd = argp;
148	char szfn[1024];
149	uint64_t sz;
150
151	/* Need to skip /dev/ at start of hd_name */
152	if (strncmp(dent->d_name, hd_name(hd), strlen(hd_name(hd))) != 0)
153		return (true);
154	/* Find out how big this is -- no size not a disk */
155	snprintf(szfn, sizeof(szfn), "%s/%s/%s/size", SYSBLK,
156	    hd_name(hd), dent->d_name);
157	if (!file2u64(szfn, &sz))
158		return true;
159	hostdisk_add_part(hd, dent->d_name, sz);
160	return true;
161}
162
163static void
164hostdisk_add_parts(hdinfo_t *hd)
165{
166	char fn[1024];
167
168	snprintf(fn, sizeof(fn), "%s/%s", SYSBLK, hd_name(hd));
169	foreach_file(fn, hostdisk_one_part, hd, 0);
170}
171
172static void
173hostdisk_add_drive(const char *drv, uint64_t secs)
174{
175	hdinfo_t *hd = NULL;
176	char *dev = NULL;
177	char fn[1024];
178
179	if ((hd = calloc(1, sizeof(*hd))) == NULL)
180		return;
181	if (asprintf(&dev, "/dev/%s", drv) == -1) {
182		printf("hostdisk: no memory\n");
183		free(hd);
184		return;
185	}
186	hd->hd_dev = dev;
187	hd->hd_sectors = secs;
188	snprintf(fn, sizeof(fn), "%s/%s/queue/hw_sector_size",
189	    SYSBLK, drv);
190	if (!file2u64(fn, &hd->hd_sectorsize))
191		goto err;
192	hd->hd_size = hd->hd_sectors * hd->hd_sectorsize;
193	if (hd->hd_size < HOSTDISK_MIN_SIZE)
194		goto err;
195	hd->hd_flags = 0;
196	STAILQ_INIT(&hd->hd_children);
197	printf("/dev/%s: %ju %ju %ju\n",
198	    drv, hd->hd_size, hd->hd_sectors, hd->hd_sectorsize);
199	STAILQ_INSERT_TAIL(&hdinfo, hd, hd_link);
200	hostdisk_add_parts(hd);
201	return;
202err:
203	free(dev);
204	free(hd);
205	return;
206}
207
208/* Find a disk / partition by its filename */
209
210static hdinfo_t *
211hostdisk_find(const char *fn)
212{
213	hdinfo_t *hd, *md;
214
215	STAILQ_FOREACH(hd, &hdinfo, hd_link) {
216		if (strcmp(hd->hd_dev, fn) == 0)
217			return (hd);
218		STAILQ_FOREACH(md, &hd->hd_children, hd_link) {
219			if (strcmp(md->hd_dev, fn) == 0)
220				return (md);
221		}
222	}
223	return (NULL);
224}
225
226
227static bool
228hostdisk_one_disk(struct host_dirent64 *dent, void *argp __unused)
229{
230	char szfn[1024];
231	uint64_t sz;
232
233	/*
234	 * Skip . and ..
235	 */
236	if (strcmp(dent->d_name, ".") == 0 ||
237	    strcmp(dent->d_name, "..") == 0)
238		return (true);
239
240	/* Find out how big this is -- no size not a disk */
241	snprintf(szfn, sizeof(szfn), "%s/%s/size", SYSBLK,
242	    dent->d_name);
243	if (!file2u64(szfn, &sz))
244		return (true);
245	hostdisk_add_drive(dent->d_name, sz);
246	return (true);
247}
248
249static void
250hostdisk_fake_one_disk(char *override)
251{
252	hdinfo_t *hd = NULL;
253	struct host_kstat sb;
254
255	if (host_stat(override, &sb) != 0)
256		return;
257	if (!HOST_S_ISREG(sb.st_mode))
258		return;
259	if (sb.st_size == 0)
260		return;
261	if ((hd = calloc(1, sizeof(*hd))) == NULL)
262		return;
263	if ((hd->hd_dev = strdup(override)) == NULL)
264		goto err;
265	hd->hd_size = sb.st_size;
266	hd->hd_sectorsize = 512;	/* XXX configurable? */
267	hd->hd_sectors = hd->hd_size / hd->hd_sectorsize;
268	if (hd->hd_size < HOSTDISK_MIN_SIZE)
269		goto err;
270	hd->hd_flags = 0;
271	STAILQ_INIT(&hd->hd_children);
272	printf("%s: %ju %ju %ju\n",
273	    hd->hd_dev, hd->hd_size, hd->hd_sectors, hd->hd_sectorsize);
274	STAILQ_INSERT_TAIL(&hdinfo, hd, hd_link);
275	return;
276err:
277	free(__DECONST(void *, hd->hd_dev));
278	free(hd);
279}
280
281static void
282hostdisk_find_block_devices(void)
283{
284	char *override;
285
286	override=getenv("hostdisk_override");
287	if (override != NULL)
288		hostdisk_fake_one_disk(override);
289	else
290		foreach_file(SYSBLK, hostdisk_one_disk, NULL, 0);
291}
292
/*
 * devsw init hook: discover the block devices.  Always returns 0; finding
 * no devices is not treated as an error.
 */
static int
hostdisk_init(void)
{
	hostdisk_find_block_devices();
	return (0);
}
300
301static int
302hostdisk_strategy(void *devdata, int flag, daddr_t dblk, size_t size,
303    char *buf, size_t *rsize)
304{
305	struct devdesc *desc = devdata;
306	daddr_t pos;
307	int n;
308	uint64_t res;
309	uint32_t posl, posh;
310
311	pos = dblk * 512;
312
313	posl = pos & 0xffffffffu;
314	posh = (pos >> 32) & 0xffffffffu;
315	if (host_llseek(desc->d_unit, posh, posl, &res, 0) < 0)
316		return (EIO);
317	if (flag & F_READ)
318		n = host_read(desc->d_unit, buf, size);
319	else if (flag & F_WRITE)
320		n = host_write(desc->d_unit, buf, size);
321	else
322		return (EINVAL);
323
324	if (n < 0)
325		return (EIO);
326
327	*rsize = n;
328	return (0);
329}
330
331static int
332hostdisk_open(struct open_file *f, ...)
333{
334	struct devdesc *desc;
335	const char *fn;
336	va_list vl;
337
338	va_start(vl, f);
339	desc = va_arg(vl, struct devdesc *);
340	va_end(vl);
341
342	fn = dev2hd(desc)->hd_dev;
343	desc->d_unit = host_open(fn, O_RDWR, 0);
344	if (desc->d_unit <= 0) {
345		printf("hostdisk_open: couldn't open %s: %d\n", fn, errno);
346		return (ENOENT);
347	}
348
349	return (0);
350}
351
352static int
353hostdisk_close(struct open_file *f)
354{
355	struct devdesc *desc = f->f_devdata;
356
357	host_close(desc->d_unit);
358	return (0);
359}
360
361static int
362hostdisk_ioctl(struct open_file *f, u_long cmd, void *data)
363{
364	struct devdesc *desc = f->f_devdata;
365	hdinfo_t *hd = dev2hd(desc);
366
367	switch (cmd) {
368	case DIOCGSECTORSIZE:
369		*(u_int *)data = hd->hd_sectorsize;
370		break;
371	case DIOCGMEDIASIZE:
372		*(uint64_t *)data = hd->hd_size;
373		break;
374	default:
375		return (ENOTTY);
376	}
377	return (0);
378}
379
380static int
381hostdisk_print(int verbose)
382{
383	char line[80];
384	hdinfo_t *hd, *md;
385	int ret = 0;
386
387	printf("%s devices:", hostdisk.dv_name);
388	if (pager_output("\n") != 0)
389		return (1);
390
391	STAILQ_FOREACH(hd, &hdinfo, hd_link) {
392		snprintf(line, sizeof(line),
393		    "   %s: %ju X %ju: %ju bytes\n",
394		    hd->hd_dev,
395		    (uintmax_t)hd->hd_sectors,
396		    (uintmax_t)hd->hd_sectorsize,
397		    (uintmax_t)hd->hd_size);
398		if ((ret = pager_output(line)) != 0)
399			break;
400		STAILQ_FOREACH(md, &hd->hd_children, hd_link) {
401			snprintf(line, sizeof(line),
402			    "     %s: %ju X %ju: %ju bytes\n",
403			    md->hd_dev,
404			    (uintmax_t)md->hd_sectors,
405			    (uintmax_t)md->hd_sectorsize,
406			    (uintmax_t)md->hd_size);
407			if ((ret = pager_output(line)) != 0)
408				goto done;
409		}
410	}
411
412done:
413	return (ret);
414}
415
416static char *
417hostdisk_fmtdev(struct devdesc *vdev)
418{
419	static char name[DEV_DEVLEN];
420
421	snprintf(name, sizeof(name), "%s:", dev2hd(vdev)->hd_dev);
422	return (name);
423}
424
425static bool
426hostdisk_match(struct devsw *devsw, const char *devspec)
427{
428	hdinfo_t *hd;
429	const char *colon;
430	char *cp;
431
432	colon = strchr(devspec, ':');
433	if (colon == NULL)
434		return false;
435	cp = strdup(devspec);
436	cp[colon - devspec] = '\0';
437	hd = hostdisk_find(cp);
438	free(cp);
439	return (hd != NULL);
440}
441
442static int
443hostdisk_parsedev(struct devdesc **idev, const char *devspec, const char **path)
444{
445	const char *cp;
446	struct devdesc *dev;
447	hdinfo_t *hd;
448	int len;
449	char *fn;
450
451	/* Must have a : in it */
452	cp = strchr(devspec, ':');
453	if (cp == NULL)
454		return (EINVAL);
455	/* XXX Stat the /dev or defer error handling to open(2) call? */
456	if (path != NULL)
457		*path = cp + 1;
458	len = cp - devspec;
459	fn = strdup(devspec);
460	fn[len] = '\0';
461	hd = hostdisk_find(fn);
462	if (hd == NULL) {
463		printf("Can't find hdinfo for %s\n", fn);
464		free(fn);
465		return (EINVAL);
466	}
467	free(fn);
468	dev = malloc(sizeof(*dev));
469	if (dev == NULL)
470		return (ENOMEM);
471	dev->d_unit = 0;
472	dev->d_dev = &hostdisk;
473	dev->d_opendata = hd;
474	*idev = dev;
475	return (0);
476}
477
478/* XXX refactor */
479static bool
480sanity_check_currdev(void)
481{
482	struct stat st;
483
484	return (stat(PATH_DEFAULTS_LOADER_CONF, &st) == 0 ||
485#ifdef PATH_BOOTABLE_TOKEN
486	    stat(PATH_BOOTABLE_TOKEN, &st) == 0 || /* non-standard layout */
487#endif
488	    stat(PATH_KERNEL, &st) == 0);
489}
490
491static const char *
492hostdisk_try_one(hdinfo_t *hd)
493{
494	char *fn;
495
496	if (asprintf(&fn, "%s:", hd->hd_dev) == -1)
497		return (NULL);
498	set_currdev(fn);
499	printf("Trying %s\n", fn);
500	if (sanity_check_currdev())
501		return (fn);
502	printf("Failed %s\n", fn);
503	free(fn);
504	return (NULL);
505}
506
507const char *
508hostdisk_gen_probe(void)
509{
510	hdinfo_t *hd, *md;
511	const char *rv = NULL;
512
513	STAILQ_FOREACH(hd, &hdinfo, hd_link) {
514		/* try whole disk */
515		if (hd->hd_flags & HDF_HAS_ZPOOL)
516			continue;
517		rv = hostdisk_try_one(hd);
518		if (rv != NULL)
519			return (rv);
520
521		/* try all partitions */
522		STAILQ_FOREACH(md, &hd->hd_children, hd_link) {
523			if (md->hd_flags & HDF_HAS_ZPOOL)
524				continue;
525			rv = hostdisk_try_one(md);
526			if (rv != NULL)
527				return (rv);
528		}
529	}
530	return (NULL);
531}
532
533#ifdef LOADER_ZFS_SUPPORT
534static bool
535hostdisk_zfs_check_one(hdinfo_t *hd)
536{
537	char *fn;
538	bool found = false;
539	uint64_t pool_uuid;
540
541	if (asprintf(&fn, "%s:", hd->hd_dev) == -1)
542		return (false);
543	pool_uuid = 0;
544	zfs_probe_dev(fn, &pool_uuid, false);
545	if (pool_uuid != 0) {
546		found = true;
547		hd->hd_flags |= HDF_HAS_ZPOOL;
548		hd->hd_zfs_uuid = pool_uuid;
549	}
550	free(fn);
551
552	return (found);
553}
554
555void
556hostdisk_zfs_probe(void)
557{
558	hdinfo_t *hd, *md;
559
560	STAILQ_FOREACH(hd, &hdinfo, hd_link) {
561		if (hostdisk_zfs_check_one(hd))
562			continue;
563		STAILQ_FOREACH(md, &hd->hd_children, hd_link) {
564			hostdisk_zfs_check_one(md);
565		}
566	}
567}
568
569/* This likely shoud move to libsa/zfs/zfs.c and be used by at least EFI booting */
570static bool
571probe_zfs_currdev(uint64_t pool_guid)
572{
573	char *devname;
574	struct zfs_devdesc currdev;
575	char buf[VDEV_PAD_SIZE];
576
577	currdev.dd.d_dev = &zfs_dev;
578	currdev.dd.d_unit = 0;
579	currdev.pool_guid = pool_guid;
580	currdev.root_guid = 0;
581	devname = devformat(&currdev.dd);
582	printf("Setting currdev to %s\n", devname);
583	set_currdev(devname);
584	init_zfs_boot_options(devname);
585
586	if (zfs_get_bootonce(&currdev, OS_BOOTONCE, buf, sizeof(buf)) == 0) {
587		printf("zfs bootonce: %s\n", buf);
588		set_currdev(buf);
589		setenv("zfs-bootonce", buf, 1);
590	}
591	(void)zfs_attach_nvstore(&currdev);
592
593	return (sanity_check_currdev());
594}
595
596static bool
597hostdisk_zfs_try_default(hdinfo_t *hd)
598{
599	return (probe_zfs_currdev(hd->hd_zfs_uuid));
600}
601
602bool
603hostdisk_zfs_find_default(void)
604{
605	hdinfo_t *hd, *md;
606
607	STAILQ_FOREACH(hd, &hdinfo, hd_link) {
608		if (hd->hd_flags & HDF_HAS_ZPOOL) {
609			if (hostdisk_zfs_try_default(hd))
610				return (true);
611			continue;
612		}
613		STAILQ_FOREACH(md, &hd->hd_children, hd_link) {
614			if (md->hd_flags & HDF_HAS_ZPOOL) {
615				if (hostdisk_zfs_try_default(md))
616					return (true);
617			}
618		}
619	}
620	return (false);
621}
622#endif
623