1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2012 Nexenta Systems, Inc.  All rights reserved.
25 * Copyright (c) 2018 by Delphix. All rights reserved.
26 */
27
28#include <stdio.h>
29#include <stdlib.h>
30#include <errno.h>
31#include <string.h>
32#include <unistd.h>
33#include <uuid/uuid.h>
34#include <zlib.h>
35#include <libintl.h>
36#include <sys/types.h>
37#include <sys/dkio.h>
38#include <sys/mhd.h>
39#include <sys/param.h>
40#include <sys/dktp/fdisk.h>
41#include <sys/efi_partition.h>
42#include <sys/byteorder.h>
43#include <sys/vdev_disk.h>
44#include <linux/fs.h>
45#include <linux/blkpg.h>
46
47static struct uuid_to_ptag {
48	struct uuid	uuid;
49} conversion_array[] = {
50	{ EFI_UNUSED },
51	{ EFI_BOOT },
52	{ EFI_ROOT },
53	{ EFI_SWAP },
54	{ EFI_USR },
55	{ EFI_BACKUP },
56	{ EFI_UNUSED },		/* STAND is never used */
57	{ EFI_VAR },
58	{ EFI_HOME },
59	{ EFI_ALTSCTR },
60	{ EFI_UNUSED },		/* CACHE (cachefs) is never used */
61	{ EFI_RESERVED },
62	{ EFI_SYSTEM },
63	{ EFI_LEGACY_MBR },
64	{ EFI_SYMC_PUB },
65	{ EFI_SYMC_CDS },
66	{ EFI_MSFT_RESV },
67	{ EFI_DELL_BASIC },
68	{ EFI_DELL_RAID },
69	{ EFI_DELL_SWAP },
70	{ EFI_DELL_LVM },
71	{ EFI_DELL_RESV },
72	{ EFI_AAPL_HFS },
73	{ EFI_AAPL_UFS },
74	{ EFI_FREEBSD_BOOT },
75	{ EFI_FREEBSD_SWAP },
76	{ EFI_FREEBSD_UFS },
77	{ EFI_FREEBSD_VINUM },
78	{ EFI_FREEBSD_ZFS },
79	{ EFI_BIOS_BOOT },
80	{ EFI_INTC_RS },
81	{ EFI_SNE_BOOT },
82	{ EFI_LENOVO_BOOT },
83	{ EFI_MSFT_LDMM },
84	{ EFI_MSFT_LDMD },
85	{ EFI_MSFT_RE },
86	{ EFI_IBM_GPFS },
87	{ EFI_MSFT_STORAGESPACES },
88	{ EFI_HPQ_DATA },
89	{ EFI_HPQ_SVC },
90	{ EFI_RHT_DATA },
91	{ EFI_RHT_HOME },
92	{ EFI_RHT_SRV },
93	{ EFI_RHT_DMCRYPT },
94	{ EFI_RHT_LUKS },
95	{ EFI_FREEBSD_DISKLABEL },
96	{ EFI_AAPL_RAID },
97	{ EFI_AAPL_RAIDOFFLINE },
98	{ EFI_AAPL_BOOT },
99	{ EFI_AAPL_LABEL },
100	{ EFI_AAPL_TVRECOVERY },
101	{ EFI_AAPL_CORESTORAGE },
102	{ EFI_NETBSD_SWAP },
103	{ EFI_NETBSD_FFS },
104	{ EFI_NETBSD_LFS },
105	{ EFI_NETBSD_RAID },
106	{ EFI_NETBSD_CAT },
107	{ EFI_NETBSD_CRYPT },
108	{ EFI_GOOG_KERN },
109	{ EFI_GOOG_ROOT },
110	{ EFI_GOOG_RESV },
111	{ EFI_HAIKU_BFS },
112	{ EFI_MIDNIGHTBSD_BOOT },
113	{ EFI_MIDNIGHTBSD_DATA },
114	{ EFI_MIDNIGHTBSD_SWAP },
115	{ EFI_MIDNIGHTBSD_UFS },
116	{ EFI_MIDNIGHTBSD_VINUM },
117	{ EFI_MIDNIGHTBSD_ZFS },
118	{ EFI_CEPH_JOURNAL },
119	{ EFI_CEPH_DMCRYPTJOURNAL },
120	{ EFI_CEPH_OSD },
121	{ EFI_CEPH_DMCRYPTOSD },
122	{ EFI_CEPH_CREATE },
123	{ EFI_CEPH_DMCRYPTCREATE },
124	{ EFI_OPENBSD_DISKLABEL },
125	{ EFI_BBRY_QNX },
126	{ EFI_BELL_PLAN9 },
127	{ EFI_VMW_KCORE },
128	{ EFI_VMW_VMFS },
129	{ EFI_VMW_RESV },
130	{ EFI_RHT_ROOTX86 },
131	{ EFI_RHT_ROOTAMD64 },
132	{ EFI_RHT_ROOTARM },
133	{ EFI_RHT_ROOTARM64 },
134	{ EFI_ACRONIS_SECUREZONE },
135	{ EFI_ONIE_BOOT },
136	{ EFI_ONIE_CONFIG },
137	{ EFI_IBM_PPRPBOOT },
138	{ EFI_FREEDESKTOP_BOOT }
139};
140
/*
 * Set to non-zero to enable the optional diagnostic messages that the
 * routines in this file print to stderr.
 */
int efi_debug = 0;

/* Forward declaration: efi_alloc_and_read() calls efi_read() before it is defined. */
static int efi_read(int, struct dk_gpt *);
144
145/*
146 * Return a 32-bit CRC of the contents of the buffer.  Pre-and-post
147 * one's conditioning will be handled by crc32() internally.
148 */
149static uint32_t
150efi_crc32(const unsigned char *buf, unsigned int size)
151{
152	uint32_t crc = crc32(0, Z_NULL, 0);
153
154	crc = crc32(crc, buf, size);
155
156	return (crc);
157}
158
159static int
160read_disk_info(int fd, diskaddr_t *capacity, uint_t *lbsize)
161{
162	int sector_size;
163	unsigned long long capacity_size;
164
165	if (ioctl(fd, BLKSSZGET, &sector_size) < 0)
166		return (-1);
167
168	if (ioctl(fd, BLKGETSIZE64, &capacity_size) < 0)
169		return (-1);
170
171	*lbsize = (uint_t)sector_size;
172	*capacity = (diskaddr_t)(capacity_size / sector_size);
173
174	return (0);
175}
176
/*
 * Resolve the path of the device that `fd' refers to.
 *
 * The libefi API hands us only an open descriptor, not a path, so the
 * name is recovered by resolving /proc/self/fd/<fd> with realpath(3).
 * The returned string is allocated by realpath(); the caller must
 * free() it.  Whatever realpath() returns on failure is passed through.
 */
static char *
efi_get_devname(int fd)
{
	char proc_link[32];
	char *resolved;

	(void) snprintf(proc_link, sizeof (proc_link), "/proc/self/fd/%d", fd);
	resolved = realpath(proc_link, NULL);

	return (resolved);
}
195
196static int
197efi_get_info(int fd, struct dk_cinfo *dki_info)
198{
199	char *dev_path;
200	int rval = 0;
201
202	memset(dki_info, 0, sizeof (*dki_info));
203
204	/*
205	 * The simplest way to get the partition number under linux is
206	 * to parse it out of the /dev/<disk><partition> block device name.
207	 * The kernel creates this using the partition number when it
208	 * populates /dev/ so it may be trusted.  The tricky bit here is
209	 * that the naming convention is based on the block device type.
210	 * So we need to take this in to account when parsing out the
211	 * partition information.  Aside from the partition number we collect
212	 * some additional device info.
213	 */
214	dev_path = efi_get_devname(fd);
215	if (dev_path == NULL)
216		goto error;
217
218	if ((strncmp(dev_path, "/dev/sd", 7) == 0)) {
219		strcpy(dki_info->dki_cname, "sd");
220		dki_info->dki_ctype = DKC_SCSI_CCS;
221		rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu",
222		    dki_info->dki_dname,
223		    &dki_info->dki_partition);
224	} else if ((strncmp(dev_path, "/dev/hd", 7) == 0)) {
225		strcpy(dki_info->dki_cname, "hd");
226		dki_info->dki_ctype = DKC_DIRECT;
227		rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu",
228		    dki_info->dki_dname,
229		    &dki_info->dki_partition);
230	} else if ((strncmp(dev_path, "/dev/md", 7) == 0)) {
231		strcpy(dki_info->dki_cname, "pseudo");
232		dki_info->dki_ctype = DKC_MD;
233		strcpy(dki_info->dki_dname, "md");
234		rval = sscanf(dev_path, "/dev/md%[0-9]p%hu",
235		    dki_info->dki_dname + 2,
236		    &dki_info->dki_partition);
237	} else if ((strncmp(dev_path, "/dev/vd", 7) == 0)) {
238		strcpy(dki_info->dki_cname, "vd");
239		dki_info->dki_ctype = DKC_MD;
240		rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu",
241		    dki_info->dki_dname,
242		    &dki_info->dki_partition);
243	} else if ((strncmp(dev_path, "/dev/xvd", 8) == 0)) {
244		strcpy(dki_info->dki_cname, "xvd");
245		dki_info->dki_ctype = DKC_MD;
246		rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu",
247		    dki_info->dki_dname,
248		    &dki_info->dki_partition);
249	} else if ((strncmp(dev_path, "/dev/zd", 7) == 0)) {
250		strcpy(dki_info->dki_cname, "zd");
251		dki_info->dki_ctype = DKC_MD;
252		strcpy(dki_info->dki_dname, "zd");
253		rval = sscanf(dev_path, "/dev/zd%[0-9]p%hu",
254		    dki_info->dki_dname + 2,
255		    &dki_info->dki_partition);
256	} else if ((strncmp(dev_path, "/dev/dm-", 8) == 0)) {
257		strcpy(dki_info->dki_cname, "pseudo");
258		dki_info->dki_ctype = DKC_VBD;
259		strcpy(dki_info->dki_dname, "dm-");
260		rval = sscanf(dev_path, "/dev/dm-%[0-9]p%hu",
261		    dki_info->dki_dname + 3,
262		    &dki_info->dki_partition);
263	} else if ((strncmp(dev_path, "/dev/ram", 8) == 0)) {
264		strcpy(dki_info->dki_cname, "pseudo");
265		dki_info->dki_ctype = DKC_PCMCIA_MEM;
266		strcpy(dki_info->dki_dname, "ram");
267		rval = sscanf(dev_path, "/dev/ram%[0-9]p%hu",
268		    dki_info->dki_dname + 3,
269		    &dki_info->dki_partition);
270	} else if ((strncmp(dev_path, "/dev/loop", 9) == 0)) {
271		strcpy(dki_info->dki_cname, "pseudo");
272		dki_info->dki_ctype = DKC_VBD;
273		strcpy(dki_info->dki_dname, "loop");
274		rval = sscanf(dev_path, "/dev/loop%[0-9]p%hu",
275		    dki_info->dki_dname + 4,
276		    &dki_info->dki_partition);
277	} else if ((strncmp(dev_path, "/dev/nvme", 9) == 0)) {
278		strcpy(dki_info->dki_cname, "nvme");
279		dki_info->dki_ctype = DKC_SCSI_CCS;
280		strcpy(dki_info->dki_dname, "nvme");
281		(void) sscanf(dev_path, "/dev/nvme%[0-9]",
282		    dki_info->dki_dname + 4);
283		size_t controller_length = strlen(
284		    dki_info->dki_dname);
285		strcpy(dki_info->dki_dname + controller_length,
286		    "n");
287		rval = sscanf(dev_path,
288		    "/dev/nvme%*[0-9]n%[0-9]p%hu",
289		    dki_info->dki_dname + controller_length + 1,
290		    &dki_info->dki_partition);
291	} else {
292		strcpy(dki_info->dki_dname, "unknown");
293		strcpy(dki_info->dki_cname, "unknown");
294		dki_info->dki_ctype = DKC_UNKNOWN;
295	}
296
297	switch (rval) {
298	case 0:
299		errno = EINVAL;
300		goto error;
301	case 1:
302		dki_info->dki_partition = 0;
303	}
304
305	free(dev_path);
306
307	return (0);
308error:
309	if (efi_debug)
310		(void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno);
311
312	switch (errno) {
313	case EIO:
314		return (VT_EIO);
315	case EINVAL:
316		return (VT_EINVAL);
317	default:
318		return (VT_ERROR);
319	}
320}
321
/*
 * NBLOCKS(p, l): the number of blocks the EFI label takes up for `p'
 * partition entries on a device with `l'-byte blocks.  The entry array
 * (p * sizeof (efi_gpe_t) bytes) is rounded up to the nearest block and
 * one further block is added.  `l' must be non-zero; both arguments are
 * evaluated more than once.
 */
#define	NBLOCKS(p, l)	(1 + ((((p) * (int)sizeof (efi_gpe_t))  + \
				((l) - 1)) / (l)))
/*
 * number of partitions -- limited by what we can malloc: the largest
 * count of dk_part entries that, together with a dk_gpt header, stays
 * below 4294967295 bytes.
 */
#define	MAX_PARTS	((4294967295UL - sizeof (struct dk_gpt)) / \
			    sizeof (struct dk_part))
331
332int
333efi_alloc_and_init(int fd, uint32_t nparts, struct dk_gpt **vtoc)
334{
335	diskaddr_t	capacity = 0;
336	uint_t		lbsize = 0;
337	uint_t		nblocks;
338	size_t		length;
339	struct dk_gpt	*vptr;
340	struct uuid	uuid;
341	struct dk_cinfo	dki_info;
342
343	if (read_disk_info(fd, &capacity, &lbsize) != 0)
344		return (-1);
345
346	if (efi_get_info(fd, &dki_info) != 0)
347		return (-1);
348
349	if (dki_info.dki_partition != 0)
350		return (-1);
351
352	if ((dki_info.dki_ctype == DKC_PCMCIA_MEM) ||
353	    (dki_info.dki_ctype == DKC_VBD) ||
354	    (dki_info.dki_ctype == DKC_UNKNOWN))
355		return (-1);
356
357	nblocks = NBLOCKS(nparts, lbsize);
358	if ((nblocks * lbsize) < EFI_MIN_ARRAY_SIZE + lbsize) {
359		/* 16K plus one block for the GPT */
360		nblocks = EFI_MIN_ARRAY_SIZE / lbsize + 1;
361	}
362
363	if (nparts > MAX_PARTS) {
364		if (efi_debug) {
365			(void) fprintf(stderr,
366			"the maximum number of partitions supported is %lu\n",
367			    MAX_PARTS);
368		}
369		return (-1);
370	}
371
372	length = sizeof (struct dk_gpt) +
373	    sizeof (struct dk_part) * (nparts - 1);
374
375	vptr = calloc(1, length);
376	if (vptr == NULL)
377		return (-1);
378
379	*vtoc = vptr;
380
381	vptr->efi_version = EFI_VERSION_CURRENT;
382	vptr->efi_lbasize = lbsize;
383	vptr->efi_nparts = nparts;
384	/*
385	 * add one block here for the PMBR; on disks with a 512 byte
386	 * block size and 128 or fewer partitions, efi_first_u_lba
387	 * should work out to "34"
388	 */
389	vptr->efi_first_u_lba = nblocks + 1;
390	vptr->efi_last_lba = capacity - 1;
391	vptr->efi_altern_lba = capacity -1;
392	vptr->efi_last_u_lba = vptr->efi_last_lba - nblocks;
393
394	(void) uuid_generate((uchar_t *)&uuid);
395	UUID_LE_CONVERT(vptr->efi_disk_uguid, uuid);
396	return (0);
397}
398
399/*
400 * Read EFI - return partition number upon success.
401 */
402int
403efi_alloc_and_read(int fd, struct dk_gpt **vtoc)
404{
405	int			rval;
406	uint32_t		nparts;
407	int			length;
408	struct dk_gpt		*vptr;
409
410	/* figure out the number of entries that would fit into 16K */
411	nparts = EFI_MIN_ARRAY_SIZE / sizeof (efi_gpe_t);
412	length = (int) sizeof (struct dk_gpt) +
413	    (int) sizeof (struct dk_part) * (nparts - 1);
414	vptr = calloc(1, length);
415
416	if (vptr == NULL)
417		return (VT_ERROR);
418
419	vptr->efi_nparts = nparts;
420	rval = efi_read(fd, vptr);
421
422	if ((rval == VT_EINVAL) && vptr->efi_nparts > nparts) {
423		void *tmp;
424		length = (int) sizeof (struct dk_gpt) +
425		    (int) sizeof (struct dk_part) * (vptr->efi_nparts - 1);
426		if ((tmp = realloc(vptr, length)) == NULL) {
427			/* cppcheck-suppress doubleFree */
428			free(vptr);
429			*vtoc = NULL;
430			return (VT_ERROR);
431		} else {
432			vptr = tmp;
433			rval = efi_read(fd, vptr);
434		}
435	}
436
437	if (rval < 0) {
438		if (efi_debug) {
439			(void) fprintf(stderr,
440			    "read of EFI table failed, rval=%d\n", rval);
441		}
442		free(vptr);
443		*vtoc = NULL;
444	} else {
445		*vtoc = vptr;
446	}
447
448	return (rval);
449}
450
451static int
452efi_ioctl(int fd, int cmd, dk_efi_t *dk_ioc)
453{
454	void *data = dk_ioc->dki_data;
455	int error;
456	diskaddr_t capacity;
457	uint_t lbsize;
458
459	/*
460	 * When the IO is not being performed in kernel as an ioctl we need
461	 * to know the sector size so we can seek to the proper byte offset.
462	 */
463	if (read_disk_info(fd, &capacity, &lbsize) == -1) {
464		if (efi_debug)
465			fprintf(stderr, "unable to read disk info: %d", errno);
466
467		errno = EIO;
468		return (-1);
469	}
470
471	switch (cmd) {
472	case DKIOCGETEFI:
473		if (lbsize == 0) {
474			if (efi_debug)
475				(void) fprintf(stderr, "DKIOCGETEFI assuming "
476				    "LBA %d bytes\n", DEV_BSIZE);
477
478			lbsize = DEV_BSIZE;
479		}
480
481		error = lseek(fd, dk_ioc->dki_lba * lbsize, SEEK_SET);
482		if (error == -1) {
483			if (efi_debug)
484				(void) fprintf(stderr, "DKIOCGETEFI lseek "
485				    "error: %d\n", errno);
486			return (error);
487		}
488
489		error = read(fd, data, dk_ioc->dki_length);
490		if (error == -1) {
491			if (efi_debug)
492				(void) fprintf(stderr, "DKIOCGETEFI read "
493				    "error: %d\n", errno);
494			return (error);
495		}
496
497		if (error != dk_ioc->dki_length) {
498			if (efi_debug)
499				(void) fprintf(stderr, "DKIOCGETEFI short "
500				    "read of %d bytes\n", error);
501			errno = EIO;
502			return (-1);
503		}
504		error = 0;
505		break;
506
507	case DKIOCSETEFI:
508		if (lbsize == 0) {
509			if (efi_debug)
510				(void) fprintf(stderr, "DKIOCSETEFI unknown "
511				    "LBA size\n");
512			errno = EIO;
513			return (-1);
514		}
515
516		error = lseek(fd, dk_ioc->dki_lba * lbsize, SEEK_SET);
517		if (error == -1) {
518			if (efi_debug)
519				(void) fprintf(stderr, "DKIOCSETEFI lseek "
520				    "error: %d\n", errno);
521			return (error);
522		}
523
524		error = write(fd, data, dk_ioc->dki_length);
525		if (error == -1) {
526			if (efi_debug)
527				(void) fprintf(stderr, "DKIOCSETEFI write "
528				    "error: %d\n", errno);
529			return (error);
530		}
531
532		if (error != dk_ioc->dki_length) {
533			if (efi_debug)
534				(void) fprintf(stderr, "DKIOCSETEFI short "
535				    "write of %d bytes\n", error);
536			errno = EIO;
537			return (-1);
538		}
539
540		/* Sync the new EFI table to disk */
541		error = fsync(fd);
542		if (error == -1)
543			return (error);
544
545		/* Ensure any local disk cache is also flushed */
546		if (ioctl(fd, BLKFLSBUF, 0) == -1)
547			return (error);
548
549		error = 0;
550		break;
551
552	default:
553		if (efi_debug)
554			(void) fprintf(stderr, "unsupported ioctl()\n");
555
556		errno = EIO;
557		return (-1);
558	}
559
560	return (error);
561}
562
/*
 * Ask the kernel to re-read the partition table of the device open on
 * `fd' (BLKRRPART).  While the ioctl fails with EBUSY it is retried
 * after a 50ms pause, for at most ten attempts in total.
 *
 * Returns 0 once the kernel accepts the request, or -1 (after printing
 * a message to stderr) when the attempts are exhausted or the ioctl
 * fails with any error other than EBUSY.
 */
int
efi_rescan(int fd)
{
	int attempts_left;

	/* Notify the kernel a devices partition table has been updated */
	for (attempts_left = 10; ioctl(fd, BLKRRPART) != 0; ) {
		attempts_left--;
		if (attempts_left == 0 || errno != EBUSY) {
			(void) fprintf(stderr, "the kernel failed to rescan "
			    "the partition table: %d\n", errno);
			return (-1);
		}
		usleep(50000);
	}

	return (0);
}
580
581static int
582check_label(int fd, dk_efi_t *dk_ioc)
583{
584	efi_gpt_t		*efi;
585	uint_t			crc;
586
587	if (efi_ioctl(fd, DKIOCGETEFI, dk_ioc) == -1) {
588		switch (errno) {
589		case EIO:
590			return (VT_EIO);
591		default:
592			return (VT_ERROR);
593		}
594	}
595	efi = dk_ioc->dki_data;
596	if (efi->efi_gpt_Signature != LE_64(EFI_SIGNATURE)) {
597		if (efi_debug)
598			(void) fprintf(stderr,
599			    "Bad EFI signature: 0x%llx != 0x%llx\n",
600			    (long long)efi->efi_gpt_Signature,
601			    (long long)LE_64(EFI_SIGNATURE));
602		return (VT_EINVAL);
603	}
604
605	/*
606	 * check CRC of the header; the size of the header should
607	 * never be larger than one block
608	 */
609	crc = efi->efi_gpt_HeaderCRC32;
610	efi->efi_gpt_HeaderCRC32 = 0;
611	len_t headerSize = (len_t)LE_32(efi->efi_gpt_HeaderSize);
612
613	if (headerSize < EFI_MIN_LABEL_SIZE || headerSize > EFI_LABEL_SIZE) {
614		if (efi_debug)
615			(void) fprintf(stderr,
616			    "Invalid EFI HeaderSize %llu.  Assuming %d.\n",
617			    headerSize, EFI_MIN_LABEL_SIZE);
618	}
619
620	if ((headerSize > dk_ioc->dki_length) ||
621	    crc != LE_32(efi_crc32((unsigned char *)efi, headerSize))) {
622		if (efi_debug)
623			(void) fprintf(stderr,
624			    "Bad EFI CRC: 0x%x != 0x%x\n",
625			    crc, LE_32(efi_crc32((unsigned char *)efi,
626			    headerSize)));
627		return (VT_EINVAL);
628	}
629
630	return (0);
631}
632
633static int
634efi_read(int fd, struct dk_gpt *vtoc)
635{
636	int			i, j;
637	int			label_len;
638	int			rval = 0;
639	int			md_flag = 0;
640	int			vdc_flag = 0;
641	diskaddr_t		capacity = 0;
642	uint_t			lbsize = 0;
643	struct dk_minfo		disk_info;
644	dk_efi_t		dk_ioc;
645	efi_gpt_t		*efi;
646	efi_gpe_t		*efi_parts;
647	struct dk_cinfo		dki_info;
648	uint32_t		user_length;
649	boolean_t		legacy_label = B_FALSE;
650
651	/*
652	 * get the partition number for this file descriptor.
653	 */
654	if ((rval = efi_get_info(fd, &dki_info)) != 0)
655		return (rval);
656
657	if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) &&
658	    (strncmp(dki_info.dki_dname, "md", 3) == 0)) {
659		md_flag++;
660	} else if ((strncmp(dki_info.dki_cname, "vdc", 4) == 0) &&
661	    (strncmp(dki_info.dki_dname, "vdc", 4) == 0)) {
662		/*
663		 * The controller and drive name "vdc" (virtual disk client)
664		 * indicates a LDoms virtual disk.
665		 */
666		vdc_flag++;
667	}
668
669	/* get the LBA size */
670	if (read_disk_info(fd, &capacity, &lbsize) == -1) {
671		if (efi_debug) {
672			(void) fprintf(stderr,
673			    "unable to read disk info: %d",
674			    errno);
675		}
676		return (VT_EINVAL);
677	}
678
679	disk_info.dki_lbsize = lbsize;
680	disk_info.dki_capacity = capacity;
681
682	if (disk_info.dki_lbsize == 0) {
683		if (efi_debug) {
684			(void) fprintf(stderr,
685			    "efi_read: assuming LBA 512 bytes\n");
686		}
687		disk_info.dki_lbsize = DEV_BSIZE;
688	}
689	/*
690	 * Read the EFI GPT to figure out how many partitions we need
691	 * to deal with.
692	 */
693	dk_ioc.dki_lba = 1;
694	if (NBLOCKS(vtoc->efi_nparts, disk_info.dki_lbsize) < 34) {
695		label_len = EFI_MIN_ARRAY_SIZE + disk_info.dki_lbsize;
696	} else {
697		label_len = vtoc->efi_nparts * (int) sizeof (efi_gpe_t) +
698		    disk_info.dki_lbsize;
699		if (label_len % disk_info.dki_lbsize) {
700			/* pad to physical sector size */
701			label_len += disk_info.dki_lbsize;
702			label_len &= ~(disk_info.dki_lbsize - 1);
703		}
704	}
705
706	if (posix_memalign((void **)&dk_ioc.dki_data,
707	    disk_info.dki_lbsize, label_len))
708		return (VT_ERROR);
709
710	memset(dk_ioc.dki_data, 0, label_len);
711	dk_ioc.dki_length = disk_info.dki_lbsize;
712	user_length = vtoc->efi_nparts;
713	efi = dk_ioc.dki_data;
714	if (md_flag) {
715		dk_ioc.dki_length = label_len;
716		if (efi_ioctl(fd, DKIOCGETEFI, &dk_ioc) == -1) {
717			switch (errno) {
718			case EIO:
719				return (VT_EIO);
720			default:
721				return (VT_ERROR);
722			}
723		}
724	} else if ((rval = check_label(fd, &dk_ioc)) == VT_EINVAL) {
725		/*
726		 * No valid label here; try the alternate. Note that here
727		 * we just read GPT header and save it into dk_ioc.data,
728		 * Later, we will read GUID partition entry array if we
729		 * can get valid GPT header.
730		 */
731
732		/*
733		 * This is a workaround for legacy systems. In the past, the
734		 * last sector of SCSI disk was invisible on x86 platform. At
735		 * that time, backup label was saved on the next to the last
736		 * sector. It is possible for users to move a disk from previous
737		 * solaris system to present system. Here, we attempt to search
738		 * legacy backup EFI label first.
739		 */
740		dk_ioc.dki_lba = disk_info.dki_capacity - 2;
741		dk_ioc.dki_length = disk_info.dki_lbsize;
742		rval = check_label(fd, &dk_ioc);
743		if (rval == VT_EINVAL) {
744			/*
745			 * we didn't find legacy backup EFI label, try to
746			 * search backup EFI label in the last block.
747			 */
748			dk_ioc.dki_lba = disk_info.dki_capacity - 1;
749			dk_ioc.dki_length = disk_info.dki_lbsize;
750			rval = check_label(fd, &dk_ioc);
751			if (rval == 0) {
752				legacy_label = B_TRUE;
753				if (efi_debug)
754					(void) fprintf(stderr,
755					    "efi_read: primary label corrupt; "
756					    "using EFI backup label located on"
757					    " the last block\n");
758			}
759		} else {
760			if ((efi_debug) && (rval == 0))
761				(void) fprintf(stderr, "efi_read: primary label"
762				    " corrupt; using legacy EFI backup label "
763				    " located on the next to last block\n");
764		}
765
766		if (rval == 0) {
767			dk_ioc.dki_lba = LE_64(efi->efi_gpt_PartitionEntryLBA);
768			vtoc->efi_flags |= EFI_GPT_PRIMARY_CORRUPT;
769			vtoc->efi_nparts =
770			    LE_32(efi->efi_gpt_NumberOfPartitionEntries);
771			/*
772			 * Partition tables are between backup GPT header
773			 * table and ParitionEntryLBA (the starting LBA of
774			 * the GUID partition entries array). Now that we
775			 * already got valid GPT header and saved it in
776			 * dk_ioc.dki_data, we try to get GUID partition
777			 * entry array here.
778			 */
779			/* LINTED */
780			dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data
781			    + disk_info.dki_lbsize);
782			if (legacy_label)
783				dk_ioc.dki_length = disk_info.dki_capacity - 1 -
784				    dk_ioc.dki_lba;
785			else
786				dk_ioc.dki_length = disk_info.dki_capacity - 2 -
787				    dk_ioc.dki_lba;
788			dk_ioc.dki_length *= disk_info.dki_lbsize;
789			if (dk_ioc.dki_length >
790			    ((len_t)label_len - sizeof (*dk_ioc.dki_data))) {
791				rval = VT_EINVAL;
792			} else {
793				/*
794				 * read GUID partition entry array
795				 */
796				rval = efi_ioctl(fd, DKIOCGETEFI, &dk_ioc);
797			}
798		}
799
800	} else if (rval == 0) {
801
802		dk_ioc.dki_lba = LE_64(efi->efi_gpt_PartitionEntryLBA);
803		/* LINTED */
804		dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data
805		    + disk_info.dki_lbsize);
806		dk_ioc.dki_length = label_len - disk_info.dki_lbsize;
807		rval = efi_ioctl(fd, DKIOCGETEFI, &dk_ioc);
808
809	} else if (vdc_flag && rval == VT_ERROR && errno == EINVAL) {
810		/*
811		 * When the device is a LDoms virtual disk, the DKIOCGETEFI
812		 * ioctl can fail with EINVAL if the virtual disk backend
813		 * is a ZFS volume serviced by a domain running an old version
814		 * of Solaris. This is because the DKIOCGETEFI ioctl was
815		 * initially incorrectly implemented for a ZFS volume and it
816		 * expected the GPT and GPE to be retrieved with a single ioctl.
817		 * So we try to read the GPT and the GPE using that old style
818		 * ioctl.
819		 */
820		dk_ioc.dki_lba = 1;
821		dk_ioc.dki_length = label_len;
822		rval = check_label(fd, &dk_ioc);
823	}
824
825	if (rval < 0) {
826		free(efi);
827		return (rval);
828	}
829
830	/* LINTED -- always longlong aligned */
831	efi_parts = (efi_gpe_t *)(((char *)efi) + disk_info.dki_lbsize);
832
833	/*
834	 * Assemble this into a "dk_gpt" struct for easier
835	 * digestibility by applications.
836	 */
837	vtoc->efi_version = LE_32(efi->efi_gpt_Revision);
838	vtoc->efi_nparts = LE_32(efi->efi_gpt_NumberOfPartitionEntries);
839	vtoc->efi_part_size = LE_32(efi->efi_gpt_SizeOfPartitionEntry);
840	vtoc->efi_lbasize = disk_info.dki_lbsize;
841	vtoc->efi_last_lba = disk_info.dki_capacity - 1;
842	vtoc->efi_first_u_lba = LE_64(efi->efi_gpt_FirstUsableLBA);
843	vtoc->efi_last_u_lba = LE_64(efi->efi_gpt_LastUsableLBA);
844	vtoc->efi_altern_lba = LE_64(efi->efi_gpt_AlternateLBA);
845	UUID_LE_CONVERT(vtoc->efi_disk_uguid, efi->efi_gpt_DiskGUID);
846
847	/*
848	 * If the array the user passed in is too small, set the length
849	 * to what it needs to be and return
850	 */
851	if (user_length < vtoc->efi_nparts) {
852		return (VT_EINVAL);
853	}
854
855	for (i = 0; i < vtoc->efi_nparts; i++) {
856		UUID_LE_CONVERT(vtoc->efi_parts[i].p_guid,
857		    efi_parts[i].efi_gpe_PartitionTypeGUID);
858
859		for (j = 0;
860		    j < sizeof (conversion_array)
861		    / sizeof (struct uuid_to_ptag); j++) {
862
863			if (memcmp(&vtoc->efi_parts[i].p_guid,
864			    &conversion_array[j].uuid,
865			    sizeof (struct uuid)) == 0) {
866				vtoc->efi_parts[i].p_tag = j;
867				break;
868			}
869		}
870		if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED)
871			continue;
872		vtoc->efi_parts[i].p_flag =
873		    LE_16(efi_parts[i].efi_gpe_Attributes.PartitionAttrs);
874		vtoc->efi_parts[i].p_start =
875		    LE_64(efi_parts[i].efi_gpe_StartingLBA);
876		vtoc->efi_parts[i].p_size =
877		    LE_64(efi_parts[i].efi_gpe_EndingLBA) -
878		    vtoc->efi_parts[i].p_start + 1;
879		for (j = 0; j < EFI_PART_NAME_LEN; j++) {
880			vtoc->efi_parts[i].p_name[j] =
881			    (uchar_t)LE_16(
882			    efi_parts[i].efi_gpe_PartitionName[j]);
883		}
884
885		UUID_LE_CONVERT(vtoc->efi_parts[i].p_uguid,
886		    efi_parts[i].efi_gpe_UniquePartitionGUID);
887	}
888	free(efi);
889
890	return (dki_info.dki_partition);
891}
892
893/* writes a "protective" MBR */
894static int
895write_pmbr(int fd, struct dk_gpt *vtoc)
896{
897	dk_efi_t	dk_ioc;
898	struct mboot	mb;
899	uchar_t		*cp;
900	diskaddr_t	size_in_lba;
901	uchar_t		*buf;
902	int		len;
903
904	len = (vtoc->efi_lbasize == 0) ? sizeof (mb) : vtoc->efi_lbasize;
905	if (posix_memalign((void **)&buf, len, len))
906		return (VT_ERROR);
907
908	/*
909	 * Preserve any boot code and disk signature if the first block is
910	 * already an MBR.
911	 */
912	memset(buf, 0, len);
913	dk_ioc.dki_lba = 0;
914	dk_ioc.dki_length = len;
915	/* LINTED -- always longlong aligned */
916	dk_ioc.dki_data = (efi_gpt_t *)buf;
917	if (efi_ioctl(fd, DKIOCGETEFI, &dk_ioc) == -1) {
918		memset(&mb, 0, sizeof (mb));
919		mb.signature = LE_16(MBB_MAGIC);
920	} else {
921		(void) memcpy(&mb, buf, sizeof (mb));
922		if (mb.signature != LE_16(MBB_MAGIC)) {
923			memset(&mb, 0, sizeof (mb));
924			mb.signature = LE_16(MBB_MAGIC);
925		}
926	}
927
928	memset(&mb.parts, 0, sizeof (mb.parts));
929	cp = (uchar_t *)&mb.parts[0];
930	/* bootable or not */
931	*cp++ = 0;
932	/* beginning CHS; 0xffffff if not representable */
933	*cp++ = 0xff;
934	*cp++ = 0xff;
935	*cp++ = 0xff;
936	/* OS type */
937	*cp++ = EFI_PMBR;
938	/* ending CHS; 0xffffff if not representable */
939	*cp++ = 0xff;
940	*cp++ = 0xff;
941	*cp++ = 0xff;
942	/* starting LBA: 1 (little endian format) by EFI definition */
943	*cp++ = 0x01;
944	*cp++ = 0x00;
945	*cp++ = 0x00;
946	*cp++ = 0x00;
947	/* ending LBA: last block on the disk (little endian format) */
948	size_in_lba = vtoc->efi_last_lba;
949	if (size_in_lba < 0xffffffff) {
950		*cp++ = (size_in_lba & 0x000000ff);
951		*cp++ = (size_in_lba & 0x0000ff00) >> 8;
952		*cp++ = (size_in_lba & 0x00ff0000) >> 16;
953		*cp++ = (size_in_lba & 0xff000000) >> 24;
954	} else {
955		*cp++ = 0xff;
956		*cp++ = 0xff;
957		*cp++ = 0xff;
958		*cp++ = 0xff;
959	}
960
961	(void) memcpy(buf, &mb, sizeof (mb));
962	/* LINTED -- always longlong aligned */
963	dk_ioc.dki_data = (efi_gpt_t *)buf;
964	dk_ioc.dki_lba = 0;
965	dk_ioc.dki_length = len;
966	if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) {
967		free(buf);
968		switch (errno) {
969		case EIO:
970			return (VT_EIO);
971		case EINVAL:
972			return (VT_EINVAL);
973		default:
974			return (VT_ERROR);
975		}
976	}
977	free(buf);
978	return (0);
979}
980
981/* make sure the user specified something reasonable */
982static int
983check_input(struct dk_gpt *vtoc)
984{
985	int			resv_part = -1;
986	int			i, j;
987	diskaddr_t		istart, jstart, isize, jsize, endsect;
988
989	/*
990	 * Sanity-check the input (make sure no partitions overlap)
991	 */
992	for (i = 0; i < vtoc->efi_nparts; i++) {
993		/* It can't be unassigned and have an actual size */
994		if ((vtoc->efi_parts[i].p_tag == V_UNASSIGNED) &&
995		    (vtoc->efi_parts[i].p_size != 0)) {
996			if (efi_debug) {
997				(void) fprintf(stderr, "partition %d is "
998				    "\"unassigned\" but has a size of %llu",
999				    i, vtoc->efi_parts[i].p_size);
1000			}
1001			return (VT_EINVAL);
1002		}
1003		if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED) {
1004			if (uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_guid))
1005				continue;
1006			/* we have encountered an unknown uuid */
1007			vtoc->efi_parts[i].p_tag = 0xff;
1008		}
1009		if (vtoc->efi_parts[i].p_tag == V_RESERVED) {
1010			if (resv_part != -1) {
1011				if (efi_debug) {
1012					(void) fprintf(stderr, "found "
1013					    "duplicate reserved partition "
1014					    "at %d\n", i);
1015				}
1016				return (VT_EINVAL);
1017			}
1018			resv_part = i;
1019		}
1020		if ((vtoc->efi_parts[i].p_start < vtoc->efi_first_u_lba) ||
1021		    (vtoc->efi_parts[i].p_start > vtoc->efi_last_u_lba)) {
1022			if (efi_debug) {
1023				(void) fprintf(stderr,
1024				    "Partition %d starts at %llu.  ",
1025				    i,
1026				    vtoc->efi_parts[i].p_start);
1027				(void) fprintf(stderr,
1028				    "It must be between %llu and %llu.\n",
1029				    vtoc->efi_first_u_lba,
1030				    vtoc->efi_last_u_lba);
1031			}
1032			return (VT_EINVAL);
1033		}
1034		if ((vtoc->efi_parts[i].p_start +
1035		    vtoc->efi_parts[i].p_size <
1036		    vtoc->efi_first_u_lba) ||
1037		    (vtoc->efi_parts[i].p_start +
1038		    vtoc->efi_parts[i].p_size >
1039		    vtoc->efi_last_u_lba + 1)) {
1040			if (efi_debug) {
1041				(void) fprintf(stderr,
1042				    "Partition %d ends at %llu.  ",
1043				    i,
1044				    vtoc->efi_parts[i].p_start +
1045				    vtoc->efi_parts[i].p_size);
1046				(void) fprintf(stderr,
1047				    "It must be between %llu and %llu.\n",
1048				    vtoc->efi_first_u_lba,
1049				    vtoc->efi_last_u_lba);
1050			}
1051			return (VT_EINVAL);
1052		}
1053
1054		for (j = 0; j < vtoc->efi_nparts; j++) {
1055			isize = vtoc->efi_parts[i].p_size;
1056			jsize = vtoc->efi_parts[j].p_size;
1057			istart = vtoc->efi_parts[i].p_start;
1058			jstart = vtoc->efi_parts[j].p_start;
1059			if ((i != j) && (isize != 0) && (jsize != 0)) {
1060				endsect = jstart + jsize -1;
1061				if ((jstart <= istart) &&
1062				    (istart <= endsect)) {
1063					if (efi_debug) {
1064						(void) fprintf(stderr,
1065						    "Partition %d overlaps "
1066						    "partition %d.", i, j);
1067					}
1068					return (VT_EINVAL);
1069				}
1070			}
1071		}
1072	}
1073	/* just a warning for now */
1074	if ((resv_part == -1) && efi_debug) {
1075		(void) fprintf(stderr,
1076		    "no reserved partition found\n");
1077	}
1078	return (0);
1079}
1080
1081static int
1082call_blkpg_ioctl(int fd, int command, diskaddr_t start,
1083    diskaddr_t size, uint_t pno)
1084{
1085	struct blkpg_ioctl_arg ioctl_arg;
1086	struct blkpg_partition  linux_part;
1087	memset(&linux_part, 0, sizeof (linux_part));
1088
1089	char *path = efi_get_devname(fd);
1090	if (path == NULL) {
1091		(void) fprintf(stderr, "failed to retrieve device name\n");
1092		return (VT_EINVAL);
1093	}
1094
1095	linux_part.start = start;
1096	linux_part.length = size;
1097	linux_part.pno = pno;
1098	snprintf(linux_part.devname, BLKPG_DEVNAMELTH - 1, "%s%u", path, pno);
1099	linux_part.devname[BLKPG_DEVNAMELTH - 1] = '\0';
1100	free(path);
1101
1102	ioctl_arg.op = command;
1103	ioctl_arg.flags = 0;
1104	ioctl_arg.datalen = sizeof (struct blkpg_partition);
1105	ioctl_arg.data = &linux_part;
1106
1107	return (ioctl(fd, BLKPG, &ioctl_arg));
1108}
1109
1110/*
1111 * add all the unallocated space to the current label
1112 */
1113int
1114efi_use_whole_disk(int fd)
1115{
1116	struct dk_gpt *efi_label = NULL;
1117	int rval;
1118	int i;
1119	uint_t resv_index = 0, data_index = 0;
1120	diskaddr_t resv_start = 0, data_start = 0;
1121	diskaddr_t data_size, limit, difference;
1122	boolean_t sync_needed = B_FALSE;
1123	uint_t nblocks;
1124
1125	rval = efi_alloc_and_read(fd, &efi_label);
1126	if (rval < 0) {
1127		if (efi_label != NULL)
1128			efi_free(efi_label);
1129		return (rval);
1130	}
1131
1132	/*
1133	 * Find the last physically non-zero partition.
1134	 * This should be the reserved partition.
1135	 */
1136	for (i = 0; i < efi_label->efi_nparts; i ++) {
1137		if (resv_start < efi_label->efi_parts[i].p_start) {
1138			resv_start = efi_label->efi_parts[i].p_start;
1139			resv_index = i;
1140		}
1141	}
1142
1143	/*
1144	 * Find the last physically non-zero partition before that.
1145	 * This is the data partition.
1146	 */
1147	for (i = 0; i < resv_index; i ++) {
1148		if (data_start < efi_label->efi_parts[i].p_start) {
1149			data_start = efi_label->efi_parts[i].p_start;
1150			data_index = i;
1151		}
1152	}
1153	data_size = efi_label->efi_parts[data_index].p_size;
1154
1155	/*
1156	 * See the "efi_alloc_and_init" function for more information
1157	 * about where this "nblocks" value comes from.
1158	 */
1159	nblocks = efi_label->efi_first_u_lba - 1;
1160
1161	/*
1162	 * Determine if the EFI label is out of sync. We check that:
1163	 *
1164	 * 1. the data partition ends at the limit we set, and
1165	 * 2. the reserved partition starts at the limit we set.
1166	 *
1167	 * If either of these conditions is not met, then we need to
1168	 * resync the EFI label.
1169	 *
1170	 * The limit is the last usable LBA, determined by the last LBA
1171	 * and the first usable LBA fields on the EFI label of the disk
1172	 * (see the lines directly above). Additionally, we factor in
1173	 * EFI_MIN_RESV_SIZE (per its use in "zpool_label_disk") and
1174	 * P2ALIGN it to ensure the partition boundaries are aligned
1175	 * (for performance reasons). The alignment should match the
1176	 * alignment used by the "zpool_label_disk" function.
1177	 */
1178	limit = P2ALIGN(efi_label->efi_last_lba - nblocks - EFI_MIN_RESV_SIZE,
1179	    PARTITION_END_ALIGNMENT);
1180	if (data_start + data_size != limit || resv_start != limit)
1181		sync_needed = B_TRUE;
1182
1183	if (efi_debug && sync_needed)
1184		(void) fprintf(stderr, "efi_use_whole_disk: sync needed\n");
1185
1186	/*
1187	 * If alter_lba is 1, we are using the backup label.
1188	 * Since we can locate the backup label by disk capacity,
1189	 * there must be no unallocated space.
1190	 */
1191	if ((efi_label->efi_altern_lba == 1) || (efi_label->efi_altern_lba
1192	    >= efi_label->efi_last_lba && !sync_needed)) {
1193		if (efi_debug) {
1194			(void) fprintf(stderr,
1195			    "efi_use_whole_disk: requested space not found\n");
1196		}
1197		efi_free(efi_label);
1198		return (VT_ENOSPC);
1199	}
1200
1201	/*
1202	 * Verify that we've found the reserved partition by checking
1203	 * that it looks the way it did when we created it in zpool_label_disk.
1204	 * If we've found the incorrect partition, then we know that this
1205	 * device was reformatted and no longer is solely used by ZFS.
1206	 */
1207	if ((efi_label->efi_parts[resv_index].p_size != EFI_MIN_RESV_SIZE) ||
1208	    (efi_label->efi_parts[resv_index].p_tag != V_RESERVED) ||
1209	    (resv_index != 8)) {
1210		if (efi_debug) {
1211			(void) fprintf(stderr,
1212			    "efi_use_whole_disk: wholedisk not available\n");
1213		}
1214		efi_free(efi_label);
1215		return (VT_ENOSPC);
1216	}
1217
1218	if (data_start + data_size != resv_start) {
1219		if (efi_debug) {
1220			(void) fprintf(stderr,
1221			    "efi_use_whole_disk: "
1222			    "data_start (%lli) + "
1223			    "data_size (%lli) != "
1224			    "resv_start (%lli)\n",
1225			    data_start, data_size, resv_start);
1226		}
1227
1228		return (VT_EINVAL);
1229	}
1230
1231	if (limit < resv_start) {
1232		if (efi_debug) {
1233			(void) fprintf(stderr,
1234			    "efi_use_whole_disk: "
1235			    "limit (%lli) < resv_start (%lli)\n",
1236			    limit, resv_start);
1237		}
1238
1239		return (VT_EINVAL);
1240	}
1241
1242	difference = limit - resv_start;
1243
1244	if (efi_debug)
1245		(void) fprintf(stderr,
1246		    "efi_use_whole_disk: difference is %lli\n", difference);
1247
1248	/*
1249	 * Move the reserved partition. There is currently no data in
1250	 * here except fabricated devids (which get generated via
1251	 * efi_write()). So there is no need to copy data.
1252	 */
1253	efi_label->efi_parts[data_index].p_size += difference;
1254	efi_label->efi_parts[resv_index].p_start += difference;
1255	efi_label->efi_last_u_lba = efi_label->efi_last_lba - nblocks;
1256
1257	/*
1258	 * Rescanning the partition table in the kernel can result
1259	 * in the device links to be removed (see comment in vdev_disk_open).
1260	 * If BLKPG_RESIZE_PARTITION is available, then we can resize
1261	 * the partition table online and avoid having to remove the device
1262	 * links used by the pool. This provides a very deterministic
1263	 * approach to resizing devices and does not require any
1264	 * loops waiting for devices to reappear.
1265	 */
1266#ifdef BLKPG_RESIZE_PARTITION
1267	/*
1268	 * Delete the reserved partition since we're about to expand
1269	 * the data partition and it would overlap with the reserved
1270	 * partition.
1271	 * NOTE: The starting index for the ioctl is 1 while for the
1272	 * EFI partitions it's 0. For that reason we have to add one
1273	 * whenever we make an ioctl call.
1274	 */
1275	rval = call_blkpg_ioctl(fd, BLKPG_DEL_PARTITION, 0, 0, resv_index + 1);
1276	if (rval != 0)
1277		goto out;
1278
1279	/*
1280	 * Expand the data partition
1281	 */
1282	rval = call_blkpg_ioctl(fd, BLKPG_RESIZE_PARTITION,
1283	    efi_label->efi_parts[data_index].p_start * efi_label->efi_lbasize,
1284	    efi_label->efi_parts[data_index].p_size * efi_label->efi_lbasize,
1285	    data_index + 1);
1286	if (rval != 0) {
1287		(void) fprintf(stderr, "Unable to resize data "
1288		    "partition:  %d\n", rval);
1289		/*
1290		 * Since we failed to resize, we need to reset the start
1291		 * of the reserve partition and re-create it.
1292		 */
1293		efi_label->efi_parts[resv_index].p_start -= difference;
1294	}
1295
1296	/*
1297	 * Re-add the reserved partition. If we've expanded the data partition
1298	 * then we'll move the reserve partition to the end of the data
1299	 * partition. Otherwise, we'll recreate the partition in its original
1300	 * location. Note that we do this as best-effort and ignore any
1301	 * errors that may arise here. This will ensure that we finish writing
1302	 * the EFI label.
1303	 */
1304	(void) call_blkpg_ioctl(fd, BLKPG_ADD_PARTITION,
1305	    efi_label->efi_parts[resv_index].p_start * efi_label->efi_lbasize,
1306	    efi_label->efi_parts[resv_index].p_size * efi_label->efi_lbasize,
1307	    resv_index + 1);
1308#endif
1309
1310	/*
1311	 * We're now ready to write the EFI label.
1312	 */
1313	if (rval == 0) {
1314		rval = efi_write(fd, efi_label);
1315		if (rval < 0 && efi_debug) {
1316			(void) fprintf(stderr, "efi_use_whole_disk:fail "
1317			    "to write label, rval=%d\n", rval);
1318		}
1319	}
1320
1321out:
1322	efi_free(efi_label);
1323	return (rval);
1324}
1325
1326/*
1327 * write EFI label and backup label
1328 */
1329int
1330efi_write(int fd, struct dk_gpt *vtoc)
1331{
1332	dk_efi_t		dk_ioc;
1333	efi_gpt_t		*efi;
1334	efi_gpe_t		*efi_parts;
1335	int			i, j;
1336	struct dk_cinfo		dki_info;
1337	int			rval;
1338	int			md_flag = 0;
1339	int			nblocks;
1340	diskaddr_t		lba_backup_gpt_hdr;
1341
1342	if ((rval = efi_get_info(fd, &dki_info)) != 0)
1343		return (rval);
1344
1345	/* check if we are dealing with a metadevice */
1346	if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) &&
1347	    (strncmp(dki_info.dki_dname, "md", 3) == 0)) {
1348		md_flag = 1;
1349	}
1350
1351	if (check_input(vtoc)) {
1352		/*
1353		 * not valid; if it's a metadevice just pass it down
1354		 * because SVM will do its own checking
1355		 */
1356		if (md_flag == 0) {
1357			return (VT_EINVAL);
1358		}
1359	}
1360
1361	dk_ioc.dki_lba = 1;
1362	if (NBLOCKS(vtoc->efi_nparts, vtoc->efi_lbasize) < 34) {
1363		dk_ioc.dki_length = EFI_MIN_ARRAY_SIZE + vtoc->efi_lbasize;
1364	} else {
1365		dk_ioc.dki_length = (len_t)NBLOCKS(vtoc->efi_nparts,
1366		    vtoc->efi_lbasize) *
1367		    vtoc->efi_lbasize;
1368	}
1369
1370	/*
1371	 * the number of blocks occupied by GUID partition entry array
1372	 */
1373	nblocks = dk_ioc.dki_length / vtoc->efi_lbasize - 1;
1374
1375	/*
1376	 * Backup GPT header is located on the block after GUID
1377	 * partition entry array. Here, we calculate the address
1378	 * for backup GPT header.
1379	 */
1380	lba_backup_gpt_hdr = vtoc->efi_last_u_lba + 1 + nblocks;
1381	if (posix_memalign((void **)&dk_ioc.dki_data,
1382	    vtoc->efi_lbasize, dk_ioc.dki_length))
1383		return (VT_ERROR);
1384
1385	memset(dk_ioc.dki_data, 0, dk_ioc.dki_length);
1386	efi = dk_ioc.dki_data;
1387
1388	/* stuff user's input into EFI struct */
1389	efi->efi_gpt_Signature = LE_64(EFI_SIGNATURE);
1390	efi->efi_gpt_Revision = LE_32(vtoc->efi_version); /* 0x02000100 */
1391	efi->efi_gpt_HeaderSize = LE_32(sizeof (struct efi_gpt) - LEN_EFI_PAD);
1392	efi->efi_gpt_Reserved1 = 0;
1393	efi->efi_gpt_MyLBA = LE_64(1ULL);
1394	efi->efi_gpt_AlternateLBA = LE_64(lba_backup_gpt_hdr);
1395	efi->efi_gpt_FirstUsableLBA = LE_64(vtoc->efi_first_u_lba);
1396	efi->efi_gpt_LastUsableLBA = LE_64(vtoc->efi_last_u_lba);
1397	efi->efi_gpt_PartitionEntryLBA = LE_64(2ULL);
1398	efi->efi_gpt_NumberOfPartitionEntries = LE_32(vtoc->efi_nparts);
1399	efi->efi_gpt_SizeOfPartitionEntry = LE_32(sizeof (struct efi_gpe));
1400	UUID_LE_CONVERT(efi->efi_gpt_DiskGUID, vtoc->efi_disk_uguid);
1401
1402	/* LINTED -- always longlong aligned */
1403	efi_parts = (efi_gpe_t *)((char *)dk_ioc.dki_data + vtoc->efi_lbasize);
1404
1405	for (i = 0; i < vtoc->efi_nparts; i++) {
1406		for (j = 0;
1407		    j < sizeof (conversion_array) /
1408		    sizeof (struct uuid_to_ptag); j++) {
1409
1410			if (vtoc->efi_parts[i].p_tag == j) {
1411				UUID_LE_CONVERT(
1412				    efi_parts[i].efi_gpe_PartitionTypeGUID,
1413				    conversion_array[j].uuid);
1414				break;
1415			}
1416		}
1417
1418		if (j == sizeof (conversion_array) /
1419		    sizeof (struct uuid_to_ptag)) {
1420			/*
1421			 * If we didn't have a matching uuid match, bail here.
1422			 * Don't write a label with unknown uuid.
1423			 */
1424			if (efi_debug) {
1425				(void) fprintf(stderr,
1426				    "Unknown uuid for p_tag %d\n",
1427				    vtoc->efi_parts[i].p_tag);
1428			}
1429			return (VT_EINVAL);
1430		}
1431
1432		/* Zero's should be written for empty partitions */
1433		if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED)
1434			continue;
1435
1436		efi_parts[i].efi_gpe_StartingLBA =
1437		    LE_64(vtoc->efi_parts[i].p_start);
1438		efi_parts[i].efi_gpe_EndingLBA =
1439		    LE_64(vtoc->efi_parts[i].p_start +
1440		    vtoc->efi_parts[i].p_size - 1);
1441		efi_parts[i].efi_gpe_Attributes.PartitionAttrs =
1442		    LE_16(vtoc->efi_parts[i].p_flag);
1443		for (j = 0; j < EFI_PART_NAME_LEN; j++) {
1444			efi_parts[i].efi_gpe_PartitionName[j] =
1445			    LE_16((ushort_t)vtoc->efi_parts[i].p_name[j]);
1446		}
1447		if ((vtoc->efi_parts[i].p_tag != V_UNASSIGNED) &&
1448		    uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_uguid)) {
1449			(void) uuid_generate((uchar_t *)
1450			    &vtoc->efi_parts[i].p_uguid);
1451		}
1452		memcpy(&efi_parts[i].efi_gpe_UniquePartitionGUID,
1453		    &vtoc->efi_parts[i].p_uguid,
1454		    sizeof (uuid_t));
1455	}
1456	efi->efi_gpt_PartitionEntryArrayCRC32 =
1457	    LE_32(efi_crc32((unsigned char *)efi_parts,
1458	    vtoc->efi_nparts * (int)sizeof (struct efi_gpe)));
1459	efi->efi_gpt_HeaderCRC32 =
1460	    LE_32(efi_crc32((unsigned char *)efi,
1461	    LE_32(efi->efi_gpt_HeaderSize)));
1462
1463	if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) {
1464		free(dk_ioc.dki_data);
1465		switch (errno) {
1466		case EIO:
1467			return (VT_EIO);
1468		case EINVAL:
1469			return (VT_EINVAL);
1470		default:
1471			return (VT_ERROR);
1472		}
1473	}
1474	/* if it's a metadevice we're done */
1475	if (md_flag) {
1476		free(dk_ioc.dki_data);
1477		return (0);
1478	}
1479
1480	/* write backup partition array */
1481	dk_ioc.dki_lba = vtoc->efi_last_u_lba + 1;
1482	dk_ioc.dki_length -= vtoc->efi_lbasize;
1483	/* LINTED */
1484	dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data +
1485	    vtoc->efi_lbasize);
1486
1487	if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) {
1488		/*
1489		 * we wrote the primary label okay, so don't fail
1490		 */
1491		if (efi_debug) {
1492			(void) fprintf(stderr,
1493			    "write of backup partitions to block %llu "
1494			    "failed, errno %d\n",
1495			    vtoc->efi_last_u_lba + 1,
1496			    errno);
1497		}
1498	}
1499	/*
1500	 * now swap MyLBA and AlternateLBA fields and write backup
1501	 * partition table header
1502	 */
1503	dk_ioc.dki_lba = lba_backup_gpt_hdr;
1504	dk_ioc.dki_length = vtoc->efi_lbasize;
1505	/* LINTED */
1506	dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data -
1507	    vtoc->efi_lbasize);
1508	efi->efi_gpt_AlternateLBA = LE_64(1ULL);
1509	efi->efi_gpt_MyLBA = LE_64(lba_backup_gpt_hdr);
1510	efi->efi_gpt_PartitionEntryLBA = LE_64(vtoc->efi_last_u_lba + 1);
1511	efi->efi_gpt_HeaderCRC32 = 0;
1512	efi->efi_gpt_HeaderCRC32 =
1513	    LE_32(efi_crc32((unsigned char *)dk_ioc.dki_data,
1514	    LE_32(efi->efi_gpt_HeaderSize)));
1515
1516	if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) {
1517		if (efi_debug) {
1518			(void) fprintf(stderr,
1519			    "write of backup header to block %llu failed, "
1520			    "errno %d\n",
1521			    lba_backup_gpt_hdr,
1522			    errno);
1523		}
1524	}
1525	/* write the PMBR */
1526	(void) write_pmbr(fd, vtoc);
1527	free(dk_ioc.dki_data);
1528
1529	return (0);
1530}
1531
/*
 * Release a label structure by handing "ptr" to free().
 */
void
efi_free(struct dk_gpt *ptr)
{
	/* free(NULL) is a no-op, so no NULL guard is needed here. */
	free(ptr);
}
1537
1538void
1539efi_err_check(struct dk_gpt *vtoc)
1540{
1541	int			resv_part = -1;
1542	int			i, j;
1543	diskaddr_t		istart, jstart, isize, jsize, endsect;
1544	int			overlap = 0;
1545
1546	/*
1547	 * make sure no partitions overlap
1548	 */
1549	for (i = 0; i < vtoc->efi_nparts; i++) {
1550		/* It can't be unassigned and have an actual size */
1551		if ((vtoc->efi_parts[i].p_tag == V_UNASSIGNED) &&
1552		    (vtoc->efi_parts[i].p_size != 0)) {
1553			(void) fprintf(stderr,
1554			    "partition %d is \"unassigned\" but has a size "
1555			    "of %llu\n", i, vtoc->efi_parts[i].p_size);
1556		}
1557		if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED) {
1558			continue;
1559		}
1560		if (vtoc->efi_parts[i].p_tag == V_RESERVED) {
1561			if (resv_part != -1) {
1562				(void) fprintf(stderr,
1563				    "found duplicate reserved partition at "
1564				    "%d\n", i);
1565			}
1566			resv_part = i;
1567			if (vtoc->efi_parts[i].p_size != EFI_MIN_RESV_SIZE)
1568				(void) fprintf(stderr,
1569				    "Warning: reserved partition size must "
1570				    "be %d sectors\n", EFI_MIN_RESV_SIZE);
1571		}
1572		if ((vtoc->efi_parts[i].p_start < vtoc->efi_first_u_lba) ||
1573		    (vtoc->efi_parts[i].p_start > vtoc->efi_last_u_lba)) {
1574			(void) fprintf(stderr,
1575			    "Partition %d starts at %llu\n",
1576			    i,
1577			    vtoc->efi_parts[i].p_start);
1578			(void) fprintf(stderr,
1579			    "It must be between %llu and %llu.\n",
1580			    vtoc->efi_first_u_lba,
1581			    vtoc->efi_last_u_lba);
1582		}
1583		if ((vtoc->efi_parts[i].p_start +
1584		    vtoc->efi_parts[i].p_size <
1585		    vtoc->efi_first_u_lba) ||
1586		    (vtoc->efi_parts[i].p_start +
1587		    vtoc->efi_parts[i].p_size >
1588		    vtoc->efi_last_u_lba + 1)) {
1589			(void) fprintf(stderr,
1590			    "Partition %d ends at %llu\n",
1591			    i,
1592			    vtoc->efi_parts[i].p_start +
1593			    vtoc->efi_parts[i].p_size);
1594			(void) fprintf(stderr,
1595			    "It must be between %llu and %llu.\n",
1596			    vtoc->efi_first_u_lba,
1597			    vtoc->efi_last_u_lba);
1598		}
1599
1600		for (j = 0; j < vtoc->efi_nparts; j++) {
1601			isize = vtoc->efi_parts[i].p_size;
1602			jsize = vtoc->efi_parts[j].p_size;
1603			istart = vtoc->efi_parts[i].p_start;
1604			jstart = vtoc->efi_parts[j].p_start;
1605			if ((i != j) && (isize != 0) && (jsize != 0)) {
1606				endsect = jstart + jsize -1;
1607				if ((jstart <= istart) &&
1608				    (istart <= endsect)) {
1609					if (!overlap) {
1610					(void) fprintf(stderr,
1611					    "label error: EFI Labels do not "
1612					    "support overlapping partitions\n");
1613					}
1614					(void) fprintf(stderr,
1615					    "Partition %d overlaps partition "
1616					    "%d.\n", i, j);
1617					overlap = 1;
1618				}
1619			}
1620		}
1621	}
1622	/* make sure there is a reserved partition */
1623	if (resv_part == -1) {
1624		(void) fprintf(stderr,
1625		    "no reserved partition found\n");
1626	}
1627}
1628