1219089Spjd/*
2219089Spjd * CDDL HEADER START
3219089Spjd *
4219089Spjd * The contents of this file are subject to the terms of the
5219089Spjd * Common Development and Distribution License (the "License").
6219089Spjd * You may not use this file except in compliance with the License.
7219089Spjd *
8219089Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9219089Spjd * or http://www.opensolaris.org/os/licensing.
10219089Spjd * See the License for the specific language governing permissions
11219089Spjd * and limitations under the License.
12219089Spjd *
13219089Spjd * When distributing Covered Code, include this CDDL HEADER in each
14219089Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15219089Spjd * If applicable, add the following below this CDDL HEADER, with the
16219089Spjd * fields enclosed by brackets "[]" replaced with your own identifying
17219089Spjd * information: Portions Copyright [yyyy] [name of copyright owner]
18219089Spjd *
19219089Spjd * CDDL HEADER END
20219089Spjd */
21219089Spjd
22219089Spjd/*
23219089Spjd * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24219089Spjd * Use is subject to license terms.
25219089Spjd */
26219089Spjd
27219089Spjd#include <dlfcn.h>
28219089Spjd#include <errno.h>
29219089Spjd#include <libintl.h>
30219089Spjd#include <link.h>
31219089Spjd#include <pthread.h>
32219089Spjd#include <strings.h>
33219089Spjd#include <unistd.h>
34219089Spjd
35219089Spjd#include <libzfs.h>
36219089Spjd
37219089Spjd#include <fm/libtopo.h>
38219089Spjd#include <sys/fm/protocol.h>
39219089Spjd#include <sys/systeminfo.h>
40219089Spjd
41219089Spjd#include "libzfs_impl.h"
42219089Spjd
43219089Spjd/*
 * This file is responsible for determining the relationship between I/O
 * device paths and physical locations.  In the world of MPxIO and external
46219089Spjd * enclosures, the device path is not synonymous with the physical location.
47219089Spjd * If you remove a drive and insert it into a different slot, it will end up
48219089Spjd * with the same path under MPxIO.  If you recable storage enclosures, the
49219089Spjd * device paths may change.  All of this makes it difficult to implement the
50219089Spjd * 'autoreplace' property, which is supposed to automatically manage disk
51219089Spjd * replacement based on physical slot.
52219089Spjd *
53219089Spjd * In order to work around these limitations, we have a per-vdev FRU property
54219089Spjd * that is the libtopo path (minus disk-specific authority information) to the
55219089Spjd * physical location of the device on the system.  This is an optional
56219089Spjd * property, and is only needed when using the 'autoreplace' property or when
57219089Spjd * generating FMA faults against vdevs.
58219089Spjd */
59219089Spjd
60219089Spjd/*
61219089Spjd * Because the FMA packages depend on ZFS, we have to dlopen() libtopo in case
62219089Spjd * it is not present.  We only need this once per library instance, so it is
63219089Spjd * not part of the libzfs handle.
64219089Spjd */
65219089Spjdstatic void *_topo_dlhandle;
66219089Spjdstatic topo_hdl_t *(*_topo_open)(int, const char *, int *);
67219089Spjdstatic void (*_topo_close)(topo_hdl_t *);
68219089Spjdstatic char *(*_topo_snap_hold)(topo_hdl_t *, const char *, int *);
69219089Spjdstatic void (*_topo_snap_release)(topo_hdl_t *);
70219089Spjdstatic topo_walk_t *(*_topo_walk_init)(topo_hdl_t *, const char *,
71219089Spjd    topo_walk_cb_t, void *, int *);
72219089Spjdstatic int (*_topo_walk_step)(topo_walk_t *, int);
73219089Spjdstatic void (*_topo_walk_fini)(topo_walk_t *);
74219089Spjdstatic void (*_topo_hdl_strfree)(topo_hdl_t *, char *);
75219089Spjdstatic char *(*_topo_node_name)(tnode_t *);
76219089Spjdstatic int (*_topo_prop_get_string)(tnode_t *, const char *, const char *,
77219089Spjd    char **, int *);
78219089Spjdstatic int (*_topo_node_fru)(tnode_t *, nvlist_t **, nvlist_t *, int *);
79219089Spjdstatic int (*_topo_fmri_nvl2str)(topo_hdl_t *, nvlist_t *, char **, int *);
80219089Spjdstatic int (*_topo_fmri_strcmp_noauth)(topo_hdl_t *, const char *,
81219089Spjd    const char *);
82219089Spjd
#define	ZFS_FRU_HASH_SIZE	257

/*
 * Hash a NUL-terminated string to a bucket index in the range
 * [0, ZFS_FRU_HASH_SIZE).
 *
 * Each character is folded into the running value four bits at a time.
 * Whenever any of bits 28-31 end up set, they are XORed back into bits 4-7
 * and then cleared before the next character is processed.
 */
static size_t
fru_strhash(const char *key)
{
	unsigned long hash = 0;

	while (*key != '\0') {
		unsigned long top;

		hash = (hash << 4) + *key++;

		top = hash & 0xf0000000;
		if (top != 0)
			hash ^= (top >> 24) ^ top;
	}

	return (hash % ZFS_FRU_HASH_SIZE);
}
102219089Spjd
103219089Spjdstatic int
104219089Spjdlibzfs_fru_gather(topo_hdl_t *thp, tnode_t *tn, void *arg)
105219089Spjd{
106219089Spjd	libzfs_handle_t *hdl = arg;
107219089Spjd	nvlist_t *fru;
108219089Spjd	char *devpath, *frustr;
109219089Spjd	int err;
110219089Spjd	libzfs_fru_t *frup;
111219089Spjd	size_t idx;
112219089Spjd
113219089Spjd	/*
114219089Spjd	 * If this is the chassis node, and we don't yet have the system
115219089Spjd	 * chassis ID, then fill in this value now.
116219089Spjd	 */
117219089Spjd	if (hdl->libzfs_chassis_id[0] == '\0' &&
118219089Spjd	    strcmp(_topo_node_name(tn), "chassis") == 0) {
119219089Spjd		if (_topo_prop_get_string(tn, FM_FMRI_AUTHORITY,
120219089Spjd		    FM_FMRI_AUTH_CHASSIS, &devpath, &err) == 0)
121219089Spjd			(void) strlcpy(hdl->libzfs_chassis_id, devpath,
122219089Spjd			    sizeof (hdl->libzfs_chassis_id));
123219089Spjd	}
124219089Spjd
125219089Spjd	/*
126219089Spjd	 * Skip non-disk nodes.
127219089Spjd	 */
128219089Spjd	if (strcmp(_topo_node_name(tn), "disk") != 0)
129219089Spjd		return (TOPO_WALK_NEXT);
130219089Spjd
131219089Spjd	/*
132219089Spjd	 * Get the devfs path and FRU.
133219089Spjd	 */
134219089Spjd	if (_topo_prop_get_string(tn, "io", "devfs-path", &devpath, &err) != 0)
135219089Spjd		return (TOPO_WALK_NEXT);
136219089Spjd
137219089Spjd	if (libzfs_fru_lookup(hdl, devpath) != NULL) {
138219089Spjd		_topo_hdl_strfree(thp, devpath);
139219089Spjd		return (TOPO_WALK_NEXT);
140219089Spjd	}
141219089Spjd
142219089Spjd	if (_topo_node_fru(tn, &fru, NULL, &err) != 0) {
143219089Spjd		_topo_hdl_strfree(thp, devpath);
144219089Spjd		return (TOPO_WALK_NEXT);
145219089Spjd	}
146219089Spjd
147219089Spjd	/*
148219089Spjd	 * Convert the FRU into a string.
149219089Spjd	 */
150219089Spjd	if (_topo_fmri_nvl2str(thp, fru, &frustr, &err) != 0) {
151219089Spjd		nvlist_free(fru);
152219089Spjd		_topo_hdl_strfree(thp, devpath);
153219089Spjd		return (TOPO_WALK_NEXT);
154219089Spjd	}
155219089Spjd
156219089Spjd	nvlist_free(fru);
157219089Spjd
158219089Spjd	/*
159219089Spjd	 * Finally, we have a FRU string and device path.  Add it to the hash.
160219089Spjd	 */
161219089Spjd	if ((frup = calloc(sizeof (libzfs_fru_t), 1)) == NULL) {
162219089Spjd		_topo_hdl_strfree(thp, devpath);
163219089Spjd		_topo_hdl_strfree(thp, frustr);
164219089Spjd		return (TOPO_WALK_NEXT);
165219089Spjd	}
166219089Spjd
167219089Spjd	if ((frup->zf_device = strdup(devpath)) == NULL ||
168219089Spjd	    (frup->zf_fru = strdup(frustr)) == NULL) {
169219089Spjd		free(frup->zf_device);
170219089Spjd		free(frup);
171219089Spjd		_topo_hdl_strfree(thp, devpath);
172219089Spjd		_topo_hdl_strfree(thp, frustr);
173219089Spjd		return (TOPO_WALK_NEXT);
174219089Spjd	}
175219089Spjd
176219089Spjd	_topo_hdl_strfree(thp, devpath);
177219089Spjd	_topo_hdl_strfree(thp, frustr);
178219089Spjd
179219089Spjd	idx = fru_strhash(frup->zf_device);
180219089Spjd	frup->zf_chain = hdl->libzfs_fru_hash[idx];
181219089Spjd	hdl->libzfs_fru_hash[idx] = frup;
182219089Spjd	frup->zf_next = hdl->libzfs_fru_list;
183219089Spjd	hdl->libzfs_fru_list = frup;
184219089Spjd
185219089Spjd	return (TOPO_WALK_NEXT);
186219089Spjd}
187219089Spjd
188219089Spjd/*
189219089Spjd * Called during initialization to setup the dynamic libtopo connection.
190219089Spjd */
191219089Spjd#pragma init(libzfs_init_fru)
192219089Spjdstatic void
193219089Spjdlibzfs_init_fru(void)
194219089Spjd{
195219089Spjd	char path[MAXPATHLEN];
196219089Spjd	char isa[257];
197219089Spjd
198219089Spjd#if defined(_LP64)
199219089Spjd	if (sysinfo(SI_ARCHITECTURE_64, isa, sizeof (isa)) < 0)
200219089Spjd		isa[0] = '\0';
201219089Spjd#else
202219089Spjd	isa[0] = '\0';
203219089Spjd#endif
204219089Spjd	(void) snprintf(path, sizeof (path),
205219089Spjd	    "/usr/lib/fm/%s/libtopo.so", isa);
206219089Spjd
207219089Spjd	if ((_topo_dlhandle = dlopen(path, RTLD_LAZY)) == NULL)
208219089Spjd		return;
209219089Spjd
210219089Spjd	_topo_open = (topo_hdl_t *(*)())
211219089Spjd	    dlsym(_topo_dlhandle, "topo_open");
212219089Spjd	_topo_close = (void (*)())
213219089Spjd	    dlsym(_topo_dlhandle, "topo_close");
214219089Spjd	_topo_snap_hold = (char *(*)())
215219089Spjd	    dlsym(_topo_dlhandle, "topo_snap_hold");
216219089Spjd	_topo_snap_release = (void (*)())
217219089Spjd	    dlsym(_topo_dlhandle, "topo_snap_release");
218219089Spjd	_topo_walk_init = (topo_walk_t *(*)())
219219089Spjd	    dlsym(_topo_dlhandle, "topo_walk_init");
220219089Spjd	_topo_walk_step = (int (*)())
221219089Spjd	    dlsym(_topo_dlhandle, "topo_walk_step");
222219089Spjd	_topo_walk_fini = (void (*)())
223219089Spjd	    dlsym(_topo_dlhandle, "topo_walk_fini");
224219089Spjd	_topo_hdl_strfree = (void (*)())
225219089Spjd	    dlsym(_topo_dlhandle, "topo_hdl_strfree");
226219089Spjd	_topo_node_name = (char *(*)())
227219089Spjd	    dlsym(_topo_dlhandle, "topo_node_name");
228219089Spjd	_topo_prop_get_string = (int (*)())
229219089Spjd	    dlsym(_topo_dlhandle, "topo_prop_get_string");
230219089Spjd	_topo_node_fru = (int (*)())
231219089Spjd	    dlsym(_topo_dlhandle, "topo_node_fru");
232219089Spjd	_topo_fmri_nvl2str = (int (*)())
233219089Spjd	    dlsym(_topo_dlhandle, "topo_fmri_nvl2str");
234219089Spjd	_topo_fmri_strcmp_noauth = (int (*)())
235219089Spjd	    dlsym(_topo_dlhandle, "topo_fmri_strcmp_noauth");
236219089Spjd
237219089Spjd	if (_topo_open == NULL || _topo_close == NULL ||
238219089Spjd	    _topo_snap_hold == NULL || _topo_snap_release == NULL ||
239219089Spjd	    _topo_walk_init == NULL || _topo_walk_step == NULL ||
240219089Spjd	    _topo_walk_fini == NULL || _topo_hdl_strfree == NULL ||
241219089Spjd	    _topo_node_name == NULL || _topo_prop_get_string == NULL ||
242219089Spjd	    _topo_node_fru == NULL || _topo_fmri_nvl2str == NULL ||
243219089Spjd	    _topo_fmri_strcmp_noauth == NULL) {
244219089Spjd		(void) dlclose(_topo_dlhandle);
245219089Spjd		_topo_dlhandle = NULL;
246219089Spjd	}
247219089Spjd}
248219089Spjd
249219089Spjd/*
250219089Spjd * Refresh the mappings from device path -> FMRI.  We do this by walking the
251219089Spjd * hc topology looking for disk nodes, and recording the io/devfs-path and FRU.
252219089Spjd * Note that we strip out the disk-specific authority information (serial,
253219089Spjd * part, revision, etc) so that we are left with only the identifying
254219089Spjd * characteristics of the slot (hc path and chassis-id).
255219089Spjd */
256219089Spjdvoid
257219089Spjdlibzfs_fru_refresh(libzfs_handle_t *hdl)
258219089Spjd{
259219089Spjd	int err;
260219089Spjd	char *uuid;
261219089Spjd	topo_hdl_t *thp;
262219089Spjd	topo_walk_t *twp;
263219089Spjd
264219089Spjd	if (_topo_dlhandle == NULL)
265219089Spjd		return;
266219089Spjd
267219089Spjd	/*
268219089Spjd	 * Clear the FRU hash and initialize our basic structures.
269219089Spjd	 */
270219089Spjd	libzfs_fru_clear(hdl, B_FALSE);
271219089Spjd
272219089Spjd	if ((hdl->libzfs_topo_hdl = _topo_open(TOPO_VERSION,
273219089Spjd	    NULL, &err)) == NULL)
274219089Spjd		return;
275219089Spjd
276219089Spjd	thp = hdl->libzfs_topo_hdl;
277219089Spjd
278219089Spjd	if ((uuid = _topo_snap_hold(thp, NULL, &err)) == NULL)
279219089Spjd		return;
280219089Spjd
281219089Spjd	_topo_hdl_strfree(thp, uuid);
282219089Spjd
283219089Spjd	if (hdl->libzfs_fru_hash == NULL &&
284219089Spjd	    (hdl->libzfs_fru_hash =
285219089Spjd	    calloc(ZFS_FRU_HASH_SIZE * sizeof (void *), 1)) == NULL)
286219089Spjd		return;
287219089Spjd
288219089Spjd	/*
289219089Spjd	 * We now have a topo snapshot, so iterate over the hc topology looking
290219089Spjd	 * for disks to add to the hash.
291219089Spjd	 */
292219089Spjd	twp = _topo_walk_init(thp, FM_FMRI_SCHEME_HC,
293219089Spjd	    libzfs_fru_gather, hdl, &err);
294219089Spjd	if (twp != NULL) {
295219089Spjd		(void) _topo_walk_step(twp, TOPO_WALK_CHILD);
296219089Spjd		_topo_walk_fini(twp);
297219089Spjd	}
298219089Spjd}
299219089Spjd
300219089Spjd/*
301219089Spjd * Given a devfs path, return the FRU for the device, if known.  This will
302219089Spjd * automatically call libzfs_fru_refresh() if it hasn't already been called by
303219089Spjd * the consumer.  The string returned is valid until the next call to
304219089Spjd * libzfs_fru_refresh().
305219089Spjd */
306219089Spjdconst char *
307219089Spjdlibzfs_fru_lookup(libzfs_handle_t *hdl, const char *devpath)
308219089Spjd{
309219089Spjd	size_t idx = fru_strhash(devpath);
310219089Spjd	libzfs_fru_t *frup;
311219089Spjd
312219089Spjd	if (hdl->libzfs_fru_hash == NULL)
313219089Spjd		libzfs_fru_refresh(hdl);
314219089Spjd
315219089Spjd	if (hdl->libzfs_fru_hash == NULL)
316219089Spjd		return (NULL);
317219089Spjd
318219089Spjd	for (frup = hdl->libzfs_fru_hash[idx]; frup != NULL;
319219089Spjd	    frup = frup->zf_chain) {
320219089Spjd		if (strcmp(devpath, frup->zf_device) == 0)
321219089Spjd			return (frup->zf_fru);
322219089Spjd	}
323219089Spjd
324219089Spjd	return (NULL);
325219089Spjd}
326219089Spjd
327219089Spjd/*
328219089Spjd * Given a fru path, return the device path.  This will automatically call
329219089Spjd * libzfs_fru_refresh() if it hasn't already been called by the consumer.  The
330219089Spjd * string returned is valid until the next call to libzfs_fru_refresh().
331219089Spjd */
332219089Spjdconst char *
333219089Spjdlibzfs_fru_devpath(libzfs_handle_t *hdl, const char *fru)
334219089Spjd{
335219089Spjd	libzfs_fru_t *frup;
336219089Spjd	size_t idx;
337219089Spjd
338219089Spjd	if (hdl->libzfs_fru_hash == NULL)
339219089Spjd		libzfs_fru_refresh(hdl);
340219089Spjd
341219089Spjd	if (hdl->libzfs_fru_hash == NULL)
342219089Spjd		return (NULL);
343219089Spjd
344219089Spjd	for (idx = 0; idx < ZFS_FRU_HASH_SIZE; idx++) {
345219089Spjd		for (frup = hdl->libzfs_fru_hash[idx]; frup != NULL;
346219089Spjd		    frup = frup->zf_next) {
347219089Spjd			if (_topo_fmri_strcmp_noauth(hdl->libzfs_topo_hdl,
348219089Spjd			    fru, frup->zf_fru))
349219089Spjd				return (frup->zf_device);
350219089Spjd		}
351219089Spjd	}
352219089Spjd
353219089Spjd	return (NULL);
354219089Spjd}
355219089Spjd
356219089Spjd/*
357219089Spjd * Change the stored FRU for the given vdev.
358219089Spjd */
359219089Spjdint
360219089Spjdzpool_fru_set(zpool_handle_t *zhp, uint64_t vdev_guid, const char *fru)
361219089Spjd{
362219089Spjd	zfs_cmd_t zc = { 0 };
363219089Spjd
364219089Spjd	(void) strncpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
365219089Spjd	(void) strncpy(zc.zc_value, fru, sizeof (zc.zc_value));
366219089Spjd	zc.zc_guid = vdev_guid;
367219089Spjd
368219089Spjd	if (zfs_ioctl(zhp->zpool_hdl, ZFS_IOC_VDEV_SETFRU, &zc) != 0)
369219089Spjd		return (zpool_standard_error_fmt(zhp->zpool_hdl, errno,
370219089Spjd		    dgettext(TEXT_DOMAIN, "cannot set FRU")));
371219089Spjd
372219089Spjd	return (0);
373219089Spjd}
374219089Spjd
375219089Spjd/*
376219089Spjd * Compare to two FRUs, ignoring any authority information.
377219089Spjd */
378219089Spjdboolean_t
379219089Spjdlibzfs_fru_compare(libzfs_handle_t *hdl, const char *a, const char *b)
380219089Spjd{
381219089Spjd	if (hdl->libzfs_fru_hash == NULL)
382219089Spjd		libzfs_fru_refresh(hdl);
383219089Spjd
384219089Spjd	if (hdl->libzfs_fru_hash == NULL)
385219089Spjd		return (strcmp(a, b) == 0);
386219089Spjd
387219089Spjd	return (_topo_fmri_strcmp_noauth(hdl->libzfs_topo_hdl, a, b));
388219089Spjd}
389219089Spjd
390219089Spjd/*
391219089Spjd * This special function checks to see whether the FRU indicates it's supposed
392219089Spjd * to be in the system chassis, but the chassis-id doesn't match.  This can
393219089Spjd * happen in a clustered case, where both head nodes have the same logical
394219089Spjd * disk, but opening the device on the other head node is meaningless.
395219089Spjd */
396219089Spjdboolean_t
397219089Spjdlibzfs_fru_notself(libzfs_handle_t *hdl, const char *fru)
398219089Spjd{
399219089Spjd	const char *chassisid;
400219089Spjd	size_t len;
401219089Spjd
402219089Spjd	if (hdl->libzfs_fru_hash == NULL)
403219089Spjd		libzfs_fru_refresh(hdl);
404219089Spjd
405219089Spjd	if (hdl->libzfs_chassis_id[0] == '\0')
406219089Spjd		return (B_FALSE);
407219089Spjd
408219089Spjd	if (strstr(fru, "/chassis=0/") == NULL)
409219089Spjd		return (B_FALSE);
410219089Spjd
411219089Spjd	if ((chassisid = strstr(fru, ":chassis-id=")) == NULL)
412219089Spjd		return (B_FALSE);
413219089Spjd
414219089Spjd	chassisid += 12;
415219089Spjd	len = strlen(hdl->libzfs_chassis_id);
416219089Spjd	if (strncmp(chassisid, hdl->libzfs_chassis_id, len) == 0 &&
417219089Spjd	    (chassisid[len] == '/' || chassisid[len] == ':'))
418219089Spjd		return (B_FALSE);
419219089Spjd
420219089Spjd	return (B_TRUE);
421219089Spjd}
422219089Spjd
423219089Spjd/*
424219089Spjd * Clear memory associated with the FRU hash.
425219089Spjd */
426219089Spjdvoid
427219089Spjdlibzfs_fru_clear(libzfs_handle_t *hdl, boolean_t final)
428219089Spjd{
429219089Spjd	libzfs_fru_t *frup;
430219089Spjd
431219089Spjd	while ((frup = hdl->libzfs_fru_list) != NULL) {
432219089Spjd		hdl->libzfs_fru_list = frup->zf_next;
433219089Spjd		free(frup->zf_device);
434219089Spjd		free(frup->zf_fru);
435219089Spjd		free(frup);
436219089Spjd	}
437219089Spjd
438219089Spjd	hdl->libzfs_fru_list = NULL;
439219089Spjd
440219089Spjd	if (hdl->libzfs_topo_hdl != NULL) {
441219089Spjd		_topo_snap_release(hdl->libzfs_topo_hdl);
442219089Spjd		_topo_close(hdl->libzfs_topo_hdl);
443219089Spjd		hdl->libzfs_topo_hdl = NULL;
444219089Spjd	}
445219089Spjd
446219089Spjd	if (final) {
447219089Spjd		free(hdl->libzfs_fru_hash);
448219089Spjd	} else if (hdl->libzfs_fru_hash != NULL) {
449219089Spjd		bzero(hdl->libzfs_fru_hash,
450219089Spjd		    ZFS_FRU_HASH_SIZE * sizeof (void *));
451219089Spjd	}
452219089Spjd}
453