/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved. */ /* * Metadevice diskset interfaces */ #include "meta_set_prv.h" #include #include #include extern char *blkname(char *); static int upd_dr_dbinfo( mdsetname_t *sp, md_set_desc *sd, md_drive_desc *dd, md_replicalist_t *rlp, int forceflg, md_error_t *ep ) { md_drive_desc *p; md_replica_t *r; md_replicalist_t *rl; int i; int dbcnt; int rval = 0; daddr_t nblks = 0; md_setkey_t *cl_sk; md_error_t xep = mdnullerror; md_mnnode_desc *nd; ddi_devid_t devid; /* find the smallest existing replica */ for (rl = rlp; rl != NULL; rl = rl->rl_next) { r = rl->rl_repp; nblks = ((nblks == 0) ? r->r_nblk : min(r->r_nblk, nblks)); } if (nblks <= 0) nblks = (MD_MNSET_DESC(sd)) ? MD_MN_DBSIZE : MD_DBSIZE; for (p = dd; p != NULL; p = p->dd_next) { dbcnt = 0; for (rl = rlp; rl != NULL; rl = rl->rl_next) { r = rl->rl_repp; /* * Before we bump up the dbcnt, if we're * running with device ids in disksets, let's * compare the device ids otherwise we compare * the ctd names. * * There is a possibility the device ids might * have changed. To account for that case, we * fallback to comparing the ctd names if the * device id comparison fails. If we aren't running * in device id mode and a disk has moved, the ctd's * won't match. */ if ((p->dd_dnp->devid != NULL) && (r->r_devid != NULL) && (!MD_MNSET_DESC(sd))) { (void) devid_str_decode(p->dd_dnp->devid, &devid, NULL); if ((devid_compare(devid, r->r_devid) == 0) || (strcmp(r->r_namep->drivenamep->cname, p->dd_dnp->cname) == 0)) dbcnt++; devid_free(devid); } else { if (strcmp(r->r_namep->drivenamep->cname, p->dd_dnp->cname) == 0) dbcnt++; } } p->dd_dbcnt = dbcnt; p->dd_dbsize = dbcnt > 0 ? nblks : 0; } /* Lock the set on current set members */ if (MD_MNSET_DESC(sd)) { nd = sd->sd_nodelist; while (nd) { /* If this is forced, don't lock other sides */ if (forceflg && strcmp(mynode(), nd->nd_nodename) != 0) { nd = nd->nd_next; continue; } /* We already locked this side in the caller */ if (strcmp(mynode(), nd->nd_nodename) == 0) { nd = nd->nd_next; continue; } if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { nd = nd->nd_next; continue; } if (clnt_lock_set(nd->nd_nodename, sp, ep)) { rval = -1; goto out; } nd = nd->nd_next; } } else { for (i = 0; i < MD_MAXSIDES; i++) { /* Skip empty slots */ if (sd->sd_nodes[i][0] == '\0') continue; /* If this is forced, don't lock other sides */ if (forceflg && strcmp(mynode(), sd->sd_nodes[i]) != 0) continue; /* We already locked this side in the caller */ if (strcmp(mynode(), sd->sd_nodes[i]) == 0) continue; if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) { rval = -1; goto out; } } } if (MD_MNSET_DESC(sd)) { nd = sd->sd_nodelist; while (nd) { /* If this is forced, then only care about this node */ if (forceflg && strcmp(mynode(), nd->nd_nodename) != 0) { nd = nd->nd_next; continue; } if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { nd = nd->nd_next; continue; } if (clnt_upd_dr_dbinfo(nd->nd_nodename, sp, dd, ep) == -1) { if (! mdiserror(ep, MDE_NO_SET) && ! mdismddberror(ep, MDE_DB_NODB)) { rval = -1; break; } mdclrerror(ep); } nd = nd->nd_next; } } else { for (i = 0; i < MD_MAXSIDES; i++) { /* Skip empty slots */ if (sd->sd_nodes[i][0] == '\0') continue; /* If this is forced, then only care about this node */ if (forceflg && strcmp(mynode(), sd->sd_nodes[i]) != 0) continue; if (clnt_upd_dr_dbinfo(sd->sd_nodes[i], sp, dd, ep) == -1) { if (! mdiserror(ep, MDE_NO_SET) && ! mdismddberror(ep, MDE_DB_NODB)) { rval = -1; break; } mdclrerror(ep); } } } out: cl_sk = cl_get_setkey(sp->setno, sp->setname); if (MD_MNSET_DESC(sd)) { nd = sd->sd_nodelist; while (nd) { /* If this is forced, don't unlock other sides */ if (forceflg && strcmp(mynode(), nd->nd_nodename) != 0) { nd = nd->nd_next; continue; } /* We will unlocked this side in the caller */ if (strcmp(mynode(), nd->nd_nodename) == 0) { nd = nd->nd_next; continue; } if (!(nd->nd_flags & MD_MN_NODE_ALIVE)) { nd = nd->nd_next; continue; } if (clnt_unlock_set(nd->nd_nodename, cl_sk, &xep)) { if (rval == 0) (void) mdstealerror(ep, &xep); rval = -1; } nd = nd->nd_next; } } else { for (i = 0; i < MD_MAXSIDES; i++) { /* Skip empty slots */ if (sd->sd_nodes[i][0] == '\0') continue; /* If this is forced, don't unlock other sides */ if (forceflg && strcmp(mynode(), sd->sd_nodes[i]) != 0) continue; /* We will unlocked this side in the caller */ if (strcmp(mynode(), sd->sd_nodes[i]) == 0) continue; if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) { if (rval == 0) (void) mdstealerror(ep, &xep); rval = -1; } } } /* Do not clear the key, via cl_set_setkey(NULL) this is nested */ return (rval); } static int usetag_take(set_t setno, int usetag, md_error_t *ep) { mddb_dtag_use_parm_t dtup; (void) memset(&dtup, '\0', sizeof (mddb_dtag_use_parm_t)); dtup.dtup_id = usetag; dtup.dtup_setno = setno; if (metaioctl(MD_MED_USE_TAG, &dtup, &dtup.dtup_mde, NULL) != 0) return (mdstealerror(ep, &dtup.dtup_mde)); return (0); } static int useit_take(set_t setno, md_error_t *ep) { mddb_accept_parm_t accp; (void) memset(&accp, '\0', sizeof (mddb_accept_parm_t)); accp.accp_setno = setno; if (metaioctl(MD_MED_ACCEPT, &accp, &accp.accp_mde, NULL) != 0) return (mdstealerror(ep, &accp.accp_mde)); return (0); } /* * Update the master block with the device id information for the disks * in the diskset. The device id information will be consumed by the * diskset import code in case of remotely replicated disksets. * * For the drives that have a valid diskset mddb on them, we add the * device id for the drive to the unused portion of the mddb. * * For the drives that don't have a diskset mddb on them, we add a dummy * master block that contains the device id for the drive. A dummy master * block is signified by changing the master block magic number, mb_magic, * to MDDB_MAGIC_DU. * * This code is responsible primarily for adding the appropriate device id * information to diskset disks that didn't have the information. This would * typically occur when the OS has been upgraded from an OS release prior to * Solaris 10 * * The error path in this routine is defined as - if an error occurs while * updating the mddb for one disk in the diskset, don't bother updating *any* * of the mddbs because it's game over anyways as far as disaster recovery for * that diskset is concerned. * * This code will need to be revisited if and when support for importing * partial disksets is added. * * NOTE: This code relies heavily on the meta_repartition() working correctly * and reformatting a drive, so that there's enough room for a dummy master * block, every time a drive is added to a diskset. Should * the meta_repartition() code change in future, this code will have to be * revisited. * * Returns 0 on success and -1 on failure */ int meta_update_mb(mdsetname_t *sp, md_drive_desc *drivedesc, md_error_t *ep) { uint_t sliceno, offset; void *mb; mddb_mb_t *mbp; int fd = -1; ddi_devid_t devid = NULL; md_drive_desc *dd; mddrivename_t *dnp; mdname_t *rsp; int dbcnt; int dbsize; size_t len; md_set_desc *sd; /* * Don't do anything for MN diskset for now. */ if (! metaislocalset(sp)) { if ((sd = metaget_setdesc(sp, ep)) == NULL) return (-1); if (MD_MNSET_DESC(sd)) return (0); } mb = Malloc(DEV_BSIZE); mbp = (mddb_mb_t *)mb; /* * For every drive in the drive descriptor, iterate through all * the mddbs present on it and check to see if mb_devid_magic is * set. If it isn't, then update the master block with the correct * device id information */ for (dd = drivedesc; dd != NULL; dd = dd->dd_next) { int i = 0; dnp = dd->dd_dnp; dbcnt = dd->dd_dbcnt; dbsize = dd->dd_dbsize; /* * When the import support for remotely replicated * disksets gets implemented, we probably want to * inform the user that the disks won't be self * identifying if any of these calls fails */ if (meta_replicaslice(dnp, &sliceno, ep) != 0) return (-1); if ((rsp = metaslicename(dnp, sliceno, ep)) == NULL) return (-1); if ((fd = open(rsp->rname, O_RDWR)) < 0) goto cleanup; /* if devid_str_decode fails, make sure devid is null */ if (devid_str_decode(dnp->devid, &devid, NULL) != 0) { devid = NULL; } do { int push = 0; offset = (i * dbsize + 16); ++i; if (lseek(fd, (off_t)dbtob(offset), SEEK_SET) < 0) goto cleanup; if (read(fd, mbp, DEV_BSIZE) != DEV_BSIZE) goto cleanup; if (crcchk((uchar_t *)mbp, (uint_t *)&mbp->mb_checksum, (uint_t)DEV_BSIZE, (crc_skip_t *)NULL)) goto cleanup; /* * If the disk is one of the ones that doesn't * have a shared mddb on it, we put a dummy * master block on it. */ if (mbp->mb_devid_magic != MDDB_MAGIC_DE) { if (dbcnt == 0) { meta_mkdummymaster(sp, fd, 16); break; } } /* * if mb_setcreatetime is 0, this field was never * filled in so do it now. */ if ((mbp->mb_setcreatetime.tv_sec == 0) && (mbp->mb_setcreatetime.tv_usec == 0)) { mbp->mb_setcreatetime = meta_get_lb_inittime(sp, ep); push = 1; } /* * If MDDB_MAGIC_DE is set in the * mb_devid_magic field then we know we * have a valid device id and we don't * need to add it to the master block. * * This would have to be revisited if device * ids change as a result of device id * algorithms changing or somesuch. */ if (mbp->mb_devid_magic != MDDB_MAGIC_DE) { if (devid != NULL) { len = devid_sizeof(devid); if (len <= (DEV_BSIZE - sizeof (mddb_mb_t))) { /* * there's enough space to * store the devid */ mbp->mb_devid_magic = MDDB_MAGIC_DE; mbp->mb_devid_len = len; (void) memcpy(mbp->mb_devid, (char *)devid, len); push = 1; } } } /* * write out (push) any changes we have to the mb */ if (push) { crcgen((uchar_t *)mbp, (uint_t *)&mbp->mb_checksum, (uint_t)DEV_BSIZE, (crc_skip_t *)NULL); if (lseek(fd, (off_t)dbtob(offset), SEEK_SET) < 0) goto cleanup; if (write(fd, mbp, DEV_BSIZE) != DEV_BSIZE) goto cleanup; } if (devid) devid_free(devid); } while (i < dbcnt); (void) close(fd); } /* success */ return (0); cleanup: if (fd != -1) (void) close(fd); if (devid) devid_free(devid); return (-1); } extern int *replicated_disk_list_built; extern int replicated_disk_list_built_pass1; /* * Exported Entry Points */ int meta_set_take( mdsetname_t *sp, mhd_mhiargs_t *mhiargsp, int flags, int usetag, md_error_t *ep ) { md_set_desc *sd; md_drive_desc *dd; md_drive_desc *d = NULL; char *owner = NULL; int rval = 0; int pathname_return = 0; int i; int has_set; int matches = 0; int numsides = 0; md_replicalist_t *rlp = NULL; sigset_t oldsigs; md_setkey_t *cl_sk; int rb_level = 0; md_error_t xep = mdnullerror; mdsetname_t *local_sp = NULL; side_t side; int ret = 0; char *newname = NULL; mdkey_t side_names_key; int unrslv_replicated = 0; mddrivenamelist_t *dnlp = NULL; int retake_flag = 0; unsigned long node_active[BT_BITOUL(MD_MAXSIDES)]; mdnamelist_t *nlp = NULL; bzero(node_active, sizeof (unsigned long) * BT_BITOUL(MD_MAXSIDES)); if ((flags & TAKE_USETAG) || (flags & TAKE_USEIT)) { if (flags & TAKE_USETAG) { if (usetag_take(sp->setno, usetag, ep)) return (-1); } else { if (useit_take(sp->setno, ep)) return (-1); } if (meta_resync_all(sp, MD_DEF_RESYNC_BUF_SIZE, ep) != 0) mdclrerror(ep); } /* Do we own the set? */ i = own_set(sp, &owner, (flags & TAKE_FORCE), ep); if (! mdisok(ep)) { if (owner != NULL) Free(owner); return (-1); } if (i == MD_SETOWNER_NO) { (void) mddserror(ep, MDE_DS_NOTOWNER, sp->setno, owner, NULL, sp->setname); if (owner != NULL) Free(owner); return (-1); } if (owner != NULL) { Free(owner); owner = NULL; } /* We already own it, we are done. */ if (i == MD_SETOWNER_YES) return (0); if ((sd = metaget_setdesc(sp, &xep)) == NULL) return (-1); /* You can not take ownership of a set that has no drives */ if (sd->sd_flags & MD_SR_MB_DEVID) dd = metaget_drivedesc(sp, MD_BASICNAME_OK | PRINT_FAST, ep); else dd = metaget_drivedesc(sp, MD_BASICNAME_OK, ep); if (dd == NULL) { if (! mdisok(ep)) return (-1); return (0); } /* END CHECK CODE */ md_rb_sig_handling_on(); /* Lock the set on our side */ if (clnt_lock_set(mynode(), sp, ep)) { rval = -1; goto out; } /* * Find the "side" value so that it can be used to deal with * the devids. */ side = getnodeside(mynode(), sd); if (side == MD_SIDEWILD) { (void) mddserror(ep, MDE_DS_HOSTNOSIDE, sp->setno, mynode(), NULL, mynode()); rval = -1; goto out; } /* * A local sets' side 0 references records associated with * that node's local set. As this is a non-local set, "side" * must be modified (by adding a SKEW) before we reference * records in the local set [setno = 0] for the non-local set * [setno = 1..n]. */ side += SKEW; /* * If this set had been previously imported as a partial replicated * diskset, then must attempt to updated any unresolved drive * records in diskset with new devid information. Must set * flags in drivedesc list before loading up set so that the * md driver will fix up names and devids correctly in the * locator block. */ if (sd->sd_flags & MD_SR_UNRSLV_REPLICATED) { md_im_names_t cnames = { 0, NULL}; ddi_devid_t old_devid, new_devid; char *search_path = "/dev"; devid_nmlist_t *nmlist; int indx; mddrivenamelist_t **dnlpp = &dnlp; if (meta_list_disks(ep, &cnames) != 0) { rval = -1; goto out; } for (indx = 0; indx < cnames.min_count; ++indx) { mddrivename_t *dnp; mdsetname_t *sp = metasetname(MD_LOCAL_NAME, ep); int fd = -1; ddi_devid_t devid1; char *cdevidp; int len; char *fp; /* * We may have name collision here so we need to get * the dnp using the devid and not the name. */ len = strlen(cnames.min_names[indx]) + strlen("s0"); if ((fp = (char *)Malloc(len+1)) == NULL) { (void) mdsyserror(ep, ENOMEM, NULL); rval = -1; goto out; } (void) snprintf(fp, len + 1, "%ss0", cnames.min_names[indx]); if ((fd = open(fp, O_RDONLY|O_NDELAY)) < 0) { (void) mdsyserror(ep, EIO, fp); rval = -1; goto out; } Free(fp); /* if no device id, what error?) */ if (devid_get(fd, &devid1) != 0) { (void) mdsyserror(ep, EIO, fp); rval = -1; goto out; } if (close(fd) < 0) { (void) mdsyserror(ep, EIO, fp); rval = -1; goto out; } cdevidp = devid_str_encode(devid1, NULL); if (cdevidp == NULL) { (void) mdsyserror(ep, EIO, fp); rval = -1; goto out; } devid_free(devid1); dnp = metadrivenamebydevid(&sp, cdevidp, cnames.min_names[indx], ep); devid_str_free(cdevidp); if (dnp == NULL) { /* * Assuming we're interested in knowing about * whatever error occurred, but not in stopping. */ mde_perror(ep, cnames.min_names[indx]); mdclrerror(ep); continue; } dnlpp = meta_drivenamelist_append_wrapper(dnlpp, dnp); } /* Reget sd and dd since freed by meta_prune_cnames. */ if ((sd = metaget_setdesc(sp, ep)) == NULL) { rval = -1; goto out; } if (sd->sd_flags & MD_SR_MB_DEVID) dd = metaget_drivedesc(sp, MD_BASICNAME_OK | PRINT_FAST, ep); else dd = metaget_drivedesc(sp, MD_BASICNAME_OK, ep); /* If ep has error, then there was a failure, set rval */ if (!mdisok(ep)) { rval = -1; goto out; } /* Builds global replicated disk list */ replicated_disk_list_built = &replicated_disk_list_built_pass1; /* If success, then clear error structure */ if (build_replicated_disks_list(ep, dnlp) == 1) mdclrerror(ep); /* If ep has error, then there was a failure, set rval */ if (! mdisok(ep)) { rval = -1; goto out; } for (d = dd; d != NULL; d = d->dd_next) { if (d->dd_flags & MD_DR_UNRSLV_REPLICATED) { /* Get old devid from drive record */ (void) devid_str_decode(d->dd_dnp->devid, &old_devid, NULL); /* * If the devid stored in the drive record * (old_devid) matches a devid known by * the system, then this disk has already * been partially resolved. This situation * could occur if a panic happened during a * previous take of this diskset. * Set flag to later handle fixing the master * block on disk and turning off the unresolved * replicated flag. */ if (meta_deviceid_to_nmlist(search_path, (ddi_devid_t)old_devid, DEVID_MINOR_NAME_ALL, &nmlist) == 0) { d->dd_flags |= MD_DR_FIX_MB_DID; retake_flag = 1; continue; } /* * If the devid stored in the drive record * is on the list of replicated disks found * during a system scan then set both flags * so that the locator block, namespaces * (diskset and local set), master block * and unresolved replicated flag are updated. */ new_devid = replicated_list_lookup( devid_sizeof((ddi_devid_t)old_devid), old_devid); devid_free(old_devid); /* * If devid stored in the drive record is * not found then set flag to mark * that set is still unresolved and * continue to next drive record. */ if (new_devid == NULL) { unrslv_replicated = 1; continue; } /* * Set flags to fix up the master block, * locator block of the diskset, diskset * namespace and the local set namespace. */ d->dd_flags |= (MD_DR_FIX_MB_DID | MD_DR_FIX_LB_NM_DID); retake_flag = 1; } } } /* * Check the local devid namespace to see if the disks * have been moved. Use the local set first of all as this contains * entries for the disks in the set. * * This is being done before the tk_own_bydd because the disks * in the dd list could be wrong! But it should be done with the lock * held for the set. */ local_sp = metasetname(MD_LOCAL_NAME, ep); for (d = dd; d != NULL; d = d->dd_next) { /* * Actually do the check of the disks. */ ret = meta_upd_ctdnames(&local_sp, 0, side, d->dd_dnp, &newname, ep); if ((ret == METADEVADM_ERR) || (ret == METADEVADM_DSKNAME_ERR)) { /* check failed in some unknown manner */ rval = -1; goto out; } else if (ret == METADEVADM_DISKMOVE) { /* * Update the dd namelist so that the rpc.metamhd * gets the correct disks to reserve - it is the rname * we are interested in. */ if (newname != NULL) { char *save_devid; /* * Need to save the side names key as this * points to the namespace entry that will * need to be updated. In addition the call * to meta_make_sidenmlist does not actually * set the namespace key. */ side_names_key = d->dd_dnp->side_names_key; /* * There is the possibility that there * will be multiple disks with the same * name but different devids in the * drivelist. Because of this, we need * to look for a new dnp based on devid * and not name. */ save_devid = Strdup(d->dd_dnp->devid); metafreedrivename(d->dd_dnp); d->dd_dnp = metadrivenamebydevid(&sp, save_devid, newname, ep); Free(save_devid); Free(newname); /* * null newname so we are reset for next time * through */ newname = NULL; ret = meta_make_sidenmlist(sp, d->dd_dnp, 0, NULL, ep); d->dd_dnp->side_names_key = side_names_key; if (ret == -1) { rval = -1; goto out; } } } } RB_TEST(1, "take", ep) RB_PREEMPT; rb_level = 1; /* level 1 */ RB_TEST(2, "take", ep) if (!MD_ATSET_DESC(sd)) { if (tk_own_bydd(sp, dd, mhiargsp, flags & MD_IM_PARTIAL_DISKSET, ep)) goto rollback; } RB_TEST(3, "take", ep) RB_PREEMPT; rb_level = 2; /* level 2 */ RB_TEST(4, "take", ep) if (clnt_stimeout(mynode(), sp, mhiargsp, ep) == -1) goto rollback; if (setup_db_bydd(sp, dd, (flags & TAKE_FORCE), ep) == -1) { if (! mdismddberror(ep, MDE_DB_ACCOK) && ! mdismddberror(ep, MDE_DB_TAGDATA)) goto rollback; mdclrerror(ep); } RB_TEST(5, "take", ep) RB_PREEMPT; rb_level = 3; /* level 3 */ RB_TEST(6, "take", ep) /* Snarf set of traditional diskset doesn't use stale information */ if (snarf_set(sp, FALSE, ep)) { if (mdismddberror(ep, MDE_DB_STALE) || mdismddberror(ep, MDE_DB_ACCOK) || mdismddberror(ep, MDE_DB_TAGDATA)) { rval = -1; goto out; } if (! mdismddberror(ep, MDE_DB_NODB) && ! mdismddberror(ep, MDE_DB_NOTOWNER)) goto rollback; /* * Look at the set on all other hosts, if every other host * has the same set with a larger genid, then we destroy this * copy. */ for (i = 0; i < MD_MAXSIDES; i++) { /* Skip empty slots */ if (sd->sd_nodes[i][0] == '\0') continue; /* Skip this node */ if (strcmp(sd->sd_nodes[i], mynode()) == 0) continue; numsides++; has_set = nodehasset(sp, sd->sd_nodes[i], NHS_NST_EQ_G_GT, &xep); if (has_set < 0) { if (! mdiserror(&xep, MDE_NO_SET) && ! mdismddberror(&xep, MDE_DB_NODB)) goto rollback; matches++; mdclrerror(&xep); continue; } if (has_set) matches++; } /* Destroy the set */ if (numsides > 0 && (numsides - matches) == 0) { if (meta_set_destroy(sp, FALSE, &xep)) mdclrerror(&xep); (void) mddserror(ep, MDE_DS_SETCLEANUP, sp->setno, sp->setname, NULL, mynode()); rval = -1; } goto rollback; } /* * If an unresolved replicated diskset, fix up diskset * and local namespaces, master block and drive record * with the new devid. If all drives in diskset are * now resolved, then clear set unresolved replicated flag. * If an error is encountered, don't fail the take, but * don't proceed any further in resolving the replicated disks. */ if (sd->sd_flags & MD_SR_UNRSLV_REPLICATED) { /* Fix up diskset and local namespaces with new devids */ meta_unrslv_replicated_nm(sp, dd, dnlp, ep); if (mdisok(ep)) { /* Fix up master block with new devids */ meta_unrslv_replicated_mb(sp, dd, dnlp, ep); } /* If all drives are resolved, set OK flag in set record. */ if (mdisok(ep) && (unrslv_replicated == 0)) { /* Ignore failure since no bad effect. */ (void) clnt_upd_sr_flags(mynode(), sp, MD_SR_OK, ep); } mdclrerror(ep); } /* * meta_getalldevs() will ultimately force devfsadmd to create * the /dev links for all the configured metadevices if they * do not already exist. This ensures that once the set is * owned all the metadevices are accessible as opposed to waiting * for devfsadmd to create them. */ if (meta_getalldevs(sp, &nlp, FALSE, ep) != 0) { metafreenamelist(nlp); goto rollback; } metafreenamelist(nlp); pathname_return = pathname_reload(&sp, sp->setno, ep); if ((pathname_return == METADEVADM_ERR) || (pathname_return == METADEVADM_DSKNAME_ERR)) { goto rollback; } if (metareplicalist(sp, (MD_BASICNAME_OK | PRINT_FAST), &rlp, ep) < 0) goto rollback; if (upd_dr_dbinfo(sp, sd, dd, rlp, (flags & TAKE_FORCE), ep) < 0) { metafreereplicalist(rlp); goto rollback; } metafreereplicalist(rlp); /* * If the set doesn't have the MD_SR_MB_DEVID bit set, i.e * the drives in the set don't have the device id information, * then stick it in if possible. * * If updating the master block fails for whatever reason, it's * okay. It just means the disk(s) in the diskset won't be self * identifying. */ if (!(sd->sd_flags & MD_SR_MB_DEVID)) { /* Lock the set on current set members */ for (i = 0; i < MD_MAXSIDES; i++) { /* Skip empty slots */ if (sd->sd_nodes[i][0] == '\0') continue; /* We already locked this side */ if (strcmp(mynode(), sd->sd_nodes[i]) == 0) continue; if (clnt_lock_set(sd->sd_nodes[i], sp, ep)) { /* * Ignore any RPC errors on a force * take. The set will have been taken * above and we still need to continue. */ if (flags & TAKE_FORCE) continue; rval = -1; goto out; } BT_SET(node_active, i); } rb_level = 4; /* level 4 */ if (meta_update_mb(sp, dd, ep) == 0) /* update the sr_flags on all hosts */ for (i = 0; i < MD_MAXSIDES; i++) { /* Skip empty slots */ if (sd->sd_nodes[i][0] == '\0') continue; /* * Only update those nodes that * are active (ie those that the * set is locked on). */ if (!BT_TEST(node_active, i)) continue; if (clnt_upd_sr_flags(sd->sd_nodes[i], sp, (sd->sd_flags | MD_SR_MB_DEVID), ep)) goto rollback; } cl_sk = cl_get_setkey(sp->setno, sp->setname); for (i = 0; i < MD_MAXSIDES; i++) { /* Skip empty slots */ if (sd->sd_nodes[i][0] == '\0') continue; /* Unlocked of this side is done later */ if (strcmp(mynode(), sd->sd_nodes[i]) == 0) continue; /* no point calling dead nodes */ if (!BT_TEST(node_active, i)) continue; if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) { if (rval == 0) (void) mdstealerror(ep, &xep); rval = -1; } } } /* * If we get here, we need to unlock the set before the resync * gets called, otherwise the "daemon" will hold the set lock * until the resync is done! */ cl_sk = cl_get_setkey(sp->setno, sp->setname); if (clnt_unlock_set(mynode(), cl_sk, &xep)) { if (rval == 0) (void) mdstealerror(ep, &xep); rval = -1; } cl_set_setkey(NULL); md_rb_sig_handling_off(md_got_sig(), md_which_sig()); /* We try to get things resync'ed, but this can fail */ mdclrerror(&xep); if (meta_resync_all(sp, MD_DEF_RESYNC_BUF_SIZE, &xep) != 0) { if (rval == 0) (void) mdstealerror(ep, &xep); rval = -1; } RB_TEST(7, "take", ep) /* * In order to resolve the namespace major driver names and * to have the subdrivers attempt to re-associate devts from * the newly resolved replicated device ids, return a '2'. * This instructs metaset to release the diskset and re-take. * * Return a 2 if * - no error was detected on the take * - a replicated unresolved devid was resolved during take * - take isn't being called during an import * - this isn't already a re-take situation */ if ((rval == 0) && (retake_flag == 1) && ((flags & (TAKE_RETAKE | TAKE_IMP)) == 0)) { rval = 2; } return (rval); out: cl_sk = cl_get_setkey(sp->setno, sp->setname); if (clnt_unlock_set(mynode(), cl_sk, &xep)) { if (rval == 0) (void) mdstealerror(ep, &xep); rval = -1; } if (!(sd->sd_flags & MD_SR_MB_DEVID) && (rb_level > 2)) { for (i = 0; i < MD_MAXSIDES; i++) { /* Skip empty slots */ if (sd->sd_nodes[i][0] == '\0') continue; /* We already unlocked this side */ if (strcmp(mynode(), sd->sd_nodes[i]) == 0) continue; /* no point calling dead nodes */ if (!BT_TEST(node_active, i)) continue; if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) { if (rval == 0) (void) mdstealerror(ep, &xep); rval = -1; } } } cl_set_setkey(NULL); md_rb_sig_handling_off(md_got_sig(), md_which_sig()); return (rval); rollback: /* Make sure we are blocking all signals */ if (procsigs(TRUE, &oldsigs, &xep) < 0) mdclrerror(&xep); rval = -1; /* level 4 */ if (rb_level > 3) { if (sd->sd_flags & MD_SR_MB_DEVID) { /* update the sr_flags on all hosts */ for (i = 0; i < MD_MAXSIDES; i++) { /* Skip empty slots */ if (sd->sd_nodes[i][0] == '\0') continue; /* no point calling dead nodes */ if (!BT_TEST(node_active, i)) continue; if (clnt_upd_sr_flags(sd->sd_nodes[i], sp, (sd->sd_flags & ~MD_SR_MB_DEVID), &xep)) mdclrerror(&xep); } } cl_sk = cl_get_setkey(sp->setno, sp->setname); for (i = 0; i < MD_MAXSIDES; i++) { /* Skip empty slots */ if (sd->sd_nodes[i][0] == '\0') continue; /* We will unlocked this side below */ if (strcmp(mynode(), sd->sd_nodes[i]) == 0) continue; /* no point calling dead nodes */ if (!BT_TEST(node_active, i)) continue; if (clnt_unlock_set(sd->sd_nodes[i], cl_sk, &xep)) mdclrerror(&xep); } } /* level 3 */ if (rb_level > 2) { if (halt_set(sp, &xep)) mdclrerror(&xep); } /* level 2 */ if (rb_level > 1) { if (clnt_stimeout(mynode(), sp, &defmhiargs, &xep) == -1) mdclrerror(&xep); } /* level 1 */ if (rb_level > 0) { if (!MD_ATSET_DESC(sd)) { if (rel_own_bydd(sp, dd, FALSE, &xep)) mdclrerror(&xep); } } /* level 0 */ cl_sk = cl_get_setkey(sp->setno, sp->setname); if (clnt_unlock_set(mynode(), cl_sk, &xep)) mdclrerror(&xep); cl_set_setkey(NULL); /* release signals back to what they were on entry */ if (procsigs(FALSE, &oldsigs, &xep) < 0) mdclrerror(&xep); md_rb_sig_handling_off(md_got_sig(), md_which_sig()); return (rval); } int meta_set_release( mdsetname_t *sp, md_error_t *ep ) { int rval = 0; md_drive_desc *dd; mhd_mhiargs_t mhiargs; sigset_t oldsigs; md_setkey_t *cl_sk; int rb_level = 0; md_error_t xep = mdnullerror; /* Make sure we own the set */ if (meta_check_ownership(sp, ep) != 0) return (-1); /* Get the drive descriptors */ if ((dd = metaget_drivedesc(sp, (MD_BASICNAME_OK | PRINT_FAST), ep)) == NULL) if (! mdisok(ep)) return (-1); /* Get timeout values in case we need to roll back this release */ (void) memset(&mhiargs, '\0', sizeof (mhiargs)); if (clnt_gtimeout(mynode(), sp, &mhiargs, ep) != 0) return (-1); /* END CHECK CODE */ md_rb_sig_handling_on(); /* Lock the set on our side */ if (clnt_lock_set(mynode(), sp, ep)) { rval = -1; goto out; } RB_TEST(1, "release", ep) RB_PREEMPT; rb_level = 1; /* level 1 */ RB_TEST(2, "release", ep) if (halt_set(sp, ep)) goto rollback; RB_TEST(3, "release", ep) RB_PREEMPT; rb_level = 2; /* level 2 */ RB_TEST(4, "release", ep) if (rel_own_bydd(sp, dd, FALSE, ep)) goto rollback; RB_TEST(5, "release", ep) RB_PREEMPT; rb_level = 3; /* level 3 */ RB_TEST(6, "release", ep) if (clnt_stimeout(mynode(), sp, &defmhiargs, ep) == -1) goto rollback; RB_TEST(7, "release", ep) out: cl_sk = cl_get_setkey(sp->setno, sp->setname); if (clnt_unlock_set(mynode(), cl_sk, &xep)) { if (rval == 0) (void) mdstealerror(ep, &xep); rval = -1; } cl_set_setkey(NULL); md_rb_sig_handling_off(md_got_sig(), md_which_sig()); return (rval); rollback: /* Make sure we are blocking all signals */ if (procsigs(TRUE, &oldsigs, &xep) < 0) mdclrerror(&xep); rval = -1; /* level 3 */ if (rb_level > 2) { if (clnt_stimeout(mynode(), sp, &mhiargs, &xep) == -1) mdclrerror(&xep); } /* level 2 */ if (rb_level > 1) { if (tk_own_bydd(sp, dd, &mhiargs, FALSE, &xep)) mdclrerror(&xep); } /* level 1 */ if (rb_level > 0) { if (setup_db_bydd(sp, dd, TRUE, &xep) == -1) mdclrerror(&xep); /* Snarf set of trad diskset doesn't use stale information */ if (snarf_set(sp, FALSE, &xep)) mdclrerror(&xep); } /* level 0 */ cl_sk = cl_get_setkey(sp->setno, sp->setname); if (clnt_unlock_set(mynode(), cl_sk, &xep)) mdclrerror(&xep); cl_set_setkey(NULL); /* release signals back to what they were on entry */ if (procsigs(FALSE, &oldsigs, &xep) < 0) mdclrerror(&xep); md_rb_sig_handling_off(md_got_sig(), md_which_sig()); return (rval); }