/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern int md_status; extern unit_t md_nunits; extern set_t md_nsets; extern md_set_t md_set[]; extern md_ops_t trans_md_ops; extern md_krwlock_t md_unit_array_rw; extern uint_t mt_debug; extern major_t md_major; static mt_unit_t * trans_getun(minor_t mnum, md_error_t *mde, int flags, IOLOCK *lock) { mt_unit_t *un; mdi_unit_t *ui; set_t setno = MD_MIN2SET(mnum); if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) { (void) mdmderror(mde, MDE_INVAL_UNIT, mnum); return (NULL); } if (! (flags & STALE_OK)) { if (md_get_setstatus(setno) & MD_SET_STALE) { (void) mdmddberror(mde, MDE_DB_STALE, mnum, setno); return (NULL); } } ui = MDI_UNIT(mnum); if (flags & NO_OLD) { if (ui != NULL) { (void) mdmderror(mde, MDE_UNIT_ALREADY_SETUP, mnum); return (NULL); } return ((mt_unit_t *)1); } if (ui == NULL) { (void) mdmderror(mde, MDE_UNIT_NOT_SETUP, mnum); return (NULL); } if (flags & ARRAY_WRITER) md_array_writer(lock); else if (flags & ARRAY_READER) md_array_reader(lock); if (!(flags & NO_LOCK)) { if (flags & WR_LOCK) (void) md_ioctl_writerlock(lock, ui); else /* RD_LOCK */ (void) md_ioctl_readerlock(lock, ui); } un = (mt_unit_t *)MD_UNIT(mnum); if (un->c.un_type != MD_METATRANS) { (void) mdmderror(mde, MDE_NOT_MT, mnum); return (NULL); } return (un); } #ifdef DEBUG /* * DEBUG ROUTINES * THESE ROUTINES ARE ONLY USED WHEN ASSERTS ARE ENABLED */ extern int (*mdv_strategy_tstpnt)(buf_t *, int, void*); /* * return the global stats struct */ static int trans_get_transstats(void *d, int mode) { md_i_get_t *migp = d; mdclrerror(&migp->mde); if (migp->size == 0) { migp->size = sizeof (struct transstats); return (0); } if (migp->size < sizeof (struct transstats)) return (EFAULT); if (ddi_copyout(&transstats, (caddr_t)(uintptr_t)migp->mdp, sizeof (struct transstats), mode)) return (EFAULT); return (0); } /* * test ioctls */ /* * TEST TRYGETBLK */ /*ARGSUSED1*/ static int trans_test_trygetblk(void *d, int mode, IOLOCK *lock) { mt_unit_t *un; int test; dev_t dev; struct buf *bp; struct buf *trygetblk(); md_i_get_t *migp = d; mdclrerror(&migp->mde); migp->size = 0; un = trans_getun(migp->id, &migp->mde, RD_LOCK, lock); if (un == NULL) return (EINVAL); dev = un->un_m_dev; /* * test 1 -- don't find nonexistant buf */ test = 1; if (bp = trygetblk(dev, 0)) goto errout; /* * test 2 - don't find stale buf */ test = 2; if ((bp = getblk(dev, 0, DEV_BSIZE)) == NULL) goto errout; bp->b_flags |= (B_STALE|B_DONE); brelse(bp); if (bp = trygetblk(dev, 0)) goto errout; /* * test 3 -- don't find busy buf */ test = 3; if ((bp = getblk(dev, 0, DEV_BSIZE)) == NULL) goto errout; if (trygetblk(dev, 0)) goto errout; bp->b_flags |= B_STALE; brelse(bp); /* * test 4 -- don't find not-done buf */ test = 4; if ((bp = getblk(dev, 0, DEV_BSIZE)) == NULL) goto errout; brelse(bp); if (bp = trygetblk(dev, 0)) goto errout; /* * test 5 -- find an idle buf */ test = 5; if ((bp = bread(dev, 0, DEV_BSIZE)) == NULL) goto errout; brelse(bp); if ((bp = trygetblk(dev, 0)) == NULL) goto errout; bp->b_flags |= B_STALE; brelse(bp); bp = 0; test = 0; /* no test failed */ errout: if (bp) { bp->b_flags |= B_STALE; brelse(bp); } migp->size = test; if (test) return (EINVAL); return (0); } /* * TEST TRYGETPAGE */ static page_t * trans_trypage(struct vnode *vp, uint_t off) { page_t *pp; /* * get a locked page */ if ((pp = page_lookup_nowait(vp, off, SE_EXCL)) == NULL) return (NULL); /* * get the iolock */ if (!page_io_trylock(pp)) { page_unlock(pp); return (NULL); } return (pp); } /*ARGSUSED1*/ static int trans_test_trypage(void *d, int mode, IOLOCK *lock) { mt_unit_t *un; int test; dev_t dev; struct page *pp; struct vnode *devvp; struct vnode *cvp; extern struct vnode *common_specvp(struct vnode *); extern void pvn_io_done(struct page *); md_i_get_t *migp = d; mdclrerror(&migp->mde); migp->size = 0; un = trans_getun(migp->id, &migp->mde, RD_LOCK, lock); if (un == NULL) return (EINVAL); dev = un->un_m_dev; devvp = makespecvp(dev, VBLK); cvp = common_specvp(devvp); /* * get rid of the devices pages */ (void) VOP_PUTPAGE(cvp, (offset_t)0, (uint_t)0, B_INVAL, CRED(), NULL); /* * test 1 -- don't find nonexistant page */ test = 1; if (pp = trans_trypage(cvp, 0)) goto errout; /* * test 2 -- don't find busy page */ test = 2; if ((pp = page_create(cvp, 0, 1, PG_WAIT)) == NULL) goto errout; if (trans_trypage(cvp, 0)) goto errout; pvn_io_done(pp); pp = 0; /* * test 3 - find an idle page */ test = 3; if ((pp = page_create(cvp, 0, 1, PG_WAIT)) == NULL) goto errout; pvn_io_done(pp); if ((pp = trans_trypage(cvp, 0)) == NULL) goto errout; pvn_io_done(pp); pp = 0; test = 0; /* no test failed */ errout: if (pp) pvn_io_done(pp); /* * get rid of the file's pages */ (void) VOP_PUTPAGE(cvp, (offset_t)0, (uint_t)0, B_INVAL, CRED(), NULL); VN_RELE(devvp); migp->size = test; if (test) return (EINVAL); return (0); } /* * TEST TSD */ #define NKEYS (7) #define NTSDTHREADS (3) struct tothread { int test; int error; int exits; int step; kmutex_t lock; kcondvar_t cv; }; static uint_t keys[NKEYS]; static struct tothread tta[NTSDTHREADS]; static int allocatorvalue; static int okdestructoralloc; static void trans_test_stepwait(struct tothread *tp, int step) { /* * wait for other thread */ mutex_enter(&tp->lock); while (tp->step < step) cv_wait(&tp->cv, &tp->lock); mutex_exit(&tp->lock); } static void trans_test_step(struct tothread *tp, int step) { /* * wakeup other threads */ mutex_enter(&tp->lock); tp->step = step; cv_broadcast(&tp->cv); mutex_exit(&tp->lock); } static void trans_test_destructor(void *voidp) { int exits; struct tothread *tp = voidp; /* * check that threads clean up *all* TSD at exit */ mutex_enter(&tp->lock); exits = ++tp->exits; mutex_exit(&tp->lock); if (exits >= NKEYS) trans_test_step(tp, 3); } static void trans_test_destructor_alloc(void *voidp) { int *value = voidp; okdestructoralloc = 0; if (value) { if (*value == allocatorvalue) okdestructoralloc = 1; md_trans_free((caddr_t)value, sizeof (value)); } } static void * trans_test_allocator(void) { int *value; value = (int *)md_trans_zalloc(sizeof (value)); *value = allocatorvalue; return ((void *)value); } /* * thread used to test TSD destroy functionality */ static void trans_test_thread(struct tothread *tp) { int i; callb_cpr_t cprinfo; /* * Register cpr callback */ CALLB_CPR_INIT(&cprinfo, &tp->lock, callb_generic_cpr, "trans_test_thread"); /* * get some TSD */ for (i = NKEYS - 1; i >= 0; --i) if (tsd_set(keys[i], tp)) { tp->error = 500; goto errout; } /* * tell parent that we have TSD */ trans_test_step(tp, 1); /* * wait for parent to destroy some of our TSD */ trans_test_stepwait(tp, 2); /* * make sure that the appropriate TSD was destroyed */ if ((tsd_get(keys[0]) != NULL) || (tsd_get(keys[NKEYS-1]) != NULL) || (tsd_get(keys[NKEYS>>1]) != NULL)) { tp->error = 510; goto errout; } for (i = 0; i < NKEYS; ++i) if (tsd_get(keys[i]) != tp) if (i != 0 && i != NKEYS - 1 && i != NKEYS >> 1) { tp->error = 520; goto errout; } /* * set up cpr exit */ mutex_enter(&tp->lock); CALLB_CPR_EXIT(&cprinfo); thread_exit(); errout: /* * error -- make sure the parent will wake up (error code in tp) */ trans_test_step(tp, 3); /* * set up cpr exit */ mutex_enter(&tp->lock); CALLB_CPR_EXIT(&cprinfo); thread_exit(); } static void trans_test_threadcreate(struct tothread *tp) { /* * initialize the per thread struct and make a thread */ bzero((caddr_t)tp, sizeof (struct tothread)); mutex_init(&tp->lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&tp->cv, NULL, CV_DEFAULT, NULL); (void) thread_create(NULL, 0, trans_test_thread, tp, 0, &p0, TS_RUN, minclsyspri); } /* * driver for TSD tests -- *NOT REENTRANT* */ /*ARGSUSED1*/ static int trans_test_tsd(void *d, int mode) { int test; uint_t rekeys[NKEYS]; int i; uint_t key; int error; md_i_get_t *migp = d; mdclrerror(&migp->mde); migp->size = 0; /* * destroy old keys, if any */ for (i = 0; i < NKEYS; ++i) tsd_destroy(&keys[i]); /* * test 1 -- simple create and destroy keys tests */ test = 1; error = 0; for (i = 0; i < NKEYS; ++i) { tsd_create(&keys[i], NULL); /* get with no set should return NULL */ if (tsd_get(keys[i]) != NULL) { error = 100; goto errout; } /* destroyed key should be 0 */ key = keys[i]; tsd_destroy(&keys[i]); if (keys[i]) { error = 110; goto errout; } /* destroy the key twice */ keys[i] = key; tsd_destroy(&keys[i]); /* destroyed key should be 0 */ if (keys[i]) { error = 120; goto errout; } /* getting a destroyed key should return NULL */ if (tsd_get(keys[i]) != NULL) { error = 130; goto errout; } /* recreate the key */ tsd_create(&keys[i], NULL); /* should be the same key as before */ if (key != keys[i]) { error = 140; goto errout; } /* initial value should be NULL */ if (tsd_get(keys[i]) != NULL) { error = 150; goto errout; } /* cleanup */ tsd_destroy(&keys[i]); } /* * test 2 -- recreate keys */ test = 2; error = 0; for (i = 0; i < NKEYS; ++i) tsd_create(&keys[i], NULL); for (i = 0; i < NKEYS; ++i) { /* make sure the keys were created */ if (keys[i] == 0) { error = 200; goto errout; } /* make sure that recreating key doesn't change it */ rekeys[i] = keys[i]; tsd_create(&rekeys[i], NULL); if (rekeys[i] != keys[i]) { error = 210; goto errout; } } for (i = 0; i < NKEYS; ++i) tsd_destroy(&keys[i]); /* * test 3 -- check processing for unset and destroyed keys */ test = 3; error = 0; /* getting a 0 key returns NULL */ if (tsd_get(0) != NULL) { error = 300; goto errout; } /* setting a 0 key returns error */ if (tsd_set(0, NULL) != EINVAL) { error = 310; goto errout; } tsd_create(&key, NULL); /* setting a created key returns no error */ if (tsd_set(key, NULL) == EINVAL) { error = 320; goto errout; } tsd_destroy(&key); /* setting a destroyed key returns error */ if (tsd_set(key, NULL) != EINVAL) { error = 330; goto errout; } /* * test 4 -- make sure that set and get work */ test = 4; error = 0; for (i = 0; i < NKEYS; ++i) { tsd_create(&keys[i], NULL); /* set a value */ (void) tsd_set(keys[i], &key); /* get the value */ if (tsd_get(keys[i]) != &key) { error = 400; goto errout; } /* set the value to NULL */ (void) tsd_set(keys[i], NULL); /* get the NULL */ if (tsd_get(keys[i]) != NULL) { error = 410; goto errout; } } /* cleanup */ for (i = 0; i < NKEYS; ++i) tsd_destroy(&keys[i]); /* * test 5 -- destroying keys w/multiple threads */ test = 5; error = 0; /* create the keys */ for (i = 0; i < NKEYS; ++i) tsd_create(&keys[i], trans_test_destructor); /* create some threads */ for (i = 0; i < NTSDTHREADS; ++i) trans_test_threadcreate(&tta[i]); /* wait for the threads to assign TSD */ for (i = 0; i < NTSDTHREADS; ++i) trans_test_stepwait(&tta[i], 1); /* destroy some of the keys */ tsd_destroy(&keys[0]); tsd_destroy(&keys[NKEYS - 1]); tsd_destroy(&keys[NKEYS >> 1]); tsd_destroy(&keys[NKEYS >> 1]); /* wakeup the threads -- they check that the destroy took */ for (i = 0; i < NTSDTHREADS; ++i) trans_test_step(&tta[i], 2); /* wait for the threads to exit (also checks for TSD cleanup) */ for (i = 0; i < NTSDTHREADS; ++i) trans_test_stepwait(&tta[i], 3); /* destroy the rest of the keys */ for (i = 0; i < NKEYS; ++i) tsd_destroy(&keys[i]); /* check for error */ for (i = 0; i < NTSDTHREADS; ++i) { if (!error) error = tta[i].error; mutex_destroy(&tta[i].lock); cv_destroy(&tta[i].cv); } /* * test 6 -- test getcreate */ test = 6; error = 0; /* make sure the keys are destroyed */ for (i = 0; i < NKEYS; ++i) tsd_destroy(&keys[i]); /* get w/create */ for (i = 0; i < NKEYS; ++i) { allocatorvalue = i; if (*(int *)tsd_getcreate(&keys[i], trans_test_destructor_alloc, trans_test_allocator) != allocatorvalue) { error = 600; goto errout; } } for (i = 0; i < NKEYS; ++i) { allocatorvalue = i; if (*(int *)tsd_get(keys[i]) != allocatorvalue) { error = 610; goto errout; } } /* make sure destructor gets called when we destroy the keys */ for (i = 0; i < NKEYS; ++i) { allocatorvalue = i; okdestructoralloc = 0; tsd_destroy(&keys[i]); if (okdestructoralloc == 0) { error = 620; goto errout; } } errout: /* make sure the keys are destroyed */ for (i = 0; i < NKEYS; ++i) tsd_destroy(&keys[i]); /* return test # and error code (if any) */ migp->size = test; return (error); } /* * Error Injection Structures, Data, and Functions: * * Error injection is used to test the Harpy error recovery system. The * MD_IOC_INJECTERRORS ioctl is used to start or continue error injection on a * unit, and MD_IOC_STOPERRORS turns it off. An mt_error structure is * associated with every trans device for which we are injecting errors. When * MD_IOC_INJECTERRORS is issued, mdv_strategy_tstpnt is set to point to * trans_error_injector(), so that it gets called for every MDD I/O operation. * * The trans unit can be in one of three states: * * count down - Each I/O causes er_count_down to be decremented. * When er_count_down reaches 0, an error is injected, * the block number is remembered. Without makeing * special provisions, the log area would receive a * small percentage of the injected errors. Thus, * trans_check_error() will be written, so that every * other error is injected on the log. * * suspend - No errors are generated and the counters are not * modified. This is so that fsck/mkfs can do their thing * (we're not testing them) and so that the test script can * set up another test. The transition back to the count * down state occurs when MD_IOC_INJECTERRORS is invoked * again. */ typedef enum { mte_count_down, mte_suspend, mte_watch_block } mte_state; typedef struct mt_error { struct mt_error *er_next; /* next error unit in list. */ mte_state er_state; mt_unit_t *er_unitp; /* unit to force errors on. */ size_t er_count_down; /* i/o transactions until error. */ size_t er_increment; /* increment for reset_count. */ size_t er_reset_count; /* used to reset er_count_down */ size_t er_total_errors; /* count generated errors. */ /* Following fields describe error we are injecting. */ dev_t er_bad_unit; /* Unit associated with block in */ /* error. */ off_t er_bad_block; /* Block in error. */ } mt_error_t; #define ERROR_INCREMENT (1) #define INITIAL_COUNT (1) static int default_increment = ERROR_INCREMENT; static kmutex_t error_mutex; /* protects error_list */ static mt_error_t error_list_head; static int initial_count = INITIAL_COUNT; static int (*tstpnt_save)(buf_t *, int, void*) = NULL; static mt_error_t * find_by_mtunit(mt_unit_t *un, mt_error_t **pred_errp) { mt_error_t *errp = (mt_error_t *)NULL; ASSERT(mutex_owned(&error_mutex) != 0); *pred_errp = &error_list_head; while ((errp = (*pred_errp)->er_next) != (mt_error_t *)NULL) { if (errp->er_unitp == un) break; *pred_errp = errp; } return (errp); } static mt_error_t * find_by_dev(md_dev64_t dev) { mt_error_t *errp = &error_list_head; ASSERT(mutex_owned(&error_mutex) != 0); while ((errp = errp->er_next) != (mt_error_t *)NULL) { if ((errp->er_unitp->un_m_dev == dev) || (errp->er_unitp->un_l_dev == dev)) break; } return (errp); } static int trans_check_error(buf_t *bp, mt_error_t *errp) { int rv = 0; md_dev64_t target = md_expldev(bp->b_edev); ASSERT(mutex_owned(&error_mutex) != 0); switch (errp->er_state) { case mte_count_down: errp->er_count_down--; if (errp->er_count_down == 0) { /* * Every other error that we inject should be on * the log device. Errors will be injected on the * log device when errp->er_total_errors is even * and on the master device when it is odd. If * this I/O is not for the appropriate device, we * will set errp->er_count_down to 1, so that we * can try again later. */ if ((((errp->er_total_errors % 2) == 0) && (errp->er_unitp->un_l_dev == target)) || (((errp->er_total_errors % 2) != 0) && (errp->er_unitp->un_m_dev == target))) { /* simulate an error */ bp->b_flags |= B_ERROR; bp->b_error = EIO; /* remember the error. */ errp->er_total_errors++; errp->er_bad_unit = bp->b_edev; errp->er_bad_block = bp->b_blkno; /* reset counters. */ errp->er_count_down = errp->er_reset_count; errp->er_reset_count += errp->er_increment; rv = 1; } else { /* Try again next time. */ errp->er_count_down = 1; } } break; case mte_suspend: /* No errors while suspended. */ break; case mte_watch_block: if ((bp->b_edev == errp->er_bad_unit) && (bp->b_blkno == errp->er_bad_block)) { bp->b_flags |= B_ERROR; bp->b_error = EIO; rv = 1; } break; } return (rv); } static int trans_error_injector(buf_t *bp, int flag, void* private) { mt_error_t *errp = (mt_error_t *)NULL; int (*tstpnt)(buf_t *, int, void*) = NULL; int rv = 0; md_dev64_t target = md_expldev(bp->b_edev); int trv = 0; mt_unit_t *un; mutex_enter(&error_mutex); errp = find_by_dev(target); if (errp != (mt_error_t *)NULL) { un = errp->er_unitp; if (target == un->un_m_dev) { /* Target is our master device. */ rv = trans_check_error(bp, errp); } if (target == un->un_l_dev) { /* * Target is our log device. Unfortunately, the same * device may also be used for the MDD database. * Therefore, we need to make sure that the I/O is for * the range of blocks designated as our log. */ if ((bp->b_blkno >= un->un_l_pwsblk) && ((bp->b_blkno + btodb(bp->b_bcount)) <= (un->un_l_sblk + un->un_l_tblks))) { rv = trans_check_error(bp, errp); } } } tstpnt = tstpnt_save; mutex_exit(&error_mutex); if (tstpnt != NULL) trv = (*tstpnt)(bp, flag, private); /* * If we are producing an error (rv != 0) we need to make sure that * biodone gets called. If the tstpnt returned non-zero, * we'll assume that it called biodone. */ if ((rv != 0) && (trv == 0)) { md_biodone(bp); } rv = ((rv == 0) && (trv == 0)) ? 0 : 1; return (rv); } /* * Prepare to inject errors on the master and log devices associated with the * unit specified in migp. The first time that trans_inject_errors() is called * for a unit, an mt_error_t structure is allocated and initialized for the * unit. Subsequent calls for the unit will just insure that the unit is in the * count down state. * * If an mt_error structure is allocated and it is the first one to be put in * the list, mdv_strategy_tstpnt (which is referenced in md_call_strategy()) is * set to trans_error_injector so that it will be called to see if an I/O * request should be treated as an error. */ /*ARGSUSED1*/ static int trans_inject_errors(void *d, int mode, IOLOCK *lock) { mt_error_t *errp; mt_error_t *do_not_care; mt_unit_t *un; int rv = 0; md_i_get_t *migp = d; mdclrerror(&migp->mde); un = trans_getun(migp->id, &migp->mde, RD_LOCK, lock); if (un == NULL) return (EINVAL); /* * If there is already a an error structure for the unit make sure that * it is in count down mode. */ mutex_enter(&error_mutex); errp = find_by_mtunit(un, &do_not_care); if (errp != (mt_error_t *)NULL) { errp->er_state = mte_count_down; } else { /* * Initialize error structure. */ errp = (mt_error_t *)md_trans_zalloc(sizeof (mt_error_t)); errp->er_state = mte_count_down; errp->er_unitp = un; errp->er_count_down = initial_count; errp->er_increment = default_increment; errp->er_reset_count = initial_count; errp->er_total_errors = 0; errp->er_bad_unit = 0; errp->er_bad_block = 0; /* Insert it into the list. */ errp->er_next = error_list_head.er_next; error_list_head.er_next = errp; /* * Set up md_call_strategy to call our error injector. */ if (mdv_strategy_tstpnt != trans_error_injector) { tstpnt_save = mdv_strategy_tstpnt; mdv_strategy_tstpnt = trans_error_injector; } } mutex_exit(&error_mutex); return (rv); } /*ARGSUSED1*/ static int trans_stop_errors(void *d, int mode, IOLOCK *lock) { mt_error_t *errp = (mt_error_t *)NULL; mt_error_t *pred_errp; mt_unit_t *un; int rv = 0; md_i_get_t *migp = d; mdclrerror(&migp->mde); un = trans_getun(migp->id, &migp->mde, RD_LOCK, lock); if (un == NULL) return (EINVAL); mutex_enter(&error_mutex); errp = find_by_mtunit(un, &pred_errp); if (errp != (mt_error_t *)NULL) { /* Remove from list. */ pred_errp->er_next = errp->er_next; if ((error_list_head.er_next == (mt_error_t *)NULL) && (mdv_strategy_tstpnt == trans_error_injector)) { mdv_strategy_tstpnt = tstpnt_save; } } else { /* unit not set up for errors. */ rv = ENXIO; } mutex_exit(&error_mutex); /* Free memory. */ if (errp != (mt_error_t *)NULL) { md_trans_free((void *)errp, sizeof (*errp)); } return (rv); } int _init_ioctl() { mutex_init(&error_mutex, NULL, MUTEX_DRIVER, (void *)NULL); return (1); } int _fini_ioctl() { mutex_destroy(&error_mutex); return (1); } /* * END OF DEBUG ROUTINES */ #endif /* DEBUG */ /* * BEGIN RELEASE DEBUG * The following routines remain in the released product for testability */ /* * ufs error injection remains in the released product */ /*ARGSUSED1*/ static int trans_ufserror(void *d, int mode, IOLOCK *lock) { mt_unit_t *un; md_i_get_t *migp = d; mdclrerror(&migp->mde); un = trans_getun(migp->id, &migp->mde, RD_LOCK, lock); if (un == NULL || un->un_ut == NULL) return (EINVAL); return (0); } /* * shadow test remains in the released product */ static int trans_set_shadow(void *d, int mode, IOLOCK *lock) { dev32_t device; /* shadow device */ mt_unit_t *un; md_i_get_t *migp = d; mdclrerror(&migp->mde); un = trans_getun(migp->id, &migp->mde, WR_LOCK, lock); if (un == NULL) return (EINVAL); if ((un->un_debug & MT_SHADOW) == 0) return (EINVAL); /* Get shadow device. User always passes down 32 bit devt */ if (ddi_copyin((caddr_t)(uintptr_t)migp->mdp, &device, sizeof (device), mode)) { return (EFAULT); } /* Save shadow device designator. */ un->un_s_dev = md_expldev((md_dev64_t)device); return (0); } /* * END RELEASE DEBUG */ static int trans_get(void *d, int mode, IOLOCK *lock) { mt_unit_t *un; ml_unit_t *ul; md_i_get_t *migp = d; mdclrerror(&migp->mde); un = trans_getun(migp->id, &migp->mde, RD_LOCK, lock); if (un == NULL) return (0); if (migp->size == 0) { migp->size = un->c.un_size; return (0); } if (migp->size < un->c.un_size) return (EFAULT); log: ul = un->un_l_unit; if (ul == NULL) goto master; /* * refresh log fields in case log was metattach'ed */ un->un_l_head = (daddr32_t)btodb(ul->un_head_lof); un->un_l_sblk = un->un_l_head; un->un_l_pwsblk = ul->un_pwsblk; un->un_l_maxtransfer = (uint_t)btodb(ul->un_maxtransfer); un->un_l_nblks = ul->un_nblks; un->un_l_tblks = ul->un_tblks; un->un_l_tail = (daddr32_t)btodb(ul->un_tail_lof); un->un_l_resv = ul->un_resv; un->un_l_maxresv = ul->un_maxresv; un->un_l_error = ul->un_error; un->un_l_timestamp = ul->un_timestamp; /* * check for log dev dynconcat; can only pick up extra space when the * tail physically follows the head in the circular log */ if (un->un_l_head <= un->un_l_tail) if (ul->un_status & LDL_METADEVICE) { struct mdc_unit *c = MD_UNIT(md_getminor(ul->un_dev)); if (c->un_total_blocks > un->un_l_tblks) { un->un_l_tblks = c->un_total_blocks; un->un_l_nblks = un->un_l_tblks - un->un_l_sblk; if (un->un_l_nblks > btodb(LDL_MAXLOGSIZE)) un->un_l_nblks = btodb(LDL_MAXLOGSIZE); un->un_l_maxresv = (uint_t)(un->un_l_nblks * LDL_USABLE_BSIZE); } } master: if (ddi_copyout(un, (void *)(uintptr_t)migp->mdp, un->c.un_size, mode)) return (EFAULT); return (0); } static int trans_replace(replace_params_t *params) { minor_t mnum = params->mnum; mt_unit_t *un; mdi_unit_t *ui; md_dev64_t cmp_dev; md_dev64_t ldev; md_dev64_t mdev; mdclrerror(¶ms->mde); ui = MDI_UNIT(mnum); un = md_unit_writerlock(ui); if (MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) { return (mdmderror(¶ms->mde, MDE_RESYNC_ACTIVE, mnum)); } cmp_dev = params->old_dev; mdev = un->un_m_dev; ldev = un->un_l_dev; if (cmp_dev == mdev) { un->un_m_key = params->new_key; un->un_m_dev = params->new_dev; } else if (cmp_dev == ldev) { un->un_l_key = params->new_key; un->un_l_dev = params->new_dev; } trans_commit(un, 1); md_unit_writerexit(ui); return (0); } /*ARGSUSED1*/ static int trans_grow(void *d, int mode, IOLOCK *lock) { mt_unit_t *un; md_grow_params_t *mgp = d; mdclrerror(&mgp->mde); un = trans_getun(mgp->mnum, &mgp->mde, RD_LOCK, lock); if (un == NULL) return (0); /* * check for master dev dynconcat */ if (md_getmajor(un->un_m_dev) == md_major) { struct mdc_unit *c; c = MD_UNIT(md_getminor(un->un_m_dev)); if (c->un_total_blocks > MD_MAX_BLKS_FOR_SMALL_DEVS) { un->c.un_total_blocks = MD_MAX_BLKS_FOR_SMALL_DEVS; } else { un->c.un_total_blocks = c->un_total_blocks; } md_nblocks_set(MD_SID(un), un->c.un_total_blocks); } return (0); } /*ARGSUSED1*/ static int trans_detach_ioctl(void *d, int mode, IOLOCK *lock) { mt_unit_t *un; int error; md_i_get_t *migp = d; mdclrerror(&migp->mde); /* acquire both md_unit_array_rw, and unit_reader lock */ un = trans_getun(migp->id, &migp->mde, READERS, lock); if (un == NULL) return (0); /* * simply too much work to make debug modes w/out a log */ if (un->un_debug) return (EACCES); /* * detach the log */ error = trans_detach(un, migp->size); return (error); } static int trans_get_log(void *d, int mode, IOLOCK *lock) { mt_unit_t *un; ml_unit_t *ul; md_i_get_t *migp = d; mdclrerror(&migp->mde); un = trans_getun(migp->id, &migp->mde, RD_LOCK, lock); if (un == NULL) return (0); ul = un->un_l_unit; if (migp->size == 0) { migp->size = ML_UNIT_ONDSZ; return (0); } if (migp->size < ML_UNIT_ONDSZ) return (EFAULT); if (ddi_copyout(ul, (void *)(uintptr_t)migp->mdp, ML_UNIT_ONDSZ, mode)) return (EFAULT); return (0); } static int trans_getdevs(void *d, int mode, IOLOCK *lock) { int ndev; mt_unit_t *un; md_dev64_t *udevs; md_dev64_t unit_dev; md_getdevs_params_t *mgdp = d; mdclrerror(&mgdp->mde); un = trans_getun(mgdp->mnum, &mgdp->mde, RD_LOCK, lock); if (un == NULL) return (0); ndev = (un->un_flags & (TRANS_DETACHED | TRANS_ATTACHING)) ? 1 : 2; if (mgdp->cnt == 0) { mgdp->cnt = ndev; return (0); } if (mgdp->cnt > 2) mgdp->cnt = ndev; udevs = (md_dev64_t *)(uintptr_t)mgdp->devs; unit_dev = un->un_m_dev; if (md_getmajor(unit_dev) != md_major) { if ((unit_dev = md_xlate_mini_2_targ(unit_dev)) == NODEV64) return (ENODEV); } if (mgdp->cnt >= 1) if (ddi_copyout(&unit_dev, (caddr_t)&udevs[0], sizeof (*udevs), mode) != 0) return (EFAULT); unit_dev = un->un_l_dev; if (md_getmajor(unit_dev) != md_major) { if ((unit_dev = md_xlate_mini_2_targ(unit_dev)) == NODEV64) return (ENODEV); } if (mgdp->cnt >= 2) if (ddi_copyout(&unit_dev, (caddr_t)&udevs[1], sizeof (*udevs), mode) != 0) return (EFAULT); return (0); } static int trans_reset_ioctl(md_i_reset_t *mirp, IOLOCK *lock) { minor_t mnum = mirp->mnum; mt_unit_t *un; int error; mdclrerror(&mirp->mde); un = trans_getun(mnum, &mirp->mde, NO_LOCK, lock); if (un == NULL) return (0); /* This prevents new opens */ rw_enter(&md_unit_array_rw.lock, RW_WRITER); if (MD_HAS_PARENT(MD_PARENT(un))) { rw_exit(&md_unit_array_rw.lock); return (mdmderror(&mirp->mde, MDE_IN_USE, mnum)); } if (md_unit_isopen(MDI_UNIT(mnum))) { rw_exit(&md_unit_array_rw.lock); return (mdmderror(&mirp->mde, MDE_IS_OPEN, mnum)); } /* * detach the log */ error = trans_detach(un, mirp->force); /* * reset (aka remove; aka delete) the trans device */ if (error == 0) error = trans_reset(un, mnum, 1, mirp->force); rw_exit(&md_unit_array_rw.lock); return (error); } static int trans_get_geom(mt_unit_t *un, struct dk_geom *geomp) { md_get_geom((md_unit_t *)un, geomp); return (0); } static int trans_get_vtoc(mt_unit_t *un, struct vtoc *vtocp) { md_get_vtoc((md_unit_t *)un, vtocp); return (0); } static int trans_get_extvtoc(mt_unit_t *un, struct extvtoc *vtocp) { md_get_extvtoc((md_unit_t *)un, vtocp); return (0); } static int trans_islog(mt_unit_t *un) { if (un->un_l_unit == NULL) return (ENXIO); return (0); } static int trans_set_vtoc( mt_unit_t *un, struct vtoc *vtocp ) { return (md_set_vtoc((md_unit_t *)un, vtocp)); } static int trans_set_extvtoc(mt_unit_t *un, struct extvtoc *vtocp) { return (md_set_extvtoc((md_unit_t *)un, vtocp)); } static int trans_get_cgapart( mt_unit_t *un, struct dk_map *dkmapp ) { md_get_cgapart((md_unit_t *)un, dkmapp); return (0); } static int trans_admin_ioctl( int cmd, caddr_t data, int mode, IOLOCK *lockp ) { size_t sz = 0; void *d = NULL; int err = 0; /* We can only handle 32-bit clients for internal commands */ if ((mode & DATAMODEL_MASK) != DATAMODEL_ILP32) { return (EINVAL); } switch (cmd) { case MD_IOCGET: { if (! (mode & FREAD)) return (EACCES); sz = sizeof (md_i_get_t); if ((d = md_trans_zalloc(sz)) == NULL) return (ENOMEM); if (ddi_copyin(data, d, sz, mode)) { err = EFAULT; break; } err = trans_get(d, mode, lockp); break; } case MD_IOCGET_LOG: { if (! (mode & FREAD)) return (EACCES); sz = sizeof (md_i_get_t); if ((d = md_trans_zalloc(sz)) == NULL) return (ENOMEM); if (ddi_copyin(data, d, sz, mode)) { err = EFAULT; break; } err = trans_get_log(d, mode, lockp); break; } case MD_IOCRESET: { md_i_reset_t *p; if (! (mode & FWRITE)) return (EACCES); if ((d = p = md_trans_zalloc((sz = sizeof (*p)))) == NULL) return (ENOMEM); if (ddi_copyin(data, d, sz, mode)) { err = EFAULT; break; } err = trans_reset_ioctl(p, lockp); break; } case MD_IOCGROW: { if (! (mode & FWRITE)) return (EACCES); sz = sizeof (md_grow_params_t); if ((d = md_trans_zalloc(sz)) == NULL) return (ENOMEM); if (ddi_copyin(data, d, sz, mode)) { err = EFAULT; break; } err = trans_grow(d, mode, lockp); break; } case MD_IOC_TRANS_DETACH: { if (! (mode & FWRITE)) return (EACCES); sz = sizeof (md_i_get_t); if ((d = md_trans_zalloc(sz)) == NULL) return (ENOMEM); if (ddi_copyin(data, d, sz, mode)) { err = EFAULT; break; } err = trans_detach_ioctl(d, mode, lockp); break; } case MD_IOCREPLACE: { replace_params_t *p; if (! (mode & FWRITE)) return (EACCES); if ((d = p = kmem_alloc((sz = sizeof (*p)), KM_SLEEP)) == NULL) return (ENOMEM); if (ddi_copyin(data, d, sz, mode)) { err = EFAULT; break; } err = trans_replace(p); break; } case MD_IOCGET_DEVS: { if (! (mode & FREAD)) return (EACCES); sz = sizeof (md_getdevs_params_t); if ((d = md_trans_zalloc(sz)) == NULL) return (ENOMEM); if (ddi_copyin(data, d, sz, mode)) { err = EFAULT; break; } err = trans_getdevs(d, mode, lockp); break; } /* * debug ioctls */ #ifdef DEBUG case MD_IOCGET_TRANSSTATS: { if (! (mode & FREAD)) return (EACCES); sz = sizeof (md_i_get_t); if ((d = md_trans_zalloc(sz)) == NULL) return (ENOMEM); if (ddi_copyin(data, d, sz, mode)) { err = EFAULT; break; } err = trans_get_transstats(d, mode); break; } case MD_IOC_DEBUG: { md_i_get_t *mdigp; if (! (mode & FWRITE)) return (EACCES); sz = sizeof (md_i_get_t); if ((d = md_trans_zalloc(sz)) == NULL) return (ENOMEM); if (ddi_copyin(data, d, sz, mode)) { err = EFAULT; break; } mdigp = d; mdclrerror(&mdigp->mde); mt_debug = mdigp->size; break; } case MD_IOC_TSD: { if (! (mode & FWRITE)) return (EACCES); sz = sizeof (md_i_get_t); if ((d = md_trans_zalloc(sz)) == NULL) return (ENOMEM); if (ddi_copyin(data, d, sz, mode)) { err = EFAULT; break; } err = trans_test_tsd(d, mode); break; } case MD_IOC_TRYGETBLK: { if (! (mode & FWRITE)) return (EACCES); sz = sizeof (md_i_get_t); if ((d = md_trans_zalloc(sz)) == NULL) return (ENOMEM); if (ddi_copyin(data, d, sz, mode)) { err = EFAULT; break; } err = trans_test_trygetblk(d, mode, lockp); break; } case MD_IOC_TRYPAGE: { if (! (mode & FWRITE)) return (EACCES); sz = sizeof (md_i_get_t); if ((d = md_trans_zalloc(sz)) == NULL) return (ENOMEM); if (ddi_copyin(data, d, sz, mode)) { err = EFAULT; break; } err = trans_test_trypage(d, mode, lockp); break; } case MD_IOC_INJECTERRORS: { if (! (mode & FWRITE)) return (EACCES); sz = sizeof (md_i_get_t); if ((d = md_trans_zalloc(sz)) == NULL) return (ENOMEM); if (ddi_copyin(data, d, sz, mode)) { err = EFAULT; break; } err = trans_inject_errors(d, mode, lockp); break; } case MD_IOC_STOPERRORS: { if (! (mode & FWRITE)) return (EACCES); sz = sizeof (md_i_get_t); if ((d = md_trans_zalloc(sz)) == NULL) return (ENOMEM); if (ddi_copyin(data, d, sz, mode)) { err = EFAULT; break; } err = trans_stop_errors(d, mode, lockp); break; } case MD_IOC_ISDEBUG: break; #else /* ! DEBUG */ case MD_IOC_ISDEBUG: case MD_IOCGET_TRANSSTATS: case MD_IOC_STOPERRORS: case MD_IOC_TSD: case MD_IOC_TRYGETBLK: case MD_IOC_TRYPAGE: break; /* * error injection behaves like MD_IOC_UFSERROR in released product */ case MD_IOC_INJECTERRORS: { if (! (mode & FWRITE)) return (EACCES); sz = sizeof (md_i_get_t); if ((d = md_trans_zalloc(sz)) == NULL) return (ENOMEM); if (ddi_copyin(data, d, sz, mode)) { err = EFAULT; break; } err = trans_ufserror(d, mode, lockp); break; } /* * only the shadow test is allowed in the released product */ case MD_IOC_DEBUG: { md_i_get_t *mdigp; if (! (mode & FWRITE)) return (EACCES); sz = sizeof (md_i_get_t); if ((d = md_trans_zalloc(sz)) == NULL) return (ENOMEM); if (ddi_copyin(data, d, sz, mode)) { err = EFAULT; break; } mdigp = d; mdclrerror(&mdigp->mde); mt_debug = mdigp->size & MT_SHADOW; break; } #endif /* ! DEBUG */ /* * BEGIN RELEASE DEBUG * The following routines remain in the released product for testability */ case MD_IOC_UFSERROR: { if (! (mode & FWRITE)) return (EACCES); sz = sizeof (md_i_get_t); if ((d = md_trans_zalloc(sz)) == NULL) return (ENOMEM); if (ddi_copyin(data, d, sz, mode)) { err = EFAULT; break; } err = trans_ufserror(d, mode, lockp); break; } case MD_IOC_SETSHADOW: { if (! (mode & FWRITE)) return (EACCES); sz = sizeof (md_i_get_t); if ((d = md_trans_zalloc(sz)) == NULL) return (ENOMEM); if (ddi_copyin(data, d, sz, mode)) { err = EFAULT; break; } err = trans_set_shadow(d, mode, lockp); break; } /* * END RELEASE DEBUG */ default: return (ENOTTY); } /* * copyout and free any args */ if (sz != 0) { if (err == 0) { if (ddi_copyout(d, data, sz, mode) != 0) { err = EFAULT; } } md_trans_free(d, sz); } return (err); } int md_trans_ioctl( dev_t dev, int cmd, caddr_t data, int mode, IOLOCK *lockp ) { minor_t mnum = getminor(dev); mt_unit_t *un; md_error_t mde = mdnullerror; int err = 0; /* handle admin ioctls */ if (mnum == MD_ADM_MINOR) return (trans_admin_ioctl(cmd, data, mode, lockp)); /* check unit */ if ((MD_MIN2SET(mnum) >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits) || ((un = trans_getun(mnum, &mde, RD_LOCK, lockp)) == NULL)) return (ENXIO); /* dispatch ioctl */ switch (cmd) { case DKIOCINFO: { struct dk_cinfo *p; if (! (mode & FREAD)) return (EACCES); if ((p = md_trans_zalloc(sizeof (*p))) == NULL) return (ENOMEM); get_info(p, mnum); if (ddi_copyout((caddr_t)p, data, sizeof (*p), mode) != 0) err = EFAULT; md_trans_free(p, sizeof (*p)); return (err); } case DKIOCGGEOM: { struct dk_geom *p; if (! (mode & FREAD)) return (EACCES); if ((p = md_trans_zalloc(sizeof (*p))) == NULL) return (ENOMEM); if ((err = trans_get_geom(un, p)) == 0) { if (ddi_copyout((caddr_t)p, data, sizeof (*p), mode) != 0) err = EFAULT; } md_trans_free(p, sizeof (*p)); return (err); } case DKIOCGVTOC: { struct vtoc *vtoc; if (! (mode & FREAD)) return (EACCES); vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP); if ((err = trans_get_vtoc(un, vtoc)) != 0) { kmem_free(vtoc, sizeof (*vtoc)); return (err); } if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) { if (ddi_copyout(vtoc, data, sizeof (*vtoc), mode)) err = EFAULT; } #ifdef _SYSCALL32 else { struct vtoc32 *vtoc32; vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP); vtoctovtoc32((*vtoc), (*vtoc32)); if (ddi_copyout(vtoc32, data, sizeof (*vtoc32), mode)) err = EFAULT; kmem_free(vtoc32, sizeof (*vtoc32)); } #endif /* _SYSCALL32 */ kmem_free(vtoc, sizeof (*vtoc)); return (err); } case DKIOCSVTOC: { struct vtoc *vtoc; if (! (mode & FWRITE)) return (EACCES); vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP); if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) { if (ddi_copyin(data, vtoc, sizeof (*vtoc), mode)) { err = EFAULT; } } #ifdef _SYSCALL32 else { struct vtoc32 *vtoc32; vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP); if (ddi_copyin(data, vtoc32, sizeof (*vtoc32), mode)) { err = EFAULT; } else { vtoc32tovtoc((*vtoc32), (*vtoc)); } kmem_free(vtoc32, sizeof (*vtoc32)); } #endif /* _SYSCALL32 */ if (err == 0) err = trans_set_vtoc(un, vtoc); kmem_free(vtoc, sizeof (*vtoc)); return (err); } case DKIOCGEXTVTOC: { struct extvtoc *extvtoc; if (! (mode & FREAD)) return (EACCES); extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP); if ((err = trans_get_extvtoc(un, extvtoc)) != 0) { return (err); } if (ddi_copyout(extvtoc, data, sizeof (*extvtoc), mode)) err = EFAULT; kmem_free(extvtoc, sizeof (*extvtoc)); return (err); } case DKIOCSEXTVTOC: { struct extvtoc *extvtoc; if (! (mode & FWRITE)) return (EACCES); extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP); if (ddi_copyin(data, extvtoc, sizeof (*extvtoc), mode)) { err = EFAULT; } if (err == 0) err = trans_set_extvtoc(un, extvtoc); kmem_free(extvtoc, sizeof (*extvtoc)); return (err); } case DKIOCGAPART: { struct dk_map dmp; if ((err = trans_get_cgapart(un, &dmp)) != 0) { return (err); } if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) { if (ddi_copyout((caddr_t)&dmp, data, sizeof (dmp), mode) != 0) err = EFAULT; } #ifdef _SYSCALL32 else { struct dk_map32 dmp32; dmp32.dkl_cylno = dmp.dkl_cylno; dmp32.dkl_nblk = dmp.dkl_nblk; if (ddi_copyout((caddr_t)&dmp32, data, sizeof (dmp32), mode) != 0) err = EFAULT; } #endif /* _SYSCALL32 */ return (err); } /* * _FIOISLOG, _FIOISLOGOK, _FIOLOGRESET are used by fsck/mkfs * after opening the device. fsck/mkfs use these ioctls for * error recovery. */ case _FIOISLOG: return (trans_islog(un)); default: return (ENOTTY); } } /* * rename named service entry points and support functions */ /* rename/exchange role swap functions */ /* * MDRNM_UPDATE_SELF * This role swap function is identical for all unit types, * so keep it here. It's also the best example because it * touches all the modified portions of the relevant * in-common structures. */ void trans_rename_update_self( md_rendelta_t *delta, md_rentxn_t *rtxnp) { minor_t from_min, to_min; sv_dev_t sv; mt_unit_t *un; ASSERT(rtxnp); ASSERT(rtxnp->op == MDRNOP_RENAME); ASSERT(delta); ASSERT(delta->unp); ASSERT(delta->uip); ASSERT(rtxnp->rec_idx >= 0); ASSERT(rtxnp->recids); ASSERT(delta->old_role == MDRR_SELF); ASSERT(delta->new_role == MDRR_SELF); from_min = rtxnp->from.mnum; to_min = rtxnp->to.mnum; un = (mt_unit_t *)delta->unp; /* * self id changes in our own unit struct * both mechanisms for identifying the trans must be reset. */ MD_SID(delta->unp) = to_min; un->un_dev = makedevice(md_major, to_min); /* * clear old array pointers to unit in-core and unit */ MDI_VOIDUNIT(from_min) = NULL; MD_VOIDUNIT(from_min) = NULL; /* * and point the new slots at the unit in-core and unit structs */ MDI_VOIDUNIT(to_min) = delta->uip; MD_VOIDUNIT(to_min) = delta->unp; /* * recreate kstats */ md_kstat_destroy_ui(delta->uip); md_kstat_init_ui(to_min, delta->uip); /* * the unit in-core reference to the get next link's id changes */ delta->uip->ui_link.ln_id = to_min; /* * name space addition of new key was done from user-level * remove the old name's key here */ sv.setno = MD_MIN2SET(from_min); sv.key = rtxnp->from.key; md_rem_names(&sv, 1); /* * and store the record id (from the unit struct) into recids * for later commitment by md_rename() */ md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp); } /* * MDRNM_UPDATE_KIDS * rename/exchange of our child or grandchild */ void trans_renexch_update_kids( md_rendelta_t *delta, md_rentxn_t *rtxnp) { mt_unit_t *un; minor_t from_min, to_min, log_min, master_min; ASSERT(delta); ASSERT(rtxnp); ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE)); ASSERT(delta->unp); ASSERT(rtxnp->recids); ASSERT(rtxnp->rec_idx >= 0); ASSERT(delta->old_role == MDRR_PARENT); ASSERT(delta->new_role == MDRR_PARENT); un = (mt_unit_t *)delta->unp; from_min = rtxnp->from.mnum; to_min = rtxnp->to.mnum; log_min = md_getminor(un->un_l_dev); master_min = md_getminor(un->un_m_dev); /* * since our role isn't changing (parent->parent) * one of our children must be changing; which one is it? * find the child being modified, and update * our notion of it */ /* both devices must be metadevices in order to be updated */ ASSERT(md_getmajor(un->un_m_dev) == md_major); ASSERT(!(un->un_l_unit && (md_getmajor(un->un_l_dev) != md_major))); if ((md_getmajor(un->un_m_dev) == md_major) && (master_min == from_min)) { ASSERT(!(un->un_l_unit && (log_min == from_min))); un->un_m_dev = makedevice(md_major, to_min); un->un_m_key = rtxnp->to.key; } else if ((md_getmajor(un->un_m_dev) == md_major) && un->un_l_unit && (log_min == from_min)) { ASSERT(master_min != from_min); un->un_l_dev = makedevice(md_major, to_min); un->un_l_key = rtxnp->to.key; } else { ASSERT(FALSE); panic("trans_renexch_update_kids: not a metadevice"); /*NOTREACHED*/ } md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp); } /* * MDRNM_SELF_UPDATE_FROM (exchange down) [self->child] */ void trans_exchange_self_update_from_down( md_rendelta_t *delta, md_rentxn_t *rtxnp) { mt_unit_t *un; minor_t from_min, to_min, master_min, log_min; sv_dev_t sv; ASSERT(delta); ASSERT(delta->unp); ASSERT(delta->uip); ASSERT(rtxnp); ASSERT(MDRNOP_EXCHANGE == rtxnp->op); ASSERT(rtxnp->from.uip); ASSERT(rtxnp->rec_idx >= 0); ASSERT(rtxnp->recids); ASSERT(delta->old_role == MDRR_SELF); ASSERT(delta->new_role == MDRR_CHILD); ASSERT(md_getminor(delta->dev) == rtxnp->from.mnum); un = (mt_unit_t *)delta->unp; /* * if we're exchanging a trans, it had better be a metadevice */ ASSERT(md_getmajor(un->un_m_dev) == md_major); to_min = rtxnp->to.mnum; from_min = rtxnp->from.mnum; master_min = md_getminor(un->un_m_dev); log_min = md_getminor(un->un_l_dev); /* * both mechanisms for identifying a trans must be updated */ MD_SID(delta->unp) = to_min; un->un_dev = makedevice(md_major, to_min); /* * parent identifier need not change */ /* * point the set array pointers at the "new" unit and unit in-cores * Note: the other half of this transfer is done in the "update to" * rename/exchange named service. */ MDI_VOIDUNIT(to_min) = delta->uip; MD_VOIDUNIT(to_min) = delta->unp; /* * transfer kstats */ delta->uip->ui_kstat = rtxnp->to.kstatp; /* * the unit in-core reference to the get next link's id changes */ delta->uip->ui_link.ln_id = to_min; /* * which one of our children is changing? * * Note that the check routines forbid changing the log (for now) * because there's no lockfs-like trans-ufs "freeze and remount" * or "freeze and bobbit the log." */ /* both devices must be metadevices in order to be updated */ ASSERT(md_getmajor(un->un_m_dev) == md_major); ASSERT(!(un->un_l_unit && (md_getmajor(un->un_l_dev) != md_major))); if ((md_getmajor(un->un_m_dev) == md_major) && (master_min == to_min)) { /* master and log can't both be changed */ ASSERT(!(un->un_l_unit && (log_min == to_min))); un->un_m_dev = makedevice(md_major, from_min); sv.key = un->un_m_key; un->un_m_key = rtxnp->from.key; } else if ((md_getmajor(un->un_m_dev) == md_major) && un->un_l_unit && (log_min == to_min)) { /* master and log can't both be changed */ ASSERT(!(master_min == to_min)); un->un_l_dev = makedevice(md_major, from_min); sv.key = un->un_l_key; un->un_l_key = rtxnp->from.key; } else { ASSERT(FALSE); panic("trans_exchange_self_update_from_down: not a metadevice"); /*NOTREACHED*/ } /* * the new master must exist in the name space */ ASSERT(rtxnp->from.key != MD_KEYWILD); ASSERT(rtxnp->from.key != MD_KEYBAD); /* * delete the key for the changed child from the namespace */ sv.setno = MD_MIN2SET(from_min); md_rem_names(&sv, 1); /* * and store the record id (from the unit struct) into recids */ md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp); } /* * MDRNM_PARENT_UPDATE_TO (exchange down) [parent->self] */ void trans_exchange_parent_update_to( md_rendelta_t *delta, md_rentxn_t *rtxnp) { mt_unit_t *un; minor_t from_min, to_min, master_min, log_min; sv_dev_t sv; ASSERT(delta); ASSERT(delta->unp); ASSERT(delta->uip); ASSERT(rtxnp); ASSERT(MDRNOP_EXCHANGE == rtxnp->op); ASSERT(rtxnp->from.uip); ASSERT(rtxnp->rec_idx >= 0); ASSERT(rtxnp->recids); ASSERT(delta->old_role == MDRR_PARENT); ASSERT(delta->new_role == MDRR_SELF); ASSERT(md_getminor(delta->dev) == rtxnp->to.mnum); un = (mt_unit_t *)delta->unp; ASSERT(md_getmajor(un->un_m_dev) == md_major); to_min = rtxnp->to.mnum; from_min = rtxnp->from.mnum; master_min = md_getminor(un->un_m_dev); log_min = md_getminor(un->un_l_dev); /* * both mechanisms for identifying a trans must be updated */ MD_SID(delta->unp) = from_min; un->un_dev = makedevice(md_major, from_min); /* * parent identifier need not change */ /* * point the set array pointers at the "new" unit and unit in-cores * Note: the other half of this transfer is done in the "update to" * rename/exchange named service. */ MDI_VOIDUNIT(from_min) = delta->uip; MD_VOIDUNIT(from_min) = delta->unp; /* * transfer kstats */ delta->uip->ui_kstat = rtxnp->from.kstatp; /* * the unit in-core reference to the get next link's id changes */ delta->uip->ui_link.ln_id = from_min; /* * which one of our children is changing? */ /* both devices must be metadevices in order to be updated */ ASSERT(md_getmajor(un->un_m_dev) == md_major); ASSERT(!(un->un_l_unit && (md_getmajor(un->un_l_dev) != md_major))); if ((md_getmajor(un->un_m_dev) == md_major) && (master_min == from_min)) { /* can't be changing log and master */ ASSERT(!(un->un_l_unit && (log_min == to_min))); un->un_m_dev = makedevice(md_major, to_min); sv.key = un->un_m_key; un->un_m_key = rtxnp->to.key; } else if (un->un_l_unit && ((md_getmajor(un->un_l_dev) == md_major) && log_min == to_min)) { /* can't be changing log and master */ ASSERT(master_min != from_min); un->un_l_dev = makedevice(md_major, to_min); sv.key = un->un_l_key; un->un_l_key = rtxnp->to.key; } else { ASSERT(FALSE); panic("trans_exchange_parent_update_to: not a metadevice"); /*NOTREACHED*/ } /* * delete the key for the changed child from the namespace */ sv.setno = MD_MIN2SET(from_min); md_rem_names(&sv, 1); /* * and store the record id (from the unit struct) into recids */ md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp); } /* * MDRNM_LIST_URKIDS: named svc entry point * all all delta entries appropriate for our children onto the * deltalist pointd to by dlpp */ int trans_rename_listkids( md_rendelta_t **dlpp, md_rentxn_t *rtxnp) { minor_t from_min, to_min, master_min, log_min; mt_unit_t *from_un; md_rendelta_t *new, *p; int n_children; ASSERT(rtxnp); ASSERT(dlpp); ASSERT((rtxnp->op == MDRNOP_EXCHANGE) || (rtxnp->op == MDRNOP_RENAME)); from_min = rtxnp->from.mnum; to_min = rtxnp->to.mnum; n_children = 0; if (!MDI_UNIT(from_min) || !(from_un = MD_UNIT(from_min))) { (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, from_min); return (-1); } for (p = *dlpp; p && p->next != NULL; p = p->next) { /* NULL */ } if (md_getmajor(from_un->un_m_dev) == md_major) { master_min = md_getminor(from_un->un_m_dev); p = new = md_build_rendelta(MDRR_CHILD, to_min == master_min? MDRR_SELF: MDRR_CHILD, from_un->un_m_dev, p, MD_UNIT(master_min), MDI_UNIT(master_min), &rtxnp->mde); if (!new) { if (mdisok(&rtxnp->mde)) { (void) mdsyserror(&rtxnp->mde, ENOMEM); } return (-1); } ++n_children; } if (from_un->un_l_unit && (md_getmajor(from_un->un_l_dev) == md_major)) { log_min = md_getminor(from_un->un_l_dev); new = md_build_rendelta(MDRR_CHILD, to_min == log_min? MDRR_SELF: MDRR_CHILD, from_un->un_l_dev, p, MD_UNIT(log_min), MDI_UNIT(log_min), &rtxnp->mde); if (!new) { if (mdisok(&rtxnp->mde)) { (void) mdsyserror(&rtxnp->mde, ENOMEM); } return (-1); } ++n_children; } return (n_children); } /* * support routine for MDRNM_CHECK */ static int trans_may_renexch_self( mt_unit_t *un, mdi_unit_t *ui, md_rentxn_t *rtxnp) { minor_t from_min; minor_t to_min; ASSERT(rtxnp); ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE)); from_min = rtxnp->from.mnum; to_min = rtxnp->to.mnum; if (!un || !ui) { (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR, from_min); return (EINVAL); } ASSERT(MD_CAPAB(un) & MD_CAN_META_CHILD); if (!(MD_CAPAB(un) & MD_CAN_META_CHILD)) { (void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min); return (EINVAL); } if (MD_PARENT(un) == MD_MULTI_PARENT) { (void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min); return (EINVAL); } switch (rtxnp->op) { case MDRNOP_EXCHANGE: /* * may only swap with our child (master) if it is a metadevice */ if (md_getmajor(un->un_m_dev) != md_major) { (void) mdmderror(&rtxnp->mde, MDE_RENAME_TARGET_BAD, to_min); return (EINVAL); } if (un->un_l_unit && (md_getmajor(un->un_l_dev) != md_major)) { (void) mdmderror(&rtxnp->mde, MDE_RENAME_TARGET_BAD, to_min); return (EINVAL); } if (md_getminor(un->un_m_dev) != to_min) { (void) mdmderror(&rtxnp->mde, MDE_RENAME_TARGET_BAD, to_min); return (EINVAL); } break; case MDRNOP_RENAME: break; default: (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR, from_min); return (EINVAL); } return (0); /* ok */ } /* * Named service entry point: MDRNM_CHECK */ intptr_t trans_rename_check( md_rendelta_t *delta, md_rentxn_t *rtxnp) { int err = 0; mt_unit_t *un; ASSERT(delta); ASSERT(rtxnp); ASSERT(delta->unp); ASSERT(delta->uip); ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE)); if (!delta || !rtxnp || !delta->unp || !delta->uip) { (void) mdsyserror(&rtxnp->mde, EINVAL); return (EINVAL); } un = (mt_unit_t *)delta->unp; if (rtxnp->revision == MD_RENAME_VERSION_OFFLINE) { /* * trans' may not be open, if it is being modified in the exchange * or rename; trans-UFS hasn't been verified to handle the change * out from underneath it. */ if ((md_unit_isopen(delta->uip)) && ((md_getminor(delta->dev) == rtxnp->from.mnum) || (md_getminor(delta->dev) == rtxnp->to.mnum))) { (void) mdmderror(&rtxnp->mde, MDE_RENAME_BUSY, rtxnp->from.mnum); return (EBUSY); } } /* * can't rename or exchange with a log attached */ if (un->un_l_unit) { (void) mdmderror(&rtxnp->mde, MDE_RENAME_BUSY, rtxnp->from.mnum); return (EBUSY); } switch (delta->old_role) { case MDRR_SELF: /* * self does additional checks */ err = trans_may_renexch_self((mt_unit_t *)delta->unp, delta->uip, rtxnp); if (err != 0) { goto out; } /* FALLTHROUGH */ case MDRR_PARENT: /* * top_is_trans is only used to check for online * rename/exchange when MD_RENAME_VERSION == OFFLINE * since trans holds the sub-devices open */ rtxnp->stat.trans_in_stack = TRUE; break; default: break; } out: return (err); } /* end of rename/exchange */