dsl_pool.c revision 789:b348f31ed315
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
263584Ssos
273584Ssos#pragma ident	"%Z%%M%	%I%	%E% SMI"
283584Ssos
2950477Speter#include <sys/dsl_pool.h>
303584Ssos#include <sys/dsl_dataset.h>
313584Ssos#include <sys/dsl_dir.h>
323584Ssos#include <sys/dmu_tx.h>
333584Ssos#include <sys/dmu_objset.h>
343584Ssos#include <sys/arc.h>
353584Ssos#include <sys/zap.h>
363584Ssos#include <sys/zfs_context.h>
373584Ssos#include <sys/fs/zfs.h>
383584Ssos
/* Reserved name of the internal meta-objset (MOS) directory. */
#define	MOS_DIR_NAME "$MOS"
413584Ssos
423584Ssosstatic dsl_dir_t *
433584Ssosdsl_pool_open_mos_dir(dsl_pool_t *dp)
443584Ssos{
453584Ssos	uint64_t obj;
463584Ssos	int err;
473584Ssos
483584Ssos	err = zap_lookup(dp->dp_meta_objset,
493584Ssos	    dp->dp_root_dir->dd_phys->dd_child_dir_zapobj,
503584Ssos	    MOS_DIR_NAME, sizeof (obj), 1, &obj);
513584Ssos	ASSERT3U(err, ==, 0);
523584Ssos
533584Ssos	return (dsl_dir_open_obj(dp, obj, MOS_DIR_NAME, dp));
543584Ssos}
553584Ssos
563584Ssosstatic dsl_pool_t *
573584Ssosdsl_pool_open_impl(spa_t *spa, uint64_t txg)
583584Ssos{
593584Ssos	dsl_pool_t *dp;
603584Ssos	blkptr_t *bp = spa_get_rootblkptr(spa);
613584Ssos
623584Ssos	dp = kmem_zalloc(sizeof (dsl_pool_t), KM_SLEEP);
633584Ssos	dp->dp_spa = spa;
643584Ssos	dp->dp_meta_rootbp = *bp;
653584Ssos	txg_init(dp, txg);
663584Ssos
673584Ssos	txg_list_create(&dp->dp_dirty_datasets,
683584Ssos	    offsetof(dsl_dataset_t, ds_dirty_link));
693584Ssos	txg_list_create(&dp->dp_dirty_dirs,
703584Ssos	    offsetof(dsl_dir_t, dd_dirty_link));
713584Ssos	list_create(&dp->dp_synced_objsets, sizeof (dsl_dataset_t),
723584Ssos	    offsetof(dsl_dataset_t, ds_synced_link));
733584Ssos
743584Ssos	return (dp);
753584Ssos}
763584Ssos
773584Ssosdsl_pool_t *
783584Ssosdsl_pool_open(spa_t *spa, uint64_t txg)
793584Ssos{
803584Ssos	int err;
813584Ssos	dsl_pool_t *dp = dsl_pool_open_impl(spa, txg);
823584Ssos
833584Ssos	dp->dp_meta_objset =
843584Ssos	    &dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp)->os;
853584Ssos
863584Ssos	rw_enter(&dp->dp_config_rwlock, RW_READER);
873584Ssos	err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
883584Ssos	    DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1,
893584Ssos	    &dp->dp_root_dir_obj);
903584Ssos	ASSERT3U(err, ==, 0);
913584Ssos
923584Ssos	dp->dp_root_dir = dsl_dir_open_obj(dp, dp->dp_root_dir_obj,
933584Ssos	    NULL, dp);
943584Ssos	dp->dp_mos_dir = dsl_pool_open_mos_dir(dp);
953584Ssos	rw_exit(&dp->dp_config_rwlock);
963584Ssos
973584Ssos	return (dp);
983584Ssos}
993584Ssos
1003584Ssosvoid
1013584Ssosdsl_pool_close(dsl_pool_t *dp)
1023584Ssos{
1033584Ssos	/* drop our reference from dsl_pool_open() */
1043584Ssos	dsl_dir_close(dp->dp_mos_dir, dp);
1053584Ssos	dsl_dir_close(dp->dp_root_dir, dp);
1063584Ssos
107	/* undo the dmu_objset_open_impl(mos) from dsl_pool_open() */
108	dmu_objset_evict(NULL, dp->dp_meta_objset->os);
109
110	txg_list_destroy(&dp->dp_dirty_datasets);
111	txg_list_destroy(&dp->dp_dirty_dirs);
112	list_destroy(&dp->dp_synced_objsets);
113
114	arc_flush();
115	txg_fini(dp);
116	kmem_free(dp, sizeof (dsl_pool_t));
117}
118
119dsl_pool_t *
120dsl_pool_create(spa_t *spa, uint64_t txg)
121{
122	int err;
123	dsl_pool_t *dp = dsl_pool_open_impl(spa, txg);
124	dmu_tx_t *tx = dmu_tx_create_assigned(dp, txg);
125	dp->dp_meta_objset = &dmu_objset_create_impl(spa,
126	    NULL, DMU_OST_META, tx)->os;
127
128	/* create the pool directory */
129	err = zap_create_claim(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
130	    DMU_OT_OBJECT_DIRECTORY, DMU_OT_NONE, 0, tx);
131	ASSERT3U(err, ==, 0);
132
133	/* create and open the root dir */
134	dsl_dataset_create_root(dp, &dp->dp_root_dir_obj, tx);
135	dp->dp_root_dir = dsl_dir_open_obj(dp, dp->dp_root_dir_obj,
136	    NULL, dp);
137
138	/* create and open the meta-objset dir */
139	err = dsl_dir_create_sync(dp->dp_root_dir, MOS_DIR_NAME,
140	    tx);
141	ASSERT3U(err, ==, 0);
142	dp->dp_mos_dir = dsl_pool_open_mos_dir(dp);
143
144	dmu_tx_commit(tx);
145
146	return (dp);
147}
148
149void
150dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
151{
152	dmu_tx_t *tx;
153	objset_impl_t *mosi = dp->dp_meta_objset->os;
154
155	tx = dmu_tx_create_assigned(dp, txg);
156
157	do {
158		dsl_dir_t *dd;
159		dsl_dataset_t *ds;
160
161		while (ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) {
162			if (!list_link_active(&ds->ds_synced_link))
163				list_insert_tail(&dp->dp_synced_objsets, ds);
164			dsl_dataset_sync(ds, tx);
165		}
166		while (dd = txg_list_remove(&dp->dp_dirty_dirs, txg))
167			dsl_dir_sync(dd, tx);
168		/*
169		 * We need to loop since dsl_dir_sync() could create a
170		 * new (dirty) objset.
171		 * XXX - isn't this taken care of by the spa's sync to
172		 * convergence loop?
173		 */
174	} while (!txg_list_empty(&dp->dp_dirty_datasets, txg));
175
176	if (list_head(&mosi->os_dirty_dnodes[txg & TXG_MASK]) != NULL ||
177	    list_head(&mosi->os_free_dnodes[txg & TXG_MASK]) != NULL) {
178		dmu_objset_sync(mosi, tx);
179		dprintf_bp(&dp->dp_meta_rootbp, "meta objset rootbp is %s", "");
180		spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp);
181	}
182
183	dmu_tx_commit(tx);
184}
185
186void
187dsl_pool_zil_clean(dsl_pool_t *dp)
188{
189	dsl_dataset_t *ds;
190
191	while (ds = list_head(&dp->dp_synced_objsets)) {
192		list_remove(&dp->dp_synced_objsets, ds);
193		ASSERT(ds->ds_user_ptr != NULL);
194		zil_clean(((objset_impl_t *)ds->ds_user_ptr)->os_zil);
195	}
196}
197
198int
199dsl_pool_sync_context(dsl_pool_t *dp)
200{
201	/*
202	 * Yeah, this is cheesy.  But the SPA needs some way to let
203	 * the sync threads invoke spa_open() and spa_close() while
204	 * it holds the namespace lock.  I'm certainly open to better
205	 * ideas for how to determine whether the current thread is
206	 * operating on behalf of spa_sync().  This works for now.
207	 */
208	return (curthread == dp->dp_tx.tx_sync_thread ||
209	    BP_IS_HOLE(&dp->dp_meta_rootbp));
210}
211
212uint64_t
213dsl_pool_adjustedsize(dsl_pool_t *dp, boolean_t netfree)
214{
215	uint64_t space, resv;
216
217	/*
218	 * Reserve about 1% (1/128), or at least 16MB, for allocation
219	 * efficiency.
220	 * XXX The intent log is not accounted for, so it must fit
221	 * within this slop.
222	 *
223	 * If we're trying to assess whether it's OK to do a free,
224	 * cut the reservation in half to allow forward progress
225	 * (e.g. make it possible to rm(1) files from a full pool).
226	 */
227	space = spa_get_space(dp->dp_spa);
228	resv = MAX(space >> 7, SPA_MINDEVSIZE >> 2);
229	if (netfree)
230		resv >>= 1;
231
232	return (space - resv);
233}
234