1168404Spjd/*
2168404Spjd * CDDL HEADER START
3168404Spjd *
4168404Spjd * The contents of this file are subject to the terms of the
5168404Spjd * Common Development and Distribution License (the "License").
6168404Spjd * You may not use this file except in compliance with the License.
7168404Spjd *
8168404Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9168404Spjd * or http://www.opensolaris.org/os/licensing.
10168404Spjd * See the License for the specific language governing permissions
11168404Spjd * and limitations under the License.
12168404Spjd *
13168404Spjd * When distributing Covered Code, include this CDDL HEADER in each
14168404Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15168404Spjd * If applicable, add the following below this CDDL HEADER, with the
16168404Spjd * fields enclosed by brackets "[]" replaced with your own identifying
17168404Spjd * information: Portions Copyright [yyyy] [name of copyright owner]
18168404Spjd *
19168404Spjd * CDDL HEADER END
20168404Spjd */
21168404Spjd/*
22219089Spjd * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23168404Spjd * Use is subject to license terms.
24253821Sdelphij */
25253821Sdelphij/*
26235222Smm * Copyright (c) 2012, Joyent, Inc. All rights reserved.
27235222Smm * Copyright (c) 2012, Martin Matuska <mm@FreeBSD.org>. All rights reserved.
28253821Sdelphij * Copyright (c) 2013 by Delphix. All rights reserved.
29168404Spjd */
30168404Spjd
31168404Spjd#ifndef _SYS_DMU_IMPL_H
32168404Spjd#define	_SYS_DMU_IMPL_H
33168404Spjd
34168404Spjd#include <sys/txg_impl.h>
35168404Spjd#include <sys/zio.h>
36168404Spjd#include <sys/dnode.h>
37219089Spjd#include <sys/kstat.h>
38168404Spjd#include <sys/zfs_context.h>
39235222Smm#include <sys/zfs_ioctl.h>
40168404Spjd
41168404Spjd#ifdef	__cplusplus
42168404Spjdextern "C" {
43168404Spjd#endif
44168404Spjd
45168404Spjd/*
 * This is the locking strategy for the DMU.  Numbers in parentheses are
 * cases that use that lock order, referenced below:
48168404Spjd *
49168404Spjd * ARC is self-contained
50168404Spjd * bplist is self-contained
51168404Spjd * refcount is self-contained
52168404Spjd * txg is self-contained (hopefully!)
53168404Spjd * zst_lock
54168404Spjd * zf_rwlock
55168404Spjd *
56168404Spjd * XXX try to improve evicting path?
57168404Spjd *
 * dp_config_rwlock > os_obj_lock > dn_struct_rwlock >
 * 	dn_dbufs_mtx > hash_mutexes > db_mtx > dd_lock > leaves
60168404Spjd *
61168404Spjd * dp_config_rwlock
62168404Spjd *    must be held before: everything
63168404Spjd *    protects dd namespace changes
64168404Spjd *    protects property changes globally
65168404Spjd *    held from:
66168404Spjd *    	dsl_dir_open/r:
67168404Spjd *    	dsl_dir_create_sync/w:
68168404Spjd *    	dsl_dir_sync_destroy/w:
69168404Spjd *    	dsl_dir_rename_sync/w:
70168404Spjd *    	dsl_prop_changed_notify/r:
71168404Spjd *
72168404Spjd * os_obj_lock
73168404Spjd *   must be held before:
74168404Spjd *   	everything except dp_config_rwlock
75168404Spjd *   protects os_obj_next
76168404Spjd *   held from:
77168404Spjd *   	dmu_object_alloc: dn_dbufs_mtx, db_mtx, hash_mutexes, dn_struct_rwlock
78168404Spjd *
79168404Spjd * dn_struct_rwlock
80168404Spjd *   must be held before:
81168404Spjd *   	everything except dp_config_rwlock and os_obj_lock
82168404Spjd *   protects structure of dnode (eg. nlevels)
83168404Spjd *   	db_blkptr can change when syncing out change to nlevels
84168404Spjd *   	dn_maxblkid
85168404Spjd *   	dn_nlevels
86168404Spjd *   	dn_*blksz*
87168404Spjd *   	phys nlevels, maxblkid, physical blkptr_t's (?)
88168404Spjd *   held from:
89168404Spjd *   	callers of dbuf_read_impl, dbuf_hold[_impl], dbuf_prefetch
90168404Spjd *   	dmu_object_info_from_dnode: dn_dirty_mtx (dn_datablksz)
91168404Spjd *   	dmu_tx_count_free:
92168404Spjd *   	dbuf_read_impl: db_mtx, dmu_zfetch()
93168404Spjd *   	dmu_zfetch: zf_rwlock/r, zst_lock, dbuf_prefetch()
94168404Spjd *   	dbuf_new_size: db_mtx
95168404Spjd *   	dbuf_dirty: db_mtx
96168404Spjd *	dbuf_findbp: (callers, phys? - the real need)
97168404Spjd *	dbuf_create: dn_dbufs_mtx, hash_mutexes, db_mtx (phys?)
98168404Spjd *	dbuf_prefetch: dn_dirty_mtx, hash_mutexes, db_mtx, dn_dbufs_mtx
99168404Spjd *	dbuf_hold_impl: hash_mutexes, db_mtx, dn_dbufs_mtx, dbuf_findbp()
100168404Spjd *	dnode_sync/w (increase_indirection): db_mtx (phys)
101168404Spjd *	dnode_set_blksz/w: dn_dbufs_mtx (dn_*blksz*)
102168404Spjd *	dnode_new_blkid/w: (dn_maxblkid)
103168404Spjd *	dnode_free_range/w: dn_dirty_mtx (dn_maxblkid)
104168404Spjd *	dnode_next_offset: (phys)
105168404Spjd *
106168404Spjd * dn_dbufs_mtx
107168404Spjd *    must be held before:
108168404Spjd *    	db_mtx, hash_mutexes
109168404Spjd *    protects:
110168404Spjd *    	dn_dbufs
111168404Spjd *    	dn_evicted
112168404Spjd *    held from:
113168404Spjd *    	dmu_evict_user: db_mtx (dn_dbufs)
114168404Spjd *    	dbuf_free_range: db_mtx (dn_dbufs)
115168404Spjd *    	dbuf_remove_ref: db_mtx, callees:
116168404Spjd *    		dbuf_hash_remove: hash_mutexes, db_mtx
117168404Spjd *    	dbuf_create: hash_mutexes, db_mtx (dn_dbufs)
118168404Spjd *    	dnode_set_blksz: (dn_dbufs)
119168404Spjd *
120168404Spjd * hash_mutexes (global)
121168404Spjd *   must be held before:
122168404Spjd *   	db_mtx
123168404Spjd *   protects dbuf_hash_table (global) and db_hash_next
124168404Spjd *   held from:
125168404Spjd *   	dbuf_find: db_mtx
126168404Spjd *   	dbuf_hash_insert: db_mtx
127168404Spjd *   	dbuf_hash_remove: db_mtx
128168404Spjd *
129168404Spjd * db_mtx (meta-leaf)
130168404Spjd *   must be held before:
131168404Spjd *   	dn_mtx, dn_dirty_mtx, dd_lock (leaf mutexes)
132168404Spjd *   protects:
133168404Spjd *   	db_state
134168404Spjd * 	db_holds
135168404Spjd * 	db_buf
136168404Spjd * 	db_changed
137168404Spjd * 	db_data_pending
138168404Spjd * 	db_dirtied
139168404Spjd * 	db_link
140168404Spjd * 	db_dirty_node (??)
141168404Spjd * 	db_dirtycnt
142168404Spjd * 	db_d.*
143168404Spjd * 	db.*
144168404Spjd *   held from:
145168404Spjd * 	dbuf_dirty: dn_mtx, dn_dirty_mtx
146168404Spjd * 	dbuf_dirty->dsl_dir_willuse_space: dd_lock
147168404Spjd * 	dbuf_dirty->dbuf_new_block->dsl_dataset_block_freeable: dd_lock
148168404Spjd * 	dbuf_undirty: dn_dirty_mtx (db_d)
149168404Spjd * 	dbuf_write_done: dn_dirty_mtx (db_state)
150168404Spjd * 	dbuf_*
151168404Spjd * 	dmu_buf_update_user: none (db_d)
152168404Spjd * 	dmu_evict_user: none (db_d) (maybe can eliminate)
153168404Spjd *   	dbuf_find: none (db_holds)
154168404Spjd *   	dbuf_hash_insert: none (db_holds)
155168404Spjd *   	dmu_buf_read_array_impl: none (db_state, db_changed)
156168404Spjd *   	dmu_sync: none (db_dirty_node, db_d)
157168404Spjd *   	dnode_reallocate: none (db)
158168404Spjd *
159168404Spjd * dn_mtx (leaf)
160168404Spjd *   protects:
161168404Spjd *   	dn_dirty_dbufs
162168404Spjd *   	dn_ranges
163168404Spjd *   	phys accounting
164168404Spjd * 	dn_allocated_txg
165168404Spjd * 	dn_free_txg
166168404Spjd * 	dn_assigned_txg
167168404Spjd * 	dd_assigned_tx
168168404Spjd * 	dn_notxholds
169168404Spjd * 	dn_dirtyctx
170168404Spjd * 	dn_dirtyctx_firstset
171168404Spjd * 	(dn_phys copy fields?)
172168404Spjd * 	(dn_phys contents?)
173168404Spjd *   held from:
174168404Spjd *   	dnode_*
175168404Spjd *   	dbuf_dirty: none
176168404Spjd *   	dbuf_sync: none (phys accounting)
177168404Spjd *   	dbuf_undirty: none (dn_ranges, dn_dirty_dbufs)
178168404Spjd *   	dbuf_write_done: none (phys accounting)
179168404Spjd *   	dmu_object_info_from_dnode: none (accounting)
180168404Spjd *   	dmu_tx_commit: none
181168404Spjd *   	dmu_tx_hold_object_impl: none
182168404Spjd *   	dmu_tx_try_assign: dn_notxholds(cv)
183168404Spjd *   	dmu_tx_unassign: none
184168404Spjd *
185185029Spjd * dd_lock
186185029Spjd *    must be held before:
187185029Spjd *      ds_lock
188185029Spjd *      ancestors' dd_lock
189168404Spjd *    protects:
190168404Spjd *    	dd_prop_cbs
191168404Spjd *    	dd_sync_*
192168404Spjd *    	dd_used_bytes
193168404Spjd *    	dd_tempreserved
194168404Spjd *    	dd_space_towrite
195168404Spjd *    	dd_myname
196168404Spjd *    	dd_phys accounting?
197168404Spjd *    held from:
198168404Spjd *    	dsl_dir_*
199168404Spjd *    	dsl_prop_changed_notify: none (dd_prop_cbs)
200168404Spjd *    	dsl_prop_register: none (dd_prop_cbs)
201168404Spjd *    	dsl_prop_unregister: none (dd_prop_cbs)
202168404Spjd *    	dsl_dataset_block_freeable: none (dd_sync_*)
203168404Spjd *
204168404Spjd * os_lock (leaf)
205168404Spjd *   protects:
206168404Spjd *   	os_dirty_dnodes
207168404Spjd *   	os_free_dnodes
208168404Spjd *   	os_dnodes
209168404Spjd *   	os_downgraded_dbufs
210168404Spjd *   	dn_dirtyblksz
211168404Spjd *   	dn_dirty_link
212168404Spjd *   held from:
213168404Spjd *   	dnode_create: none (os_dnodes)
214168404Spjd *   	dnode_destroy: none (os_dnodes)
215168404Spjd *   	dnode_setdirty: none (dn_dirtyblksz, os_*_dnodes)
216168404Spjd *   	dnode_free: none (dn_dirtyblksz, os_*_dnodes)
217168404Spjd *
218185029Spjd * ds_lock
219168404Spjd *    protects:
220219089Spjd *    	ds_objset
221168404Spjd *    	ds_open_refcount
222168404Spjd *    	ds_snapname
223168404Spjd *    	ds_phys accounting
224219089Spjd *	ds_phys userrefs zapobj
225185029Spjd *	ds_reserved
226168404Spjd *    held from:
227168404Spjd *    	dsl_dataset_*
228168404Spjd *
229168404Spjd * dr_mtx (leaf)
230168404Spjd *    protects:
231168404Spjd *	dr_children
232168404Spjd *    held from:
233168404Spjd *	dbuf_dirty
234168404Spjd *	dbuf_undirty
235168404Spjd *	dbuf_sync_indirect
236168404Spjd *	dnode_new_blkid
237168404Spjd */
238168404Spjd
/*
 * Forward declarations of structure tags whose full definitions live in
 * other headers.
 */
struct objset;
struct dmu_pool;

242219089Spjdtypedef struct dmu_xuio {
243219089Spjd	int next;
244219089Spjd	int cnt;
245219089Spjd	struct arc_buf **bufs;
246219089Spjd	iovec_t *iovp;
247219089Spjd} dmu_xuio_t;
248219089Spjd
249219089Spjdtypedef struct xuio_stats {
250219089Spjd	/* loaned yet not returned arc_buf */
251219089Spjd	kstat_named_t xuiostat_onloan_rbuf;
252219089Spjd	kstat_named_t xuiostat_onloan_wbuf;
253219089Spjd	/* whether a copy is made when loaning out a read buffer */
254219089Spjd	kstat_named_t xuiostat_rbuf_copied;
255219089Spjd	kstat_named_t xuiostat_rbuf_nocopy;
256219089Spjd	/* whether a copy is made when assigning a write buffer */
257219089Spjd	kstat_named_t xuiostat_wbuf_copied;
258219089Spjd	kstat_named_t xuiostat_wbuf_nocopy;
259219089Spjd} xuio_stats_t;
260219089Spjd
261219089Spjdstatic xuio_stats_t xuio_stats = {
262219089Spjd	{ "onloan_read_buf",	KSTAT_DATA_UINT64 },
263219089Spjd	{ "onloan_write_buf",	KSTAT_DATA_UINT64 },
264219089Spjd	{ "read_buf_copied",	KSTAT_DATA_UINT64 },
265219089Spjd	{ "read_buf_nocopy",	KSTAT_DATA_UINT64 },
266219089Spjd	{ "write_buf_copied",	KSTAT_DATA_UINT64 },
267219089Spjd	{ "write_buf_nocopy",	KSTAT_DATA_UINT64 }
268219089Spjd};
269219089Spjd
/*
 * Add `val` to the 64-bit value of the xuio_stats counter named `stat`,
 * using atomic_add_64().
 *
 * `stat` is pasted in as a member name of xuio_stats (for example
 * xuiostat_onloan_rbuf), which is why it is not parenthesized; `val` is an
 * ordinary expression and is.
 */
#define	XUIOSTAT_INCR(stat, val)	\
	atomic_add_64(&xuio_stats.stat.value.ui64, (val))
/* Add one to the xuio_stats counter named `stat`. */
#define	XUIOSTAT_BUMP(stat)	XUIOSTAT_INCR(stat, 1)

/*
 * The list of data whose inclusion in a send stream can be pending from
 * one call to backup_cb to another.  Multiple calls to dump_free() and
 * dump_freeobjects() can be aggregated into a single DRR_FREE or
 * DRR_FREEOBJECTS replay record.
 *
 * The enumerators take the default values 0, 1 and 2 in the order listed.
 */
typedef enum {
	PENDING_NONE,		/* nothing is pending */
	PENDING_FREE,		/* presumably an aggregated DRR_FREE */
	PENDING_FREEOBJECTS	/* presumably an aggregated DRR_FREEOBJECTS */
} dmu_pendop_t;

286235222Smmtypedef struct dmu_sendarg {
287235222Smm	list_node_t dsa_link;
288235222Smm	dmu_replay_record_t *dsa_drr;
289235222Smm	kthread_t *dsa_td;
290235222Smm	struct file *dsa_fp;
291235222Smm	int dsa_outfd;
292235222Smm	struct proc *dsa_proc;
293235222Smm	offset_t *dsa_off;
294235222Smm	objset_t *dsa_os;
295235222Smm	zio_cksum_t dsa_zc;
296235222Smm	uint64_t dsa_toguid;
297235222Smm	int dsa_err;
298235222Smm	dmu_pendop_t dsa_pending_op;
299253821Sdelphij	boolean_t dsa_incremental;
300268649Sdelphij	uint64_t dsa_featureflags;
301253821Sdelphij	uint64_t dsa_last_data_object;
302253821Sdelphij	uint64_t dsa_last_data_offset;
303235222Smm} dmu_sendarg_t;
304235222Smm
305263390Sdelphijvoid dmu_object_zapify(objset_t *, uint64_t, dmu_object_type_t, dmu_tx_t *);
306263390Sdelphijvoid dmu_object_free_zapified(objset_t *, uint64_t, dmu_tx_t *);
307268649Sdelphijint dmu_buf_hold_noread(objset_t *, uint64_t, uint64_t,
308268649Sdelphij    void *, dmu_buf_t **);
309235222Smm
310168404Spjd#ifdef	__cplusplus
311168404Spjd}
312168404Spjd#endif
313168404Spjd
314168404Spjd#endif	/* _SYS_DMU_IMPL_H */
315