1/*
2 * CDDL HEADER START
3 *
4 * This file and its contents are supplied under the terms of the
5 * Common Development and Distribution License ("CDDL"), version 1.0.
6 * You may only use this file in accordance with the terms of version
7 * 1.0 of the CDDL.
8 *
9 * A full copy of the text of the CDDL should have accompanied this
10 * source.  A copy of the CDDL is also available via the Internet at
11 * http://www.illumos.org/license/CDDL.
12 *
13 * CDDL HEADER END
14 */
15
16/*
17 * Copyright (c) 2015 by Delphix. All rights reserved.
18 */
19
20#include <sys/dmu_tx.h>
21#include <sys/spa.h>
22#include <sys/dmu.h>
23#include <sys/dsl_pool.h>
24#include <sys/vdev_indirect_births.h>
25
26static boolean_t
27vdev_indirect_births_verify(vdev_indirect_births_t *vib)
28{
29	ASSERT(vib != NULL);
30
31	ASSERT(vib->vib_object != 0);
32	ASSERT(vib->vib_objset != NULL);
33	ASSERT(vib->vib_phys != NULL);
34	ASSERT(vib->vib_dbuf != NULL);
35
36	EQUIV(vib->vib_phys->vib_count > 0, vib->vib_entries != NULL);
37
38	return (B_TRUE);
39}
40
41uint64_t
42vdev_indirect_births_count(vdev_indirect_births_t *vib)
43{
44	ASSERT(vdev_indirect_births_verify(vib));
45
46	return (vib->vib_phys->vib_count);
47}
48
49uint64_t
50vdev_indirect_births_object(vdev_indirect_births_t *vib)
51{
52	ASSERT(vdev_indirect_births_verify(vib));
53
54	return (vib->vib_object);
55}
56
57static uint64_t
58vdev_indirect_births_size_impl(vdev_indirect_births_t *vib)
59{
60	return (vib->vib_phys->vib_count * sizeof (*vib->vib_entries));
61}
62
63void
64vdev_indirect_births_close(vdev_indirect_births_t *vib)
65{
66	ASSERT(vdev_indirect_births_verify(vib));
67
68	if (vib->vib_phys->vib_count > 0) {
69		uint64_t births_size = vdev_indirect_births_size_impl(vib);
70
71		kmem_free(vib->vib_entries, births_size);
72		vib->vib_entries = NULL;
73	}
74
75	dmu_buf_rele(vib->vib_dbuf, vib);
76
77	vib->vib_objset = NULL;
78	vib->vib_object = 0;
79	vib->vib_dbuf = NULL;
80	vib->vib_phys = NULL;
81
82	kmem_free(vib, sizeof (*vib));
83}
84
85uint64_t
86vdev_indirect_births_alloc(objset_t *os, dmu_tx_t *tx)
87{
88	ASSERT(dmu_tx_is_syncing(tx));
89
90	return (dmu_object_alloc(os,
91	    DMU_OTN_UINT64_METADATA, SPA_OLD_MAXBLOCKSIZE,
92	    DMU_OTN_UINT64_METADATA, sizeof (vdev_indirect_birth_phys_t),
93	    tx));
94}
95
96vdev_indirect_births_t *
97vdev_indirect_births_open(objset_t *os, uint64_t births_object)
98{
99	vdev_indirect_births_t *vib = kmem_zalloc(sizeof (*vib), KM_SLEEP);
100
101	vib->vib_objset = os;
102	vib->vib_object = births_object;
103
104	VERIFY0(dmu_bonus_hold(os, vib->vib_object, vib, &vib->vib_dbuf));
105	vib->vib_phys = vib->vib_dbuf->db_data;
106
107	if (vib->vib_phys->vib_count > 0) {
108		uint64_t births_size = vdev_indirect_births_size_impl(vib);
109		vib->vib_entries = kmem_alloc(births_size, KM_SLEEP);
110		VERIFY0(dmu_read(vib->vib_objset, vib->vib_object, 0,
111		    births_size, vib->vib_entries, DMU_READ_PREFETCH));
112	}
113
114	ASSERT(vdev_indirect_births_verify(vib));
115
116	return (vib);
117}
118
119void
120vdev_indirect_births_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
121{
122	VERIFY0(dmu_object_free(os, object, tx));
123}
124
125void
126vdev_indirect_births_add_entry(vdev_indirect_births_t *vib,
127    uint64_t max_offset, uint64_t txg, dmu_tx_t *tx)
128{
129	vdev_indirect_birth_entry_phys_t vibe;
130	uint64_t old_size;
131	uint64_t new_size;
132	vdev_indirect_birth_entry_phys_t *new_entries;
133
134	ASSERT(dmu_tx_is_syncing(tx));
135	ASSERT(dsl_pool_sync_context(dmu_tx_pool(tx)));
136	ASSERT(vdev_indirect_births_verify(vib));
137
138	dmu_buf_will_dirty(vib->vib_dbuf, tx);
139
140	vibe.vibe_offset = max_offset;
141	vibe.vibe_phys_birth_txg = txg;
142
143	old_size = vdev_indirect_births_size_impl(vib);
144	dmu_write(vib->vib_objset, vib->vib_object, old_size, sizeof (vibe),
145	    &vibe, tx);
146	vib->vib_phys->vib_count++;
147	new_size = vdev_indirect_births_size_impl(vib);
148
149	new_entries = kmem_alloc(new_size, KM_SLEEP);
150	if (old_size > 0) {
151		bcopy(vib->vib_entries, new_entries, old_size);
152		kmem_free(vib->vib_entries, old_size);
153	}
154	new_entries[vib->vib_phys->vib_count - 1] = vibe;
155	vib->vib_entries = new_entries;
156}
157
158uint64_t
159vdev_indirect_births_last_entry_txg(vdev_indirect_births_t *vib)
160{
161	ASSERT(vdev_indirect_births_verify(vib));
162	ASSERT(vib->vib_phys->vib_count > 0);
163
164	vdev_indirect_birth_entry_phys_t *last =
165	    &vib->vib_entries[vib->vib_phys->vib_count - 1];
166	return (last->vibe_phys_birth_txg);
167}
168
169/*
170 * Return the txg in which the given range was copied (i.e. its physical
171 * birth txg).  The specified offset+asize must be contiguously mapped
172 * (i.e. not a split block).
173 *
174 * The entries are sorted by increasing phys_birth, and also by increasing
175 * offset.  We find the specified offset by binary search.  Note that we
176 * can not use bsearch() because looking at each entry independently is
177 * insufficient to find the correct entry.  Each entry implicitly relies
178 * on the previous entry: an entry indicates that the offsets from the
179 * end of the previous entry to the end of this entry were written in the
180 * specified txg.
181 */
182uint64_t
183vdev_indirect_births_physbirth(vdev_indirect_births_t *vib, uint64_t offset,
184    uint64_t asize)
185{
186	vdev_indirect_birth_entry_phys_t *base;
187	vdev_indirect_birth_entry_phys_t *last;
188
189	ASSERT(vdev_indirect_births_verify(vib));
190	ASSERT(vib->vib_phys->vib_count > 0);
191
192	base = vib->vib_entries;
193	last = base + vib->vib_phys->vib_count - 1;
194
195	ASSERT3U(offset, <, last->vibe_offset);
196
197	while (last >= base) {
198		vdev_indirect_birth_entry_phys_t *p =
199		    base + ((last - base) / 2);
200		if (offset >= p->vibe_offset) {
201			base = p + 1;
202		} else if (p == vib->vib_entries ||
203		    offset >= (p - 1)->vibe_offset) {
204			ASSERT3U(offset + asize, <=, p->vibe_offset);
205			return (p->vibe_phys_birth_txg);
206		} else {
207			last = p - 1;
208		}
209	}
210	ASSERT(!"offset not found");
211	return (-1);
212}
213