trim_map.c revision 248575
1240868Spjd/*
2240868Spjd * CDDL HEADER START
3240868Spjd *
4240868Spjd * The contents of this file are subject to the terms of the
5240868Spjd * Common Development and Distribution License (the "License").
6240868Spjd * You may not use this file except in compliance with the License.
7240868Spjd *
8240868Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9240868Spjd * or http://www.opensolaris.org/os/licensing.
10240868Spjd * See the License for the specific language governing permissions
11240868Spjd * and limitations under the License.
12240868Spjd *
13240868Spjd * When distributing Covered Code, include this CDDL HEADER in each
14240868Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15240868Spjd * If applicable, add the following below this CDDL HEADER, with the
16240868Spjd * fields enclosed by brackets "[]" replaced with your own identifying
17240868Spjd * information: Portions Copyright [yyyy] [name of copyright owner]
18240868Spjd *
19240868Spjd * CDDL HEADER END
20240868Spjd */
21240868Spjd/*
22240868Spjd * Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
23240868Spjd * All rights reserved.
24240868Spjd */
25240868Spjd
26240868Spjd#include <sys/zfs_context.h>
27240868Spjd#include <sys/spa_impl.h>
28240868Spjd#include <sys/vdev_impl.h>
29240868Spjd#include <sys/trim_map.h>
30248575Ssmh#include <sys/time.h>
31240868Spjd
/*
 * Compute the (exclusive) end offset of an I/O that begins at `offset' and
 * covers `size' bytes, after rounding the size up to a multiple of
 * (1 << vdev_ashift) of the top-level vdev.  This mirrors the size upgrade
 * that zio_vdev_io_start is said to perform.
 *
 * The rounding makes free range consolidation much more effective than it
 * would otherwise be, and ensures that entire blocks are invalidated by
 * writes.
 *
 * All arguments are parenthesized in the expansion so that arbitrary
 * expressions (e.g. `&leaf' or a conditional expression) may be passed.
 */
#define	TRIM_ZIO_END(vd, offset, size)	((offset) +		\
	P2ROUNDUP((size), 1ULL << (vd)->vdev_top->vdev_ashift))
42244187Ssmh
43240868Spjdtypedef struct trim_map {
44240868Spjd	list_t		tm_head;		/* List of segments sorted by txg. */
45240868Spjd	avl_tree_t	tm_queued_frees;	/* AVL tree of segments waiting for TRIM. */
46240868Spjd	avl_tree_t	tm_inflight_frees;	/* AVL tree of in-flight TRIMs. */
47240868Spjd	avl_tree_t	tm_inflight_writes;	/* AVL tree of in-flight writes. */
48240868Spjd	list_t		tm_pending_writes;	/* Writes blocked on in-flight frees. */
49240868Spjd	kmutex_t	tm_lock;
50240868Spjd} trim_map_t;
51240868Spjd
52240868Spjdtypedef struct trim_seg {
53240868Spjd	avl_node_t	ts_node;	/* AVL node. */
54240868Spjd	list_node_t	ts_next;	/* List element. */
55240868Spjd	uint64_t	ts_start;	/* Starting offset of this segment. */
56240868Spjd	uint64_t	ts_end;		/* Ending offset (non-inclusive). */
57240868Spjd	uint64_t	ts_txg;		/* Segment creation txg. */
58248575Ssmh	hrtime_t	ts_time;	/* Segment creation time. */
59240868Spjd} trim_seg_t;
60240868Spjd
61240868Spjdextern boolean_t zfs_notrim;
62240868Spjd
63240868SpjdSYSCTL_DECL(_vfs_zfs);
64240868Spjd/* Delay TRIMs by that many TXGs. */
65240868Spjdstatic int trim_txg_limit = 64;
66240868SpjdTUNABLE_INT("vfs.zfs.trim_txg_limit", &trim_txg_limit);
67240868SpjdSYSCTL_INT(_vfs_zfs, OID_AUTO, trim_txg_limit, CTLFLAG_RW, &trim_txg_limit, 0,
68240868Spjd    "Delay TRIMs by that many TXGs.");
69240868Spjd
70248575Ssmhstatic int trim_l2arc_limit = 30;
71248575SsmhTUNABLE_INT("vfs.zfs.trim_l2arc_limit", &trim_l2arc_limit);
72248575SsmhSYSCTL_INT(_vfs_zfs, OID_AUTO, trim_l2arc_limit, CTLFLAG_RWTUN, &trim_l2arc_limit, 0,
73248575Ssmh    "Delay TRIMs by this many seconds for cache devices.");
74248575Ssmh
75240868Spjdstatic void trim_map_vdev_commit_done(spa_t *spa, vdev_t *vd);
76240868Spjd
77240868Spjdstatic int
78240868Spjdtrim_map_seg_compare(const void *x1, const void *x2)
79240868Spjd{
80240868Spjd	const trim_seg_t *s1 = x1;
81240868Spjd	const trim_seg_t *s2 = x2;
82240868Spjd
83240868Spjd	if (s1->ts_start < s2->ts_start) {
84240868Spjd		if (s1->ts_end > s2->ts_start)
85240868Spjd			return (0);
86240868Spjd		return (-1);
87240868Spjd	}
88240868Spjd	if (s1->ts_start > s2->ts_start) {
89240868Spjd		if (s1->ts_start < s2->ts_end)
90240868Spjd			return (0);
91240868Spjd		return (1);
92240868Spjd	}
93240868Spjd	return (0);
94240868Spjd}
95240868Spjd
96240868Spjdstatic int
97240868Spjdtrim_map_zio_compare(const void *x1, const void *x2)
98240868Spjd{
99240868Spjd	const zio_t *z1 = x1;
100240868Spjd	const zio_t *z2 = x2;
101240868Spjd
102240868Spjd	if (z1->io_offset < z2->io_offset) {
103240868Spjd		if (z1->io_offset + z1->io_size > z2->io_offset)
104240868Spjd			return (0);
105240868Spjd		return (-1);
106240868Spjd	}
107240868Spjd	if (z1->io_offset > z2->io_offset) {
108240868Spjd		if (z1->io_offset < z2->io_offset + z2->io_size)
109240868Spjd			return (0);
110240868Spjd		return (1);
111240868Spjd	}
112240868Spjd	return (0);
113240868Spjd}
114240868Spjd
115240868Spjdvoid
116240868Spjdtrim_map_create(vdev_t *vd)
117240868Spjd{
118240868Spjd	trim_map_t *tm;
119240868Spjd
120240868Spjd	ASSERT(vd->vdev_ops->vdev_op_leaf);
121240868Spjd
122240868Spjd	if (zfs_notrim)
123240868Spjd		return;
124240868Spjd
125240868Spjd	tm = kmem_zalloc(sizeof (*tm), KM_SLEEP);
126240868Spjd	mutex_init(&tm->tm_lock, NULL, MUTEX_DEFAULT, NULL);
127240868Spjd	list_create(&tm->tm_head, sizeof (trim_seg_t),
128240868Spjd	    offsetof(trim_seg_t, ts_next));
129240868Spjd	list_create(&tm->tm_pending_writes, sizeof (zio_t),
130240868Spjd	    offsetof(zio_t, io_trim_link));
131240868Spjd	avl_create(&tm->tm_queued_frees, trim_map_seg_compare,
132240868Spjd	    sizeof (trim_seg_t), offsetof(trim_seg_t, ts_node));
133240868Spjd	avl_create(&tm->tm_inflight_frees, trim_map_seg_compare,
134240868Spjd	    sizeof (trim_seg_t), offsetof(trim_seg_t, ts_node));
135240868Spjd	avl_create(&tm->tm_inflight_writes, trim_map_zio_compare,
136240868Spjd	    sizeof (zio_t), offsetof(zio_t, io_trim_node));
137240868Spjd	vd->vdev_trimmap = tm;
138240868Spjd}
139240868Spjd
140240868Spjdvoid
141240868Spjdtrim_map_destroy(vdev_t *vd)
142240868Spjd{
143240868Spjd	trim_map_t *tm;
144240868Spjd	trim_seg_t *ts;
145240868Spjd
146240868Spjd	ASSERT(vd->vdev_ops->vdev_op_leaf);
147240868Spjd
148240868Spjd	if (zfs_notrim)
149240868Spjd		return;
150240868Spjd
151240868Spjd	tm = vd->vdev_trimmap;
152240868Spjd	if (tm == NULL)
153240868Spjd		return;
154240868Spjd
155240868Spjd	/*
156240868Spjd	 * We may have been called before trim_map_vdev_commit_done()
157240868Spjd	 * had a chance to run, so do it now to prune the remaining
158240868Spjd	 * inflight frees.
159240868Spjd	 */
160240868Spjd	trim_map_vdev_commit_done(vd->vdev_spa, vd);
161240868Spjd
162240868Spjd	mutex_enter(&tm->tm_lock);
163240868Spjd	while ((ts = list_head(&tm->tm_head)) != NULL) {
164240868Spjd		avl_remove(&tm->tm_queued_frees, ts);
165240868Spjd		list_remove(&tm->tm_head, ts);
166240868Spjd		kmem_free(ts, sizeof (*ts));
167240868Spjd	}
168240868Spjd	mutex_exit(&tm->tm_lock);
169240868Spjd
170240868Spjd	avl_destroy(&tm->tm_queued_frees);
171240868Spjd	avl_destroy(&tm->tm_inflight_frees);
172240868Spjd	avl_destroy(&tm->tm_inflight_writes);
173240868Spjd	list_destroy(&tm->tm_pending_writes);
174240868Spjd	list_destroy(&tm->tm_head);
175240868Spjd	mutex_destroy(&tm->tm_lock);
176240868Spjd	kmem_free(tm, sizeof (*tm));
177240868Spjd	vd->vdev_trimmap = NULL;
178240868Spjd}
179240868Spjd
180240868Spjdstatic void
181240868Spjdtrim_map_segment_add(trim_map_t *tm, uint64_t start, uint64_t end, uint64_t txg)
182240868Spjd{
183240868Spjd	avl_index_t where;
184240868Spjd	trim_seg_t tsearch, *ts_before, *ts_after, *ts;
185240868Spjd	boolean_t merge_before, merge_after;
186248575Ssmh	hrtime_t time;
187240868Spjd
188240868Spjd	ASSERT(MUTEX_HELD(&tm->tm_lock));
189240868Spjd	VERIFY(start < end);
190240868Spjd
191248575Ssmh	time = gethrtime();
192240868Spjd	tsearch.ts_start = start;
193240868Spjd	tsearch.ts_end = end;
194240868Spjd
195240868Spjd	ts = avl_find(&tm->tm_queued_frees, &tsearch, &where);
196240868Spjd	if (ts != NULL) {
197240868Spjd		if (start < ts->ts_start)
198240868Spjd			trim_map_segment_add(tm, start, ts->ts_start, txg);
199240868Spjd		if (end > ts->ts_end)
200240868Spjd			trim_map_segment_add(tm, ts->ts_end, end, txg);
201240868Spjd		return;
202240868Spjd	}
203240868Spjd
204240868Spjd	ts_before = avl_nearest(&tm->tm_queued_frees, where, AVL_BEFORE);
205240868Spjd	ts_after = avl_nearest(&tm->tm_queued_frees, where, AVL_AFTER);
206240868Spjd
207240868Spjd	merge_before = (ts_before != NULL && ts_before->ts_end == start &&
208240868Spjd	    ts_before->ts_txg == txg);
209240868Spjd	merge_after = (ts_after != NULL && ts_after->ts_start == end &&
210240868Spjd	    ts_after->ts_txg == txg);
211240868Spjd
212240868Spjd	if (merge_before && merge_after) {
213240868Spjd		avl_remove(&tm->tm_queued_frees, ts_before);
214240868Spjd		list_remove(&tm->tm_head, ts_before);
215240868Spjd		ts_after->ts_start = ts_before->ts_start;
216240868Spjd		kmem_free(ts_before, sizeof (*ts_before));
217240868Spjd	} else if (merge_before) {
218240868Spjd		ts_before->ts_end = end;
219240868Spjd	} else if (merge_after) {
220240868Spjd		ts_after->ts_start = start;
221240868Spjd	} else {
222240868Spjd		ts = kmem_alloc(sizeof (*ts), KM_SLEEP);
223240868Spjd		ts->ts_start = start;
224240868Spjd		ts->ts_end = end;
225240868Spjd		ts->ts_txg = txg;
226248575Ssmh		ts->ts_time = time;
227240868Spjd		avl_insert(&tm->tm_queued_frees, ts, where);
228240868Spjd		list_insert_tail(&tm->tm_head, ts);
229240868Spjd	}
230240868Spjd}
231240868Spjd
232240868Spjdstatic void
233240868Spjdtrim_map_segment_remove(trim_map_t *tm, trim_seg_t *ts, uint64_t start,
234240868Spjd    uint64_t end)
235240868Spjd{
236240868Spjd	trim_seg_t *nts;
237240868Spjd	boolean_t left_over, right_over;
238240868Spjd
239240868Spjd	ASSERT(MUTEX_HELD(&tm->tm_lock));
240240868Spjd
241240868Spjd	left_over = (ts->ts_start < start);
242240868Spjd	right_over = (ts->ts_end > end);
243240868Spjd
244240868Spjd	if (left_over && right_over) {
245240868Spjd		nts = kmem_alloc(sizeof (*nts), KM_SLEEP);
246240868Spjd		nts->ts_start = end;
247240868Spjd		nts->ts_end = ts->ts_end;
248240868Spjd		nts->ts_txg = ts->ts_txg;
249248575Ssmh		nts->ts_time = ts->ts_time;
250240868Spjd		ts->ts_end = start;
251240868Spjd		avl_insert_here(&tm->tm_queued_frees, nts, ts, AVL_AFTER);
252240868Spjd		list_insert_after(&tm->tm_head, ts, nts);
253240868Spjd	} else if (left_over) {
254240868Spjd		ts->ts_end = start;
255240868Spjd	} else if (right_over) {
256240868Spjd		ts->ts_start = end;
257240868Spjd	} else {
258240868Spjd		avl_remove(&tm->tm_queued_frees, ts);
259240868Spjd		list_remove(&tm->tm_head, ts);
260240868Spjd		kmem_free(ts, sizeof (*ts));
261240868Spjd	}
262240868Spjd}
263240868Spjd
264240868Spjdstatic void
265240868Spjdtrim_map_free_locked(trim_map_t *tm, uint64_t start, uint64_t end, uint64_t txg)
266240868Spjd{
267240868Spjd	zio_t zsearch, *zs;
268240868Spjd
269240868Spjd	ASSERT(MUTEX_HELD(&tm->tm_lock));
270240868Spjd
271240868Spjd	zsearch.io_offset = start;
272240868Spjd	zsearch.io_size = end - start;
273240868Spjd
274240868Spjd	zs = avl_find(&tm->tm_inflight_writes, &zsearch, NULL);
275240868Spjd	if (zs == NULL) {
276240868Spjd		trim_map_segment_add(tm, start, end, txg);
277240868Spjd		return;
278240868Spjd	}
279240868Spjd	if (start < zs->io_offset)
280240868Spjd		trim_map_free_locked(tm, start, zs->io_offset, txg);
281240868Spjd	if (zs->io_offset + zs->io_size < end)
282240868Spjd		trim_map_free_locked(tm, zs->io_offset + zs->io_size, end, txg);
283240868Spjd}
284240868Spjd
285240868Spjdvoid
286248574Ssmhtrim_map_free(vdev_t *vd, uint64_t offset, uint64_t size, uint64_t txg)
287240868Spjd{
288240868Spjd	trim_map_t *tm = vd->vdev_trimmap;
289240868Spjd
290240868Spjd	if (zfs_notrim || vd->vdev_notrim || tm == NULL)
291240868Spjd		return;
292240868Spjd
293240868Spjd	mutex_enter(&tm->tm_lock);
294248574Ssmh	trim_map_free_locked(tm, offset, TRIM_ZIO_END(vd, offset, size), txg);
295240868Spjd	mutex_exit(&tm->tm_lock);
296240868Spjd}
297240868Spjd
298240868Spjdboolean_t
299240868Spjdtrim_map_write_start(zio_t *zio)
300240868Spjd{
301240868Spjd	vdev_t *vd = zio->io_vd;
302240868Spjd	trim_map_t *tm = vd->vdev_trimmap;
303240868Spjd	trim_seg_t tsearch, *ts;
304240868Spjd	boolean_t left_over, right_over;
305240868Spjd	uint64_t start, end;
306240868Spjd
307240868Spjd	if (zfs_notrim || vd->vdev_notrim || tm == NULL)
308240868Spjd		return (B_TRUE);
309240868Spjd
310240868Spjd	start = zio->io_offset;
311248572Ssmh	end = TRIM_ZIO_END(zio->io_vd, start, zio->io_size);
312240868Spjd	tsearch.ts_start = start;
313240868Spjd	tsearch.ts_end = end;
314240868Spjd
315240868Spjd	mutex_enter(&tm->tm_lock);
316240868Spjd
317240868Spjd	/*
318240868Spjd	 * Checking for colliding in-flight frees.
319240868Spjd	 */
320240868Spjd	ts = avl_find(&tm->tm_inflight_frees, &tsearch, NULL);
321240868Spjd	if (ts != NULL) {
322240868Spjd		list_insert_tail(&tm->tm_pending_writes, zio);
323240868Spjd		mutex_exit(&tm->tm_lock);
324240868Spjd		return (B_FALSE);
325240868Spjd	}
326240868Spjd
327240868Spjd	ts = avl_find(&tm->tm_queued_frees, &tsearch, NULL);
328240868Spjd	if (ts != NULL) {
329240868Spjd		/*
330240868Spjd		 * Loop until all overlapping segments are removed.
331240868Spjd		 */
332240868Spjd		do {
333240868Spjd			trim_map_segment_remove(tm, ts, start, end);
334240868Spjd			ts = avl_find(&tm->tm_queued_frees, &tsearch, NULL);
335240868Spjd		} while (ts != NULL);
336240868Spjd	}
337240868Spjd	avl_add(&tm->tm_inflight_writes, zio);
338240868Spjd
339240868Spjd	mutex_exit(&tm->tm_lock);
340240868Spjd
341240868Spjd	return (B_TRUE);
342240868Spjd}
343240868Spjd
344240868Spjdvoid
345240868Spjdtrim_map_write_done(zio_t *zio)
346240868Spjd{
347240868Spjd	vdev_t *vd = zio->io_vd;
348240868Spjd	trim_map_t *tm = vd->vdev_trimmap;
349240868Spjd
350240868Spjd	/*
351240868Spjd	 * Don't check for vdev_notrim, since the write could have
352240868Spjd	 * started before vdev_notrim was set.
353240868Spjd	 */
354240868Spjd	if (zfs_notrim || tm == NULL)
355240868Spjd		return;
356240868Spjd
357240868Spjd	mutex_enter(&tm->tm_lock);
358240868Spjd	/*
359240868Spjd	 * Don't fail if the write isn't in the tree, since the write
360240868Spjd	 * could have started after vdev_notrim was set.
361240868Spjd	 */
362240868Spjd	if (zio->io_trim_node.avl_child[0] ||
363240868Spjd	    zio->io_trim_node.avl_child[1] ||
364240868Spjd	    AVL_XPARENT(&zio->io_trim_node) ||
365240868Spjd	    tm->tm_inflight_writes.avl_root == &zio->io_trim_node)
366240868Spjd		avl_remove(&tm->tm_inflight_writes, zio);
367240868Spjd	mutex_exit(&tm->tm_lock);
368240868Spjd}
369240868Spjd
370240868Spjd/*
371240868Spjd * Return the oldest segment (the one with the lowest txg) or false if
372240868Spjd * the list is empty or the first element's txg is greater than txg given
373248575Ssmh * as function argument, or the first element's time is greater than time
374248575Ssmh * given as function argument
375240868Spjd */
376240868Spjdstatic trim_seg_t *
377248575Ssmhtrim_map_first(trim_map_t *tm, uint64_t txg, hrtime_t time)
378240868Spjd{
379240868Spjd	trim_seg_t *ts;
380240868Spjd
381240868Spjd	ASSERT(MUTEX_HELD(&tm->tm_lock));
382240868Spjd
383240868Spjd	ts = list_head(&tm->tm_head);
384248575Ssmh	if (ts != NULL && ts->ts_txg <= txg && ts->ts_time <= time)
385240868Spjd		return (ts);
386240868Spjd	return (NULL);
387240868Spjd}
388240868Spjd
389240868Spjdstatic void
390240868Spjdtrim_map_vdev_commit(spa_t *spa, zio_t *zio, vdev_t *vd)
391240868Spjd{
392240868Spjd	trim_map_t *tm = vd->vdev_trimmap;
393240868Spjd	trim_seg_t *ts;
394240868Spjd	uint64_t start, size, txglimit;
395248575Ssmh	hrtime_t timelimit;
396240868Spjd
397240868Spjd	ASSERT(vd->vdev_ops->vdev_op_leaf);
398240868Spjd
399240868Spjd	if (tm == NULL)
400240868Spjd		return;
401240868Spjd
402248575Ssmh	if (vd->vdev_isl2cache) {
403248575Ssmh		timelimit = gethrtime() - trim_l2arc_limit * NANOSEC;
404248575Ssmh		txglimit = UINT64_MAX;
405248575Ssmh	} else {
406248575Ssmh		timelimit = TIME_MAX;
407248575Ssmh		txglimit = MIN(spa_last_synced_txg(spa), spa_freeze_txg(spa)) -
408248575Ssmh		    trim_txg_limit;
409248575Ssmh	}
410240868Spjd
411240868Spjd	mutex_enter(&tm->tm_lock);
412240868Spjd	/*
413248575Ssmh	 * Loop until we send all frees up to the txglimit
414248575Ssmh	 * or time limit if this is a cache device.
415240868Spjd	 */
416248575Ssmh	while ((ts = trim_map_first(tm, txglimit, timelimit)) != NULL) {
417240868Spjd		list_remove(&tm->tm_head, ts);
418240868Spjd		avl_remove(&tm->tm_queued_frees, ts);
419240868Spjd		avl_add(&tm->tm_inflight_frees, ts);
420240868Spjd		zio_nowait(zio_trim(zio, spa, vd, ts->ts_start,
421240868Spjd		    ts->ts_end - ts->ts_start));
422240868Spjd	}
423240868Spjd	mutex_exit(&tm->tm_lock);
424240868Spjd}
425240868Spjd
426240868Spjdstatic void
427240868Spjdtrim_map_vdev_commit_done(spa_t *spa, vdev_t *vd)
428240868Spjd{
429240868Spjd	trim_map_t *tm = vd->vdev_trimmap;
430240868Spjd	trim_seg_t *ts;
431240868Spjd	list_t pending_writes;
432240868Spjd	zio_t *zio;
433240868Spjd	uint64_t start, size;
434240868Spjd	void *cookie;
435240868Spjd
436240868Spjd	ASSERT(vd->vdev_ops->vdev_op_leaf);
437240868Spjd
438240868Spjd	if (tm == NULL)
439240868Spjd		return;
440240868Spjd
441240868Spjd	mutex_enter(&tm->tm_lock);
442240868Spjd	if (!avl_is_empty(&tm->tm_inflight_frees)) {
443240868Spjd		cookie = NULL;
444240868Spjd		while ((ts = avl_destroy_nodes(&tm->tm_inflight_frees,
445240868Spjd		    &cookie)) != NULL) {
446240868Spjd			kmem_free(ts, sizeof (*ts));
447240868Spjd		}
448240868Spjd	}
449240868Spjd	list_create(&pending_writes, sizeof (zio_t), offsetof(zio_t,
450240868Spjd	    io_trim_link));
451240868Spjd	list_move_tail(&pending_writes, &tm->tm_pending_writes);
452240868Spjd	mutex_exit(&tm->tm_lock);
453240868Spjd
454240868Spjd	while ((zio = list_remove_head(&pending_writes)) != NULL) {
455240868Spjd		zio_vdev_io_reissue(zio);
456240868Spjd		zio_execute(zio);
457240868Spjd	}
458240868Spjd	list_destroy(&pending_writes);
459240868Spjd}
460240868Spjd
461240868Spjdstatic void
462240868Spjdtrim_map_commit(spa_t *spa, zio_t *zio, vdev_t *vd)
463240868Spjd{
464240868Spjd	int c;
465240868Spjd
466248574Ssmh	if (vd == NULL || spa_last_synced_txg(spa) <= trim_txg_limit)
467240868Spjd		return;
468240868Spjd
469240868Spjd	if (vd->vdev_ops->vdev_op_leaf) {
470240868Spjd		trim_map_vdev_commit(spa, zio, vd);
471240868Spjd	} else {
472240868Spjd		for (c = 0; c < vd->vdev_children; c++)
473240868Spjd			trim_map_commit(spa, zio, vd->vdev_child[c]);
474240868Spjd	}
475240868Spjd}
476240868Spjd
477240868Spjdstatic void
478240868Spjdtrim_map_commit_done(spa_t *spa, vdev_t *vd)
479240868Spjd{
480240868Spjd	int c;
481240868Spjd
482240868Spjd	if (vd == NULL)
483240868Spjd		return;
484240868Spjd
485240868Spjd	if (vd->vdev_ops->vdev_op_leaf) {
486240868Spjd		trim_map_vdev_commit_done(spa, vd);
487240868Spjd	} else {
488240868Spjd		for (c = 0; c < vd->vdev_children; c++)
489240868Spjd			trim_map_commit_done(spa, vd->vdev_child[c]);
490240868Spjd	}
491240868Spjd}
492240868Spjd
493240868Spjdstatic void
494240868Spjdtrim_thread(void *arg)
495240868Spjd{
496240868Spjd	spa_t *spa = arg;
497240868Spjd	zio_t *zio;
498240868Spjd
499240868Spjd	for (;;) {
500240868Spjd		mutex_enter(&spa->spa_trim_lock);
501240868Spjd		if (spa->spa_trim_thread == NULL) {
502240868Spjd			spa->spa_trim_thread = curthread;
503240868Spjd			cv_signal(&spa->spa_trim_cv);
504240868Spjd			mutex_exit(&spa->spa_trim_lock);
505240868Spjd			thread_exit();
506240868Spjd		}
507240868Spjd		cv_wait(&spa->spa_trim_cv, &spa->spa_trim_lock);
508240868Spjd		mutex_exit(&spa->spa_trim_lock);
509240868Spjd
510240868Spjd		zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);
511240868Spjd
512240868Spjd		spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
513240868Spjd		trim_map_commit(spa, zio, spa->spa_root_vdev);
514240868Spjd		(void) zio_wait(zio);
515240868Spjd		trim_map_commit_done(spa, spa->spa_root_vdev);
516240868Spjd		spa_config_exit(spa, SCL_STATE, FTAG);
517240868Spjd	}
518240868Spjd}
519240868Spjd
520240868Spjdvoid
521240868Spjdtrim_thread_create(spa_t *spa)
522240868Spjd{
523240868Spjd
524240868Spjd	if (zfs_notrim)
525240868Spjd		return;
526240868Spjd
527240868Spjd	mutex_init(&spa->spa_trim_lock, NULL, MUTEX_DEFAULT, NULL);
528240868Spjd	cv_init(&spa->spa_trim_cv, NULL, CV_DEFAULT, NULL);
529240868Spjd	mutex_enter(&spa->spa_trim_lock);
530240868Spjd	spa->spa_trim_thread = thread_create(NULL, 0, trim_thread, spa, 0, &p0,
531240868Spjd	    TS_RUN, minclsyspri);
532240868Spjd	mutex_exit(&spa->spa_trim_lock);
533240868Spjd}
534240868Spjd
535240868Spjdvoid
536240868Spjdtrim_thread_destroy(spa_t *spa)
537240868Spjd{
538240868Spjd
539240868Spjd	if (zfs_notrim)
540240868Spjd		return;
541240868Spjd	if (spa->spa_trim_thread == NULL)
542240868Spjd		return;
543240868Spjd
544240868Spjd	mutex_enter(&spa->spa_trim_lock);
545240868Spjd	/* Setting spa_trim_thread to NULL tells the thread to stop. */
546240868Spjd	spa->spa_trim_thread = NULL;
547240868Spjd	cv_signal(&spa->spa_trim_cv);
548240868Spjd	/* The thread will set it back to != NULL on exit. */
549240868Spjd	while (spa->spa_trim_thread == NULL)
550240868Spjd		cv_wait(&spa->spa_trim_cv, &spa->spa_trim_lock);
551240868Spjd	spa->spa_trim_thread = NULL;
552240868Spjd	mutex_exit(&spa->spa_trim_lock);
553240868Spjd
554240868Spjd	cv_destroy(&spa->spa_trim_cv);
555240868Spjd	mutex_destroy(&spa->spa_trim_lock);
556240868Spjd}
557240868Spjd
558240868Spjdvoid
559240868Spjdtrim_thread_wakeup(spa_t *spa)
560240868Spjd{
561240868Spjd
562240868Spjd	if (zfs_notrim)
563240868Spjd		return;
564240868Spjd	if (spa->spa_trim_thread == NULL)
565240868Spjd		return;
566240868Spjd
567240868Spjd	mutex_enter(&spa->spa_trim_lock);
568240868Spjd	cv_signal(&spa->spa_trim_cv);
569240868Spjd	mutex_exit(&spa->spa_trim_lock);
570240868Spjd}
571