trim_map.c revision 248572
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
 * All rights reserved.
 */
25240868Spjd
26240868Spjd#include <sys/zfs_context.h>
27240868Spjd#include <sys/spa_impl.h>
28240868Spjd#include <sys/vdev_impl.h>
29240868Spjd#include <sys/trim_map.h>
30240868Spjd
/*
 * Calculate the zio end, upgrading based on ashift which would be
 * done by zio_vdev_io_start.
 *
 * This makes free range consolidation much more effective
 * than it would otherwise be as well as ensuring that entire
 * blocks are invalidated by writes.
 *
 * vd     - pointer to a vdev; the ashift is read from vd->vdev_top.
 * offset - start of the I/O.
 * size   - length of the I/O; rounded up to a multiple of
 *          (1 << ashift) before being added to offset.
 *
 * Every parameter is parenthesized so that arbitrary expressions
 * (e.g. "&vdev" or "a | b") expand with the intended precedence.
 */
#define	TRIM_ZIO_END(vd, offset, size)	((offset) +		\
	P2ROUNDUP((size), 1ULL << (vd)->vdev_top->vdev_ashift))
41244187Ssmh
42240868Spjdtypedef struct trim_map {
43240868Spjd	list_t		tm_head;		/* List of segments sorted by txg. */
44240868Spjd	avl_tree_t	tm_queued_frees;	/* AVL tree of segments waiting for TRIM. */
45240868Spjd	avl_tree_t	tm_inflight_frees;	/* AVL tree of in-flight TRIMs. */
46240868Spjd	avl_tree_t	tm_inflight_writes;	/* AVL tree of in-flight writes. */
47240868Spjd	list_t		tm_pending_writes;	/* Writes blocked on in-flight frees. */
48240868Spjd	kmutex_t	tm_lock;
49240868Spjd} trim_map_t;
50240868Spjd
51240868Spjdtypedef struct trim_seg {
52240868Spjd	avl_node_t	ts_node;	/* AVL node. */
53240868Spjd	list_node_t	ts_next;	/* List element. */
54240868Spjd	uint64_t	ts_start;	/* Starting offset of this segment. */
55240868Spjd	uint64_t	ts_end;		/* Ending offset (non-inclusive). */
56240868Spjd	uint64_t	ts_txg;		/* Segment creation txg. */
57240868Spjd} trim_seg_t;
58240868Spjd
59240868Spjdextern boolean_t zfs_notrim;
60240868Spjd
61240868SpjdSYSCTL_DECL(_vfs_zfs);
62240868Spjd/* Delay TRIMs by that many TXGs. */
63240868Spjdstatic int trim_txg_limit = 64;
64240868SpjdTUNABLE_INT("vfs.zfs.trim_txg_limit", &trim_txg_limit);
65240868SpjdSYSCTL_INT(_vfs_zfs, OID_AUTO, trim_txg_limit, CTLFLAG_RW, &trim_txg_limit, 0,
66240868Spjd    "Delay TRIMs by that many TXGs.");
67240868Spjd
68240868Spjdstatic void trim_map_vdev_commit_done(spa_t *spa, vdev_t *vd);
69240868Spjd
70240868Spjdstatic int
71240868Spjdtrim_map_seg_compare(const void *x1, const void *x2)
72240868Spjd{
73240868Spjd	const trim_seg_t *s1 = x1;
74240868Spjd	const trim_seg_t *s2 = x2;
75240868Spjd
76240868Spjd	if (s1->ts_start < s2->ts_start) {
77240868Spjd		if (s1->ts_end > s2->ts_start)
78240868Spjd			return (0);
79240868Spjd		return (-1);
80240868Spjd	}
81240868Spjd	if (s1->ts_start > s2->ts_start) {
82240868Spjd		if (s1->ts_start < s2->ts_end)
83240868Spjd			return (0);
84240868Spjd		return (1);
85240868Spjd	}
86240868Spjd	return (0);
87240868Spjd}
88240868Spjd
89240868Spjdstatic int
90240868Spjdtrim_map_zio_compare(const void *x1, const void *x2)
91240868Spjd{
92240868Spjd	const zio_t *z1 = x1;
93240868Spjd	const zio_t *z2 = x2;
94240868Spjd
95240868Spjd	if (z1->io_offset < z2->io_offset) {
96240868Spjd		if (z1->io_offset + z1->io_size > z2->io_offset)
97240868Spjd			return (0);
98240868Spjd		return (-1);
99240868Spjd	}
100240868Spjd	if (z1->io_offset > z2->io_offset) {
101240868Spjd		if (z1->io_offset < z2->io_offset + z2->io_size)
102240868Spjd			return (0);
103240868Spjd		return (1);
104240868Spjd	}
105240868Spjd	return (0);
106240868Spjd}
107240868Spjd
108240868Spjdvoid
109240868Spjdtrim_map_create(vdev_t *vd)
110240868Spjd{
111240868Spjd	trim_map_t *tm;
112240868Spjd
113240868Spjd	ASSERT(vd->vdev_ops->vdev_op_leaf);
114240868Spjd
115240868Spjd	if (zfs_notrim)
116240868Spjd		return;
117240868Spjd
118240868Spjd	tm = kmem_zalloc(sizeof (*tm), KM_SLEEP);
119240868Spjd	mutex_init(&tm->tm_lock, NULL, MUTEX_DEFAULT, NULL);
120240868Spjd	list_create(&tm->tm_head, sizeof (trim_seg_t),
121240868Spjd	    offsetof(trim_seg_t, ts_next));
122240868Spjd	list_create(&tm->tm_pending_writes, sizeof (zio_t),
123240868Spjd	    offsetof(zio_t, io_trim_link));
124240868Spjd	avl_create(&tm->tm_queued_frees, trim_map_seg_compare,
125240868Spjd	    sizeof (trim_seg_t), offsetof(trim_seg_t, ts_node));
126240868Spjd	avl_create(&tm->tm_inflight_frees, trim_map_seg_compare,
127240868Spjd	    sizeof (trim_seg_t), offsetof(trim_seg_t, ts_node));
128240868Spjd	avl_create(&tm->tm_inflight_writes, trim_map_zio_compare,
129240868Spjd	    sizeof (zio_t), offsetof(zio_t, io_trim_node));
130240868Spjd	vd->vdev_trimmap = tm;
131240868Spjd}
132240868Spjd
133240868Spjdvoid
134240868Spjdtrim_map_destroy(vdev_t *vd)
135240868Spjd{
136240868Spjd	trim_map_t *tm;
137240868Spjd	trim_seg_t *ts;
138240868Spjd
139240868Spjd	ASSERT(vd->vdev_ops->vdev_op_leaf);
140240868Spjd
141240868Spjd	if (zfs_notrim)
142240868Spjd		return;
143240868Spjd
144240868Spjd	tm = vd->vdev_trimmap;
145240868Spjd	if (tm == NULL)
146240868Spjd		return;
147240868Spjd
148240868Spjd	/*
149240868Spjd	 * We may have been called before trim_map_vdev_commit_done()
150240868Spjd	 * had a chance to run, so do it now to prune the remaining
151240868Spjd	 * inflight frees.
152240868Spjd	 */
153240868Spjd	trim_map_vdev_commit_done(vd->vdev_spa, vd);
154240868Spjd
155240868Spjd	mutex_enter(&tm->tm_lock);
156240868Spjd	while ((ts = list_head(&tm->tm_head)) != NULL) {
157240868Spjd		avl_remove(&tm->tm_queued_frees, ts);
158240868Spjd		list_remove(&tm->tm_head, ts);
159240868Spjd		kmem_free(ts, sizeof (*ts));
160240868Spjd	}
161240868Spjd	mutex_exit(&tm->tm_lock);
162240868Spjd
163240868Spjd	avl_destroy(&tm->tm_queued_frees);
164240868Spjd	avl_destroy(&tm->tm_inflight_frees);
165240868Spjd	avl_destroy(&tm->tm_inflight_writes);
166240868Spjd	list_destroy(&tm->tm_pending_writes);
167240868Spjd	list_destroy(&tm->tm_head);
168240868Spjd	mutex_destroy(&tm->tm_lock);
169240868Spjd	kmem_free(tm, sizeof (*tm));
170240868Spjd	vd->vdev_trimmap = NULL;
171240868Spjd}
172240868Spjd
173240868Spjdstatic void
174240868Spjdtrim_map_segment_add(trim_map_t *tm, uint64_t start, uint64_t end, uint64_t txg)
175240868Spjd{
176240868Spjd	avl_index_t where;
177240868Spjd	trim_seg_t tsearch, *ts_before, *ts_after, *ts;
178240868Spjd	boolean_t merge_before, merge_after;
179240868Spjd
180240868Spjd	ASSERT(MUTEX_HELD(&tm->tm_lock));
181240868Spjd	VERIFY(start < end);
182240868Spjd
183240868Spjd	tsearch.ts_start = start;
184240868Spjd	tsearch.ts_end = end;
185240868Spjd
186240868Spjd	ts = avl_find(&tm->tm_queued_frees, &tsearch, &where);
187240868Spjd	if (ts != NULL) {
188240868Spjd		if (start < ts->ts_start)
189240868Spjd			trim_map_segment_add(tm, start, ts->ts_start, txg);
190240868Spjd		if (end > ts->ts_end)
191240868Spjd			trim_map_segment_add(tm, ts->ts_end, end, txg);
192240868Spjd		return;
193240868Spjd	}
194240868Spjd
195240868Spjd	ts_before = avl_nearest(&tm->tm_queued_frees, where, AVL_BEFORE);
196240868Spjd	ts_after = avl_nearest(&tm->tm_queued_frees, where, AVL_AFTER);
197240868Spjd
198240868Spjd	merge_before = (ts_before != NULL && ts_before->ts_end == start &&
199240868Spjd	    ts_before->ts_txg == txg);
200240868Spjd	merge_after = (ts_after != NULL && ts_after->ts_start == end &&
201240868Spjd	    ts_after->ts_txg == txg);
202240868Spjd
203240868Spjd	if (merge_before && merge_after) {
204240868Spjd		avl_remove(&tm->tm_queued_frees, ts_before);
205240868Spjd		list_remove(&tm->tm_head, ts_before);
206240868Spjd		ts_after->ts_start = ts_before->ts_start;
207240868Spjd		kmem_free(ts_before, sizeof (*ts_before));
208240868Spjd	} else if (merge_before) {
209240868Spjd		ts_before->ts_end = end;
210240868Spjd	} else if (merge_after) {
211240868Spjd		ts_after->ts_start = start;
212240868Spjd	} else {
213240868Spjd		ts = kmem_alloc(sizeof (*ts), KM_SLEEP);
214240868Spjd		ts->ts_start = start;
215240868Spjd		ts->ts_end = end;
216240868Spjd		ts->ts_txg = txg;
217240868Spjd		avl_insert(&tm->tm_queued_frees, ts, where);
218240868Spjd		list_insert_tail(&tm->tm_head, ts);
219240868Spjd	}
220240868Spjd}
221240868Spjd
222240868Spjdstatic void
223240868Spjdtrim_map_segment_remove(trim_map_t *tm, trim_seg_t *ts, uint64_t start,
224240868Spjd    uint64_t end)
225240868Spjd{
226240868Spjd	trim_seg_t *nts;
227240868Spjd	boolean_t left_over, right_over;
228240868Spjd
229240868Spjd	ASSERT(MUTEX_HELD(&tm->tm_lock));
230240868Spjd
231240868Spjd	left_over = (ts->ts_start < start);
232240868Spjd	right_over = (ts->ts_end > end);
233240868Spjd
234240868Spjd	if (left_over && right_over) {
235240868Spjd		nts = kmem_alloc(sizeof (*nts), KM_SLEEP);
236240868Spjd		nts->ts_start = end;
237240868Spjd		nts->ts_end = ts->ts_end;
238240868Spjd		nts->ts_txg = ts->ts_txg;
239240868Spjd		ts->ts_end = start;
240240868Spjd		avl_insert_here(&tm->tm_queued_frees, nts, ts, AVL_AFTER);
241240868Spjd		list_insert_after(&tm->tm_head, ts, nts);
242240868Spjd	} else if (left_over) {
243240868Spjd		ts->ts_end = start;
244240868Spjd	} else if (right_over) {
245240868Spjd		ts->ts_start = end;
246240868Spjd	} else {
247240868Spjd		avl_remove(&tm->tm_queued_frees, ts);
248240868Spjd		list_remove(&tm->tm_head, ts);
249240868Spjd		kmem_free(ts, sizeof (*ts));
250240868Spjd	}
251240868Spjd}
252240868Spjd
253240868Spjdstatic void
254240868Spjdtrim_map_free_locked(trim_map_t *tm, uint64_t start, uint64_t end, uint64_t txg)
255240868Spjd{
256240868Spjd	zio_t zsearch, *zs;
257240868Spjd
258240868Spjd	ASSERT(MUTEX_HELD(&tm->tm_lock));
259240868Spjd
260240868Spjd	zsearch.io_offset = start;
261240868Spjd	zsearch.io_size = end - start;
262240868Spjd
263240868Spjd	zs = avl_find(&tm->tm_inflight_writes, &zsearch, NULL);
264240868Spjd	if (zs == NULL) {
265240868Spjd		trim_map_segment_add(tm, start, end, txg);
266240868Spjd		return;
267240868Spjd	}
268240868Spjd	if (start < zs->io_offset)
269240868Spjd		trim_map_free_locked(tm, start, zs->io_offset, txg);
270240868Spjd	if (zs->io_offset + zs->io_size < end)
271240868Spjd		trim_map_free_locked(tm, zs->io_offset + zs->io_size, end, txg);
272240868Spjd}
273240868Spjd
274240868Spjdvoid
275248572Ssmhtrim_map_free(vdev_t *vd, uint64_t offset, uint64_t size)
276240868Spjd{
277240868Spjd	trim_map_t *tm = vd->vdev_trimmap;
278240868Spjd
279240868Spjd	if (zfs_notrim || vd->vdev_notrim || tm == NULL)
280240868Spjd		return;
281240868Spjd
282240868Spjd	mutex_enter(&tm->tm_lock);
283248572Ssmh	trim_map_free_locked(tm, offset, TRIM_ZIO_END(vd, offset, size),
284240868Spjd	    vd->vdev_spa->spa_syncing_txg);
285240868Spjd	mutex_exit(&tm->tm_lock);
286240868Spjd}
287240868Spjd
288240868Spjdboolean_t
289240868Spjdtrim_map_write_start(zio_t *zio)
290240868Spjd{
291240868Spjd	vdev_t *vd = zio->io_vd;
292240868Spjd	trim_map_t *tm = vd->vdev_trimmap;
293240868Spjd	trim_seg_t tsearch, *ts;
294240868Spjd	boolean_t left_over, right_over;
295240868Spjd	uint64_t start, end;
296240868Spjd
297240868Spjd	if (zfs_notrim || vd->vdev_notrim || tm == NULL)
298240868Spjd		return (B_TRUE);
299240868Spjd
300240868Spjd	start = zio->io_offset;
301248572Ssmh	end = TRIM_ZIO_END(zio->io_vd, start, zio->io_size);
302240868Spjd	tsearch.ts_start = start;
303240868Spjd	tsearch.ts_end = end;
304240868Spjd
305240868Spjd	mutex_enter(&tm->tm_lock);
306240868Spjd
307240868Spjd	/*
308240868Spjd	 * Checking for colliding in-flight frees.
309240868Spjd	 */
310240868Spjd	ts = avl_find(&tm->tm_inflight_frees, &tsearch, NULL);
311240868Spjd	if (ts != NULL) {
312240868Spjd		list_insert_tail(&tm->tm_pending_writes, zio);
313240868Spjd		mutex_exit(&tm->tm_lock);
314240868Spjd		return (B_FALSE);
315240868Spjd	}
316240868Spjd
317240868Spjd	ts = avl_find(&tm->tm_queued_frees, &tsearch, NULL);
318240868Spjd	if (ts != NULL) {
319240868Spjd		/*
320240868Spjd		 * Loop until all overlapping segments are removed.
321240868Spjd		 */
322240868Spjd		do {
323240868Spjd			trim_map_segment_remove(tm, ts, start, end);
324240868Spjd			ts = avl_find(&tm->tm_queued_frees, &tsearch, NULL);
325240868Spjd		} while (ts != NULL);
326240868Spjd	}
327240868Spjd	avl_add(&tm->tm_inflight_writes, zio);
328240868Spjd
329240868Spjd	mutex_exit(&tm->tm_lock);
330240868Spjd
331240868Spjd	return (B_TRUE);
332240868Spjd}
333240868Spjd
334240868Spjdvoid
335240868Spjdtrim_map_write_done(zio_t *zio)
336240868Spjd{
337240868Spjd	vdev_t *vd = zio->io_vd;
338240868Spjd	trim_map_t *tm = vd->vdev_trimmap;
339240868Spjd
340240868Spjd	/*
341240868Spjd	 * Don't check for vdev_notrim, since the write could have
342240868Spjd	 * started before vdev_notrim was set.
343240868Spjd	 */
344240868Spjd	if (zfs_notrim || tm == NULL)
345240868Spjd		return;
346240868Spjd
347240868Spjd	mutex_enter(&tm->tm_lock);
348240868Spjd	/*
349240868Spjd	 * Don't fail if the write isn't in the tree, since the write
350240868Spjd	 * could have started after vdev_notrim was set.
351240868Spjd	 */
352240868Spjd	if (zio->io_trim_node.avl_child[0] ||
353240868Spjd	    zio->io_trim_node.avl_child[1] ||
354240868Spjd	    AVL_XPARENT(&zio->io_trim_node) ||
355240868Spjd	    tm->tm_inflight_writes.avl_root == &zio->io_trim_node)
356240868Spjd		avl_remove(&tm->tm_inflight_writes, zio);
357240868Spjd	mutex_exit(&tm->tm_lock);
358240868Spjd}
359240868Spjd
360240868Spjd/*
361240868Spjd * Return the oldest segment (the one with the lowest txg) or false if
362240868Spjd * the list is empty or the first element's txg is greater than txg given
363240868Spjd * as function argument.
364240868Spjd */
365240868Spjdstatic trim_seg_t *
366240868Spjdtrim_map_first(trim_map_t *tm, uint64_t txg)
367240868Spjd{
368240868Spjd	trim_seg_t *ts;
369240868Spjd
370240868Spjd	ASSERT(MUTEX_HELD(&tm->tm_lock));
371240868Spjd
372240868Spjd	ts = list_head(&tm->tm_head);
373240868Spjd	if (ts != NULL && ts->ts_txg <= txg)
374240868Spjd		return (ts);
375240868Spjd	return (NULL);
376240868Spjd}
377240868Spjd
378240868Spjdstatic void
379240868Spjdtrim_map_vdev_commit(spa_t *spa, zio_t *zio, vdev_t *vd)
380240868Spjd{
381240868Spjd	trim_map_t *tm = vd->vdev_trimmap;
382240868Spjd	trim_seg_t *ts;
383240868Spjd	uint64_t start, size, txglimit;
384240868Spjd
385240868Spjd	ASSERT(vd->vdev_ops->vdev_op_leaf);
386240868Spjd
387240868Spjd	if (tm == NULL)
388240868Spjd		return;
389240868Spjd
390240868Spjd	txglimit = MIN(spa->spa_syncing_txg, spa_freeze_txg(spa)) -
391240868Spjd	    trim_txg_limit;
392240868Spjd
393240868Spjd	mutex_enter(&tm->tm_lock);
394240868Spjd	/*
395240868Spjd	 * Loop until we send all frees up to the txglimit.
396240868Spjd	 */
397240868Spjd	while ((ts = trim_map_first(tm, txglimit)) != NULL) {
398240868Spjd		list_remove(&tm->tm_head, ts);
399240868Spjd		avl_remove(&tm->tm_queued_frees, ts);
400240868Spjd		avl_add(&tm->tm_inflight_frees, ts);
401240868Spjd		zio_nowait(zio_trim(zio, spa, vd, ts->ts_start,
402240868Spjd		    ts->ts_end - ts->ts_start));
403240868Spjd	}
404240868Spjd	mutex_exit(&tm->tm_lock);
405240868Spjd}
406240868Spjd
407240868Spjdstatic void
408240868Spjdtrim_map_vdev_commit_done(spa_t *spa, vdev_t *vd)
409240868Spjd{
410240868Spjd	trim_map_t *tm = vd->vdev_trimmap;
411240868Spjd	trim_seg_t *ts;
412240868Spjd	list_t pending_writes;
413240868Spjd	zio_t *zio;
414240868Spjd	uint64_t start, size;
415240868Spjd	void *cookie;
416240868Spjd
417240868Spjd	ASSERT(vd->vdev_ops->vdev_op_leaf);
418240868Spjd
419240868Spjd	if (tm == NULL)
420240868Spjd		return;
421240868Spjd
422240868Spjd	mutex_enter(&tm->tm_lock);
423240868Spjd	if (!avl_is_empty(&tm->tm_inflight_frees)) {
424240868Spjd		cookie = NULL;
425240868Spjd		while ((ts = avl_destroy_nodes(&tm->tm_inflight_frees,
426240868Spjd		    &cookie)) != NULL) {
427240868Spjd			kmem_free(ts, sizeof (*ts));
428240868Spjd		}
429240868Spjd	}
430240868Spjd	list_create(&pending_writes, sizeof (zio_t), offsetof(zio_t,
431240868Spjd	    io_trim_link));
432240868Spjd	list_move_tail(&pending_writes, &tm->tm_pending_writes);
433240868Spjd	mutex_exit(&tm->tm_lock);
434240868Spjd
435240868Spjd	while ((zio = list_remove_head(&pending_writes)) != NULL) {
436240868Spjd		zio_vdev_io_reissue(zio);
437240868Spjd		zio_execute(zio);
438240868Spjd	}
439240868Spjd	list_destroy(&pending_writes);
440240868Spjd}
441240868Spjd
442240868Spjdstatic void
443240868Spjdtrim_map_commit(spa_t *spa, zio_t *zio, vdev_t *vd)
444240868Spjd{
445240868Spjd	int c;
446240868Spjd
447240868Spjd	if (vd == NULL || spa->spa_syncing_txg <= trim_txg_limit)
448240868Spjd		return;
449240868Spjd
450240868Spjd	if (vd->vdev_ops->vdev_op_leaf) {
451240868Spjd		trim_map_vdev_commit(spa, zio, vd);
452240868Spjd	} else {
453240868Spjd		for (c = 0; c < vd->vdev_children; c++)
454240868Spjd			trim_map_commit(spa, zio, vd->vdev_child[c]);
455240868Spjd	}
456240868Spjd}
457240868Spjd
458240868Spjdstatic void
459240868Spjdtrim_map_commit_done(spa_t *spa, vdev_t *vd)
460240868Spjd{
461240868Spjd	int c;
462240868Spjd
463240868Spjd	if (vd == NULL)
464240868Spjd		return;
465240868Spjd
466240868Spjd	if (vd->vdev_ops->vdev_op_leaf) {
467240868Spjd		trim_map_vdev_commit_done(spa, vd);
468240868Spjd	} else {
469240868Spjd		for (c = 0; c < vd->vdev_children; c++)
470240868Spjd			trim_map_commit_done(spa, vd->vdev_child[c]);
471240868Spjd	}
472240868Spjd}
473240868Spjd
474240868Spjdstatic void
475240868Spjdtrim_thread(void *arg)
476240868Spjd{
477240868Spjd	spa_t *spa = arg;
478240868Spjd	zio_t *zio;
479240868Spjd
480240868Spjd	for (;;) {
481240868Spjd		mutex_enter(&spa->spa_trim_lock);
482240868Spjd		if (spa->spa_trim_thread == NULL) {
483240868Spjd			spa->spa_trim_thread = curthread;
484240868Spjd			cv_signal(&spa->spa_trim_cv);
485240868Spjd			mutex_exit(&spa->spa_trim_lock);
486240868Spjd			thread_exit();
487240868Spjd		}
488240868Spjd		cv_wait(&spa->spa_trim_cv, &spa->spa_trim_lock);
489240868Spjd		mutex_exit(&spa->spa_trim_lock);
490240868Spjd
491240868Spjd		zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);
492240868Spjd
493240868Spjd		spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
494240868Spjd		trim_map_commit(spa, zio, spa->spa_root_vdev);
495240868Spjd		(void) zio_wait(zio);
496240868Spjd		trim_map_commit_done(spa, spa->spa_root_vdev);
497240868Spjd		spa_config_exit(spa, SCL_STATE, FTAG);
498240868Spjd	}
499240868Spjd}
500240868Spjd
501240868Spjdvoid
502240868Spjdtrim_thread_create(spa_t *spa)
503240868Spjd{
504240868Spjd
505240868Spjd	if (zfs_notrim)
506240868Spjd		return;
507240868Spjd
508240868Spjd	mutex_init(&spa->spa_trim_lock, NULL, MUTEX_DEFAULT, NULL);
509240868Spjd	cv_init(&spa->spa_trim_cv, NULL, CV_DEFAULT, NULL);
510240868Spjd	mutex_enter(&spa->spa_trim_lock);
511240868Spjd	spa->spa_trim_thread = thread_create(NULL, 0, trim_thread, spa, 0, &p0,
512240868Spjd	    TS_RUN, minclsyspri);
513240868Spjd	mutex_exit(&spa->spa_trim_lock);
514240868Spjd}
515240868Spjd
516240868Spjdvoid
517240868Spjdtrim_thread_destroy(spa_t *spa)
518240868Spjd{
519240868Spjd
520240868Spjd	if (zfs_notrim)
521240868Spjd		return;
522240868Spjd	if (spa->spa_trim_thread == NULL)
523240868Spjd		return;
524240868Spjd
525240868Spjd	mutex_enter(&spa->spa_trim_lock);
526240868Spjd	/* Setting spa_trim_thread to NULL tells the thread to stop. */
527240868Spjd	spa->spa_trim_thread = NULL;
528240868Spjd	cv_signal(&spa->spa_trim_cv);
529240868Spjd	/* The thread will set it back to != NULL on exit. */
530240868Spjd	while (spa->spa_trim_thread == NULL)
531240868Spjd		cv_wait(&spa->spa_trim_cv, &spa->spa_trim_lock);
532240868Spjd	spa->spa_trim_thread = NULL;
533240868Spjd	mutex_exit(&spa->spa_trim_lock);
534240868Spjd
535240868Spjd	cv_destroy(&spa->spa_trim_cv);
536240868Spjd	mutex_destroy(&spa->spa_trim_lock);
537240868Spjd}
538240868Spjd
539240868Spjdvoid
540240868Spjdtrim_thread_wakeup(spa_t *spa)
541240868Spjd{
542240868Spjd
543240868Spjd	if (zfs_notrim)
544240868Spjd		return;
545240868Spjd	if (spa->spa_trim_thread == NULL)
546240868Spjd		return;
547240868Spjd
548240868Spjd	mutex_enter(&spa->spa_trim_lock);
549240868Spjd	cv_signal(&spa->spa_trim_cv);
550240868Spjd	mutex_exit(&spa->spa_trim_lock);
551240868Spjd}
552