1240868Spjd/* 2240868Spjd * CDDL HEADER START 3240868Spjd * 4240868Spjd * The contents of this file are subject to the terms of the 5240868Spjd * Common Development and Distribution License (the "License"). 6240868Spjd * You may not use this file except in compliance with the License. 7240868Spjd * 8240868Spjd * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9240868Spjd * or http://www.opensolaris.org/os/licensing. 10240868Spjd * See the License for the specific language governing permissions 11240868Spjd * and limitations under the License. 12240868Spjd * 13240868Spjd * When distributing Covered Code, include this CDDL HEADER in each 14240868Spjd * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15240868Spjd * If applicable, add the following below this CDDL HEADER, with the 16240868Spjd * fields enclosed by brackets "[]" replaced with your own identifying 17240868Spjd * information: Portions Copyright [yyyy] [name of copyright owner] 18240868Spjd * 19240868Spjd * CDDL HEADER END 20240868Spjd */ 21240868Spjd/* 22240868Spjd * Copyright (c) 2012 Pawel Jakub Dawidek <pawel@dawidek.net>. 23240868Spjd * All rights reserved. 24240868Spjd */ 25240868Spjd 26240868Spjd#include <sys/zfs_context.h> 27240868Spjd#include <sys/spa_impl.h> 28240868Spjd#include <sys/vdev_impl.h> 29240868Spjd#include <sys/trim_map.h> 30248575Ssmh#include <sys/time.h> 31240868Spjd 32244187Ssmh/* 33244187Ssmh * Calculate the zio end, upgrading based on ashift which would be 34244187Ssmh * done by zio_vdev_io_start. 35244187Ssmh * 36244187Ssmh * This makes free range consolidation much more effective 37244187Ssmh * than it would otherwise be as well as ensuring that entire 38244187Ssmh * blocks are invalidated by writes. 39244187Ssmh */ 40248572Ssmh#define TRIM_ZIO_END(vd, offset, size) (offset + \ 41248572Ssmh P2ROUNDUP(size, 1ULL << vd->vdev_top->vdev_ashift)) 42244187Ssmh 43248577Ssmh#define TRIM_MAP_SINC(tm, size) \ 44248577Ssmh atomic_add_64(&(tm)->tm_bytes, (size)) 45248577Ssmh 46248577Ssmh#define TRIM_MAP_SDEC(tm, size) \ 47248602Ssmh atomic_add_64(&(tm)->tm_bytes, -(size)) 48248577Ssmh 49248577Ssmh#define TRIM_MAP_QINC(tm) \ 50248577Ssmh atomic_inc_64(&(tm)->tm_pending); \ 51248577Ssmh 52248577Ssmh#define TRIM_MAP_QDEC(tm) \ 53248577Ssmh atomic_dec_64(&(tm)->tm_pending); 54248577Ssmh 55240868Spjdtypedef struct trim_map { 56240868Spjd list_t tm_head; /* List of segments sorted by txg. */ 57240868Spjd avl_tree_t tm_queued_frees; /* AVL tree of segments waiting for TRIM. */ 58240868Spjd avl_tree_t tm_inflight_frees; /* AVL tree of in-flight TRIMs. */ 59240868Spjd avl_tree_t tm_inflight_writes; /* AVL tree of in-flight writes. */ 60240868Spjd list_t tm_pending_writes; /* Writes blocked on in-flight frees. */ 61240868Spjd kmutex_t tm_lock; 62248577Ssmh uint64_t tm_pending; /* Count of pending TRIMs. */ 63248577Ssmh uint64_t tm_bytes; /* Total size in bytes of queued TRIMs. */ 64240868Spjd} trim_map_t; 65240868Spjd 66240868Spjdtypedef struct trim_seg { 67240868Spjd avl_node_t ts_node; /* AVL node. */ 68240868Spjd list_node_t ts_next; /* List element. */ 69240868Spjd uint64_t ts_start; /* Starting offset of this segment. */ 70240868Spjd uint64_t ts_end; /* Ending offset (non-inclusive). */ 71240868Spjd uint64_t ts_txg; /* Segment creation txg. */ 72248575Ssmh hrtime_t ts_time; /* Segment creation time. */ 73240868Spjd} trim_seg_t; 74240868Spjd 75249921Ssmhextern boolean_t zfs_trim_enabled; 76240868Spjd 77248577Ssmhstatic u_int trim_txg_delay = 32; 78248577Ssmhstatic u_int trim_timeout = 30; 79248577Ssmhstatic u_int trim_max_interval = 1; 80248577Ssmh/* Limit outstanding TRIMs to 2G (max size for a single TRIM request) */ 81248577Ssmhstatic uint64_t trim_vdev_max_bytes = 2147483648; 82248577Ssmh/* Limit outstanding TRIMs to 64 (max ranges for a single TRIM request) */ 83248577Ssmhstatic u_int trim_vdev_max_pending = 64; 84248577Ssmh 85240868SpjdSYSCTL_DECL(_vfs_zfs); 86248577SsmhSYSCTL_NODE(_vfs_zfs, OID_AUTO, trim, CTLFLAG_RD, 0, "ZFS TRIM"); 87240868Spjd 88248577SsmhTUNABLE_INT("vfs.zfs.trim.txg_delay", &trim_txg_delay); 89248577SsmhSYSCTL_UINT(_vfs_zfs_trim, OID_AUTO, txg_delay, CTLFLAG_RWTUN, &trim_txg_delay, 90248577Ssmh 0, "Delay TRIMs by up to this many TXGs"); 91248575Ssmh 92248577SsmhTUNABLE_INT("vfs.zfs.trim.timeout", &trim_timeout); 93248577SsmhSYSCTL_UINT(_vfs_zfs_trim, OID_AUTO, timeout, CTLFLAG_RWTUN, &trim_timeout, 0, 94248577Ssmh "Delay TRIMs by up to this many seconds"); 95248577Ssmh 96248577SsmhTUNABLE_INT("vfs.zfs.trim.max_interval", &trim_max_interval); 97248577SsmhSYSCTL_UINT(_vfs_zfs_trim, OID_AUTO, max_interval, CTLFLAG_RWTUN, 98248577Ssmh &trim_max_interval, 0, 99248577Ssmh "Maximum interval between TRIM queue processing (seconds)"); 100248577Ssmh 101248577SsmhSYSCTL_DECL(_vfs_zfs_vdev); 102248577SsmhTUNABLE_QUAD("vfs.zfs.vdev.trim_max_bytes", &trim_vdev_max_bytes); 103248577SsmhSYSCTL_QUAD(_vfs_zfs_vdev, OID_AUTO, trim_max_bytes, CTLFLAG_RWTUN, 104248577Ssmh &trim_vdev_max_bytes, 0, 105248577Ssmh "Maximum pending TRIM bytes for a vdev"); 106248577Ssmh 107248577SsmhTUNABLE_INT("vfs.zfs.vdev.trim_max_pending", &trim_vdev_max_pending); 108248577SsmhSYSCTL_UINT(_vfs_zfs_vdev, OID_AUTO, trim_max_pending, CTLFLAG_RWTUN, 109248577Ssmh &trim_vdev_max_pending, 0, 110248577Ssmh "Maximum pending TRIM segments for a vdev"); 111248577Ssmh 112248577Ssmh 113240868Spjdstatic void trim_map_vdev_commit_done(spa_t *spa, vdev_t *vd); 114240868Spjd 115240868Spjdstatic int 116240868Spjdtrim_map_seg_compare(const void *x1, const void *x2) 117240868Spjd{ 118240868Spjd const trim_seg_t *s1 = x1; 119240868Spjd const trim_seg_t *s2 = x2; 120240868Spjd 121240868Spjd if (s1->ts_start < s2->ts_start) { 122240868Spjd if (s1->ts_end > s2->ts_start) 123240868Spjd return (0); 124240868Spjd return (-1); 125240868Spjd } 126240868Spjd if (s1->ts_start > s2->ts_start) { 127240868Spjd if (s1->ts_start < s2->ts_end) 128240868Spjd return (0); 129240868Spjd return (1); 130240868Spjd } 131240868Spjd return (0); 132240868Spjd} 133240868Spjd 134240868Spjdstatic int 135240868Spjdtrim_map_zio_compare(const void *x1, const void *x2) 136240868Spjd{ 137240868Spjd const zio_t *z1 = x1; 138240868Spjd const zio_t *z2 = x2; 139240868Spjd 140240868Spjd if (z1->io_offset < z2->io_offset) { 141240868Spjd if (z1->io_offset + z1->io_size > z2->io_offset) 142240868Spjd return (0); 143240868Spjd return (-1); 144240868Spjd } 145240868Spjd if (z1->io_offset > z2->io_offset) { 146240868Spjd if (z1->io_offset < z2->io_offset + z2->io_size) 147240868Spjd return (0); 148240868Spjd return (1); 149240868Spjd } 150240868Spjd return (0); 151240868Spjd} 152240868Spjd 153240868Spjdvoid 154240868Spjdtrim_map_create(vdev_t *vd) 155240868Spjd{ 156240868Spjd trim_map_t *tm; 157240868Spjd 158240868Spjd ASSERT(vd->vdev_ops->vdev_op_leaf); 159240868Spjd 160249921Ssmh if (!zfs_trim_enabled) 161240868Spjd return; 162240868Spjd 163240868Spjd tm = kmem_zalloc(sizeof (*tm), KM_SLEEP); 164240868Spjd mutex_init(&tm->tm_lock, NULL, MUTEX_DEFAULT, NULL); 165240868Spjd list_create(&tm->tm_head, sizeof (trim_seg_t), 166240868Spjd offsetof(trim_seg_t, ts_next)); 167240868Spjd list_create(&tm->tm_pending_writes, sizeof (zio_t), 168240868Spjd offsetof(zio_t, io_trim_link)); 169240868Spjd avl_create(&tm->tm_queued_frees, trim_map_seg_compare, 170240868Spjd sizeof (trim_seg_t), offsetof(trim_seg_t, ts_node)); 171240868Spjd avl_create(&tm->tm_inflight_frees, trim_map_seg_compare, 172240868Spjd sizeof (trim_seg_t), offsetof(trim_seg_t, ts_node)); 173240868Spjd avl_create(&tm->tm_inflight_writes, trim_map_zio_compare, 174240868Spjd sizeof (zio_t), offsetof(zio_t, io_trim_node)); 175240868Spjd vd->vdev_trimmap = tm; 176240868Spjd} 177240868Spjd 178240868Spjdvoid 179240868Spjdtrim_map_destroy(vdev_t *vd) 180240868Spjd{ 181240868Spjd trim_map_t *tm; 182240868Spjd trim_seg_t *ts; 183240868Spjd 184240868Spjd ASSERT(vd->vdev_ops->vdev_op_leaf); 185240868Spjd 186249921Ssmh if (!zfs_trim_enabled) 187240868Spjd return; 188240868Spjd 189240868Spjd tm = vd->vdev_trimmap; 190240868Spjd if (tm == NULL) 191240868Spjd return; 192240868Spjd 193240868Spjd /* 194240868Spjd * We may have been called before trim_map_vdev_commit_done() 195240868Spjd * had a chance to run, so do it now to prune the remaining 196240868Spjd * inflight frees. 197240868Spjd */ 198240868Spjd trim_map_vdev_commit_done(vd->vdev_spa, vd); 199240868Spjd 200240868Spjd mutex_enter(&tm->tm_lock); 201240868Spjd while ((ts = list_head(&tm->tm_head)) != NULL) { 202240868Spjd avl_remove(&tm->tm_queued_frees, ts); 203240868Spjd list_remove(&tm->tm_head, ts); 204240868Spjd kmem_free(ts, sizeof (*ts)); 205248577Ssmh TRIM_MAP_SDEC(tm, ts->ts_end - ts->ts_start); 206248577Ssmh TRIM_MAP_QDEC(tm); 207240868Spjd } 208240868Spjd mutex_exit(&tm->tm_lock); 209240868Spjd 210240868Spjd avl_destroy(&tm->tm_queued_frees); 211240868Spjd avl_destroy(&tm->tm_inflight_frees); 212240868Spjd avl_destroy(&tm->tm_inflight_writes); 213240868Spjd list_destroy(&tm->tm_pending_writes); 214240868Spjd list_destroy(&tm->tm_head); 215240868Spjd mutex_destroy(&tm->tm_lock); 216240868Spjd kmem_free(tm, sizeof (*tm)); 217240868Spjd vd->vdev_trimmap = NULL; 218240868Spjd} 219240868Spjd 220240868Spjdstatic void 221240868Spjdtrim_map_segment_add(trim_map_t *tm, uint64_t start, uint64_t end, uint64_t txg) 222240868Spjd{ 223240868Spjd avl_index_t where; 224240868Spjd trim_seg_t tsearch, *ts_before, *ts_after, *ts; 225240868Spjd boolean_t merge_before, merge_after; 226248575Ssmh hrtime_t time; 227240868Spjd 228240868Spjd ASSERT(MUTEX_HELD(&tm->tm_lock)); 229240868Spjd VERIFY(start < end); 230240868Spjd 231248575Ssmh time = gethrtime(); 232240868Spjd tsearch.ts_start = start; 233240868Spjd tsearch.ts_end = end; 234240868Spjd 235240868Spjd ts = avl_find(&tm->tm_queued_frees, &tsearch, &where); 236240868Spjd if (ts != NULL) { 237240868Spjd if (start < ts->ts_start) 238240868Spjd trim_map_segment_add(tm, start, ts->ts_start, txg); 239240868Spjd if (end > ts->ts_end) 240240868Spjd trim_map_segment_add(tm, ts->ts_end, end, txg); 241240868Spjd return; 242240868Spjd } 243240868Spjd 244240868Spjd ts_before = avl_nearest(&tm->tm_queued_frees, where, AVL_BEFORE); 245240868Spjd ts_after = avl_nearest(&tm->tm_queued_frees, where, AVL_AFTER); 246240868Spjd 247248577Ssmh merge_before = (ts_before != NULL && ts_before->ts_end == start); 248248577Ssmh merge_after = (ts_after != NULL && ts_after->ts_start == end); 249240868Spjd 250240868Spjd if (merge_before && merge_after) { 251248577Ssmh TRIM_MAP_SINC(tm, ts_after->ts_start - ts_before->ts_end); 252248577Ssmh TRIM_MAP_QDEC(tm); 253240868Spjd avl_remove(&tm->tm_queued_frees, ts_before); 254240868Spjd list_remove(&tm->tm_head, ts_before); 255240868Spjd ts_after->ts_start = ts_before->ts_start; 256248577Ssmh ts_after->ts_txg = txg; 257248577Ssmh ts_after->ts_time = time; 258240868Spjd kmem_free(ts_before, sizeof (*ts_before)); 259240868Spjd } else if (merge_before) { 260248577Ssmh TRIM_MAP_SINC(tm, end - ts_before->ts_end); 261240868Spjd ts_before->ts_end = end; 262248577Ssmh ts_before->ts_txg = txg; 263248577Ssmh ts_before->ts_time = time; 264240868Spjd } else if (merge_after) { 265248577Ssmh TRIM_MAP_SINC(tm, ts_after->ts_start - start); 266240868Spjd ts_after->ts_start = start; 267248577Ssmh ts_after->ts_txg = txg; 268248577Ssmh ts_after->ts_time = time; 269240868Spjd } else { 270248577Ssmh TRIM_MAP_SINC(tm, end - start); 271248577Ssmh TRIM_MAP_QINC(tm); 272240868Spjd ts = kmem_alloc(sizeof (*ts), KM_SLEEP); 273240868Spjd ts->ts_start = start; 274240868Spjd ts->ts_end = end; 275240868Spjd ts->ts_txg = txg; 276248575Ssmh ts->ts_time = time; 277240868Spjd avl_insert(&tm->tm_queued_frees, ts, where); 278240868Spjd list_insert_tail(&tm->tm_head, ts); 279240868Spjd } 280240868Spjd} 281240868Spjd 282240868Spjdstatic void 283240868Spjdtrim_map_segment_remove(trim_map_t *tm, trim_seg_t *ts, uint64_t start, 284240868Spjd uint64_t end) 285240868Spjd{ 286240868Spjd trim_seg_t *nts; 287240868Spjd boolean_t left_over, right_over; 288240868Spjd 289240868Spjd ASSERT(MUTEX_HELD(&tm->tm_lock)); 290240868Spjd 291240868Spjd left_over = (ts->ts_start < start); 292240868Spjd right_over = (ts->ts_end > end); 293240868Spjd 294248577Ssmh TRIM_MAP_SDEC(tm, end - start); 295240868Spjd if (left_over && right_over) { 296240868Spjd nts = kmem_alloc(sizeof (*nts), KM_SLEEP); 297240868Spjd nts->ts_start = end; 298240868Spjd nts->ts_end = ts->ts_end; 299240868Spjd nts->ts_txg = ts->ts_txg; 300248575Ssmh nts->ts_time = ts->ts_time; 301240868Spjd ts->ts_end = start; 302240868Spjd avl_insert_here(&tm->tm_queued_frees, nts, ts, AVL_AFTER); 303240868Spjd list_insert_after(&tm->tm_head, ts, nts); 304248577Ssmh TRIM_MAP_QINC(tm); 305240868Spjd } else if (left_over) { 306240868Spjd ts->ts_end = start; 307240868Spjd } else if (right_over) { 308240868Spjd ts->ts_start = end; 309240868Spjd } else { 310240868Spjd avl_remove(&tm->tm_queued_frees, ts); 311240868Spjd list_remove(&tm->tm_head, ts); 312248577Ssmh TRIM_MAP_QDEC(tm); 313240868Spjd kmem_free(ts, sizeof (*ts)); 314240868Spjd } 315240868Spjd} 316240868Spjd 317240868Spjdstatic void 318240868Spjdtrim_map_free_locked(trim_map_t *tm, uint64_t start, uint64_t end, uint64_t txg) 319240868Spjd{ 320240868Spjd zio_t zsearch, *zs; 321240868Spjd 322240868Spjd ASSERT(MUTEX_HELD(&tm->tm_lock)); 323240868Spjd 324240868Spjd zsearch.io_offset = start; 325240868Spjd zsearch.io_size = end - start; 326240868Spjd 327240868Spjd zs = avl_find(&tm->tm_inflight_writes, &zsearch, NULL); 328240868Spjd if (zs == NULL) { 329240868Spjd trim_map_segment_add(tm, start, end, txg); 330240868Spjd return; 331240868Spjd } 332240868Spjd if (start < zs->io_offset) 333240868Spjd trim_map_free_locked(tm, start, zs->io_offset, txg); 334240868Spjd if (zs->io_offset + zs->io_size < end) 335240868Spjd trim_map_free_locked(tm, zs->io_offset + zs->io_size, end, txg); 336240868Spjd} 337240868Spjd 338240868Spjdvoid 339248574Ssmhtrim_map_free(vdev_t *vd, uint64_t offset, uint64_t size, uint64_t txg) 340240868Spjd{ 341240868Spjd trim_map_t *tm = vd->vdev_trimmap; 342240868Spjd 343249921Ssmh if (!zfs_trim_enabled || vd->vdev_notrim || tm == NULL) 344240868Spjd return; 345240868Spjd 346240868Spjd mutex_enter(&tm->tm_lock); 347248574Ssmh trim_map_free_locked(tm, offset, TRIM_ZIO_END(vd, offset, size), txg); 348240868Spjd mutex_exit(&tm->tm_lock); 349240868Spjd} 350240868Spjd 351240868Spjdboolean_t 352240868Spjdtrim_map_write_start(zio_t *zio) 353240868Spjd{ 354240868Spjd vdev_t *vd = zio->io_vd; 355240868Spjd trim_map_t *tm = vd->vdev_trimmap; 356240868Spjd trim_seg_t tsearch, *ts; 357240868Spjd boolean_t left_over, right_over; 358240868Spjd uint64_t start, end; 359240868Spjd 360249921Ssmh if (!zfs_trim_enabled || vd->vdev_notrim || tm == NULL) 361240868Spjd return (B_TRUE); 362240868Spjd 363240868Spjd start = zio->io_offset; 364248572Ssmh end = TRIM_ZIO_END(zio->io_vd, start, zio->io_size); 365240868Spjd tsearch.ts_start = start; 366240868Spjd tsearch.ts_end = end; 367240868Spjd 368240868Spjd mutex_enter(&tm->tm_lock); 369240868Spjd 370240868Spjd /* 371240868Spjd * Checking for colliding in-flight frees. 372240868Spjd */ 373240868Spjd ts = avl_find(&tm->tm_inflight_frees, &tsearch, NULL); 374240868Spjd if (ts != NULL) { 375240868Spjd list_insert_tail(&tm->tm_pending_writes, zio); 376240868Spjd mutex_exit(&tm->tm_lock); 377240868Spjd return (B_FALSE); 378240868Spjd } 379240868Spjd 380240868Spjd ts = avl_find(&tm->tm_queued_frees, &tsearch, NULL); 381240868Spjd if (ts != NULL) { 382240868Spjd /* 383240868Spjd * Loop until all overlapping segments are removed. 384240868Spjd */ 385240868Spjd do { 386240868Spjd trim_map_segment_remove(tm, ts, start, end); 387240868Spjd ts = avl_find(&tm->tm_queued_frees, &tsearch, NULL); 388240868Spjd } while (ts != NULL); 389240868Spjd } 390240868Spjd avl_add(&tm->tm_inflight_writes, zio); 391240868Spjd 392240868Spjd mutex_exit(&tm->tm_lock); 393240868Spjd 394240868Spjd return (B_TRUE); 395240868Spjd} 396240868Spjd 397240868Spjdvoid 398240868Spjdtrim_map_write_done(zio_t *zio) 399240868Spjd{ 400240868Spjd vdev_t *vd = zio->io_vd; 401240868Spjd trim_map_t *tm = vd->vdev_trimmap; 402240868Spjd 403240868Spjd /* 404240868Spjd * Don't check for vdev_notrim, since the write could have 405240868Spjd * started before vdev_notrim was set. 406240868Spjd */ 407249921Ssmh if (!zfs_trim_enabled || tm == NULL) 408240868Spjd return; 409240868Spjd 410240868Spjd mutex_enter(&tm->tm_lock); 411240868Spjd /* 412240868Spjd * Don't fail if the write isn't in the tree, since the write 413240868Spjd * could have started after vdev_notrim was set. 414240868Spjd */ 415240868Spjd if (zio->io_trim_node.avl_child[0] || 416240868Spjd zio->io_trim_node.avl_child[1] || 417240868Spjd AVL_XPARENT(&zio->io_trim_node) || 418240868Spjd tm->tm_inflight_writes.avl_root == &zio->io_trim_node) 419240868Spjd avl_remove(&tm->tm_inflight_writes, zio); 420240868Spjd mutex_exit(&tm->tm_lock); 421240868Spjd} 422240868Spjd 423240868Spjd/* 424248577Ssmh * Return the oldest segment (the one with the lowest txg / time) or NULL if: 425248577Ssmh * 1. The list is empty 426248577Ssmh * 2. The first element's txg is greater than txgsafe 427248577Ssmh * 3. The first element's txg is not greater than the txg argument and the 428248577Ssmh * the first element's time is not greater than time argument 429240868Spjd */ 430240868Spjdstatic trim_seg_t * 431248577Ssmhtrim_map_first(trim_map_t *tm, uint64_t txg, uint64_t txgsafe, hrtime_t time) 432240868Spjd{ 433240868Spjd trim_seg_t *ts; 434240868Spjd 435240868Spjd ASSERT(MUTEX_HELD(&tm->tm_lock)); 436248577Ssmh VERIFY(txgsafe >= txg); 437240868Spjd 438240868Spjd ts = list_head(&tm->tm_head); 439248577Ssmh if (ts != NULL && ts->ts_txg <= txgsafe && 440248577Ssmh (ts->ts_txg <= txg || ts->ts_time <= time || 441248577Ssmh tm->tm_bytes > trim_vdev_max_bytes || 442248577Ssmh tm->tm_pending > trim_vdev_max_pending)) 443240868Spjd return (ts); 444240868Spjd return (NULL); 445240868Spjd} 446240868Spjd 447240868Spjdstatic void 448240868Spjdtrim_map_vdev_commit(spa_t *spa, zio_t *zio, vdev_t *vd) 449240868Spjd{ 450240868Spjd trim_map_t *tm = vd->vdev_trimmap; 451240868Spjd trim_seg_t *ts; 452248577Ssmh uint64_t size, txgtarget, txgsafe; 453248575Ssmh hrtime_t timelimit; 454240868Spjd 455240868Spjd ASSERT(vd->vdev_ops->vdev_op_leaf); 456240868Spjd 457240868Spjd if (tm == NULL) 458240868Spjd return; 459240868Spjd 460248577Ssmh timelimit = gethrtime() - trim_timeout * NANOSEC; 461248575Ssmh if (vd->vdev_isl2cache) { 462248577Ssmh txgsafe = UINT64_MAX; 463248577Ssmh txgtarget = UINT64_MAX; 464248575Ssmh } else { 465248577Ssmh txgsafe = MIN(spa_last_synced_txg(spa), spa_freeze_txg(spa)); 466248577Ssmh if (txgsafe > trim_txg_delay) 467248577Ssmh txgtarget = txgsafe - trim_txg_delay; 468248577Ssmh else 469248577Ssmh txgtarget = 0; 470248575Ssmh } 471240868Spjd 472240868Spjd mutex_enter(&tm->tm_lock); 473248577Ssmh /* Loop until we have sent all outstanding free's */ 474248577Ssmh while ((ts = trim_map_first(tm, txgtarget, txgsafe, timelimit)) 475248577Ssmh != NULL) { 476240868Spjd list_remove(&tm->tm_head, ts); 477240868Spjd avl_remove(&tm->tm_queued_frees, ts); 478240868Spjd avl_add(&tm->tm_inflight_frees, ts); 479248577Ssmh size = ts->ts_end - ts->ts_start; 480248577Ssmh zio_nowait(zio_trim(zio, spa, vd, ts->ts_start, size)); 481248577Ssmh TRIM_MAP_SDEC(tm, size); 482248577Ssmh TRIM_MAP_QDEC(tm); 483240868Spjd } 484240868Spjd mutex_exit(&tm->tm_lock); 485240868Spjd} 486240868Spjd 487240868Spjdstatic void 488240868Spjdtrim_map_vdev_commit_done(spa_t *spa, vdev_t *vd) 489240868Spjd{ 490240868Spjd trim_map_t *tm = vd->vdev_trimmap; 491240868Spjd trim_seg_t *ts; 492240868Spjd list_t pending_writes; 493240868Spjd zio_t *zio; 494240868Spjd uint64_t start, size; 495240868Spjd void *cookie; 496240868Spjd 497240868Spjd ASSERT(vd->vdev_ops->vdev_op_leaf); 498240868Spjd 499240868Spjd if (tm == NULL) 500240868Spjd return; 501240868Spjd 502240868Spjd mutex_enter(&tm->tm_lock); 503240868Spjd if (!avl_is_empty(&tm->tm_inflight_frees)) { 504240868Spjd cookie = NULL; 505240868Spjd while ((ts = avl_destroy_nodes(&tm->tm_inflight_frees, 506240868Spjd &cookie)) != NULL) { 507240868Spjd kmem_free(ts, sizeof (*ts)); 508240868Spjd } 509240868Spjd } 510240868Spjd list_create(&pending_writes, sizeof (zio_t), offsetof(zio_t, 511240868Spjd io_trim_link)); 512240868Spjd list_move_tail(&pending_writes, &tm->tm_pending_writes); 513240868Spjd mutex_exit(&tm->tm_lock); 514240868Spjd 515240868Spjd while ((zio = list_remove_head(&pending_writes)) != NULL) { 516240868Spjd zio_vdev_io_reissue(zio); 517240868Spjd zio_execute(zio); 518240868Spjd } 519240868Spjd list_destroy(&pending_writes); 520240868Spjd} 521240868Spjd 522240868Spjdstatic void 523240868Spjdtrim_map_commit(spa_t *spa, zio_t *zio, vdev_t *vd) 524240868Spjd{ 525240868Spjd int c; 526240868Spjd 527248577Ssmh if (vd == NULL) 528240868Spjd return; 529240868Spjd 530240868Spjd if (vd->vdev_ops->vdev_op_leaf) { 531240868Spjd trim_map_vdev_commit(spa, zio, vd); 532240868Spjd } else { 533240868Spjd for (c = 0; c < vd->vdev_children; c++) 534240868Spjd trim_map_commit(spa, zio, vd->vdev_child[c]); 535240868Spjd } 536240868Spjd} 537240868Spjd 538240868Spjdstatic void 539240868Spjdtrim_map_commit_done(spa_t *spa, vdev_t *vd) 540240868Spjd{ 541240868Spjd int c; 542240868Spjd 543240868Spjd if (vd == NULL) 544240868Spjd return; 545240868Spjd 546240868Spjd if (vd->vdev_ops->vdev_op_leaf) { 547240868Spjd trim_map_vdev_commit_done(spa, vd); 548240868Spjd } else { 549240868Spjd for (c = 0; c < vd->vdev_children; c++) 550240868Spjd trim_map_commit_done(spa, vd->vdev_child[c]); 551240868Spjd } 552240868Spjd} 553240868Spjd 554240868Spjdstatic void 555240868Spjdtrim_thread(void *arg) 556240868Spjd{ 557240868Spjd spa_t *spa = arg; 558240868Spjd zio_t *zio; 559240868Spjd 560248576Ssmh#ifdef _KERNEL 561248576Ssmh (void) snprintf(curthread->td_name, sizeof(curthread->td_name), 562248576Ssmh "trim %s", spa_name(spa)); 563248576Ssmh#endif 564248576Ssmh 565240868Spjd for (;;) { 566240868Spjd mutex_enter(&spa->spa_trim_lock); 567240868Spjd if (spa->spa_trim_thread == NULL) { 568240868Spjd spa->spa_trim_thread = curthread; 569240868Spjd cv_signal(&spa->spa_trim_cv); 570240868Spjd mutex_exit(&spa->spa_trim_lock); 571240868Spjd thread_exit(); 572240868Spjd } 573248577Ssmh 574248577Ssmh (void) cv_timedwait(&spa->spa_trim_cv, &spa->spa_trim_lock, 575248577Ssmh hz * trim_max_interval); 576240868Spjd mutex_exit(&spa->spa_trim_lock); 577240868Spjd 578240868Spjd zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL); 579240868Spjd 580240868Spjd spa_config_enter(spa, SCL_STATE, FTAG, RW_READER); 581240868Spjd trim_map_commit(spa, zio, spa->spa_root_vdev); 582240868Spjd (void) zio_wait(zio); 583240868Spjd trim_map_commit_done(spa, spa->spa_root_vdev); 584240868Spjd spa_config_exit(spa, SCL_STATE, FTAG); 585240868Spjd } 586240868Spjd} 587240868Spjd 588240868Spjdvoid 589240868Spjdtrim_thread_create(spa_t *spa) 590240868Spjd{ 591240868Spjd 592249921Ssmh if (!zfs_trim_enabled) 593240868Spjd return; 594240868Spjd 595240868Spjd mutex_init(&spa->spa_trim_lock, NULL, MUTEX_DEFAULT, NULL); 596240868Spjd cv_init(&spa->spa_trim_cv, NULL, CV_DEFAULT, NULL); 597240868Spjd mutex_enter(&spa->spa_trim_lock); 598240868Spjd spa->spa_trim_thread = thread_create(NULL, 0, trim_thread, spa, 0, &p0, 599240868Spjd TS_RUN, minclsyspri); 600240868Spjd mutex_exit(&spa->spa_trim_lock); 601240868Spjd} 602240868Spjd 603240868Spjdvoid 604240868Spjdtrim_thread_destroy(spa_t *spa) 605240868Spjd{ 606240868Spjd 607249921Ssmh if (!zfs_trim_enabled) 608240868Spjd return; 609240868Spjd if (spa->spa_trim_thread == NULL) 610240868Spjd return; 611240868Spjd 612240868Spjd mutex_enter(&spa->spa_trim_lock); 613240868Spjd /* Setting spa_trim_thread to NULL tells the thread to stop. */ 614240868Spjd spa->spa_trim_thread = NULL; 615240868Spjd cv_signal(&spa->spa_trim_cv); 616240868Spjd /* The thread will set it back to != NULL on exit. */ 617240868Spjd while (spa->spa_trim_thread == NULL) 618240868Spjd cv_wait(&spa->spa_trim_cv, &spa->spa_trim_lock); 619240868Spjd spa->spa_trim_thread = NULL; 620240868Spjd mutex_exit(&spa->spa_trim_lock); 621240868Spjd 622240868Spjd cv_destroy(&spa->spa_trim_cv); 623240868Spjd mutex_destroy(&spa->spa_trim_lock); 624240868Spjd} 625240868Spjd 626240868Spjdvoid 627240868Spjdtrim_thread_wakeup(spa_t *spa) 628240868Spjd{ 629240868Spjd 630249921Ssmh if (!zfs_trim_enabled) 631240868Spjd return; 632240868Spjd if (spa->spa_trim_thread == NULL) 633240868Spjd return; 634240868Spjd 635240868Spjd mutex_enter(&spa->spa_trim_lock); 636240868Spjd cv_signal(&spa->spa_trim_cv); 637240868Spjd mutex_exit(&spa->spa_trim_lock); 638240868Spjd} 639