Deleted Added
full compact
22c22
< * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
---
> * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
51,52c51,52
< * i/os will be aggregated into a single large i/o up to
< * zfs_vdev_aggregation_limit bytes long.
---
> * To reduce IOPs, we aggregate small adjacent i/os into one large i/o.
> * For read i/os, we also aggregate across small adjacency gaps.
54a55
> int zfs_vdev_read_gap_limit = 32 << 10;
171,172c172
< zio_t *dio;
< uint64_t offset = 0;
---
> zio_t *pio;
174c174
< while ((dio = aio->io_delegate_list) != NULL) {
---
> while ((pio = zio_walk_parents(aio)) != NULL)
176,184c176,177
< bcopy((char *)aio->io_data + offset, dio->io_data,
< dio->io_size);
< offset += dio->io_size;
< aio->io_delegate_list = dio->io_delegate_next;
< dio->io_delegate_next = NULL;
< dio->io_error = aio->io_error;
< zio_execute(dio);
< }
< ASSERT3U(offset, ==, aio->io_size);
---
> bcopy((char *)aio->io_data + (pio->io_offset -
> aio->io_offset), pio->io_data, pio->io_size);
189,190c182,189
< #define IS_ADJACENT(io, nio) \
< ((io)->io_offset + (io)->io_size == (nio)->io_offset)
---
> /*
> * Compute the range spanned by two i/os, which is the endpoint of the last
> * (lio->io_offset + lio->io_size) minus start of the first (fio->io_offset).
> * Conveniently, the gap between fio and lio is given by -IO_SPAN(lio, fio);
> * thus fio and lio are adjacent if and only if IO_SPAN(lio, fio) == 0.
> */
> #define IO_SPAN(fio, lio) ((lio)->io_offset + (lio)->io_size - (fio)->io_offset)
> #define IO_GAP(fio, lio) (-IO_SPAN(lio, fio))
195,197c194,198
< zio_t *fio, *lio, *aio, *dio;
< avl_tree_t *tree;
< uint64_t size;
---
> zio_t *fio, *lio, *aio, *dio, *nio;
> avl_tree_t *t;
> int flags;
> uint64_t maxspan = zfs_vdev_aggregation_limit;
> uint64_t maxgap;
207,208c208,210
< tree = fio->io_vdev_tree;
< size = fio->io_size;
---
> t = fio->io_vdev_tree;
> flags = fio->io_flags & ZIO_FLAG_AGG_INHERIT;
> maxgap = (t == &vq->vq_read_tree) ? zfs_vdev_read_gap_limit : 0;
210,216c212,223
< while ((dio = AVL_PREV(tree, fio)) != NULL && IS_ADJACENT(dio, fio) &&
< !((dio->io_flags | fio->io_flags) & ZIO_FLAG_DONT_AGGREGATE) &&
< size + dio->io_size <= zfs_vdev_aggregation_limit) {
< dio->io_delegate_next = fio;
< fio = dio;
< size += dio->io_size;
< }
---
> if (!(flags & ZIO_FLAG_DONT_AGGREGATE)) {
> /*
> * We can aggregate I/Os that are adjacent (or, for reads, within
> * maxgap of each other) and of the same flavor per AGG_INHERIT.
> * The latter is necessary so that certain attributes
> * of the I/O, such as whether it's a normal I/O or a
> * scrub/resilver, can be preserved in the aggregate.
> */
> while ((dio = AVL_PREV(t, fio)) != NULL &&
> (dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags &&
> IO_SPAN(dio, lio) <= maxspan && IO_GAP(dio, fio) <= maxgap)
> fio = dio;
218,223c225,228
< while ((dio = AVL_NEXT(tree, lio)) != NULL && IS_ADJACENT(lio, dio) &&
< !((lio->io_flags | dio->io_flags) & ZIO_FLAG_DONT_AGGREGATE) &&
< size + dio->io_size <= zfs_vdev_aggregation_limit) {
< lio->io_delegate_next = dio;
< lio = dio;
< size += dio->io_size;
---
> while ((dio = AVL_NEXT(t, lio)) != NULL &&
> (dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags &&
> IO_SPAN(fio, dio) <= maxspan && IO_GAP(lio, dio) <= maxgap)
> lio = dio;
227,229c232
< char *buf = zio_buf_alloc(size);
< uint64_t offset = 0;
<
---
> uint64_t size = IO_SPAN(fio, lio);
233,234c236,237
< buf, size, fio->io_type, ZIO_PRIORITY_NOW,
< ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE,
---
> zio_buf_alloc(size), size, fio->io_type, ZIO_PRIORITY_NOW,
> flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE,
237,239c240,243
< aio->io_delegate_list = fio;
<
< for (dio = fio; dio != NULL; dio = dio->io_delegate_next) {
---
> nio = fio;
> do {
> dio = nio;
> nio = AVL_NEXT(t, dio);
241c245,246
< ASSERT(dio->io_vdev_tree == tree);
---
> ASSERT(dio->io_vdev_tree == t);
>
243,244c248,252
< bcopy(dio->io_data, buf + offset, dio->io_size);
< offset += dio->io_size;
---
> bcopy(dio->io_data, (char *)aio->io_data +
> (dio->io_offset - aio->io_offset),
> dio->io_size);
>
> zio_add_child(dio, aio);
247c255,256
< }
---
> zio_execute(dio);
> } while (dio != lio);
249,250d257
< ASSERT(offset == size);
<
256c263
< ASSERT(fio->io_vdev_tree == tree);
---
> ASSERT(fio->io_vdev_tree == t);