// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem write subrequest result collection, assessment
 * and retrying.
 *
 * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include "internal.h"

/* Notes made in the collector */
#define HIT_PENDING		0x01	/* A front op was still pending */
#define SOME_EMPTY		0x02	/* One or more streams are empty */
#define ALL_EMPTY		0x04	/* All streams are empty */
#define MAYBE_DISCONTIG		0x08	/* A front op may be discontiguous (rounded to PAGE_SIZE) */
#define NEED_REASSESS		0x10	/* Need to loop round and reassess */
#define REASSESS_DISCONTIG	0x20	/* Reassess discontiguity if contiguity advances */
#define MADE_PROGRESS		0x40	/* Made progress cleaning up a stream or the folio set */
#define BUFFERED		0x80	/* The pagecache needs cleaning up */
#define NEED_RETRY		0x100	/* A front op requests retrying */
#define SAW_FAILURE		0x200	/* One or more streams hit a permanent failure */

/*
 * Successful completion of write of a folio to the server and/or cache.  Note
 * that we are not allowed to lock the folio here on pain of deadlocking with
 * truncate.
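 *
 * Returns the number of references on the folio's netfs_group that the caller
 * should discard; the collector accumulates these in wreq->nr_group_rel and
 * releases them in a batch.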
 */
int netfs_folio_written_back(struct folio *folio)
{
	enum netfs_folio_trace why = netfs_folio_trace_clear;
	struct netfs_folio *finfo;
	struct netfs_group *group = NULL;
	int gcount = 0;

	if ((finfo = netfs_folio_info(folio))) {
		/* Streaming writes cannot be redirtied whilst under writeback,
		 * so discard the streaming record.
		 */
		folio_detach_private(folio);
		group = finfo->netfs_group;
		gcount++;
		kfree(finfo);
		why = netfs_folio_trace_clear_s;
		goto end_wb;
	}

	if ((group = netfs_folio_group(folio))) {
		if (group == NETFS_FOLIO_COPY_TO_CACHE) {
			why = netfs_folio_trace_clear_cc;
			folio_detach_private(folio);
			goto end_wb;
		}

		/* Need to detach the group pointer if the folio didn't get
		 * redirtied.  If it has been redirtied, then it must be within
		 * the same group.
		 */
		why = netfs_folio_trace_redirtied;
		if (!folio_test_dirty(folio)) {
			folio_detach_private(folio);
			gcount++;
			why = netfs_folio_trace_clear_g;
		}
	}

end_wb:
	trace_netfs_folio(folio, why);
	folio_end_writeback(folio);
	return gcount;
}

/*
 * Get hold of a folio we have under writeback.  We don't want to get the
 * refcount on it.
 */
static struct folio *netfs_writeback_lookup_folio(struct netfs_io_request *wreq, loff_t pos)
{
	XA_STATE(xas, &wreq->mapping->i_pages, pos / PAGE_SIZE);
	struct folio *folio;

	rcu_read_lock();

	for (;;) {
		xas_reset(&xas);
		folio = xas_load(&xas);
		if (xas_retry(&xas, folio))
			continue;

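		/* The folio cannot have been evicted whilst it was marked as
		 * being under writeback (truncate and reclaim wait for
		 * writeback to finish), so it should still be present.
		 */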
		if (!folio || xa_is_value(folio))
			kdebug("R=%08x: folio %lx (%llx) not present",
			       wreq->debug_id, xas.xa_index, pos / PAGE_SIZE);
		BUG_ON(!folio || xa_is_value(folio));

		if (folio == xas_reload(&xas))
			break;
	}

	rcu_read_unlock();

	if (WARN_ONCE(!folio_test_writeback(folio),
		      "R=%08x: folio %lx is not under writeback\n",
		      wreq->debug_id, folio->index)) {
		trace_netfs_folio(folio, netfs_folio_trace_not_under_wback);
	}
	return folio;
}

/*
 * Unlock any folios we've finished with.
 */
static void netfs_writeback_unlock_folios(struct netfs_io_request *wreq,
					  unsigned long long collected_to,
					  unsigned int *notes)
{
	for (;;) {
		struct folio *folio;
		struct netfs_folio *finfo;
		unsigned long long fpos, fend;
		size_t fsize, flen;

		folio = netfs_writeback_lookup_folio(wreq, wreq->cleaned_to);

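		/* For a streaming write, only the dirty part of the folio
		 * carries valid data, so treat the folio as ending there when
		 * comparing it against the collection point.
		 */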
		fpos = folio_pos(folio);
		fsize = folio_size(folio);
		finfo = netfs_folio_info(folio);
		flen = finfo ? finfo->dirty_offset + finfo->dirty_len : fsize;

		fend = min_t(unsigned long long, fpos + flen, wreq->i_size);

		trace_netfs_collect_folio(wreq, folio, fend, collected_to);

		if (fpos + fsize > wreq->contiguity) {
			trace_netfs_collect_contig(wreq, fpos + fsize,
						   netfs_contig_trace_unlock);
			wreq->contiguity = fpos + fsize;
		}

		/* Unlock any folio we've transferred all of. */
		if (collected_to < fend)
			break;

		wreq->nr_group_rel += netfs_folio_written_back(folio);
		wreq->cleaned_to = fpos + fsize;
		*notes |= MADE_PROGRESS;

		if (fpos + fsize >= collected_to)
			break;
	}
}

/*
 * Perform retries on the streams that need it.
 */
static void netfs_retry_write_stream(struct netfs_io_request *wreq,
				     struct netfs_io_stream *stream)
{
	struct list_head *next;

	_enter("R=%x[%x:]", wreq->debug_id, stream->stream_nr);

	if (list_empty(&stream->subrequests))
		return;

	if (stream->source == NETFS_UPLOAD_TO_SERVER &&
	    wreq->netfs_ops->retry_request)
		wreq->netfs_ops->retry_request(wreq, stream);

	if (unlikely(stream->failed))
		return;

	/* If there's no renegotiation to do, just resend each failed subreq. */
	if (!stream->prepare_write) {
		struct netfs_io_subrequest *subreq;

		list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
			if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
				break;
			if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
				__set_bit(NETFS_SREQ_RETRYING, &subreq->flags);
				netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
				netfs_reissue_write(stream, subreq);
			}
		}
		return;
	}

	next = stream->subrequests.next;

	do {
		struct netfs_io_subrequest *subreq = NULL, *from, *to, *tmp;
		unsigned long long start, len;
		size_t part;
		bool boundary = false;

		/* Go through the stream and find the next span of contiguous
		 * data that we then rejig (cifs, for example, needs the wsize
		 * renegotiating) and reissue.
		 */
		from = list_entry(next, struct netfs_io_subrequest, rreq_link);
		to = from;
		start = from->start + from->transferred;
		len   = from->len   - from->transferred;

		if (test_bit(NETFS_SREQ_FAILED, &from->flags) ||
		    !test_bit(NETFS_SREQ_NEED_RETRY, &from->flags))
			return;

		list_for_each_continue(next, &stream->subrequests) {
			subreq = list_entry(next, struct netfs_io_subrequest, rreq_link);
			if (subreq->start + subreq->transferred != start + len ||
			    test_bit(NETFS_SREQ_BOUNDARY, &subreq->flags) ||
			    !test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags))
				break;
			to = subreq;
			len += to->len;
		}

		/* Work through the sublist. */
		subreq = from;
		list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) {
			if (!len)
				break;
			/* Renegotiate max_len (wsize) */
			trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
			__clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
			__set_bit(NETFS_SREQ_RETRYING, &subreq->flags);
			stream->prepare_write(subreq);

			part = min(len, subreq->max_len);
			subreq->len = part;
			subreq->start = start;
			subreq->transferred = 0;
			len -= part;
			start += part;
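			/* If the residue spills past a boundary-marked subreq,
			 * remember to reapply the boundary flag to whichever
			 * subreq ends up carrying the tail of the span.
			 */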
			if (len && subreq == to &&
			    __test_and_clear_bit(NETFS_SREQ_BOUNDARY, &to->flags))
				boundary = true;

			netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
			netfs_reissue_write(stream, subreq);
			if (subreq == to)
				break;
		}

		/* If we managed to use fewer subreqs, we can discard the
		 * excess; if we used the same number, then we're done.
		 */
		if (!len) {
			if (subreq == to)
				continue;
			list_for_each_entry_safe_from(subreq, tmp,
						      &stream->subrequests, rreq_link) {
				trace_netfs_sreq(subreq, netfs_sreq_trace_discard);
				list_del(&subreq->rreq_link);
				netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done);
				if (subreq == to)
					break;
			}
			continue;
		}

		/* We ran out of subrequests, so we need to allocate some more
		 * and insert them after.
		 */
		do {
			subreq = netfs_alloc_subrequest(wreq);
			subreq->source		= to->source;
			subreq->start		= start;
			subreq->max_len		= len;
			subreq->max_nr_segs	= INT_MAX;
			subreq->debug_index	= atomic_inc_return(&wreq->subreq_counter);
			subreq->stream_nr	= to->stream_nr;
			__set_bit(NETFS_SREQ_RETRYING, &subreq->flags);

			trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
					     refcount_read(&subreq->ref),
					     netfs_sreq_trace_new);
			netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);

			list_add(&subreq->rreq_link, &to->rreq_link);
			to = list_next_entry(to, rreq_link);
			trace_netfs_sreq(subreq, netfs_sreq_trace_retry);

			switch (stream->source) {
			case NETFS_UPLOAD_TO_SERVER:
				netfs_stat(&netfs_n_wh_upload);
				subreq->max_len = min(len, wreq->wsize);
				break;
			case NETFS_WRITE_TO_CACHE:
				netfs_stat(&netfs_n_wh_write);
				break;
			default:
				WARN_ON_ONCE(1);
			}

			stream->prepare_write(subreq);

			part = min(len, subreq->max_len);
			subreq->len = subreq->transferred + part;
			len -= part;
			start += part;
			if (!len && boundary) {
				__set_bit(NETFS_SREQ_BOUNDARY, &to->flags);
				boundary = false;
			}

			netfs_reissue_write(stream, subreq);
			if (!len)
				break;

		} while (len);

	} while (!list_is_head(next, &stream->subrequests));
}

/*
 * Perform retries on the streams that need it.  If we're doing content
 * encryption and the server copy changed due to a third-party write, we may
 * need to do an RMW cycle and also rewrite the data to the cache.
 */
static void netfs_retry_writes(struct netfs_io_request *wreq)
{
	struct netfs_io_subrequest *subreq;
	struct netfs_io_stream *stream;
	int s;

	/* Wait for all outstanding I/O to quiesce before performing retries as
	 * we may need to renegotiate the I/O sizes.
	 */
	for (s = 0; s < NR_IO_STREAMS; s++) {
		stream = &wreq->io_streams[s];
		if (!stream->active)
			continue;

		list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
			wait_on_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS,
				    TASK_UNINTERRUPTIBLE);
		}
	}

	// TODO: Enc: Fetch changed partial pages
	// TODO: Enc: Reencrypt content if needed.
	// TODO: Enc: Wind back transferred point.
	// TODO: Enc: Mark cache pages for retry.

	for (s = 0; s < NR_IO_STREAMS; s++) {
		stream = &wreq->io_streams[s];
		if (stream->need_retry) {
			stream->need_retry = false;
			netfs_retry_write_stream(wreq, stream);
		}
	}
}

/*
 * Collect and assess the results of various write subrequests.  We may need to
 * retry some of the results - or even do an RMW cycle for content crypto.
 *
 * Note that we have a number of parallel, overlapping lists of subrequests,
 * one to the server and one to the local cache for example, which may not be
 * the same size or starting position and may not even correspond in boundary
 * alignment.
 */
static void netfs_collect_write_results(struct netfs_io_request *wreq)
{
	struct netfs_io_subrequest *front, *remove;
	struct netfs_io_stream *stream;
	unsigned long long collected_to;
	unsigned int notes;
	int s;

	_enter("%llx-%llx", wreq->start, wreq->start + wreq->len);
	trace_netfs_collect(wreq);
	trace_netfs_rreq(wreq, netfs_rreq_trace_collect);

reassess_streams:
	smp_rmb();
	collected_to = ULLONG_MAX;
	if (wreq->origin == NETFS_WRITEBACK)
		notes = ALL_EMPTY | BUFFERED | MAYBE_DISCONTIG;
	else if (wreq->origin == NETFS_WRITETHROUGH)
		notes = ALL_EMPTY | BUFFERED;
	else
		notes = ALL_EMPTY;
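	/* BUFFERED means there's pagecache to clean up; MAYBE_DISCONTIG is
	 * only a concern for writeback, where the dirty folios being written
	 * back need not be contiguous.
	 */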

	/* Remove completed subrequests from the front of the streams and
	 * advance the completion point on each stream.  We stop when we hit
	 * something that's in progress.  The issuer thread may be adding stuff
	 * to the tail whilst we're doing this.
	 *
	 * We must not, however, merge in discontiguities that span whole
	 * folios that aren't under writeback.  This is made more complicated
	 * by the folios in the gap being of unpredictable sizes - if they even
	 * exist - but we don't want to look them up.
	 */
	for (s = 0; s < NR_IO_STREAMS; s++) {
		loff_t rstart, rend;

		stream = &wreq->io_streams[s];
		/* Read active flag before list pointers */
		if (!smp_load_acquire(&stream->active))
			continue;

		front = stream->front;
		while (front) {
			trace_netfs_collect_sreq(wreq, front);
			//_debug("sreq [%x] %llx %zx/%zx",
			//       front->debug_index, front->start, front->transferred, front->len);

			/* Stall if there may be a discontinuity. */
			rstart = round_down(front->start, PAGE_SIZE);
			if (rstart > wreq->contiguity) {
				if (wreq->contiguity > stream->collected_to) {
					trace_netfs_collect_gap(wreq, stream,
								wreq->contiguity, 'D');
					stream->collected_to = wreq->contiguity;
				}
				notes |= REASSESS_DISCONTIG;
				break;
			}
			rend = round_up(front->start + front->len, PAGE_SIZE);
			if (rend > wreq->contiguity) {
				trace_netfs_collect_contig(wreq, rend,
							   netfs_contig_trace_collect);
				wreq->contiguity = rend;
				if (notes & REASSESS_DISCONTIG)
					notes |= NEED_REASSESS;
			}
			notes &= ~MAYBE_DISCONTIG;

			/* Stall if the front is still undergoing I/O. */
			if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags)) {
				notes |= HIT_PENDING;
				break;
			}
			smp_rmb(); /* Read counters after I-P flag. */

			if (stream->failed) {
				stream->collected_to = front->start + front->len;
				notes |= MADE_PROGRESS | SAW_FAILURE;
				goto cancel;
			}
			if (front->start + front->transferred > stream->collected_to) {
				stream->collected_to = front->start + front->transferred;
				stream->transferred = stream->collected_to - wreq->start;
				notes |= MADE_PROGRESS;
			}
			if (test_bit(NETFS_SREQ_FAILED, &front->flags)) {
				stream->failed = true;
				stream->error = front->error;
				if (stream->source == NETFS_UPLOAD_TO_SERVER)
					mapping_set_error(wreq->mapping, front->error);
				notes |= NEED_REASSESS | SAW_FAILURE;
				break;
			}
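			/* A short, but otherwise successful, transfer means
			 * the remainder needs to be reissued.
			 */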
			if (front->transferred < front->len) {
				stream->need_retry = true;
				notes |= NEED_RETRY | MADE_PROGRESS;
				break;
			}

		cancel:
			/* Remove if completely consumed. */
			spin_lock(&wreq->lock);

			remove = front;
			list_del_init(&front->rreq_link);
			front = list_first_entry_or_null(&stream->subrequests,
							 struct netfs_io_subrequest, rreq_link);
			stream->front = front;
			if (!front) {
				unsigned long long jump_to = atomic64_read(&wreq->issued_to);

				if (stream->collected_to < jump_to) {
					trace_netfs_collect_gap(wreq, stream, jump_to, 'A');
					stream->collected_to = jump_to;
				}
			}

			spin_unlock(&wreq->lock);
			netfs_put_subrequest(remove, false,
					     notes & SAW_FAILURE ?
					     netfs_sreq_trace_put_cancel :
					     netfs_sreq_trace_put_done);
		}

		if (front)
			notes &= ~ALL_EMPTY;
		else
			notes |= SOME_EMPTY;

		if (stream->collected_to < collected_to)
			collected_to = stream->collected_to;
	}

	if (collected_to != ULLONG_MAX && collected_to > wreq->collected_to)
		wreq->collected_to = collected_to;

	/* If we have an empty stream, we need to jump it forward over any gap,
	 * otherwise the collection point will never advance.
	 *
	 * Note that the issuer always adds to the stream with the lowest
	 * so-far submitted start, so if we see two consecutive subreqs in one
	 * stream with nothing between them in another stream, then the second
	 * stream has a gap that can be jumped.
	 */
	if (notes & SOME_EMPTY) {
		unsigned long long jump_to = wreq->start + READ_ONCE(wreq->submitted);

		for (s = 0; s < NR_IO_STREAMS; s++) {
			stream = &wreq->io_streams[s];
			if (stream->active &&
			    stream->front &&
			    stream->front->start < jump_to)
				jump_to = stream->front->start;
		}

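		/* Advance the collection point of any drained stream to that
		 * position so that it doesn't hold everything else back.
		 */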
		for (s = 0; s < NR_IO_STREAMS; s++) {
			stream = &wreq->io_streams[s];
			if (stream->active &&
			    !stream->front &&
			    stream->collected_to < jump_to) {
				trace_netfs_collect_gap(wreq, stream, jump_to, 'B');
				stream->collected_to = jump_to;
			}
		}
	}

	for (s = 0; s < NR_IO_STREAMS; s++) {
		stream = &wreq->io_streams[s];
		if (stream->active)
			trace_netfs_collect_stream(wreq, stream);
	}

	trace_netfs_collect_state(wreq, wreq->collected_to, notes);

	/* Unlock any folios that we have now finished with. */
	if (notes & BUFFERED) {
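		/* We can only clean up to the lower of the collection point
		 * and the contiguity point - beyond a gap, the folios may not
		 * even be under writeback.
		 */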
		unsigned long long clean_to = min(wreq->collected_to, wreq->contiguity);

		if (wreq->cleaned_to < clean_to)
			netfs_writeback_unlock_folios(wreq, clean_to, &notes);
	} else {
		wreq->cleaned_to = wreq->collected_to;
	}

	// TODO: Discard encryption buffers

	/* If all streams are discontiguous with the last folio we cleared, we
	 * may need to skip a set of folios.
	 */
	if ((notes & (MAYBE_DISCONTIG | ALL_EMPTY)) == MAYBE_DISCONTIG) {
		unsigned long long jump_to = ULLONG_MAX;

		for (s = 0; s < NR_IO_STREAMS; s++) {
			stream = &wreq->io_streams[s];
			if (stream->active && stream->front &&
			    stream->front->start < jump_to)
				jump_to = stream->front->start;
		}

		trace_netfs_collect_contig(wreq, jump_to, netfs_contig_trace_jump);
		wreq->contiguity = jump_to;
		wreq->cleaned_to = jump_to;
		wreq->collected_to = jump_to;
		for (s = 0; s < NR_IO_STREAMS; s++) {
			stream = &wreq->io_streams[s];
			if (stream->collected_to < jump_to)
				stream->collected_to = jump_to;
		}
		//cond_resched();
		notes |= MADE_PROGRESS;
		goto reassess_streams;
	}

	if (notes & NEED_RETRY)
		goto need_retry;
	if ((notes & MADE_PROGRESS) && test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) {
		trace_netfs_rreq(wreq, netfs_rreq_trace_unpause);
		clear_bit_unlock(NETFS_RREQ_PAUSE, &wreq->flags);
		wake_up_bit(&wreq->flags, NETFS_RREQ_PAUSE);
	}

	if (notes & NEED_REASSESS) {
		//cond_resched();
		goto reassess_streams;
	}
	if (notes & MADE_PROGRESS) {
		//cond_resched();
		goto reassess_streams;
	}

out:
	netfs_put_group_many(wreq->group, wreq->nr_group_rel);
	wreq->nr_group_rel = 0;
	_leave(" = %x", notes);
	return;

need_retry:
	/* Okay...  We're going to have to retry one or both streams.  Note
	 * that any partially completed op will have had any wholly transferred
	 * folios removed from it.
	 */
	_debug("retry");
	netfs_retry_writes(wreq);
	goto out;
}

/*
 * Perform the collection of subrequests, folios and encryption buffers.
 */
void netfs_write_collection_worker(struct work_struct *work)
{
	struct netfs_io_request *wreq = container_of(work, struct netfs_io_request, work);
	struct netfs_inode *ictx = netfs_inode(wreq->inode);
	size_t transferred;
	int s;

	_enter("R=%x", wreq->debug_id);

	netfs_see_request(wreq, netfs_rreq_trace_see_work);
	if (!test_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags)) {
		netfs_put_request(wreq, false, netfs_rreq_trace_put_work);
		return;
	}

	netfs_collect_write_results(wreq);

	/* We're done when the app thread has finished posting subreqs and all
	 * the queues in all the streams are empty.
	 */
	if (!test_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags)) {
		netfs_put_request(wreq, false, netfs_rreq_trace_put_work);
		return;
	}
	smp_rmb(); /* Read ALL_QUEUED before lists. */

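	/* The request is only as complete as its least-advanced active stream,
	 * so take the minimum of the streams' transferred counts.
	 */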
	transferred = LONG_MAX;
	for (s = 0; s < NR_IO_STREAMS; s++) {
		struct netfs_io_stream *stream = &wreq->io_streams[s];
		if (!stream->active)
			continue;
		if (!list_empty(&stream->subrequests)) {
			netfs_put_request(wreq, false, netfs_rreq_trace_put_work);
			return;
		}
		if (stream->transferred < transferred)
			transferred = stream->transferred;
	}

	/* Okay, declare that all I/O is complete. */
	wreq->transferred = transferred;
	trace_netfs_rreq(wreq, netfs_rreq_trace_write_done);

	if (wreq->io_streams[1].active &&
	    wreq->io_streams[1].failed) {
		/* Cache write failure doesn't prevent writeback completion
		 * unless we're in disconnected mode.
		 */
		ictx->ops->invalidate_cache(wreq);
	}

	if (wreq->cleanup)
		wreq->cleanup(wreq);

	if (wreq->origin == NETFS_DIO_WRITE &&
	    wreq->mapping->nrpages) {
		/* mmap may have got underfoot and we may now have folios
		 * locally covering the region we just wrote.  Attempt to
		 * discard the folios, but leave in place any that have been
		 * modified locally.  ->write_iter() is prevented from
		 * interfering by the DIO counter.
		 */
		pgoff_t first = wreq->start >> PAGE_SHIFT;
		pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT;
		invalidate_inode_pages2_range(wreq->mapping, first, last);
	}

	if (wreq->origin == NETFS_DIO_WRITE)
		inode_dio_end(wreq->inode);

	_debug("finished");
	trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip);
	clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
	wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);

	if (wreq->iocb) {
		size_t written = min(wreq->transferred, wreq->len);
		wreq->iocb->ki_pos += written;
		if (wreq->iocb->ki_complete)
			wreq->iocb->ki_complete(
				wreq->iocb, wreq->error ? wreq->error : written);
		wreq->iocb = VFS_PTR_POISON;
	}

	netfs_clear_subrequests(wreq, false);
	netfs_put_request(wreq, false, netfs_rreq_trace_put_work_complete);
}

/*
 * Wake the collection work item.
 */
void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async)
{
	if (!work_pending(&wreq->work)) {
		netfs_get_request(wreq, netfs_rreq_trace_get_work);
		if (!queue_work(system_unbound_wq, &wreq->work))
			netfs_put_request(wreq, was_async, netfs_rreq_trace_put_work_nq);
	}
}

/**
 * netfs_write_subrequest_terminated - Note the termination of a write operation.
 * @_op: The I/O subrequest that has terminated.
 * @transferred_or_error: The amount of data transferred or an error code.
 * @was_async: The termination was asynchronous
 *
 * This tells the library that a contributory write I/O operation has
 * terminated, one way or another, and that it should collect the results.
 *
 * The caller indicates in @transferred_or_error the outcome of the operation,
 * supplying a positive value to indicate the number of bytes transferred or a
 * negative error code.  The library will look after reissuing I/O operations
 * as appropriate and writing the data to the cache.
 *
 * If @was_async is true, the caller might be running in softirq or interrupt
 * context and we can't sleep.
 *
 * When this is called, ownership of the subrequest is transferred back to the
 * library, along with a ref.
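 *
 * For example, a filesystem's own write completion handler might finish with
 * something along these lines (a sketch only; names are illustrative):
 *
 *	netfs_write_subrequest_terminated(subreq, err < 0 ? err : written, false);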
 *
 * Note that %_op is a void* so that the function can be passed to
 * kiocb::term_func without the need for a casting wrapper.
 */
void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
				       bool was_async)
{
	struct netfs_io_subrequest *subreq = _op;
	struct netfs_io_request *wreq = subreq->rreq;
	struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr];

	_enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);

	switch (subreq->source) {
	case NETFS_UPLOAD_TO_SERVER:
		netfs_stat(&netfs_n_wh_upload_done);
		break;
	case NETFS_WRITE_TO_CACHE:
		netfs_stat(&netfs_n_wh_write_done);
		break;
	case NETFS_INVALID_WRITE:
		break;
	default:
		BUG();
	}

	if (IS_ERR_VALUE(transferred_or_error)) {
		subreq->error = transferred_or_error;
		if (subreq->error == -EAGAIN)
			set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
		else
			set_bit(NETFS_SREQ_FAILED, &subreq->flags);
		trace_netfs_failure(wreq, subreq, transferred_or_error, netfs_fail_write);

		switch (subreq->source) {
		case NETFS_WRITE_TO_CACHE:
			netfs_stat(&netfs_n_wh_write_failed);
			break;
		case NETFS_UPLOAD_TO_SERVER:
			netfs_stat(&netfs_n_wh_upload_failed);
			break;
		default:
			break;
		}
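		/* Pause the issuer so that no further subrequests are added
		 * until the collector has assessed the failure; it gets
		 * unpaused once progress is made or retries are queued.
		 */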
		trace_netfs_rreq(wreq, netfs_rreq_trace_set_pause);
		set_bit(NETFS_RREQ_PAUSE, &wreq->flags);
	} else {
		if (WARN(transferred_or_error > subreq->len - subreq->transferred,
			 "Subreq excess write: R=%x[%x] %zd > %zu - %zu",
			 wreq->debug_id, subreq->debug_index,
			 transferred_or_error, subreq->len, subreq->transferred))
			transferred_or_error = subreq->len - subreq->transferred;

		subreq->error = 0;
		subreq->transferred += transferred_or_error;

		if (subreq->transferred < subreq->len)
			set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
	}

	trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);

	clear_bit_unlock(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
	wake_up_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS);

	/* If we are at the head of the queue, wake up the collector,
	 * transferring a ref to it if we were the ones to do so.
	 */
	if (list_is_first(&subreq->rreq_link, &stream->subrequests))
		netfs_wake_write_collector(wreq, was_async);

	netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
}
EXPORT_SYMBOL(netfs_write_subrequest_terminated);