// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem write subrequest result collection, assessment
 * and retrying.
 *
 * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include "internal.h"

/* Notes made in the collector */
#define HIT_PENDING		0x01	/* A front op was still pending */
#define SOME_EMPTY		0x02	/* One or more streams are empty */
#define ALL_EMPTY		0x04	/* All streams are empty */
#define MAYBE_DISCONTIG		0x08	/* A front op may be discontiguous (rounded to PAGE_SIZE) */
#define NEED_REASSESS		0x10	/* Need to loop round and reassess */
#define REASSESS_DISCONTIG	0x20	/* Reassess discontiguity if contiguity advances */
#define MADE_PROGRESS		0x40	/* Made progress cleaning up a stream or the folio set */
#define BUFFERED		0x80	/* The pagecache needs cleaning up */
#define NEED_RETRY		0x100	/* A front op requests retrying */
#define SAW_FAILURE		0x200	/* One stream hit a permanent failure */

/*
 * Successful completion of write of a folio to the server and/or cache.  Note
 * that we are not allowed to lock the folio here on pain of deadlocking with
 * truncate.
 */
int netfs_folio_written_back(struct folio *folio)
{
	enum netfs_folio_trace why = netfs_folio_trace_clear;
	struct netfs_folio *finfo;
	struct netfs_group *group = NULL;
	int gcount = 0;

	if ((finfo = netfs_folio_info(folio))) {
		/* Streaming writes cannot be redirtied whilst under writeback,
		 * so discard the streaming record.
		 */
		folio_detach_private(folio);
		group = finfo->netfs_group;
		gcount++;
		kfree(finfo);
		why = netfs_folio_trace_clear_s;
		goto end_wb;
	}

	if ((group = netfs_folio_group(folio))) {
		if (group == NETFS_FOLIO_COPY_TO_CACHE) {
			why = netfs_folio_trace_clear_cc;
			folio_detach_private(folio);
			goto end_wb;
		}

		/* Need to detach the group pointer if the page didn't get
		 * redirtied.  If it has been redirtied, then it must be within
		 * the same group.
		 */
		why = netfs_folio_trace_redirtied;
		if (!folio_test_dirty(folio)) {
			folio_detach_private(folio);
			gcount++;
			why = netfs_folio_trace_clear_g;
		}
	}

end_wb:
	trace_netfs_folio(folio, why);
	folio_end_writeback(folio);
	return gcount;
}
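
/* Note that the group-ref count returned above is not released here: the
 * collector accumulates it in wreq->nr_group_rel and the refs are dropped in
 * a batch by netfs_put_group_many() at the end of a collection pass (see
 * netfs_collect_write_results() below).
 */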

/*
 * Get hold of a folio we have under writeback.  We don't want to get a ref
 * on it.
 */
static struct folio *netfs_writeback_lookup_folio(struct netfs_io_request *wreq, loff_t pos)
{
	XA_STATE(xas, &wreq->mapping->i_pages, pos / PAGE_SIZE);
	struct folio *folio;

	rcu_read_lock();

	for (;;) {
		xas_reset(&xas);
		folio = xas_load(&xas);
		if (xas_retry(&xas, folio))
			continue;

		if (!folio || xa_is_value(folio))
			kdebug("R=%08x: folio %lx (%llx) not present",
			       wreq->debug_id, xas.xa_index, pos / PAGE_SIZE);
		BUG_ON(!folio || xa_is_value(folio));

		if (folio == xas_reload(&xas))
			break;
	}

	rcu_read_unlock();

	if (WARN_ONCE(!folio_test_writeback(folio),
		      "R=%08x: folio %lx is not under writeback\n",
		      wreq->debug_id, folio->index)) {
		trace_netfs_folio(folio, netfs_folio_trace_not_under_wback);
	}
	return folio;
}

/*
 * Unlock any folios we've finished with.
 */
static void netfs_writeback_unlock_folios(struct netfs_io_request *wreq,
					  unsigned long long collected_to,
					  unsigned int *notes)
{
	for (;;) {
		struct folio *folio;
		struct netfs_folio *finfo;
		unsigned long long fpos, fend;
		size_t fsize, flen;

		folio = netfs_writeback_lookup_folio(wreq, wreq->cleaned_to);

		fpos = folio_pos(folio);
		fsize = folio_size(folio);
		finfo = netfs_folio_info(folio);
		flen = finfo ? finfo->dirty_offset + finfo->dirty_len : fsize;

		fend = min_t(unsigned long long, fpos + flen, wreq->i_size);

		trace_netfs_collect_folio(wreq, folio, fend, collected_to);

		if (fpos + fsize > wreq->contiguity) {
			trace_netfs_collect_contig(wreq, fpos + fsize,
						   netfs_contig_trace_unlock);
			wreq->contiguity = fpos + fsize;
		}

		/* Unlock any folio we've transferred all of. */
		if (collected_to < fend)
			break;

		wreq->nr_group_rel += netfs_folio_written_back(folio);
		wreq->cleaned_to = fpos + fsize;
		*notes |= MADE_PROGRESS;

		if (fpos + fsize >= collected_to)
			break;
	}
}
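
/* For example (illustrative, assuming 4KiB folios throughout): if
 * collected_to is 0x3000, the walk above unlocks the folios at 0x0000,
 * 0x1000 and 0x2000, advancing cleaned_to to 0x3000, and then stops, since
 * nothing at or beyond collected_to may be unlocked yet.
 */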

/*
 * Perform retries on the streams that need it.
 */
static void netfs_retry_write_stream(struct netfs_io_request *wreq,
				     struct netfs_io_stream *stream)
{
	struct list_head *next;

	_enter("R=%x[%x:]", wreq->debug_id, stream->stream_nr);

	if (list_empty(&stream->subrequests))
		return;

	if (stream->source == NETFS_UPLOAD_TO_SERVER &&
	    wreq->netfs_ops->retry_request)
		wreq->netfs_ops->retry_request(wreq, stream);

	if (unlikely(stream->failed))
		return;

	/* If there's no renegotiation to do, just resend each failed subreq. */
	if (!stream->prepare_write) {
		struct netfs_io_subrequest *subreq;

		list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
			if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
				break;
			if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
				__set_bit(NETFS_SREQ_RETRYING, &subreq->flags);
				netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
				netfs_reissue_write(stream, subreq);
			}
		}
		return;
	}

	next = stream->subrequests.next;

	do {
		struct netfs_io_subrequest *subreq = NULL, *from, *to, *tmp;
		unsigned long long start, len;
		size_t part;
		bool boundary = false;

		/* Go through the stream and find the next span of contiguous
		 * data that we then rejig (cifs, for example, needs the wsize
		 * renegotiating) and reissue.
		 */
		from = list_entry(next, struct netfs_io_subrequest, rreq_link);
		to = from;
		start = from->start + from->transferred;
		len = from->len - from->transferred;

		if (test_bit(NETFS_SREQ_FAILED, &from->flags) ||
		    !test_bit(NETFS_SREQ_NEED_RETRY, &from->flags))
			return;

		list_for_each_continue(next, &stream->subrequests) {
			subreq = list_entry(next, struct netfs_io_subrequest, rreq_link);
			if (subreq->start + subreq->transferred != start + len ||
			    test_bit(NETFS_SREQ_BOUNDARY, &subreq->flags) ||
			    !test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags))
				break;
			to = subreq;
			len += to->len;
		}
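
		/* At this point, [start, start + len) covers the contiguous
		 * run from..to that needs reissuing; the data is parcelled
		 * back out over those subreqs below with a freshly negotiated
		 * wsize.
		 */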

		/* Work through the sublist. */
		subreq = from;
		list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) {
			if (!len)
				break;
			/* Renegotiate max_len (wsize) */
			trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
			__clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
			__set_bit(NETFS_SREQ_RETRYING, &subreq->flags);
			stream->prepare_write(subreq);

			part = min(len, subreq->max_len);
			subreq->len = part;
			subreq->start = start;
			subreq->transferred = 0;
			len -= part;
			start += part;
			if (len && subreq == to &&
			    __test_and_clear_bit(NETFS_SREQ_BOUNDARY, &to->flags))
				boundary = true;

			netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
			netfs_reissue_write(stream, subreq);
			if (subreq == to)
				break;
		}

		/* If we managed to use fewer subreqs, we can discard the
		 * excess; if we used the same number, then we're done.
		 */
		if (!len) {
			if (subreq == to)
				continue;
			list_for_each_entry_safe_from(subreq, tmp,
						      &stream->subrequests, rreq_link) {
				trace_netfs_sreq(subreq, netfs_sreq_trace_discard);
				list_del(&subreq->rreq_link);
				netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done);
				if (subreq == to)
					break;
			}
			continue;
		}
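
		/* Note that if the final subreq of the span carried
		 * NETFS_SREQ_BOUNDARY but no longer holds the end of the
		 * data, the flag was deferred above (boundary == true) so
		 * that it can be reapplied to whichever subreq ends up
		 * holding the end.
		 */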

		/* We ran out of subrequests, so we need to allocate some more
		 * and insert them after.
		 */
		do {
			subreq = netfs_alloc_subrequest(wreq);
			subreq->source = to->source;
			subreq->start = start;
			subreq->max_len = len;
			subreq->max_nr_segs = INT_MAX;
			subreq->debug_index = atomic_inc_return(&wreq->subreq_counter);
			subreq->stream_nr = to->stream_nr;
			__set_bit(NETFS_SREQ_RETRYING, &subreq->flags);

			trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
					     refcount_read(&subreq->ref),
					     netfs_sreq_trace_new);
			netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);

			list_add(&subreq->rreq_link, &to->rreq_link);
			to = list_next_entry(to, rreq_link);
			trace_netfs_sreq(subreq, netfs_sreq_trace_retry);

			switch (stream->source) {
			case NETFS_UPLOAD_TO_SERVER:
				netfs_stat(&netfs_n_wh_upload);
				subreq->max_len = min(len, wreq->wsize);
				break;
			case NETFS_WRITE_TO_CACHE:
				netfs_stat(&netfs_n_wh_write);
				break;
			default:
				WARN_ON_ONCE(1);
			}

			stream->prepare_write(subreq);

			part = min(len, subreq->max_len);
			subreq->len = subreq->transferred + part;
			len -= part;
			start += part;
			if (!len && boundary) {
				__set_bit(NETFS_SREQ_BOUNDARY, &to->flags);
				boundary = false;
			}

			netfs_reissue_write(stream, subreq);
			if (!len)
				break;

		} while (len);

	} while (!list_is_head(next, &stream->subrequests));
}
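
/* In summary, a stream is retried in one of three ways: with no
 * ->prepare_write() to renegotiate with, each failed subreq is simply
 * reissued as-is; if the span to be retried still fits in the existing
 * subreqs under the renegotiated wsize, the data is redistributed over them
 * and any now-surplus subreqs are discarded; otherwise additional subreqs
 * are allocated and spliced in after the span.
 */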

/*
 * Perform retries on the streams that need it.  If we're doing content
 * encryption and the server copy changed due to a third-party write, we may
 * need to do an RMW cycle and also rewrite the data to the cache.
 */
static void netfs_retry_writes(struct netfs_io_request *wreq)
{
	struct netfs_io_subrequest *subreq;
	struct netfs_io_stream *stream;
	int s;

	/* Wait for all outstanding I/O to quiesce before performing retries as
	 * we may need to renegotiate the I/O sizes.
	 */
	for (s = 0; s < NR_IO_STREAMS; s++) {
		stream = &wreq->io_streams[s];
		if (!stream->active)
			continue;

		list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
			wait_on_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS,
				    TASK_UNINTERRUPTIBLE);
		}
	}

	// TODO: Enc: Fetch changed partial pages
	// TODO: Enc: Reencrypt content if needed.
	// TODO: Enc: Wind back transferred point.
	// TODO: Enc: Mark cache pages for retry.

	for (s = 0; s < NR_IO_STREAMS; s++) {
		stream = &wreq->io_streams[s];
		if (stream->need_retry) {
			stream->need_retry = false;
			netfs_retry_write_stream(wreq, stream);
		}
	}
}

/*
 * Collect and assess the results of various write subrequests.  We may need to
 * retry some of the results - or even do an RMW cycle for content crypto.
 *
 * Note that we have a number of parallel, overlapping lists of subrequests,
 * one to the server and one to the local cache for example, which may not be
 * the same size or starting position and may not even correspond in boundary
 * alignment.
 */
static void netfs_collect_write_results(struct netfs_io_request *wreq)
{
	struct netfs_io_subrequest *front, *remove;
	struct netfs_io_stream *stream;
	unsigned long long collected_to;
	unsigned int notes;
	int s;

	_enter("%llx-%llx", wreq->start, wreq->start + wreq->len);
	trace_netfs_collect(wreq);
	trace_netfs_rreq(wreq, netfs_rreq_trace_collect);

reassess_streams:
	smp_rmb();
	collected_to = ULLONG_MAX;
	if (wreq->origin == NETFS_WRITEBACK)
		notes = ALL_EMPTY | BUFFERED | MAYBE_DISCONTIG;
	else if (wreq->origin == NETFS_WRITETHROUGH)
		notes = ALL_EMPTY | BUFFERED;
	else
		notes = ALL_EMPTY;

	/* Remove completed subrequests from the front of the streams and
	 * advance the completion point on each stream.  We stop when we hit
	 * something that's in progress.  The issuer thread may be adding stuff
	 * to the tail whilst we're doing this.
	 *
	 * We must not, however, merge in discontiguities that span whole
	 * folios that aren't under writeback.  This is made more complicated
	 * by the folios in the gap being of unpredictable sizes - if they even
	 * exist - but we don't want to look them up.
	 */
	for (s = 0; s < NR_IO_STREAMS; s++) {
		loff_t rstart, rend;

		stream = &wreq->io_streams[s];
		/* Read active flag before list pointers */
		if (!smp_load_acquire(&stream->active))
			continue;

		front = stream->front;
		while (front) {
			trace_netfs_collect_sreq(wreq, front);
			//_debug("sreq [%x] %llx %zx/%zx",
			//       front->debug_index, front->start, front->transferred, front->len);

			/* Stall if there may be a discontinuity. */
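			/* That is: if this op starts (rounded down to a folio
			 * boundary) beyond the furthest point known to be
			 * contiguously under writeback, folios in the gap may
			 * never have been put under writeback, so collection
			 * must not advance across them.
			 */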
			rstart = round_down(front->start, PAGE_SIZE);
			if (rstart > wreq->contiguity) {
				if (wreq->contiguity > stream->collected_to) {
					trace_netfs_collect_gap(wreq, stream,
								wreq->contiguity, 'D');
					stream->collected_to = wreq->contiguity;
				}
				notes |= REASSESS_DISCONTIG;
				break;
			}
			rend = round_up(front->start + front->len, PAGE_SIZE);
			if (rend > wreq->contiguity) {
				trace_netfs_collect_contig(wreq, rend,
							   netfs_contig_trace_collect);
				wreq->contiguity = rend;
				if (notes & REASSESS_DISCONTIG)
					notes |= NEED_REASSESS;
			}
			notes &= ~MAYBE_DISCONTIG;

			/* Stall if the front is still undergoing I/O. */
			if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags)) {
				notes |= HIT_PENDING;
				break;
			}
			smp_rmb(); /* Read counters after I-P flag. */

			if (stream->failed) {
				stream->collected_to = front->start + front->len;
				notes |= MADE_PROGRESS | SAW_FAILURE;
				goto cancel;
			}
			if (front->start + front->transferred > stream->collected_to) {
				stream->collected_to = front->start + front->transferred;
				stream->transferred = stream->collected_to - wreq->start;
				notes |= MADE_PROGRESS;
			}
			if (test_bit(NETFS_SREQ_FAILED, &front->flags)) {
				stream->failed = true;
				stream->error = front->error;
				if (stream->source == NETFS_UPLOAD_TO_SERVER)
					mapping_set_error(wreq->mapping, front->error);
				notes |= NEED_REASSESS | SAW_FAILURE;
				break;
			}
			if (front->transferred < front->len) {
				stream->need_retry = true;
				notes |= NEED_RETRY | MADE_PROGRESS;
				break;
			}

		cancel:
			/* Remove if completely consumed. */
			spin_lock(&wreq->lock);

			remove = front;
			list_del_init(&front->rreq_link);
			front = list_first_entry_or_null(&stream->subrequests,
							 struct netfs_io_subrequest, rreq_link);
			stream->front = front;
			if (!front) {
				unsigned long long jump_to = atomic64_read(&wreq->issued_to);

				if (stream->collected_to < jump_to) {
					trace_netfs_collect_gap(wreq, stream, jump_to, 'A');
					stream->collected_to = jump_to;
				}
			}

			spin_unlock(&wreq->lock);
			netfs_put_subrequest(remove, false,
					     notes & SAW_FAILURE ?
					     netfs_sreq_trace_put_cancel :
					     netfs_sreq_trace_put_done);
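
			/* Note that collection never removes a subreq that is
			 * only partially complete: such a subreq stalls its
			 * stream above until it is retried or marked failed.
			 */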
		}

		if (front)
			notes &= ~ALL_EMPTY;
		else
			notes |= SOME_EMPTY;

		if (stream->collected_to < collected_to)
			collected_to = stream->collected_to;
	}

	if (collected_to != ULLONG_MAX && collected_to > wreq->collected_to)
		wreq->collected_to = collected_to;

	/* If we have an empty stream, we need to jump it forward over any gap
	 * otherwise the collection point will never advance.
	 *
	 * Note that the issuer always adds to the stream with the lowest
	 * so-far submitted start, so if we see two consecutive subreqs in one
	 * stream with nothing between them in another stream, then the second
	 * stream has a gap that can be jumped.
	 */
	if (notes & SOME_EMPTY) {
		unsigned long long jump_to = wreq->start + READ_ONCE(wreq->submitted);

		for (s = 0; s < NR_IO_STREAMS; s++) {
			stream = &wreq->io_streams[s];
			if (stream->active &&
			    stream->front &&
			    stream->front->start < jump_to)
				jump_to = stream->front->start;
		}

		for (s = 0; s < NR_IO_STREAMS; s++) {
			stream = &wreq->io_streams[s];
			if (stream->active &&
			    !stream->front &&
			    stream->collected_to < jump_to) {
				trace_netfs_collect_gap(wreq, stream, jump_to, 'B');
				stream->collected_to = jump_to;
			}
		}
	}

	for (s = 0; s < NR_IO_STREAMS; s++) {
		stream = &wreq->io_streams[s];
		if (stream->active)
			trace_netfs_collect_stream(wreq, stream);
	}

	trace_netfs_collect_state(wreq, wreq->collected_to, notes);

	/* Unlock any folios that we have now finished with. */
	if (notes & BUFFERED) {
		unsigned long long clean_to = min(wreq->collected_to, wreq->contiguity);

		if (wreq->cleaned_to < clean_to)
			netfs_writeback_unlock_folios(wreq, clean_to, &notes);
	} else {
		wreq->cleaned_to = wreq->collected_to;
	}

	// TODO: Discard encryption buffers
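
	/* Note that, in the buffered case, cleaning is capped at the lower of
	 * what has been collected and what is known to be contiguously under
	 * writeback: unlocking must not jump a discontiguity, even if
	 * collection has advanced beyond it on some stream.
	 */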

	/* If all streams are discontiguous with the last folio we cleared, we
	 * may need to skip a set of folios.
	 */
	if ((notes & (MAYBE_DISCONTIG | ALL_EMPTY)) == MAYBE_DISCONTIG) {
		unsigned long long jump_to = ULLONG_MAX;

		for (s = 0; s < NR_IO_STREAMS; s++) {
			stream = &wreq->io_streams[s];
			if (stream->active && stream->front &&
			    stream->front->start < jump_to)
				jump_to = stream->front->start;
		}

		trace_netfs_collect_contig(wreq, jump_to, netfs_contig_trace_jump);
		wreq->contiguity = jump_to;
		wreq->cleaned_to = jump_to;
		wreq->collected_to = jump_to;
		for (s = 0; s < NR_IO_STREAMS; s++) {
			stream = &wreq->io_streams[s];
			if (stream->collected_to < jump_to)
				stream->collected_to = jump_to;
		}
		//cond_resched();
		notes |= MADE_PROGRESS;
		goto reassess_streams;
	}

	if (notes & NEED_RETRY)
		goto need_retry;
	if ((notes & MADE_PROGRESS) && test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) {
		trace_netfs_rreq(wreq, netfs_rreq_trace_unpause);
		clear_bit_unlock(NETFS_RREQ_PAUSE, &wreq->flags);
		wake_up_bit(&wreq->flags, NETFS_RREQ_PAUSE);
	}

	if (notes & NEED_REASSESS) {
		//cond_resched();
		goto reassess_streams;
	}
	if (notes & MADE_PROGRESS) {
		//cond_resched();
		goto reassess_streams;
	}

out:
	netfs_put_group_many(wreq->group, wreq->nr_group_rel);
	wreq->nr_group_rel = 0;
	_leave(" = %x", notes);
	return;

need_retry:
	/* Okay...  We're going to have to retry one or both streams.  Note
	 * that any partially completed op will have had any wholly transferred
	 * folios removed from it.
	 */
	_debug("retry");
	netfs_retry_writes(wreq);
	goto out;
}
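
/* The collector above is driven solely by the worker below, which reruns it
 * until the request completes.  Completion requires the issuer to have set
 * NETFS_RREQ_ALL_QUEUED and every active stream's subrequest queue to have
 * been emptied.
 */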

/*
 * Perform the collection of subrequests, folios and encryption buffers.
 */
void netfs_write_collection_worker(struct work_struct *work)
{
	struct netfs_io_request *wreq = container_of(work, struct netfs_io_request, work);
	struct netfs_inode *ictx = netfs_inode(wreq->inode);
	size_t transferred;
	int s;

	_enter("R=%x", wreq->debug_id);

	netfs_see_request(wreq, netfs_rreq_trace_see_work);
	if (!test_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags)) {
		netfs_put_request(wreq, false, netfs_rreq_trace_put_work);
		return;
	}

	netfs_collect_write_results(wreq);

	/* We're done when the app thread has finished posting subreqs and all
	 * the queues in all the streams are empty.
	 */
	if (!test_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags)) {
		netfs_put_request(wreq, false, netfs_rreq_trace_put_work);
		return;
	}
	smp_rmb(); /* Read ALL_QUEUED before lists. */

	transferred = LONG_MAX;
	for (s = 0; s < NR_IO_STREAMS; s++) {
		struct netfs_io_stream *stream = &wreq->io_streams[s];
		if (!stream->active)
			continue;
		if (!list_empty(&stream->subrequests)) {
			netfs_put_request(wreq, false, netfs_rreq_trace_put_work);
			return;
		}
		if (stream->transferred < transferred)
			transferred = stream->transferred;
	}

	/* Okay, declare that all I/O is complete. */
	wreq->transferred = transferred;
	trace_netfs_rreq(wreq, netfs_rreq_trace_write_done);

	if (wreq->io_streams[1].active &&
	    wreq->io_streams[1].failed) {
		/* Cache write failure doesn't prevent writeback completion
		 * unless we're in disconnected mode.
		 */
		ictx->ops->invalidate_cache(wreq);
	}

	if (wreq->cleanup)
		wreq->cleanup(wreq);

	if (wreq->origin == NETFS_DIO_WRITE &&
	    wreq->mapping->nrpages) {
		/* mmap may have got underfoot and we may now have folios
		 * locally covering the region we just wrote.  Attempt to
		 * discard the folios, but leave in place any modified locally.
		 * ->write_iter() is prevented from interfering by the DIO
		 * counter.
		 */
		pgoff_t first = wreq->start >> PAGE_SHIFT;
		pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT;
		invalidate_inode_pages2_range(wreq->mapping, first, last);
	}

	if (wreq->origin == NETFS_DIO_WRITE)
		inode_dio_end(wreq->inode);

	_debug("finished");
	trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip);
	clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
	wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);

	if (wreq->iocb) {
		size_t written = min(wreq->transferred, wreq->len);

		wreq->iocb->ki_pos += written;
		if (wreq->iocb->ki_complete)
			wreq->iocb->ki_complete(
				wreq->iocb, wreq->error ? wreq->error : written);
		wreq->iocb = VFS_PTR_POISON;
	}

	netfs_clear_subrequests(wreq, false);
	netfs_put_request(wreq, false, netfs_rreq_trace_put_work_complete);
}

/*
 * Wake the collection work item.
 */
void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async)
{
	if (!work_pending(&wreq->work)) {
		netfs_get_request(wreq, netfs_rreq_trace_get_work);
		if (!queue_work(system_unbound_wq, &wreq->work))
			netfs_put_request(wreq, was_async, netfs_rreq_trace_put_work_nq);
	}
}
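
/* For illustration, a filesystem's upload completion handler would typically
 * funnel into netfs_write_subrequest_terminated() something like this (the
 * myfs_* names and fields are hypothetical):
 *
 *	static void myfs_upload_done(struct myfs_call *call)
 *	{
 *		struct netfs_io_subrequest *subreq = call->subreq;
 *
 *		netfs_write_subrequest_terminated(subreq,
 *						  call->error ?: call->nr_written,
 *						  false);
 *	}
 */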

/**
 * netfs_write_subrequest_terminated - Note the termination of a write operation.
 * @_op: The I/O request that has terminated.
 * @transferred_or_error: The amount of data transferred or an error code.
 * @was_async: The termination was asynchronous
 *
 * This tells the library that a contributory write I/O operation has
 * terminated, one way or another, and that it should collect the results.
 *
 * The caller indicates in @transferred_or_error the outcome of the operation,
 * supplying a positive value to indicate the number of bytes transferred or a
 * negative error code.  The library will look after reissuing I/O operations
 * as appropriate and writing downloaded data to the cache.
 *
 * If @was_async is true, the caller might be running in softirq or interrupt
 * context and we can't sleep.
 *
 * When this is called, ownership of the subrequest is transferred back to the
 * library, along with a ref.
 *
 * Note that %_op is a void* so that the function can be passed to
 * kiocb::term_func without the need for a casting wrapper.
 */
void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
				       bool was_async)
{
	struct netfs_io_subrequest *subreq = _op;
	struct netfs_io_request *wreq = subreq->rreq;
	struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr];

	_enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);

	switch (subreq->source) {
	case NETFS_UPLOAD_TO_SERVER:
		netfs_stat(&netfs_n_wh_upload_done);
		break;
	case NETFS_WRITE_TO_CACHE:
		netfs_stat(&netfs_n_wh_write_done);
		break;
	case NETFS_INVALID_WRITE:
		break;
	default:
		BUG();
	}

	if (IS_ERR_VALUE(transferred_or_error)) {
		subreq->error = transferred_or_error;
		if (subreq->error == -EAGAIN)
			set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
		else
			set_bit(NETFS_SREQ_FAILED, &subreq->flags);
		trace_netfs_failure(wreq, subreq, transferred_or_error, netfs_fail_write);

		switch (subreq->source) {
		case NETFS_WRITE_TO_CACHE:
			netfs_stat(&netfs_n_wh_write_failed);
			break;
		case NETFS_UPLOAD_TO_SERVER:
			netfs_stat(&netfs_n_wh_upload_failed);
			break;
		default:
			break;
		}
		trace_netfs_rreq(wreq, netfs_rreq_trace_set_pause);
		set_bit(NETFS_RREQ_PAUSE, &wreq->flags);
	} else {
		if (WARN(transferred_or_error > subreq->len - subreq->transferred,
			 "Subreq excess write: R=%x[%x] %zd > %zu - %zu",
			 wreq->debug_id, subreq->debug_index,
			 transferred_or_error, subreq->len, subreq->transferred))
			transferred_or_error = subreq->len - subreq->transferred;

		subreq->error = 0;
		subreq->transferred += transferred_or_error;

		if (subreq->transferred < subreq->len)
			set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
	}

	trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);

	clear_bit_unlock(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
	wake_up_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS);

	/* If we are at the head of the queue, wake up the collector,
	 * transferring a ref to it if we were the ones to do so.
	 */
	if (list_is_first(&subreq->rreq_link, &stream->subrequests))
		netfs_wake_write_collector(wreq, was_async);

	netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
}
EXPORT_SYMBOL(netfs_write_subrequest_terminated);