// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem high-level write support.
 *
 * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include "internal.h"

/**
 * netfs_create_write_request - Create a write operation.
 * @wreq: The write request that this operation will store data from
 * @dest: The destination type
 * @start: Start of the region this write will modify
 * @len: Length of the modification
 * @worker: The worker function to handle the write(s)
 *
 * Allocate a write operation, set it up and add it to the list on a write
 * request.
 *
 * Return: The new subrequest, or NULL if allocation failed.
 */
struct netfs_io_subrequest *netfs_create_write_request(struct netfs_io_request *wreq,
						       enum netfs_io_source dest,
						       loff_t start, size_t len,
						       work_func_t worker)
{
	struct netfs_io_subrequest *subreq;

	subreq = netfs_alloc_subrequest(wreq);
	if (subreq) {
		INIT_WORK(&subreq->work, worker);
		subreq->source	= dest;
		subreq->start	= start;
		subreq->len	= len;
		subreq->debug_index = wreq->subreq_counter++;

		switch (subreq->source) {
		case NETFS_UPLOAD_TO_SERVER:
			netfs_stat(&netfs_n_wh_upload);
			break;
		case NETFS_WRITE_TO_CACHE:
			netfs_stat(&netfs_n_wh_write);
			break;
		default:
			BUG();
		}

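		/* Give the subrequest a view of the request's buffer that is
		 * limited to the region this subrequest covers.
		 */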
		subreq->io_iter = wreq->io_iter;
		iov_iter_advance(&subreq->io_iter, subreq->start - wreq->start);
		iov_iter_truncate(&subreq->io_iter, subreq->len);

		trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
				     refcount_read(&subreq->ref),
				     netfs_sreq_trace_new);
		atomic_inc(&wreq->nr_outstanding);
		list_add_tail(&subreq->rreq_link, &wreq->subrequests);
		trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
	}

	return subreq;
}
EXPORT_SYMBOL(netfs_create_write_request);

/*
 * Process a completed write request once all the component operations have
 * been completed.
 */
static void netfs_write_terminated(struct netfs_io_request *wreq, bool was_async)
{
	struct netfs_io_subrequest *subreq;
	struct netfs_inode *ctx = netfs_inode(wreq->inode);
	size_t transferred = 0;

	_enter("R=%x[]", wreq->debug_id);

	trace_netfs_rreq(wreq, netfs_rreq_trace_write_done);

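	/* Count how much was written contiguously from the start of the
	 * request, stopping at the first error or short write.
	 */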
	list_for_each_entry(subreq, &wreq->subrequests, rreq_link) {
		if (subreq->error || subreq->transferred == 0)
			break;
		transferred += subreq->transferred;
		if (subreq->transferred < subreq->len)
			break;
	}
	wreq->transferred = transferred;

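	/* Then deal with any failed subrequests: an upload failure becomes
	 * the request error, whereas a cache-write failure (other than
	 * -ENOBUFS) just causes the cached copy to be invalidated.
	 */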
	list_for_each_entry(subreq, &wreq->subrequests, rreq_link) {
		if (!subreq->error)
			continue;
		switch (subreq->source) {
		case NETFS_UPLOAD_TO_SERVER:
			/* Depending on the type of failure, this may prevent
			 * writeback completion unless we're in disconnected
			 * mode.
			 */
			if (!wreq->error)
				wreq->error = subreq->error;
			break;

		case NETFS_WRITE_TO_CACHE:
			/* Failure doesn't prevent writeback completion unless
			 * we're in disconnected mode.
			 */
			if (subreq->error != -ENOBUFS)
				ctx->ops->invalidate_cache(wreq);
			break;

		default:
			WARN_ON_ONCE(1);
			if (!wreq->error)
				wreq->error = -EIO;
			return;
		}
	}

	wreq->cleanup(wreq);

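	/* A DIO write bypasses the pagecache, so any pages still overlapping
	 * the range just written are stale and must be dropped.
	 */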
	if (wreq->origin == NETFS_DIO_WRITE &&
	    wreq->mapping->nrpages) {
		pgoff_t first = wreq->start >> PAGE_SHIFT;
		pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT;
		invalidate_inode_pages2_range(wreq->mapping, first, last);
	}

	if (wreq->origin == NETFS_DIO_WRITE)
		inode_dio_end(wreq->inode);

	_debug("finished");
	trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip);
	clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
	wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);

	if (wreq->iocb) {
		wreq->iocb->ki_pos += transferred;
		if (wreq->iocb->ki_complete)
			wreq->iocb->ki_complete(
				wreq->iocb, wreq->error ? wreq->error : transferred);
	}

	netfs_clear_subrequests(wreq, was_async);
	netfs_put_request(wreq, was_async, netfs_rreq_trace_put_complete);
}

/*
 * Deal with the completion of a write subrequest, whether that was an upload
 * to the server or a write to the cache.  Collect the result and, if this was
 * the last outstanding subrequest, finish off the write request.
 */
void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
				       bool was_async)
{
	struct netfs_io_subrequest *subreq = _op;
	struct netfs_io_request *wreq = subreq->rreq;
	unsigned int u;

	_enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);

	switch (subreq->source) {
	case NETFS_UPLOAD_TO_SERVER:
		netfs_stat(&netfs_n_wh_upload_done);
		break;
	case NETFS_WRITE_TO_CACHE:
		netfs_stat(&netfs_n_wh_write_done);
		break;
	case NETFS_INVALID_WRITE:
		break;
	default:
		BUG();
	}

	if (IS_ERR_VALUE(transferred_or_error)) {
		subreq->error = transferred_or_error;
		trace_netfs_failure(wreq, subreq, transferred_or_error,
				    netfs_fail_write);
		goto failed;
	}

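	/* The backend shouldn't report more progress than was outstanding on
	 * this subrequest; clamp it if it does.
	 */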
	if (WARN(transferred_or_error > subreq->len - subreq->transferred,
		 "Subreq excess write: R%x[%x] %zd > %zu - %zu",
		 wreq->debug_id, subreq->debug_index,
		 transferred_or_error, subreq->len, subreq->transferred))
		transferred_or_error = subreq->len - subreq->transferred;

	subreq->error = 0;
	subreq->transferred += transferred_or_error;

	if (iov_iter_count(&subreq->io_iter) != subreq->len - subreq->transferred)
		pr_warn("R=%08x[%u] ITER POST-MISMATCH %zx != %zx-%zx %x\n",
			wreq->debug_id, subreq->debug_index,
			iov_iter_count(&subreq->io_iter), subreq->len,
			subreq->transferred, subreq->io_iter.iter_type);

	if (subreq->transferred < subreq->len)
		goto incomplete;

	__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
out:
	trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);

	/* If we decrement nr_outstanding to 0, the ref belongs to us. */
	u = atomic_dec_return(&wreq->nr_outstanding);
	if (u == 0)
		netfs_write_terminated(wreq, was_async);
	else if (u == 1)
		wake_up_var(&wreq->nr_outstanding);

	netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
	return;

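	/* The write fell short.  If no progress was made on two successive
	 * attempts, give up with -ENODATA; otherwise note the short I/O and
	 * mark the request incomplete.
	 */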
incomplete:
	if (transferred_or_error == 0) {
		if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) {
			subreq->error = -ENODATA;
			goto failed;
		}
	} else {
		__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
	}

	__set_bit(NETFS_SREQ_SHORT_IO, &subreq->flags);
	set_bit(NETFS_RREQ_INCOMPLETE_IO, &wreq->flags);
	goto out;

failed:
	switch (subreq->source) {
	case NETFS_WRITE_TO_CACHE:
		netfs_stat(&netfs_n_wh_write_failed);
		set_bit(NETFS_RREQ_INCOMPLETE_IO, &wreq->flags);
		break;
	case NETFS_UPLOAD_TO_SERVER:
		netfs_stat(&netfs_n_wh_upload_failed);
		set_bit(NETFS_RREQ_FAILED, &wreq->flags);
		wreq->error = subreq->error;
		break;
	default:
		break;
	}
	goto out;
}
EXPORT_SYMBOL(netfs_write_subrequest_terminated);

static void netfs_write_to_cache_op(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *wreq = subreq->rreq;
	struct netfs_cache_resources *cres = &wreq->cache_resources;

	trace_netfs_sreq(subreq, netfs_sreq_trace_submit);

	cres->ops->write(cres, subreq->start, &subreq->io_iter,
			 netfs_write_subrequest_terminated, subreq);
}

static void netfs_write_to_cache_op_worker(struct work_struct *work)
{
	struct netfs_io_subrequest *subreq =
		container_of(work, struct netfs_io_subrequest, work);

	netfs_write_to_cache_op(subreq);
}

/**
 * netfs_queue_write_request - Queue a write request for attention
 * @subreq: The write subrequest to be queued
 *
 * Queue the specified write subrequest for processing by a worker thread.  We
 * pass the caller's ref on the subrequest to the worker thread.
 */
void netfs_queue_write_request(struct netfs_io_subrequest *subreq)
{
	if (!queue_work(system_unbound_wq, &subreq->work))
		netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_wip);
}
EXPORT_SYMBOL(netfs_queue_write_request);

/*
 * Set up an op for writing to the cache.
 */
static void netfs_set_up_write_to_cache(struct netfs_io_request *wreq)
{
	struct netfs_cache_resources *cres = &wreq->cache_resources;
	struct netfs_io_subrequest *subreq;
	struct netfs_inode *ctx = netfs_inode(wreq->inode);
	struct fscache_cookie *cookie = netfs_i_cookie(ctx);
	loff_t start = wreq->start;
	size_t len = wreq->len;
	int ret;

	if (!fscache_cookie_enabled(cookie)) {
		clear_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags);
		return;
	}

	_debug("write to cache");
	ret = fscache_begin_write_operation(cres, cookie);
	if (ret < 0)
		return;

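	/* The cache may adjust the start and length of the write to suit its
	 * own requirements (e.g. block granularity).
	 */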
	ret = cres->ops->prepare_write(cres, &start, &len, wreq->upper_len,
				       i_size_read(wreq->inode), true);
	if (ret < 0)
		return;

	subreq = netfs_create_write_request(wreq, NETFS_WRITE_TO_CACHE, start, len,
					    netfs_write_to_cache_op_worker);
	if (!subreq)
		return;

	netfs_write_to_cache_op(subreq);
}

/*
 * Begin the process of writing out a chunk of data.
 *
 * We are given a write request that holds a series of dirty regions and
 * (partially) covers a sequence of folios, all of which are present.  The
 * folios must have been marked for writeback as appropriate.
 *
 * We need to perform the following steps:
 *
 * (1) If encrypting, create an output buffer and encrypt each block of the
 *     data into it, otherwise the output buffer will point to the original
 *     folios.
 *
 * (2) If the data is to be cached, set up a write op for the entire output
 *     buffer to the cache, if the cache wants to accept it.
 *
 * (3) If the data is to be uploaded (ie. not merely cached):
 *
 *     (a) If the data is to be compressed, create a compression buffer and
 *         compress the data into it.
 *
 *     (b) For each destination we want to upload to, set up write ops to write
 *         to that destination.  We may need multiple writes if the data is not
 *         contiguous or the span exceeds wsize for a server.
 */
int netfs_begin_write(struct netfs_io_request *wreq, bool may_wait,
		      enum netfs_write_trace what)
{
	struct netfs_inode *ctx = netfs_inode(wreq->inode);

	_enter("R=%x %llx-%llx f=%lx",
	       wreq->debug_id, wreq->start, wreq->start + wreq->len - 1,
	       wreq->flags);

	trace_netfs_write(wreq, what);
	if (wreq->len == 0 || wreq->iter.count == 0) {
		pr_err("Zero-sized write [R=%x]\n", wreq->debug_id);
		return -EIO;
	}

	if (wreq->origin == NETFS_DIO_WRITE)
		inode_dio_begin(wreq->inode);

	wreq->io_iter = wreq->iter;

	/* ->nr_outstanding > 0 carries a ref.  Start the count at one so that
	 * the request can't be deemed complete whilst subrequests are still
	 * being added; the balancing decrement happens below.
	 */
	netfs_get_request(wreq, netfs_rreq_trace_get_for_outstanding);
	atomic_set(&wreq->nr_outstanding, 1);

	/* Start the encryption/compression going.  We can do that in the
	 * background whilst we generate a list of write ops that we want to
	 * perform.
	 */
	// TODO: Encrypt or compress the region as appropriate

	/* We need to write all of the region to the cache */
	if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags))
		netfs_set_up_write_to_cache(wreq);

	/* However, we don't necessarily write all of the region to the server;
	 * caching of reads is managed this way as well.
	 */
	if (test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
		ctx->ops->create_write_requests(wreq, wreq->start, wreq->len);

	if (atomic_dec_and_test(&wreq->nr_outstanding))
		netfs_write_terminated(wreq, false);

	if (!may_wait)
		return -EIOCBQUEUED;

	wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
		    TASK_UNINTERRUPTIBLE);
	return wreq->error;
}

/*
 * Begin a write operation for writing through the pagecache.
 */
struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len)
{
	struct netfs_io_request *wreq;
	struct file *file = iocb->ki_filp;

	wreq = netfs_alloc_request(file->f_mapping, file, iocb->ki_pos, len,
				   NETFS_WRITETHROUGH);
	if (IS_ERR(wreq))
		return wreq;

	trace_netfs_write(wreq, netfs_write_trace_writethrough);

	__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
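	/* Set up an iterator over the pagecache that starts out empty;
	 * netfs_advance_writethrough() grows it as data is copied in.
	 */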
	iov_iter_xarray(&wreq->iter, ITER_SOURCE, &wreq->mapping->i_pages, wreq->start, 0);
	wreq->io_iter = wreq->iter;

	/* ->nr_outstanding > 0 carries a ref */
	netfs_get_request(wreq, netfs_rreq_trace_get_for_outstanding);
	atomic_set(&wreq->nr_outstanding, 1);
	return wreq;
}

static void netfs_submit_writethrough(struct netfs_io_request *wreq, bool final)
{
	struct netfs_inode *ictx = netfs_inode(wreq->inode);
	unsigned long long start;
	size_t len;

	if (!test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
		return;

	start = wreq->start + wreq->submitted;
	len = wreq->iter.count - wreq->submitted;
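	/* Unless this is the final submission, only push out whole multiples
	 * of wsize and hold back the remainder until more data arrives.
	 */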
	if (!final) {
		len /= wreq->wsize; /* Round to number of maximum packets */
		len *= wreq->wsize;
	}

	ictx->ops->create_write_requests(wreq, start, len);
	wreq->submitted += len;
}

/*
 * Advance the state of the write operation used when writing through the
 * pagecache.  Data has been copied into the pagecache that we need to append
 * to the request.  If we've added more than wsize then we need to create a new
 * subrequest.
 */
int netfs_advance_writethrough(struct netfs_io_request *wreq, size_t copied, bool to_page_end)
{
	_enter("ic=%zu sb=%zu ws=%u cp=%zu tp=%u",
	       wreq->iter.count, wreq->submitted, wreq->wsize, copied, to_page_end);

	wreq->iter.count += copied;
	wreq->io_iter.count += copied;
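	/* Once a page boundary has been reached and at least wsize bytes are
	 * pending, push out more of the data.
	 */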
	if (to_page_end && wreq->io_iter.count - wreq->submitted >= wreq->wsize)
		netfs_submit_writethrough(wreq, false);

	return wreq->error;
}

/*
 * End a write operation used when writing through the pagecache.
 */
int netfs_end_writethrough(struct netfs_io_request *wreq, struct kiocb *iocb)
{
	int ret = -EIOCBQUEUED;

	_enter("ic=%zu sb=%zu ws=%u",
	       wreq->iter.count, wreq->submitted, wreq->wsize);

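	/* Push out anything that hasn't yet been submitted, including a final
	 * chunk that may be smaller than wsize.
	 */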
	if (wreq->submitted < wreq->io_iter.count)
		netfs_submit_writethrough(wreq, true);

	if (atomic_dec_and_test(&wreq->nr_outstanding))
		netfs_write_terminated(wreq, false);

	if (is_sync_kiocb(iocb)) {
		wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
			    TASK_UNINTERRUPTIBLE);
		ret = wreq->error;
	}

	netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
	return ret;
}