1/* SPDX-License-Identifier: GPL-2.0-or-later */
2/* Network filesystem support services.
3 *
4 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
5 * Written by David Howells (dhowells@redhat.com)
6 *
7 * See:
8 *
9 *	Documentation/filesystems/netfs_library.rst
10 *
11 * for a description of the network filesystem interface declared here.
12 */
13
14#ifndef _LINUX_NETFS_H
15#define _LINUX_NETFS_H
16
17#include <linux/workqueue.h>
18#include <linux/fs.h>
19#include <linux/pagemap.h>
20#include <linux/uio.h>
21
/*
 * Forward declaration only: this header names the type as a parameter of
 * netfs_get_subrequest() and netfs_put_subrequest() but does not list its
 * enumerators.
 */
enum netfs_sreq_ref_trace;
23
/*
 * PG_fscache is PG_private_2 under another name.  The flag indicates that a
 * page is currently backed by a local disk cache; each helper below simply
 * forwards to the matching PG_private_2 accessor.
 */
#define folio_test_fscache(folio)	folio_test_private_2(folio)
#define PageFsCache(page)		PagePrivate2((page))
#define SetPageFsCache(page)		SetPagePrivate2((page))
#define ClearPageFsCache(page)		ClearPagePrivate2((page))
#define TestSetPageFsCache(page)	TestSetPagePrivate2((page))
#define TestClearPageFsCache(page)	TestClearPagePrivate2((page))
34
/**
 * folio_start_fscache - Start an fscache write on a folio.
 * @folio: The folio.
 *
 * Mark @folio as being written to a local cache.  This must be called before
 * the write is issued.  Only one such write may be in progress on a folio at
 * a time, so the mark must not already be set on entry.  A reference is taken
 * on the folio before the mark is set.
 */
static inline void folio_start_fscache(struct folio *folio)
{
	/* Starting a second write before the first finishes is not allowed. */
	VM_BUG_ON_FOLIO(folio_test_private_2(folio), folio);

	folio_get(folio);
	folio_set_private_2(folio);
}
48
/**
 * folio_end_fscache - End an fscache write on a folio.
 * @folio: The folio.
 *
 * Signal that the write of @folio to the local cache has completed.  Anything
 * sleeping on the folio waiting for that write is woken up.
 */
static inline void folio_end_fscache(struct folio *folio)
{
	/* PG_fscache is PG_private_2, so end the private_2 state. */
	folio_end_private_2(folio);
}
60
/**
 * folio_wait_fscache - Wait for an fscache write on this folio to end.
 * @folio: The folio.
 *
 * Sleep until any write of @folio to a local cache that is currently in
 * progress has finished.  Unless the caller holds the folio lock, nothing
 * prevents a new write from starting once this one has ended.
 */
static inline void folio_wait_fscache(struct folio *folio)
{
	/* PG_fscache is PG_private_2, so wait on the private_2 state. */
	folio_wait_private_2(folio);
}
73
/**
 * folio_wait_fscache_killable - Wait for an fscache write on this folio to end.
 * @folio: The folio.
 *
 * As folio_wait_fscache(), except that the sleep is also ended by the arrival
 * of a fatal signal.  Unless the caller holds the folio lock, another write
 * may start after the current one finishes.
 *
 * Return:
 * - 0 if the wait completed.
 * - -EINTR if a fatal signal was encountered.
 */
static inline int folio_wait_fscache_killable(struct folio *folio)
{
	int ret = folio_wait_private_2_killable(folio);

	return ret;
}
91
/* Page-based wrapper: start an fscache write on the folio holding @page. */
static inline void set_page_fscache(struct page *page)
{
	struct folio *folio = page_folio(page);

	folio_start_fscache(folio);
}
96
/* Page-based wrapper: end the fscache write on the folio holding @page. */
static inline void end_page_fscache(struct page *page)
{
	struct folio *folio = page_folio(page);

	folio_end_private_2(folio);
}
101
/* Page-based wrapper: wait for the fscache write on @page's folio to end. */
static inline void wait_on_page_fscache(struct page *page)
{
	struct folio *folio = page_folio(page);

	folio_wait_private_2(folio);
}
106
/*
 * Page-based wrapper: wait for the fscache write on @page's folio to end,
 * passing back whatever folio_wait_private_2_killable() returns.
 */
static inline int wait_on_page_fscache_killable(struct page *page)
{
	struct folio *folio = page_folio(page);

	return folio_wait_private_2_killable(folio);
}
111
/* Marks that may be applied to the entries of an xarray-based buffer */
#define NETFS_BUF_PUT_MARK	XA_MARK_0	/* - The page needs putting */
#define NETFS_BUF_PAGECACHE_MARK XA_MARK_1	/* - The page needs wb/dirty flag wrangling */
115
/*
 * Where a subrequest reads its data from or writes its data to.  This is the
 * type of netfs_io_subrequest::source and is what the cache's ->prepare_read()
 * and ->prepare_ondemand_read() hooks return.
 *
 * NOTE(review): __mode(byte) presumably shrinks the enum to a single byte -
 * confirm against the definition of __mode().
 */
enum netfs_io_source {
	NETFS_FILL_WITH_ZEROES,
	NETFS_DOWNLOAD_FROM_SERVER,
	NETFS_READ_FROM_CACHE,
	NETFS_INVALID_READ,
	NETFS_UPLOAD_TO_SERVER,
	NETFS_WRITE_TO_CACHE,
	NETFS_INVALID_WRITE,
} __mode(byte);
125
/*
 * Completion callback type.  It is handed the caller's private pointer, a
 * single value that carries either the amount transferred or an error, and a
 * flag saying whether the call was asynchronous.  The cache ->read() and
 * ->write() operations take a callback of this type.
 */
typedef void (*netfs_io_terminated_t)(void *priv,
				      ssize_t transferred_or_error,
				      bool was_async);
128
129/*
130 * Per-inode context.  This wraps the VFS inode.
131 */
132struct netfs_inode {
133	struct inode		inode;		/* The VFS inode */
134	const struct netfs_request_ops *ops;
135#if IS_ENABLED(CONFIG_FSCACHE)
136	struct fscache_cookie	*cache;
137#endif
138	loff_t			remote_i_size;	/* Size of the remote file */
139	loff_t			zero_point;	/* Size after which we assume there's no data
140						 * on the server */
141	unsigned long		flags;
142#define NETFS_ICTX_ODIRECT	0		/* The file has DIO in progress */
143#define NETFS_ICTX_UNBUFFERED	1		/* I/O should not use the pagecache */
144#define NETFS_ICTX_WRITETHROUGH	2		/* Write-through caching */
145#define NETFS_ICTX_NO_WRITE_STREAMING	3	/* Don't engage in write-streaming */
146};
147
148/*
149 * A netfs group - for instance a ceph snap.  This is marked on dirty pages and
150 * pages marked with a group must be flushed before they can be written under
151 * the domain of another group.
152 */
153struct netfs_group {
154	refcount_t		ref;
155	void (*free)(struct netfs_group *netfs_group);
156};
157
/*
 * Extra information about a dirty page.  This is only attached when needed
 * and is reached through folio->private, with the NETFS_FOLIO_INFO bit OR'd
 * into the stored pointer to show that the record is present.
 */
struct netfs_folio {
	struct netfs_group	*netfs_group;	/* The filesystem's grouping marker, or NULL */
	unsigned int		dirty_offset;	/* Offset of the write-streaming dirty data */
	unsigned int		dirty_len;	/* Length of the write-streaming dirty data */
};
#define NETFS_FOLIO_INFO	0x1UL	/* Tag bit OR'd with folio->private. */
168
169static inline struct netfs_folio *netfs_folio_info(struct folio *folio)
170{
171	void *priv = folio_get_private(folio);
172
173	if ((unsigned long)priv & NETFS_FOLIO_INFO)
174		return (struct netfs_folio *)((unsigned long)priv & ~NETFS_FOLIO_INFO);
175	return NULL;
176}
177
178static inline struct netfs_group *netfs_folio_group(struct folio *folio)
179{
180	struct netfs_folio *finfo;
181	void *priv = folio_get_private(folio);
182
183	finfo = netfs_folio_info(folio);
184	if (finfo)
185		return finfo->netfs_group;
186	return priv;
187}
188
/*
 * The resources needed to carry out operations on a cache.
 */
struct netfs_cache_resources {
	const struct netfs_cache_ops	*ops;		/* Cache operations table */
	void				*cache_priv;	/* Private data for the cache */
	void				*cache_priv2;	/* More private data for the cache */
	unsigned int			debug_id;	/* Debug ID of the cookie */
	unsigned int			inval_counter;	/* object->inval_counter as at begin_op */
};
199
200/*
201 * Descriptor for a single component subrequest.  Each operation represents an
202 * individual read/write from/to a server, a cache, a journal, etc..
203 *
204 * The buffer iterator is persistent for the life of the subrequest struct and
205 * the pages it points to can be relied on to exist for the duration.
206 */
207struct netfs_io_subrequest {
208	struct netfs_io_request *rreq;		/* Supervising I/O request */
209	struct work_struct	work;
210	struct list_head	rreq_link;	/* Link in rreq->subrequests */
211	struct iov_iter		io_iter;	/* Iterator for this subrequest */
212	loff_t			start;		/* Where to start the I/O */
213	size_t			len;		/* Size of the I/O */
214	size_t			transferred;	/* Amount of data transferred */
215	refcount_t		ref;
216	short			error;		/* 0 or error that occurred */
217	unsigned short		debug_index;	/* Index in list (for debugging output) */
218	unsigned int		max_nr_segs;	/* 0 or max number of segments in an iterator */
219	enum netfs_io_source	source;		/* Where to read from/write to */
220	unsigned long		flags;
221#define NETFS_SREQ_COPY_TO_CACHE	0	/* Set if should copy the data to the cache */
222#define NETFS_SREQ_CLEAR_TAIL		1	/* Set if the rest of the read should be cleared */
223#define NETFS_SREQ_SHORT_IO		2	/* Set if the I/O was short */
224#define NETFS_SREQ_SEEK_DATA_READ	3	/* Set if ->read() should SEEK_DATA first */
225#define NETFS_SREQ_NO_PROGRESS		4	/* Set if we didn't manage to read any data */
226#define NETFS_SREQ_ONDEMAND		5	/* Set if it's from on-demand read mode */
227};
228
/*
 * What gave rise to an I/O request; recorded in netfs_io_request::origin.
 * nr__netfs_io_origin is last and so equals the number of origins before it.
 *
 * NOTE(review): __mode(byte) presumably shrinks the enum to a single byte -
 * confirm against the definition of __mode().
 */
enum netfs_io_origin {
	NETFS_READAHEAD,		/* Read triggered by readahead */
	NETFS_READPAGE,			/* Synchronous read */
	NETFS_READ_FOR_WRITE,		/* Read made to prepare a write */
	NETFS_WRITEBACK,		/* Write triggered by writepages */
	NETFS_WRITETHROUGH,		/* Write made by netfs_perform_write() */
	NETFS_LAUNDER_WRITE,		/* Write triggered by ->launder_folio() */
	NETFS_UNBUFFERED_WRITE,		/* Unbuffered write */
	NETFS_DIO_READ,			/* Direct I/O read */
	NETFS_DIO_WRITE,		/* Direct I/O write */
	nr__netfs_io_origin
} __mode(byte);
241
242/*
243 * Descriptor for an I/O helper request.  This is used to make multiple I/O
244 * operations to a variety of data stores and then stitch the result together.
245 */
246struct netfs_io_request {
247	union {
248		struct work_struct work;
249		struct rcu_head rcu;
250	};
251	struct inode		*inode;		/* The file being accessed */
252	struct address_space	*mapping;	/* The mapping being accessed */
253	struct kiocb		*iocb;		/* AIO completion vector */
254	struct netfs_cache_resources cache_resources;
255	struct list_head	proc_link;	/* Link in netfs_iorequests */
256	struct list_head	subrequests;	/* Contributory I/O operations */
257	struct iov_iter		iter;		/* Unencrypted-side iterator */
258	struct iov_iter		io_iter;	/* I/O (Encrypted-side) iterator */
259	void			*netfs_priv;	/* Private data for the netfs */
260	struct bio_vec		*direct_bv;	/* DIO buffer list (when handling iovec-iter) */
261	unsigned int		direct_bv_count; /* Number of elements in direct_bv[] */
262	unsigned int		debug_id;
263	unsigned int		rsize;		/* Maximum read size (0 for none) */
264	unsigned int		wsize;		/* Maximum write size (0 for none) */
265	unsigned int		subreq_counter;	/* Next subreq->debug_index */
266	atomic_t		nr_outstanding;	/* Number of ops in progress */
267	atomic_t		nr_copy_ops;	/* Number of copy-to-cache ops in progress */
268	size_t			submitted;	/* Amount submitted for I/O so far */
269	size_t			len;		/* Length of the request */
270	size_t			upper_len;	/* Length can be extended to here */
271	size_t			transferred;	/* Amount to be indicated as transferred */
272	short			error;		/* 0 or error that occurred */
273	enum netfs_io_origin	origin;		/* Origin of the request */
274	bool			direct_bv_unpin; /* T if direct_bv[] must be unpinned */
275	loff_t			i_size;		/* Size of the file */
276	loff_t			start;		/* Start position */
277	pgoff_t			no_unlock_folio; /* Don't unlock this folio after read */
278	refcount_t		ref;
279	unsigned long		flags;
280#define NETFS_RREQ_INCOMPLETE_IO	0	/* Some ioreqs terminated short or with error */
281#define NETFS_RREQ_COPY_TO_CACHE	1	/* Need to write to the cache */
282#define NETFS_RREQ_NO_UNLOCK_FOLIO	2	/* Don't unlock no_unlock_folio on completion */
283#define NETFS_RREQ_DONT_UNLOCK_FOLIOS	3	/* Don't unlock the folios on completion */
284#define NETFS_RREQ_FAILED		4	/* The request failed */
285#define NETFS_RREQ_IN_PROGRESS		5	/* Unlocked when the request completes */
286#define NETFS_RREQ_WRITE_TO_CACHE	7	/* Need to write to the cache */
287#define NETFS_RREQ_UPLOAD_TO_SERVER	8	/* Need to write to the server */
288#define NETFS_RREQ_NONBLOCK		9	/* Don't block if possible (O_NONBLOCK) */
289#define NETFS_RREQ_BLOCKED		10	/* We blocked */
290	const struct netfs_request_ops *netfs_ops;
291	void (*cleanup)(struct netfs_io_request *req);
292};
293
294/*
295 * Operations the network filesystem can/must provide to the helpers.
296 */
297struct netfs_request_ops {
298	unsigned int	io_request_size;	/* Alloc size for netfs_io_request struct */
299	unsigned int	io_subrequest_size;	/* Alloc size for netfs_io_subrequest struct */
300	int (*init_request)(struct netfs_io_request *rreq, struct file *file);
301	void (*free_request)(struct netfs_io_request *rreq);
302	void (*free_subrequest)(struct netfs_io_subrequest *rreq);
303
304	/* Read request handling */
305	void (*expand_readahead)(struct netfs_io_request *rreq);
306	bool (*clamp_length)(struct netfs_io_subrequest *subreq);
307	void (*issue_read)(struct netfs_io_subrequest *subreq);
308	bool (*is_still_valid)(struct netfs_io_request *rreq);
309	int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
310				 struct folio **foliop, void **_fsdata);
311	void (*done)(struct netfs_io_request *rreq);
312
313	/* Modification handling */
314	void (*update_i_size)(struct inode *inode, loff_t i_size);
315
316	/* Write request handling */
317	void (*create_write_requests)(struct netfs_io_request *wreq,
318				      loff_t start, size_t len);
319	void (*invalidate_cache)(struct netfs_io_request *wreq);
320};
321
/*
 * What to do when a read lands on a hole.  A value of this type is passed as
 * the read_hole argument of the cache ->read() operation.
 */
enum netfs_read_from_hole {
	NETFS_READ_HOLE_IGNORE,
	NETFS_READ_HOLE_CLEAR,
	NETFS_READ_HOLE_FAIL,
};
330
331/*
332 * Table of operations for access to a cache.
333 */
334struct netfs_cache_ops {
335	/* End an operation */
336	void (*end_operation)(struct netfs_cache_resources *cres);
337
338	/* Read data from the cache */
339	int (*read)(struct netfs_cache_resources *cres,
340		    loff_t start_pos,
341		    struct iov_iter *iter,
342		    enum netfs_read_from_hole read_hole,
343		    netfs_io_terminated_t term_func,
344		    void *term_func_priv);
345
346	/* Write data to the cache */
347	int (*write)(struct netfs_cache_resources *cres,
348		     loff_t start_pos,
349		     struct iov_iter *iter,
350		     netfs_io_terminated_t term_func,
351		     void *term_func_priv);
352
353	/* Expand readahead request */
354	void (*expand_readahead)(struct netfs_cache_resources *cres,
355				 loff_t *_start, size_t *_len, loff_t i_size);
356
357	/* Prepare a read operation, shortening it to a cached/uncached
358	 * boundary as appropriate.
359	 */
360	enum netfs_io_source (*prepare_read)(struct netfs_io_subrequest *subreq,
361					     loff_t i_size);
362
363	/* Prepare a write operation, working out what part of the write we can
364	 * actually do.
365	 */
366	int (*prepare_write)(struct netfs_cache_resources *cres,
367			     loff_t *_start, size_t *_len, size_t upper_len,
368			     loff_t i_size, bool no_space_allocated_yet);
369
370	/* Prepare an on-demand read operation, shortening it to a cached/uncached
371	 * boundary as appropriate.
372	 */
373	enum netfs_io_source (*prepare_ondemand_read)(struct netfs_cache_resources *cres,
374						      loff_t start, size_t *_len,
375						      loff_t i_size,
376						      unsigned long *_flags, ino_t ino);
377
378	/* Query the occupancy of the cache in a region, returning where the
379	 * next chunk of data starts and how long it is.
380	 */
381	int (*query_occupancy)(struct netfs_cache_resources *cres,
382			       loff_t start, size_t len, size_t granularity,
383			       loff_t *_data_start, size_t *_data_len);
384};
385
386/* High-level read API. */
387ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb, struct iov_iter *iter);
388ssize_t netfs_buffered_read_iter(struct kiocb *iocb, struct iov_iter *iter);
389ssize_t netfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter);
390
391/* High-level write API */
392ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
393			    struct netfs_group *netfs_group);
394ssize_t netfs_buffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *from,
395					 struct netfs_group *netfs_group);
396ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from);
397ssize_t netfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from);
398
399/* Address operations API */
400struct readahead_control;
401void netfs_readahead(struct readahead_control *);
402int netfs_read_folio(struct file *, struct folio *);
403int netfs_write_begin(struct netfs_inode *, struct file *,
404		      struct address_space *, loff_t pos, unsigned int len,
405		      struct folio **, void **fsdata);
406int netfs_writepages(struct address_space *mapping,
407		     struct writeback_control *wbc);
408bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio);
409int netfs_unpin_writeback(struct inode *inode, struct writeback_control *wbc);
410void netfs_clear_inode_writeback(struct inode *inode, const void *aux);
411void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length);
412bool netfs_release_folio(struct folio *folio, gfp_t gfp);
413int netfs_launder_folio(struct folio *folio);
414
415/* VMA operations API. */
416vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group);
417
418/* (Sub)request management API. */
419void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool);
420void netfs_get_subrequest(struct netfs_io_subrequest *subreq,
421			  enum netfs_sreq_ref_trace what);
422void netfs_put_subrequest(struct netfs_io_subrequest *subreq,
423			  bool was_async, enum netfs_sreq_ref_trace what);
424ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len,
425				struct iov_iter *new,
426				iov_iter_extraction_t extraction_flags);
427size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset,
428			size_t max_size, size_t max_segs);
429struct netfs_io_subrequest *netfs_create_write_request(
430	struct netfs_io_request *wreq, enum netfs_io_source dest,
431	loff_t start, size_t len, work_func_t worker);
432void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
433				       bool was_async);
434void netfs_queue_write_request(struct netfs_io_subrequest *subreq);
435
436int netfs_start_io_read(struct inode *inode);
437void netfs_end_io_read(struct inode *inode);
438int netfs_start_io_write(struct inode *inode);
439void netfs_end_io_write(struct inode *inode);
440int netfs_start_io_direct(struct inode *inode);
441void netfs_end_io_direct(struct inode *inode);
442
443/**
444 * netfs_inode - Get the netfs inode context from the inode
445 * @inode: The inode to query
446 *
447 * Get the netfs lib inode context from the network filesystem's inode.  The
448 * context struct is expected to directly follow on from the VFS inode struct.
449 */
450static inline struct netfs_inode *netfs_inode(struct inode *inode)
451{
452	return container_of(inode, struct netfs_inode, inode);
453}
454
455/**
456 * netfs_inode_init - Initialise a netfslib inode context
457 * @ctx: The netfs inode to initialise
458 * @ops: The netfs's operations list
459 * @use_zero_point: True to use the zero_point read optimisation
460 *
461 * Initialise the netfs library context struct.  This is expected to follow on
462 * directly from the VFS inode struct.
463 */
464static inline void netfs_inode_init(struct netfs_inode *ctx,
465				    const struct netfs_request_ops *ops,
466				    bool use_zero_point)
467{
468	ctx->ops = ops;
469	ctx->remote_i_size = i_size_read(&ctx->inode);
470	ctx->zero_point = LLONG_MAX;
471	ctx->flags = 0;
472#if IS_ENABLED(CONFIG_FSCACHE)
473	ctx->cache = NULL;
474#endif
475	/* ->releasepage() drives zero_point */
476	if (use_zero_point) {
477		ctx->zero_point = ctx->remote_i_size;
478		mapping_set_release_always(ctx->inode.i_mapping);
479	}
480}
481
482/**
483 * netfs_resize_file - Note that a file got resized
484 * @ctx: The netfs inode being resized
485 * @new_i_size: The new file size
486 * @changed_on_server: The change was applied to the server
487 *
488 * Inform the netfs lib that a file got resized so that it can adjust its state.
489 */
490static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size,
491				     bool changed_on_server)
492{
493	if (changed_on_server)
494		ctx->remote_i_size = new_i_size;
495	if (new_i_size < ctx->zero_point)
496		ctx->zero_point = new_i_size;
497}
498
499/**
500 * netfs_i_cookie - Get the cache cookie from the inode
501 * @ctx: The netfs inode to query
502 *
503 * Get the caching cookie (if enabled) from the network filesystem's inode.
504 */
505static inline struct fscache_cookie *netfs_i_cookie(struct netfs_inode *ctx)
506{
507#if IS_ENABLED(CONFIG_FSCACHE)
508	return ctx->cache;
509#else
510	return NULL;
511#endif
512}
513
514#endif /* _LINUX_NETFS_H */
515