1// SPDX-License-Identifier: LGPL-2.1
2/*
3 *
4 *   vfs operations that deal with files
5 *
6 *   Copyright (C) International Business Machines  Corp., 2002,2010
7 *   Author(s): Steve French (sfrench@us.ibm.com)
8 *              Jeremy Allison (jra@samba.org)
9 *
10 */
11#include <linux/fs.h>
12#include <linux/filelock.h>
13#include <linux/backing-dev.h>
14#include <linux/stat.h>
15#include <linux/fcntl.h>
16#include <linux/pagemap.h>
17#include <linux/pagevec.h>
18#include <linux/writeback.h>
19#include <linux/task_io_accounting_ops.h>
20#include <linux/delay.h>
21#include <linux/mount.h>
22#include <linux/slab.h>
23#include <linux/swap.h>
24#include <linux/mm.h>
25#include <asm/div64.h>
26#include "cifsfs.h"
27#include "cifspdu.h"
28#include "cifsglob.h"
29#include "cifsproto.h"
30#include "smb2proto.h"
31#include "cifs_unicode.h"
32#include "cifs_debug.h"
33#include "cifs_fs_sb.h"
34#include "fscache.h"
35#include "smbdirect.h"
36#include "fs_context.h"
37#include "cifs_ioctl.h"
38#include "cached_dir.h"
39
40/*
41 * Remove the dirty flags from a span of pages.
42 */
43static void cifs_undirty_folios(struct inode *inode, loff_t start, unsigned int len)
44{
45	struct address_space *mapping = inode->i_mapping;
46	struct folio *folio;
47	pgoff_t end;
48
49	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
50
51	rcu_read_lock();
52
53	end = (start + len - 1) / PAGE_SIZE;
54	xas_for_each_marked(&xas, folio, end, PAGECACHE_TAG_DIRTY) {
55		if (xas_retry(&xas, folio))
56			continue;
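		/*
		 * folio_lock() can sleep, so pause the XArray walk and drop
		 * the RCU read lock before taking it, then pick the walk back
		 * up afterwards.
		 */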
57		xas_pause(&xas);
58		rcu_read_unlock();
59		folio_lock(folio);
60		folio_clear_dirty_for_io(folio);
61		folio_unlock(folio);
62		rcu_read_lock();
63	}
64
65	rcu_read_unlock();
66}
67
68/*
69 * Completion of write to server.
70 */
71void cifs_pages_written_back(struct inode *inode, loff_t start, unsigned int len)
72{
73	struct address_space *mapping = inode->i_mapping;
74	struct folio *folio;
75	pgoff_t end;
76
77	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
78
79	if (!len)
80		return;
81
82	rcu_read_lock();
83
84	end = (start + len - 1) / PAGE_SIZE;
85	xas_for_each(&xas, folio, end) {
86		if (xas_retry(&xas, folio))
87			continue;
88		if (!folio_test_writeback(folio)) {
89			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
90				  len, start, folio->index, end);
91			continue;
92		}
93
94		folio_detach_private(folio);
95		folio_end_writeback(folio);
96	}
97
98	rcu_read_unlock();
99}
100
101/*
102 * Failure of write to server.
103 */
104void cifs_pages_write_failed(struct inode *inode, loff_t start, unsigned int len)
105{
106	struct address_space *mapping = inode->i_mapping;
107	struct folio *folio;
108	pgoff_t end;
109
110	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
111
112	if (!len)
113		return;
114
115	rcu_read_lock();
116
117	end = (start + len - 1) / PAGE_SIZE;
118	xas_for_each(&xas, folio, end) {
119		if (xas_retry(&xas, folio))
120			continue;
121		if (!folio_test_writeback(folio)) {
122			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
123				  len, start, folio->index, end);
124			continue;
125		}
126
127		folio_set_error(folio);
128		folio_end_writeback(folio);
129	}
130
131	rcu_read_unlock();
132}
133
134/*
135 * Redirty pages after a temporary failure.
136 */
137void cifs_pages_write_redirty(struct inode *inode, loff_t start, unsigned int len)
138{
139	struct address_space *mapping = inode->i_mapping;
140	struct folio *folio;
141	pgoff_t end;
142
143	XA_STATE(xas, &mapping->i_pages, start / PAGE_SIZE);
144
145	if (!len)
146		return;
147
148	rcu_read_lock();
149
150	end = (start + len - 1) / PAGE_SIZE;
151	xas_for_each(&xas, folio, end) {
152		if (!folio_test_writeback(folio)) {
153			WARN_ONCE(1, "bad %x @%llx page %lx %lx\n",
154				  len, start, folio->index, end);
155			continue;
156		}
157
158		filemap_dirty_folio(folio->mapping, folio);
159		folio_end_writeback(folio);
160	}
161
162	rcu_read_unlock();
163}
164
/*
 * Mark all open files on the tree connection as invalid, since they
 * were closed when the session to the server was lost.
 */
169void
170cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
171{
172	struct cifsFileInfo *open_file = NULL;
173	struct list_head *tmp;
174	struct list_head *tmp1;
175
176	/* only send once per connect */
177	spin_lock(&tcon->tc_lock);
178	if (tcon->need_reconnect)
179		tcon->status = TID_NEED_RECON;
180
181	if (tcon->status != TID_NEED_RECON) {
182		spin_unlock(&tcon->tc_lock);
183		return;
184	}
185	tcon->status = TID_IN_FILES_INVALIDATE;
186	spin_unlock(&tcon->tc_lock);
187
188	/* list all files open on tree connection and mark them invalid */
189	spin_lock(&tcon->open_file_lock);
190	list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
191		open_file = list_entry(tmp, struct cifsFileInfo, tlist);
192		open_file->invalidHandle = true;
193		open_file->oplock_break_cancelled = true;
194	}
195	spin_unlock(&tcon->open_file_lock);
196
197	invalidate_all_cached_dirs(tcon);
198	spin_lock(&tcon->tc_lock);
199	if (tcon->status == TID_IN_FILES_INVALIDATE)
200		tcon->status = TID_NEED_TCON;
201	spin_unlock(&tcon->tc_lock);
202
203	/*
204	 * BB Add call to invalidate_inodes(sb) for all superblocks mounted
205	 * to this tcon.
206	 */
207}
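
/*
 * Summary of the status transitions above: a tcon that needs reconnect moves
 * from TID_NEED_RECON to TID_IN_FILES_INVALIDATE while its open file list is
 * walked, and then to TID_NEED_TCON so that the tree connection itself is
 * reconnected afterwards.
 */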
208
209static inline int cifs_convert_flags(unsigned int flags, int rdwr_for_fscache)
210{
211	if ((flags & O_ACCMODE) == O_RDONLY)
212		return GENERIC_READ;
213	else if ((flags & O_ACCMODE) == O_WRONLY)
214		return rdwr_for_fscache == 1 ? (GENERIC_READ | GENERIC_WRITE) : GENERIC_WRITE;
215	else if ((flags & O_ACCMODE) == O_RDWR) {
		/*
		 * GENERIC_ALL is too much permission to request; it can
		 * cause unnecessary access-denied errors on create.
		 */
		/* return GENERIC_ALL; */
219		return (GENERIC_READ | GENERIC_WRITE);
220	}
221
222	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
223		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
224		FILE_READ_DATA);
225}
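
/*
 * Illustrative sketch (not compiled in, guarded by "#if 0"): how the mapping
 * above behaves for a write-only open.  The example_convert_flags() wrapper
 * is hypothetical; the values it checks follow directly from
 * cifs_convert_flags().
 */
#if 0
static void example_convert_flags(void)
{
	/* A plain write-only open requests only GENERIC_WRITE. */
	WARN_ON(cifs_convert_flags(O_WRONLY, 0) != GENERIC_WRITE);

	/*
	 * With rdwr_for_fscache == 1 the same open also requests
	 * GENERIC_READ, so cached data around partial writes can be
	 * filled in.
	 */
	WARN_ON(cifs_convert_flags(O_WRONLY, 1) !=
		(GENERIC_READ | GENERIC_WRITE));
}
#endif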
226
227#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
228static u32 cifs_posix_convert_flags(unsigned int flags)
229{
230	u32 posix_flags = 0;
231
232	if ((flags & O_ACCMODE) == O_RDONLY)
233		posix_flags = SMB_O_RDONLY;
234	else if ((flags & O_ACCMODE) == O_WRONLY)
235		posix_flags = SMB_O_WRONLY;
236	else if ((flags & O_ACCMODE) == O_RDWR)
237		posix_flags = SMB_O_RDWR;
238
239	if (flags & O_CREAT) {
240		posix_flags |= SMB_O_CREAT;
241		if (flags & O_EXCL)
242			posix_flags |= SMB_O_EXCL;
243	} else if (flags & O_EXCL)
244		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
245			 current->comm, current->tgid);
246
247	if (flags & O_TRUNC)
248		posix_flags |= SMB_O_TRUNC;
249	/* be safe and imply O_SYNC for O_DSYNC */
250	if (flags & O_DSYNC)
251		posix_flags |= SMB_O_SYNC;
252	if (flags & O_DIRECTORY)
253		posix_flags |= SMB_O_DIRECTORY;
254	if (flags & O_NOFOLLOW)
255		posix_flags |= SMB_O_NOFOLLOW;
256	if (flags & O_DIRECT)
257		posix_flags |= SMB_O_DIRECT;
258
259	return posix_flags;
260}
261#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
262
263static inline int cifs_get_disposition(unsigned int flags)
264{
265	if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
266		return FILE_CREATE;
267	else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
268		return FILE_OVERWRITE_IF;
269	else if ((flags & O_CREAT) == O_CREAT)
270		return FILE_OPEN_IF;
271	else if ((flags & O_TRUNC) == O_TRUNC)
272		return FILE_OVERWRITE;
273	else
274		return FILE_OPEN;
275}
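
/*
 * Illustrative sketch (not compiled in): the dispositions produced by
 * cifs_get_disposition() above for common open-flag combinations.  The
 * example_get_disposition() wrapper is hypothetical.
 */
#if 0
static void example_get_disposition(void)
{
	WARN_ON(cifs_get_disposition(O_CREAT | O_EXCL) != FILE_CREATE);
	WARN_ON(cifs_get_disposition(O_CREAT | O_TRUNC) != FILE_OVERWRITE_IF);
	WARN_ON(cifs_get_disposition(O_CREAT) != FILE_OPEN_IF);
	WARN_ON(cifs_get_disposition(O_TRUNC) != FILE_OVERWRITE);
	WARN_ON(cifs_get_disposition(O_RDWR) != FILE_OPEN);	/* default */
}
#endif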
276
277#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
278int cifs_posix_open(const char *full_path, struct inode **pinode,
279			struct super_block *sb, int mode, unsigned int f_flags,
280			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
281{
282	int rc;
283	FILE_UNIX_BASIC_INFO *presp_data;
284	__u32 posix_flags = 0;
285	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
286	struct cifs_fattr fattr;
287	struct tcon_link *tlink;
288	struct cifs_tcon *tcon;
289
290	cifs_dbg(FYI, "posix open %s\n", full_path);
291
292	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
293	if (presp_data == NULL)
294		return -ENOMEM;
295
296	tlink = cifs_sb_tlink(cifs_sb);
297	if (IS_ERR(tlink)) {
298		rc = PTR_ERR(tlink);
299		goto posix_open_ret;
300	}
301
302	tcon = tlink_tcon(tlink);
303	mode &= ~current_umask();
304
305	posix_flags = cifs_posix_convert_flags(f_flags);
306	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
307			     poplock, full_path, cifs_sb->local_nls,
308			     cifs_remap(cifs_sb));
309	cifs_put_tlink(tlink);
310
311	if (rc)
312		goto posix_open_ret;
313
314	if (presp_data->Type == cpu_to_le32(-1))
315		goto posix_open_ret; /* open ok, caller does qpathinfo */
316
317	if (!pinode)
318		goto posix_open_ret; /* caller does not need info */
319
320	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
321
322	/* get new inode and set it up */
323	if (*pinode == NULL) {
324		cifs_fill_uniqueid(sb, &fattr);
325		*pinode = cifs_iget(sb, &fattr);
326		if (!*pinode) {
327			rc = -ENOMEM;
328			goto posix_open_ret;
329		}
330	} else {
331		cifs_revalidate_mapping(*pinode);
332		rc = cifs_fattr_to_inode(*pinode, &fattr, false);
333	}
334
335posix_open_ret:
336	kfree(presp_data);
337	return rc;
338}
339#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
340
341static int cifs_nt_open(const char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
342			struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
343			struct cifs_fid *fid, unsigned int xid, struct cifs_open_info_data *buf)
344{
345	int rc;
346	int desired_access;
347	int disposition;
348	int create_options = CREATE_NOT_DIR;
349	struct TCP_Server_Info *server = tcon->ses->server;
350	struct cifs_open_parms oparms;
351	int rdwr_for_fscache = 0;
352
353	if (!server->ops->open)
354		return -ENOSYS;
355
356	/* If we're caching, we need to be able to fill in around partial writes. */
357	if (cifs_fscache_enabled(inode) && (f_flags & O_ACCMODE) == O_WRONLY)
358		rdwr_for_fscache = 1;
359
360	desired_access = cifs_convert_flags(f_flags, rdwr_for_fscache);
361
/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is no direct match for the disposition
 *	FILE_SUPERSEDE (ie create whether or not the file exists).
 *	O_CREAT | O_TRUNC is similar, but it truncates an existing
 *	file rather than recreating it as FILE_SUPERSEDE does (which
 *	replaces the attributes / metadata with those passed in on
 *	the open call).
 *
 *	O_SYNC is a reasonable match to the CIFS writethrough flag
 *	and the read/write flags match reasonably.  O_LARGEFILE is
 *	irrelevant because largefile support is always used by this
 *	client.  Flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC,
 *	O_NOFOLLOW and O_NONBLOCK need further investigation.
 *********************************************************************/
385
386	disposition = cifs_get_disposition(f_flags);
387
388	/* BB pass O_SYNC flag through on file attributes .. BB */
389
390	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
391	if (f_flags & O_SYNC)
392		create_options |= CREATE_WRITE_THROUGH;
393
394	if (f_flags & O_DIRECT)
395		create_options |= CREATE_NO_BUFFER;
396
397retry_open:
398	oparms = (struct cifs_open_parms) {
399		.tcon = tcon,
400		.cifs_sb = cifs_sb,
401		.desired_access = desired_access,
402		.create_options = cifs_create_options(cifs_sb, create_options),
403		.disposition = disposition,
404		.path = full_path,
405		.fid = fid,
406	};
407
408	rc = server->ops->open(xid, &oparms, oplock, buf);
409	if (rc) {
410		if (rc == -EACCES && rdwr_for_fscache == 1) {
411			desired_access = cifs_convert_flags(f_flags, 0);
412			rdwr_for_fscache = 2;
413			goto retry_open;
414		}
415		return rc;
416	}
417	if (rdwr_for_fscache == 2)
418		cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
419
	/* TODO: Add support for calling posix query info, passing in the fid */
421	if (tcon->unix_ext)
422		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
423					      xid);
424	else
425		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
426					 xid, fid);
427
428	if (rc) {
429		server->ops->close(xid, tcon, fid);
430		if (rc == -ESTALE)
431			rc = -EOPENSTALE;
432	}
433
434	return rc;
435}
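
/*
 * Note on the rdwr_for_fscache logic above: 0 means the requested access was
 * not adjusted; 1 means a write-only open was widened to read+write so
 * fscache can fill in around partial writes; 2 means that widened open was
 * rejected with -EACCES, the open was retried with the original access, and
 * the local cache was invalidated for direct writes.
 */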
436
437static bool
438cifs_has_mand_locks(struct cifsInodeInfo *cinode)
439{
440	struct cifs_fid_locks *cur;
441	bool has_locks = false;
442
443	down_read(&cinode->lock_sem);
444	list_for_each_entry(cur, &cinode->llist, llist) {
445		if (!list_empty(&cur->locks)) {
446			has_locks = true;
447			break;
448		}
449	}
450	up_read(&cinode->lock_sem);
451	return has_locks;
452}
453
454void
455cifs_down_write(struct rw_semaphore *sem)
456{
457	while (!down_write_trylock(sem))
458		msleep(10);
459}
460
461static void cifsFileInfo_put_work(struct work_struct *work);
462void serverclose_work(struct work_struct *work);
463
464struct cifsFileInfo *cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
465				       struct tcon_link *tlink, __u32 oplock,
466				       const char *symlink_target)
467{
468	struct dentry *dentry = file_dentry(file);
469	struct inode *inode = d_inode(dentry);
470	struct cifsInodeInfo *cinode = CIFS_I(inode);
471	struct cifsFileInfo *cfile;
472	struct cifs_fid_locks *fdlocks;
473	struct cifs_tcon *tcon = tlink_tcon(tlink);
474	struct TCP_Server_Info *server = tcon->ses->server;
475
476	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
477	if (cfile == NULL)
478		return cfile;
479
480	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
481	if (!fdlocks) {
482		kfree(cfile);
483		return NULL;
484	}
485
486	if (symlink_target) {
487		cfile->symlink_target = kstrdup(symlink_target, GFP_KERNEL);
488		if (!cfile->symlink_target) {
489			kfree(fdlocks);
490			kfree(cfile);
491			return NULL;
492		}
493	}
494
495	INIT_LIST_HEAD(&fdlocks->locks);
496	fdlocks->cfile = cfile;
497	cfile->llist = fdlocks;
498
499	cfile->count = 1;
500	cfile->pid = current->tgid;
501	cfile->uid = current_fsuid();
502	cfile->dentry = dget(dentry);
503	cfile->f_flags = file->f_flags;
504	cfile->invalidHandle = false;
505	cfile->deferred_close_scheduled = false;
506	cfile->tlink = cifs_get_tlink(tlink);
507	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
508	INIT_WORK(&cfile->put, cifsFileInfo_put_work);
509	INIT_WORK(&cfile->serverclose, serverclose_work);
510	INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
511	mutex_init(&cfile->fh_mutex);
512	spin_lock_init(&cfile->file_info_lock);
513
514	cifs_sb_active(inode->i_sb);
515
516	/*
517	 * If the server returned a read oplock and we have mandatory brlocks,
518	 * set oplock level to None.
519	 */
520	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
521		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
522		oplock = 0;
523	}
524
525	cifs_down_write(&cinode->lock_sem);
526	list_add(&fdlocks->llist, &cinode->llist);
527	up_write(&cinode->lock_sem);
528
529	spin_lock(&tcon->open_file_lock);
530	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
531		oplock = fid->pending_open->oplock;
532	list_del(&fid->pending_open->olist);
533
534	fid->purge_cache = false;
535	server->ops->set_fid(cfile, fid, oplock);
536
537	list_add(&cfile->tlist, &tcon->openFileList);
538	atomic_inc(&tcon->num_local_opens);
539
	/* if it is a readable file instance, put it first in the list */
541	spin_lock(&cinode->open_file_lock);
542	if (file->f_mode & FMODE_READ)
543		list_add(&cfile->flist, &cinode->openFileList);
544	else
545		list_add_tail(&cfile->flist, &cinode->openFileList);
546	spin_unlock(&cinode->open_file_lock);
547	spin_unlock(&tcon->open_file_lock);
548
549	if (fid->purge_cache)
550		cifs_zap_mapping(inode);
551
552	file->private_data = cfile;
553	return cfile;
554}
555
556struct cifsFileInfo *
557cifsFileInfo_get(struct cifsFileInfo *cifs_file)
558{
559	spin_lock(&cifs_file->file_info_lock);
560	cifsFileInfo_get_locked(cifs_file);
561	spin_unlock(&cifs_file->file_info_lock);
562	return cifs_file;
563}
564
565static void cifsFileInfo_put_final(struct cifsFileInfo *cifs_file)
566{
567	struct inode *inode = d_inode(cifs_file->dentry);
568	struct cifsInodeInfo *cifsi = CIFS_I(inode);
569	struct cifsLockInfo *li, *tmp;
570	struct super_block *sb = inode->i_sb;
571
572	/*
573	 * Delete any outstanding lock records. We'll lose them when the file
574	 * is closed anyway.
575	 */
576	cifs_down_write(&cifsi->lock_sem);
577	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
578		list_del(&li->llist);
579		cifs_del_lock_waiters(li);
580		kfree(li);
581	}
582	list_del(&cifs_file->llist->llist);
583	kfree(cifs_file->llist);
584	up_write(&cifsi->lock_sem);
585
586	cifs_put_tlink(cifs_file->tlink);
587	dput(cifs_file->dentry);
588	cifs_sb_deactive(sb);
589	kfree(cifs_file->symlink_target);
590	kfree(cifs_file);
591}
592
593static void cifsFileInfo_put_work(struct work_struct *work)
594{
595	struct cifsFileInfo *cifs_file = container_of(work,
596			struct cifsFileInfo, put);
597
598	cifsFileInfo_put_final(cifs_file);
599}
600
601void serverclose_work(struct work_struct *work)
602{
603	struct cifsFileInfo *cifs_file = container_of(work,
604			struct cifsFileInfo, serverclose);
605
606	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
607
608	struct TCP_Server_Info *server = tcon->ses->server;
609	int rc = 0;
610	int retries = 0;
611	int MAX_RETRIES = 4;
612
613	do {
614		if (server->ops->close_getattr)
615			rc = server->ops->close_getattr(0, tcon, cifs_file);
616		else if (server->ops->close)
617			rc = server->ops->close(0, tcon, &cifs_file->fid);
618
619		if (rc == -EBUSY || rc == -EAGAIN) {
620			retries++;
621			msleep(250);
622		}
623	} while ((rc == -EBUSY || rc == -EAGAIN) && (retries < MAX_RETRIES)
624	);
625
626	if (retries == MAX_RETRIES)
627		pr_warn("Serverclose failed %d times, giving up\n", MAX_RETRIES);
628
629	if (cifs_file->offload)
630		queue_work(fileinfo_put_wq, &cifs_file->put);
631	else
632		cifsFileInfo_put_final(cifs_file);
633}
634
/**
 * cifsFileInfo_put - release a reference to the file's private data
 *
 * Always potentially waits for the oplock handler. See _cifsFileInfo_put().
 *
 * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
 */
642void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
643{
644	_cifsFileInfo_put(cifs_file, true, true);
645}
646
/**
 * _cifsFileInfo_put - release a reference to the file's private data
 *
 * This may involve closing the filehandle @cifs_file out on the
 * server. Must be called without holding tcon->open_file_lock,
 * cinode->open_file_lock and cifs_file->file_info_lock.
 *
 * If @wait_oplock_handler is true and we are releasing the last
 * reference, wait for any running oplock break handler of the file
 * and cancel any pending one.
 *
 * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
 * @wait_oplock_handler: must be false if called from oplock_break_handler
 * @offload:	if true, queue the final release to a workqueue instead of
 *		doing it inline
 */
663void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
664		       bool wait_oplock_handler, bool offload)
665{
666	struct inode *inode = d_inode(cifs_file->dentry);
667	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
668	struct TCP_Server_Info *server = tcon->ses->server;
669	struct cifsInodeInfo *cifsi = CIFS_I(inode);
670	struct super_block *sb = inode->i_sb;
671	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
672	struct cifs_fid fid = {};
673	struct cifs_pending_open open;
674	bool oplock_break_cancelled;
675	bool serverclose_offloaded = false;
676
677	spin_lock(&tcon->open_file_lock);
678	spin_lock(&cifsi->open_file_lock);
679	spin_lock(&cifs_file->file_info_lock);
680
681	cifs_file->offload = offload;
682	if (--cifs_file->count > 0) {
683		spin_unlock(&cifs_file->file_info_lock);
684		spin_unlock(&cifsi->open_file_lock);
685		spin_unlock(&tcon->open_file_lock);
686		return;
687	}
688	spin_unlock(&cifs_file->file_info_lock);
689
690	if (server->ops->get_lease_key)
691		server->ops->get_lease_key(inode, &fid);
692
693	/* store open in pending opens to make sure we don't miss lease break */
694	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
695
696	/* remove it from the lists */
697	list_del(&cifs_file->flist);
698	list_del(&cifs_file->tlist);
699	atomic_dec(&tcon->num_local_opens);
700
701	if (list_empty(&cifsi->openFileList)) {
702		cifs_dbg(FYI, "closing last open instance for inode %p\n",
703			 d_inode(cifs_file->dentry));
		/*
		 * In strict cache mode we need to invalidate the mapping on the
		 * last close, because it may cause an error when we open this
		 * file again and get at least a level II oplock.
		 */
709		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
710			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
711		cifs_set_oplock_level(cifsi, 0);
712	}
713
714	spin_unlock(&cifsi->open_file_lock);
715	spin_unlock(&tcon->open_file_lock);
716
717	oplock_break_cancelled = wait_oplock_handler ?
718		cancel_work_sync(&cifs_file->oplock_break) : false;
719
720	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
721		struct TCP_Server_Info *server = tcon->ses->server;
722		unsigned int xid;
723		int rc = 0;
724
725		xid = get_xid();
726		if (server->ops->close_getattr)
727			rc = server->ops->close_getattr(xid, tcon, cifs_file);
728		else if (server->ops->close)
729			rc = server->ops->close(xid, tcon, &cifs_file->fid);
730		_free_xid(xid);
731
732		if (rc == -EBUSY || rc == -EAGAIN) {
733			// Server close failed, hence offloading it as an async op
734			queue_work(serverclose_wq, &cifs_file->serverclose);
735			serverclose_offloaded = true;
736		}
737	}
738
739	if (oplock_break_cancelled)
740		cifs_done_oplock_break(cifsi);
741
742	cifs_del_pending_open(&open);
743
	// If serverclose has been offloaded to the wq (on failure), it will
	// handle offloading the put as well. If serverclose was not offloaded,
	// we need to handle offloading the put here.
747	if (!serverclose_offloaded) {
748		if (offload)
749			queue_work(fileinfo_put_wq, &cifs_file->put);
750		else
751			cifsFileInfo_put_final(cifs_file);
752	}
753}
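
/*
 * Illustrative sketch (not compiled in): the reference pairing implied by the
 * kernel-doc above.  Every cifsFileInfo_get() must be balanced by a
 * cifsFileInfo_put(), and the final put may close the handle on the server.
 * The example_fileinfo_ref() helper is hypothetical.
 */
#if 0
static void example_fileinfo_ref(struct cifsFileInfo *cfile)
{
	struct cifsFileInfo *ref = cifsFileInfo_get(cfile);

	/* ... use the file info while the extra reference is held ... */

	cifsFileInfo_put(ref);	/* may close the file on the server */
}
#endif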
754
755int cifs_open(struct inode *inode, struct file *file)
756
757{
758	int rc = -EACCES;
759	unsigned int xid;
760	__u32 oplock;
761	struct cifs_sb_info *cifs_sb;
762	struct TCP_Server_Info *server;
763	struct cifs_tcon *tcon;
764	struct tcon_link *tlink;
765	struct cifsFileInfo *cfile = NULL;
766	void *page;
767	const char *full_path;
768	bool posix_open_ok = false;
769	struct cifs_fid fid = {};
770	struct cifs_pending_open open;
771	struct cifs_open_info_data data = {};
772
773	xid = get_xid();
774
775	cifs_sb = CIFS_SB(inode->i_sb);
776	if (unlikely(cifs_forced_shutdown(cifs_sb))) {
777		free_xid(xid);
778		return -EIO;
779	}
780
781	tlink = cifs_sb_tlink(cifs_sb);
782	if (IS_ERR(tlink)) {
783		free_xid(xid);
784		return PTR_ERR(tlink);
785	}
786	tcon = tlink_tcon(tlink);
787	server = tcon->ses->server;
788
789	page = alloc_dentry_path();
790	full_path = build_path_from_dentry(file_dentry(file), page);
791	if (IS_ERR(full_path)) {
792		rc = PTR_ERR(full_path);
793		goto out;
794	}
795
796	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
797		 inode, file->f_flags, full_path);
798
799	if (file->f_flags & O_DIRECT &&
800	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
801		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
802			file->f_op = &cifs_file_direct_nobrl_ops;
803		else
804			file->f_op = &cifs_file_direct_ops;
805	}
806
807	/* Get the cached handle as SMB2 close is deferred */
808	rc = cifs_get_readable_path(tcon, full_path, &cfile);
809	if (rc == 0) {
810		if (file->f_flags == cfile->f_flags) {
811			file->private_data = cfile;
812			spin_lock(&CIFS_I(inode)->deferred_lock);
813			cifs_del_deferred_close(cfile);
814			spin_unlock(&CIFS_I(inode)->deferred_lock);
815			goto use_cache;
816		} else {
817			_cifsFileInfo_put(cfile, true, false);
818		}
819	}
820
821	if (server->oplocks)
822		oplock = REQ_OPLOCK;
823	else
824		oplock = 0;
825
826#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
827	if (!tcon->broken_posix_open && tcon->unix_ext &&
828	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
829				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
830		/* can not refresh inode info since size could be stale */
831		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
832				cifs_sb->ctx->file_mode /* ignored */,
833				file->f_flags, &oplock, &fid.netfid, xid);
834		if (rc == 0) {
835			cifs_dbg(FYI, "posix open succeeded\n");
836			posix_open_ok = true;
837		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
838			if (tcon->ses->serverNOS)
839				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
840					 tcon->ses->ip_addr,
841					 tcon->ses->serverNOS);
842			tcon->broken_posix_open = true;
843		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
844			 (rc != -EOPNOTSUPP)) /* path not found or net err */
845			goto out;
846		/*
847		 * Else fallthrough to retry open the old way on network i/o
848		 * or DFS errors.
849		 */
850	}
851#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
852
853	if (server->ops->get_lease_key)
854		server->ops->get_lease_key(inode, &fid);
855
856	cifs_add_pending_open(&fid, tlink, &open);
857
858	if (!posix_open_ok) {
859		if (server->ops->get_lease_key)
860			server->ops->get_lease_key(inode, &fid);
861
862		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon, file->f_flags, &oplock, &fid,
863				  xid, &data);
864		if (rc) {
865			cifs_del_pending_open(&open);
866			goto out;
867		}
868	}
869
870	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock, data.symlink_target);
871	if (cfile == NULL) {
872		if (server->ops->close)
873			server->ops->close(xid, tcon, &fid);
874		cifs_del_pending_open(&open);
875		rc = -ENOMEM;
876		goto out;
877	}
878
879#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
880	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
881		/*
882		 * Time to set mode which we can not set earlier due to
883		 * problems creating new read-only files.
884		 */
885		struct cifs_unix_set_info_args args = {
886			.mode	= inode->i_mode,
887			.uid	= INVALID_UID, /* no change */
888			.gid	= INVALID_GID, /* no change */
889			.ctime	= NO_CHANGE_64,
890			.atime	= NO_CHANGE_64,
891			.mtime	= NO_CHANGE_64,
892			.device	= 0,
893		};
894		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
895				       cfile->pid);
896	}
897#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
898
899use_cache:
900	fscache_use_cookie(cifs_inode_cookie(file_inode(file)),
901			   file->f_mode & FMODE_WRITE);
902	if (!(file->f_flags & O_DIRECT))
903		goto out;
904	if ((file->f_flags & (O_ACCMODE | O_APPEND)) == O_RDONLY)
905		goto out;
906	cifs_invalidate_cache(file_inode(file), FSCACHE_INVAL_DIO_WRITE);
907
908out:
909	free_dentry_path(page);
910	free_xid(xid);
911	cifs_put_tlink(tlink);
912	cifs_free_open_info(&data);
913	return rc;
914}
915
916#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
917static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
918#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
919
920/*
921 * Try to reacquire byte range locks that were released when session
922 * to server was lost.
923 */
924static int
925cifs_relock_file(struct cifsFileInfo *cfile)
926{
927	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
928	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
929	int rc = 0;
930#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
931	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
932#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
933
934	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
935	if (cinode->can_cache_brlcks) {
936		/* can cache locks - no need to relock */
937		up_read(&cinode->lock_sem);
938		return rc;
939	}
940
941#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
942	if (cap_unix(tcon->ses) &&
943	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
944	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
945		rc = cifs_push_posix_locks(cfile);
946	else
947#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
948		rc = tcon->ses->server->ops->push_mand_locks(cfile);
949
950	up_read(&cinode->lock_sem);
951	return rc;
952}
953
954static int
955cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
956{
957	int rc = -EACCES;
958	unsigned int xid;
959	__u32 oplock;
960	struct cifs_sb_info *cifs_sb;
961	struct cifs_tcon *tcon;
962	struct TCP_Server_Info *server;
963	struct cifsInodeInfo *cinode;
964	struct inode *inode;
965	void *page;
966	const char *full_path;
967	int desired_access;
968	int disposition = FILE_OPEN;
969	int create_options = CREATE_NOT_DIR;
970	struct cifs_open_parms oparms;
971	int rdwr_for_fscache = 0;
972
973	xid = get_xid();
974	mutex_lock(&cfile->fh_mutex);
975	if (!cfile->invalidHandle) {
976		mutex_unlock(&cfile->fh_mutex);
977		free_xid(xid);
978		return 0;
979	}
980
981	inode = d_inode(cfile->dentry);
982	cifs_sb = CIFS_SB(inode->i_sb);
983	tcon = tlink_tcon(cfile->tlink);
984	server = tcon->ses->server;
985
	/*
	 * We cannot grab the rename sem here, because various ops, including
	 * some that already hold the rename sem, can end up causing writepage
	 * to get called.  If the server was down, that means we end up here,
	 * and we can never tell whether the caller already holds rename_sem.
	 */
992	page = alloc_dentry_path();
993	full_path = build_path_from_dentry(cfile->dentry, page);
994	if (IS_ERR(full_path)) {
995		mutex_unlock(&cfile->fh_mutex);
996		free_dentry_path(page);
997		free_xid(xid);
998		return PTR_ERR(full_path);
999	}
1000
1001	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
1002		 inode, cfile->f_flags, full_path);
1003
1004	if (tcon->ses->server->oplocks)
1005		oplock = REQ_OPLOCK;
1006	else
1007		oplock = 0;
1008
1009#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1010	if (tcon->unix_ext && cap_unix(tcon->ses) &&
1011	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
1012				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
1013		/*
1014		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
1015		 * original open. Must mask them off for a reopen.
1016		 */
1017		unsigned int oflags = cfile->f_flags &
1018						~(O_CREAT | O_EXCL | O_TRUNC);
1019
1020		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
1021				     cifs_sb->ctx->file_mode /* ignored */,
1022				     oflags, &oplock, &cfile->fid.netfid, xid);
1023		if (rc == 0) {
1024			cifs_dbg(FYI, "posix reopen succeeded\n");
1025			oparms.reconnect = true;
1026			goto reopen_success;
1027		}
		/*
		 * Fall through to retry the open the old way on errors;
		 * especially in the reconnect path it is important to retry
		 * hard.
		 */
1032	}
1033#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1034
1035	/* If we're caching, we need to be able to fill in around partial writes. */
1036	if (cifs_fscache_enabled(inode) && (cfile->f_flags & O_ACCMODE) == O_WRONLY)
1037		rdwr_for_fscache = 1;
1038
1039	desired_access = cifs_convert_flags(cfile->f_flags, rdwr_for_fscache);
1040
1041	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
1042	if (cfile->f_flags & O_SYNC)
1043		create_options |= CREATE_WRITE_THROUGH;
1044
1045	if (cfile->f_flags & O_DIRECT)
1046		create_options |= CREATE_NO_BUFFER;
1047
1048	if (server->ops->get_lease_key)
1049		server->ops->get_lease_key(inode, &cfile->fid);
1050
1051retry_open:
1052	oparms = (struct cifs_open_parms) {
1053		.tcon = tcon,
1054		.cifs_sb = cifs_sb,
1055		.desired_access = desired_access,
1056		.create_options = cifs_create_options(cifs_sb, create_options),
1057		.disposition = disposition,
1058		.path = full_path,
1059		.fid = &cfile->fid,
1060		.reconnect = true,
1061	};
1062
	/*
	 * We cannot refresh the inode by passing in a file_info buf to be
	 * returned by ops->open and then calling get_inode_info with the
	 * returned buf, since the file might have write-behind data that needs
	 * to be flushed and the server's version of the file size can be
	 * stale.  If we knew for sure that the inode was not dirty locally we
	 * could do this.
	 */
1070	rc = server->ops->open(xid, &oparms, &oplock, NULL);
1071	if (rc == -ENOENT && oparms.reconnect == false) {
1072		/* durable handle timeout is expired - open the file again */
1073		rc = server->ops->open(xid, &oparms, &oplock, NULL);
1074		/* indicate that we need to relock the file */
1075		oparms.reconnect = true;
1076	}
1077	if (rc == -EACCES && rdwr_for_fscache == 1) {
1078		desired_access = cifs_convert_flags(cfile->f_flags, 0);
1079		rdwr_for_fscache = 2;
1080		goto retry_open;
1081	}
1082
1083	if (rc) {
1084		mutex_unlock(&cfile->fh_mutex);
1085		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
1086		cifs_dbg(FYI, "oplock: %d\n", oplock);
1087		goto reopen_error_exit;
1088	}
1089
1090	if (rdwr_for_fscache == 2)
1091		cifs_invalidate_cache(inode, FSCACHE_INVAL_DIO_WRITE);
1092
1093#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1094reopen_success:
1095#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1096	cfile->invalidHandle = false;
1097	mutex_unlock(&cfile->fh_mutex);
1098	cinode = CIFS_I(inode);
1099
1100	if (can_flush) {
1101		rc = filemap_write_and_wait(inode->i_mapping);
1102		if (!is_interrupt_error(rc))
1103			mapping_set_error(inode->i_mapping, rc);
1104
1105		if (tcon->posix_extensions) {
1106			rc = smb311_posix_get_inode_info(&inode, full_path,
1107							 NULL, inode->i_sb, xid);
1108		} else if (tcon->unix_ext) {
1109			rc = cifs_get_inode_info_unix(&inode, full_path,
1110						      inode->i_sb, xid);
1111		} else {
1112			rc = cifs_get_inode_info(&inode, full_path, NULL,
1113						 inode->i_sb, xid, NULL);
1114		}
1115	}
	/*
	 * Otherwise we are already writing out data to the server and could
	 * deadlock if we tried to flush it.  Since we do not know whether we
	 * have data that would invalidate the current end of file on the
	 * server, we cannot go to the server to get the new inode info.
	 */
1122
1123	/*
1124	 * If the server returned a read oplock and we have mandatory brlocks,
1125	 * set oplock level to None.
1126	 */
1127	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
1128		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
1129		oplock = 0;
1130	}
1131
1132	server->ops->set_fid(cfile, &cfile->fid, oplock);
1133	if (oparms.reconnect)
1134		cifs_relock_file(cfile);
1135
1136reopen_error_exit:
1137	free_dentry_path(page);
1138	free_xid(xid);
1139	return rc;
1140}
1141
1142void smb2_deferred_work_close(struct work_struct *work)
1143{
1144	struct cifsFileInfo *cfile = container_of(work,
1145			struct cifsFileInfo, deferred.work);
1146
1147	spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1148	cifs_del_deferred_close(cfile);
1149	cfile->deferred_close_scheduled = false;
1150	spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
1151	_cifsFileInfo_put(cfile, true, false);
1152}
1153
1154static bool
1155smb2_can_defer_close(struct inode *inode, struct cifs_deferred_close *dclose)
1156{
1157	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1158	struct cifsInodeInfo *cinode = CIFS_I(inode);
1159
1160	return (cifs_sb->ctx->closetimeo && cinode->lease_granted && dclose &&
1161			(cinode->oplock == CIFS_CACHE_RHW_FLG ||
1162			 cinode->oplock == CIFS_CACHE_RH_FLG) &&
1163			!test_bit(CIFS_INO_CLOSE_ON_LOCK, &cinode->flags));
1164
1165}
1166
1167int cifs_close(struct inode *inode, struct file *file)
1168{
1169	struct cifsFileInfo *cfile;
1170	struct cifsInodeInfo *cinode = CIFS_I(inode);
1171	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1172	struct cifs_deferred_close *dclose;
1173
1174	cifs_fscache_unuse_inode_cookie(inode, file->f_mode & FMODE_WRITE);
1175
1176	if (file->private_data != NULL) {
1177		cfile = file->private_data;
1178		file->private_data = NULL;
1179		dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
1180		if ((cfile->status_file_deleted == false) &&
1181		    (smb2_can_defer_close(inode, dclose))) {
1182			if (test_and_clear_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags)) {
1183				inode_set_mtime_to_ts(inode,
1184						      inode_set_ctime_current(inode));
1185			}
1186			spin_lock(&cinode->deferred_lock);
1187			cifs_add_deferred_close(cfile, dclose);
1188			if (cfile->deferred_close_scheduled &&
1189			    delayed_work_pending(&cfile->deferred)) {
				/*
				 * If there was no pending work, mod_delayed_work
				 * queues new work, so take an extra reference to
				 * avoid a use-after-free.
				 */
1194				if (!mod_delayed_work(deferredclose_wq,
1195						&cfile->deferred, cifs_sb->ctx->closetimeo))
1196					cifsFileInfo_get(cfile);
1197			} else {
1198				/* Deferred close for files */
1199				queue_delayed_work(deferredclose_wq,
1200						&cfile->deferred, cifs_sb->ctx->closetimeo);
1201				cfile->deferred_close_scheduled = true;
1202				spin_unlock(&cinode->deferred_lock);
1203				return 0;
1204			}
1205			spin_unlock(&cinode->deferred_lock);
1206			_cifsFileInfo_put(cfile, true, false);
1207		} else {
1208			_cifsFileInfo_put(cfile, true, false);
1209			kfree(dclose);
1210		}
1211	}
1212
1213	/* return code from the ->release op is always ignored */
1214	return 0;
1215}
1216
1217void
1218cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
1219{
1220	struct cifsFileInfo *open_file, *tmp;
1221	struct list_head tmp_list;
1222
1223	if (!tcon->use_persistent || !tcon->need_reopen_files)
1224		return;
1225
1226	tcon->need_reopen_files = false;
1227
1228	cifs_dbg(FYI, "Reopen persistent handles\n");
1229	INIT_LIST_HEAD(&tmp_list);
1230
	/* list all files open on tree connection, reopen persistent handles */
1232	spin_lock(&tcon->open_file_lock);
1233	list_for_each_entry(open_file, &tcon->openFileList, tlist) {
1234		if (!open_file->invalidHandle)
1235			continue;
1236		cifsFileInfo_get(open_file);
1237		list_add_tail(&open_file->rlist, &tmp_list);
1238	}
1239	spin_unlock(&tcon->open_file_lock);
1240
1241	list_for_each_entry_safe(open_file, tmp, &tmp_list, rlist) {
1242		if (cifs_reopen_file(open_file, false /* do not flush */))
1243			tcon->need_reopen_files = true;
1244		list_del_init(&open_file->rlist);
1245		cifsFileInfo_put(open_file);
1246	}
1247}
1248
1249int cifs_closedir(struct inode *inode, struct file *file)
1250{
1251	int rc = 0;
1252	unsigned int xid;
1253	struct cifsFileInfo *cfile = file->private_data;
1254	struct cifs_tcon *tcon;
1255	struct TCP_Server_Info *server;
1256	char *buf;
1257
1258	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
1259
1260	if (cfile == NULL)
1261		return rc;
1262
1263	xid = get_xid();
1264	tcon = tlink_tcon(cfile->tlink);
1265	server = tcon->ses->server;
1266
1267	cifs_dbg(FYI, "Freeing private data in close dir\n");
1268	spin_lock(&cfile->file_info_lock);
1269	if (server->ops->dir_needs_close(cfile)) {
1270		cfile->invalidHandle = true;
1271		spin_unlock(&cfile->file_info_lock);
1272		if (server->ops->close_dir)
1273			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
1274		else
1275			rc = -ENOSYS;
1276		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
1277		/* not much we can do if it fails anyway, ignore rc */
1278		rc = 0;
1279	} else
1280		spin_unlock(&cfile->file_info_lock);
1281
1282	buf = cfile->srch_inf.ntwrk_buf_start;
1283	if (buf) {
1284		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
1285		cfile->srch_inf.ntwrk_buf_start = NULL;
1286		if (cfile->srch_inf.smallBuf)
1287			cifs_small_buf_release(buf);
1288		else
1289			cifs_buf_release(buf);
1290	}
1291
1292	cifs_put_tlink(cfile->tlink);
1293	kfree(file->private_data);
1294	file->private_data = NULL;
1295	/* BB can we lock the filestruct while this is going on? */
1296	free_xid(xid);
1297	return rc;
1298}
1299
1300static struct cifsLockInfo *
1301cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
1302{
1303	struct cifsLockInfo *lock =
1304		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
1305	if (!lock)
1306		return lock;
1307	lock->offset = offset;
1308	lock->length = length;
1309	lock->type = type;
1310	lock->pid = current->tgid;
1311	lock->flags = flags;
1312	INIT_LIST_HEAD(&lock->blist);
1313	init_waitqueue_head(&lock->block_q);
1314	return lock;
1315}
1316
1317void
1318cifs_del_lock_waiters(struct cifsLockInfo *lock)
1319{
1320	struct cifsLockInfo *li, *tmp;
1321	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
1322		list_del_init(&li->blist);
1323		wake_up(&li->block_q);
1324	}
1325}
1326
1327#define CIFS_LOCK_OP	0
1328#define CIFS_READ_OP	1
1329#define CIFS_WRITE_OP	2
1330
1331/* @rw_check : 0 - no op, 1 - read, 2 - write */
1332static bool
1333cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
1334			    __u64 length, __u8 type, __u16 flags,
1335			    struct cifsFileInfo *cfile,
1336			    struct cifsLockInfo **conf_lock, int rw_check)
1337{
1338	struct cifsLockInfo *li;
1339	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
1340	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1341
1342	list_for_each_entry(li, &fdlocks->locks, llist) {
1343		if (offset + length <= li->offset ||
1344		    offset >= li->offset + li->length)
1345			continue;
1346		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
1347		    server->ops->compare_fids(cfile, cur_cfile)) {
1348			/* shared lock prevents write op through the same fid */
1349			if (!(li->type & server->vals->shared_lock_type) ||
1350			    rw_check != CIFS_WRITE_OP)
1351				continue;
1352		}
1353		if ((type & server->vals->shared_lock_type) &&
1354		    ((server->ops->compare_fids(cfile, cur_cfile) &&
1355		     current->tgid == li->pid) || type == li->type))
1356			continue;
1357		if (rw_check == CIFS_LOCK_OP &&
1358		    (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
1359		    server->ops->compare_fids(cfile, cur_cfile))
1360			continue;
1361		if (conf_lock)
1362			*conf_lock = li;
1363		return true;
1364	}
1365	return false;
1366}
1367
1368bool
1369cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1370			__u8 type, __u16 flags,
1371			struct cifsLockInfo **conf_lock, int rw_check)
1372{
1373	bool rc = false;
1374	struct cifs_fid_locks *cur;
1375	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1376
1377	list_for_each_entry(cur, &cinode->llist, llist) {
1378		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
1379						 flags, cfile, conf_lock,
1380						 rw_check);
1381		if (rc)
1382			break;
1383	}
1384
1385	return rc;
1386}
1387
/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it unchanged if we can't. Returns 0 if we don't need to
 * ask the server, or 1 otherwise.
 */
1395static int
1396cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
1397	       __u8 type, struct file_lock *flock)
1398{
1399	int rc = 0;
1400	struct cifsLockInfo *conf_lock;
1401	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1402	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1403	bool exist;
1404
1405	down_read(&cinode->lock_sem);
1406
1407	exist = cifs_find_lock_conflict(cfile, offset, length, type,
1408					flock->c.flc_flags, &conf_lock,
1409					CIFS_LOCK_OP);
1410	if (exist) {
1411		flock->fl_start = conf_lock->offset;
1412		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1413		flock->c.flc_pid = conf_lock->pid;
1414		if (conf_lock->type & server->vals->shared_lock_type)
1415			flock->c.flc_type = F_RDLCK;
1416		else
1417			flock->c.flc_type = F_WRLCK;
1418	} else if (!cinode->can_cache_brlcks)
1419		rc = 1;
1420	else
1421		flock->c.flc_type = F_UNLCK;
1422
1423	up_read(&cinode->lock_sem);
1424	return rc;
1425}
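
/*
 * Illustrative sketch (not compiled in) of the calling convention documented
 * above: a return of 0 means the request was resolved from the local lock
 * list and @flock already describes the result, while 1 means the caller must
 * ask the server (as cifs_getlk() below does).  The example_test_lock()
 * wrapper is hypothetical.
 */
#if 0
static int example_test_lock(struct cifsFileInfo *cfile, __u64 offset,
			     __u64 length, __u8 type, struct file_lock *flock)
{
	int rc = cifs_lock_test(cfile, offset, length, type, flock);

	if (rc == 0)
		return 0;	/* answered from the cached lock list */

	/* rc == 1: fall back to asking the server, e.g. via mand_lock */
	return rc;
}
#endif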
1426
1427static void
1428cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
1429{
1430	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1431	cifs_down_write(&cinode->lock_sem);
1432	list_add_tail(&lock->llist, &cfile->llist->locks);
1433	up_write(&cinode->lock_sem);
1434}
1435
/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to ask the server;
 * 2) 1, if no locks prevent us but we need to ask the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
1442static int
1443cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
1444		 bool wait)
1445{
1446	struct cifsLockInfo *conf_lock;
1447	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1448	bool exist;
1449	int rc = 0;
1450
1451try_again:
1452	exist = false;
1453	cifs_down_write(&cinode->lock_sem);
1454
1455	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
1456					lock->type, lock->flags, &conf_lock,
1457					CIFS_LOCK_OP);
1458	if (!exist && cinode->can_cache_brlcks) {
1459		list_add_tail(&lock->llist, &cfile->llist->locks);
1460		up_write(&cinode->lock_sem);
1461		return rc;
1462	}
1463
1464	if (!exist)
1465		rc = 1;
1466	else if (!wait)
1467		rc = -EACCES;
1468	else {
1469		list_add_tail(&lock->blist, &conf_lock->blist);
1470		up_write(&cinode->lock_sem);
1471		rc = wait_event_interruptible(lock->block_q,
1472					(lock->blist.prev == &lock->blist) &&
1473					(lock->blist.next == &lock->blist));
1474		if (!rc)
1475			goto try_again;
1476		cifs_down_write(&cinode->lock_sem);
1477		list_del_init(&lock->blist);
1478	}
1479
1480	up_write(&cinode->lock_sem);
1481	return rc;
1482}
1483
1484#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can cache
 * brlocks, or leave it unchanged if we can't. Returns 0 if we don't need to
 * ask the server, or 1 otherwise.
 */
1492static int
1493cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1494{
1495	int rc = 0;
1496	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1497	unsigned char saved_type = flock->c.flc_type;
1498
1499	if ((flock->c.flc_flags & FL_POSIX) == 0)
1500		return 1;
1501
1502	down_read(&cinode->lock_sem);
1503	posix_test_lock(file, flock);
1504
1505	if (lock_is_unlock(flock) && !cinode->can_cache_brlcks) {
1506		flock->c.flc_type = saved_type;
1507		rc = 1;
1508	}
1509
1510	up_read(&cinode->lock_sem);
1511	return rc;
1512}
1513
/*
 * Set the byte-range lock (posix style). Returns:
 * 1) <0, if an error occurs while setting the lock;
 * 2) 0, if we set the lock and don't need to ask the server;
 * 3) FILE_LOCK_DEFERRED, if we will wait for some other file_lock;
 * 4) FILE_LOCK_DEFERRED + 1, if we need to ask the server.
 */
1521static int
1522cifs_posix_lock_set(struct file *file, struct file_lock *flock)
1523{
1524	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1525	int rc = FILE_LOCK_DEFERRED + 1;
1526
1527	if ((flock->c.flc_flags & FL_POSIX) == 0)
1528		return rc;
1529
1530	cifs_down_write(&cinode->lock_sem);
1531	if (!cinode->can_cache_brlcks) {
1532		up_write(&cinode->lock_sem);
1533		return rc;
1534	}
1535
1536	rc = posix_lock_file(file, flock, NULL);
1537	up_write(&cinode->lock_sem);
1538	return rc;
1539}
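
/*
 * Illustrative sketch (not compiled in) of how a caller can interpret the
 * return values listed above: anything <= FILE_LOCK_DEFERRED (an error,
 * success, or a deferred lock) is final locally, while FILE_LOCK_DEFERRED + 1
 * means the lock must also be sent to the server.  The example_posix_set()
 * wrapper is hypothetical.
 */
#if 0
static int example_posix_set(struct file *file, struct file_lock *flock)
{
	int rc = cifs_posix_lock_set(file, flock);

	if (rc <= FILE_LOCK_DEFERRED)
		return rc;	/* handled (or failed, or deferred) locally */

	/* rc == FILE_LOCK_DEFERRED + 1: also send the lock to the server */
	return 0;
}
#endif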
1540
1541int
1542cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
1543{
1544	unsigned int xid;
1545	int rc = 0, stored_rc;
1546	struct cifsLockInfo *li, *tmp;
1547	struct cifs_tcon *tcon;
1548	unsigned int num, max_num, max_buf;
1549	LOCKING_ANDX_RANGE *buf, *cur;
1550	static const int types[] = {
1551		LOCKING_ANDX_LARGE_FILES,
1552		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1553	};
1554	int i;
1555
1556	xid = get_xid();
1557	tcon = tlink_tcon(cfile->tlink);
1558
1559	/*
1560	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1561	 * and check it before using.
1562	 */
1563	max_buf = tcon->ses->server->maxBuf;
1564	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
1565		free_xid(xid);
1566		return -EINVAL;
1567	}
1568
1569	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1570		     PAGE_SIZE);
1571	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1572			PAGE_SIZE);
1573	max_num = (max_buf - sizeof(struct smb_hdr)) /
1574						sizeof(LOCKING_ANDX_RANGE);
1575	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1576	if (!buf) {
1577		free_xid(xid);
1578		return -ENOMEM;
1579	}
1580
1581	for (i = 0; i < 2; i++) {
1582		cur = buf;
1583		num = 0;
1584		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1585			if (li->type != types[i])
1586				continue;
1587			cur->Pid = cpu_to_le16(li->pid);
1588			cur->LengthLow = cpu_to_le32((u32)li->length);
1589			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1590			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1591			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1592			if (++num == max_num) {
1593				stored_rc = cifs_lockv(xid, tcon,
1594						       cfile->fid.netfid,
1595						       (__u8)li->type, 0, num,
1596						       buf);
1597				if (stored_rc)
1598					rc = stored_rc;
1599				cur = buf;
1600				num = 0;
1601			} else
1602				cur++;
1603		}
1604
1605		if (num) {
1606			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1607					       (__u8)types[i], 0, num, buf);
1608			if (stored_rc)
1609				rc = stored_rc;
1610		}
1611	}
1612
1613	kfree(buf);
1614	free_xid(xid);
1615	return rc;
1616}
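
/*
 * Note on the batching above: the scratch buffer holds at most max_num
 * LOCKING_ANDX_RANGE entries (derived from the server's maxBuf, capped at one
 * page), and each of the two passes sends ranges of a single lock type per
 * cifs_lockv() request, flushing whenever the buffer fills.
 */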
1617
1618static __u32
1619hash_lockowner(fl_owner_t owner)
1620{
1621	return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1622}
1623#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1624
1625struct lock_to_push {
1626	struct list_head llist;
1627	__u64 offset;
1628	__u64 length;
1629	__u32 pid;
1630	__u16 netfid;
1631	__u8 type;
1632};
1633
1634#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1635static int
1636cifs_push_posix_locks(struct cifsFileInfo *cfile)
1637{
1638	struct inode *inode = d_inode(cfile->dentry);
1639	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1640	struct file_lock *flock;
1641	struct file_lock_context *flctx = locks_inode_context(inode);
1642	unsigned int count = 0, i;
1643	int rc = 0, xid, type;
1644	struct list_head locks_to_send, *el;
1645	struct lock_to_push *lck, *tmp;
1646	__u64 length;
1647
1648	xid = get_xid();
1649
1650	if (!flctx)
1651		goto out;
1652
1653	spin_lock(&flctx->flc_lock);
1654	list_for_each(el, &flctx->flc_posix) {
1655		count++;
1656	}
1657	spin_unlock(&flctx->flc_lock);
1658
1659	INIT_LIST_HEAD(&locks_to_send);
1660
1661	/*
1662	 * Allocating count locks is enough because no FL_POSIX locks can be
1663	 * added to the list while we are holding cinode->lock_sem that
1664	 * protects locking operations of this inode.
1665	 */
1666	for (i = 0; i < count; i++) {
1667		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
1668		if (!lck) {
1669			rc = -ENOMEM;
1670			goto err_out;
1671		}
1672		list_add_tail(&lck->llist, &locks_to_send);
1673	}
1674
1675	el = locks_to_send.next;
1676	spin_lock(&flctx->flc_lock);
1677	for_each_file_lock(flock, &flctx->flc_posix) {
1678		unsigned char ftype = flock->c.flc_type;
1679
1680		if (el == &locks_to_send) {
1681			/*
1682			 * The list ended. We don't have enough allocated
1683			 * structures - something is really wrong.
1684			 */
1685			cifs_dbg(VFS, "Can't push all brlocks!\n");
1686			break;
1687		}
1688		length = cifs_flock_len(flock);
1689		if (ftype == F_RDLCK || ftype == F_SHLCK)
1690			type = CIFS_RDLCK;
1691		else
1692			type = CIFS_WRLCK;
1693		lck = list_entry(el, struct lock_to_push, llist);
1694		lck->pid = hash_lockowner(flock->c.flc_owner);
1695		lck->netfid = cfile->fid.netfid;
1696		lck->length = length;
1697		lck->type = type;
1698		lck->offset = flock->fl_start;
1699	}
1700	spin_unlock(&flctx->flc_lock);
1701
1702	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1703		int stored_rc;
1704
1705		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
1706					     lck->offset, lck->length, NULL,
1707					     lck->type, 0);
1708		if (stored_rc)
1709			rc = stored_rc;
1710		list_del(&lck->llist);
1711		kfree(lck);
1712	}
1713
1714out:
1715	free_xid(xid);
1716	return rc;
1717err_out:
1718	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
1719		list_del(&lck->llist);
1720		kfree(lck);
1721	}
1722	goto out;
1723}
1724#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1725
1726static int
1727cifs_push_locks(struct cifsFileInfo *cfile)
1728{
1729	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1730	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1731	int rc = 0;
1732#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1733	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1734#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1735
	/* we are going to update can_cache_brlcks here - need write access */
1737	cifs_down_write(&cinode->lock_sem);
1738	if (!cinode->can_cache_brlcks) {
1739		up_write(&cinode->lock_sem);
1740		return rc;
1741	}
1742
1743#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1744	if (cap_unix(tcon->ses) &&
1745	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1746	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1747		rc = cifs_push_posix_locks(cfile);
1748	else
1749#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1750		rc = tcon->ses->server->ops->push_mand_locks(cfile);
1751
1752	cinode->can_cache_brlcks = false;
1753	up_write(&cinode->lock_sem);
1754	return rc;
1755}
1756
1757static void
1758cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
1759		bool *wait_flag, struct TCP_Server_Info *server)
1760{
1761	if (flock->c.flc_flags & FL_POSIX)
1762		cifs_dbg(FYI, "Posix\n");
1763	if (flock->c.flc_flags & FL_FLOCK)
1764		cifs_dbg(FYI, "Flock\n");
1765	if (flock->c.flc_flags & FL_SLEEP) {
1766		cifs_dbg(FYI, "Blocking lock\n");
1767		*wait_flag = true;
1768	}
1769	if (flock->c.flc_flags & FL_ACCESS)
1770		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
1771	if (flock->c.flc_flags & FL_LEASE)
1772		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
1773	if (flock->c.flc_flags &
1774	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
1775	       FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
1776		cifs_dbg(FYI, "Unknown lock flags 0x%x\n",
1777		         flock->c.flc_flags);
1778
1779	*type = server->vals->large_lock_type;
1780	if (lock_is_write(flock)) {
1781		cifs_dbg(FYI, "F_WRLCK\n");
1782		*type |= server->vals->exclusive_lock_type;
1783		*lock = 1;
1784	} else if (lock_is_unlock(flock)) {
1785		cifs_dbg(FYI, "F_UNLCK\n");
1786		*type |= server->vals->unlock_lock_type;
1787		*unlock = 1;
1788		/* Check if unlock includes more than one lock range */
1789	} else if (lock_is_read(flock)) {
1790		cifs_dbg(FYI, "F_RDLCK\n");
1791		*type |= server->vals->shared_lock_type;
1792		*lock = 1;
1793	} else if (flock->c.flc_type == F_EXLCK) {
1794		cifs_dbg(FYI, "F_EXLCK\n");
1795		*type |= server->vals->exclusive_lock_type;
1796		*lock = 1;
1797	} else if (flock->c.flc_type == F_SHLCK) {
1798		cifs_dbg(FYI, "F_SHLCK\n");
1799		*type |= server->vals->shared_lock_type;
1800		*lock = 1;
1801	} else
1802		cifs_dbg(FYI, "Unknown type of lock\n");
1803}
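
/*
 * Illustrative sketch (not compiled in): for a blocking F_WRLCK request,
 * cifs_read_flock() above reports an exclusive large lock, asks for a lock
 * operation and sets the wait flag.  The example_read_flock() wrapper is
 * hypothetical.
 */
#if 0
static void example_read_flock(struct file_lock *flock,
			       struct TCP_Server_Info *server)
{
	__u32 type;
	int lock = 0, unlock = 0;
	bool wait_flag = false;

	/* assume flock describes a blocking (FL_SLEEP) F_WRLCK request */
	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag, server);

	/*
	 * Now: type == large_lock_type | exclusive_lock_type, lock == 1,
	 * unlock == 0 and wait_flag == true.
	 */
}
#endif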
1804
1805static int
1806cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
1807	   bool wait_flag, bool posix_lck, unsigned int xid)
1808{
1809	int rc = 0;
1810	__u64 length = cifs_flock_len(flock);
1811	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1812	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1813	struct TCP_Server_Info *server = tcon->ses->server;
1814#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1815	__u16 netfid = cfile->fid.netfid;
1816
1817	if (posix_lck) {
1818		int posix_lock_type;
1819
1820		rc = cifs_posix_lock_test(file, flock);
1821		if (!rc)
1822			return rc;
1823
1824		if (type & server->vals->shared_lock_type)
1825			posix_lock_type = CIFS_RDLCK;
1826		else
1827			posix_lock_type = CIFS_WRLCK;
1828		rc = CIFSSMBPosixLock(xid, tcon, netfid,
1829				      hash_lockowner(flock->c.flc_owner),
1830				      flock->fl_start, length, flock,
1831				      posix_lock_type, wait_flag);
1832		return rc;
1833	}
1834#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
1835
1836	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
1837	if (!rc)
1838		return rc;
1839
1840	/* BB we could chain these into one lock request BB */
1841	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
1842				    1, 0, false);
1843	if (rc == 0) {
1844		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1845					    type, 0, 1, false);
1846		flock->c.flc_type = F_UNLCK;
1847		if (rc != 0)
1848			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1849				 rc);
1850		return 0;
1851	}
1852
1853	if (type & server->vals->shared_lock_type) {
1854		flock->c.flc_type = F_WRLCK;
1855		return 0;
1856	}
1857
1858	type &= ~server->vals->exclusive_lock_type;
1859
1860	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1861				    type | server->vals->shared_lock_type,
1862				    1, 0, false);
1863	if (rc == 0) {
1864		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1865			type | server->vals->shared_lock_type, 0, 1, false);
1866		flock->c.flc_type = F_RDLCK;
1867		if (rc != 0)
1868			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
1869				 rc);
1870	} else
1871		flock->c.flc_type = F_WRLCK;
1872
1873	return 0;
1874}
1875
1876void
1877cifs_move_llist(struct list_head *source, struct list_head *dest)
1878{
1879	struct list_head *li, *tmp;
1880	list_for_each_safe(li, tmp, source)
1881		list_move(li, dest);
1882}
1883
1884void
1885cifs_free_llist(struct list_head *llist)
1886{
1887	struct cifsLockInfo *li, *tmp;
1888	list_for_each_entry_safe(li, tmp, llist, llist) {
1889		cifs_del_lock_waiters(li);
1890		list_del(&li->llist);
1891		kfree(li);
1892	}
1893}
1894
1895#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
1896int
1897cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1898		  unsigned int xid)
1899{
1900	int rc = 0, stored_rc;
1901	static const int types[] = {
1902		LOCKING_ANDX_LARGE_FILES,
1903		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
1904	};
1905	unsigned int i;
1906	unsigned int max_num, num, max_buf;
1907	LOCKING_ANDX_RANGE *buf, *cur;
1908	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1909	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1910	struct cifsLockInfo *li, *tmp;
1911	__u64 length = cifs_flock_len(flock);
1912	struct list_head tmp_llist;
1913
1914	INIT_LIST_HEAD(&tmp_llist);
1915
1916	/*
1917	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
1918	 * and check it before using.
1919	 */
1920	max_buf = tcon->ses->server->maxBuf;
1921	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
1922		return -EINVAL;
1923
1924	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
1925		     PAGE_SIZE);
1926	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
1927			PAGE_SIZE);
1928	max_num = (max_buf - sizeof(struct smb_hdr)) /
1929						sizeof(LOCKING_ANDX_RANGE);
1930	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1931	if (!buf)
1932		return -ENOMEM;
1933
1934	cifs_down_write(&cinode->lock_sem);
1935	for (i = 0; i < 2; i++) {
1936		cur = buf;
1937		num = 0;
1938		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1939			if (flock->fl_start > li->offset ||
1940			    (flock->fl_start + length) <
1941			    (li->offset + li->length))
1942				continue;
1943			if (current->tgid != li->pid)
1944				continue;
1945			if (types[i] != li->type)
1946				continue;
1947			if (cinode->can_cache_brlcks) {
1948				/*
1949				 * We can cache brlock requests - simply remove
1950				 * a lock from the file's list.
1951				 */
1952				list_del(&li->llist);
1953				cifs_del_lock_waiters(li);
1954				kfree(li);
1955				continue;
1956			}
1957			cur->Pid = cpu_to_le16(li->pid);
1958			cur->LengthLow = cpu_to_le32((u32)li->length);
1959			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1960			cur->OffsetLow = cpu_to_le32((u32)li->offset);
1961			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1962			/*
1963			 * We need to save a lock here to let us add it again to
1964			 * the file's list if the unlock range request fails on
1965			 * the server.
1966			 */
1967			list_move(&li->llist, &tmp_llist);
1968			if (++num == max_num) {
1969				stored_rc = cifs_lockv(xid, tcon,
1970						       cfile->fid.netfid,
1971						       li->type, num, 0, buf);
1972				if (stored_rc) {
1973					/*
1974					 * We failed on the unlock range
1975					 * request - add all locks from the tmp
1976					 * list to the head of the file's list.
1977					 */
1978					cifs_move_llist(&tmp_llist,
1979							&cfile->llist->locks);
1980					rc = stored_rc;
1981				} else
1982					/*
1983					 * The unlock range request succeeded -
1984					 * free the tmp list.
1985					 */
1986					cifs_free_llist(&tmp_llist);
1987				cur = buf;
1988				num = 0;
1989			} else
1990				cur++;
1991		}
1992		if (num) {
1993			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1994					       types[i], num, 0, buf);
1995			if (stored_rc) {
1996				cifs_move_llist(&tmp_llist,
1997						&cfile->llist->locks);
1998				rc = stored_rc;
1999			} else
2000				cifs_free_llist(&tmp_llist);
2001		}
2002	}
2003
2004	up_write(&cinode->lock_sem);
2005	kfree(buf);
2006	return rc;
2007}
2008#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
2009
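/*
 * Handle an F_SETLK/F_SETLKW or flock request: push the byte-range lock
 * (or unlock) to the server - via POSIX lock ops on legacy unix mounts,
 * mandatory lock ops otherwise - and mirror the result into the VFS lock
 * list so the lock is dropped properly on close.
 */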
2010static int
2011cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
2012	   bool wait_flag, bool posix_lck, int lock, int unlock,
2013	   unsigned int xid)
2014{
2015	int rc = 0;
2016	__u64 length = cifs_flock_len(flock);
2017	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2018	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2019	struct TCP_Server_Info *server = tcon->ses->server;
2020	struct inode *inode = d_inode(cfile->dentry);
2021
2022#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
2023	if (posix_lck) {
2024		int posix_lock_type;
2025
2026		rc = cifs_posix_lock_set(file, flock);
2027		if (rc <= FILE_LOCK_DEFERRED)
2028			return rc;
2029
2030		if (type & server->vals->shared_lock_type)
2031			posix_lock_type = CIFS_RDLCK;
2032		else
2033			posix_lock_type = CIFS_WRLCK;
2034
2035		if (unlock == 1)
2036			posix_lock_type = CIFS_UNLCK;
2037
2038		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
2039				      hash_lockowner(flock->c.flc_owner),
2040				      flock->fl_start, length,
2041				      NULL, posix_lock_type, wait_flag);
2042		goto out;
2043	}
2044#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
2045	if (lock) {
2046		struct cifsLockInfo *lock;
2047
2048		lock = cifs_lock_init(flock->fl_start, length, type,
2049				      flock->c.flc_flags);
2050		if (!lock)
2051			return -ENOMEM;
2052
2053		rc = cifs_lock_add_if(cfile, lock, wait_flag);
2054		if (rc < 0) {
2055			kfree(lock);
2056			return rc;
2057		}
2058		if (!rc)
2059			goto out;
2060
2061		/*
2062		 * Windows 7 server can delay breaking lease from read to None
2063		 * if we set a byte-range lock on a file - break it explicitly
2064		 * before sending the lock to the server to be sure the next
2065		 * read won't conflict with non-overlapping locks due to
2066		 * page reading.
2067		 */
2068		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
2069					CIFS_CACHE_READ(CIFS_I(inode))) {
2070			cifs_zap_mapping(inode);
2071			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
2072				 inode);
2073			CIFS_I(inode)->oplock = 0;
2074		}
2075
2076		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
2077					    type, 1, 0, wait_flag);
2078		if (rc) {
2079			kfree(lock);
2080			return rc;
2081		}
2082
2083		cifs_lock_add(cfile, lock);
2084	} else if (unlock)
2085		rc = server->ops->mand_unlock_range(cfile, flock, xid);
2086
2087out:
2088	if ((flock->c.flc_flags & FL_POSIX) || (flock->c.flc_flags & FL_FLOCK)) {
2089		/*
2090		 * If this is a request to remove all locks because we
2091		 * are closing the file, it doesn't matter if the
2092		 * unlocking failed as both cifs.ko and the SMB server
2093		 * remove the lock on file close
2094		 */
2095		if (rc) {
2096			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
2097			if (!(flock->c.flc_flags & FL_CLOSE))
2098				return rc;
2099		}
2100		rc = locks_lock_file_wait(file, flock);
2101	}
2102	return rc;
2103}
2104
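/*
 * flock(2) entry point: only FL_FLOCK requests are accepted; the lock is
 * translated and then applied or removed on the server via cifs_setlk().
 */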
2105int cifs_flock(struct file *file, int cmd, struct file_lock *fl)
2106{
2107	int rc, xid;
2108	int lock = 0, unlock = 0;
2109	bool wait_flag = false;
2110	bool posix_lck = false;
2111	struct cifs_sb_info *cifs_sb;
2112	struct cifs_tcon *tcon;
2113	struct cifsFileInfo *cfile;
2114	__u32 type;
2115
2116	xid = get_xid();
2117
2118	if (!(fl->c.flc_flags & FL_FLOCK)) {
2119		rc = -ENOLCK;
2120		free_xid(xid);
2121		return rc;
2122	}
2123
2124	cfile = (struct cifsFileInfo *)file->private_data;
2125	tcon = tlink_tcon(cfile->tlink);
2126
2127	cifs_read_flock(fl, &type, &lock, &unlock, &wait_flag,
2128			tcon->ses->server);
2129	cifs_sb = CIFS_FILE_SB(file);
2130
2131	if (cap_unix(tcon->ses) &&
2132	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2133	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2134		posix_lck = true;
2135
2136	if (!lock && !unlock) {
2137		/*
2138		 * if no lock or unlock was requested then there is nothing to
2139		 * do since we do not know what kind of operation this is
2140		 */
2141		rc = -EOPNOTSUPP;
2142		free_xid(xid);
2143		return rc;
2144	}
2145
2146	rc = cifs_setlk(file, fl, type, wait_flag, posix_lck, lock, unlock,
2147			xid);
2148	free_xid(xid);
2149	return rc;
2152}
2153
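/*
 * fcntl(2) byte-range lock entry point: F_GETLK requests are tested via
 * cifs_getlk(), F_SETLK/F_SETLKW requests are applied via cifs_setlk().
 */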
2154int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
2155{
2156	int rc, xid;
2157	int lock = 0, unlock = 0;
2158	bool wait_flag = false;
2159	bool posix_lck = false;
2160	struct cifs_sb_info *cifs_sb;
2161	struct cifs_tcon *tcon;
2162	struct cifsFileInfo *cfile;
2163	__u32 type;
2164
2165	rc = -EACCES;
2166	xid = get_xid();
2167
2168	cifs_dbg(FYI, "%s: %pD2 cmd=0x%x type=0x%x flags=0x%x r=%lld:%lld\n", __func__, file, cmd,
2169		 flock->c.flc_flags, flock->c.flc_type,
2170		 (long long)flock->fl_start,
2171		 (long long)flock->fl_end);
2172
2173	cfile = (struct cifsFileInfo *)file->private_data;
2174	tcon = tlink_tcon(cfile->tlink);
2175
2176	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
2177			tcon->ses->server);
2178	cifs_sb = CIFS_FILE_SB(file);
2179	set_bit(CIFS_INO_CLOSE_ON_LOCK, &CIFS_I(d_inode(cfile->dentry))->flags);
2180
2181	if (cap_unix(tcon->ses) &&
2182	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2183	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2184		posix_lck = true;
2185	/*
2186	 * BB add code here to normalize offset and length to account for
2187	 * negative length which we cannot accept over the wire.
2188	 */
2189	if (IS_GETLK(cmd)) {
2190		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
2191		free_xid(xid);
2192		return rc;
2193	}
2194
2195	if (!lock && !unlock) {
2196		/*
2197		 * if no lock or unlock was requested then there is nothing to
2198		 * do since we do not know what kind of operation this is
2199		 */
2200		free_xid(xid);
2201		return -EOPNOTSUPP;
2202	}
2203
2204	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
2205			xid);
2206	free_xid(xid);
2207	return rc;
2208}
2209
2210/*
2211 * Update the file size (if needed) after a write. Should be called with
2212 * the inode->i_lock held.
2213 */
2214void
2215cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
2216		      unsigned int bytes_written)
2217{
2218	loff_t end_of_write = offset + bytes_written;
2219
2220	if (end_of_write > cifsi->netfs.remote_i_size)
2221		netfs_resize_file(&cifsi->netfs, end_of_write, true);
2222}
2223
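/*
 * Synchronously write a buffer at *offset through the given open handle:
 * loop issuing ->sync_write() calls in server-sized chunks, reopening an
 * invalidated handle and retrying on -EAGAIN, and advance *offset and the
 * cached EOF as bytes go out.
 */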
2224static ssize_t
2225cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
2226	   size_t write_size, loff_t *offset)
2227{
2228	int rc = 0;
2229	unsigned int bytes_written = 0;
2230	unsigned int total_written;
2231	struct cifs_tcon *tcon;
2232	struct TCP_Server_Info *server;
2233	unsigned int xid;
2234	struct dentry *dentry = open_file->dentry;
2235	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
2236	struct cifs_io_parms io_parms = {0};
2237
2238	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
2239		 write_size, *offset, dentry);
2240
2241	tcon = tlink_tcon(open_file->tlink);
2242	server = tcon->ses->server;
2243
2244	if (!server->ops->sync_write)
2245		return -ENOSYS;
2246
2247	xid = get_xid();
2248
2249	for (total_written = 0; write_size > total_written;
2250	     total_written += bytes_written) {
2251		rc = -EAGAIN;
2252		while (rc == -EAGAIN) {
2253			struct kvec iov[2];
2254			unsigned int len;
2255
2256			if (open_file->invalidHandle) {
2257				/* we could deadlock if we called
2258				   filemap_fdatawait from here so tell
2259				   reopen_file not to flush data to
2260				   server now */
2261				rc = cifs_reopen_file(open_file, false);
2262				if (rc != 0)
2263					break;
2264			}
2265
2266			len = min(server->ops->wp_retry_size(d_inode(dentry)),
2267				  (unsigned int)write_size - total_written);
2268			/* iov[0] is reserved for smb header */
2269			iov[1].iov_base = (char *)write_data + total_written;
2270			iov[1].iov_len = len;
2271			io_parms.pid = pid;
2272			io_parms.tcon = tcon;
2273			io_parms.offset = *offset;
2274			io_parms.length = len;
2275			rc = server->ops->sync_write(xid, &open_file->fid,
2276					&io_parms, &bytes_written, iov, 1);
2277		}
2278		if (rc || (bytes_written == 0)) {
2279			if (total_written)
2280				break;
2281			else {
2282				free_xid(xid);
2283				return rc;
2284			}
2285		} else {
2286			spin_lock(&d_inode(dentry)->i_lock);
2287			cifs_update_eof(cifsi, *offset, bytes_written);
2288			spin_unlock(&d_inode(dentry)->i_lock);
2289			*offset += bytes_written;
2290		}
2291	}
2292
2293	cifs_stats_bytes_written(tcon, total_written);
2294
2295	if (total_written > 0) {
2296		spin_lock(&d_inode(dentry)->i_lock);
2297		if (*offset > d_inode(dentry)->i_size) {
2298			i_size_write(d_inode(dentry), *offset);
2299			d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
2300		}
2301		spin_unlock(&d_inode(dentry)->i_lock);
2302	}
2303	mark_inode_dirty_sync(d_inode(dentry));
2304	free_xid(xid);
2305	return total_written;
2306}
2307
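/*
 * Return (with a reference held) the first valid handle on this inode that
 * was opened for reading, optionally restricted to the current fsuid on
 * multiuser mounts; return NULL if no such handle exists.
 */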
2308struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
2309					bool fsuid_only)
2310{
2311	struct cifsFileInfo *open_file = NULL;
2312	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2313
2314	/* only filter by fsuid on multiuser mounts */
2315	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2316		fsuid_only = false;
2317
2318	spin_lock(&cifs_inode->open_file_lock);
2319	/* we could simply get the first list entry since write-only entries
2320	   are always at the end of the list, but since the first entry might
2321	   have a close pending, we go through the whole list */
2322	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2323		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2324			continue;
2325		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
2326			if (!open_file->invalidHandle) {
2327				/* found a good file */
2328				/* lock it so it will not be closed on us */
2329				cifsFileInfo_get(open_file);
2330				spin_unlock(&cifs_inode->open_file_lock);
2331				return open_file;
2332			} /* else might as well continue, and look for
2333			     another, or simply have the caller reopen it
2334			     again rather than trying to fix this handle */
2335		} else /* write only file */
2336			break; /* write only files are last so must be done */
2337	}
2338	spin_unlock(&cifs_inode->open_file_lock);
2339	return NULL;
2340}
2341
2342/* Return -EBADF if no handle is found and general rc otherwise */
2343int
2344cifs_get_writable_file(struct cifsInodeInfo *cifs_inode, int flags,
2345		       struct cifsFileInfo **ret_file)
2346{
2347	struct cifsFileInfo *open_file, *inv_file = NULL;
2348	struct cifs_sb_info *cifs_sb;
2349	bool any_available = false;
2350	int rc = -EBADF;
2351	unsigned int refind = 0;
2352	bool fsuid_only = flags & FIND_WR_FSUID_ONLY;
2353	bool with_delete = flags & FIND_WR_WITH_DELETE;
2354	*ret_file = NULL;
2355
2356	/*
2357	 * Having a null inode here (because mapping->host was set to zero by
2358	 * the VFS or MM) should not happen but we had reports of an oops (due
2359	 * to it being zero) during stress test cases so we need to check for it.
2360	 */
2361
2362	if (cifs_inode == NULL) {
2363		cifs_dbg(VFS, "Null inode passed to cifs_get_writable_file\n");
2364		dump_stack();
2365		return rc;
2366	}
2367
2368	cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
2369
2370	/* only filter by fsuid on multiuser mounts */
2371	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
2372		fsuid_only = false;
2373
2374	spin_lock(&cifs_inode->open_file_lock);
2375refind_writable:
2376	if (refind > MAX_REOPEN_ATT) {
2377		spin_unlock(&cifs_inode->open_file_lock);
2378		return rc;
2379	}
2380	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2381		if (!any_available && open_file->pid != current->tgid)
2382			continue;
2383		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
2384			continue;
2385		if (with_delete && !(open_file->fid.access & DELETE))
2386			continue;
2387		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2388			if (!open_file->invalidHandle) {
2389				/* found a good writable file */
2390				cifsFileInfo_get(open_file);
2391				spin_unlock(&cifs_inode->open_file_lock);
2392				*ret_file = open_file;
2393				return 0;
2394			} else {
2395				if (!inv_file)
2396					inv_file = open_file;
2397			}
2398		}
2399	}
2400	/* couldn't find a usable FH with the same pid, try any available */
2401	if (!any_available) {
2402		any_available = true;
2403		goto refind_writable;
2404	}
2405
2406	if (inv_file) {
2407		any_available = false;
2408		cifsFileInfo_get(inv_file);
2409	}
2410
2411	spin_unlock(&cifs_inode->open_file_lock);
2412
2413	if (inv_file) {
2414		rc = cifs_reopen_file(inv_file, false);
2415		if (!rc) {
2416			*ret_file = inv_file;
2417			return 0;
2418		}
2419
2420		spin_lock(&cifs_inode->open_file_lock);
2421		list_move_tail(&inv_file->flist, &cifs_inode->openFileList);
2422		spin_unlock(&cifs_inode->open_file_lock);
2423		cifsFileInfo_put(inv_file);
2424		++refind;
2425		inv_file = NULL;
2426		spin_lock(&cifs_inode->open_file_lock);
2427		goto refind_writable;
2428	}
2429
2430	return rc;
2431}
2432
2433struct cifsFileInfo *
2434find_writable_file(struct cifsInodeInfo *cifs_inode, int flags)
2435{
2436	struct cifsFileInfo *cfile;
2437	int rc;
2438
2439	rc = cifs_get_writable_file(cifs_inode, flags, &cfile);
2440	if (rc)
2441		cifs_dbg(FYI, "Couldn't find writable handle rc=%d\n", rc);
2442
2443	return cfile;
2444}
2445
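/*
 * Find an open, writable handle for the given path on this tcon by
 * comparing the dentry paths of all files open on the share; returns
 * -ENOENT if the path is not currently open.
 */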
2446int
2447cifs_get_writable_path(struct cifs_tcon *tcon, const char *name,
2448		       int flags,
2449		       struct cifsFileInfo **ret_file)
2450{
2451	struct cifsFileInfo *cfile;
2452	void *page = alloc_dentry_path();
2453
2454	*ret_file = NULL;
2455
2456	spin_lock(&tcon->open_file_lock);
2457	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2458		struct cifsInodeInfo *cinode;
2459		const char *full_path = build_path_from_dentry(cfile->dentry, page);
2460		if (IS_ERR(full_path)) {
2461			spin_unlock(&tcon->open_file_lock);
2462			free_dentry_path(page);
2463			return PTR_ERR(full_path);
2464		}
2465		if (strcmp(full_path, name))
2466			continue;
2467
2468		cinode = CIFS_I(d_inode(cfile->dentry));
2469		spin_unlock(&tcon->open_file_lock);
2470		free_dentry_path(page);
2471		return cifs_get_writable_file(cinode, flags, ret_file);
2472	}
2473
2474	spin_unlock(&tcon->open_file_lock);
2475	free_dentry_path(page);
2476	return -ENOENT;
2477}
2478
2479int
2480cifs_get_readable_path(struct cifs_tcon *tcon, const char *name,
2481		       struct cifsFileInfo **ret_file)
2482{
2483	struct cifsFileInfo *cfile;
2484	void *page = alloc_dentry_path();
2485
2486	*ret_file = NULL;
2487
2488	spin_lock(&tcon->open_file_lock);
2489	list_for_each_entry(cfile, &tcon->openFileList, tlist) {
2490		struct cifsInodeInfo *cinode;
2491		const char *full_path = build_path_from_dentry(cfile->dentry, page);
2492		if (IS_ERR(full_path)) {
2493			spin_unlock(&tcon->open_file_lock);
2494			free_dentry_path(page);
2495			return PTR_ERR(full_path);
2496		}
2497		if (strcmp(full_path, name))
2498			continue;
2499
2500		cinode = CIFS_I(d_inode(cfile->dentry));
2501		spin_unlock(&tcon->open_file_lock);
2502		free_dentry_path(page);
2503		*ret_file = find_readable_file(cinode, 0);
2504		return *ret_file ? 0 : -ENOENT;
2505	}
2506
2507	spin_unlock(&tcon->open_file_lock);
2508	free_dentry_path(page);
2509	return -ENOENT;
2510}
2511
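/*
 * Final kref release for a cifs_writedata: deregister any smbdirect memory
 * registration, drop the file handle reference and free the structure.
 */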
2512void
2513cifs_writedata_release(struct kref *refcount)
2514{
2515	struct cifs_writedata *wdata = container_of(refcount,
2516					struct cifs_writedata, refcount);
2517#ifdef CONFIG_CIFS_SMB_DIRECT
2518	if (wdata->mr) {
2519		smbd_deregister_mr(wdata->mr);
2520		wdata->mr = NULL;
2521	}
2522#endif
2523
2524	if (wdata->cfile)
2525		cifsFileInfo_put(wdata->cfile);
2526
2527	kfree(wdata);
2528}
2529
2530/*
2531 * Write failed with a retryable error. Resend the write request. It's also
2532 * possible that the page was redirtied so re-clean the page.
2533 */
2534static void
2535cifs_writev_requeue(struct cifs_writedata *wdata)
2536{
2537	int rc = 0;
2538	struct inode *inode = d_inode(wdata->cfile->dentry);
2539	struct TCP_Server_Info *server;
2540	unsigned int rest_len = wdata->bytes;
2541	loff_t fpos = wdata->offset;
2542
2543	server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2544	do {
2545		struct cifs_writedata *wdata2;
2546		unsigned int wsize, cur_len;
2547
2548		wsize = server->ops->wp_retry_size(inode);
2549		if (wsize < rest_len) {
2550			if (wsize < PAGE_SIZE) {
2551				rc = -EOPNOTSUPP;
2552				break;
2553			}
2554			cur_len = min(round_down(wsize, PAGE_SIZE), rest_len);
2555		} else {
2556			cur_len = rest_len;
2557		}
2558
2559		wdata2 = cifs_writedata_alloc(cifs_writev_complete);
2560		if (!wdata2) {
2561			rc = -ENOMEM;
2562			break;
2563		}
2564
2565		wdata2->sync_mode = wdata->sync_mode;
2566		wdata2->offset	= fpos;
2567		wdata2->bytes	= cur_len;
2568		wdata2->iter	= wdata->iter;
2569
2570		iov_iter_advance(&wdata2->iter, fpos - wdata->offset);
2571		iov_iter_truncate(&wdata2->iter, wdata2->bytes);
2572
2573		if (iov_iter_is_xarray(&wdata2->iter))
2574			/* Check for pages having been redirtied and clean
2575			 * them.  We can do this by walking the xarray.  If
2576			 * it's not an xarray, then it's a DIO and we shouldn't
2577			 * be mucking around with the page bits.
2578			 */
2579			cifs_undirty_folios(inode, fpos, cur_len);
2580
2581		rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY,
2582					    &wdata2->cfile);
2583		if (!wdata2->cfile) {
2584			cifs_dbg(VFS, "No writable handle to retry writepages rc=%d\n",
2585				 rc);
2586			if (!is_retryable_error(rc))
2587				rc = -EBADF;
2588		} else {
2589			wdata2->pid = wdata2->cfile->pid;
2590			rc = server->ops->async_writev(wdata2,
2591						       cifs_writedata_release);
2592		}
2593
2594		kref_put(&wdata2->refcount, cifs_writedata_release);
2595		if (rc) {
2596			if (is_retryable_error(rc))
2597				continue;
2598			fpos += cur_len;
2599			rest_len -= cur_len;
2600			break;
2601		}
2602
2603		fpos += cur_len;
2604		rest_len -= cur_len;
2605	} while (rest_len > 0);
2606
2607	/* Clean up remaining pages from the original wdata */
2608	if (iov_iter_is_xarray(&wdata->iter))
2609		cifs_pages_write_failed(inode, fpos, rest_len);
2610
2611	if (rc != 0 && !is_retryable_error(rc))
2612		mapping_set_error(inode->i_mapping, rc);
2613	kref_put(&wdata->refcount, cifs_writedata_release);
2614}
2615
2616void
2617cifs_writev_complete(struct work_struct *work)
2618{
2619	struct cifs_writedata *wdata = container_of(work,
2620						struct cifs_writedata, work);
2621	struct inode *inode = d_inode(wdata->cfile->dentry);
2622
2623	if (wdata->result == 0) {
2624		spin_lock(&inode->i_lock);
2625		cifs_update_eof(CIFS_I(inode), wdata->offset, wdata->bytes);
2626		spin_unlock(&inode->i_lock);
2627		cifs_stats_bytes_written(tlink_tcon(wdata->cfile->tlink),
2628					 wdata->bytes);
2629	} else if (wdata->sync_mode == WB_SYNC_ALL && wdata->result == -EAGAIN)
2630		return cifs_writev_requeue(wdata);
2631
2632	if (wdata->result == -EAGAIN)
2633		cifs_pages_write_redirty(inode, wdata->offset, wdata->bytes);
2634	else if (wdata->result < 0)
2635		cifs_pages_write_failed(inode, wdata->offset, wdata->bytes);
2636	else
2637		cifs_pages_written_back(inode, wdata->offset, wdata->bytes);
2638
2639	if (wdata->result != -EAGAIN)
2640		mapping_set_error(inode->i_mapping, wdata->result);
2641	kref_put(&wdata->refcount, cifs_writedata_release);
2642}
2643
2644struct cifs_writedata *cifs_writedata_alloc(work_func_t complete)
2645{
2646	struct cifs_writedata *wdata;
2647
2648	wdata = kzalloc(sizeof(*wdata), GFP_NOFS);
2649	if (wdata != NULL) {
2650		kref_init(&wdata->refcount);
2651		INIT_LIST_HEAD(&wdata->list);
2652		init_completion(&wdata->done);
2653		INIT_WORK(&wdata->work, complete);
2654	}
2655	return wdata;
2656}
2657
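/*
 * Synchronously write the byte range [from, to) of a locked pagecache page
 * to the server using any writable handle for the inode, trimming the
 * range so that the write never extends the file.
 */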
2658static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
2659{
2660	struct address_space *mapping = page->mapping;
2661	loff_t offset = (loff_t)page->index << PAGE_SHIFT;
2662	char *write_data;
2663	int rc = -EFAULT;
2664	int bytes_written = 0;
2665	struct inode *inode;
2666	struct cifsFileInfo *open_file;
2667
2668	if (!mapping || !mapping->host)
2669		return -EFAULT;
2670
2671	inode = page->mapping->host;
2672
2673	offset += (loff_t)from;
2674	write_data = kmap(page);
2675	write_data += from;
2676
2677	if ((to > PAGE_SIZE) || (from > to)) {
2678		kunmap(page);
2679		return -EIO;
2680	}
2681
2682	/* racing with truncate? */
2683	if (offset > mapping->host->i_size) {
2684		kunmap(page);
2685		return 0; /* don't care */
2686	}
2687
2688	/* check to make sure that we are not extending the file */
2689	if (mapping->host->i_size - offset < (loff_t)to)
2690		to = (unsigned)(mapping->host->i_size - offset);
2691
2692	rc = cifs_get_writable_file(CIFS_I(mapping->host), FIND_WR_ANY,
2693				    &open_file);
2694	if (!rc) {
2695		bytes_written = cifs_write(open_file, open_file->pid,
2696					   write_data, to - from, &offset);
2697		cifsFileInfo_put(open_file);
2698		/* Does mm or vfs already set times? */
2699		simple_inode_init_ts(inode);
2700		if ((bytes_written > 0) && (offset))
2701			rc = 0;
2702		else if (bytes_written < 0)
2703			rc = bytes_written;
2704		else
2705			rc = -EFAULT;
2706	} else {
2707		cifs_dbg(FYI, "No writable handle for write page rc=%d\n", rc);
2708		if (!is_retryable_error(rc))
2709			rc = -EIO;
2710	}
2711
2712	kunmap(page);
2713	return rc;
2714}
2715
2716/*
2717 * Extend the region to be written back to include subsequent contiguously
2718 * dirty pages if possible, but don't sleep while doing so.
2719 */
2720static void cifs_extend_writeback(struct address_space *mapping,
2721				  struct xa_state *xas,
2722				  long *_count,
2723				  loff_t start,
2724				  int max_pages,
2725				  loff_t max_len,
2726				  size_t *_len)
2727{
2728	struct folio_batch batch;
2729	struct folio *folio;
2730	unsigned int nr_pages;
2731	pgoff_t index = (start + *_len) / PAGE_SIZE;
2732	size_t len;
2733	bool stop = true;
2734	unsigned int i;
2735
2736	folio_batch_init(&batch);
2737
2738	do {
2739		/* Firstly, we gather up a batch of contiguous dirty pages
2740		 * under the RCU read lock - but we can't clear the dirty flags
2741		 * there if any of those pages are mapped.
2742		 */
2743		rcu_read_lock();
2744
2745		xas_for_each(xas, folio, ULONG_MAX) {
2746			stop = true;
2747			if (xas_retry(xas, folio))
2748				continue;
2749			if (xa_is_value(folio))
2750				break;
2751			if (folio->index != index) {
2752				xas_reset(xas);
2753				break;
2754			}
2755
2756			if (!folio_try_get_rcu(folio)) {
2757				xas_reset(xas);
2758				continue;
2759			}
2760			nr_pages = folio_nr_pages(folio);
2761			if (nr_pages > max_pages) {
2762				xas_reset(xas);
2763				break;
2764			}
2765
2766			/* Has the page moved or been split? */
2767			if (unlikely(folio != xas_reload(xas))) {
2768				folio_put(folio);
2769				xas_reset(xas);
2770				break;
2771			}
2772
2773			if (!folio_trylock(folio)) {
2774				folio_put(folio);
2775				xas_reset(xas);
2776				break;
2777			}
2778			if (!folio_test_dirty(folio) ||
2779			    folio_test_writeback(folio)) {
2780				folio_unlock(folio);
2781				folio_put(folio);
2782				xas_reset(xas);
2783				break;
2784			}
2785
2786			max_pages -= nr_pages;
2787			len = folio_size(folio);
2788			stop = false;
2789
2790			index += nr_pages;
2791			*_count -= nr_pages;
2792			*_len += len;
2793			if (max_pages <= 0 || *_len >= max_len || *_count <= 0)
2794				stop = true;
2795
2796			if (!folio_batch_add(&batch, folio))
2797				break;
2798			if (stop)
2799				break;
2800		}
2801
2802		xas_pause(xas);
2803		rcu_read_unlock();
2804
2805		/* Now, if we obtained any pages, we can shift them to being
2806		 * writable and mark them for caching.
2807		 */
2808		if (!folio_batch_count(&batch))
2809			break;
2810
2811		for (i = 0; i < folio_batch_count(&batch); i++) {
2812			folio = batch.folios[i];
2813			/* The folio should be locked, dirty and not undergoing
2814			 * writeback from the loop above.
2815			 */
2816			if (!folio_clear_dirty_for_io(folio))
2817				WARN_ON(1);
2818			folio_start_writeback(folio);
2819			folio_unlock(folio);
2820		}
2821
2822		folio_batch_release(&batch);
2823		cond_resched();
2824	} while (!stop);
2825}
2826
2827/*
2828 * Write back the locked page and any subsequent non-locked dirty pages.
2829 */
2830static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
2831						 struct writeback_control *wbc,
2832						 struct xa_state *xas,
2833						 struct folio *folio,
2834						 unsigned long long start,
2835						 unsigned long long end)
2836{
2837	struct inode *inode = mapping->host;
2838	struct TCP_Server_Info *server;
2839	struct cifs_writedata *wdata;
2840	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2841	struct cifs_credits credits_on_stack;
2842	struct cifs_credits *credits = &credits_on_stack;
2843	struct cifsFileInfo *cfile = NULL;
2844	unsigned long long i_size = i_size_read(inode), max_len;
2845	unsigned int xid, wsize;
2846	size_t len = folio_size(folio);
2847	long count = wbc->nr_to_write;
2848	int rc;
2849
2850	/* The folio should be locked, dirty and not undergoing writeback. */
2851	if (!folio_clear_dirty_for_io(folio))
2852		WARN_ON_ONCE(1);
2853	folio_start_writeback(folio);
2854
2855	count -= folio_nr_pages(folio);
2856
2857	xid = get_xid();
2858	server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
2859
2860	rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
2861	if (rc) {
2862		cifs_dbg(VFS, "No writable handle in writepages rc=%d\n", rc);
2863		goto err_xid;
2864	}
2865
2866	rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
2867					   &wsize, credits);
2868	if (rc != 0)
2869		goto err_close;
2870
2871	wdata = cifs_writedata_alloc(cifs_writev_complete);
2872	if (!wdata) {
2873		rc = -ENOMEM;
2874		goto err_uncredit;
2875	}
2876
2877	wdata->sync_mode = wbc->sync_mode;
2878	wdata->offset = folio_pos(folio);
2879	wdata->pid = cfile->pid;
2880	wdata->credits = credits_on_stack;
2881	wdata->cfile = cfile;
2882	wdata->server = server;
2883	cfile = NULL;
2884
2885	/* Find all consecutive lockable dirty pages that have contiguous
2886	 * written regions, stopping when we find a page that is not
2887	 * immediately lockable, is not dirty or is missing, or we reach the
2888	 * end of the range.
2889	 */
2890	if (start < i_size) {
2891		/* Trim the write to the EOF; the extra data is ignored.  Also
2892		 * put an upper limit on the size of a single storedata op.
2893		 */
2894		max_len = wsize;
2895		max_len = min_t(unsigned long long, max_len, end - start + 1);
2896		max_len = min_t(unsigned long long, max_len, i_size - start);
2897
2898		if (len < max_len) {
2899			int max_pages = INT_MAX;
2900
2901#ifdef CONFIG_CIFS_SMB_DIRECT
2902			if (server->smbd_conn)
2903				max_pages = server->smbd_conn->max_frmr_depth;
2904#endif
2905			max_pages -= folio_nr_pages(folio);
2906
2907			if (max_pages > 0)
2908				cifs_extend_writeback(mapping, xas, &count, start,
2909						      max_pages, max_len, &len);
2910		}
2911	}
2912	len = min_t(unsigned long long, len, i_size - start);
2913
2914	/* We now have a contiguous set of dirty pages, each with writeback
2915	 * set; the first page is still locked at this point, but all the rest
2916	 * have been unlocked.
2917	 */
2918	folio_unlock(folio);
2919	wdata->bytes = len;
2920
2921	if (start < i_size) {
2922		iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages,
2923				start, len);
2924
2925		rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
2926		if (rc)
2927			goto err_wdata;
2928
2929		if (wdata->cfile->invalidHandle)
2930			rc = -EAGAIN;
2931		else
2932			rc = wdata->server->ops->async_writev(wdata,
2933							      cifs_writedata_release);
2934		if (rc >= 0) {
2935			kref_put(&wdata->refcount, cifs_writedata_release);
2936			goto err_close;
2937		}
2938	} else {
2939		/* The dirty region was entirely beyond the EOF. */
2940		cifs_pages_written_back(inode, start, len);
2941		rc = 0;
2942	}
2943
2944err_wdata:
2945	kref_put(&wdata->refcount, cifs_writedata_release);
2946err_uncredit:
2947	add_credits_and_wake_if(server, credits, 0);
2948err_close:
2949	if (cfile)
2950		cifsFileInfo_put(cfile);
2951err_xid:
2952	free_xid(xid);
2953	if (rc == 0) {
2954		wbc->nr_to_write = count;
2955		rc = len;
2956	} else if (is_retryable_error(rc)) {
2957		cifs_pages_write_redirty(inode, start, len);
2958	} else {
2959		cifs_pages_write_failed(inode, start, len);
2960		mapping_set_error(mapping, rc);
2961	}
2962	/* Indication to update ctime and mtime as close is deferred */
2963	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
2964	return rc;
2965}
2966
2967/*
2968 * find the next dirty folio in the range and write back from it
2969 */
2970static ssize_t cifs_writepages_begin(struct address_space *mapping,
2971				     struct writeback_control *wbc,
2972				     struct xa_state *xas,
2973				     unsigned long long *_start,
2974				     unsigned long long end)
2975{
2976	struct folio *folio;
2977	unsigned long long start = *_start;
2978	ssize_t ret;
2979	int skips = 0;
2980
2981search_again:
2982	/* Find the first dirty page. */
2983	rcu_read_lock();
2984
2985	for (;;) {
2986		folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY);
2987		if (xas_retry(xas, folio) || xa_is_value(folio))
2988			continue;
2989		if (!folio)
2990			break;
2991
2992		if (!folio_try_get_rcu(folio)) {
2993			xas_reset(xas);
2994			continue;
2995		}
2996
2997		if (unlikely(folio != xas_reload(xas))) {
2998			folio_put(folio);
2999			xas_reset(xas);
3000			continue;
3001		}
3002
3003		xas_pause(xas);
3004		break;
3005	}
3006	rcu_read_unlock();
3007	if (!folio)
3008		return 0;
3009
3010	start = folio_pos(folio); /* May regress with THPs */
3011
3012	/* At this point we hold neither the i_pages lock nor the page lock:
3013	 * the page may be truncated or invalidated (changing page->mapping to
3014	 * NULL), or even swizzled back from swapper_space to tmpfs file
3015	 * mapping
3016	 */
3017lock_again:
3018	if (wbc->sync_mode != WB_SYNC_NONE) {
3019		ret = folio_lock_killable(folio);
3020		if (ret < 0)
3021			return ret;
3022	} else {
3023		if (!folio_trylock(folio))
3024			goto search_again;
3025	}
3026
3027	if (folio->mapping != mapping ||
3028	    !folio_test_dirty(folio)) {
3029		start += folio_size(folio);
3030		folio_unlock(folio);
3031		goto search_again;
3032	}
3033
3034	if (folio_test_writeback(folio) ||
3035	    folio_test_fscache(folio)) {
3036		folio_unlock(folio);
3037		if (wbc->sync_mode != WB_SYNC_NONE) {
3038			folio_wait_writeback(folio);
3039#ifdef CONFIG_CIFS_FSCACHE
3040			folio_wait_fscache(folio);
3041#endif
3042			goto lock_again;
3043		}
3044
3045		start += folio_size(folio);
3046		if (wbc->sync_mode == WB_SYNC_NONE) {
3047			if (skips >= 5 || need_resched()) {
3048				ret = 0;
3049				goto out;
3050			}
3051			skips++;
3052		}
3053		goto search_again;
3054	}
3055
3056	ret = cifs_write_back_from_locked_folio(mapping, wbc, xas, folio, start, end);
3057out:
3058	if (ret > 0)
3059		*_start = start + ret;
3060	return ret;
3061}
3062
3063/*
3064 * Write a region of pages back to the server
3065 */
3066static int cifs_writepages_region(struct address_space *mapping,
3067				  struct writeback_control *wbc,
3068				  unsigned long long *_start,
3069				  unsigned long long end)
3070{
3071	ssize_t ret;
3072
3073	XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE);
3074
3075	do {
3076		ret = cifs_writepages_begin(mapping, wbc, &xas, _start, end);
3077		if (ret > 0 && wbc->nr_to_write > 0)
3078			cond_resched();
3079	} while (ret > 0 && wbc->nr_to_write > 0);
3080
3081	return ret > 0 ? 0 : ret;
3082}
3083
3084/*
3085 * Write some of the pending data back to the server
3086 */
3087static int cifs_writepages(struct address_space *mapping,
3088			   struct writeback_control *wbc)
3089{
3090	loff_t start, end;
3091	int ret;
3092
3093	/* We have to be careful as we can end up racing with setattr()
3094	 * truncating the pagecache since the caller doesn't take a lock here
3095	 * to prevent it.
3096	 */
3097
3098	if (wbc->range_cyclic && mapping->writeback_index) {
3099		start = mapping->writeback_index * PAGE_SIZE;
3100		ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
3101		if (ret < 0)
3102			goto out;
3103
3104		if (wbc->nr_to_write <= 0) {
3105			mapping->writeback_index = start / PAGE_SIZE;
3106			goto out;
3107		}
3108
3109		start = 0;
3110		end = mapping->writeback_index * PAGE_SIZE;
3111		mapping->writeback_index = 0;
3112		ret = cifs_writepages_region(mapping, wbc, &start, end);
3113		if (ret == 0)
3114			mapping->writeback_index = start / PAGE_SIZE;
3115	} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
3116		start = 0;
3117		ret = cifs_writepages_region(mapping, wbc, &start, LLONG_MAX);
3118		if (wbc->nr_to_write > 0 && ret == 0)
3119			mapping->writeback_index = start / PAGE_SIZE;
3120	} else {
3121		start = wbc->range_start;
3122		ret = cifs_writepages_region(mapping, wbc, &start, wbc->range_end);
3123	}
3124
3125out:
3126	return ret;
3127}
3128
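/*
 * Write a single locked page back to the server via cifs_partialpagewrite(),
 * retrying on -EAGAIN for WB_SYNC_ALL writeback and redirtying the page on
 * other retryable errors.
 */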
3129static int
3130cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
3131{
3132	int rc;
3133	unsigned int xid;
3134
3135	xid = get_xid();
3136/* BB add check for wbc flags */
3137	get_page(page);
3138	if (!PageUptodate(page))
3139		cifs_dbg(FYI, "ppw - page not up to date\n");
3140
3141	/*
3142	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
3143	 *
3144	 * A writepage() implementation always needs to do either this,
3145	 * or re-dirty the page with "redirty_page_for_writepage()" in
3146	 * the case of a failure.
3147	 *
3148	 * Just unlocking the page will cause the radix tree tag-bits
3149	 * to fail to update with the state of the page correctly.
3150	 */
3151	set_page_writeback(page);
3152retry_write:
3153	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
3154	if (is_retryable_error(rc)) {
3155		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
3156			goto retry_write;
3157		redirty_page_for_writepage(wbc, page);
3158	} else if (rc != 0) {
3159		SetPageError(page);
3160		mapping_set_error(page->mapping, rc);
3161	} else {
3162		SetPageUptodate(page);
3163	}
3164	end_page_writeback(page);
3165	put_page(page);
3166	free_xid(xid);
3167	return rc;
3168}
3169
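/*
 * ->write_end() for buffered writes: mark the folio up to date when a full
 * page (or the range prepared by write_begin) was copied, otherwise send
 * the copied bytes straight to the server, and grow i_size/i_blocks for
 * writes that extend the file.
 */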
3170static int cifs_write_end(struct file *file, struct address_space *mapping,
3171			loff_t pos, unsigned len, unsigned copied,
3172			struct page *page, void *fsdata)
3173{
3174	int rc;
3175	struct inode *inode = mapping->host;
3176	struct cifsFileInfo *cfile = file->private_data;
3177	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
3178	struct folio *folio = page_folio(page);
3179	__u32 pid;
3180
3181	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3182		pid = cfile->pid;
3183	else
3184		pid = current->tgid;
3185
3186	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
3187		 page, pos, copied);
3188
3189	if (folio_test_checked(folio)) {
3190		if (copied == len)
3191			folio_mark_uptodate(folio);
3192		folio_clear_checked(folio);
3193	} else if (!folio_test_uptodate(folio) && copied == PAGE_SIZE)
3194		folio_mark_uptodate(folio);
3195
3196	if (!folio_test_uptodate(folio)) {
3197		char *page_data;
3198		unsigned offset = pos & (PAGE_SIZE - 1);
3199		unsigned int xid;
3200
3201		xid = get_xid();
3202		/* this is probably better than directly calling
3203		   cifs_partialpagewrite since in this function the file handle
3204		   is known, which we might as well leverage */
3205		/* BB check if anything else missing out of ppw
3206		   such as updating last write time */
3207		page_data = kmap(page);
3208		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
3209		/* if (rc < 0) should we set writebehind rc? */
3210		kunmap(page);
3211
3212		free_xid(xid);
3213	} else {
3214		rc = copied;
3215		pos += copied;
3216		set_page_dirty(page);
3217	}
3218
3219	if (rc > 0) {
3220		spin_lock(&inode->i_lock);
3221		if (pos > inode->i_size) {
3222			loff_t additional_blocks = (512 - 1 + copied) >> 9;
3223
3224			i_size_write(inode, pos);
3225			/*
3226			 * Estimate new allocation size based on the amount written.
3227			 * This will be updated from server on close (and on queryinfo)
3228			 */
3229			inode->i_blocks = min_t(blkcnt_t, (512 - 1 + pos) >> 9,
3230						inode->i_blocks + additional_blocks);
3231		}
3232		spin_unlock(&inode->i_lock);
3233	}
3234
3235	unlock_page(page);
3236	put_page(page);
3237	/* Indication to update ctime and mtime as close is deferred */
3238	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
3239
3240	return rc;
3241}
3242
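/*
 * fsync for strict cache mounts: flush and wait for dirty pagecache data,
 * zap the local page cache if we no longer hold a read-caching lease, then
 * ask the server to flush its copy of the file.
 */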
3243int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
3244		      int datasync)
3245{
3246	unsigned int xid;
3247	int rc = 0;
3248	struct cifs_tcon *tcon;
3249	struct TCP_Server_Info *server;
3250	struct cifsFileInfo *smbfile = file->private_data;
3251	struct inode *inode = file_inode(file);
3252	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3253
3254	rc = file_write_and_wait_range(file, start, end);
3255	if (rc) {
3256		trace_cifs_fsync_err(inode->i_ino, rc);
3257		return rc;
3258	}
3259
3260	xid = get_xid();
3261
3262	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3263		 file, datasync);
3264
3265	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
3266		rc = cifs_zap_mapping(inode);
3267		if (rc) {
3268			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
3269			rc = 0; /* don't care about it in fsync */
3270		}
3271	}
3272
3273	tcon = tlink_tcon(smbfile->tlink);
3274	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3275		server = tcon->ses->server;
3276		if (server->ops->flush == NULL) {
3277			rc = -ENOSYS;
3278			goto strict_fsync_exit;
3279		}
3280
3281		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3282			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3283			if (smbfile) {
3284				rc = server->ops->flush(xid, tcon, &smbfile->fid);
3285				cifsFileInfo_put(smbfile);
3286			} else
3287				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3288		} else
3289			rc = server->ops->flush(xid, tcon, &smbfile->fid);
3290	}
3291
3292strict_fsync_exit:
3293	free_xid(xid);
3294	return rc;
3295}
3296
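/*
 * fsync: flush and wait for dirty pagecache data, then ask the server to
 * flush its copy of the file, without invalidating the local page cache.
 */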
3297int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
3298{
3299	unsigned int xid;
3300	int rc = 0;
3301	struct cifs_tcon *tcon;
3302	struct TCP_Server_Info *server;
3303	struct cifsFileInfo *smbfile = file->private_data;
3304	struct inode *inode = file_inode(file);
3305	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
3306
3307	rc = file_write_and_wait_range(file, start, end);
3308	if (rc) {
3309		trace_cifs_fsync_err(file_inode(file)->i_ino, rc);
3310		return rc;
3311	}
3312
3313	xid = get_xid();
3314
3315	cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
3316		 file, datasync);
3317
3318	tcon = tlink_tcon(smbfile->tlink);
3319	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
3320		server = tcon->ses->server;
3321		if (server->ops->flush == NULL) {
3322			rc = -ENOSYS;
3323			goto fsync_exit;
3324		}
3325
3326		if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) {
3327			smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY);
3328			if (smbfile) {
3329				rc = server->ops->flush(xid, tcon, &smbfile->fid);
3330				cifsFileInfo_put(smbfile);
3331			} else
3332				cifs_dbg(FYI, "ignore fsync for file not open for write\n");
3333		} else
3334			rc = server->ops->flush(xid, tcon, &smbfile->fid);
3335	}
3336
3337fsync_exit:
3338	free_xid(xid);
3339	return rc;
3340}
3341
3342/*
3343 * As file closes, flush all cached write data for this inode checking
3344 * for write behind errors.
3345 */
3346int cifs_flush(struct file *file, fl_owner_t id)
3347{
3348	struct inode *inode = file_inode(file);
3349	int rc = 0;
3350
3351	if (file->f_mode & FMODE_WRITE)
3352		rc = filemap_write_and_wait(inode->i_mapping);
3353
3354	cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
3355	if (rc) {
3356		/* get more nuanced writeback errors */
3357		rc = filemap_check_wb_err(file->f_mapping, 0);
3358		trace_cifs_flush_err(inode->i_ino, rc);
3359	}
3360	return rc;
3361}
3362
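/*
 * Release an uncached writedata: drop the reference it holds on its AIO
 * context before the common writedata release.
 */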
3363static void
3364cifs_uncached_writedata_release(struct kref *refcount)
3365{
3366	struct cifs_writedata *wdata = container_of(refcount,
3367					struct cifs_writedata, refcount);
3368
3369	kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
3370	cifs_writedata_release(refcount);
3371}
3372
3373static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
3374
3375static void
3376cifs_uncached_writev_complete(struct work_struct *work)
3377{
3378	struct cifs_writedata *wdata = container_of(work,
3379					struct cifs_writedata, work);
3380	struct inode *inode = d_inode(wdata->cfile->dentry);
3381	struct cifsInodeInfo *cifsi = CIFS_I(inode);
3382
3383	spin_lock(&inode->i_lock);
3384	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
3385	if (cifsi->netfs.remote_i_size > inode->i_size)
3386		i_size_write(inode, cifsi->netfs.remote_i_size);
3387	spin_unlock(&inode->i_lock);
3388
3389	complete(&wdata->done);
3390	collect_uncached_write_data(wdata->ctx);
3391	/* the below call can possibly free the last ref to aio ctx */
3392	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3393}
3394
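/*
 * Resend a complete uncached write request after a retryable failure:
 * reopen the file handle if it was invalidated, wait until the server
 * grants enough credits for the whole request, then reissue the async
 * write and put it back on @wdata_list.
 */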
3395static int
3396cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list,
3397	struct cifs_aio_ctx *ctx)
3398{
3399	unsigned int wsize;
3400	struct cifs_credits credits;
3401	int rc;
3402	struct TCP_Server_Info *server = wdata->server;
3403
3404	do {
3405		if (wdata->cfile->invalidHandle) {
3406			rc = cifs_reopen_file(wdata->cfile, false);
3407			if (rc == -EAGAIN)
3408				continue;
3409			else if (rc)
3410				break;
3411		}
3412
3414		/*
3415		 * Wait for credits to resend this wdata.
3416		 * Note: we are attempting to resend the whole wdata rather
3417		 * than in segments.
3418		 */
3419		do {
3420			rc = server->ops->wait_mtu_credits(server, wdata->bytes,
3421						&wsize, &credits);
3422			if (rc)
3423				goto fail;
3424
3425			if (wsize < wdata->bytes) {
3426				add_credits_and_wake_if(server, &credits, 0);
3427				msleep(1000);
3428			}
3429		} while (wsize < wdata->bytes);
3430		wdata->credits = credits;
3431
3432		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3433
3434		if (!rc) {
3435			if (wdata->cfile->invalidHandle)
3436				rc = -EAGAIN;
3437			else {
3438				wdata->replay = true;
3439#ifdef CONFIG_CIFS_SMB_DIRECT
3440				if (wdata->mr) {
3441					wdata->mr->need_invalidate = true;
3442					smbd_deregister_mr(wdata->mr);
3443					wdata->mr = NULL;
3444				}
3445#endif
3446				rc = server->ops->async_writev(wdata,
3447					cifs_uncached_writedata_release);
3448			}
3449		}
3450
3451		/* If the write was successfully sent, we are done */
3452		if (!rc) {
3453			list_add_tail(&wdata->list, wdata_list);
3454			return 0;
3455		}
3456
3457		/* Roll back credits and retry if needed */
3458		add_credits_and_wake_if(server, &wdata->credits, 0);
3459	} while (rc == -EAGAIN);
3460
3461fail:
3462	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3463	return rc;
3464}
3465
3466/*
3467 * Select span of a bvec iterator we're going to use.  Limit it by both maximum
3468 * size and maximum number of segments.
3469 */
3470static size_t cifs_limit_bvec_subset(const struct iov_iter *iter, size_t max_size,
3471				     size_t max_segs, unsigned int *_nsegs)
3472{
3473	const struct bio_vec *bvecs = iter->bvec;
3474	unsigned int nbv = iter->nr_segs, ix = 0, nsegs = 0;
3475	size_t len, span = 0, n = iter->count;
3476	size_t skip = iter->iov_offset;
3477
3478	if (WARN_ON(!iov_iter_is_bvec(iter)) || n == 0)
3479		return 0;
3480
3481	while (n && ix < nbv && skip) {
3482		len = bvecs[ix].bv_len;
3483		if (skip < len)
3484			break;
3485		skip -= len;
3486		n -= len;
3487		ix++;
3488	}
3489
3490	while (n && ix < nbv) {
3491		len = min3(n, bvecs[ix].bv_len - skip, max_size);
3492		span += len;
3493		max_size -= len;
3494		nsegs++;
3495		ix++;
3496		if (max_size == 0 || nsegs >= max_segs)
3497			break;
3498		skip = 0;
3499		n -= len;
3500	}
3501
3502	*_nsegs = nsegs;
3503	return span;
3504}
3505
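/*
 * Split the source iterator into chunks bounded by the negotiated wsize
 * (and, for smbdirect, the maximum number of segments), allocate a
 * cifs_writedata for each chunk and issue async writes, queueing the
 * in-flight requests on @wdata_list for the caller to wait on.
 */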
3506static int
3507cifs_write_from_iter(loff_t fpos, size_t len, struct iov_iter *from,
3508		     struct cifsFileInfo *open_file,
3509		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
3510		     struct cifs_aio_ctx *ctx)
3511{
3512	int rc = 0;
3513	size_t cur_len, max_len;
3514	struct cifs_writedata *wdata;
3515	pid_t pid;
3516	struct TCP_Server_Info *server;
3517	unsigned int xid, max_segs = INT_MAX;
3518
3519	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3520		pid = open_file->pid;
3521	else
3522		pid = current->tgid;
3523
3524	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
3525	xid = get_xid();
3526
3527#ifdef CONFIG_CIFS_SMB_DIRECT
3528	if (server->smbd_conn)
3529		max_segs = server->smbd_conn->max_frmr_depth;
3530#endif
3531
3532	do {
3533		struct cifs_credits credits_on_stack;
3534		struct cifs_credits *credits = &credits_on_stack;
3535		unsigned int wsize, nsegs = 0;
3536
3537		if (signal_pending(current)) {
3538			rc = -EINTR;
3539			break;
3540		}
3541
3542		if (open_file->invalidHandle) {
3543			rc = cifs_reopen_file(open_file, false);
3544			if (rc == -EAGAIN)
3545				continue;
3546			else if (rc)
3547				break;
3548		}
3549
3550		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
3551						   &wsize, credits);
3552		if (rc)
3553			break;
3554
3555		max_len = min_t(const size_t, len, wsize);
3556		if (!max_len) {
3557			rc = -EAGAIN;
3558			add_credits_and_wake_if(server, credits, 0);
3559			break;
3560		}
3561
3562		cur_len = cifs_limit_bvec_subset(from, max_len, max_segs, &nsegs);
3563		cifs_dbg(FYI, "write_from_iter len=%zx/%zx nsegs=%u/%lu/%u\n",
3564			 cur_len, max_len, nsegs, from->nr_segs, max_segs);
3565		if (cur_len == 0) {
3566			rc = -EIO;
3567			add_credits_and_wake_if(server, credits, 0);
3568			break;
3569		}
3570
3571		wdata = cifs_writedata_alloc(cifs_uncached_writev_complete);
3572		if (!wdata) {
3573			rc = -ENOMEM;
3574			add_credits_and_wake_if(server, credits, 0);
3575			break;
3576		}
3577
3578		wdata->sync_mode = WB_SYNC_ALL;
3579		wdata->offset	= (__u64)fpos;
3580		wdata->cfile	= cifsFileInfo_get(open_file);
3581		wdata->server	= server;
3582		wdata->pid	= pid;
3583		wdata->bytes	= cur_len;
3584		wdata->credits	= credits_on_stack;
3585		wdata->iter	= *from;
3586		wdata->ctx	= ctx;
3587		kref_get(&ctx->refcount);
3588
3589		iov_iter_truncate(&wdata->iter, cur_len);
3590
3591		rc = adjust_credits(server, &wdata->credits, wdata->bytes);
3592
3593		if (!rc) {
3594			if (wdata->cfile->invalidHandle)
3595				rc = -EAGAIN;
3596			else
3597				rc = server->ops->async_writev(wdata,
3598					cifs_uncached_writedata_release);
3599		}
3600
3601		if (rc) {
3602			add_credits_and_wake_if(server, &wdata->credits, 0);
3603			kref_put(&wdata->refcount,
3604				 cifs_uncached_writedata_release);
3605			if (rc == -EAGAIN)
3606				continue;
3607			break;
3608		}
3609
3610		list_add_tail(&wdata->list, wdata_list);
3611		iov_iter_advance(from, cur_len);
3612		fpos += cur_len;
3613		len -= cur_len;
3614	} while (len > 0);
3615
3616	free_xid(xid);
3617	return rc;
3618}
3619
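/*
 * Collect the results of the outstanding uncached writes attached to @ctx:
 * add up the bytes written, resend any request that failed with -EAGAIN,
 * and finally complete the iocb (or wake the synchronous waiter) with the
 * total written or the first error seen.
 */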
3620static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
3621{
3622	struct cifs_writedata *wdata, *tmp;
3623	struct cifs_tcon *tcon;
3624	struct cifs_sb_info *cifs_sb;
3625	struct dentry *dentry = ctx->cfile->dentry;
3626	ssize_t rc;
3627
3628	tcon = tlink_tcon(ctx->cfile->tlink);
3629	cifs_sb = CIFS_SB(dentry->d_sb);
3630
3631	mutex_lock(&ctx->aio_mutex);
3632
3633	if (list_empty(&ctx->list)) {
3634		mutex_unlock(&ctx->aio_mutex);
3635		return;
3636	}
3637
3638	rc = ctx->rc;
3639	/*
3640	 * Wait for and collect replies for any successful sends in order of
3641	 * increasing offset. Once an error is hit, then return without waiting
3642	 * for any more replies.
3643	 */
3644restart_loop:
3645	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
3646		if (!rc) {
3647			if (!try_wait_for_completion(&wdata->done)) {
3648				mutex_unlock(&ctx->aio_mutex);
3649				return;
3650			}
3651
3652			if (wdata->result)
3653				rc = wdata->result;
3654			else
3655				ctx->total_len += wdata->bytes;
3656
3657			/* resend call if it's a retryable error */
3658			if (rc == -EAGAIN) {
3659				struct list_head tmp_list;
3660				struct iov_iter tmp_from = ctx->iter;
3661
3662				INIT_LIST_HEAD(&tmp_list);
3663				list_del_init(&wdata->list);
3664
3665				if (ctx->direct_io)
3666					rc = cifs_resend_wdata(
3667						wdata, &tmp_list, ctx);
3668				else {
3669					iov_iter_advance(&tmp_from,
3670						 wdata->offset - ctx->pos);
3671
3672					rc = cifs_write_from_iter(wdata->offset,
3673						wdata->bytes, &tmp_from,
3674						ctx->cfile, cifs_sb, &tmp_list,
3675						ctx);
3676
3677					kref_put(&wdata->refcount,
3678						cifs_uncached_writedata_release);
3679				}
3680
3681				list_splice(&tmp_list, &ctx->list);
3682				goto restart_loop;
3683			}
3684		}
3685		list_del_init(&wdata->list);
3686		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
3687	}
3688
3689	cifs_stats_bytes_written(tcon, ctx->total_len);
3690	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);
3691
3692	ctx->rc = (rc == 0) ? ctx->total_len : rc;
3693
3694	mutex_unlock(&ctx->aio_mutex);
3695
3696	if (ctx->iocb && ctx->iocb->ki_complete)
3697		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
3698	else
3699		complete(&ctx->done);
3700}
3701
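/*
 * Common body of the uncached and direct write paths: pin or copy the
 * source iterator so that it stays valid in the async workers, issue the
 * writes via cifs_write_from_iter(), then either return -EIOCBQUEUED for
 * async iocbs or wait for the whole batch to complete.
 */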
3702static ssize_t __cifs_writev(
3703	struct kiocb *iocb, struct iov_iter *from, bool direct)
3704{
3705	struct file *file = iocb->ki_filp;
3706	ssize_t total_written = 0;
3707	struct cifsFileInfo *cfile;
3708	struct cifs_tcon *tcon;
3709	struct cifs_sb_info *cifs_sb;
3710	struct cifs_aio_ctx *ctx;
3711	int rc;
3712
3713	rc = generic_write_checks(iocb, from);
3714	if (rc <= 0)
3715		return rc;
3716
3717	cifs_sb = CIFS_FILE_SB(file);
3718	cfile = file->private_data;
3719	tcon = tlink_tcon(cfile->tlink);
3720
3721	if (!tcon->ses->server->ops->async_writev)
3722		return -ENOSYS;
3723
3724	ctx = cifs_aio_ctx_alloc();
3725	if (!ctx)
3726		return -ENOMEM;
3727
3728	ctx->cfile = cifsFileInfo_get(cfile);
3729
3730	if (!is_sync_kiocb(iocb))
3731		ctx->iocb = iocb;
3732
3733	ctx->pos = iocb->ki_pos;
3734	ctx->direct_io = direct;
3735	ctx->nr_pinned_pages = 0;
3736
3737	if (user_backed_iter(from)) {
3738		/*
3739		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
3740		 * they contain references to the calling process's virtual
3741		 * memory layout which won't be available in an async worker
3742		 * thread.  This also takes a pin on every folio involved.
3743		 */
3744		rc = netfs_extract_user_iter(from, iov_iter_count(from),
3745					     &ctx->iter, 0);
3746		if (rc < 0) {
3747			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3748			return rc;
3749		}
3750
3751		ctx->nr_pinned_pages = rc;
3752		ctx->bv = (void *)ctx->iter.bvec;
3753		ctx->bv_need_unpin = iov_iter_extract_will_pin(from);
3754	} else if ((iov_iter_is_bvec(from) || iov_iter_is_kvec(from)) &&
3755		   !is_sync_kiocb(iocb)) {
3756		/*
3757		 * If the op is asynchronous, we need to copy the list attached
3758		 * to a BVEC/KVEC-type iterator, but we assume that the storage
3759		 * will be pinned by the caller; in any case, we may or may not
3760		 * be able to pin the pages, so we don't try.
3761		 */
3762		ctx->bv = (void *)dup_iter(&ctx->iter, from, GFP_KERNEL);
3763		if (!ctx->bv) {
3764			kref_put(&ctx->refcount, cifs_aio_ctx_release);
3765			return -ENOMEM;
3766		}
3767	} else {
3768		/*
3769		 * Otherwise, we just pass the iterator down as-is and rely on
3770		 * the caller to make sure the pages referred to by the
3771		 * iterator don't evaporate.
3772		 */
3773		ctx->iter = *from;
3774	}
3775
3776	ctx->len = iov_iter_count(&ctx->iter);
3777
3778	/* grab a lock here because the write response handlers can access ctx */
3779	mutex_lock(&ctx->aio_mutex);
3780
3781	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &ctx->iter,
3782				  cfile, cifs_sb, &ctx->list, ctx);
3783
3784	/*
3785	 * If at least one write was successfully sent, then discard any rc
3786	 * value from the later writes. If the other writes succeed, then
3787	 * we'll end up returning whatever was written. If they fail, then
3788	 * we'll get a new rc value from that.
3789	 */
3790	if (!list_empty(&ctx->list))
3791		rc = 0;
3792
3793	mutex_unlock(&ctx->aio_mutex);
3794
3795	if (rc) {
3796		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3797		return rc;
3798	}
3799
3800	if (!is_sync_kiocb(iocb)) {
3801		kref_put(&ctx->refcount, cifs_aio_ctx_release);
3802		return -EIOCBQUEUED;
3803	}
3804
3805	rc = wait_for_completion_killable(&ctx->done);
3806	if (rc) {
3807		mutex_lock(&ctx->aio_mutex);
3808		ctx->rc = rc = -EINTR;
3809		total_written = ctx->total_len;
3810		mutex_unlock(&ctx->aio_mutex);
3811	} else {
3812		rc = ctx->rc;
3813		total_written = ctx->total_len;
3814	}
3815
3816	kref_put(&ctx->refcount, cifs_aio_ctx_release);
3817
3818	if (unlikely(!total_written))
3819		return rc;
3820
3821	iocb->ki_pos += total_written;
3822	return total_written;
3823}
3824
3825ssize_t cifs_direct_writev(struct kiocb *iocb, struct iov_iter *from)
3826{
3827	struct file *file = iocb->ki_filp;
3828
3829	cifs_revalidate_mapping(file->f_inode);
3830	return __cifs_writev(iocb, from, true);
3831}
3832
3833ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
3834{
3835	return __cifs_writev(iocb, from, false);
3836}
3837
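/*
 * Cached write path: take the inode lock and the lock_sem, check that no
 * conflicting byte-range lock covers the range, then write through the
 * page cache with __generic_file_write_iter() and sync if required.
 */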
3838static ssize_t
3839cifs_writev(struct kiocb *iocb, struct iov_iter *from)
3840{
3841	struct file *file = iocb->ki_filp;
3842	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
3843	struct inode *inode = file->f_mapping->host;
3844	struct cifsInodeInfo *cinode = CIFS_I(inode);
3845	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
3846	ssize_t rc;
3847
3848	inode_lock(inode);
3849	/*
3850	 * We need to hold the sem to be sure nobody modifies lock list
3851	 * with a brlock that prevents writing.
3852	 */
3853	down_read(&cinode->lock_sem);
3854
3855	rc = generic_write_checks(iocb, from);
3856	if (rc <= 0)
3857		goto out;
3858
3859	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
3860				     server->vals->exclusive_lock_type, 0,
3861				     NULL, CIFS_WRITE_OP))
3862		rc = __generic_file_write_iter(iocb, from);
3863	else
3864		rc = -EACCES;
3865out:
3866	up_read(&cinode->lock_sem);
3867	inode_unlock(inode);
3868
3869	if (rc > 0)
3870		rc = generic_write_sync(iocb, rc);
3871	return rc;
3872}
3873
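/*
 * Strict cache mode write: if we hold a write-caching oplock/lease, write
 * through the page cache (checking byte-range locks unless POSIX locking
 * applies); otherwise send the data straight to the server and, if we hold
 * a read-caching oplock, zap the now-stale page cache.
 */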
3874ssize_t
3875cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
3876{
3877	struct inode *inode = file_inode(iocb->ki_filp);
3878	struct cifsInodeInfo *cinode = CIFS_I(inode);
3879	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
3880	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
3881						iocb->ki_filp->private_data;
3882	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3883	ssize_t written;
3884
3885	written = cifs_get_writer(cinode);
3886	if (written)
3887		return written;
3888
3889	if (CIFS_CACHE_WRITE(cinode)) {
3890		if (cap_unix(tcon->ses) &&
3891		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
3892		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
3893			written = generic_file_write_iter(iocb, from);
3894			goto out;
3895		}
3896		written = cifs_writev(iocb, from);
3897		goto out;
3898	}
3899	/*
3900	 * For non-oplocked files in strict cache mode we need to write the data
3901	 * to the server exactly from the pos to pos+len-1 rather than flush all
3902	 * affected pages because it may cause an error with mandatory locks on
3903	 * these pages but not on the region from pos to pos+len-1.
3904	 */
3905	written = cifs_user_writev(iocb, from);
3906	if (CIFS_CACHE_READ(cinode)) {
3907		/*
3908		 * We have read level caching and we have just sent a write
3909		 * request to the server thus making data in the cache stale.
3910		 * Zap the cache and set oplock/lease level to NONE to avoid
3911		 * reading stale data from the cache. All subsequent read
3912		 * operations will read new data from the server.
3913		 */
3914		cifs_zap_mapping(inode);
3915		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
3916			 inode);
3917		cinode->oplock = 0;
3918	}
3919out:
3920	cifs_put_writer(cinode);
3921	return written;
3922}
3923
3924static struct cifs_readdata *cifs_readdata_alloc(work_func_t complete)
3925{
3926	struct cifs_readdata *rdata;
3927
3928	rdata = kzalloc(sizeof(*rdata), GFP_KERNEL);
3929	if (rdata) {
3930		kref_init(&rdata->refcount);
3931		INIT_LIST_HEAD(&rdata->list);
3932		init_completion(&rdata->done);
3933		INIT_WORK(&rdata->work, complete);
3934	}
3935
3936	return rdata;
3937}
3938
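/*
 * Release a cifs_readdata: drop the reference on the owning aio ctx,
 * deregister any smbdirect memory registration, put the open file and
 * free the structure.
 */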
3939void
3940cifs_readdata_release(struct kref *refcount)
3941{
3942	struct cifs_readdata *rdata = container_of(refcount,
3943					struct cifs_readdata, refcount);
3944
3945	if (rdata->ctx)
3946		kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
3947#ifdef CONFIG_CIFS_SMB_DIRECT
3948	if (rdata->mr) {
3949		smbd_deregister_mr(rdata->mr);
3950		rdata->mr = NULL;
3951	}
3952#endif
3953	if (rdata->cfile)
3954		cifsFileInfo_put(rdata->cfile);
3955
3956	kfree(rdata);
3957}
3958
3959static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3960
3961static void
3962cifs_uncached_readv_complete(struct work_struct *work)
3963{
3964	struct cifs_readdata *rdata = container_of(work,
3965						struct cifs_readdata, work);
3966
3967	complete(&rdata->done);
3968	collect_uncached_read_data(rdata->ctx);
3969	/* the call below can possibly free the last ref to the aio ctx */
3970	kref_put(&rdata->refcount, cifs_readdata_release);
3971}
3972
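/*
 * Resend a whole rdata after a retryable failure: reopen the file handle
 * if it has been invalidated, wait until enough credits are available to
 * cover the entire request and reissue the async read.  On success the
 * rdata is added to the caller's pending list.
 */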
3973static int cifs_resend_rdata(struct cifs_readdata *rdata,
3974			struct list_head *rdata_list,
3975			struct cifs_aio_ctx *ctx)
3976{
3977	unsigned int rsize;
3978	struct cifs_credits credits;
3979	int rc;
3980	struct TCP_Server_Info *server;
3981
3982	/* XXX: should we pick a new channel here? */
3983	server = rdata->server;
3984
3985	do {
3986		if (rdata->cfile->invalidHandle) {
3987			rc = cifs_reopen_file(rdata->cfile, true);
3988			if (rc == -EAGAIN)
3989				continue;
3990			else if (rc)
3991				break;
3992		}
3993
3994		/*
3995		 * Wait for credits to resend this rdata.
3996		 * Note: we are attempting to resend the whole rdata, not in
3997		 * segments.
3998		 */
3999		do {
4000			rc = server->ops->wait_mtu_credits(server, rdata->bytes,
4001						&rsize, &credits);
4002
4003			if (rc)
4004				goto fail;
4005
4006			if (rsize < rdata->bytes) {
4007				add_credits_and_wake_if(server, &credits, 0);
4008				msleep(1000);
4009			}
4010		} while (rsize < rdata->bytes);
4011		rdata->credits = credits;
4012
4013		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4014		if (!rc) {
4015			if (rdata->cfile->invalidHandle)
4016				rc = -EAGAIN;
4017			else {
4018#ifdef CONFIG_CIFS_SMB_DIRECT
4019				if (rdata->mr) {
4020					rdata->mr->need_invalidate = true;
4021					smbd_deregister_mr(rdata->mr);
4022					rdata->mr = NULL;
4023				}
4024#endif
4025				rc = server->ops->async_readv(rdata);
4026			}
4027		}
4028
4029		/* If the read was successfully sent, we are done */
4030		if (!rc) {
4031			/* Add to aio pending list */
4032			list_add_tail(&rdata->list, rdata_list);
4033			return 0;
4034		}
4035
4036		/* Roll back credits and retry if needed */
4037		add_credits_and_wake_if(server, &rdata->credits, 0);
4038	} while (rc == -EAGAIN);
4039
4040fail:
4041	kref_put(&rdata->refcount, cifs_readdata_release);
4042	return rc;
4043}
4044
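/*
 * Chop an uncached read into rsize-sized (and, over smbdirect, segment-
 * limited) chunks, allocate a cifs_readdata for each chunk and issue the
 * async reads, queueing each rdata on @rdata_list as it is sent.
 */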
4045static int
4046cifs_send_async_read(loff_t fpos, size_t len, struct cifsFileInfo *open_file,
4047		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
4048		     struct cifs_aio_ctx *ctx)
4049{
4050	struct cifs_readdata *rdata;
4051	unsigned int rsize, nsegs, max_segs = INT_MAX;
4052	struct cifs_credits credits_on_stack;
4053	struct cifs_credits *credits = &credits_on_stack;
4054	size_t cur_len, max_len;
4055	int rc;
4056	pid_t pid;
4057	struct TCP_Server_Info *server;
4058
4059	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4060
4061#ifdef CONFIG_CIFS_SMB_DIRECT
4062	if (server->smbd_conn)
4063		max_segs = server->smbd_conn->max_frmr_depth;
4064#endif
4065
4066	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4067		pid = open_file->pid;
4068	else
4069		pid = current->tgid;
4070
4071	do {
4072		if (open_file->invalidHandle) {
4073			rc = cifs_reopen_file(open_file, true);
4074			if (rc == -EAGAIN)
4075				continue;
4076			else if (rc)
4077				break;
4078		}
4079
4080		if (cifs_sb->ctx->rsize == 0)
4081			cifs_sb->ctx->rsize =
4082				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4083							     cifs_sb->ctx);
4084
4085		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4086						   &rsize, credits);
4087		if (rc)
4088			break;
4089
4090		max_len = min_t(size_t, len, rsize);
4091
4092		cur_len = cifs_limit_bvec_subset(&ctx->iter, max_len,
4093						 max_segs, &nsegs);
4094		cifs_dbg(FYI, "read-to-iter len=%zx/%zx nsegs=%u/%lu/%u\n",
4095			 cur_len, max_len, nsegs, ctx->iter.nr_segs, max_segs);
4096		if (cur_len == 0) {
4097			rc = -EIO;
4098			add_credits_and_wake_if(server, credits, 0);
4099			break;
4100		}
4101
4102		rdata = cifs_readdata_alloc(cifs_uncached_readv_complete);
4103		if (!rdata) {
4104			add_credits_and_wake_if(server, credits, 0);
4105			rc = -ENOMEM;
4106			break;
4107		}
4108
4109		rdata->server	= server;
4110		rdata->cfile	= cifsFileInfo_get(open_file);
4111		rdata->offset	= fpos;
4112		rdata->bytes	= cur_len;
4113		rdata->pid	= pid;
4114		rdata->credits	= credits_on_stack;
4115		rdata->ctx	= ctx;
4116		kref_get(&ctx->refcount);
4117
4118		rdata->iter	= ctx->iter;
4119		iov_iter_truncate(&rdata->iter, cur_len);
4120
4121		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4122
4123		if (!rc) {
4124			if (rdata->cfile->invalidHandle)
4125				rc = -EAGAIN;
4126			else
4127				rc = server->ops->async_readv(rdata);
4128		}
4129
4130		if (rc) {
4131			add_credits_and_wake_if(server, &rdata->credits, 0);
4132			kref_put(&rdata->refcount, cifs_readdata_release);
4133			if (rc == -EAGAIN)
4134				continue;
4135			break;
4136		}
4137
4138		list_add_tail(&rdata->list, rdata_list);
4139		iov_iter_advance(&ctx->iter, cur_len);
4140		fpos += cur_len;
4141		len -= cur_len;
4142	} while (len > 0);
4143
4144	return rc;
4145}
4146
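/*
 * Collect the results of the uncached reads on ctx->list in order of
 * increasing offset: resend requests that failed with -EAGAIN, discard
 * everything after a short read, and accumulate the bytes received.  When
 * all requests are accounted for, complete the iocb (or ctx->done for
 * synchronous callers).
 */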
4147static void
4148collect_uncached_read_data(struct cifs_aio_ctx *ctx)
4149{
4150	struct cifs_readdata *rdata, *tmp;
4151	struct cifs_sb_info *cifs_sb;
4152	int rc;
4153
4154	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);
4155
4156	mutex_lock(&ctx->aio_mutex);
4157
4158	if (list_empty(&ctx->list)) {
4159		mutex_unlock(&ctx->aio_mutex);
4160		return;
4161	}
4162
4163	rc = ctx->rc;
4164	/* the loop below should proceed in the order of increasing offsets */
4165again:
4166	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
4167		if (!rc) {
4168			if (!try_wait_for_completion(&rdata->done)) {
4169				mutex_unlock(&ctx->aio_mutex);
4170				return;
4171			}
4172
4173			if (rdata->result == -EAGAIN) {
4174				/* resend call if it's a retryable error */
4175				struct list_head tmp_list;
4176				unsigned int got_bytes = rdata->got_bytes;
4177
4178				list_del_init(&rdata->list);
4179				INIT_LIST_HEAD(&tmp_list);
4180
4181				if (ctx->direct_io) {
4182					/*
4183					 * Re-use rdata as this is a
4184					 * direct I/O
4185					 */
4186					rc = cifs_resend_rdata(
4187						rdata,
4188						&tmp_list, ctx);
4189				} else {
4190					rc = cifs_send_async_read(
4191						rdata->offset + got_bytes,
4192						rdata->bytes - got_bytes,
4193						rdata->cfile, cifs_sb,
4194						&tmp_list, ctx);
4195
4196					kref_put(&rdata->refcount,
4197						cifs_readdata_release);
4198				}
4199
4200				list_splice(&tmp_list, &ctx->list);
4201
4202				goto again;
4203			} else if (rdata->result)
4204				rc = rdata->result;
4205
4206			/* if there was a short read -- discard anything left */
4207			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
4208				rc = -ENODATA;
4209
4210			ctx->total_len += rdata->got_bytes;
4211		}
4212		list_del_init(&rdata->list);
4213		kref_put(&rdata->refcount, cifs_readdata_release);
4214	}
4215
4216	/* mask nodata case */
4217	if (rc == -ENODATA)
4218		rc = 0;
4219
4220	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;
4221
4222	mutex_unlock(&ctx->aio_mutex);
4223
4224	if (ctx->iocb && ctx->iocb->ki_complete)
4225		ctx->iocb->ki_complete(ctx->iocb, ctx->rc);
4226	else
4227		complete(&ctx->done);
4228}
4229
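/*
 * Common implementation for uncached and direct reads: set up a
 * cifs_aio_ctx, extract or duplicate the destination iterator so it can be
 * used from worker threads, flush the affected page cache range for direct
 * I/O, then issue the async reads.  Synchronous kiocbs wait for the result;
 * asynchronous ones return -EIOCBQUEUED.
 */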
4230static ssize_t __cifs_readv(
4231	struct kiocb *iocb, struct iov_iter *to, bool direct)
4232{
4233	size_t len;
4234	struct file *file = iocb->ki_filp;
4235	struct cifs_sb_info *cifs_sb;
4236	struct cifsFileInfo *cfile;
4237	struct cifs_tcon *tcon;
4238	ssize_t rc, total_read = 0;
4239	loff_t offset = iocb->ki_pos;
4240	struct cifs_aio_ctx *ctx;
4241
4242	len = iov_iter_count(to);
4243	if (!len)
4244		return 0;
4245
4246	cifs_sb = CIFS_FILE_SB(file);
4247	cfile = file->private_data;
4248	tcon = tlink_tcon(cfile->tlink);
4249
4250	if (!tcon->ses->server->ops->async_readv)
4251		return -ENOSYS;
4252
4253	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4254		cifs_dbg(FYI, "attempting read on write only file instance\n");
4255
4256	ctx = cifs_aio_ctx_alloc();
4257	if (!ctx)
4258		return -ENOMEM;
4259
4260	ctx->pos	= offset;
4261	ctx->direct_io	= direct;
4262	ctx->len	= len;
4263	ctx->cfile	= cifsFileInfo_get(cfile);
4264	ctx->nr_pinned_pages = 0;
4265
4266	if (!is_sync_kiocb(iocb))
4267		ctx->iocb = iocb;
4268
4269	if (user_backed_iter(to)) {
4270		/*
4271		 * Extract IOVEC/UBUF-type iterators to a BVEC-type iterator as
4272		 * they contain references to the calling process's virtual
4273		 * memory layout which won't be available in an async worker
4274		 * thread.  This also takes a pin on every folio involved.
4275		 */
4276		rc = netfs_extract_user_iter(to, iov_iter_count(to),
4277					     &ctx->iter, 0);
4278		if (rc < 0) {
4279			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4280			return rc;
4281		}
4282
4283		ctx->nr_pinned_pages = rc;
4284		ctx->bv = (void *)ctx->iter.bvec;
4285		ctx->bv_need_unpin = iov_iter_extract_will_pin(to);
4286		ctx->should_dirty = true;
4287	} else if ((iov_iter_is_bvec(to) || iov_iter_is_kvec(to)) &&
4288		   !is_sync_kiocb(iocb)) {
4289		/*
4290		 * If the op is asynchronous, we need to copy the list attached
4291		 * to a BVEC/KVEC-type iterator, but we assume that the storage
4292		 * will be retained by the caller; in any case, we may or may
4293		 * not be able to pin the pages, so we don't try.
4294		 */
4295		ctx->bv = (void *)dup_iter(&ctx->iter, to, GFP_KERNEL);
4296		if (!ctx->bv) {
4297			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4298			return -ENOMEM;
4299		}
4300	} else {
4301		/*
4302		 * Otherwise, we just pass the iterator down as-is and rely on
4303		 * the caller to make sure the pages referred to by the
4304		 * iterator don't evaporate.
4305		 */
4306		ctx->iter = *to;
4307	}
4308
4309	if (direct) {
4310		rc = filemap_write_and_wait_range(file->f_inode->i_mapping,
4311						  offset, offset + len - 1);
4312		if (rc) {
4313			kref_put(&ctx->refcount, cifs_aio_ctx_release);
4314			return -EAGAIN;
4315		}
4316	}
4317
4318	/* grab a lock here because the read response handlers can access ctx */
4319	mutex_lock(&ctx->aio_mutex);
4320
4321	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);
4322
4323	/* if at least one read request send succeeded, then reset rc */
4324	if (!list_empty(&ctx->list))
4325		rc = 0;
4326
4327	mutex_unlock(&ctx->aio_mutex);
4328
4329	if (rc) {
4330		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4331		return rc;
4332	}
4333
4334	if (!is_sync_kiocb(iocb)) {
4335		kref_put(&ctx->refcount, cifs_aio_ctx_release);
4336		return -EIOCBQUEUED;
4337	}
4338
4339	rc = wait_for_completion_killable(&ctx->done);
4340	if (rc) {
4341		mutex_lock(&ctx->aio_mutex);
4342		ctx->rc = rc = -EINTR;
4343		total_read = ctx->total_len;
4344		mutex_unlock(&ctx->aio_mutex);
4345	} else {
4346		rc = ctx->rc;
4347		total_read = ctx->total_len;
4348	}
4349
4350	kref_put(&ctx->refcount, cifs_aio_ctx_release);
4351
4352	if (total_read) {
4353		iocb->ki_pos += total_read;
4354		return total_read;
4355	}
4356	return rc;
4357}
4358
4359ssize_t cifs_direct_readv(struct kiocb *iocb, struct iov_iter *to)
4360{
4361	return __cifs_readv(iocb, to, true);
4362}
4363
4364ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
4365{
4366	return __cifs_readv(iocb, to, false);
4367}
4368
4369ssize_t
4370cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
4371{
4372	struct inode *inode = file_inode(iocb->ki_filp);
4373	struct cifsInodeInfo *cinode = CIFS_I(inode);
4374	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
4375	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
4376						iocb->ki_filp->private_data;
4377	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
4378	int rc = -EACCES;
4379
4380	/*
4381	 * In strict cache mode we need to read from the server all the time
4382	 * if we don't have a level II oplock because the server can delay mtime
4383	 * change - so we can't make a decision about invalidating the inode.
4384	 * Page reads can also fail if there are mandatory locks on pages
4385	 * affected by this read but not on the region from pos to
4386	 * pos+len-1.
4387	 */
4388	if (!CIFS_CACHE_READ(cinode))
4389		return cifs_user_readv(iocb, to);
4390
4391	if (cap_unix(tcon->ses) &&
4392	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
4393	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
4394		return generic_file_read_iter(iocb, to);
4395
4396	/*
4397	 * We need to hold the sem to be sure nobody modifies lock list
4398	 * with a brlock that prevents reading.
4399	 */
4400	down_read(&cinode->lock_sem);
4401	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
4402				     tcon->ses->server->vals->shared_lock_type,
4403				     0, NULL, CIFS_READ_OP))
4404		rc = generic_file_read_iter(iocb, to);
4405	up_read(&cinode->lock_sem);
4406	return rc;
4407}
4408
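/*
 * Synchronous read into a kernel buffer, used by the readpage path: issue
 * rsize-sized sync_read calls, reopening an invalidated handle and retrying
 * on -EAGAIN, until the request is satisfied or the server returns no data.
 */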
4409static ssize_t
4410cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
4411{
4412	int rc = -EACCES;
4413	unsigned int bytes_read = 0;
4414	unsigned int total_read;
4415	unsigned int current_read_size;
4416	unsigned int rsize;
4417	struct cifs_sb_info *cifs_sb;
4418	struct cifs_tcon *tcon;
4419	struct TCP_Server_Info *server;
4420	unsigned int xid;
4421	char *cur_offset;
4422	struct cifsFileInfo *open_file;
4423	struct cifs_io_parms io_parms = {0};
4424	int buf_type = CIFS_NO_BUFFER;
4425	__u32 pid;
4426
4427	xid = get_xid();
4428	cifs_sb = CIFS_FILE_SB(file);
4429
4430	/* FIXME: set up handlers for larger reads and/or convert to async */
4431	rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
4432
4433	if (file->private_data == NULL) {
4434		rc = -EBADF;
4435		free_xid(xid);
4436		return rc;
4437	}
4438	open_file = file->private_data;
4439	tcon = tlink_tcon(open_file->tlink);
4440	server = cifs_pick_channel(tcon->ses);
4441
4442	if (!server->ops->sync_read) {
4443		free_xid(xid);
4444		return -ENOSYS;
4445	}
4446
4447	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4448		pid = open_file->pid;
4449	else
4450		pid = current->tgid;
4451
4452	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
4453		cifs_dbg(FYI, "attempting read on write only file instance\n");
4454
4455	for (total_read = 0, cur_offset = read_data; read_size > total_read;
4456	     total_read += bytes_read, cur_offset += bytes_read) {
4457		do {
4458			current_read_size = min_t(uint, read_size - total_read,
4459						  rsize);
4460			/*
4461			 * For Windows ME and 9x we do not want to request more
4462			 * than the server negotiated since it will refuse the
4463			 * read otherwise.
4464			 */
4465			if (!(tcon->ses->capabilities &
4466				tcon->ses->server->vals->cap_large_files)) {
4467				current_read_size = min_t(uint,
4468					current_read_size, CIFSMaxBufSize);
4469			}
4470			if (open_file->invalidHandle) {
4471				rc = cifs_reopen_file(open_file, true);
4472				if (rc != 0)
4473					break;
4474			}
4475			io_parms.pid = pid;
4476			io_parms.tcon = tcon;
4477			io_parms.offset = *offset;
4478			io_parms.length = current_read_size;
4479			io_parms.server = server;
4480			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
4481						    &bytes_read, &cur_offset,
4482						    &buf_type);
4483		} while (rc == -EAGAIN);
4484
4485		if (rc || (bytes_read == 0)) {
4486			if (total_read) {
4487				break;
4488			} else {
4489				free_xid(xid);
4490				return rc;
4491			}
4492		} else {
4493			cifs_stats_bytes_read(tcon, total_read);
4494			*offset += bytes_read;
4495		}
4496	}
4497	free_xid(xid);
4498	return total_read;
4499}
4500
4501/*
4502 * If the page is mmap'ed into a process' page tables, then we need to make
4503 * sure that it doesn't change while being written back.
4504 */
4505static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf)
4506{
4507	struct folio *folio = page_folio(vmf->page);
4508
4509	/* Wait for the folio to be written to the cache before we allow it to
4510	 * be modified.  We then assume the entire folio will need writing back.
4511	 */
4512#ifdef CONFIG_CIFS_FSCACHE
4513	if (folio_test_fscache(folio) &&
4514	    folio_wait_fscache_killable(folio) < 0)
4515		return VM_FAULT_RETRY;
4516#endif
4517
4518	folio_wait_writeback(folio);
4519
4520	if (folio_lock_killable(folio) < 0)
4521		return VM_FAULT_RETRY;
4522	return VM_FAULT_LOCKED;
4523}
4524
4525static const struct vm_operations_struct cifs_file_vm_ops = {
4526	.fault = filemap_fault,
4527	.map_pages = filemap_map_pages,
4528	.page_mkwrite = cifs_page_mkwrite,
4529};
4530
4531int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
4532{
4533	int xid, rc = 0;
4534	struct inode *inode = file_inode(file);
4535
4536	xid = get_xid();
4537
4538	if (!CIFS_CACHE_READ(CIFS_I(inode)))
4539		rc = cifs_zap_mapping(inode);
4540	if (!rc)
4541		rc = generic_file_mmap(file, vma);
4542	if (!rc)
4543		vma->vm_ops = &cifs_file_vm_ops;
4544
4545	free_xid(xid);
4546	return rc;
4547}
4548
4549int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
4550{
4551	int rc, xid;
4552
4553	xid = get_xid();
4554
4555	rc = cifs_revalidate_file(file);
4556	if (rc)
4557		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
4558			 rc);
4559	if (!rc)
4560		rc = generic_file_mmap(file, vma);
4561	if (!rc)
4562		vma->vm_ops = &cifs_file_vm_ops;
4563
4564	free_xid(xid);
4565	return rc;
4566}
4567
4568/*
4569 * Unlock a bunch of folios in the pagecache.
4570 */
4571static void cifs_unlock_folios(struct address_space *mapping, pgoff_t first, pgoff_t last)
4572{
4573	struct folio *folio;
4574	XA_STATE(xas, &mapping->i_pages, first);
4575
4576	rcu_read_lock();
4577	xas_for_each(&xas, folio, last) {
4578		folio_unlock(folio);
4579	}
4580	rcu_read_unlock();
4581}
4582
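/*
 * Completion worker for a readahead request: copy the data to fscache on
 * success, zero any shortfall in the destination, mark the folios uptodate
 * when the read succeeded and unlock them.
 */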
4583static void cifs_readahead_complete(struct work_struct *work)
4584{
4585	struct cifs_readdata *rdata = container_of(work,
4586						   struct cifs_readdata, work);
4587	struct folio *folio;
4588	pgoff_t last;
4589	bool good = rdata->result == 0 || (rdata->result == -EAGAIN && rdata->got_bytes);
4590
4591	XA_STATE(xas, &rdata->mapping->i_pages, rdata->offset / PAGE_SIZE);
4592
4593	if (good)
4594		cifs_readahead_to_fscache(rdata->mapping->host,
4595					  rdata->offset, rdata->bytes);
4596
4597	if (iov_iter_count(&rdata->iter) > 0)
4598		iov_iter_zero(iov_iter_count(&rdata->iter), &rdata->iter);
4599
4600	last = (rdata->offset + rdata->bytes - 1) / PAGE_SIZE;
4601
4602	rcu_read_lock();
4603	xas_for_each(&xas, folio, last) {
4604		if (good) {
4605			flush_dcache_folio(folio);
4606			folio_mark_uptodate(folio);
4607		}
4608		folio_unlock(folio);
4609	}
4610	rcu_read_unlock();
4611
4612	kref_put(&rdata->refcount, cifs_readdata_release);
4613}
4614
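/*
 * ->readahead: pull folios off the readahead_control, serving ranges that
 * fscache already holds from the cache and issuing rsize-sized async reads
 * to the server for the rest.
 */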
4615static void cifs_readahead(struct readahead_control *ractl)
4616{
4617	struct cifsFileInfo *open_file = ractl->file->private_data;
4618	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(ractl->file);
4619	struct TCP_Server_Info *server;
4620	unsigned int xid, nr_pages, cache_nr_pages = 0;
4621	unsigned int ra_pages;
4622	pgoff_t next_cached = ULONG_MAX, ra_index;
4623	bool caching = fscache_cookie_enabled(cifs_inode_cookie(ractl->mapping->host)) &&
4624		cifs_inode_cookie(ractl->mapping->host)->cache_priv;
4625	bool check_cache = caching;
4626	pid_t pid;
4627	int rc = 0;
4628
4629	/* Note that readahead_count() lags behind our dequeuing of pages from
4630	 * the ractl, so we have to keep track for ourselves.
4631	 */
4632	ra_pages = readahead_count(ractl);
4633	ra_index = readahead_index(ractl);
4634
4635	xid = get_xid();
4636
4637	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
4638		pid = open_file->pid;
4639	else
4640		pid = current->tgid;
4641
4642	server = cifs_pick_channel(tlink_tcon(open_file->tlink)->ses);
4643
4644	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
4645		 __func__, ractl->file, ractl->mapping, ra_pages);
4646
4647	/*
4648	 * Chop the readahead request up into rsize-sized read requests.
4649	 */
4650	while ((nr_pages = ra_pages)) {
4651		unsigned int i, rsize;
4652		struct cifs_readdata *rdata;
4653		struct cifs_credits credits_on_stack;
4654		struct cifs_credits *credits = &credits_on_stack;
4655		struct folio *folio;
4656		pgoff_t fsize;
4657
4658		/*
4659		 * Find out if we have anything cached in the range of
4660		 * interest, and if so, where the next chunk of cached data is.
4661		 */
4662		if (caching) {
4663			if (check_cache) {
4664				rc = cifs_fscache_query_occupancy(
4665					ractl->mapping->host, ra_index, nr_pages,
4666					&next_cached, &cache_nr_pages);
4667				if (rc < 0)
4668					caching = false;
4669				check_cache = false;
4670			}
4671
4672			if (ra_index == next_cached) {
4673				/*
4674				 * TODO: Send a whole batch of pages to be read
4675				 * by the cache.
4676				 */
4677				folio = readahead_folio(ractl);
4678				fsize = folio_nr_pages(folio);
4679				ra_pages -= fsize;
4680				ra_index += fsize;
4681				if (cifs_readpage_from_fscache(ractl->mapping->host,
4682							       &folio->page) < 0) {
4683					/*
4684					 * TODO: Deal with cache read failure
4685					 * here, but for the moment, delegate
4686					 * that to readpage.
4687					 */
4688					caching = false;
4689				}
4690				folio_unlock(folio);
4691				next_cached += fsize;
4692				cache_nr_pages -= fsize;
4693				if (cache_nr_pages == 0)
4694					check_cache = true;
4695				continue;
4696			}
4697		}
4698
4699		if (open_file->invalidHandle) {
4700			rc = cifs_reopen_file(open_file, true);
4701			if (rc) {
4702				if (rc == -EAGAIN)
4703					continue;
4704				break;
4705			}
4706		}
4707
4708		if (cifs_sb->ctx->rsize == 0)
4709			cifs_sb->ctx->rsize =
4710				server->ops->negotiate_rsize(tlink_tcon(open_file->tlink),
4711							     cifs_sb->ctx);
4712
4713		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
4714						   &rsize, credits);
4715		if (rc)
4716			break;
4717		nr_pages = min_t(size_t, rsize / PAGE_SIZE, ra_pages);
4718		if (next_cached != ULONG_MAX)
4719			nr_pages = min_t(size_t, nr_pages, next_cached - ra_index);
4720
4721		/*
4722		 * Give up immediately if rsize is too small to read an entire
4723		 * page. The VFS will fall back to readpage. We should never
4724		 * reach this point however since we set ra_pages to 0 when the
4725		 * rsize is smaller than a cache page.
4726		 */
4727		if (unlikely(!nr_pages)) {
4728			add_credits_and_wake_if(server, credits, 0);
4729			break;
4730		}
4731
4732		rdata = cifs_readdata_alloc(cifs_readahead_complete);
4733		if (!rdata) {
4734			/* best to give up if we're out of mem */
4735			add_credits_and_wake_if(server, credits, 0);
4736			break;
4737		}
4738
4739		rdata->offset	= ra_index * PAGE_SIZE;
4740		rdata->bytes	= nr_pages * PAGE_SIZE;
4741		rdata->cfile	= cifsFileInfo_get(open_file);
4742		rdata->server	= server;
4743		rdata->mapping	= ractl->mapping;
4744		rdata->pid	= pid;
4745		rdata->credits	= credits_on_stack;
4746
4747		for (i = 0; i < nr_pages; i++) {
4748			if (!readahead_folio(ractl))
4749				WARN_ON(1);
4750		}
4751		ra_pages -= nr_pages;
4752		ra_index += nr_pages;
4753
4754		iov_iter_xarray(&rdata->iter, ITER_DEST, &rdata->mapping->i_pages,
4755				rdata->offset, rdata->bytes);
4756
4757		rc = adjust_credits(server, &rdata->credits, rdata->bytes);
4758		if (!rc) {
4759			if (rdata->cfile->invalidHandle)
4760				rc = -EAGAIN;
4761			else
4762				rc = server->ops->async_readv(rdata);
4763		}
4764
4765		if (rc) {
4766			add_credits_and_wake_if(server, &rdata->credits, 0);
4767			cifs_unlock_folios(rdata->mapping,
4768					   rdata->offset / PAGE_SIZE,
4769					   (rdata->offset + rdata->bytes - 1) / PAGE_SIZE);
4770			/* Fall back to readpage in error/reconnect cases */
4771			kref_put(&rdata->refcount, cifs_readdata_release);
4772			break;
4773		}
4774
4775		kref_put(&rdata->refcount, cifs_readdata_release);
4776	}
4777
4778	free_xid(xid);
4779}
4780
4781/*
4782 * cifs_readpage_worker must be called with the page pinned
4783 */
4784static int cifs_readpage_worker(struct file *file, struct page *page,
4785	loff_t *poffset)
4786{
4787	struct inode *inode = file_inode(file);
4788	struct timespec64 atime, mtime;
4789	char *read_data;
4790	int rc;
4791
4792	/* Is the page cached? */
4793	rc = cifs_readpage_from_fscache(inode, page);
4794	if (rc == 0)
4795		goto read_complete;
4796
4797	read_data = kmap(page);
4798	/* for reads over a certain size we could initiate async read ahead */
4799
4800	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
4801
4802	if (rc < 0)
4803		goto io_error;
4804	else
4805		cifs_dbg(FYI, "Bytes read %d\n", rc);
4806
4807	/* we do not want atime to be less than mtime, it broke some apps */
4808	atime = inode_set_atime_to_ts(inode, current_time(inode));
4809	mtime = inode_get_mtime(inode);
4810	if (timespec64_compare(&atime, &mtime) < 0)
4811		inode_set_atime_to_ts(inode, inode_get_mtime(inode));
4812
4813	if (PAGE_SIZE > rc)
4814		memset(read_data + rc, 0, PAGE_SIZE - rc);
4815
4816	flush_dcache_page(page);
4817	SetPageUptodate(page);
4818	rc = 0;
4819
4820io_error:
4821	kunmap(page);
4822
4823read_complete:
4824	unlock_page(page);
4825	return rc;
4826}
4827
4828static int cifs_read_folio(struct file *file, struct folio *folio)
4829{
4830	struct page *page = &folio->page;
4831	loff_t offset = page_file_offset(page);
4832	int rc = -EACCES;
4833	unsigned int xid;
4834
4835	xid = get_xid();
4836
4837	if (file->private_data == NULL) {
4838		rc = -EBADF;
4839		free_xid(xid);
4840		return rc;
4841	}
4842
4843	cifs_dbg(FYI, "read_folio %p at offset %d 0x%x\n",
4844		 page, (int)offset, (int)offset);
4845
4846	rc = cifs_readpage_worker(file, page, &offset);
4847
4848	free_xid(xid);
4849	return rc;
4850}
4851
4852static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
4853{
4854	struct cifsFileInfo *open_file;
4855
4856	spin_lock(&cifs_inode->open_file_lock);
4857	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
4858		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
4859			spin_unlock(&cifs_inode->open_file_lock);
4860			return 1;
4861		}
4862	}
4863	spin_unlock(&cifs_inode->open_file_lock);
4864	return 0;
4865}
4866
4867/* We do not want to update the file size from server for inodes
4868   open for write - to avoid races with writepage extending the
4869   file.  In the future we could consider allowing refreshing the
4870   inode only on increases in the file size, but this is tricky
4871   to do without racing with writebehind page caching in the
4872   current Linux kernel design. */
4873bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file,
4874			    bool from_readdir)
4875{
4876	if (!cifsInode)
4877		return true;
4878
4879	if (is_inode_writable(cifsInode) ||
4880		((cifsInode->oplock & CIFS_CACHE_RW_FLG) != 0 && from_readdir)) {
4881		/* This inode is open for write at least once */
4882		struct cifs_sb_info *cifs_sb;
4883
4884		cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
4885		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4886			/* since there is no page cache to corrupt on direct
4887			   I/O, we can change the size safely */
4888			return true;
4889		}
4890
4891		if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
4892			return true;
4893
4894		return false;
4895	} else
4896		return true;
4897}
4898
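/*
 * ->write_begin: find or create the page covering @pos.  The read-in from
 * the server is skipped when the page is already uptodate, when the write
 * covers a whole page, or when we hold a read oplock and the page lies
 * beyond EOF or the write will overwrite all of its existing data.
 */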
4899static int cifs_write_begin(struct file *file, struct address_space *mapping,
4900			loff_t pos, unsigned len,
4901			struct page **pagep, void **fsdata)
4902{
4903	int oncethru = 0;
4904	pgoff_t index = pos >> PAGE_SHIFT;
4905	loff_t offset = pos & (PAGE_SIZE - 1);
4906	loff_t page_start = pos & PAGE_MASK;
4907	loff_t i_size;
4908	struct page *page;
4909	int rc = 0;
4910
4911	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
4912
4913start:
4914	page = grab_cache_page_write_begin(mapping, index);
4915	if (!page) {
4916		rc = -ENOMEM;
4917		goto out;
4918	}
4919
4920	if (PageUptodate(page))
4921		goto out;
4922
4923	/*
4924	 * If we write a full page it will be up to date, no need to read from
4925	 * the server. If the write is short, we'll end up doing a sync write
4926	 * instead.
4927	 */
4928	if (len == PAGE_SIZE)
4929		goto out;
4930
4931	/*
4932	 * optimize away the read when we have an oplock, and we're not
4933	 * expecting to use any of the data we'd be reading in. That
4934	 * is, when the page lies beyond the EOF, or straddles the EOF
4935	 * and the write will cover all of the existing data.
4936	 */
4937	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
4938		i_size = i_size_read(mapping->host);
4939		if (page_start >= i_size ||
4940		    (offset == 0 && (pos + len) >= i_size)) {
4941			zero_user_segments(page, 0, offset,
4942					   offset + len,
4943					   PAGE_SIZE);
4944			/*
4945			 * PageChecked means that the parts of the page
4946			 * to which we're not writing are considered up
4947			 * to date. Once the data is copied to the
4948			 * page, it can be set uptodate.
4949			 */
4950			SetPageChecked(page);
4951			goto out;
4952		}
4953	}
4954
4955	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
4956		/*
4957		 * might as well read a page, it is fast enough. If we get
4958		 * an error, we don't need to return it. cifs_write_end will
4959		 * do a sync write instead since PG_uptodate isn't set.
4960		 */
4961		cifs_readpage_worker(file, page, &page_start);
4962		put_page(page);
4963		oncethru = 1;
4964		goto start;
4965	} else {
4966		/* we could try using another file handle if there is one -
4967		   but how would we lock it to prevent close of that handle
4968		   racing with this read? In any case this will be written
4969		   out by write_end so it is fine */
4970	}
4971out:
4972	*pagep = page;
4973	return rc;
4974}
4975
4976static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
4977{
4978	if (folio_test_private(folio))
4979		return false;
4980	if (folio_test_fscache(folio)) {
4981		if (current_is_kswapd() || !(gfp & __GFP_FS))
4982			return false;
4983		folio_wait_fscache(folio);
4984	}
4985	fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
4986	return true;
4987}
4988
4989static void cifs_invalidate_folio(struct folio *folio, size_t offset,
4990				 size_t length)
4991{
4992	folio_wait_fscache(folio);
4993}
4994
4995static int cifs_launder_folio(struct folio *folio)
4996{
4997	int rc = 0;
4998	loff_t range_start = folio_pos(folio);
4999	loff_t range_end = range_start + folio_size(folio);
5000	struct writeback_control wbc = {
5001		.sync_mode = WB_SYNC_ALL,
5002		.nr_to_write = 0,
5003		.range_start = range_start,
5004		.range_end = range_end,
5005	};
5006
5007	cifs_dbg(FYI, "Launder page: %lu\n", folio->index);
5008
5009	if (folio_clear_dirty_for_io(folio))
5010		rc = cifs_writepage_locked(&folio->page, &wbc);
5011
5012	folio_wait_fscache(folio);
5013	return rc;
5014}
5015
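/*
 * Work item run when the server breaks an oplock or lease: downgrade the
 * cached oplock state, flush (and if necessary invalidate) the page cache,
 * push cached byte-range locks to the server and send the oplock
 * acknowledgement unless the file has since been closed.
 */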
5016void cifs_oplock_break(struct work_struct *work)
5017{
5018	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
5019						  oplock_break);
5020	struct inode *inode = d_inode(cfile->dentry);
5021	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
5022	struct cifsInodeInfo *cinode = CIFS_I(inode);
5023	struct cifs_tcon *tcon;
5024	struct TCP_Server_Info *server;
5025	struct tcon_link *tlink;
5026	int rc = 0;
5027	bool purge_cache = false, oplock_break_cancelled;
5028	__u64 persistent_fid, volatile_fid;
5029	__u16 net_fid;
5030
5031	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
5032			TASK_UNINTERRUPTIBLE);
5033
5034	tlink = cifs_sb_tlink(cifs_sb);
5035	if (IS_ERR(tlink))
5036		goto out;
5037	tcon = tlink_tcon(tlink);
5038	server = tcon->ses->server;
5039
5040	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
5041				      cfile->oplock_epoch, &purge_cache);
5042
5043	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
5044						cifs_has_mand_locks(cinode)) {
5045		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
5046			 inode);
5047		cinode->oplock = 0;
5048	}
5049
5050	if (inode && S_ISREG(inode->i_mode)) {
5051		if (CIFS_CACHE_READ(cinode))
5052			break_lease(inode, O_RDONLY);
5053		else
5054			break_lease(inode, O_WRONLY);
5055		rc = filemap_fdatawrite(inode->i_mapping);
5056		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
5057			rc = filemap_fdatawait(inode->i_mapping);
5058			mapping_set_error(inode->i_mapping, rc);
5059			cifs_zap_mapping(inode);
5060		}
5061		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
5062		if (CIFS_CACHE_WRITE(cinode))
5063			goto oplock_break_ack;
5064	}
5065
5066	rc = cifs_push_locks(cfile);
5067	if (rc)
5068		cifs_dbg(VFS, "Push locks rc = %d\n", rc);
5069
5070oplock_break_ack:
5071	/*
5072	 * When an oplock break is received and there are no active file
5073	 * handles, only cached ones, then close the deferred handles
5074	 * immediately so that a new open will not use a cached handle.
5075	 */
5076
5077	if (!CIFS_CACHE_HANDLE(cinode) && !list_empty(&cinode->deferred_closes))
5078		cifs_close_deferred_file(cinode);
5079
5080	persistent_fid = cfile->fid.persistent_fid;
5081	volatile_fid = cfile->fid.volatile_fid;
5082	net_fid = cfile->fid.netfid;
5083	oplock_break_cancelled = cfile->oplock_break_cancelled;
5084
5085	_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
5086	/*
5087	 * MS-SMB2 3.2.5.19.1 and 3.2.5.19.2 (and MS-CIFS 3.2.5.42) do not require
5088	 * an acknowledgment to be sent when the file has already been closed.
5089	 */
5090	spin_lock(&cinode->open_file_lock);
5091	/* check list empty since can race with kill_sb calling tree disconnect */
5092	if (!oplock_break_cancelled && !list_empty(&cinode->openFileList)) {
5093		spin_unlock(&cinode->open_file_lock);
5094		rc = server->ops->oplock_response(tcon, persistent_fid,
5095						  volatile_fid, net_fid, cinode);
5096		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
5097	} else
5098		spin_unlock(&cinode->open_file_lock);
5099
5100	cifs_put_tlink(tlink);
5101out:
5102	cifs_done_oplock_break(cinode);
5103}
5104
5105/*
5106 * The presence of cifs_direct_io() in the address space ops vector
5107 * allows open() with the O_DIRECT flag, which would have failed otherwise.
5108 *
5109 * In the non-cached mode (mount with cache=none), we shunt off direct
5110 * read and write requests so this method should never be called.
5111 *
5112 * Direct IO is not yet supported in the cached mode.
5113 */
5114static ssize_t
5115cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
5116{
5117	/*
5118	 * FIXME
5119	 * Eventually need to support direct IO for non forcedirectio mounts
5120	 */
5121	return -EINVAL;
5122}
5123
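/*
 * ->swap_activate: sanity-check that the file can back swap (swap_rw is
 * available and the file has no holes), flag the open file as a swapfile
 * and add a single swap extent covering the whole file.
 */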
5124static int cifs_swap_activate(struct swap_info_struct *sis,
5125			      struct file *swap_file, sector_t *span)
5126{
5127	struct cifsFileInfo *cfile = swap_file->private_data;
5128	struct inode *inode = swap_file->f_mapping->host;
5129	unsigned long blocks;
5130	long long isize;
5131
5132	cifs_dbg(FYI, "swap activate\n");
5133
5134	if (!swap_file->f_mapping->a_ops->swap_rw)
5135		/* Cannot support swap */
5136		return -EINVAL;
5137
5138	spin_lock(&inode->i_lock);
5139	blocks = inode->i_blocks;
5140	isize = inode->i_size;
5141	spin_unlock(&inode->i_lock);
5142	if (blocks*512 < isize) {
5143		pr_warn("swap activate: swapfile has holes\n");
5144		return -EINVAL;
5145	}
5146	*span = sis->pages;
5147
5148	pr_warn_once("Swap support over SMB3 is experimental\n");
5149
5150	/*
5151	 * TODO: consider adding ACL (or documenting how) to prevent other
5152	 * users (on this or other systems) from reading it
5153	 */
5154
5155
5156	/* TODO: add sk_set_memalloc(inet) or similar */
5157
5158	if (cfile)
5159		cfile->swapfile = true;
5160	/*
5161	 * TODO: Since the file is already open, we can't open with DENY_ALL here,
5162	 * but we could add a call to grab a byte-range lock to prevent others
5163	 * from reading or writing the file.
5164	 */
5165
5166	sis->flags |= SWP_FS_OPS;
5167	return add_swap_extent(sis, 0, sis->max, 0);
5168}
5169
5170static void cifs_swap_deactivate(struct file *file)
5171{
5172	struct cifsFileInfo *cfile = file->private_data;
5173
5174	cifs_dbg(FYI, "swap deactivate\n");
5175
5176	/* TODO: undo sk_set_memalloc(inet) will eventually be needed */
5177
5178	if (cfile)
5179		cfile->swapfile = false;
5180
5181	/* do we need to unpin (or unlock) the file */
5182}
5183
5184const struct address_space_operations cifs_addr_ops = {
5185	.read_folio = cifs_read_folio,
5186	.readahead = cifs_readahead,
5187	.writepages = cifs_writepages,
5188	.write_begin = cifs_write_begin,
5189	.write_end = cifs_write_end,
5190	.dirty_folio = netfs_dirty_folio,
5191	.release_folio = cifs_release_folio,
5192	.direct_IO = cifs_direct_io,
5193	.invalidate_folio = cifs_invalidate_folio,
5194	.launder_folio = cifs_launder_folio,
5195	.migrate_folio = filemap_migrate_folio,
5196	/*
5197	 * TODO: investigate and if useful we could add an is_dirty_writeback
5198	 * helper if needed
5199	 */
5200	.swap_activate = cifs_swap_activate,
5201	.swap_deactivate = cifs_swap_deactivate,
5202};
5203
5204/*
5205 * cifs_readahead requires the server to support a buffer large enough to
5206 * contain the header plus one complete page of data.  Otherwise, we need
5207 * to leave cifs_readahead out of the address space operations.
5208 */
5209const struct address_space_operations cifs_addr_ops_smallbuf = {
5210	.read_folio = cifs_read_folio,
5211	.writepages = cifs_writepages,
5212	.write_begin = cifs_write_begin,
5213	.write_end = cifs_write_end,
5214	.dirty_folio = netfs_dirty_folio,
5215	.release_folio = cifs_release_folio,
5216	.invalidate_folio = cifs_invalidate_folio,
5217	.launder_folio = cifs_launder_folio,
5218	.migrate_folio = filemap_migrate_folio,
5219};
5220